Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 28 additions & 11 deletions ext/AdaptiveArrayPoolsCUDAExt/debug.jl
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ Two checks per wrapper:
return
end
# Check 2: wrapper length exceeds backing vector — detects growth beyond backing
wrapper_len = prod(cu.dims)
# Use length() to avoid NTuple{N,Int} boxing from prod(cu.dims) when N is erased
wrapper_len = length(cu)
if wrapper_len > vec_len
@warn "Pool-backed CuArray{$T}: wrapper grew beyond backing vector " *
"(slot $i, wrapper: $wrapper_len, backing: $vec_len). " *
Expand Down Expand Up @@ -282,25 +283,41 @@ Throws `PoolRuntimeEscapeError` on overlap.
"""
function _check_cuda_pointer_overlap(arr::CuArray, pool::CuAdaptiveArrayPool, original_val = arr)
    # Byte range occupied by `arr`: starts at `arr_ptr`, `arr_end` is one past the last byte.
    arr_ptr = UInt(pointer(arr))
    arr_end = arr_ptr + length(arr) * sizeof(eltype(arr))

    # An empty pending return site is forwarded as `nothing`.
    return_site = let rs = pool._pending_return_site
        isempty(rs) ? nothing : rs
    end

    current_depth = pool._current_depth

    # Explicit per-slot calls via @generated — avoids do-block closure allocation.
    # `_check_all_cuda_slots_overlap` visits every fixed slot and then `pool.others`,
    # so no additional `foreach_fixed_slot` pass is made here (it would only repeat
    # the same fixed-slot checks).
    _check_all_cuda_slots_overlap(pool, arr_ptr, arr_end, current_depth, return_site, original_val)
    return
end

# Check others
for tp in values(pool.others)
_check_tp_cuda_overlap(tp, arr_ptr, arr_end, current_depth, pool, return_site, original_val)
# @generated unrolling over GPU_FIXED_SLOT_FIELDS — zero-allocation dispatch.
#
# The generator emits one explicit `_check_tp_cuda_overlap` call per field name in
# GPU_FIXED_SLOT_FIELDS, followed by a loop over `pool.others`. The generated body
# evaluates to `nothing`.
@generated function _check_all_cuda_slots_overlap(
        pool::CuAdaptiveArrayPool, arr_ptr::UInt, arr_end::UInt,
        current_depth::Int, return_site, original_val
    )
    # One call expression per fixed-slot field; the field name is spliced in as a
    # quoted symbol so `getfield` receives a constant.
    calls = [
        :(
            _check_tp_cuda_overlap(
                getfield(pool, $(QuoteNode(f))), arr_ptr, arr_end,
                current_depth, pool, return_site, original_val
            )
        )
        for f in GPU_FIXED_SLOT_FIELDS
    ]
    # The quoted expression is the generator's result; nothing may follow it.
    return quote
        Base.@_inline_meta
        $(calls...)
        for tp in values(pool.others)
            _check_tp_cuda_overlap(tp, arr_ptr, arr_end, current_depth, pool, return_site, original_val)
        end
        nothing
    end
end

@noinline function _check_tp_cuda_overlap(
Expand Down
39 changes: 28 additions & 11 deletions ext/AdaptiveArrayPoolsMetalExt/debug.jl
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ Two checks per wrapper:
return
end
# Check 2: wrapper length exceeds backing vector — detects growth beyond backing
wrapper_len = prod(getfield(mtl, :dims))
# Use length() to avoid NTuple{N,Int} boxing from prod(getfield(mtl, :dims)) when N is erased
wrapper_len = length(mtl)
if wrapper_len > vec_len
@warn "Pool-backed MtlArray{$T}: wrapper grew beyond backing vector " *
"(slot $i, wrapper: $wrapper_len, backing: $vec_len). " *
Expand Down Expand Up @@ -282,25 +283,41 @@ function _check_metal_overlap(arr::MtlArray, pool::MetalAdaptiveArrayPool, origi
arr_ptr = pointer(arr)
arr_buf = arr_ptr.buffer
arr_off = Int(arr_ptr.offset)
arr_sz = length(arr) * sizeof(eltype(arr))
arr_end = arr_off + arr_sz
arr_end = arr_off + length(arr) * sizeof(eltype(arr))

return_site = let rs = pool._pending_return_site
isempty(rs) ? nothing : rs
end

current_depth = pool._current_depth

# Check fixed slots
AdaptiveArrayPools.foreach_fixed_slot(pool) do tp
_check_tp_metal_overlap(tp, arr_buf, arr_off, arr_end, current_depth, pool, return_site, original_val)
end
# Explicit per-slot calls via @generated — avoids do-block closure allocation
_check_all_metal_slots_overlap(pool, arr_buf, arr_off, arr_end, current_depth, return_site, original_val)
return
end

# Check others
for tp in values(pool.others)
_check_tp_metal_overlap(tp, arr_buf, arr_off, arr_end, current_depth, pool, return_site, original_val)
# @generated unrolling over METAL_FIXED_SLOT_FIELDS — zero-allocation dispatch.
#
# The generator emits one explicit `_check_tp_metal_overlap` call per field name in
# METAL_FIXED_SLOT_FIELDS, followed by a loop over `pool.others`. The generated body
# evaluates to `nothing`.
@generated function _check_all_metal_slots_overlap(
        pool::MetalAdaptiveArrayPool, arr_buf, arr_off::Int, arr_end::Int,
        current_depth::Int, return_site, original_val
    )
    # One call expression per fixed-slot field; the field name is spliced in as a
    # quoted symbol so `getfield` receives a constant.
    calls = [
        :(
            _check_tp_metal_overlap(
                getfield(pool, $(QuoteNode(f))), arr_buf, arr_off, arr_end,
                current_depth, pool, return_site, original_val
            )
        )
        for f in METAL_FIXED_SLOT_FIELDS
    ]
    # The quoted expression is the generator's result; nothing may follow it.
    return quote
        Base.@_inline_meta
        $(calls...)
        for tp in values(pool.others)
            _check_tp_metal_overlap(tp, arr_buf, arr_off, arr_end, current_depth, pool, return_site, original_val)
        end
        nothing
    end
end

@noinline function _check_tp_metal_overlap(
Expand Down
86 changes: 57 additions & 29 deletions src/debug.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,41 +81,60 @@ end
# `arr` may be its parent Array used for the actual pointer comparison.
function _check_pointer_overlap(arr::Array, pool::AdaptiveArrayPool, original_val = arr)
    # Byte range occupied by `arr`: starts at `arr_ptr`, `arr_end` is one past the last byte.
    arr_ptr = UInt(pointer(arr))
    arr_end = arr_ptr + length(arr) * sizeof(eltype(arr))

    # An empty pending return site is forwarded as `nothing`.
    return_site = let rs = pool._pending_return_site
        isempty(rs) ? nothing : rs
    end

    current_depth = pool._current_depth

    # Explicit per-slot calls via @generated — avoids closure allocation (128 bytes).
    # The per-pool comparison lives in `_check_tp_pointer_overlap`; no local closure
    # is built here.
    _check_all_slots_pointer_overlap(pool, arr_ptr, arr_end, current_depth, return_site, original_val)
    return
end

# Check fixed slots
foreach_fixed_slot(pool) do tp
check_overlap(tp)
# Overlap check for one TypedPool, written as a free function so that no closure
# has to capture the caller's locals.
#
# Scans the slots from `_scope_boundary(tp, current_depth) + 1` through `tp.n_active`
# and reports the first backing Array whose byte range intersects
# [`arr_ptr`, `arr_end`) via `_throw_pool_escape_error`.
@noinline function _check_tp_pointer_overlap(
        tp::AbstractTypedPool, arr_ptr::UInt, arr_end::UInt,
        current_depth::Int, pool::AdaptiveArrayPool, return_site, original_val
    )
    first_slot = _scope_boundary(tp, current_depth) + 1
    for slot in first_slot:tp.n_active
        backing = @inbounds tp.vectors[slot]
        # BitVector slots are skipped here (checked via _check_bitchunks_overlap).
        backing isa Array || continue
        lo = UInt(pointer(backing))
        hi = lo + length(backing) * sizeof(eltype(backing))
        # Ranges are disjoint when one ends at or before the other begins.
        (arr_end <= lo || hi <= arr_ptr) && continue
        callsite = _lookup_borrow_callsite(pool, backing)
        _throw_pool_escape_error(original_val, eltype(backing), callsite, return_site)
    end
    return
end

# Check others
for tp in values(pool.others)
check_overlap(tp)
# @generated unrolling over FIXED_SLOT_FIELDS — zero-allocation dispatch.
#
# The generator emits one explicit `_check_tp_pointer_overlap` call per field name in
# FIXED_SLOT_FIELDS, followed by a loop over `pool.others`. The generated body
# evaluates to `nothing`.
@generated function _check_all_slots_pointer_overlap(
        pool::AdaptiveArrayPool, arr_ptr::UInt, arr_end::UInt,
        current_depth::Int, return_site, original_val
    )
    # One call expression per fixed-slot field; the field name is spliced in as a
    # quoted symbol so `getfield` receives a constant.
    calls = [
        :(
            _check_tp_pointer_overlap(
                getfield(pool, $(QuoteNode(f))), arr_ptr, arr_end,
                current_depth, pool, return_site, original_val
            )
        )
        for f in FIXED_SLOT_FIELDS
    ]
    # The quoted expression is the generator's result; nothing may follow it.
    return quote
        Base.@_inline_meta
        $(calls...)
        for tp in values(pool.others)
            _check_tp_pointer_overlap(tp, arr_ptr, arr_end, current_depth, pool, return_site, original_val)
        end
        nothing
    end
end

"""
Expand Down Expand Up @@ -395,6 +414,10 @@ end
# (legacy structs lack arr_wrappers field — they use N-way nd_arrays cache instead)
function _check_wrapper_mutation!(::AbstractTypedPool, ::Int, ::Int)
    # Generic fallback: nothing to inspect for this pool type.
    return nothing
end

# Function barrier for a wrapper's element count: the raw `:size` field is read and
# reduced with prod() inside this non-inlined method, so the NTuple{N,Int} is not
# boxed at the call site.
# length() would be simpler but doesn't reflect setfield!(:size) on Julia 1.11.
@noinline function _wrapper_prod_size(wrapper)::Int
    dims = getfield(wrapper, :size)
    return prod(dims)
end

# Julia 1.11+: TypedPool uses arr_wrappers (1:1 wrappers) and MemoryRef-based Array internals.
# Must not be defined on 1.10 where TypedPool has no arr_wrappers and Array has no :ref field.
@static if VERSION >= v"1.11-"
Expand All @@ -411,7 +434,8 @@ _check_wrapper_mutation!(::AbstractTypedPool, ::Int, ::Int) = nothing
@noinline function _check_wrapper_mutation!(tp::TypedPool{T}, new_n::Int, old_n::Int) where {T}
for i in (new_n + 1):old_n
@inbounds vec = tp.vectors[i]
vec_mem = getfield(vec, :ref).mem
# Use ccall for data pointer comparison (avoids boxing MemoryRef{T})
vec_ptr = ccall(:jl_array_ptr, Ptr{Cvoid}, (Any,), vec)
vec_len = length(vec)

for N_idx in 1:length(tp.arr_wrappers)
Expand All @@ -421,18 +445,22 @@ _check_wrapper_mutation!(::AbstractTypedPool, ::Int, ::Int) = nothing
i > length(wrappers) && continue
wrapper = @inbounds wrappers[i]
wrapper === nothing && continue
wrapper::Array # safety: ensure wrapper is Array before ccall (TypeError vs segfault)

arr = wrapper::Array
# Check 1: Memory identity — detects reallocation from resize!/push! beyond capacity
if getfield(arr, :ref).mem !== vec_mem
# Check 1: Data pointer identity — detects reallocation from resize!/push! beyond capacity
# ccall avoids boxing MemoryRef when wrapper's Array type is erased (from Vector{Any})
wrapper_ptr = ccall(:jl_array_ptr, Ptr{Cvoid}, (Any,), wrapper)
if wrapper_ptr != vec_ptr
@warn "Pool-backed Array{$T}: resize!/push! caused memory reallocation " *
"(slot $i). Pooling benefits (zero-alloc reuse) may be lost; " *
"temporary extra memory retention may occur. " *
"Consider requesting the exact size via acquire!(pool, T, n) if known in advance." maxlog = 1
return
end
# Check 2: wrapper length exceeds backing vector — detects growth beyond backing
wrapper_len = prod(getfield(arr, :size))
# Function barrier avoids NTuple{N,Int} boxing from prod(getfield(:size))
# (length() is not used because it may not reflect setfield!(:size) on Julia 1.11)
wrapper_len = _wrapper_prod_size(wrapper)
if wrapper_len > vec_len
@warn "Pool-backed Array{$T}: wrapper grew beyond backing vector " *
"(slot $i, wrapper: $wrapper_len, backing: $vec_len). " *
Expand Down
4 changes: 4 additions & 0 deletions src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,10 @@ runtime_check = true # or 1
"""
const RUNTIME_CHECK = _normalize_runtime_check(@load_preference("runtime_check", 0))

# Log a notice when the resolved RUNTIME_CHECK level is 1 or higher.
# NOTE(review): this is a top-level statement, so it runs when this file is
# evaluated — confirm the message is also wanted/seen when the package is loaded
# from a precompile cache.
RUNTIME_CHECK >= 1 &&
    @info "AdaptiveArrayPools: RUNTIME_CHECK is ENABLED (runtime escape detection active)"

# ==============================================================================
# AdaptiveArrayPool
# ==============================================================================
Expand Down
127 changes: 127 additions & 0 deletions test/test_zero_allocation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -528,3 +528,130 @@ const _ZERO_ALLOC_THRESHOLD = @static VERSION >= v"1.12-" ? 0 : 16
end

end # Zero-allocation Patterns

# ==============================================================================
# RUNTIME_CHECK=1 (S=1) Zero-Allocation Tests
# ==============================================================================
#
# Verifies that runtime safety checks (escape detection, borrow tracking,
# wrapper mutation detection, poisoning) add zero allocation after warmup.
# Uses AdaptiveArrayPool{1}() directly to test S=1 regardless of the global
# RUNTIME_CHECK preference.

# Every pattern below has the same shape: a helper runs 5 warm-up iterations, then
# returns the `@allocated` byte count of 100 further iterations of the same body.
# The inner testset asserts that count is exactly 0.
@testset "Zero-allocation with RUNTIME_CHECK=1 (S=1)" begin
# Internal (non-exported) entry points are imported explicitly for direct calls.
import AdaptiveArrayPools: _check_pointer_overlap, _lazy_checkpoint!, _lazy_rewind!

# One pool constructed with type parameter 1, shared (captured) by all helpers below.
pool_s1 = AdaptiveArrayPool{1}()

# ------------------------------------------------------------------
# Pattern 1: Single type acquire + rewind
# ------------------------------------------------------------------
function _test_s1_single_type()
# Warm-up iterations (not measured).
for _ in 1:5
_lazy_checkpoint!(pool_s1)
v = acquire!(pool_s1, Float64, 100)
fill!(v, 1.0)
_lazy_rewind!(pool_s1)
end
# Measured iterations: returns bytes allocated by the whole loop.
return @allocated for _ in 1:100
_lazy_checkpoint!(pool_s1)
v = acquire!(pool_s1, Float64, 100)
fill!(v, 1.0)
_lazy_rewind!(pool_s1)
end
end
@testset "S=1 single type" begin
@test _test_s1_single_type() == 0
end

# ------------------------------------------------------------------
# Pattern 2: Multi-type acquire + rewind
# ------------------------------------------------------------------
function _test_s1_multi_type()
# Warm-up iterations (not measured): three element types per scope.
for _ in 1:5
_lazy_checkpoint!(pool_s1)
acquire!(pool_s1, Float64, 100)
acquire!(pool_s1, Int64, 50)
acquire!(pool_s1, ComplexF64, 30)
_lazy_rewind!(pool_s1)
end
# Measured iterations: returns bytes allocated by the whole loop.
return @allocated for _ in 1:100
_lazy_checkpoint!(pool_s1)
acquire!(pool_s1, Float64, 100)
acquire!(pool_s1, Int64, 50)
acquire!(pool_s1, ComplexF64, 30)
_lazy_rewind!(pool_s1)
end
end
@testset "S=1 multi-type" begin
@test _test_s1_multi_type() == 0
end

# ------------------------------------------------------------------
# Pattern 3: N-D arrays (exercises wrapper mutation check)
# ------------------------------------------------------------------
function _test_s1_nd_arrays()
# Warm-up iterations (not measured): one 2-D and one 3-D acquire per scope.
for _ in 1:5
_lazy_checkpoint!(pool_s1)
acquire!(pool_s1, Float64, 10, 10)
acquire!(pool_s1, Float64, 5, 5, 5)
_lazy_rewind!(pool_s1)
end
# Measured iterations: returns bytes allocated by the whole loop.
return @allocated for _ in 1:100
_lazy_checkpoint!(pool_s1)
acquire!(pool_s1, Float64, 10, 10)
acquire!(pool_s1, Float64, 5, 5, 5)
_lazy_rewind!(pool_s1)
end
end
@testset "S=1 N-D arrays" begin
@test _test_s1_nd_arrays() == 0
end

# ------------------------------------------------------------------
# Pattern 4: _check_pointer_overlap (exercises escape detection)
# ------------------------------------------------------------------
function _test_s1_overlap_check()
# `ext` is allocated outside the pool and before the measured region, so its own
# allocation is not counted.
ext = Vector{Float64}(undef, 100)
# Warm-up iterations (not measured).
for _ in 1:5
_lazy_checkpoint!(pool_s1)
acquire!(pool_s1, Float64, 100)
_check_pointer_overlap(ext, pool_s1)
_lazy_rewind!(pool_s1)
end
# Measured iterations: returns bytes allocated by the whole loop.
return @allocated for _ in 1:100
_lazy_checkpoint!(pool_s1)
acquire!(pool_s1, Float64, 100)
_check_pointer_overlap(ext, pool_s1)
_lazy_rewind!(pool_s1)
end
end
@testset "S=1 overlap check" begin
@test _test_s1_overlap_check() == 0
end

# ------------------------------------------------------------------
# Pattern 5: Nested scopes
# ------------------------------------------------------------------
function _test_s1_nested()
# Warm-up iterations (not measured): two checkpoints, each paired with a rewind.
for _ in 1:5
_lazy_checkpoint!(pool_s1)
acquire!(pool_s1, Float64, 50)
_lazy_checkpoint!(pool_s1)
acquire!(pool_s1, Float64, 30)
_lazy_rewind!(pool_s1)
_lazy_rewind!(pool_s1)
end
# Measured iterations: returns bytes allocated by the whole loop.
return @allocated for _ in 1:100
_lazy_checkpoint!(pool_s1)
acquire!(pool_s1, Float64, 50)
_lazy_checkpoint!(pool_s1)
acquire!(pool_s1, Float64, 30)
_lazy_rewind!(pool_s1)
_lazy_rewind!(pool_s1)
end
end
@testset "S=1 nested scopes" begin
@test _test_s1_nested() == 0
end
end
Loading