From 2fac22c647a36b7b38bd415d74c242e017437d27 Mon Sep 17 00:00:00 2001 From: Balasubramania Pillai Date: Thu, 12 Mar 2026 14:24:52 -0400 Subject: [PATCH 1/6] TASK-214337 debug code --- src/lgc.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/lgc.c b/src/lgc.c index 116a058..f366d6b 100644 --- a/src/lgc.c +++ b/src/lgc.c @@ -1656,6 +1656,8 @@ void luaC_inherit_thread(lua_State *L, lua_State *th) ck_sequence_write_end(&th->memlock); luaE_flush_stringtable(th); + validate_heap_objects(L, "inherit_thread:before_transfer"); + TAILQ_FOREACH_SAFE(steal, &th->heap->objects, allocd, tmp) { /* Update owner before removing from source heap. This ensures that * if any concurrent reader sees this object, it will either: @@ -1670,6 +1672,9 @@ void luaC_inherit_thread(lua_State *L, lua_State *th) make_grey(L, steal); } + + validate_heap_objects(L, "inherit_thread:after_transfer"); + TAILQ_REMOVE(&G(L)->all_heaps, th->heap, heaps); unlock_all_threads(); @@ -1954,6 +1959,43 @@ static void sanity_check_mark_status(lua_State *L) } } +static void validate_heap_objects(lua_State *L, const char *where) +{ + GCheader *o; + int count = 0; + const uint64_t poison = 0x5a5a5a5a5a5a5a5aULL; + + TAILQ_FOREACH(o, &L->heap->objects, allocd) { + if ((uintptr_t)o == poison || + ((uintptr_t)o & 0x7) != 0 || + o->tt > 11 || + (o->marked & FREEDBIT)) { + fprintf(stderr, + "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d bad_node=%p" + " (tt=%d marked=0x%x)\n", + where, (void*)L, (void*)L->heap, count, (void*)o, + (uintptr_t)o != poison ? o->tt : -1, + (uintptr_t)o != poison ? o->marked : 0xff); + abort(); + } + if (o->owner != L->heap) { + fprintf(stderr, + "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d node=%p" + " owner=%p (expected %p) tt=%d\n", + where, (void*)L, (void*)L->heap, count, (void*)o, + (void*)o->owner, (void*)L->heap, o->tt); + abort(); + } + count++; + if (count > 10000000) { + fprintf(stderr, + "thrlua HEAP CORRUPT [%s] L=%p heap=%p: list cycle detected\n", + where, (void*)L, (void*)L->heap); + abort(); + } + } +} + static int local_collection(lua_State *L, int type) { int reclaimed; @@ -1974,6 +2016,8 @@ static int local_collection(lua_State *L, int type) * while we are in this function and manipulating our string tables or heap */ block_collector(L, pt); + validate_heap_objects(L, "local_collection:entry"); + /* prune out excess string table entries. * We don't want to be too aggressive, as we'd like to see some benefit * from string interning. We remove the head of each chain and repeat @@ -2020,6 +2064,8 @@ static int local_collection(lua_State *L, int type) check_references(L); } + validate_heap_objects(L, "local_collection:after_mark"); + /* run any finalizers; may turn some objects grey again */ run_finalize(L); @@ -2035,11 +2081,15 @@ static int local_collection(lua_State *L, int type) /* remove collected weak values from weak tables */ fixup_weak_refs(L); + validate_heap_objects(L, "local_collection:before_reclaim"); + /* and now we can free whatever is left in White. Note that we're still * blocked here so we are pulling white out of the heap and placing them * in another list that will free them when we unblock the collector. */ reclaimed = reclaim_white(L, 0); + validate_heap_objects(L, "local_collection:after_reclaim"); + /* White is the new Black */ L->black = !L->black; @@ -2052,9 +2102,13 @@ static int local_collection(lua_State *L, int type) /* Finalize deferred objects */ finalize_deferred(L); + validate_heap_objects(L, "local_collection:after_finalize"); + /* Free any objects that were white */ free_deferred_white(L); + validate_heap_objects(L, "local_collection:after_free"); + /* Free any deferred stringtable nodes */ while (tofree) { n = tofree; @@ -2117,9 +2171,20 @@ static void global_trace_obj(lua_State *L, GCheader *lval, GCheader *rval) static void trace_heap(GCheap *h) { GCheader *o; + const uint64_t poison = 0x5a5a5a5a5a5a5a5aULL; ck_pr_store_32(&h->owner->xref_count, 0); TAILQ_FOREACH(o, &h->objects, allocd) { + if ((uintptr_t)o == poison || ((uintptr_t)o & 0x7) != 0 || + o->tt > 11 || (o->marked & FREEDBIT)) { + fprintf(stderr, + "thrlua HEAP CORRUPT [trace_heap] heap=%p owner=%p bad_node=%p" + " (tt=%d marked=0x%x)\n", + (void*)h, (void*)h->owner, (void*)o, + (uintptr_t)o != poison ? o->tt : -1, + (uintptr_t)o != poison ? o->marked : 0xff); + abort(); + } global_trace_obj(h->owner, &h->owner->gch, o); } From 86aa78084518be7d9df7328db84c19898c2c2818 Mon Sep 17 00:00:00 2001 From: Balasubramania Pillai Date: Thu, 12 Mar 2026 14:29:42 -0400 Subject: [PATCH 2/6] TASK-214337 fix compile error --- src/lgc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lgc.c b/src/lgc.c index f366d6b..ffe6218 100644 --- a/src/lgc.c +++ b/src/lgc.c @@ -106,6 +106,7 @@ static uint32_t trace_heaps = 0; static int local_collection(lua_State *L, int type); static int global_trace(lua_State *L); static void unblock_mutators(lua_State *L); +static void validate_heap_objects(lua_State *L, const char *where); static INLINE int is_black(lua_State *L, GCheader *obj) { From b5a53a3144febd5a46e6094eb1988e3449689b8a Mon Sep 17 00:00:00 2001 From: Balasubramania Pillai Date: Thu, 12 Mar 2026 14:36:29 -0400 Subject: [PATCH 3/6] TASK-214337 send to paniclog --- src/lgc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lgc.c b/src/lgc.c index ffe6218..b11d154 100644 --- a/src/lgc.c +++ b/src/lgc.c @@ -1969,9 +1969,9 @@ static void validate_heap_objects(lua_State *L, const char *where) TAILQ_FOREACH(o, &L->heap->objects, allocd) { if ((uintptr_t)o == poison || ((uintptr_t)o & 0x7) != 0 || - o->tt > 11 || + o->tt > LUA_TGLOBAL || (o->marked & FREEDBIT)) { - fprintf(stderr, + thrlua_log(L, DCRITICAL, "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d bad_node=%p" " (tt=%d marked=0x%x)\n", where, (void*)L, (void*)L->heap, count, (void*)o, @@ -1980,7 +1980,7 @@ static void validate_heap_objects(lua_State *L, const char *where) abort(); } if (o->owner != L->heap) { - fprintf(stderr, + thrlua_log(L, DCRITICAL, "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d node=%p" " owner=%p (expected %p) tt=%d\n", where, (void*)L, (void*)L->heap, count, (void*)o, @@ -1989,7 +1989,7 @@ static void validate_heap_objects(lua_State *L, const char *where) } count++; if (count > 10000000) { - fprintf(stderr, + thrlua_log(L, DCRITICAL, "thrlua HEAP CORRUPT [%s] L=%p heap=%p: list cycle detected\n", where, (void*)L, (void*)L->heap); abort(); @@ -2177,8 +2177,8 @@ static void trace_heap(GCheap *h) ck_pr_store_32(&h->owner->xref_count, 0); TAILQ_FOREACH(o, &h->objects, allocd) { if ((uintptr_t)o == poison || ((uintptr_t)o & 0x7) != 0 || - o->tt > 11 || (o->marked & FREEDBIT)) { - fprintf(stderr, + o->tt > LUA_TGLOBAL || (o->marked & FREEDBIT)) { + thrlua_log(h->owner, DCRITICAL, "thrlua HEAP CORRUPT [trace_heap] heap=%p owner=%p bad_node=%p" " (tt=%d marked=0x%x)\n", (void*)h, (void*)h->owner, (void*)o, From 165dba1507e4f3d84d4852b6b0820ee043010726 Mon Sep 17 00:00:00 2001 From: Balasubramania Pillai Date: Thu, 12 Mar 2026 14:56:07 -0400 Subject: [PATCH 4/6] TASK-214337 improvements --- src/lgc.c | 52 +++++++++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/src/lgc.c b/src/lgc.c index b11d154..292fd4c 100644 --- a/src/lgc.c +++ b/src/lgc.c @@ -1960,26 +1960,36 @@ static void sanity_check_mark_status(lua_State *L) } } +static int is_bad_gc_ptr(uintptr_t p) +{ + return p == 0x5a5a5a5a5a5a5a5aULL || p == 0 || (p & 0x7) != 0; +} + static void validate_heap_objects(lua_State *L, const char *where) { GCheader *o; int count = 0; - const uint64_t poison = 0x5a5a5a5a5a5a5a5aULL; TAILQ_FOREACH(o, &L->heap->objects, allocd) { - if ((uintptr_t)o == poison || - ((uintptr_t)o & 0x7) != 0 || - o->tt > LUA_TGLOBAL || + if (is_bad_gc_ptr((uintptr_t)o) || + o->tt < LUA_TSTRING || o->tt > LUA_TGLOBAL || (o->marked & FREEDBIT)) { + fprintf(stderr, + "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d bad_node=%p\n", + where, (void*)L, (void*)L->heap, count, (void*)o); + if (!is_bad_gc_ptr((uintptr_t)o)) + fprintf(stderr, " tt=%d marked=0x%x\n", o->tt, o->marked); thrlua_log(L, DCRITICAL, - "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d bad_node=%p" - " (tt=%d marked=0x%x)\n", - where, (void*)L, (void*)L->heap, count, (void*)o, - (uintptr_t)o != poison ? o->tt : -1, - (uintptr_t)o != poison ? o->marked : 0xff); + "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d bad_node=%p\n", + where, (void*)L, (void*)L->heap, count, (void*)o); abort(); } if (o->owner != L->heap) { + fprintf(stderr, + "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d node=%p" + " owner=%p (expected %p) tt=%d\n", + where, (void*)L, (void*)L->heap, count, (void*)o, + (void*)o->owner, (void*)L->heap, o->tt); thrlua_log(L, DCRITICAL, "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d node=%p" " owner=%p (expected %p) tt=%d\n", @@ -1988,12 +1998,6 @@ static void validate_heap_objects(lua_State *L, const char *where) abort(); } count++; - if (count > 10000000) { - thrlua_log(L, DCRITICAL, - "thrlua HEAP CORRUPT [%s] L=%p heap=%p: list cycle detected\n", - where, (void*)L, (void*)L->heap); - abort(); - } } } @@ -2172,18 +2176,20 @@ static void global_trace_obj(lua_State *L, GCheader *lval, GCheader *rval) static void trace_heap(GCheap *h) { GCheader *o; - const uint64_t poison = 0x5a5a5a5a5a5a5a5aULL; ck_pr_store_32(&h->owner->xref_count, 0); TAILQ_FOREACH(o, &h->objects, allocd) { - if ((uintptr_t)o == poison || ((uintptr_t)o & 0x7) != 0 || - o->tt > LUA_TGLOBAL || (o->marked & FREEDBIT)) { + if (is_bad_gc_ptr((uintptr_t)o) || + o->tt < LUA_TSTRING || o->tt > LUA_TGLOBAL || + (o->marked & FREEDBIT)) { + fprintf(stderr, + "thrlua HEAP CORRUPT [trace_heap] heap=%p owner=%p bad_node=%p\n", + (void*)h, (void*)h->owner, (void*)o); + if (!is_bad_gc_ptr((uintptr_t)o)) + fprintf(stderr, " tt=%d marked=0x%x\n", o->tt, o->marked); thrlua_log(h->owner, DCRITICAL, - "thrlua HEAP CORRUPT [trace_heap] heap=%p owner=%p bad_node=%p" - " (tt=%d marked=0x%x)\n", - (void*)h, (void*)h->owner, (void*)o, - (uintptr_t)o != poison ? o->tt : -1, - (uintptr_t)o != poison ? o->marked : 0xff); + "thrlua HEAP CORRUPT [trace_heap] heap=%p owner=%p bad_node=%p\n", + (void*)h, (void*)h->owner, (void*)o); abort(); } global_trace_obj(h->owner, &h->owner->gch, o); From 7d07c5a997fc0542765e438548d191c4e90646f0 Mon Sep 17 00:00:00 2001 From: Balasubramania Pillai Date: Thu, 12 Mar 2026 15:11:39 -0400 Subject: [PATCH 5/6] TASK-214337 improvements --- src/lgc.c | 35 ++++++++++------------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/src/lgc.c b/src/lgc.c index 292fd4c..65d9478 100644 --- a/src/lgc.c +++ b/src/lgc.c @@ -1974,22 +1974,19 @@ static void validate_heap_objects(lua_State *L, const char *where) if (is_bad_gc_ptr((uintptr_t)o) || o->tt < LUA_TSTRING || o->tt > LUA_TGLOBAL || (o->marked & FREEDBIT)) { - fprintf(stderr, - "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d bad_node=%p\n", - where, (void*)L, (void*)L->heap, count, (void*)o); if (!is_bad_gc_ptr((uintptr_t)o)) - fprintf(stderr, " tt=%d marked=0x%x\n", o->tt, o->marked); - thrlua_log(L, DCRITICAL, - "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d bad_node=%p\n", - where, (void*)L, (void*)L->heap, count, (void*)o); + thrlua_log(L, DCRITICAL, + "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d bad_node=%p" + " tt=%d marked=0x%x\n", + where, (void*)L, (void*)L->heap, count, (void*)o, + o->tt, o->marked); + else + thrlua_log(L, DCRITICAL, + "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d bad_node=%p\n", + where, (void*)L, (void*)L->heap, count, (void*)o); abort(); } if (o->owner != L->heap) { - fprintf(stderr, - "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d node=%p" - " owner=%p (expected %p) tt=%d\n", - where, (void*)L, (void*)L->heap, count, (void*)o, - (void*)o->owner, (void*)L->heap, o->tt); thrlua_log(L, DCRITICAL, "thrlua HEAP CORRUPT [%s] L=%p heap=%p count=%d node=%p" " owner=%p (expected %p) tt=%d\n", @@ -2178,20 +2175,8 @@ static void trace_heap(GCheap *h) GCheader *o; ck_pr_store_32(&h->owner->xref_count, 0); + validate_heap_objects(h->owner, "trace_heap"); TAILQ_FOREACH(o, &h->objects, allocd) { - if (is_bad_gc_ptr((uintptr_t)o) || - o->tt < LUA_TSTRING || o->tt > LUA_TGLOBAL || - (o->marked & FREEDBIT)) { - fprintf(stderr, - "thrlua HEAP CORRUPT [trace_heap] heap=%p owner=%p bad_node=%p\n", - (void*)h, (void*)h->owner, (void*)o); - if (!is_bad_gc_ptr((uintptr_t)o)) - fprintf(stderr, " tt=%d marked=0x%x\n", o->tt, o->marked); - thrlua_log(h->owner, DCRITICAL, - "thrlua HEAP CORRUPT [trace_heap] heap=%p owner=%p bad_node=%p\n", - (void*)h, (void*)h->owner, (void*)o); - abort(); - } global_trace_obj(h->owner, &h->owner->gch, o); } From 53b84ba85fb71c7c4234cd91bd412fe72ceada8b Mon Sep 17 00:00:00 2001 From: Balasubramania Pillai Date: Fri, 13 Mar 2026 15:13:07 -0400 Subject: [PATCH 6/6] TASK-214337 delay unblocking global trace during local gc --- src/lgc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lgc.c b/src/lgc.c index 65d9478..64254de 100644 --- a/src/lgc.c +++ b/src/lgc.c @@ -2097,15 +2097,15 @@ static int local_collection(lua_State *L, int type) sanity_check_mark_status(L); - /* Now we can un-block the global collector, as we are done with our string - * tables and our heap. */ - unblock_collector(L, pt); - /* Finalize deferred objects */ finalize_deferred(L); validate_heap_objects(L, "local_collection:after_finalize"); + /* Now we can un-block the global collector, as we are done with our string + * tables and our heap. */ + unblock_collector(L, pt); + /* Free any objects that were white */ free_deferred_white(L);