diff --git a/src/gc-stock.c b/src/gc-stock.c index 8118b3c5629ae..ea4654aa8a1b6 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3414,6 +3414,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) } jl_gc_debug_print(); + ct->ctx.activefp = (char*)jl_get_frame_addr(); int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state); jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING); // `jl_safepoint_start_gc()` makes sure only one thread can run the GC. diff --git a/src/julia_gcext.h b/src/julia_gcext.h index e124f58a09402..6403b586bcefe 100644 --- a/src/julia_gcext.h +++ b/src/julia_gcext.h @@ -139,6 +139,9 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) JL_NOTSAFEPOINT; // *active_start and *active_end respectively *total_start and *total_end // accordingly. The range for the active part is a best-effort approximation // and may not be tight. +// +// NOTE: Only valid to call from within a GC context. May return incorrect +// values and segfault otherwise. JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task, char **active_start, char **active_end, char **total_start, char **total_end) JL_NOTSAFEPOINT; diff --git a/src/julia_internal.h b/src/julia_internal.h index 3e4967c9d4dca..77c05274de68c 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1765,18 +1765,6 @@ JL_DLLEXPORT unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *fi void register_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT; void deregister_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT; -STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT -{ -#ifdef __GNUC__ - return __builtin_frame_address(0); -#else - void *dummy = NULL; - // The mask is to suppress the compiler warning about returning - // address of local variable - return (void*)((uintptr_t)&dummy & ~(uintptr_t)15); -#endif -} - // Log `msg` to the current logger by calling CoreLogging.logmsg_shim() on the // julia side. If any of module, group, id, file or line are NULL, these will // be passed to the julia side as `nothing`. If `kwargs` is NULL an empty set diff --git a/src/julia_threads.h b/src/julia_threads.h index 0f0ee11d1918c..18b05b2b72d35 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -96,6 +96,7 @@ typedef struct { _jl_ucontext_t *ctx; jl_stack_context_t *copy_ctx; }; + void *activefp; void *stkbuf; // malloc'd memory (either copybuf or stack) size_t bufsz; // actual sizeof stkbuf unsigned int copy_stack:31; // sizeof stack for copybuf @@ -226,6 +227,7 @@ typedef jl_value_t jl_function_t; typedef struct _jl_timing_block_t jl_timing_block_t; typedef struct _jl_timing_event_t jl_timing_event_t; typedef struct _jl_excstack_t jl_excstack_t; + typedef struct _jl_handler_t jl_handler_t; typedef struct _jl_task_t { @@ -316,6 +318,18 @@ JL_DLLEXPORT void (jl_cpu_pause)(void); JL_DLLEXPORT void (jl_cpu_suspend)(void); JL_DLLEXPORT void (jl_cpu_wake)(void); +STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT +{ +#ifdef __GNUC__ + return __builtin_frame_address(0); +#else + void *dummy = NULL; + // The mask is to suppress the compiler warning about returning + // address of local variable + return (void*)((uintptr_t)&dummy & ~(uintptr_t)15); +#endif +} + #ifdef __clang_gcanalyzer__ // Note that the sigint safepoint can also trigger GC, albeit less likely void jl_gc_safepoint_(jl_ptls_t tls); @@ -343,6 +357,9 @@ STATIC_INLINE int8_t jl_gc_state_set(jl_ptls_t ptls, int8_t state, { assert(old_state != JL_GC_PARALLEL_COLLECTOR_THREAD); assert(old_state != JL_GC_CONCURRENT_COLLECTOR_THREAD); + if (__builtin_constant_p(state) ? state : // required if !old_state && state, otherwise optional + __builtin_constant_p(old_state) ? !old_state : 1) + jl_atomic_load_relaxed(&ptls->current_task)->ctx.activefp = (char*)jl_get_frame_addr(); jl_atomic_store_release(&ptls->gc_state, state); if (state == JL_GC_STATE_UNSAFE || old_state == JL_GC_STATE_UNSAFE) jl_gc_safepoint_(ptls); diff --git a/src/llvm-codegen-shared.h b/src/llvm-codegen-shared.h index ff6f5a97299d7..ae83b6ffcb9f4 100644 --- a/src/llvm-codegen-shared.h +++ b/src/llvm-codegen-shared.h @@ -237,9 +237,10 @@ static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_s emit_signal_fence(builder); } -static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state, bool final) +static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, llvm::Value *old_state, bool final) { using namespace llvm; + Value *ptls = get_current_ptls_from_task(builder, task, tbaa); Type *T_int8 = state->getType(); unsigned offset = offsetof(jl_tls_states_t, gc_state); Value *gc_state = builder.CreateConstInBoundsGEP1_32(T_int8, ptls, offset, "gc_state"); @@ -247,6 +248,13 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T old_state = builder.CreateLoad(T_int8, gc_state); cast(old_state)->setOrdering(AtomicOrdering::Monotonic); } + if (isa(state) ? state == builder.getInt8(0) : + isa(old_state) ? old_state == builder.getInt8(0) : true) { + unsigned offset = offsetof(jl_task_t, ctx.activefp); + Value *currentfp = builder.CreateConstInBoundsGEP1_32(T_int8, task, offset, "gc_state"); + Value *fp = builder.CreateIntrinsic(Intrinsic::frameaddress, {builder.getPtrTy()}, {builder.getInt32(0)}); + builder.CreateAlignedStore(fp, currentfp, Align(sizeof(void*))); + } builder.CreateAlignedStore(state, gc_state, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release); if (auto *C = dyn_cast(old_state)) if (C->isZero()) @@ -261,39 +269,38 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T builder.CreateICmpEQ(state, zero8)), passBB, exitBB); builder.SetInsertPoint(passBB); - MDNode *tbaa = get_tbaa_const(builder.getContext()); - emit_gc_safepoint(builder, T_size, ptls, tbaa, final); + emit_gc_safepoint(builder, T_size, ptls, get_tbaa_const(builder.getContext()), final); builder.CreateBr(exitBB); builder.SetInsertPoint(exitBB); return old_state; } -static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, bool final) +static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, bool final) { using namespace llvm; Value *state = builder.getInt8(0); - return emit_gc_state_set(builder, T_size, ptls, state, nullptr, final); + return emit_gc_state_set(builder, T_size, task, tbaa, state, nullptr, final); } -static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final) +static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, bool final) { using namespace llvm; Value *old_state = builder.getInt8(JL_GC_STATE_UNSAFE); - return emit_gc_state_set(builder, T_size, ptls, state, old_state, final); + return emit_gc_state_set(builder, T_size, task, tbaa, state, old_state, final); } -static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, bool final) +static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, bool final) { using namespace llvm; Value *state = builder.getInt8(JL_GC_STATE_SAFE); - return emit_gc_state_set(builder, T_size, ptls, state, nullptr, final); + return emit_gc_state_set(builder, T_size, task, tbaa, state, nullptr, final); } -static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final) +static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, bool final) { using namespace llvm; Value *old_state = builder.getInt8(JL_GC_STATE_SAFE); - return emit_gc_state_set(builder, T_size, ptls, state, old_state, final); + return emit_gc_state_set(builder, T_size, task, tbaa, state, old_state, final); } // Compatibility shims for LLVM attribute APIs that were renamed in LLVM 14. diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index e36136859517a..5b3a14d6b8e9b 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -185,7 +185,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, builder.SetInsertPoint(fastTerm->getParent()); fastTerm->removeFromParent(); MDNode *tbaa = tbaa_gcframe; - Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa), true); + Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_task_from_pgcstack(builder, pgcstack), tbaa, true); builder.Insert(fastTerm); phi->addIncoming(pgcstack, fastTerm->getParent()); // emit pre-return cleanup @@ -197,7 +197,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, for (auto &BB : *pgcstack->getParent()->getParent()) { if (isa(BB.getTerminator())) { builder.SetInsertPoint(BB.getTerminator()); - emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state, true); + emit_gc_unsafe_leave(builder, T_size, get_current_task_from_pgcstack(builder, phi), tbaa, last_gc_state, true); } } } diff --git a/src/safepoint.c b/src/safepoint.c index 66bea539861f8..1f5edfa569b67 100644 --- a/src/safepoint.c +++ b/src/safepoint.c @@ -240,6 +240,7 @@ void jl_set_gc_and_wait(jl_task_t *ct) // n.b. not used on _OS_DARWIN_ // reading own gc state doesn't need atomic ops since no one else // should store to it. int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state); + ct->ctx.activefp = (char*)jl_get_frame_addr(); jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING); uv_mutex_lock(&safepoint_lock); uv_cond_broadcast(&safepoint_cond_begin); @@ -279,6 +280,7 @@ void jl_safepoint_wait_thread_resume(jl_task_t *ct) // might have already observed our gc_state. // if (!jl_atomic_load_relaxed(&ct->ptls->suspend_count)) return; int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state); + ct->ctx.activefp = (char*)jl_get_frame_addr(); jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING); uv_mutex_lock(&ct->ptls->sleep_lock); if (jl_atomic_load_relaxed(&ct->ptls->suspend_count)) { diff --git a/src/signals-mach.c b/src/signals-mach.c index 1c4af2cf9d033..17c1af632a5fb 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -108,6 +108,17 @@ void jl_mach_gc_end(void) // implement jl_set_gc_and_wait from a different thread static void jl_mach_gc_wait(jl_ptls_t ptls2, mach_port_t thread, int16_t tid) { + unsigned int count = MACH_THREAD_STATE_COUNT; + host_thread_state_t state; + kern_return_t ret = thread_get_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, &count); + ptls2->current_task->ctx.activefp = +#ifdef _CPU_X86_64_ + state->__rsp; +#elif defined(_CPU_AARCH64_) + state->__sp; +#else +#error fp not defined for platform +#endif // relaxed, since we don't mind missing one--we will hit another soon (immediately probably) uv_mutex_lock(&safepoint_lock); // Since this gets set to zero only while the safepoint_lock was held this diff --git a/src/task.c b/src/task.c index 37e7f0e1f5440..b1e4e8709d87f 100644 --- a/src/task.c +++ b/src/task.c @@ -356,49 +356,35 @@ JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task, char **active_start, char **active_end, char **total_start, char **total_end) { - if (!task->ctx.started) { - *total_start = *active_start = 0; - *total_end = *active_end = 0; + *total_start = *active_start = 0; + *total_end = *active_end = 0; + if (!task->ctx.started) return; - } - jl_ptls_t ptls2 = task->ptls; - if (task->ctx.copy_stack && ptls2) { - *total_start = *active_start = (char*)ptls2->stackbase - ptls2->stacksize; - *total_end = *active_end = (char*)ptls2->stackbase; - } - else if (task->ctx.stkbuf) { - *total_start = *active_start = (char*)task->ctx.stkbuf; -#ifndef _OS_WINDOWS_ - jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; - if (ptls0->root_task == task) { - // See jl_init_root_task(). The root task of the main thread - // has its buffer enlarged by an artificial 3000000 bytes, but - // that means that the start of the buffer usually points to - // inaccessible memory. We need to correct for this. - *active_start += ROOT_TASK_STACK_ADJUSTMENT; - *total_start += ROOT_TASK_STACK_ADJUSTMENT; - } -#endif - - *total_end = *active_end = (char*)task->ctx.stkbuf + task->ctx.bufsz; -#ifdef COPY_STACKS - // save_stack stores the stack of an inactive task in stkbuf, and the - // actual number of used bytes in copy_stack. - if (task->ctx.copy_stack > 1) + if (task->ctx.copy_stack) { + if (task->ctx.ctx) { + // currently paused task + // save_stack stores the stack of an inactive task in stkbuf, and the + // actual number of used bytes in copy_stack. + *active_start = *total_start = (char*)task->ctx.stkbuf; *active_end = (char*)task->ctx.stkbuf + task->ctx.copy_stack; -#endif + *total_end = (char*)task->ctx.stkbuf + task->ctx.bufsz; + } + else if (task->ptls) { + jl_ptls_t ptls2 = task->ptls; + // currently running copy task + *active_start = (char*)task->ctx.activefp; + *total_start = (char*)ptls2->stackbase - ptls2->stacksize; + *total_end = *active_end = (char*)ptls2->stackbase; + } } else { - // no stack allocated yet - *total_start = *active_start = 0; - *total_end = *active_end = 0; - return; - } - - if (task == jl_current_task) { - // scan up to current `sp` for current thread and task - *active_start = (char*)jl_get_frame_addr(); + if (task->ctx.stkbuf) { + // currently running task + *active_start = (char*)task->ctx.activefp; + *total_start = (char*)task->ctx.stkbuf; + *active_end = *total_end = (char*)task->ctx.stkbuf + task->ctx.bufsz; + } } } @@ -506,6 +492,7 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt) } // set up global state for new task and clear global state for old task + lastt->ctx.activefp = (char*)jl_get_frame_addr(); t->ptls = ptls; jl_atomic_store_relaxed(&ptls->current_task, t); JL_GC_PROMISE_ROOTED(t); diff --git a/test/gcext/gcext-test.jl b/test/gcext/gcext-test.jl index 81637392e3c5d..46b4e86ab5d9d 100644 --- a/test/gcext/gcext-test.jl +++ b/test/gcext/gcext-test.jl @@ -27,11 +27,12 @@ end close(out.in) close(err.in) out_task = @async readlines(out) - err_task = @async readlines(err) + err_task = @async read(err, String) # @test success(p) errlines = fetch(err_task) lines = fetch(out_task) - @test length(errlines) == 0 + print(errlines) + @test isempty(errlines) # @test length(lines) == 6 @test length(lines) == 5 @test checknum(lines[2], r"([0-9]+) full collections", n -> n >= 10) diff --git a/test/gcext/gcext.c b/test/gcext/gcext.c index d5bf91ec8c9ab..f06132bf26194 100644 --- a/test/gcext/gcext.c +++ b/test/gcext/gcext.c @@ -479,8 +479,6 @@ static int stack_grows_down(void) { void task_scanner(jl_task_t *task, int root_task) { - int var_on_frame; - // The task scanner is not necessary for liveness, as the // corresponding task stack is already part of the stack. // Its purpose is simply to test that the task scanner @@ -491,23 +489,36 @@ void task_scanner(jl_task_t *task, int root_task) char *total_end_stack; jl_active_task_stack(task, &start_stack, &end_stack, &total_start_stack, &total_end_stack); + if (end_stack == 0) + return; + + if (!(start_stack < end_stack) || + !(total_start_stack < total_end_stack) || + !(total_start_stack <= start_stack) || + !(end_stack <= total_end_stack)) { + fputs("incorrect stack bounds\n", stderr); + fflush(stderr); + abort(); + } + // this is the live stack of a thread. Is it ours? - if (start_stack && task == (jl_task_t*)jl_get_current_task()) { - if (!(lt_ptr(start_stack, &var_on_frame) && lt_ptr(&var_on_frame, end_stack))) { + if (task == (jl_task_t*)jl_get_current_task()) { + void *var_on_frame = task->ctx.activefp + 1; + if (!(lt_ptr(start_stack, var_on_frame) && lt_ptr(var_on_frame, end_stack))) { // error, current stack frame must be on the live stack. - jl_error("stack frame not part of the current task"); + fputs("stack frame not part of the current task\n", stderr); + fflush(stderr); + abort(); } } - if (start_stack) { - void **start = (void **)start_stack; - void **end = (void **)end_stack; - while (start < end) { - void *p = *start++; - void *q = jl_gc_internal_obj_base_ptr(p); - if (q) { - jl_gc_mark_queue_obj(ptls, q); - } + void **start = (void **)start_stack; + void **end = (void **)end_stack; + while (start < end) { + void *p = *start++; + void *q = jl_gc_internal_obj_base_ptr(p); + if (q) { + jl_gc_mark_queue_obj(ptls, q); } } }