Skip to content

Commit 7528266

Browse files
committed
Improved jl_unw_stepn: skip internal frames, don't adjust ip for signal frames
Add an option to jl_unw_stepn to avoid adjusting the instruction pointer when we know the cursor derives from a signal frame. (We could alternatively try to do this with unw_is_signal_frame, but that wouldn't work for windows and would be an extra function call for each jl_unw_step which seems a bit unnecessary when we already know the top frame is the signal frame.) Also generalize skipping of the first few backtrace frames as needed to hide the internal backtrace machinery itself by adding a `skip` option to jl_unw_stepn/jl_backtrace_from_here/record_backtrace. As part of this also move the workaround for 32-bit windows into the backtrace internals. Move Base.backtrace into error.jl as it doesn't need any lookup functionality from StackTraces to work correctly.
1 parent 141ff51 commit 7528266

File tree

6 files changed

+108
-86
lines changed

6 files changed

+108
-86
lines changed

base/error.jl

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,19 @@ function _reformat_bt(bt, bt2)
8585
ret
8686
end
8787

88-
function backtrace end
88+
"""
89+
backtrace()
90+
91+
Get a backtrace object for the current program point.
92+
"""
93+
function backtrace()
94+
@_noinline_meta
95+
# skip frame for backtrace(). Note that for this to work properly,
96+
# backtrace() itself must not be interpreted nor inlined.
97+
skip = 1
98+
bt1, bt2 = ccall(:jl_backtrace_from_here, Any, (Cint,Cint), false, skip)
99+
_reformat_bt(bt1, bt2)
100+
end
89101

90102
"""
91103
catch_backtrace()

base/stacktraces.jl

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -149,35 +149,6 @@ function lookup(ip::Base.InterpreterIP)
149149
return scopes
150150
end
151151

152-
"""
153-
backtrace()
154-
155-
Get a backtrace object for the current program point.
156-
"""
157-
function Base.backtrace()
158-
bt, bt2 = ccall(:jl_backtrace_from_here, Any, (Int32,), false)
159-
if length(bt) > 2
160-
# remove frames for jl_backtrace_from_here and backtrace()
161-
if bt[2] == Ptr{Cvoid}(-1%UInt)
162-
# backtrace() is interpreted
163-
# Note: win32 is missing the top frame (see https://bugs.chromium.org/p/crashpad/issues/detail?id=53)
164-
@static if Base.Sys.iswindows() && Int === Int32
165-
deleteat!(bt, 1:2)
166-
else
167-
deleteat!(bt, 1:3)
168-
end
169-
pushfirst!(bt2)
170-
else
171-
@static if Base.Sys.iswindows() && Int === Int32
172-
deleteat!(bt, 1)
173-
else
174-
deleteat!(bt, 1:2)
175-
end
176-
end
177-
end
178-
return Base._reformat_bt(bt, bt2)
179-
end
180-
181152
"""
182153
stacktrace([trace::Vector{Ptr{Cvoid}},] [c_funcs::Bool=false]) -> StackTrace
183154

src/gf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1714,7 +1714,7 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args,
17141714
jl_static_show((JL_STREAM*)STDERR_FILENO,(jl_value_t*)f); jl_printf((JL_STREAM*)STDERR_FILENO," world %u\n", (unsigned)world);
17151715
jl_static_show((JL_STREAM*)STDERR_FILENO,args); jl_printf((JL_STREAM*)STDERR_FILENO,"\n");
17161716
jl_ptls_t ptls = jl_get_ptls_states();
1717-
ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE);
1717+
ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
17181718
jl_critical_error(0, NULL, ptls->bt_data, &ptls->bt_size);
17191719
abort();
17201720
}

src/julia_internal.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -638,8 +638,11 @@ typedef int bt_cursor_t;
638638
#define JL_BT_INTERP_FRAME (((uintptr_t)0)-1)
639639
// Maximum number of elements of bt_data taken up by an interpreter frame
640640
#define JL_BT_MAX_ENTRY_SIZE 3
641-
size_t rec_backtrace(uintptr_t *bt_data, size_t maxsize) JL_NOTSAFEPOINT;
642-
size_t rec_backtrace_ctx(uintptr_t *bt_data, size_t maxsize, bt_context_t *ctx, int add_interp_frames) JL_NOTSAFEPOINT;
641+
size_t rec_backtrace(uintptr_t *bt_data, size_t maxsize, int skip) JL_NOTSAFEPOINT;
642+
// Record backtrace from a signal handler. `ctx` is the context of the code
643+
// which was asynchronously interrupted.
644+
size_t rec_backtrace_ctx(uintptr_t *bt_data, size_t maxsize, bt_context_t *ctx,
645+
int add_interp_frames) JL_NOTSAFEPOINT;
643646
#ifdef LIBOSXUNWIND
644647
size_t rec_backtrace_ctx_dwarf(uintptr_t *bt_data, size_t maxsize, bt_context_t *ctx, int add_interp_frames) JL_NOTSAFEPOINT;
645648
#endif

src/stackwalk.c

Lines changed: 83 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,25 @@ static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp, uintpt
2929

3030
// Record backtrace entries into bt_data by stepping cursor with jl_unw_step
3131
// until the outermost frame is encountered or the buffer bt_data is (close to)
32-
// full. Native instruction pointers are adjusted to point to the address of
33-
// the call instruction.
32+
// full. Returned instruction pointers are adjusted to point to the address of
33+
// the call instruction. The first `skip` frames are not included in `bt_data`.
3434
//
3535
// `maxsize` is the size of the buffer `bt_data` (and `sp` if non-NULL). It
3636
// must be at least JL_BT_MAX_ENTRY_SIZE to accommodate extended backtrace
3737
// entries. If `sp != NULL`, the stack pointer corresponding to `bt_data[i]` is
3838
// stored in `sp[i]`.
3939
//
40+
// Flag `add_interp_frames==1` should be set to record extended backtrace
41+
// entries in `bt_data` for each julia interpreter frame.
42+
//
43+
// Flag `from_signal_handler==1` should be set if the cursor was obtained by
44+
// asynchronously interrupting the code.
45+
//
4046
// jl_unw_stepn will return 1 if there are more frames to come. The number of
41-
// elements of bt_data (and sp if non-NULL) which were used are returned in
42-
// bt_size.
47+
// elements written to bt_data (and sp if non-NULL) are returned in bt_size.
4348
int jl_unw_stepn(bt_cursor_t *cursor, uintptr_t *bt_data, size_t *bt_size,
44-
uintptr_t *sp, size_t maxsize, int add_interp_frames) JL_NOTSAFEPOINT
49+
uintptr_t *sp, size_t maxsize, int skip, int add_interp_frames,
50+
int from_signal_handler) JL_NOTSAFEPOINT
4551
{
4652
jl_ptls_t ptls = jl_get_ptls_states();
4753
volatile size_t n = 0;
@@ -52,50 +58,65 @@ int jl_unw_stepn(bt_cursor_t *cursor, uintptr_t *bt_data, size_t *bt_size,
5258
#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
5359
assert(!jl_in_stackwalk);
5460
jl_in_stackwalk = 1;
61+
if (!from_signal_handler) {
62+
// Work around a 32-bit Windows bug where the top frame is missing.
63+
// See for example https://bugs.chromium.org/p/crashpad/issues/detail?id=53
64+
skip--;
65+
}
5566
#endif
5667
#if !defined(_OS_WINDOWS_)
5768
jl_jmp_buf *old_buf = ptls->safe_restore;
5869
jl_jmp_buf buf;
5970
if (!jl_setjmp(buf, 0)) {
6071
ptls->safe_restore = &buf;
6172
#endif
62-
while (1) {
73+
int have_more_frames = 1;
74+
while (have_more_frames) {
6375
if (n + JL_BT_MAX_ENTRY_SIZE > maxsize) {
6476
// Postpone advancing the cursor: may need more space
6577
need_more_space = 1;
6678
break;
6779
}
68-
int have_more_frames = jl_unw_step(cursor, &return_ip, &thesp, &thefp);
80+
have_more_frames = jl_unw_step(cursor, &return_ip, &thesp, &thefp);
81+
if (skip > 0) {
82+
skip--;
83+
continue;
84+
}
6985
if (sp)
7086
sp[n] = thesp;
71-
// ARM instruction pointer encoding uses the low bit as a flag for
72-
// thumb mode, which must be cleared before further use. (Note not
73-
// needed for ARM AArch64.) See
74-
// https://github.com/libunwind/libunwind/pull/131
75-
#ifdef _CPU_ARM_
76-
return_ip &= ~(uintptr_t)0x1;
77-
#endif
7887
// For the purposes of looking up debug info for functions, we want
7988
// to harvest addresses for the *call* instruction `call_ip` during
8089
// stack walking. However, this information isn't directly
8190
// available. Instead, the stack walk discovers the address
8291
// `return_ip` which would be *returned to* as the stack is
8392
// unwound.
8493
//
85-
// To infer `call_ip` in full generality we would need to
86-
// understand each platform ABI instruction pointer encoding and
87-
// calling conventions, noting that these may vary per stack frame.
88-
// (For example signal frames on linux x86_64 have `call_ip ==
89-
// return_ip`.)
90-
//
91-
// However for our current purposes it seems sufficient to assume
92-
// that `call_ip = return_ip-1`. See also:
94+
// To infer `call_ip` in full generality we need to understand each
95+
// platform ABI instruction pointer encoding and calling
96+
// conventions, noting that the latter may vary per stack frame.
9397
//
98+
// See also:
9499
// * The LLVM unwinder functions step() and setInfoBasedOnIPRegister()
95100
// https://github.com/llvm/llvm-project/blob/master/libunwind/src/UnwindCursor.hpp
96101
// * The way that libunwind handles it in `unw_get_proc_name`:
97102
// https://lists.nongnu.org/archive/html/libunwind-devel/2014-06/msg00025.html
98-
uintptr_t call_ip = return_ip - 1;
103+
uintptr_t call_ip = return_ip;
104+
// ARM instruction pointer encoding uses the low bit as a flag for
105+
// thumb mode, which must be cleared before further use. (Note not
106+
// needed for ARM AArch64.) See
107+
// https://github.com/libunwind/libunwind/pull/131
108+
#ifdef _CPU_ARM_
109+
call_ip &= ~(uintptr_t)0x1;
110+
#endif
111+
// Now there are two main cases to adjust for:
112+
// * Normal stack frames where compilers emit a `call` instruction
113+
// which we can get from the return address via `call_ip = return_ip - 1`.
114+
// * Code which was interrupted asynchronously (eg, via a signal)
115+
// is expected to have `call_ip == return_ip`.
116+
if (n != 0 || !from_signal_handler) {
117+
// normal frame
118+
call_ip -= 1;
119+
}
99120
if (call_ip == JL_BT_INTERP_FRAME) {
100121
// Never leave special marker in the bt data as it can corrupt the GC.
101122
call_ip = 0;
@@ -109,8 +130,6 @@ int jl_unw_stepn(bt_cursor_t *cursor, uintptr_t *bt_data, size_t *bt_size,
109130
*bt_entry = call_ip;
110131
n++;
111132
}
112-
if (!have_more_frames)
113-
break;
114133
}
115134
#if !defined(_OS_WINDOWS_)
116135
}
@@ -130,27 +149,45 @@ int jl_unw_stepn(bt_cursor_t *cursor, uintptr_t *bt_data, size_t *bt_size,
130149
return need_more_space;
131150
}
132151

133-
size_t rec_backtrace_ctx(uintptr_t *bt_data, size_t maxsize,
134-
bt_context_t *context, int add_interp_frames)
152+
NOINLINE size_t rec_backtrace_ctx(uintptr_t *bt_data, size_t maxsize,
153+
bt_context_t *context, int add_interp_frames) JL_NOTSAFEPOINT
135154
{
136-
size_t bt_size = 0;
137155
bt_cursor_t cursor;
138156
if (!jl_unw_init(&cursor, context))
139157
return 0;
140-
jl_unw_stepn(&cursor, bt_data, &bt_size, NULL, maxsize, add_interp_frames);
158+
size_t bt_size = 0;
159+
jl_unw_stepn(&cursor, bt_data, &bt_size, NULL, maxsize, 0, add_interp_frames, 1);
141160
return bt_size;
142161
}
143162

144-
size_t rec_backtrace(uintptr_t *bt_data, size_t maxsize)
163+
// Record backtrace into buffer `bt_data`, using a maximum of `maxsize`
164+
// elements, and returning the number of elements written.
165+
//
166+
// The first `skip` frames are omitted, in addition to omitting the frame from
167+
// `rec_backtrace` itself.
168+
NOINLINE size_t rec_backtrace(uintptr_t *bt_data, size_t maxsize, int skip)
145169
{
146170
bt_context_t context;
147171
memset(&context, 0, sizeof(context));
148172
jl_unw_get(&context);
149-
return rec_backtrace_ctx(bt_data, maxsize, &context, 1);
173+
bt_cursor_t cursor;
174+
if (!jl_unw_init(&cursor, &context))
175+
return 0;
176+
size_t bt_size = 0;
177+
jl_unw_stepn(&cursor, bt_data, &bt_size, NULL, maxsize, skip + 1, 1, 0);
178+
return bt_size;
150179
}
151180

152181
static jl_value_t *array_ptr_void_type JL_ALWAYS_LEAFTYPE = NULL;
153-
JL_DLLEXPORT jl_value_t *jl_backtrace_from_here(int returnsp)
182+
// Return backtrace information as an svec of (bt1, bt2, [sp])
183+
//
184+
// The stack pointers `sp` are returned only when `returnsp` evaluates to true.
185+
// bt1 contains raw backtrace entries, while bt2 exists to root any julia
186+
// objects associated with the entries in bt1.
187+
//
188+
// The frame from jl_backtrace_from_here will be skipped; set `skip > 0` to
189+
// skip additional native frames from the start of the backtrace.
190+
JL_DLLEXPORT jl_value_t *jl_backtrace_from_here(int returnsp, int skip)
154191
{
155192
jl_array_t *ip = NULL;
156193
jl_array_t *sp = NULL;
@@ -168,25 +205,26 @@ JL_DLLEXPORT jl_value_t *jl_backtrace_from_here(int returnsp)
168205
memset(&context, 0, sizeof(context));
169206
jl_unw_get(&context);
170207
if (jl_unw_init(&cursor, &context)) {
208+
// Skip frame for jl_backtrace_from_here itself
209+
skip += 1;
171210
size_t offset = 0;
172-
while (1) {
211+
int have_more_frames = 1;
212+
while (have_more_frames) {
173213
jl_array_grow_end(ip, maxincr);
174214
uintptr_t *sp_ptr = NULL;
175215
if (returnsp) {
176216
sp_ptr = (uintptr_t*)jl_array_data(sp) + offset;
177217
jl_array_grow_end(sp, maxincr);
178218
}
179219
size_t size_incr = 0;
180-
int need_more_space = jl_unw_stepn(&cursor, (uintptr_t*)jl_array_data(ip) + offset,
181-
&size_incr, sp_ptr, maxincr, 1);
220+
have_more_frames = jl_unw_stepn(&cursor, (uintptr_t*)jl_array_data(ip) + offset,
221+
&size_incr, sp_ptr, maxincr, skip, 1, 0);
222+
skip = 0;
182223
offset += size_incr;
183-
if (!need_more_space) {
184-
jl_array_del_end(ip, jl_array_len(ip) - offset);
185-
if (returnsp)
186-
jl_array_del_end(sp, jl_array_len(sp) - offset);
187-
break;
188-
}
189224
}
225+
jl_array_del_end(ip, jl_array_len(ip) - offset);
226+
if (returnsp)
227+
jl_array_del_end(sp, jl_array_len(sp) - offset);
190228

191229
size_t n = 0;
192230
while (n < jl_array_len(ip)) {
@@ -480,18 +518,14 @@ static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp, uintpt
480518
}
481519

482520
#ifdef LIBOSXUNWIND
483-
int jl_unw_init_dwarf(bt_cursor_t *cursor, bt_context_t *uc)
484-
{
485-
return unw_init_local_dwarf(cursor, uc) != 0;
486-
}
487-
size_t rec_backtrace_ctx_dwarf(uintptr_t *bt_data, size_t maxsize,
488-
bt_context_t *context, int add_interp_frames)
521+
NOINLINE size_t rec_backtrace_ctx_dwarf(uintptr_t *bt_data, size_t maxsize,
522+
bt_context_t *context, int add_interp_frames)
489523
{
490524
size_t bt_size = 0;
491525
bt_cursor_t cursor;
492-
if (!jl_unw_init_dwarf(&cursor, context))
526+
if (unw_init_local_dwarf(&cursor, context) != UNW_ESUCCESS)
493527
return 0;
494-
jl_unw_stepn(&cursor, bt_data, &bt_size, NULL, maxsize, add_interp_frames);
528+
jl_unw_stepn(&cursor, bt_data, &bt_size, NULL, maxsize, 0, add_interp_frames, 1);
495529
return bt_size;
496530
}
497531
#endif

src/task.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -234,10 +234,12 @@ JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid)
234234
return (void *)((char *)task->stkbuf + off);
235235
}
236236

237-
static void record_backtrace(jl_ptls_t ptls) JL_NOTSAFEPOINT
237+
// Marked noinline so we can consistently skip the associated frame.
238+
// `skip` is the number of additional frames to skip.
239+
NOINLINE static void record_backtrace(jl_ptls_t ptls, int skip) JL_NOTSAFEPOINT
238240
{
239241
// storing bt_size in ptls ensures roots in bt_data will be found
240-
ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE);
242+
ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, skip + 1);
241243
}
242244

243245
JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
@@ -484,7 +486,7 @@ JL_DLLEXPORT void jl_throw(jl_value_t *e JL_MAYBE_UNROOTED)
484486
assert(e != NULL);
485487
if (ptls->safe_restore)
486488
throw_internal(NULL);
487-
record_backtrace(ptls);
489+
record_backtrace(ptls, 1);
488490
throw_internal(e);
489491
}
490492

@@ -669,7 +671,7 @@ STATIC_OR_JS void NOINLINE JL_NORETURN start_task(void)
669671

670672
t->started = 1;
671673
if (t->exception != jl_nothing) {
672-
record_backtrace(ptls);
674+
record_backtrace(ptls, 0);
673675
jl_push_excstack(&t->excstack, t->exception,
674676
ptls->bt_data, ptls->bt_size);
675677
res = t->exception;

0 commit comments

Comments
 (0)