Skip to content

Commit

Permalink
gdb: pass down type in execute_stack_op
Browse files Browse the repository at this point in the history
This comment [1] on bug SWDEV-294225 shows a case where backtrace fails
and a Python error is shown:

    $ ./gdb -ex "set pag off" -q -nx --data-directory=data-directory deep -ex "b 7" -ex r -ex c
    Reading symbols from deep...
    Breakpoint 1 at 0x219a8f: file deep.cpp, line 25.
    Starting program: /home/master/smarchi/build/binutils-gdb/gdb/deep
    [Thread debugging using libthread_db enabled]
    Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".

    Breakpoint 1, main () at deep.cpp:25
    25          hipLaunchKernelGGL(HIP_KERNEL_NAME(hip_deep), dim3(1), dim3(1), 0, 0);
    Continuing.
    [New Thread 0x7ffff6350700 (LWP 43023)]
    [New Thread 0x7ffff53ff700 (LWP 43024)]
    [Thread 0x7ffff53ff700 (LWP 43024) exited]
    [New Thread 0x7ffff5b4f700 (LWP 43025)]
    [New Thread 0x7ffff5951700 (LWP 43026)]
    Hello Device
    [Switching to thread 6, lane 0 (AMDGPU Lane 1:2:1:1/0 (0,0,0)[0,0,0])]

    Thread 6 "deep" hit Breakpoint 1, with lane 0, base_case () at deep.cpp:7
    7           return;
    (gdb) bt
    Python Exception <class 'gdb.error'>: access outside bounds of object referenced via synthetic pointer
    #0  base_case () at deep.cpp:7

Python isn't the culprit here, it's just that we fail to compute frame
0's id while trying to apply frame filters, so the Python code catches
and exception and prints that error.  The failure to compute the frame
id is the reason for the backtrace stopping early.

The point where an exception is thrown is:

    #4  0x000056459263d322 in error (fmt=0x5645926f20f8 "access outside bounds of object referenced via synthetic pointer") at /home/master/smarchi/src/binutils-gdb/gdbsupport/errors.cc:43
    #5  0x0000564591fc0627 in invalid_synthetic_pointer () at /home/master/smarchi/src/binutils-gdb/gdb/dwarf2/loc.c:99
    #6  0x0000564591f758bd in dwarf_composite::to_gdb_value (this=0x5645955cc360, frame=0x564594c7c480, type=0x5645957b7a10, subobj_type=0x5645957b7a10, subobj_offset=0) at /home/master/smarchi/src/binutils-gdb/gdb/dwarf2/expr.c:2067
    #7  0x0000564591f774ef in dwarf_expr_context::fetch_result (this=0x7ffdf74c0880, type=0x5645957b7a10, subobj_type=0x5645957b7a10, subobj_offset=0, as_lval=true) at /home/master/smarchi/src/binutils-gdb/gdb/dwarf2/expr.c:2785
    #8  0x0000564591f77689 in dwarf_expr_context::evaluate (this=0x7ffdf74c0880, addr=0x564595d4172a "\220\250\024\026\344\200\004\346\021\224\b\354 @b\020\251\024\016\220\251\024\026\344\200\002\346\021\224\b\354 @b\020\252\024\r\220\252\024", <incomplete sequence \344>, len=14, as_lval=true, per_cu=0x0,
        frame=0x564594c7c480, init_values=0x7ffdf74c0990, addr_info=0x0, type=0x0, subobj_type=0x0, subobj_offset=0) at /home/master/smarchi/src/binutils-gdb/gdb/dwarf2/expr.c:2806
    #9  0x0000564591f7dff2 in dwarf2_evaluate (addr=0x564595d4172a "\220\250\024\026\344\200\004\346\021\224\b\354 @b\020\251\024\016\220\251\024\026\344\200\002\346\021\224\b\354 @b\020\252\024\r\220\252\024", <incomplete sequence \344>, len=14, as_lval=true, per_objfile=0x56459573d730, per_cu=0x0,
        frame=0x564594c7c480, addr_size=8, init_values=0x7ffdf74c0990, addr_info=0x0, type=0x0, subobj_type=0x0, subobj_offset=0) at /home/master/smarchi/src/binutils-gdb/gdb/dwarf2/expr.c:4342
    #10 0x0000564591f95960 in execute_stack_op (exp=0x564595d4172a "\220\250\024\026\344\200\004\346\021\224\b\354 @b\020\251\024\016\220\251\024\026\344\200\002\346\021\224\b\354 @b\020\252\024\r\220\252\024", <incomplete sequence \344>, len=14, addr_size=8, this_frame=0x564594c7c480, initial=432345564227578880,
        initial_in_stack_memory=1, per_objfile=0x56459573d730, type=0x564595804b60, as_lval=true) at /home/master/smarchi/src/binutils-gdb/gdb/dwarf2/frame.c:257
    #11 0x0000564591f98619 in dwarf2_frame_prev_register (this_frame=0x564594c7c480, this_cache=0x564594c7c498, regnum=40) at /home/master/smarchi/src/binutils-gdb/gdb/dwarf2/frame.c:1233
    #12 0x000056459207ff4d in frame_unwind_register_value (next_frame=0x564594c7c480, regnum=40) at /home/master/smarchi/src/binutils-gdb/gdb/frame.c:1234
    #13 0x000056459207f9f0 in frame_register_unwind (next_frame=0x564594c7c480, regnum=40, optimizedp=0x7ffdf74c0e00, unavailablep=0x7ffdf74c0e04, lvalp=0x7ffdf74c0c6c, addrp=0x7ffdf74c0c80, realnump=0x7ffdf74c0c70, bufferp=0x5645957be9b0 "\200ꏕEV") at /home/master/smarchi/src/binutils-gdb/gdb/frame.c:1144
    #14 0x000056459207fcdd in frame_register (frame=0x564594c21310, regnum=40, optimizedp=0x7ffdf74c0e00, unavailablep=0x7ffdf74c0e04, lvalp=0x7ffdf74c0c6c, addrp=0x7ffdf74c0c80, realnump=0x7ffdf74c0c70, bufferp=0x5645957be9b0 "\200ꏕEV") at /home/master/smarchi/src/binutils-gdb/gdb/frame.c:1187
    #15 0x0000564591f723a7 in read_from_register (frame=0x564594c21310, regnum=40, offset=8, buf=..., optimized=0x7ffdf74c0e00, unavailable=0x7ffdf74c0e04) at /home/master/smarchi/src/binutils-gdb/gdb/dwarf2/expr.c:137
    #16 0x0000564591f73e5f in dwarf_register::read (this=0x56459577a0c0, frame=0x564594c21310, buf=0x56459562e470 "", buf_bit_offset=0, bit_size=32, bits_to_skip=0, location_bit_limit=32, big_endian=false, optimized=0x7ffdf74c0e00, unavailable=0x7ffdf74c0e04)
        at /home/master/smarchi/src/binutils-gdb/gdb/dwarf2/expr.c:1242
    #17 0x0000564591f72ead in dwarf_location::write_to_gdb_value (this=0x56459577a0c0, frame=0x564594c21310, value=0x56459562b4d0, value_bit_offset=0, bits_to_skip=0, bit_size=32, location_bit_limit=32) at /home/master/smarchi/src/binutils-gdb/gdb/dwarf2/expr.c:704
    #18 0x0000564591f7551f in dwarf_composite::write_to_gdb_value (this=0x56459574b4a0, frame=0x564594c21310, value=0x56459562b4d0, value_bit_offset=0, bits_to_skip=0, bit_size=64, location_bit_limit=0) at /home/master/smarchi/src/binutils-gdb/gdb/dwarf2/expr.c:1968
    #19 0x0000564591f76234 in rw_closure_value (v=0x56459562b4d0, from=0x0) at /home/master/smarchi/src/binutils-gdb/gdb/dwarf2/expr.c:2248
    #20 0x0000564591f762f0 in read_closure_value (v=0x56459562b4d0) at /home/master/smarchi/src/binutils-gdb/gdb/dwarf2/expr.c:2261
    #21 0x00005645924c8952 in value_fetch_lazy (val=0x56459562b4d0) at /home/master/smarchi/src/binutils-gdb/gdb/value.c:4045
    #22 0x00005645924c2dde in value_optimized_out (value=0x56459562b4d0) at /home/master/smarchi/src/binutils-gdb/gdb/value.c:1431
    #23 0x00005645920804d4 in frame_unwind_register_unsigned (next_frame=0x564594c21310, regnum=624) at /home/master/smarchi/src/binutils-gdb/gdb/frame.c:1331
    #24 0x000056459207c87b in default_unwind_pc (gdbarch=0x5645957bec90, next_frame=0x564594c21310) at /home/master/smarchi/src/binutils-gdb/gdb/frame-unwind.c:240
    #25 0x000056459209bad3 in gdbarch_unwind_pc (gdbarch=0x5645957bec90, next_frame=0x564594c21310) at /home/master/smarchi/src/binutils-gdb/gdb/gdbarch.c:3364

The context described by this backtrace is:

 - We are trying to know the value of the PC register (regno 624) in
   frame 1
 - That value is saved in register v40 (regno 40, a 2048 bit value) in frame 0
 - The failure happens because when reading register v40, the expected
   type is 8 byte (64 bits) long

The cause is that execute_stack_op at frame 10 misses passing down the
expected type it receives as a parameter.  dwarf2_frame_prev_register at
frame 11 passes down the appropriate type for register v40 (a 2048 bit
long type), but this is lost by execute_stack_op.  Further down, when
time comes to convert to a GDB value, the expected type is nullptr, so
the default address type is used (frame 7, fetch_result).  This is where
the 64-bit type comes from.

[1] https://ontrack-internal.amd.com/browse/SWDEV-294225?focusedCommentId=7837574&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-7837574

Bug: https://ontrack-internal.amd.com/browse/SWDEV-294225
Change-Id: I8c7bcbd392b3e0ae839c00749c36e88066cb7f4c
  • Loading branch information
Simon Marchi authored and lmoriche committed Sep 13, 2021
1 parent f0dde8c commit 9ef2a9a
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 1 deletion.
2 changes: 1 addition & 1 deletion gdb/dwarf2/frame.c
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ execute_stack_op (const gdb_byte *exp, ULONGEST len, int addr_size,

value *result_val
= dwarf2_evaluate (exp, len, true, per_objfile, nullptr,
this_frame, addr_size, &init_values, nullptr);
this_frame, addr_size, &init_values, nullptr, type);

/* We need to clean up all the values that are not needed any more.
The problem with a value_ref_ptr class is that it disconnects the
Expand Down
58 changes: 58 additions & 0 deletions gdb/testsuite/gdb.rocm/deep-stack.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/* Copyright (C) 2021 Advanced Micro Devices, Inc. All rights reserved.
This file is part of GDB.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <cstdio>
#include <hip/hip_runtime.h>

__device__ void
base_case ()
{
/* That printf is necessary to reproduce the exact failure as reported in
SWDEV-294225. */
printf ("Hello device\n");
return; /* break here */
}

template <unsigned int N>
__device__ void
deep ()
{
deep<N-1> ();
}

template <>
__device__ void
deep<0> ()
{
base_case ();
}

__global__ void
hip_deep ()
{
deep<10> ();
}

int
main ()
{
hipLaunchKernelGGL (HIP_KERNEL_NAME (hip_deep), dim3 (1), dim3 (1), 0, 0);
hipDeviceSynchronize ();
return EXIT_SUCCESS;
}

63 changes: 63 additions & 0 deletions gdb/testsuite/gdb.rocm/deep-stack.exp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright (C) 2021 Advanced Micro Devices, Inc. All rights reserved.

# This file is part of GDB.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this- program. If not, see <http://www.gnu.org/licenses/>.

# Test inspired by the reproducer in SWDEV-294225.
#
# When stopped at /* break here */, GDB would fail to unwind the stack due to
# the way the PC register was saved.

load_lib rocm.exp

standard_testfile .cpp

if [skip_hipcc_tests] {
verbose "Skipping hip test: ${testfile}."
return 0
}

if {[build_executable "failed to prepare ${testfile}" $testfile $srcfile {debug hip}]} {
return -1
}

with_rocm_gpu_lock {
clean_restart $::binfile

if { ![runto_main] } {
fail "could not run to main"
return
}

set line [gdb_get_line_number "break here"]
gdb_test "break $line" "Breakpoint $::decimal at $hex.*"

gdb_test "continue" "Thread $decimal \"deep-stack\" hit Breakpoint $decimal.*"
gdb_test "backtrace" \
[multi_line \
"#0 .* base_case .*" \
"#1 .* deep<0u> .*" \
"#2 .* deep<1u> .*" \
"#3 .* deep<2u> .*" \
"#4 .* deep<3u> .*" \
"#5 .* deep<4u> .*" \
"#6 .* deep<5u> .*" \
"#7 .* deep<6u> .*" \
"#8 .* deep<7u> .*" \
"#9 .* deep<8u> .*" \
"#10 .* deep<9u> .*" \
"#11 .* deep<10u> .*" \
"#12 .* hip_deep .*"]
}

0 comments on commit 9ef2a9a

Please sign in to comment.