Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dwarf v4 debug types support #530

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions elftools/dwarf/datatype_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,8 @@ def DIE_is_ptr_to_member_struct(type_die):

def _strip_type_tag(die):
"""Given a DIE with DW_TAG_foo_type, returns foo"""
if isinstance(die.tag, int): # User-defined tag
return ""
return die.tag[7:-5]

def _array_subtype_size(sub):
Expand Down
5 changes: 3 additions & 2 deletions elftools/dwarf/die.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,12 @@ def get_DIE_from_attribute(self, name):
'DW_FORM_ref8', 'DW_FORM_ref', 'DW_FORM_ref_udata'):
refaddr = self.cu.cu_offset + attr.raw_value
return self.cu.get_DIE_from_refaddr(refaddr)

elif attr.form in ('DW_FORM_ref_addr'):
return self.cu.dwarfinfo.get_DIE_from_refaddr(attr.raw_value)
elif attr.form in ('DW_FORM_ref_sig8'):
# Implement search type units for matching signature
raise NotImplementedError('%s (type unit by signature)' % attr.form)
die = self.cu.dwarfinfo.get_DIE_by_sig8(attr.raw_value)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: just return this directly, no need for a local die

return die
elif attr.form in ('DW_FORM_ref_sup4', 'DW_FORM_ref_sup8', 'DW_FORM_GNU_ref_alt'):
if self.dwarfinfo.supplementary_dwarfinfo:
return self.dwarfinfo.supplementary_dwarfinfo.get_DIE_from_refaddr(attr.raw_value)
Expand Down
133 changes: 128 additions & 5 deletions elftools/dwarf/dwarfinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# This code is in the public domain
#-------------------------------------------------------------------------------
import os
from collections import namedtuple
from collections import namedtuple, OrderedDict
from bisect import bisect_right

from ..construct.lib.container import Container
Expand All @@ -16,6 +16,7 @@
parse_cstring_from_stream)
from .structs import DWARFStructs
from .compileunit import CompileUnit
from .typeunit import TypeUnit
from .abbrevtable import AbbrevTable
from .lineprogram import LineProgram
from .callframe import CallFrameInfo
Expand Down Expand Up @@ -81,7 +82,8 @@ def __init__(self,
debug_loclists_sec,
debug_rnglists_sec,
debug_sup_sec,
gnu_debugaltlink_sec
gnu_debugaltlink_sec,
debug_types_sec
):
""" config:
A DwarfConfig object
Expand Down Expand Up @@ -110,6 +112,7 @@ def __init__(self,
self.debug_rnglists_sec = debug_rnglists_sec
self.debug_sup_sec = debug_sup_sec
self.gnu_debugaltlink_sec = gnu_debugaltlink_sec
self.debug_types_sec = debug_types_sec

# Sets the supplementary_dwarfinfo to None. Client code can set this
# to something else, typically a DWARFInfo file read from an ELFFile
Expand All @@ -134,6 +137,9 @@ def __init__(self,
self._cu_cache = []
self._cu_offsets_map = []

# DWARF v4 type units keyed by sig8 - an OrderedDict that is lazily created the first time a type signature is looked up
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure what on Reference refers to here.

Do you mean to say it's lazily initialized the first time it's referenced?

self._type_units_by_sig = None

@property
def has_debug_info(self):
""" Return whether this contains debug information.
Expand Down Expand Up @@ -167,6 +173,35 @@ def get_DIE_from_refaddr(self, refaddr, cu=None):
cu = self.get_CU_containing(refaddr)
return cu.get_DIE_from_refaddr(refaddr)

def get_DIE_by_sig8(self, sig8):
    """ Find and return a DIE referenced by its type signature.

        sig8:
            The 8 byte signature (as a 64-bit unsigned integer)

        The Type Unit whose header carries the matching signature is
        looked up, and the DIE located at the offset given by the
        type_offset field of that Type Unit header is returned.

        Signatures are 64-bit unsigned integers computed by the DWARF
        producer as specified in the DWARF standard. Each Type Unit
        contains one signature and, in its unit header, the offset of the
        DIE describing the type.

        Describing a type can generate several DIEs. By moving a DIE and
        its related DIEs to a Type Unit and generating a hash of the DIEs
        and attributes in a flattened form, multiple Compile Units in a
        linked object can reference the same DIE in the overall DWARF
        structure.

        In DWARF v4 type units are identified by their appearance in the
        .debug_types section.

        Raises KeyError if no Type Unit has the given signature.
    """
    # Make sure the signature -> Type Unit map has been built.
    self._parse_debug_types()

    type_unit = self._type_units_by_sig.get(sig8)
    if type_unit is not None:
        # type_offset is relative to the start of the Type Unit.
        die_offset = type_unit.tu_offset + type_unit['type_offset']
        return type_unit._get_cached_DIE(die_offset)

    raise KeyError("Signature %016x not found in .debug_types" % sig8)

def get_CU_containing(self, refaddr):
""" Find the CU that includes the given reference address in the
.debug_info section.
Expand Down Expand Up @@ -226,6 +261,11 @@ def iter_CUs(self):
"""
return self._parse_CUs_iter()

def iter_TUs(self):
    """ Yield all the type units (TypeUnit objects) found in the
        .debug_types section.
    """
    type_units = self.parse_TUs_iter()
    return type_units

def get_abbrev_table(self, offset):
""" Get an AbbrevTable from the given offset in the debug_abbrev
section.
Expand Down Expand Up @@ -414,11 +454,49 @@ def _parse_CUs_iter(self, offset=0):
# Compute the offset of the next CU in the section. The unit_length
# field of the CU header contains its size not including the length
# field itself.
offset = ( offset +
cu['unit_length'] +
cu.structs.initial_length_field_size())
offset = (offset +
cu['unit_length'] +
cu.structs.initial_length_field_size())
yield cu

def parse_TUs_iter(self, offset=0):
    """ Iterate over the type units (TypeUnit objects) of the .debug_types
        section, starting with the unit whose header is at the given
        offset. Nothing is yielded when there is no .debug_types section.
    """
    if self.debug_types_sec is not None:
        while offset < self.debug_types_sec.size:
            type_unit = self._parse_TU_at_offset(offset)
            # Advance to the next TU in the section. The unit_length field
            # of the TU header holds the unit size without the length
            # field itself, so the size of that field is added on top.
            unit_size = (type_unit['unit_length'] +
                         type_unit.structs.initial_length_field_size())
            offset += unit_size

            yield type_unit

def _parse_debug_types(self):
""" Parse all the TU entries in the .debug_types section.
Place units into an OrderedDict keyed by type signature.
"""
if self._type_units_by_sig is not None:
return
self._type_units_by_sig = OrderedDict()

if self.debug_types_sec is None:
return

# Parse all the Type Units in the types section for access by sig8
offset = 0
while offset < self.debug_types_sec.size:
tu = self._parse_TU_at_offset(offset)
# Compute the offset of the next TU in the section. The unit_length
# field of the TU header contains its size not including the length
# field itself.
offset = (offset +
tu['unit_length'] +
tu.structs.initial_length_field_size())
self._type_units_by_sig[tu['signature']] = tu

def _cached_CU_at_offset(self, offset):
""" Return the CU with unit header at the given offset into the
debug_info section from the cache. If not present, the unit is
Expand Down Expand Up @@ -491,6 +569,51 @@ def _parse_CU_at_offset(self, offset):
cu_offset=offset,
cu_die_offset=cu_die_offset)

def _parse_TU_at_offset(self, offset):
    """ Parse the Type Unit (TU) whose header starts at the given offset
        in the debug_types stream, and return it as a TypeUnit.
    """
    types_stream = self.debug_types_sec.stream

    # Per section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec
    # v3, the first 32-bit word of the unit header tells whether the unit
    # uses the 32-bit or the 64-bit DWARF format. Peek at that word first;
    # everything that follows is parsed with structs chosen accordingly.
    first_word = struct_parse(
        self.structs.Dwarf_uint32(''), types_stream, offset)
    if first_word == 0xFFFFFFFF:
        format_bits = 64
    else:
        format_bits = 32

    # Throwaway structs, good enough to read the header only. The address
    # size and version used here are placeholders; the real values come
    # out of the header itself.
    header_structs = DWARFStructs(
        little_endian=self.config.little_endian,
        dwarf_format=format_bits,
        address_size=4,
        dwarf_version=2)
    header = struct_parse(
        header_structs.Dwarf_TU_header, types_stream, offset)

    # Structs for the rest of the TU, now that the address size and the
    # DWARF version are known.
    unit_structs = DWARFStructs(
        little_endian=self.config.little_endian,
        dwarf_format=format_bits,
        address_size=header['address_size'],
        dwarf_version=header['version'])

    # The stream is positioned right after the header at this point.
    first_die_offset = types_stream.tell()

    version = header['version']
    dwarf_assert(
        self._is_supported_version(version),
        "Expected supported DWARF version. Got '%s'" % version)

    return TypeUnit(
        header=header,
        dwarfinfo=self,
        structs=unit_structs,
        tu_offset=offset,
        tu_die_offset=first_die_offset)

def _is_supported_version(self, version):
""" DWARF version supported by this parser
"""
Expand Down
7 changes: 4 additions & 3 deletions elftools/dwarf/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,9 @@
DW_TAG_immutable_type = 0x4b,


# Tags between 0x4080 and 0xffff are user-defined.
# different implementations may overlap?

DW_TAG_lo_user = 0x4080,
DW_TAG_GNU_template_template_param = 0x4106,
DW_TAG_GNU_template_parameter_pack = 0x4107,
DW_TAG_GNU_formal_parameter_pack = 0x4108,
Expand All @@ -101,8 +102,6 @@

DW_TAG_APPLE_property = 0x4200,

DW_TAG_hi_user = 0xffff,

_default_ = Pass,
)

Expand Down Expand Up @@ -268,6 +267,8 @@
DW_AT_MIPS_allocatable_dopetype = 0x200f,
DW_AT_MIPS_assumed_shape_dopetype = 0x2010,
DW_AT_MIPS_assumed_size = 0x2011,

DW_AT_HP_opt_level = 0x2014,

DW_AT_sf_names = 0x2101,
DW_AT_src_info = 0x2102,
Expand Down
13 changes: 13 additions & 0 deletions elftools/dwarf/structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ class DWARFStructs(object):
Dwarf_CU_header (+):
Compilation unit header

Dwarf_TU_header (+):
Type unit (.debug_types section) header

Dwarf_abbrev_declaration (+):
Abbreviation table declaration - doesn't include the initial
code, only the contents.
Expand Down Expand Up @@ -148,6 +151,7 @@ def _create_structs(self):
self._create_initial_length()
self._create_leb128()
self._create_cu_header()
self._create_tu_header()
self._create_abbrev_declaration()
self._create_dw_form()
self._create_lineprog_header()
Expand Down Expand Up @@ -222,6 +226,15 @@ def _create_cu_header(self):
Embed(dwarfv4_CU_header),
))

def _create_tu_header(self):
    """ Create Dwarf_TU_header, the struct used to parse the header of a
        type unit in the .debug_types section.
    """
    # Fields in the order in which they are parsed from the stream.
    header_fields = (
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_offset('debug_abbrev_offset'),
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint64('signature'),
        self.Dwarf_offset('type_offset'),
    )
    self.Dwarf_TU_header = Struct('Dwarf_TU_header', *header_fields)

def _create_abbrev_declaration(self):
self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
Expand Down
Loading