diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 737989c5..04c362b3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,6 +23,8 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: pip install -e . - name: Test run: | python test/all_tests.py diff --git a/README.rst b/README.rst index 2b4b5d0b..03a2ff1d 100644 --- a/README.rst +++ b/README.rst @@ -14,8 +14,10 @@ for more details. Pre-requisites -------------- -As a user of **pyelftools**, one only needs Python 3 to run. While there is no -reason for the library to not work on earlier versions of Python, our CI + * Python 3 + * construct >= 2.10.70 + +While there is no reason for the library to not work on earlier versions of Python, our CI tests are based on the official `Status of Python versions `__. @@ -38,9 +40,6 @@ recent version of the code. This can be done by downloading the `master zip file `_ or just cloning the Git repository. -Since **pyelftools** has no external dependencies, it's also easy to use it -without installing, by locally adjusting ``PYTHONPATH``. - How to use it? 
-------------- diff --git a/elftools/common/construct_utils.py b/elftools/common/construct_utils.py index 6ef7f461..a4a6ca7a 100644 --- a/elftools/common/construct_utils.py +++ b/elftools/common/construct_utils.py @@ -6,45 +6,22 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from ..construct import ( - Subconstruct, ConstructError, ArrayError, Adapter, Field, RepeatUntil, - Rename, SizeofError, Construct - ) +import itertools +from construct import ( + Subconstruct, Adapter, Bytes, RepeatUntil, Container, StopFieldError, + singleton, GreedyBytes, NullTerminated, Struct, Array +) -class RepeatUntilExcluding(Subconstruct): - """ A version of construct's RepeatUntil that doesn't include the last - element (which casued the repeat to exit) in the return value. - Only parsing is currently implemented. +def exclude_last_value(predicate): + def _exclude_last_value(obj, list, ctx): + result = predicate(obj, list, ctx) + if result: + del list[-1] + return result - P.S. 
removed some code duplication - """ - __slots__ = ["predicate"] - def __init__(self, predicate, subcon): - Subconstruct.__init__(self, subcon) - self.predicate = predicate - self._clear_flag(self.FLAG_COPY_CONTEXT) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - obj = [] - try: - context_for_subcon = context - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - context_for_subcon = context.__copy__() - - while True: - subobj = self.subcon._parse(stream, context_for_subcon) - if self.predicate(subobj, context): - break - obj.append(subobj) - except ConstructError as ex: - raise ArrayError("missing terminator", ex) - return obj - def _build(self, obj, stream, context): - raise NotImplementedError('no building') - def _sizeof(self, context): - raise SizeofError("can't calculate size") + return _exclude_last_value def _LEB128_reader(): @@ -52,24 +29,14 @@ def _LEB128_reader(): by a byte with 0 in its highest bit. """ return RepeatUntil( - lambda obj, ctx: ord(obj) < 0x80, - Field(None, 1)) - - -class _ULEB128Adapter(Adapter): - """ An adapter for ULEB128, given a sequence of bytes in a sub-construct. - """ - def _decode(self, obj, context): - value = 0 - for b in reversed(obj): - value = (value << 7) + (ord(b) & 0x7F) - return value - + lambda obj, list, ctx: ord(obj) < 0x80, + Bytes(1) + ) class _SLEB128Adapter(Adapter): """ An adapter for SLEB128, given a sequence of bytes in a sub-construct. 
""" - def _decode(self, obj, context): + def _decode(self, obj, context, path): value = 0 for b in reversed(obj): value = (value << 7) + (ord(b) & 0x7F) @@ -77,36 +44,87 @@ def _decode(self, obj, context): # negative -> sign extend value |= - (1 << (7 * len(obj))) return value + + def _emitparse(self, code): + block = f""" + def parse_sleb128(io, this): + l = [] + while True: + b = io.read(1)[0] + l.append(b) + if b < 0x80: + break + value = 0 + for b in reversed(l): + value = (value << 7) + (b & 0x7F) + if l[-1] & 0x40: + value |= - (1 << (7 * len(l))) + return value + """ + code.append(block) + return f"parse_sleb128(io, this)" + + def _emitbuild(self, code): + return "None" + +# ULEB128 was here, but construct has a drop-in replacement called VarInt + +@singleton +def SLEB128(): + """ A construct creator for SLEB128 encoding. + """ + return _SLEB128Adapter(_LEB128_reader()) -def ULEB128(name): - """ A construct creator for ULEB128 encoding. +class EmbeddableStruct(Struct): + r""" + A special Struct that allows embedding of fields with type Embed. """ - return Rename(name, _ULEB128Adapter(_LEB128_reader())) + def __init__(self, *subcons, **subconskw): + super().__init__(*subcons, **subconskw) + + def _parse(self, stream, context, path): + obj = Container() + obj._io = stream + context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None), _parent = obj) + context._root = context._.get("_root", context) + for sc in self.subcons: + try: + subobj = sc._parsereport(stream, context, path) + if sc.name: + obj[sc.name] = subobj + context[sc.name] = subobj + elif subobj and isinstance(sc, Embed): + obj.update(subobj) + + except StopFieldError: + break + return obj -def SLEB128(name): - """ A construct creator for SLEB128 encoding. 
- """ - return Rename(name, _SLEB128Adapter(_LEB128_reader())) -class StreamOffset(Construct): +class Embed(Subconstruct): + r""" + Special wrapper that allows outer multiple-subcons construct to merge fields from another multiple-subcons construct. + Parsing building and sizeof are deferred to subcon. + :param subcon: Construct instance, its fields to embed inside a struct or sequence + Example:: + >>> outer = EmbeddableStruct( + ... Embed(Struct( + ... "data" / Bytes(4), + ... )), + ... ) + >>> outer.parse(b"1234") + Container(data=b'1234') """ - Captures the current stream offset - Parameters: - * name - the name of the value + def __init__(self, subcon): + super().__init__(subcon) - Example: - StreamOffset("item_offset") + +@singleton +def CStringBytes(): + """ + A stripped back version of CString that returns bytes instead of a unicode string. """ - __slots__ = [] - def __init__(self, name): - Construct.__init__(self, name) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - return stream.tell() - def _build(self, obj, stream, context): - context[self.name] = stream.tell() - def _sizeof(self, context): - return 0 + return NullTerminated(GreedyBytes) diff --git a/elftools/common/utils.py b/elftools/common/utils.py index b4bc1e73..7e8cb12f 100644 --- a/elftools/common/utils.py +++ b/elftools/common/utils.py @@ -8,7 +8,7 @@ #------------------------------------------------------------------------------- from contextlib import contextmanager from .exceptions import ELFParseError, ELFError, DWARFError -from ..construct import ConstructError, ULInt8 +from construct import ConstructError, Int8ul import os @@ -108,7 +108,7 @@ def roundup(num, bits): def read_blob(stream, length): """Read length bytes from stream, return a list of ints """ - return [struct_parse(ULInt8(''), stream) for i in range(length)] + return [struct_parse(Int8ul, stream) for i in range(length)] def save_dwarf_section(section, filename): """Debug helper: dump section 
contents into a file diff --git a/elftools/construct/LICENSE b/elftools/construct/LICENSE deleted file mode 100644 index 6529f04a..00000000 --- a/elftools/construct/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (C) 2009 Tomer Filiba, 2010-2011 Corbin Simpson - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/elftools/construct/README b/elftools/construct/README deleted file mode 100644 index 364a9437..00000000 --- a/elftools/construct/README +++ /dev/null @@ -1,13 +0,0 @@ -construct is a Python library for declarative parsing and building of binary -data. This is my fork of construct 2, with some modifications for Python 3 -and bug fixes. The construct website is http://construct.readthedocs.org - -pyelftools carries construct around because construct has been abandoned for -a long time and didn't get bugfixes; it also didn't work with Python 3. 
- -These days (Feb 2018) construct is maintained again, but its APIs have -underwent extensive changes that would require rewriting all of the -construct-facing code in pyelftools. I'm still evaluating the pros/cons of -this effort. See https://github.com/eliben/pyelftools/issues/180 for details. - -LICENSE is the original license. diff --git a/elftools/construct/__init__.py b/elftools/construct/__init__.py deleted file mode 100644 index de335ae2..00000000 --- a/elftools/construct/__init__.py +++ /dev/null @@ -1,110 +0,0 @@ -""" - #### #### - ## #### ## ## #### ###### ##### ## ## #### ###### ## ## - ## ## ## ### ## ## ## ## ## ## ## ## ## #### ## - ## ## ## ###### ### ## ##### ## ## ## ## ## - ## ## ## ## ### ## ## ## ## ## ## ## ## ## - #### #### ## ## #### ## ## ## ##### #### ## ###### - - Parsing made even more fun (and faster too) - -Homepage: - http://construct.wikispaces.com (including online tutorial) - -Typical usage: - >>> from construct import * - -Hands-on example: - >>> from construct import * - >>> s = Struct("foo", - ... UBInt8("a"), - ... UBInt16("b"), - ... 
) - >>> s.parse("\\x01\\x02\\x03") - Container(a = 1, b = 515) - >>> print s.parse("\\x01\\x02\\x03") - Container: - a = 1 - b = 515 - >>> s.build(Container(a = 1, b = 0x0203)) - "\\x01\\x02\\x03" -""" - -from .core import * -from .adapters import * -from .macros import * -from .debug import Probe, Debugger - - -#=============================================================================== -# Metadata -#=============================================================================== -__author__ = "tomer filiba (tomerfiliba [at] gmail.com)" -__maintainer__ = "Corbin Simpson " -__version__ = "2.06" - -#=============================================================================== -# Shorthand expressions -#=============================================================================== -Bits = BitField -Byte = UBInt8 -Bytes = Field -Const = ConstAdapter -Tunnel = TunnelAdapter -Embed = Embedded - -#=============================================================================== -# Deprecated names -# Next scheduled name cleanout: 2.1 -#=============================================================================== -import functools, warnings - -def deprecated(f): - @functools.wraps(f) - def wrapper(*args, **kwargs): - warnings.warn( - "This name is deprecated, use %s instead" % f.__name__, - DeprecationWarning, stacklevel=2) - return f(*args, **kwargs) - return wrapper - -MetaBytes = deprecated(MetaField) -GreedyRepeater = deprecated(GreedyRange) -OptionalGreedyRepeater = deprecated(OptionalGreedyRange) -Repeater = deprecated(Range) -StrictRepeater = deprecated(Array) -MetaRepeater = deprecated(Array) -OneOfValidator = deprecated(OneOf) -NoneOfValidator = deprecated(NoneOf) - -#=============================================================================== -# exposed names -#=============================================================================== -__all__ = [ - 'AdaptationError', 'Adapter', 'Alias', 'Aligned', 'AlignedStruct', - 'Anchor', 'Array', 
'ArrayError', 'BFloat32', 'BFloat64', 'Bit', 'BitField', - 'BitIntegerAdapter', 'BitIntegerError', 'BitStruct', 'Bits', 'Bitwise', - 'Buffered', 'Byte', 'Bytes', 'CString', 'CStringAdapter', 'Const', - 'ConstAdapter', 'ConstError', 'Construct', 'ConstructError', 'Container', - 'Debugger', 'Embed', 'Embedded', 'EmbeddedBitStruct', 'Enum', 'ExprAdapter', - 'Field', 'FieldError', 'Flag', 'FlagsAdapter', 'FlagsContainer', - 'FlagsEnum', 'FormatField', 'GreedyRange', 'GreedyRepeater', - 'HexDumpAdapter', 'If', 'IfThenElse', 'IndexingAdapter', 'LFloat32', - 'LFloat64', 'LazyBound', 'LengthValueAdapter', 'ListContainer', - 'MappingAdapter', 'MappingError', 'MetaArray', 'MetaBytes', 'MetaField', - 'MetaRepeater', 'NFloat32', 'NFloat64', 'Nibble', 'NoneOf', - 'NoneOfValidator', 'Octet', 'OnDemand', 'OnDemandPointer', 'OneOf', - 'OneOfValidator', 'OpenRange', 'Optional', 'OptionalGreedyRange', - 'OptionalGreedyRepeater', 'PaddedStringAdapter', 'Padding', - 'PaddingAdapter', 'PaddingError', 'PascalString', 'Pass', 'Peek', - 'Pointer', 'PrefixedArray', 'Probe', 'Range', 'RangeError', 'Reconfig', - 'Rename', 'RepeatUntil', 'Repeater', 'Restream', 'SBInt16', 'SBInt32', - 'SBInt64', 'SBInt8', 'SLInt16', 'SLInt32', 'SLInt64', 'SLInt8', 'SNInt16', - 'SNInt32', 'SNInt64', 'SNInt8', 'Select', 'SelectError', 'Sequence', - 'SizeofError', 'SlicingAdapter', 'StaticField', 'StrictRepeater', 'String', - 'StringAdapter', 'Struct', 'Subconstruct', 'Switch', 'SwitchError', - 'SymmetricMapping', 'Terminator', 'TerminatorError', 'Tunnel', - 'TunnelAdapter', 'UBInt16', 'UBInt32', 'UBInt64', 'UBInt8', 'ULInt16', - 'ULInt32', 'ULInt64', 'ULInt8', 'UNInt16', 'UNInt32', 'UNInt64', 'UNInt8', - 'Union', 'ValidationError', 'Validator', 'Value', "Magic", -] diff --git a/elftools/construct/adapters.py b/elftools/construct/adapters.py deleted file mode 100644 index 545dbac1..00000000 --- a/elftools/construct/adapters.py +++ /dev/null @@ -1,470 +0,0 @@ -from .core import Adapter, AdaptationError, Pass 
-from .lib import int_to_bin, bin_to_int, swap_bytes -from .lib import FlagsContainer, HexString -from .lib.py3compat import BytesIO, decodebytes - - -#=============================================================================== -# exceptions -#=============================================================================== -class BitIntegerError(AdaptationError): - __slots__ = [] -class MappingError(AdaptationError): - __slots__ = [] -class ConstError(AdaptationError): - __slots__ = [] -class ValidationError(AdaptationError): - __slots__ = [] -class PaddingError(AdaptationError): - __slots__ = [] - -#=============================================================================== -# adapters -#=============================================================================== -class BitIntegerAdapter(Adapter): - """ - Adapter for bit-integers (converts bitstrings to integers, and vice versa). - See BitField. - - Parameters: - * subcon - the subcon to adapt - * width - the size of the subcon, in bits - * swapped - whether to swap byte order (little endian/big endian). - default is False (big endian) - * signed - whether the value is signed (two's complement). the default - is False (unsigned) - * bytesize - number of bits per byte, used for byte-swapping (if swapped). - default is 8. 
- """ - __slots__ = ["width", "swapped", "signed", "bytesize"] - def __init__(self, subcon, width, swapped = False, signed = False, - bytesize = 8): - Adapter.__init__(self, subcon) - self.width = width - self.swapped = swapped - self.signed = signed - self.bytesize = bytesize - def _encode(self, obj, context): - if obj < 0 and not self.signed: - raise BitIntegerError("object is negative, but field is not signed", - obj) - obj2 = int_to_bin(obj, width = self.width) - if self.swapped: - obj2 = swap_bytes(obj2, bytesize = self.bytesize) - return obj2 - def _decode(self, obj, context): - if self.swapped: - obj = swap_bytes(obj, bytesize = self.bytesize) - return bin_to_int(obj, signed = self.signed) - -class MappingAdapter(Adapter): - """ - Adapter that maps objects to other objects. - See SymmetricMapping and Enum. - - Parameters: - * subcon - the subcon to map - * decoding - the decoding (parsing) mapping (a dict) - * encoding - the encoding (building) mapping (a dict) - * decdefault - the default return value when the object is not found - in the decoding mapping. if no object is given, an exception is raised. - if `Pass` is used, the unmapped object will be passed as-is - * encdefault - the default return value when the object is not found - in the encoding mapping. if no object is given, an exception is raised. 
- if `Pass` is used, the unmapped object will be passed as-is - """ - __slots__ = ["encoding", "decoding", "encdefault", "decdefault"] - def __init__(self, subcon, decoding, encoding, - decdefault = NotImplemented, encdefault = NotImplemented): - Adapter.__init__(self, subcon) - self.decoding = decoding - self.encoding = encoding - self.decdefault = decdefault - self.encdefault = encdefault - def _encode(self, obj, context): - try: - return self.encoding[obj] - except (KeyError, TypeError): - if self.encdefault is NotImplemented: - raise MappingError("no encoding mapping for %r [%s]" % ( - obj, self.subcon.name)) - if self.encdefault is Pass: - return obj - return self.encdefault - def _decode(self, obj, context): - try: - return self.decoding[obj] - except (KeyError, TypeError): - if self.decdefault is NotImplemented: - raise MappingError("no decoding mapping for %r [%s]" % ( - obj, self.subcon.name)) - if self.decdefault is Pass: - return obj - return self.decdefault - -class FlagsAdapter(Adapter): - """ - Adapter for flag fields. Each flag is extracted from the number, resulting - in a FlagsContainer object. Not intended for direct usage. - See FlagsEnum. - - Parameters - * subcon - the subcon to extract - * flags - a dictionary mapping flag-names to their value - """ - __slots__ = ["flags"] - def __init__(self, subcon, flags): - Adapter.__init__(self, subcon) - self.flags = flags - def _encode(self, obj, context): - flags = 0 - for name, value in self.flags.items(): - if getattr(obj, name, False): - flags |= value - return flags - def _decode(self, obj, context): - obj2 = FlagsContainer() - for name, value in self.flags.items(): - setattr(obj2, name, bool(obj & value)) - return obj2 - -class StringAdapter(Adapter): - """ - Adapter for strings. Converts a sequence of characters into a python - string, and optionally handles character encoding. - See String. 
- - Parameters: - * subcon - the subcon to convert - * encoding - the character encoding name (e.g., "utf8"), or None to - return raw bytes (usually 8-bit ASCII). - """ - __slots__ = ["encoding"] - def __init__(self, subcon, encoding = None): - Adapter.__init__(self, subcon) - self.encoding = encoding - def _encode(self, obj, context): - if self.encoding: - obj = obj.encode(self.encoding) - return obj - def _decode(self, obj, context): - if self.encoding: - obj = obj.decode(self.encoding) - return obj - -class PaddedStringAdapter(Adapter): - r""" - Adapter for padded strings. - See String. - - Parameters: - * subcon - the subcon to adapt - * padchar - the padding character. default is b"\x00". - * paddir - the direction where padding is placed ("right", "left", or - "center"). the default is "right". - * trimdir - the direction where trimming will take place ("right" or - "left"). the default is "right". trimming is only meaningful for - building, when the given string is too long. - """ - __slots__ = ["padchar", "paddir", "trimdir"] - def __init__(self, subcon, padchar = b"\x00", paddir = "right", - trimdir = "right"): - if paddir not in ("right", "left", "center"): - raise ValueError("paddir must be 'right', 'left' or 'center'", - paddir) - if trimdir not in ("right", "left"): - raise ValueError("trimdir must be 'right' or 'left'", trimdir) - Adapter.__init__(self, subcon) - self.padchar = padchar - self.paddir = paddir - self.trimdir = trimdir - def _decode(self, obj, context): - if self.paddir == "right": - obj = obj.rstrip(self.padchar) - elif self.paddir == "left": - obj = obj.lstrip(self.padchar) - else: - obj = obj.strip(self.padchar) - return obj - def _encode(self, obj, context): - size = self._sizeof(context) - if self.paddir == "right": - obj = obj.ljust(size, self.padchar) - elif self.paddir == "left": - obj = obj.rjust(size, self.padchar) - else: - obj = obj.center(size, self.padchar) - if len(obj) > size: - if self.trimdir == "right": - obj = 
obj[:size] - else: - obj = obj[-size:] - return obj - -class LengthValueAdapter(Adapter): - """ - Adapter for length-value pairs. It extracts only the value from the - pair, and calculates the length based on the value. - See PrefixedArray and PascalString. - - Parameters: - * subcon - the subcon returning a length-value pair - """ - __slots__ = [] - def _encode(self, obj, context): - return (len(obj), obj) - def _decode(self, obj, context): - return obj[1] - -class CStringAdapter(StringAdapter): - r""" - Adapter for C-style strings (strings terminated by a terminator char). - - Parameters: - * subcon - the subcon to convert - * terminators - a sequence of terminator chars. default is b"\x00". - * encoding - the character encoding to use (e.g., "utf8"), or None to - return raw-bytes. the terminator characters are not affected by the - encoding. - """ - __slots__ = ["terminators"] - def __init__(self, subcon, terminators = b"\x00", encoding = None): - StringAdapter.__init__(self, subcon, encoding = encoding) - self.terminators = terminators - def _encode(self, obj, context): - return StringAdapter._encode(self, obj, context) + self.terminators[0:1] - def _decode(self, obj, context): - return StringAdapter._decode(self, b''.join(obj[:-1]), context) - -class TunnelAdapter(Adapter): - """ - Adapter for tunneling (as in protocol tunneling). A tunnel is construct - nested upon another (layering). For parsing, the lower layer first parses - the data (note: it must return a string!), then the upper layer is called - to parse that data (bottom-up). For building it works in a top-down manner; - first the upper layer builds the data, then the lower layer takes it and - writes it to the stream. 
- - Parameters: - * subcon - the lower layer subcon - * inner_subcon - the upper layer (tunneled/nested) subcon - - Example: - # a pascal string containing compressed data (zlib encoding), so first - # the string is read, decompressed, and finally re-parsed as an array - # of UBInt16 - TunnelAdapter( - PascalString("data", encoding = "zlib"), - GreedyRange(UBInt16("elements")) - ) - """ - __slots__ = ["inner_subcon"] - def __init__(self, subcon, inner_subcon): - Adapter.__init__(self, subcon) - self.inner_subcon = inner_subcon - def _decode(self, obj, context): - return self.inner_subcon._parse(BytesIO(obj), context) - def _encode(self, obj, context): - stream = BytesIO() - self.inner_subcon._build(obj, stream, context) - return stream.getvalue() - -class ExprAdapter(Adapter): - """ - A generic adapter that accepts 'encoder' and 'decoder' as parameters. You - can use ExprAdapter instead of writing a full-blown class when only a - simple expression is needed. - - Parameters: - * subcon - the subcon to adapt - * encoder - a function that takes (obj, context) and returns an encoded - version of obj - * decoder - a function that takes (obj, context) and returns a decoded - version of obj - - Example: - ExprAdapter(UBInt8("foo"), - encoder = lambda obj, ctx: obj / 4, - decoder = lambda obj, ctx: obj * 4, - ) - """ - __slots__ = ["_encode", "_decode"] - def __init__(self, subcon, encoder, decoder): - Adapter.__init__(self, subcon) - self._encode = encoder - self._decode = decoder - -class HexDumpAdapter(Adapter): - """ - Adapter for hex-dumping strings. It returns a HexString, which is a string - """ - __slots__ = ["linesize"] - def __init__(self, subcon, linesize = 16): - Adapter.__init__(self, subcon) - self.linesize = linesize - def _encode(self, obj, context): - return obj - def _decode(self, obj, context): - return HexString(obj, linesize = self.linesize) - -class ConstAdapter(Adapter): - """ - Adapter for enforcing a constant value ("magic numbers"). 
When decoding, - the return value is checked; when building, the value is substituted in. - - Parameters: - * subcon - the subcon to validate - * value - the expected value - - Example: - Const(Field("signature", 2), "MZ") - """ - __slots__ = ["value"] - def __init__(self, subcon, value): - Adapter.__init__(self, subcon) - self.value = value - def _encode(self, obj, context): - if obj is None or obj == self.value: - return self.value - else: - raise ConstError("expected %r, found %r" % (self.value, obj)) - def _decode(self, obj, context): - if obj != self.value: - raise ConstError("expected %r, found %r" % (self.value, obj)) - return obj - -class SlicingAdapter(Adapter): - """ - Adapter for slicing a list (getting a slice from that list) - - Parameters: - * subcon - the subcon to slice - * start - start index - * stop - stop index (or None for up-to-end) - * step - step (or None for every element) - """ - __slots__ = ["start", "stop", "step"] - def __init__(self, subcon, start, stop = None): - Adapter.__init__(self, subcon) - self.start = start - self.stop = stop - def _encode(self, obj, context): - if self.start is None: - return obj - return [None] * self.start + obj - def _decode(self, obj, context): - return obj[self.start:self.stop] - -class IndexingAdapter(Adapter): - """ - Adapter for indexing a list (getting a single item from that list) - - Parameters: - * subcon - the subcon to index - * index - the index of the list to get - """ - __slots__ = ["index"] - def __init__(self, subcon, index): - Adapter.__init__(self, subcon) - if type(index) is not int: - raise TypeError("index must be an integer", type(index)) - self.index = index - def _encode(self, obj, context): - return [None] * self.index + [obj] - def _decode(self, obj, context): - return obj[self.index] - -class PaddingAdapter(Adapter): - r""" - Adapter for padding. - - Parameters: - * subcon - the subcon to pad - * pattern - the padding pattern (character as byte). 
default is b"\x00" - * strict - whether or not to verify, during parsing, that the given - padding matches the padding pattern. default is False (unstrict) - """ - __slots__ = ["pattern", "strict"] - def __init__(self, subcon, pattern = b"\x00", strict = False): - Adapter.__init__(self, subcon) - self.pattern = pattern - self.strict = strict - def _encode(self, obj, context): - return self._sizeof(context) * self.pattern - def _decode(self, obj, context): - if self.strict: - expected = self._sizeof(context) * self.pattern - if obj != expected: - raise PaddingError("expected %r, found %r" % (expected, obj)) - return obj - - -#=============================================================================== -# validators -#=============================================================================== -class Validator(Adapter): - """ - Abstract class: validates a condition on the encoded/decoded object. - Override _validate(obj, context) in deriving classes. - - Parameters: - * subcon - the subcon to validate - """ - __slots__ = [] - def _decode(self, obj, context): - if not self._validate(obj, context): - raise ValidationError("invalid object", obj) - return obj - def _encode(self, obj, context): - return self._decode(obj, context) - def _validate(self, obj, context): - raise NotImplementedError() - -class OneOf(Validator): - """ - Validates that the object is one of the listed values. - - :param ``Construct`` subcon: object to validate - :param iterable valids: a set of valid values - - >>> OneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x05") - 5 - >>> OneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x08") - Traceback (most recent call last): - ... - construct.core.ValidationError: ('invalid object', 8) - >>> - >>> OneOf(UBInt8("foo"), [4,5,6,7]).build(5) - '\\x05' - >>> OneOf(UBInt8("foo"), [4,5,6,7]).build(9) - Traceback (most recent call last): - ... 
- construct.core.ValidationError: ('invalid object', 9) - """ - __slots__ = ["valids"] - def __init__(self, subcon, valids): - Validator.__init__(self, subcon) - self.valids = valids - def _validate(self, obj, context): - return obj in self.valids - -class NoneOf(Validator): - """ - Validates that the object is none of the listed values. - - :param ``Construct`` subcon: object to validate - :param iterable invalids: a set of invalid values - - >>> NoneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x08") - 8 - >>> NoneOf(UBInt8("foo"), [4,5,6,7]).parse("\\x06") - Traceback (most recent call last): - ... - construct.core.ValidationError: ('invalid object', 6) - """ - __slots__ = ["invalids"] - def __init__(self, subcon, invalids): - Validator.__init__(self, subcon) - self.invalids = invalids - def _validate(self, obj, context): - return obj not in self.invalids diff --git a/elftools/construct/core.py b/elftools/construct/core.py deleted file mode 100644 index 14a50f88..00000000 --- a/elftools/construct/core.py +++ /dev/null @@ -1,1326 +0,0 @@ -from struct import Struct as Packer - -from .lib.py3compat import BytesIO, advance_iterator, bchr -from .lib import Container, ListContainer, LazyContainer - - -#=============================================================================== -# exceptions -#=============================================================================== -class ConstructError(Exception): - __slots__ = [] -class FieldError(ConstructError): - __slots__ = [] -class SizeofError(ConstructError): - __slots__ = [] -class AdaptationError(ConstructError): - __slots__ = [] -class ArrayError(ConstructError): - __slots__ = [] -class RangeError(ConstructError): - __slots__ = [] -class SwitchError(ConstructError): - __slots__ = [] -class SelectError(ConstructError): - __slots__ = [] -class TerminatorError(ConstructError): - __slots__ = [] - -#=============================================================================== -# abstract constructs 
-#=============================================================================== -class Construct(object): - """ - The mother of all constructs. - - This object is generally not directly instantiated, and it does not - directly implement parsing and building, so it is largely only of interest - to subclass implementors. - - The external user API: - - * parse() - * parse_stream() - * build() - * build_stream() - * sizeof() - - Subclass authors should not override the external methods. Instead, - another API is available: - - * _parse() - * _build() - * _sizeof() - - There is also a flag API: - - * _set_flag() - * _clear_flag() - * _inherit_flags() - * _is_flag() - - And stateful copying: - - * __getstate__() - * __setstate__() - - Attributes and Inheritance - ========================== - - All constructs have a name and flags. The name is used for naming struct - members and context dictionaries. Note that the name can either be a - string, or None if the name is not needed. A single underscore ("_") is a - reserved name, and so are names starting with a less-than character ("<"). - The name should be descriptive, short, and valid as a Python identifier, - although these rules are not enforced. - - The flags specify additional behavioral information about this construct. - Flags are used by enclosing constructs to determine a proper course of - action. Flags are inherited by default, from inner subconstructs to outer - constructs. The enclosing construct may set new flags or clear existing - ones, as necessary. - - For example, if FLAG_COPY_CONTEXT is set, repeaters will pass a copy of - the context for each iteration, which is necessary for OnDemand parsing. 
- """ - - FLAG_COPY_CONTEXT = 0x0001 - FLAG_DYNAMIC = 0x0002 - FLAG_EMBED = 0x0004 - FLAG_NESTING = 0x0008 - - __slots__ = ["name", "conflags"] - def __init__(self, name, flags = 0): - if name is not None: - if type(name) is not str: - raise TypeError("name must be a string or None", name) - if name == "_" or name.startswith("<"): - raise ValueError("reserved name", name) - self.name = name - self.conflags = flags - - def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self.name) - - def _set_flag(self, flag): - """ - Set the given flag or flags. - - :param int flag: flag to set; may be OR'd combination of flags - """ - - self.conflags |= flag - - def _clear_flag(self, flag): - """ - Clear the given flag or flags. - - :param int flag: flag to clear; may be OR'd combination of flags - """ - - self.conflags &= ~flag - - def _inherit_flags(self, *subcons): - """ - Pull flags from subconstructs. - """ - - for sc in subcons: - self._set_flag(sc.conflags) - - def _is_flag(self, flag): - """ - Check whether a given flag is set. - - :param int flag: flag to check - """ - - return bool(self.conflags & flag) - - def __getstate__(self): - """ - Obtain a dictionary representing this construct's state. - """ - - attrs = {} - if hasattr(self, "__dict__"): - attrs.update(self.__dict__) - slots = [] - c = self.__class__ - while c is not None: - if hasattr(c, "__slots__"): - slots.extend(c.__slots__) - c = c.__base__ - for name in slots: - if hasattr(self, name): - attrs[name] = getattr(self, name) - return attrs - - def __setstate__(self, attrs): - """ - Set this construct's state to a given state. - """ - for name, value in attrs.items(): - setattr(self, name, value) - - def __copy__(self): - """returns a copy of this construct""" - self2 = object.__new__(self.__class__) - self2.__setstate__(self.__getstate__()) - return self2 - - def parse(self, data): - """ - Parse an in-memory buffer. 
- - Strings, buffers, memoryviews, and other complete buffers can be - parsed with this method. - """ - - return self.parse_stream(BytesIO(data)) - - def parse_stream(self, stream): - """ - Parse a stream. - - Files, pipes, sockets, and other streaming sources of data are handled - by this method. - """ - - return self._parse(stream, Container()) - - def _parse(self, stream, context): - """ - Override me in your subclass. - """ - - raise NotImplementedError() - - def build(self, obj): - """ - Build an object in memory. - """ - stream = BytesIO() - self.build_stream(obj, stream) - return stream.getvalue() - - def build_stream(self, obj, stream): - """ - Build an object directly into a stream. - """ - self._build(obj, stream, Container()) - - def _build(self, obj, stream, context): - """ - Override me in your subclass. - """ - - raise NotImplementedError() - - def sizeof(self, context=None): - """ - Calculate the size of this object, optionally using a context. - - Some constructs have no fixed size and can only know their size for a - given hunk of data; these constructs will raise an error if they are - not passed a context. - - :param ``Container`` context: contextual data - - :returns: int of the length of this construct - :raises SizeofError: the size could not be determined - """ - - if context is None: - context = Container() - try: - return self._sizeof(context) - except Exception as e: - raise SizeofError(e) - - def _sizeof(self, context): - """ - Override me in your subclass. - """ - - raise SizeofError("Raw Constructs have no size!") - -class Subconstruct(Construct): - """ - Abstract subconstruct (wraps an inner construct, inheriting its - name and flags). 
- - Parameters: - * subcon - the construct to wrap - """ - __slots__ = ["subcon"] - def __init__(self, subcon): - Construct.__init__(self, subcon.name, subcon.conflags) - self.subcon = subcon - def _parse(self, stream, context): - return self.subcon._parse(stream, context) - def _build(self, obj, stream, context): - self.subcon._build(obj, stream, context) - def _sizeof(self, context): - return self.subcon._sizeof(context) - -class Adapter(Subconstruct): - """ - Abstract adapter: calls _decode for parsing and _encode for building. - - Parameters: - * subcon - the construct to wrap - """ - __slots__ = [] - def _parse(self, stream, context): - return self._decode(self.subcon._parse(stream, context), context) - def _build(self, obj, stream, context): - self.subcon._build(self._encode(obj, context), stream, context) - def _decode(self, obj, context): - raise NotImplementedError() - def _encode(self, obj, context): - raise NotImplementedError() - - -#=============================================================================== -# Fields -#=============================================================================== -def _read_stream(stream, length): - if length < 0: - raise ValueError("length must be >= 0", length) - data = stream.read(length) - if len(data) != length: - raise FieldError("expected %d, found %d" % (length, len(data))) - return data - -def _write_stream(stream, length, data): - if length < 0: - raise ValueError("length must be >= 0", length) - if len(data) != length: - raise FieldError("expected %d, found %d" % (length, len(data))) - stream.write(data) - -class StaticField(Construct): - """ - A fixed-size byte field. 
- - :param str name: field name - :param int length: number of bytes in the field - """ - - __slots__ = ["length"] - def __init__(self, name, length): - Construct.__init__(self, name) - self.length = length - def _parse(self, stream, context): - return _read_stream(stream, self.length) - def _build(self, obj, stream, context): - _write_stream(stream, self.length, obj) - def _sizeof(self, context): - return self.length - -class FormatField(StaticField): - """ - A field that uses ``struct`` to pack and unpack data. - - See ``struct`` documentation for instructions on crafting format strings. - - :param str name: name of the field - :param str endianness: format endianness string; one of "<", ">", or "=" - :param str format: a single format character - """ - - __slots__ = ["packer"] - def __init__(self, name, endianity, format): - if endianity not in (">", "<", "="): - raise ValueError("endianity must be be '=', '<', or '>'", - endianity) - if len(format) != 1: - raise ValueError("must specify one and only one format char") - self.packer = Packer(endianity + format) - StaticField.__init__(self, name, self.packer.size) - def __getstate__(self): - attrs = StaticField.__getstate__(self) - attrs["packer"] = attrs["packer"].format - return attrs - def __setstate__(self, attrs): - attrs["packer"] = Packer(attrs["packer"]) - return StaticField.__setstate__(self, attrs) - def _parse(self, stream, context): - try: - return self.packer.unpack(_read_stream(stream, self.length))[0] - except Exception as ex: - raise FieldError(ex) - def _build(self, obj, stream, context): - try: - _write_stream(stream, self.length, self.packer.pack(obj)) - except Exception as ex: - raise FieldError(ex) - -class MetaField(Construct): - """ - A variable-length field. The length is obtained at runtime from a - function. - - :param str name: name of the field - :param callable lengthfunc: callable that takes a context and returns - length as an int - - >>> foo = Struct("foo", - ... 
Byte("length"), - ... MetaField("data", lambda ctx: ctx["length"]) - ... ) - >>> foo.parse("\\x03ABC") - Container(data = 'ABC', length = 3) - >>> foo.parse("\\x04ABCD") - Container(data = 'ABCD', length = 4) - """ - - __slots__ = ["lengthfunc"] - def __init__(self, name, lengthfunc): - Construct.__init__(self, name) - self.lengthfunc = lengthfunc - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - return _read_stream(stream, self.lengthfunc(context)) - def _build(self, obj, stream, context): - _write_stream(stream, self.lengthfunc(context), obj) - def _sizeof(self, context): - return self.lengthfunc(context) - - -#=============================================================================== -# arrays and repeaters -#=============================================================================== -class MetaArray(Subconstruct): - """ - An array (repeater) of a meta-count. The array will iterate exactly - `countfunc()` times. Will raise ArrayError if less elements are found. - See also Array, Range and RepeatUntil. 
- - Parameters: - * countfunc - a function that takes the context as a parameter and returns - the number of elements of the array (count) - * subcon - the subcon to repeat `countfunc()` times - - Example: - MetaArray(lambda ctx: 5, UBInt8("foo")) - """ - __slots__ = ["countfunc"] - def __init__(self, countfunc, subcon): - Subconstruct.__init__(self, subcon) - self.countfunc = countfunc - self._clear_flag(self.FLAG_COPY_CONTEXT) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - obj = ListContainer() - c = 0 - count = self.countfunc(context) - try: - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - while c < count: - obj.append(self.subcon._parse(stream, context.__copy__())) - c += 1 - else: - while c < count: - obj.append(self.subcon._parse(stream, context)) - c += 1 - except ConstructError as ex: - raise ArrayError("expected %d, found %d" % (count, c), ex) - return obj - def _build(self, obj, stream, context): - count = self.countfunc(context) - if len(obj) != count: - raise ArrayError("expected %d, found %d" % (count, len(obj))) - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - for subobj in obj: - self.subcon._build(subobj, stream, context.__copy__()) - else: - for subobj in obj: - self.subcon._build(subobj, stream, context) - def _sizeof(self, context): - return self.subcon._sizeof(context) * self.countfunc(context) - -class Range(Subconstruct): - """ - A range-array. The subcon will iterate between `mincount` to `maxcount` - times. If less than `mincount` elements are found, raises RangeError. - See also GreedyRange and OptionalGreedyRange. - - The general-case repeater. Repeats the given unit for at least mincount - times, and up to maxcount times. If an exception occurs (EOF, validation - error), the repeater exits. If less than mincount units have been - successfully parsed, a RangeError is raised. - - .. note:: - This object requires a seekable stream for parsing. 
- - :param int mincount: the minimal count - :param int maxcount: the maximal count - :param Construct subcon: the subcon to repeat - - >>> c = Range(3, 7, UBInt8("foo")) - >>> c.parse("\\x01\\x02") - Traceback (most recent call last): - ... - construct.core.RangeError: expected 3..7, found 2 - >>> c.parse("\\x01\\x02\\x03") - [1, 2, 3] - >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06") - [1, 2, 3, 4, 5, 6] - >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06\\x07") - [1, 2, 3, 4, 5, 6, 7] - >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\x09") - [1, 2, 3, 4, 5, 6, 7] - >>> c.build([1,2]) - Traceback (most recent call last): - ... - construct.core.RangeError: expected 3..7, found 2 - >>> c.build([1,2,3,4]) - '\\x01\\x02\\x03\\x04' - >>> c.build([1,2,3,4,5,6,7,8]) - Traceback (most recent call last): - ... - construct.core.RangeError: expected 3..7, found 8 - """ - - __slots__ = ["mincount", "maxcout"] - def __init__(self, mincount, maxcout, subcon): - Subconstruct.__init__(self, subcon) - self.mincount = mincount - self.maxcout = maxcout - self._clear_flag(self.FLAG_COPY_CONTEXT) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - obj = ListContainer() - c = 0 - try: - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - while c < self.maxcout: - pos = stream.tell() - obj.append(self.subcon._parse(stream, context.__copy__())) - c += 1 - else: - while c < self.maxcout: - pos = stream.tell() - obj.append(self.subcon._parse(stream, context)) - c += 1 - except ConstructError as ex: - if c < self.mincount: - raise RangeError("expected %d to %d, found %d" % - (self.mincount, self.maxcout, c), ex) - stream.seek(pos) - return obj - def _build(self, obj, stream, context): - if len(obj) < self.mincount or len(obj) > self.maxcout: - raise RangeError("expected %d to %d, found %d" % - (self.mincount, self.maxcout, len(obj))) - cnt = 0 - try: - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - for subobj in obj: - if isinstance(obj, bytes): - subobj = 
bchr(subobj) - self.subcon._build(subobj, stream, context.__copy__()) - cnt += 1 - else: - for subobj in obj: - if isinstance(obj, bytes): - subobj = bchr(subobj) - self.subcon._build(subobj, stream, context) - cnt += 1 - except ConstructError as ex: - if cnt < self.mincount: - raise RangeError("expected %d to %d, found %d" % - (self.mincount, self.maxcout, len(obj)), ex) - def _sizeof(self, context): - raise SizeofError("can't calculate size") - -class RepeatUntil(Subconstruct): - """ - An array that repeats until the predicate indicates it to stop. Note that - the last element (which caused the repeat to exit) is included in the - return value. - - Parameters: - * predicate - a predicate function that takes (obj, context) and returns - True if the stop-condition is met, or False to continue. - * subcon - the subcon to repeat. - - Example: - # will read chars until b\x00 (inclusive) - RepeatUntil(lambda obj, ctx: obj == b"\x00", - Field("chars", 1) - ) - """ - __slots__ = ["predicate"] - def __init__(self, predicate, subcon): - Subconstruct.__init__(self, subcon) - self.predicate = predicate - self._clear_flag(self.FLAG_COPY_CONTEXT) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - obj = [] - try: - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - while True: - subobj = self.subcon._parse(stream, context.__copy__()) - obj.append(subobj) - if self.predicate(subobj, context): - break - else: - while True: - subobj = self.subcon._parse(stream, context) - obj.append(subobj) - if self.predicate(subobj, context): - break - except ConstructError as ex: - raise ArrayError("missing terminator", ex) - return obj - def _build(self, obj, stream, context): - terminated = False - if self.subcon.conflags & self.FLAG_COPY_CONTEXT: - for subobj in obj: - self.subcon._build(subobj, stream, context.__copy__()) - if self.predicate(subobj, context): - terminated = True - break - else: - for subobj in obj: - subobj = bchr(subobj) - 
self.subcon._build(subobj, stream, context.__copy__()) - if self.predicate(subobj, context): - terminated = True - break - if not terminated: - raise ArrayError("missing terminator") - def _sizeof(self, context): - raise SizeofError("can't calculate size") - - -#=============================================================================== -# structures and sequences -#=============================================================================== -class Struct(Construct): - """ - A sequence of named constructs, similar to structs in C. The elements are - parsed and built in the order they are defined. - See also Embedded. - - Parameters: - * name - the name of the structure - * subcons - a sequence of subconstructs that make up this structure. - * nested - a keyword-only argument that indicates whether this struct - creates a nested context. The default is True. This parameter is - considered "advanced usage", and may be removed in the future. - - Example: - Struct("foo", - UBInt8("first_element"), - UBInt16("second_element"), - Padding(2), - UBInt8("third_element"), - ) - """ - __slots__ = ["subcons", "nested"] - def __init__(self, name, *subcons, **kw): - self.nested = kw.pop("nested", True) - if kw: - raise TypeError("the only keyword argument accepted is 'nested'", kw) - Construct.__init__(self, name) - self.subcons = subcons - self._inherit_flags(*subcons) - self._clear_flag(self.FLAG_EMBED) - def _parse(self, stream, context): - if "" in context: - obj = context[""] - del context[""] - else: - obj = Container() - if self.nested: - context = Container(_ = context) - for sc in self.subcons: - if sc.conflags & self.FLAG_EMBED: - context[""] = obj - sc._parse(stream, context) - else: - subobj = sc._parse(stream, context) - if sc.name is not None: - obj[sc.name] = subobj - context[sc.name] = subobj - return obj - def _build(self, obj, stream, context): - if "" in context: - del context[""] - elif self.nested: - context = Container(_ = context) - for sc in 
self.subcons: - if sc.conflags & self.FLAG_EMBED: - context[""] = True - subobj = obj - elif sc.name is None: - subobj = None - else: - subobj = getattr(obj, sc.name) - context[sc.name] = subobj - sc._build(subobj, stream, context) - def _sizeof(self, context): - if self.nested: - context = Container(_ = context) - return sum(sc._sizeof(context) for sc in self.subcons) - -class Sequence(Struct): - """ - A sequence of unnamed constructs. The elements are parsed and built in the - order they are defined. - See also Embedded. - - Parameters: - * name - the name of the structure - * subcons - a sequence of subconstructs that make up this structure. - * nested - a keyword-only argument that indicates whether this struct - creates a nested context. The default is True. This parameter is - considered "advanced usage", and may be removed in the future. - - Example: - Sequence("foo", - UBInt8("first_element"), - UBInt16("second_element"), - Padding(2), - UBInt8("third_element"), - ) - """ - __slots__ = [] - def _parse(self, stream, context): - if "" in context: - obj = context[""] - del context[""] - else: - obj = ListContainer() - if self.nested: - context = Container(_ = context) - for sc in self.subcons: - if sc.conflags & self.FLAG_EMBED: - context[""] = obj - sc._parse(stream, context) - else: - subobj = sc._parse(stream, context) - if sc.name is not None: - obj.append(subobj) - context[sc.name] = subobj - return obj - def _build(self, obj, stream, context): - if "" in context: - del context[""] - elif self.nested: - context = Container(_ = context) - objiter = iter(obj) - for sc in self.subcons: - if sc.conflags & self.FLAG_EMBED: - context[""] = True - subobj = objiter - elif sc.name is None: - subobj = None - else: - subobj = advance_iterator(objiter) - context[sc.name] = subobj - sc._build(subobj, stream, context) - -class Union(Construct): - """ - a set of overlapping fields (like unions in C). 
when parsing, - all fields read the same data; when building, only the first subcon - (called "master") is used. - - Parameters: - * name - the name of the union - * master - the master subcon, i.e., the subcon used for building and - calculating the total size - * subcons - additional subcons - - Example: - Union("what_are_four_bytes", - UBInt32("one_dword"), - Struct("two_words", UBInt16("first"), UBInt16("second")), - Struct("four_bytes", - UBInt8("a"), - UBInt8("b"), - UBInt8("c"), - UBInt8("d") - ), - ) - """ - __slots__ = ["parser", "builder"] - def __init__(self, name, master, *subcons, **kw): - Construct.__init__(self, name) - args = [Peek(sc) for sc in subcons] - args.append(MetaField(None, lambda ctx: master._sizeof(ctx))) - self.parser = Struct(name, Peek(master, perform_build = True), *args) - self.builder = Struct(name, master) - def _parse(self, stream, context): - return self.parser._parse(stream, context) - def _build(self, obj, stream, context): - return self.builder._build(obj, stream, context) - def _sizeof(self, context): - return self.builder._sizeof(context) - -#=============================================================================== -# conditional -#=============================================================================== -class Switch(Construct): - """ - A conditional branch. Switch will choose the case to follow based on - the return value of keyfunc. If no case is matched, and no default value - is given, SwitchError will be raised. - See also Pass. - - Parameters: - * name - the name of the construct - * keyfunc - a function that takes the context and returns a key, which - will ne used to choose the relevant case. - * cases - a dictionary mapping keys to constructs. the keys can be any - values that may be returned by keyfunc. - * default - a default value to use when the key is not found in the cases. - if not supplied, an exception will be raised when the key is not found. 
- You can use the builtin construct Pass for 'do-nothing'. - * include_key - whether or not to include the key in the return value - of parsing. defualt is False. - - Example: - Struct("foo", - UBInt8("type"), - Switch("value", lambda ctx: ctx.type, { - 1 : UBInt8("spam"), - 2 : UBInt16("spam"), - 3 : UBInt32("spam"), - 4 : UBInt64("spam"), - } - ), - ) - """ - - class NoDefault(Construct): - def _parse(self, stream, context): - raise SwitchError("no default case defined") - def _build(self, obj, stream, context): - raise SwitchError("no default case defined") - def _sizeof(self, context): - raise SwitchError("no default case defined") - NoDefault = NoDefault("No default value specified") - - __slots__ = ["subcons", "keyfunc", "cases", "default", "include_key"] - - def __init__(self, name, keyfunc, cases, default = NoDefault, - include_key = False): - Construct.__init__(self, name) - self._inherit_flags(*cases.values()) - self.keyfunc = keyfunc - self.cases = cases - self.default = default - self.include_key = include_key - self._inherit_flags(*cases.values()) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - key = self.keyfunc(context) - obj = self.cases.get(key, self.default)._parse(stream, context) - if self.include_key: - return key, obj - else: - return obj - def _build(self, obj, stream, context): - if self.include_key: - key, obj = obj - else: - key = self.keyfunc(context) - case = self.cases.get(key, self.default) - case._build(obj, stream, context) - def _sizeof(self, context): - case = self.cases.get(self.keyfunc(context), self.default) - return case._sizeof(context) - -class Select(Construct): - """ - Selects the first matching subconstruct. It will literally try each of - the subconstructs, until one matches. - - Notes: - * requires a seekable stream. 
- - Parameters: - * name - the name of the construct - * subcons - the subcons to try (order-sensitive) - * include_name - a keyword only argument, indicating whether to include - the name of the selected subcon in the return value of parsing. default - is false. - - Example: - Select("foo", - UBInt64("large"), - UBInt32("medium"), - UBInt16("small"), - UBInt8("tiny"), - ) - """ - __slots__ = ["subcons", "include_name"] - def __init__(self, name, *subcons, **kw): - include_name = kw.pop("include_name", False) - if kw: - raise TypeError("the only keyword argument accepted " - "is 'include_name'", kw) - Construct.__init__(self, name) - self.subcons = subcons - self.include_name = include_name - self._inherit_flags(*subcons) - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - for sc in self.subcons: - pos = stream.tell() - context2 = context.__copy__() - try: - obj = sc._parse(stream, context2) - except ConstructError: - stream.seek(pos) - else: - context.__update__(context2) - if self.include_name: - return sc.name, obj - else: - return obj - raise SelectError("no subconstruct matched") - def _build(self, obj, stream, context): - if self.include_name: - name, obj = obj - for sc in self.subcons: - if sc.name == name: - sc._build(obj, stream, context) - return - else: - for sc in self.subcons: - stream2 = BytesIO() - context2 = context.__copy__() - try: - sc._build(obj, stream2, context2) - except Exception: - pass - else: - context.__update__(context2) - stream.write(stream2.getvalue()) - return - raise SelectError("no subconstruct matched", obj) - def _sizeof(self, context): - raise SizeofError("can't calculate size") - - -#=============================================================================== -# stream manipulation -#=============================================================================== -class Pointer(Subconstruct): - """ - Changes the stream position to a given offset, where the construction - should take place, and restores 
the stream position when finished. - See also Anchor, OnDemand and OnDemandPointer. - - Notes: - * requires a seekable stream. - - Parameters: - * offsetfunc: a function that takes the context and returns an absolute - stream position, where the construction would take place - * subcon - the subcon to use at `offsetfunc()` - - Example: - Struct("foo", - UBInt32("spam_pointer"), - Pointer(lambda ctx: ctx.spam_pointer, - Array(5, UBInt8("spam")) - ) - ) - """ - __slots__ = ["offsetfunc"] - def __init__(self, offsetfunc, subcon): - Subconstruct.__init__(self, subcon) - self.offsetfunc = offsetfunc - def _parse(self, stream, context): - newpos = self.offsetfunc(context) - origpos = stream.tell() - stream.seek(newpos) - obj = self.subcon._parse(stream, context) - stream.seek(origpos) - return obj - def _build(self, obj, stream, context): - newpos = self.offsetfunc(context) - origpos = stream.tell() - stream.seek(newpos) - self.subcon._build(obj, stream, context) - stream.seek(origpos) - def _sizeof(self, context): - return 0 - -class Peek(Subconstruct): - """ - Peeks at the stream: parses without changing the stream position. - See also Union. If the end of the stream is reached when peeking, - returns None. - - Notes: - * requires a seekable stream. - - Parameters: - * subcon - the subcon to peek at - * perform_build - whether or not to perform building. by default this - parameter is set to False, meaning building is a no-op. 
- - Example: - Peek(UBInt8("foo")) - """ - __slots__ = ["perform_build"] - def __init__(self, subcon, perform_build = False): - Subconstruct.__init__(self, subcon) - self.perform_build = perform_build - def _parse(self, stream, context): - pos = stream.tell() - try: - return self.subcon._parse(stream, context) - except FieldError: - pass - finally: - stream.seek(pos) - def _build(self, obj, stream, context): - if self.perform_build: - self.subcon._build(obj, stream, context) - def _sizeof(self, context): - return 0 - -class OnDemand(Subconstruct): - """ - Allows for on-demand (lazy) parsing. When parsing, it will return a - LazyContainer that represents a pointer to the data, but does not actually - parses it from stream until it's "demanded". - By accessing the 'value' property of LazyContainers, you will demand the - data from the stream. The data will be parsed and cached for later use. - You can use the 'has_value' property to know whether the data has already - been demanded. - See also OnDemandPointer. - - Notes: - * requires a seekable stream. - - Parameters: - * subcon - - * advance_stream - whether or not to advance the stream position. by - default this is True, but if subcon is a pointer, this should be False. - * force_build - whether or not to force build. If set to False, and the - LazyContainer has not been demaned, building is a no-op. 
- - Example: - OnDemand(Array(10000, UBInt8("foo")) - """ - __slots__ = ["advance_stream", "force_build"] - def __init__(self, subcon, advance_stream = True, force_build = True): - Subconstruct.__init__(self, subcon) - self.advance_stream = advance_stream - self.force_build = force_build - def _parse(self, stream, context): - obj = LazyContainer(self.subcon, stream, stream.tell(), context) - if self.advance_stream: - stream.seek(self.subcon._sizeof(context), 1) - return obj - def _build(self, obj, stream, context): - if not isinstance(obj, LazyContainer): - self.subcon._build(obj, stream, context) - elif self.force_build or obj.has_value: - self.subcon._build(obj.value, stream, context) - elif self.advance_stream: - stream.seek(self.subcon._sizeof(context), 1) - -class Buffered(Subconstruct): - """ - Creates an in-memory buffered stream, which can undergo encoding and - decoding prior to being passed on to the subconstruct. - See also Bitwise. - - Note: - * Do not use pointers inside Buffered - - Parameters: - * subcon - the subcon which will operate on the buffer - * encoder - a function that takes a string and returns an encoded - string (used after building) - * decoder - a function that takes a string and returns a decoded - string (used before parsing) - * resizer - a function that takes the size of the subcon and "adjusts" - or "resizes" it according to the encoding/decoding process. 
- - Example: - Buffered(BitField("foo", 16), - encoder = decode_bin, - decoder = encode_bin, - resizer = lambda size: size / 8, - ) - """ - __slots__ = ["encoder", "decoder", "resizer"] - def __init__(self, subcon, decoder, encoder, resizer): - Subconstruct.__init__(self, subcon) - self.encoder = encoder - self.decoder = decoder - self.resizer = resizer - def _parse(self, stream, context): - data = _read_stream(stream, self._sizeof(context)) - stream2 = BytesIO(self.decoder(data)) - return self.subcon._parse(stream2, context) - def _build(self, obj, stream, context): - size = self._sizeof(context) - stream2 = BytesIO() - self.subcon._build(obj, stream2, context) - data = self.encoder(stream2.getvalue()) - assert len(data) == size - _write_stream(stream, self._sizeof(context), data) - def _sizeof(self, context): - return self.resizer(self.subcon._sizeof(context)) - -class Restream(Subconstruct): - """ - Wraps the stream with a read-wrapper (for parsing) or a - write-wrapper (for building). The stream wrapper can buffer the data - internally, reading it from- or writing it to the underlying stream - as needed. For example, BitStreamReader reads whole bytes from the - underlying stream, but returns them as individual bits. - See also Bitwise. - - When the parsing or building is done, the stream's close method - will be invoked. It can perform any finalization needed for the stream - wrapper, but it must not close the underlying stream. - - Note: - * Do not use pointers inside Restream - - Parameters: - * subcon - the subcon - * stream_reader - the read-wrapper - * stream_writer - the write wrapper - * resizer - a function that takes the size of the subcon and "adjusts" - or "resizes" it according to the encoding/decoding process. 
- - Example: - Restream(BitField("foo", 16), - stream_reader = BitStreamReader, - stream_writer = BitStreamWriter, - resizer = lambda size: size / 8, - ) - """ - __slots__ = ["stream_reader", "stream_writer", "resizer"] - def __init__(self, subcon, stream_reader, stream_writer, resizer): - Subconstruct.__init__(self, subcon) - self.stream_reader = stream_reader - self.stream_writer = stream_writer - self.resizer = resizer - def _parse(self, stream, context): - stream2 = self.stream_reader(stream) - obj = self.subcon._parse(stream2, context) - stream2.close() - return obj - def _build(self, obj, stream, context): - stream2 = self.stream_writer(stream) - self.subcon._build(obj, stream2, context) - stream2.close() - def _sizeof(self, context): - return self.resizer(self.subcon._sizeof(context)) - - -#=============================================================================== -# miscellaneous -#=============================================================================== -class Reconfig(Subconstruct): - """ - Reconfigures a subconstruct. Reconfig can be used to change the name and - set and clear flags of the inner subcon. - - Parameters: - * name - the new name - * subcon - the subcon to reconfigure - * setflags - the flags to set (default is 0) - * clearflags - the flags to clear (default is 0) - - Example: - Reconfig("foo", UBInt8("bar")) - """ - __slots__ = [] - def __init__(self, name, subcon, setflags = 0, clearflags = 0): - Construct.__init__(self, name, subcon.conflags) - self.subcon = subcon - self._set_flag(setflags) - self._clear_flag(clearflags) - -class Anchor(Construct): - """ - Returns the "anchor" (stream position) at the point where it's inserted. - Useful for adjusting relative offsets to absolute positions, or to measure - sizes of constructs. - absolute pointer = anchor + relative offset - size = anchor_after - anchor_before - See also Pointer. - - Notes: - * requires a seekable stream. 
- - Parameters: - * name - the name of the anchor - - Example: - Struct("foo", - Anchor("base"), - UBInt8("relative_offset"), - Pointer(lambda ctx: ctx.relative_offset + ctx.base, - UBInt8("data") - ) - ) - """ - __slots__ = [] - def _parse(self, stream, context): - return stream.tell() - def _build(self, obj, stream, context): - context[self.name] = stream.tell() - def _sizeof(self, context): - return 0 - -class Value(Construct): - """ - A computed value. - - Parameters: - * name - the name of the value - * func - a function that takes the context and return the computed value - - Example: - Struct("foo", - UBInt8("width"), - UBInt8("height"), - Value("total_pixels", lambda ctx: ctx.width * ctx.height), - ) - """ - __slots__ = ["func"] - def __init__(self, name, func): - Construct.__init__(self, name) - self.func = func - self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): - return self.func(context) - def _build(self, obj, stream, context): - context[self.name] = self.func(context) - def _sizeof(self, context): - return 0 - -#class Dynamic(Construct): -# """ -# Dynamically creates a construct and uses it for parsing and building. -# This allows you to create change the construction tree on the fly. -# Deprecated. -# -# Parameters: -# * name - the name of the construct -# * factoryfunc - a function that takes the context and returns a new -# construct object which will be used for parsing and building. 
-# -# Example: -# def factory(ctx): -# if ctx.bar == 8: -# return UBInt8("spam") -# if ctx.bar == 9: -# return String("spam", 9) -# -# Struct("foo", -# UBInt8("bar"), -# Dynamic("spam", factory), -# ) -# """ -# __slots__ = ["factoryfunc"] -# def __init__(self, name, factoryfunc): -# Construct.__init__(self, name, self.FLAG_COPY_CONTEXT) -# self.factoryfunc = factoryfunc -# self._set_flag(self.FLAG_DYNAMIC) -# def _parse(self, stream, context): -# return self.factoryfunc(context)._parse(stream, context) -# def _build(self, obj, stream, context): -# return self.factoryfunc(context)._build(obj, stream, context) -# def _sizeof(self, context): -# return self.factoryfunc(context)._sizeof(context) - -class LazyBound(Construct): - """ - Lazily bound construct, useful for constructs that need to make cyclic - references (linked-lists, expression trees, etc.). - - Parameters: - - - Example: - foo = Struct("foo", - UBInt8("bar"), - LazyBound("next", lambda: foo), - ) - """ - __slots__ = ["bindfunc", "bound"] - def __init__(self, name, bindfunc): - Construct.__init__(self, name) - self.bound = None - self.bindfunc = bindfunc - def _parse(self, stream, context): - if self.bound is None: - self.bound = self.bindfunc() - return self.bound._parse(stream, context) - def _build(self, obj, stream, context): - if self.bound is None: - self.bound = self.bindfunc() - self.bound._build(obj, stream, context) - def _sizeof(self, context): - if self.bound is None: - self.bound = self.bindfunc() - return self.bound._sizeof(context) - -class Pass(Construct): - """ - A do-nothing construct, useful as the default case for Switch, or - to indicate Enums. - See also Switch and Enum. - - Notes: - * this construct is a singleton. do not try to instatiate it, as it - will not work... 
- - Example: - Pass - """ - __slots__ = [] - def _parse(self, stream, context): - pass - def _build(self, obj, stream, context): - assert obj is None - def _sizeof(self, context): - return 0 - def __reduce__(self): - return self.__class__.__name__ -Pass = Pass(None) - -class Terminator(Construct): - """ - Asserts the end of the stream has been reached at the point it's placed. - You can use this to ensure no more unparsed data follows. - - Notes: - * this construct is only meaningful for parsing. for building, it's - a no-op. - * this construct is a singleton. do not try to instatiate it, as it - will not work... - - Example: - Terminator - """ - __slots__ = [] - def _parse(self, stream, context): - if stream.read(1): - raise TerminatorError("expected end of stream") - def _build(self, obj, stream, context): - assert obj is None - def _sizeof(self, context): - return 0 -Terminator = Terminator(None) diff --git a/elftools/construct/debug.py b/elftools/construct/debug.py deleted file mode 100644 index 846daf89..00000000 --- a/elftools/construct/debug.py +++ /dev/null @@ -1,133 +0,0 @@ -""" -Debugging utilities for constructs -""" -from __future__ import print_function -import sys -import traceback -import pdb -import inspect -from .core import Construct, Subconstruct -from .lib import HexString, Container, ListContainer - - -class Probe(Construct): - """ - A probe: dumps the context, stack frames, and stream content to the screen - to aid the debugging process. - See also Debugger. - - Parameters: - * name - the display name - * show_stream - whether or not to show stream contents. default is True. - the stream must be seekable. - * show_context - whether or not to show the context. default is True. - * show_stack - whether or not to show the upper stack frames. default - is True. - * stream_lookahead - the number of bytes to dump when show_stack is set. - default is 100. 
- - Example: - Struct("foo", - UBInt8("a"), - Probe("between a and b"), - UBInt8("b"), - ) - """ - __slots__ = [ - "printname", "show_stream", "show_context", "show_stack", - "stream_lookahead" - ] - counter = 0 - - def __init__(self, name = None, show_stream = True, - show_context = True, show_stack = True, - stream_lookahead = 100): - Construct.__init__(self, None) - if name is None: - Probe.counter += 1 - name = "" % (Probe.counter,) - self.printname = name - self.show_stream = show_stream - self.show_context = show_context - self.show_stack = show_stack - self.stream_lookahead = stream_lookahead - def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self.printname) - def _parse(self, stream, context): - self.printout(stream, context) - def _build(self, obj, stream, context): - self.printout(stream, context) - def _sizeof(self, context): - return 0 - - def printout(self, stream, context): - obj = Container() - if self.show_stream: - obj.stream_position = stream.tell() - follows = stream.read(self.stream_lookahead) - if not follows: - obj.following_stream_data = "EOF reached" - else: - stream.seek(-len(follows), 1) - obj.following_stream_data = HexString(follows) - print - - if self.show_context: - obj.context = context - - if self.show_stack: - obj.stack = ListContainer() - frames = [s[0] for s in inspect.stack()][1:-1] - frames.reverse() - for f in frames: - a = Container() - a.__update__(f.f_locals) - obj.stack.append(a) - - print("=" * 80) - print("Probe", self.printname) - print(obj) - print("=" * 80) - -class Debugger(Subconstruct): - """ - A pdb-based debugger. When an exception occurs in the subcon, a debugger - will appear and allow you to debug the error (and even fix on-the-fly). 
- - Parameters: - * subcon - the subcon to debug - - Example: - Debugger( - Enum(UBInt8("foo"), - a = 1, - b = 2, - c = 3 - ) - ) - """ - __slots__ = ["retval"] - def _parse(self, stream, context): - try: - return self.subcon._parse(stream, context) - except Exception: - self.retval = NotImplemented - self.handle_exc("(you can set the value of 'self.retval', " - "which will be returned)") - if self.retval is NotImplemented: - raise - else: - return self.retval - def _build(self, obj, stream, context): - try: - self.subcon._build(obj, stream, context) - except Exception: - self.handle_exc() - def handle_exc(self, msg = None): - print("=" * 80) - print("Debugging exception of %s:" % (self.subcon,)) - print("".join(traceback.format_exception(*sys.exc_info())[1:])) - if msg: - print(msg) - pdb.post_mortem(sys.exc_info()[2]) - print("=" * 80) diff --git a/elftools/construct/lib/__init__.py b/elftools/construct/lib/__init__.py deleted file mode 100644 index 2e095787..00000000 --- a/elftools/construct/lib/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .binary import ( - int_to_bin, bin_to_int, swap_bytes, encode_bin, decode_bin) -from .bitstream import BitStreamReader, BitStreamWriter -from .container import (Container, FlagsContainer, ListContainer, - LazyContainer) -from .hex import HexString, hexdump - diff --git a/elftools/construct/lib/binary.py b/elftools/construct/lib/binary.py deleted file mode 100644 index 3efef0d7..00000000 --- a/elftools/construct/lib/binary.py +++ /dev/null @@ -1,117 +0,0 @@ -from .py3compat import int2byte - - -def int_to_bin(number, width=32): - r""" - Convert an integer into its binary representation in a bytes object. - Width is the amount of bits to generate. If width is larger than the actual - amount of bits required to represent number in binary, sign-extension is - used. If it's smaller, the representation is trimmed to width bits. - Each "bit" is either '\x00' or '\x01'. The MSBit is first. 
- - Examples: - - >>> int_to_bin(19, 5) - b'\x01\x00\x00\x01\x01' - >>> int_to_bin(19, 8) - b'\x00\x00\x00\x01\x00\x00\x01\x01' - """ - if number < 0: - number += 1 << width - i = width - 1 - bits = bytearray(width) - while number and i >= 0: - bits[i] = number & 1 - number >>= 1 - i -= 1 - return bytes(bits) - - -_bit_values = { - 0: 0, - 1: 1, - 48: 0, # '0' - 49: 1, # '1' - - # The following are for Python 2, in which iteration over a bytes object - # yields single-character bytes and not integers. - '\x00': 0, - '\x01': 1, - '0': 0, - '1': 1, - } - -def bin_to_int(bits, signed=False): - r""" - Logical opposite of int_to_bin. Both '0' and '\x00' are considered zero, - and both '1' and '\x01' are considered one. Set sign to True to interpret - the number as a 2-s complement signed integer. - """ - number = 0 - bias = 0 - ptr = 0 - if signed and _bit_values[bits[0]] == 1: - bits = bits[1:] - bias = 1 << len(bits) - for b in bits: - number <<= 1 - number |= _bit_values[b] - return number - bias - - -def swap_bytes(bits, bytesize=8): - r""" - Bits is a b'' object containing a binary representation. Assuming each - bytesize bits constitute a bytes, perform a endianness byte swap. Example: - - >>> swap_bytes(b'00011011', 2) - b'11100100' - """ - i = 0 - l = len(bits) - output = [b""] * ((l // bytesize) + 1) - j = len(output) - 1 - while i < l: - output[j] = bits[i : i + bytesize] - i += bytesize - j -= 1 - return b"".join(output) - - -_char_to_bin = {} -_bin_to_char = {} -for i in range(256): - ch = int2byte(i) - bin = int_to_bin(i, 8) - # Populate with for both keys i and ch, to support Python 2 & 3 - _char_to_bin[ch] = bin - _char_to_bin[i] = bin - _bin_to_char[bin] = ch - - -def encode_bin(data): - """ - Create a binary representation of the given b'' object. Assume 8-bit - ASCII. 
Example: - - >>> encode_bin('ab') - b"\x00\x01\x01\x00\x00\x00\x00\x01\x00\x01\x01\x00\x00\x00\x01\x00" - """ - return b"".join(_char_to_bin[ch] for ch in data) - - -def decode_bin(data): - """ - Locical opposite of decode_bin. - """ - if len(data) & 7: - raise ValueError("Data length must be a multiple of 8") - i = 0 - j = 0 - l = len(data) // 8 - chars = [b""] * l - while j < l: - chars[j] = _bin_to_char[data[i:i+8]] - i += 8 - j += 1 - return b"".join(chars) diff --git a/elftools/construct/lib/bitstream.py b/elftools/construct/lib/bitstream.py deleted file mode 100644 index 0c521a42..00000000 --- a/elftools/construct/lib/bitstream.py +++ /dev/null @@ -1,77 +0,0 @@ -from .binary import encode_bin, decode_bin - -class BitStreamReader(object): - - __slots__ = ["substream", "buffer", "total_size"] - - def __init__(self, substream): - self.substream = substream - self.total_size = 0 - self.buffer = "" - - def close(self): - if self.total_size % 8 != 0: - raise ValueError("total size of read data must be a multiple of 8", - self.total_size) - - def tell(self): - return self.substream.tell() - - def seek(self, pos, whence = 0): - self.buffer = "" - self.total_size = 0 - self.substream.seek(pos, whence) - - def read(self, count): - if count < 0: - raise ValueError("count cannot be negative") - - l = len(self.buffer) - if count == 0: - data = "" - elif count <= l: - data = self.buffer[:count] - self.buffer = self.buffer[count:] - else: - data = self.buffer - count -= l - bytes = count // 8 - if count & 7: - bytes += 1 - buf = encode_bin(self.substream.read(bytes)) - data += buf[:count] - self.buffer = buf[count:] - self.total_size += len(data) - return data - -class BitStreamWriter(object): - - __slots__ = ["substream", "buffer", "pos"] - - def __init__(self, substream): - self.substream = substream - self.buffer = [] - self.pos = 0 - - def close(self): - self.flush() - - def flush(self): - bytes = decode_bin("".join(self.buffer)) - self.substream.write(bytes) - 
self.buffer = [] - self.pos = 0 - - def tell(self): - return self.substream.tell() + self.pos // 8 - - def seek(self, pos, whence = 0): - self.flush() - self.substream.seek(pos, whence) - - def write(self, data): - if not data: - return - if type(data) is not str: - raise TypeError("data must be a string, not %r" % (type(data),)) - self.buffer.append(data) diff --git a/elftools/construct/lib/container.py b/elftools/construct/lib/container.py deleted file mode 100644 index 5a580fac..00000000 --- a/elftools/construct/lib/container.py +++ /dev/null @@ -1,161 +0,0 @@ -""" -Various containers. -""" - -from pprint import pformat -from .py3compat import MutableMapping - -def recursion_lock(retval, lock_name = "__recursion_lock__"): - def decorator(func): - def wrapper(self, *args, **kw): - if getattr(self, lock_name, False): - return retval - setattr(self, lock_name, True) - try: - return func(self, *args, **kw) - finally: - setattr(self, lock_name, False) - wrapper.__name__ = func.__name__ - return wrapper - return decorator - -class Container(MutableMapping): - """ - A generic container of attributes. - - Containers are the common way to express parsed data. - """ - - def __init__(self, **kw): - self.__dict__ = kw - - # The core dictionary interface. - - def __getitem__(self, name): - return self.__dict__[name] - - def __delitem__(self, name): - del self.__dict__[name] - - def __setitem__(self, name, value): - self.__dict__[name] = value - - def keys(self): - return self.__dict__.keys() - - def __len__(self): - return len(self.__dict__.keys()) - - # Extended dictionary interface. - - def update(self, other): - self.__dict__.update(other) - - __update__ = update - - def __contains__(self, value): - return value in self.__dict__ - - # Rich comparisons. - - def __eq__(self, other): - try: - return self.__dict__ == other.__dict__ - except AttributeError: - return False - - def __ne__(self, other): - return not self == other - - # Copy interface. 
- - def copy(self): - return self.__class__(**self.__dict__) - - __copy__ = copy - - # Iterator interface. - - def __iter__(self): - return iter(self.__dict__) - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, repr(self.__dict__)) - - def __str__(self): - return "%s(%s)" % (self.__class__.__name__, str(self.__dict__)) - -class FlagsContainer(Container): - """ - A container providing pretty-printing for flags. - - Only set flags are displayed. - """ - - @recursion_lock("<...>") - def __str__(self): - d = dict((k, self[k]) for k in self - if self[k] and not k.startswith("_")) - return "%s(%s)" % (self.__class__.__name__, pformat(d)) - -class ListContainer(list): - """ - A container for lists. - """ - - __slots__ = ["__recursion_lock__"] - - @recursion_lock("[...]") - def __str__(self): - return pformat(self) - -class LazyContainer(object): - - __slots__ = ["subcon", "stream", "pos", "context", "_value"] - - def __init__(self, subcon, stream, pos, context): - self.subcon = subcon - self.stream = stream - self.pos = pos - self.context = context - self._value = NotImplemented - - def __eq__(self, other): - try: - return self._value == other._value - except AttributeError: - return False - - def __ne__(self, other): - return not (self == other) - - def __str__(self): - return self.__pretty_str__() - - def __pretty_str__(self, nesting = 1, indentation = " "): - if self._value is NotImplemented: - text = "" - elif hasattr(self._value, "__pretty_str__"): - text = self._value.__pretty_str__(nesting, indentation) - else: - text = str(self._value) - return "%s: %s" % (self.__class__.__name__, text) - - def read(self): - self.stream.seek(self.pos) - return self.subcon._parse(self.stream, self.context) - - def dispose(self): - self.subcon = None - self.stream = None - self.context = None - self.pos = None - - def _get_value(self): - if self._value is NotImplemented: - self._value = self.read() - return self._value - - value = property(_get_value) - - 
has_value = property(lambda self: self._value is not NotImplemented) diff --git a/elftools/construct/lib/hex.py b/elftools/construct/lib/hex.py deleted file mode 100644 index b830644a..00000000 --- a/elftools/construct/lib/hex.py +++ /dev/null @@ -1,43 +0,0 @@ -from .py3compat import byte2int, int2byte, bytes2str - - -# Map an integer in the inclusive range 0-255 to its string byte representation -_printable = dict((i, ".") for i in range(256)) -_printable.update((i, bytes2str(int2byte(i))) for i in range(32, 128)) - - -def hexdump(data, linesize): - """ - data is a bytes object. The returned result is a string. - """ - prettylines = [] - if len(data) < 65536: - fmt = "%%04X %%-%ds %%s" - else: - fmt = "%%08X %%-%ds %%s" - fmt = fmt % (3 * linesize - 1,) - for i in range(0, len(data), linesize): - line = data[i : i + linesize] - hextext = " ".join('%02x' % byte2int(b) for b in line) - rawtext = "".join(_printable[byte2int(b)] for b in line) - prettylines.append(fmt % (i, str(hextext), str(rawtext))) - return prettylines - - -class HexString(bytes): - """ - Represents bytes that will be hex-dumped to a string when its string - representation is requested. 
- """ - def __init__(self, data, linesize = 16): - self.linesize = linesize - - def __new__(cls, data, *args, **kwargs): - return bytes.__new__(cls, data) - - def __str__(self): - if not self: - return "''" - sep = "\n" - return sep + sep.join( - hexdump(self, self.linesize)) diff --git a/elftools/construct/lib/py3compat.py b/elftools/construct/lib/py3compat.py deleted file mode 100644 index 16e12979..00000000 --- a/elftools/construct/lib/py3compat.py +++ /dev/null @@ -1,74 +0,0 @@ -#------------------------------------------------------------------------------- -# py3compat.py -# -# Some Python2&3 compatibility code -#------------------------------------------------------------------------------- -import sys -PY3 = sys.version_info[0] == 3 - -try: - from collections.abc import MutableMapping # python >= 3.3 -except ImportError: - from collections import MutableMapping # python < 3.3 - - -if PY3: - import io - StringIO = io.StringIO - BytesIO = io.BytesIO - - def bchr(i): - """ When iterating over b'...' in Python 2 you get single b'_' chars - and in Python 3 you get integers. Call bchr to always turn this - to single b'_' chars. 
- """ - return bytes((i,)) - - def u(s): - return s - - def int2byte(i): - return bytes((i,)) - - def byte2int(b): - return b - - def str2bytes(s): - return s.encode("latin-1") - - def str2unicode(s): - return s - - def bytes2str(b): - return b.decode('latin-1') - - def decodebytes(b, encoding): - return bytes(b, encoding) - - advance_iterator = next - -else: - import cStringIO - StringIO = BytesIO = cStringIO.StringIO - - int2byte = chr - byte2int = ord - bchr = lambda i: i - - def u(s): - return unicode(s, "unicode_escape") - - def str2bytes(s): - return s - - def str2unicode(s): - return unicode(s, "unicode_escape") - - def bytes2str(b): - return b - - def decodebytes(b, encoding): - return b.decode(encoding) - - def advance_iterator(it): - return it.next() diff --git a/elftools/construct/macros.py b/elftools/construct/macros.py deleted file mode 100644 index d7680991..00000000 --- a/elftools/construct/macros.py +++ /dev/null @@ -1,634 +0,0 @@ -from .lib.py3compat import int2byte -from .lib import (BitStreamReader, BitStreamWriter, encode_bin, - decode_bin) -from .core import (Struct, MetaField, StaticField, FormatField, - OnDemand, Pointer, Switch, Value, RepeatUntil, MetaArray, Sequence, Range, - Select, Pass, SizeofError, Buffered, Restream, Reconfig) -from .adapters import (BitIntegerAdapter, PaddingAdapter, - ConstAdapter, CStringAdapter, LengthValueAdapter, IndexingAdapter, - PaddedStringAdapter, FlagsAdapter, StringAdapter, MappingAdapter) - - -#=============================================================================== -# fields -#=============================================================================== -def Field(name, length): - """ - A field consisting of a specified number of bytes. - - :param str name: the name of the field - :param length: the length of the field. 
the length can be either an integer - (StaticField), or a function that takes the context as an argument and - returns the length (MetaField) - """ - if callable(length): - return MetaField(name, length) - else: - return StaticField(name, length) - -def BitField(name, length, swapped = False, signed = False, bytesize = 8): - """ - BitFields, as the name suggests, are fields that operate on raw, unaligned - bits, and therefore must be enclosed in a BitStruct. Using them is very - similar to all normal fields: they take a name and a length (in bits). - - :param str name: name of the field - :param int length: number of bits in the field, or a function that takes - the context as its argument and returns the length - :param bool swapped: whether the value is byte-swapped - :param bool signed: whether the value is signed - :param int bytesize: number of bits per byte, for byte-swapping - - >>> foo = BitStruct("foo", - ... BitField("a", 3), - ... Flag("b"), - ... Padding(3), - ... Nibble("c"), - ... BitField("d", 5), - ... ) - >>> foo.parse("\\xe1\\x1f") - Container(a = 7, b = False, c = 8, d = 31) - >>> foo = BitStruct("foo", - ... BitField("a", 3), - ... Flag("b"), - ... Padding(3), - ... Nibble("c"), - ... Struct("bar", - ... Nibble("d"), - ... Bit("e"), - ... ) - ... ) - >>> foo.parse("\\xe1\\x1f") - Container(a = 7, b = False, bar = Container(d = 15, e = 1), c = 8) - """ - - return BitIntegerAdapter(Field(name, length), - length, - swapped=swapped, - signed=signed, - bytesize=bytesize - ) - -def Padding(length, pattern = b"\x00", strict = False): - r"""a padding field (value is discarded) - * length - the length of the field. the length can be either an integer, - or a function that takes the context as an argument and returns the - length - * pattern - the padding pattern (character/byte) to use. default is b"\x00" - * strict - whether or not to raise an exception is the actual padding - pattern mismatches the desired pattern. default is False. 
- """ - return PaddingAdapter(Field(None, length), - pattern = pattern, - strict = strict, - ) - -def Flag(name, truth = 1, falsehood = 0, default = False): - """ - A flag. - - Flags are usually used to signify a Boolean value, and this construct - maps values onto the ``bool`` type. - - .. note:: This construct works with both bit and byte contexts. - - .. warning:: Flags default to False, not True. This is different from the - C and Python way of thinking about truth, and may be subject to change - in the future. - - :param str name: field name - :param int truth: value of truth (default 1) - :param int falsehood: value of falsehood (default 0) - :param bool default: default value (default False) - """ - - return SymmetricMapping(Field(name, 1), - {True : int2byte(truth), False : int2byte(falsehood)}, - default = default, - ) - -#=============================================================================== -# field shortcuts -#=============================================================================== -def Bit(name): - """a 1-bit BitField; must be enclosed in a BitStruct""" - return BitField(name, 1) -def Nibble(name): - """a 4-bit BitField; must be enclosed in a BitStruct""" - return BitField(name, 4) -def Octet(name): - """an 8-bit BitField; must be enclosed in a BitStruct""" - return BitField(name, 8) - -def UBInt8(name): - """unsigned, big endian 8-bit integer""" - return FormatField(name, ">", "B") -def UBInt16(name): - """unsigned, big endian 16-bit integer""" - return FormatField(name, ">", "H") -def UBInt32(name): - """unsigned, big endian 32-bit integer""" - return FormatField(name, ">", "L") -def UBInt64(name): - """unsigned, big endian 64-bit integer""" - return FormatField(name, ">", "Q") - -def SBInt8(name): - """signed, big endian 8-bit integer""" - return FormatField(name, ">", "b") -def SBInt16(name): - """signed, big endian 16-bit integer""" - return FormatField(name, ">", "h") -def SBInt32(name): - """signed, big endian 32-bit integer""" - 
return FormatField(name, ">", "l") -def SBInt64(name): - """signed, big endian 64-bit integer""" - return FormatField(name, ">", "q") - -def ULInt8(name): - """unsigned, little endian 8-bit integer""" - return FormatField(name, "<", "B") -def ULInt16(name): - """unsigned, little endian 16-bit integer""" - return FormatField(name, "<", "H") -def ULInt32(name): - """unsigned, little endian 32-bit integer""" - return FormatField(name, "<", "L") -def ULInt64(name): - """unsigned, little endian 64-bit integer""" - return FormatField(name, "<", "Q") - -def SLInt8(name): - """signed, little endian 8-bit integer""" - return FormatField(name, "<", "b") -def SLInt16(name): - """signed, little endian 16-bit integer""" - return FormatField(name, "<", "h") -def SLInt32(name): - """signed, little endian 32-bit integer""" - return FormatField(name, "<", "l") -def SLInt64(name): - """signed, little endian 64-bit integer""" - return FormatField(name, "<", "q") - -def UNInt8(name): - """unsigned, native endianity 8-bit integer""" - return FormatField(name, "=", "B") -def UNInt16(name): - """unsigned, native endianity 16-bit integer""" - return FormatField(name, "=", "H") -def UNInt32(name): - """unsigned, native endianity 32-bit integer""" - return FormatField(name, "=", "L") -def UNInt64(name): - """unsigned, native endianity 64-bit integer""" - return FormatField(name, "=", "Q") - -def SNInt8(name): - """signed, native endianity 8-bit integer""" - return FormatField(name, "=", "b") -def SNInt16(name): - """signed, native endianity 16-bit integer""" - return FormatField(name, "=", "h") -def SNInt32(name): - """signed, native endianity 32-bit integer""" - return FormatField(name, "=", "l") -def SNInt64(name): - """signed, native endianity 64-bit integer""" - return FormatField(name, "=", "q") - -def BFloat32(name): - """big endian, 32-bit IEEE floating point number""" - return FormatField(name, ">", "f") -def LFloat32(name): - """little endian, 32-bit IEEE floating point number""" - 
return FormatField(name, "<", "f") -def NFloat32(name): - """native endianity, 32-bit IEEE floating point number""" - return FormatField(name, "=", "f") - -def BFloat64(name): - """big endian, 64-bit IEEE floating point number""" - return FormatField(name, ">", "d") -def LFloat64(name): - """little endian, 64-bit IEEE floating point number""" - return FormatField(name, "<", "d") -def NFloat64(name): - """native endianity, 64-bit IEEE floating point number""" - return FormatField(name, "=", "d") - - -#=============================================================================== -# arrays -#=============================================================================== -def Array(count, subcon): - """ - Repeats the given unit a fixed number of times. - - :param int count: number of times to repeat - :param ``Construct`` subcon: construct to repeat - - >>> c = Array(4, UBInt8("foo")) - >>> c.parse("\\x01\\x02\\x03\\x04") - [1, 2, 3, 4] - >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06") - [1, 2, 3, 4] - >>> c.build([5,6,7,8]) - '\\x05\\x06\\x07\\x08' - >>> c.build([5,6,7,8,9]) - Traceback (most recent call last): - ... - construct.core.RangeError: expected 4..4, found 5 - """ - - if callable(count): - con = MetaArray(count, subcon) - else: - con = MetaArray(lambda ctx: count, subcon) - con._clear_flag(con.FLAG_DYNAMIC) - return con - -def PrefixedArray(subcon, length_field = UBInt8("length")): - """an array prefixed by a length field. - * subcon - the subcon to be repeated - * length_field - a construct returning an integer - """ - return LengthValueAdapter( - Sequence(subcon.name, - length_field, - Array(lambda ctx: ctx[length_field.name], subcon), - nested = False - ) - ) - -def OpenRange(mincount, subcon): - from sys import maxsize - return Range(mincount, maxsize, subcon) - -def GreedyRange(subcon): - """ - Repeats the given unit one or more times. 
- - :param ``Construct`` subcon: construct to repeat - - >>> from construct import GreedyRange, UBInt8 - >>> c = GreedyRange(UBInt8("foo")) - >>> c.parse("\\x01") - [1] - >>> c.parse("\\x01\\x02\\x03") - [1, 2, 3] - >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06") - [1, 2, 3, 4, 5, 6] - >>> c.parse("") - Traceback (most recent call last): - ... - construct.core.RangeError: expected 1..2147483647, found 0 - >>> c.build([1,2]) - '\\x01\\x02' - >>> c.build([]) - Traceback (most recent call last): - ... - construct.core.RangeError: expected 1..2147483647, found 0 - """ - - return OpenRange(1, subcon) - -def OptionalGreedyRange(subcon): - """ - Repeats the given unit zero or more times. This repeater can't - fail, as it accepts lists of any length. - - :param ``Construct`` subcon: construct to repeat - - >>> from construct import OptionalGreedyRange, UBInt8 - >>> c = OptionalGreedyRange(UBInt8("foo")) - >>> c.parse("") - [] - >>> c.parse("\\x01\\x02") - [1, 2] - >>> c.build([]) - '' - >>> c.build([1,2]) - '\\x01\\x02' - """ - - return OpenRange(0, subcon) - - -#=============================================================================== -# subconstructs -#=============================================================================== -def Optional(subcon): - """an optional construct. if parsing fails, returns None. - * subcon - the subcon to optionally parse or build - """ - return Select(subcon.name, subcon, Pass) - -def Bitwise(subcon): - """converts the stream to bits, and passes the bitstream to subcon - * subcon - a bitwise construct (usually BitField) - """ - # subcons larger than MAX_BUFFER will be wrapped by Restream instead - # of Buffered. 
implementation details, don't stick your nose in :) - MAX_BUFFER = 1024 * 8 - def resizer(length): - if length & 7: - raise SizeofError("size must be a multiple of 8", length) - return length >> 3 - if not subcon._is_flag(subcon.FLAG_DYNAMIC) and subcon.sizeof() < MAX_BUFFER: - con = Buffered(subcon, - encoder = decode_bin, - decoder = encode_bin, - resizer = resizer - ) - else: - con = Restream(subcon, - stream_reader = BitStreamReader, - stream_writer = BitStreamWriter, - resizer = resizer) - return con - -def Aligned(subcon, modulus = 4, pattern = b"\x00"): - r"""aligns subcon to modulus boundary using padding pattern - * subcon - the subcon to align - * modulus - the modulus boundary (default is 4) - * pattern - the padding pattern (default is \x00) - """ - if modulus < 2: - raise ValueError("modulus must be >= 2", modulus) - def padlength(ctx): - return (modulus - (subcon._sizeof(ctx) % modulus)) % modulus - return SeqOfOne(subcon.name, - subcon, - # ?????? - # ?????? - # ?????? - # ?????? - Padding(padlength, pattern = pattern), - nested = False, - ) - -def SeqOfOne(name, *args, **kw): - """a sequence of one element. only the first element is meaningful, the - rest are discarded - * name - the name of the sequence - * args - subconstructs - * kw - any keyword arguments to Sequence - """ - return IndexingAdapter(Sequence(name, *args, **kw), index = 0) - -def Embedded(subcon): - """embeds a struct into the enclosing struct. 
- * subcon - the struct to embed - """ - return Reconfig(subcon.name, subcon, subcon.FLAG_EMBED) - -def Rename(newname, subcon): - """renames an existing construct - * newname - the new name - * subcon - the subcon to rename - """ - return Reconfig(newname, subcon) - -def Alias(newname, oldname): - """creates an alias for an existing element in a struct - * newname - the new name - * oldname - the name of an existing element - """ - return Value(newname, lambda ctx: ctx[oldname]) - - -#=============================================================================== -# mapping -#=============================================================================== -def SymmetricMapping(subcon, mapping, default = NotImplemented): - """defines a symmetrical mapping: a->b, b->a. - * subcon - the subcon to map - * mapping - the encoding mapping (a dict); the decoding mapping is - achieved by reversing this mapping - * default - the default value to use when no mapping is found. if no - default value is given, and exception is raised. setting to Pass would - return the value "as is" (unmapped) - """ - reversed_mapping = dict((v, k) for k, v in mapping.items()) - return MappingAdapter(subcon, - encoding = mapping, - decoding = reversed_mapping, - encdefault = default, - decdefault = default, - ) - -def Enum(subcon, **kw): - """a set of named values mapping. - * subcon - the subcon to map - * kw - keyword arguments which serve as the encoding mapping - * _default_ - an optional, keyword-only argument that specifies the - default value to use when the mapping is undefined. if not given, - and exception is raised when the mapping is undefined. use `Pass` to - pass the unmapped value as-is - """ - return SymmetricMapping(subcon, kw, kw.pop("_default_", NotImplemented)) - -def FlagsEnum(subcon, **kw): - """a set of flag values mapping. 
- * subcon - the subcon to map - * kw - keyword arguments which serve as the encoding mapping - """ - return FlagsAdapter(subcon, kw) - - -#=============================================================================== -# structs -#=============================================================================== -def AlignedStruct(name, *subcons, **kw): - """a struct of aligned fields - * name - the name of the struct - * subcons - the subcons that make up this structure - * kw - keyword arguments to pass to Aligned: 'modulus' and 'pattern' - """ - return Struct(name, *(Aligned(sc, **kw) for sc in subcons)) - -def BitStruct(name, *subcons): - """a struct of bitwise fields - * name - the name of the struct - * subcons - the subcons that make up this structure - """ - return Bitwise(Struct(name, *subcons)) - -def EmbeddedBitStruct(*subcons): - """an embedded BitStruct. no name is necessary. - * subcons - the subcons that make up this structure - """ - return Bitwise(Embedded(Struct(None, *subcons))) - -#=============================================================================== -# strings -#=============================================================================== -def String(name, length, encoding=None, padchar=None, paddir="right", - trimdir="right"): - """ - A configurable, fixed-length string field. - - The padding character must be specified for padding and trimming to work. - - :param str name: name - :param int length: length, in bytes - :param str encoding: encoding (e.g. 
"utf8") or None for no encoding - :param str padchar: optional character to pad out strings - :param str paddir: direction to pad out strings; one of "right", "left", - or "both" - :param str trim: direction to trim strings; one of "right", "left" - - >>> from construct import String - >>> String("foo", 5).parse("hello") - 'hello' - >>> - >>> String("foo", 12, encoding = "utf8").parse("hello joh\\xd4\\x83n") - u'hello joh\\u0503n' - >>> - >>> foo = String("foo", 10, padchar = "X", paddir = "right") - >>> foo.parse("helloXXXXX") - 'hello' - >>> foo.build("hello") - 'helloXXXXX' - """ - - con = StringAdapter(Field(name, length), encoding=encoding) - if padchar is not None: - con = PaddedStringAdapter(con, padchar=padchar, paddir=paddir, - trimdir=trimdir) - return con - -def PascalString(name, length_field=UBInt8("length"), encoding=None): - """ - A length-prefixed string. - - ``PascalString`` is named after the string types of Pascal, which are - length-prefixed. Lisp strings also follow this convention. - - The length field will appear in the same ``Container`` as the - ``PascalString``, with the given name. - - :param str name: name - :param ``Construct`` length_field: a field which will store the length of - the string - :param str encoding: encoding (e.g. "utf8") or None for no encoding - - >>> foo = PascalString("foo") - >>> foo.parse("\\x05hello") - 'hello' - >>> foo.build("hello world") - '\\x0bhello world' - >>> - >>> foo = PascalString("foo", length_field = UBInt16("length")) - >>> foo.parse("\\x00\\x05hello") - 'hello' - >>> foo.build("hello") - '\\x00\\x05hello' - """ - - return StringAdapter( - LengthValueAdapter( - Sequence(name, - length_field, - Field("data", lambda ctx: ctx[length_field.name]), - ) - ), - encoding=encoding, - ) - -def CString(name, terminators=b"\x00", encoding=None, - char_field=Field(None, 1)): - """ - A string ending in a terminator. - - ``CString`` is similar to the strings of C, C++, and other related - programming languages. 
- - By default, the terminator is the NULL byte (b``0x00``). - - :param str name: name - :param iterable terminators: sequence of valid terminators, in order of - preference - :param str encoding: encoding (e.g. "utf8") or None for no encoding - :param ``Construct`` char_field: construct representing a single character - - >>> foo = CString("foo") - >>> foo.parse(b"hello\\x00") - b'hello' - >>> foo.build(b"hello") - b'hello\\x00' - >>> foo = CString("foo", terminators = b"XYZ") - >>> foo.parse(b"helloX") - b'hello' - >>> foo.parse(b"helloY") - b'hello' - >>> foo.parse(b"helloZ") - b'hello' - >>> foo.build(b"hello") - b'helloX' - """ - - return Rename(name, - CStringAdapter( - RepeatUntil(lambda obj, ctx: obj in terminators, char_field), - terminators=terminators, - encoding=encoding, - ) - ) - - -#=============================================================================== -# conditional -#=============================================================================== -def IfThenElse(name, predicate, then_subcon, else_subcon): - """an if-then-else conditional construct: if the predicate indicates True, - `then_subcon` will be used; otherwise `else_subcon` - * name - the name of the construct - * predicate - a function taking the context as an argument and returning - True or False - * then_subcon - the subcon that will be used if the predicate returns True - * else_subcon - the subcon that will be used if the predicate returns False - """ - return Switch(name, lambda ctx: bool(predicate(ctx)), - { - True : then_subcon, - False : else_subcon, - } - ) - -def If(predicate, subcon, elsevalue = None): - """an if-then conditional construct: if the predicate indicates True, - subcon will be used; otherwise, `elsevalue` will be returned instead. 
- * predicate - a function taking the context as an argument and returning - True or False - * subcon - the subcon that will be used if the predicate returns True - * elsevalue - the value that will be used should the predicate return False. - by default this value is None. - """ - return IfThenElse(subcon.name, - predicate, - subcon, - Value("elsevalue", lambda ctx: elsevalue) - ) - - -#=============================================================================== -# misc -#=============================================================================== -def OnDemandPointer(offsetfunc, subcon, force_build = True): - """an on-demand pointer. - * offsetfunc - a function taking the context as an argument and returning - the absolute stream position - * subcon - the subcon that will be parsed from the `offsetfunc()` stream - position on demand - * force_build - see OnDemand. by default True. - """ - return OnDemand(Pointer(offsetfunc, subcon), - advance_stream = False, - force_build = force_build - ) - -def Magic(data): - return ConstAdapter(Field(None, len(data)), data) diff --git a/elftools/dwarf/abbrevtable.py b/elftools/dwarf/abbrevtable.py index 6d29d5cf..00677117 100644 --- a/elftools/dwarf/abbrevtable.py +++ b/elftools/dwarf/abbrevtable.py @@ -42,7 +42,7 @@ def _parse_abbrev_table(self): self.stream.seek(self.offset) while True: decl_code = struct_parse( - struct=self.structs.Dwarf_uleb128(''), + struct=self.structs.Dwarf_uleb128, stream=self.stream) if decl_code == 0: break diff --git a/elftools/dwarf/aranges.py b/elftools/dwarf/aranges.py index ae409756..8615c63a 100644 --- a/elftools/dwarf/aranges.py +++ b/elftools/dwarf/aranges.py @@ -88,8 +88,8 @@ def _get_entries(self, need_empty=False): got_entries = False # entries in this set/CU - addr = struct_parse(addr_size('addr'), self.stream) - length = struct_parse(addr_size('length'), self.stream) + addr = struct_parse(addr_size, self.stream) + length = struct_parse(addr_size, self.stream) while addr != 0 or 
length != 0 or (not got_entries and need_empty): # 'begin_addr length info_offset version address_size segment_size' entries.append( @@ -102,8 +102,8 @@ def _get_entries(self, need_empty=False): segment_size=aranges_header["segment_size"])) got_entries = True if addr != 0 or length != 0: - addr = struct_parse(addr_size('addr'), self.stream) - length = struct_parse(addr_size('length'), self.stream) + addr = struct_parse(addr_size, self.stream) + length = struct_parse(addr_size, self.stream) # Segmentation exists in executable elif aranges_header["segment_size"] != 0: diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py index 07c0306c..83e0578e 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -10,7 +10,7 @@ from collections import namedtuple from ..common.utils import ( struct_parse, dwarf_assert, preserve_stream_pos, iterbytes) -from ..construct import Struct, Switch +from construct import Struct, Switch from .enums import DW_EH_encoding_flags from .structs import DWARFStructs from .constants import * @@ -90,7 +90,7 @@ def _parse_entry_at(self, offset): return self._entry_cache[offset] entry_length = struct_parse( - self.base_structs.Dwarf_uint32(''), self.stream, offset) + self.base_structs.Dwarf_uint32, self.stream, offset) if self.for_eh_frame and entry_length == 0: return ZERO(offset) @@ -104,7 +104,7 @@ def _parse_entry_at(self, offset): # Read the next field to see whether this is a CIE or FDE CIE_id = struct_parse( - entry_structs.Dwarf_offset(''), self.stream) + entry_structs.Dwarf_offset, self.stream) if self.for_eh_frame: is_CIE = CIE_id == 0 @@ -184,7 +184,7 @@ def _parse_instructions(self, structs, offset, end_offset): """ instructions = [] while offset < end_offset: - opcode = struct_parse(structs.Dwarf_uint8(''), self.stream, offset) + opcode = struct_parse(structs.Dwarf_uint8, self.stream, offset) args = [] primary = opcode & _PRIMARY_MASK @@ -194,7 +194,7 @@ def _parse_instructions(self, structs, offset, 
end_offset): elif primary == DW_CFA_offset: args = [ primary_arg, - struct_parse(structs.Dwarf_uleb128(''), self.stream)] + struct_parse(structs.Dwarf_uleb128, self.stream)] elif primary == DW_CFA_restore: args = [primary_arg] # primary == 0 and real opcode is extended @@ -203,40 +203,39 @@ def _parse_instructions(self, structs, offset, end_offset): args = [] elif opcode == DW_CFA_set_loc: args = [ - struct_parse(structs.Dwarf_target_addr(''), self.stream)] + struct_parse(structs.Dwarf_target_addr, self.stream)] elif opcode == DW_CFA_advance_loc1: - args = [struct_parse(structs.Dwarf_uint8(''), self.stream)] + args = [struct_parse(structs.Dwarf_uint8, self.stream)] elif opcode == DW_CFA_advance_loc2: - args = [struct_parse(structs.Dwarf_uint16(''), self.stream)] + args = [struct_parse(structs.Dwarf_uint16, self.stream)] elif opcode == DW_CFA_advance_loc4: - args = [struct_parse(structs.Dwarf_uint32(''), self.stream)] + args = [struct_parse(structs.Dwarf_uint32, self.stream)] elif opcode in (DW_CFA_offset_extended, DW_CFA_register, DW_CFA_def_cfa, DW_CFA_val_offset): args = [ - struct_parse(structs.Dwarf_uleb128(''), self.stream), - struct_parse(structs.Dwarf_uleb128(''), self.stream)] + struct_parse(structs.Dwarf_uleb128, self.stream), + struct_parse(structs.Dwarf_uleb128, self.stream)] elif opcode in (DW_CFA_restore_extended, DW_CFA_undefined, DW_CFA_same_value, DW_CFA_def_cfa_register, DW_CFA_def_cfa_offset): - args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)] + args = [struct_parse(structs.Dwarf_uleb128, self.stream)] elif opcode == DW_CFA_def_cfa_offset_sf: - args = [struct_parse(structs.Dwarf_sleb128(''), self.stream)] + args = [struct_parse(structs.Dwarf_sleb128, self.stream)] elif opcode == DW_CFA_def_cfa_expression: args = [struct_parse( structs.Dwarf_dw_form['DW_FORM_block'], self.stream)] elif opcode in (DW_CFA_expression, DW_CFA_val_expression): args = [ - struct_parse(structs.Dwarf_uleb128(''), self.stream), + 
struct_parse(structs.Dwarf_uleb128, self.stream), struct_parse( structs.Dwarf_dw_form['DW_FORM_block'], self.stream)] elif opcode in (DW_CFA_offset_extended_sf, DW_CFA_def_cfa_sf, DW_CFA_val_offset_sf): args = [ - struct_parse(structs.Dwarf_uleb128(''), self.stream), - struct_parse(structs.Dwarf_sleb128(''), self.stream)] + struct_parse(structs.Dwarf_uleb128, self.stream), + struct_parse(structs.Dwarf_sleb128, self.stream)] elif opcode == DW_CFA_GNU_args_size: - args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)] - + args = [struct_parse(structs.Dwarf_uleb128, self.stream)] else: dwarf_assert(False, 'Unknown CFI opcode: 0x%x' % opcode) @@ -277,18 +276,22 @@ def _parse_cie_augmentation(self, header, entry_structs): assert augmentation.startswith(b'z'), ( 'Unhandled augmentation string: {}'.format(repr(augmentation))) + personality_struct = Struct( + 'encoding' / entry_structs.Dwarf_uint8, + 'function' / Switch(lambda ctx: ctx.encoding & 0x0f, + { + enc: fld_cons + for enc, fld_cons + in self._eh_encoding_to_field(entry_structs).items() + }) + ) + available_fields = { - b'z': entry_structs.Dwarf_uleb128('length'), - b'L': entry_structs.Dwarf_uint8('LSDA_encoding'), - b'R': entry_structs.Dwarf_uint8('FDE_encoding'), + b'z': 'length' / entry_structs.Dwarf_uleb128, + b'L': 'LSDA_encoding' / entry_structs.Dwarf_uint8, + b'R': 'FDE_encoding' / entry_structs.Dwarf_uint8, b'S': True, - b'P': Struct( - 'personality', - entry_structs.Dwarf_uint8('encoding'), - Switch('function', lambda ctx: ctx.encoding & 0x0f, { - enc: fld_cons('function') - for enc, fld_cons - in self._eh_encoding_to_field(entry_structs).items()})), + b'P': 'personality' / personality_struct, } # Build the Struct we will be using to parse the augmentation data. @@ -313,7 +316,7 @@ def _parse_cie_augmentation(self, header, entry_structs): # (missing trailing fields) due to an unknown char: see the KeyError # above. 
offset = self.stream.tell() - struct = Struct('Augmentation_Data', *fields) + struct = Struct(*fields) # Augmentation_Data aug_dict.update(struct_parse(struct, self.stream, offset)) self.stream.seek(offset) aug_bytes = self._read_augmentation_data(entry_structs) @@ -329,8 +332,7 @@ def _read_augmentation_data(self, entry_structs): return b'' augmentation_data_length = struct_parse( - Struct('Dummy_Augmentation_Data', - entry_structs.Dwarf_uleb128('length')), + Struct('length' / entry_structs.Dwarf_uleb128), # Dummy_Augmentation_Data self.stream)['length'] return self.stream.read(augmentation_data_length) @@ -350,8 +352,7 @@ def _parse_lsda_pointer(self, structs, stream_offset, encoding): formats = self._eh_encoding_to_field(structs) ptr = struct_parse( - Struct('Augmentation_Data', - formats[basic_encoding]('LSDA_pointer')), + Struct('LSDA_pointer' / formats[basic_encoding]), # Augmentation_Data self.stream, stream_pos=stream_offset)['LSDA_pointer'] if modifier == DW_EH_encoding_flags['DW_EH_PE_absptr']: @@ -372,13 +373,12 @@ def _parse_fde_header(self, entry_structs, offset): return struct_parse(entry_structs.Dwarf_FDE_header, self.stream, offset) - fields = [entry_structs.Dwarf_initial_length('length'), - entry_structs.Dwarf_offset('CIE_pointer')] + fields = ['length' / entry_structs.Dwarf_initial_length, + 'CIE_pointer' / entry_structs.Dwarf_offset] # Parse the couple of header fields that are always here so we can # fetch the corresponding CIE. 
- minimal_header = struct_parse(Struct('eh_frame_minimal_header', - *fields), self.stream, offset) + minimal_header = struct_parse(Struct(*fields), self.stream, offset) # eh_frame_minimal_header cie = self._parse_cie_for_fde(offset, minimal_header, entry_structs) initial_location_offset = self.stream.tell() @@ -392,10 +392,10 @@ def _parse_fde_header(self, entry_structs, offset): # Depending on the specified encoding, complete the header Struct formats = self._eh_encoding_to_field(entry_structs) - fields.append(formats[basic_encoding]('initial_location')) - fields.append(formats[basic_encoding]('address_range')) + fields.append('initial_location' / formats[basic_encoding]) + fields.append('address_range' / formats[basic_encoding]) - result = struct_parse(Struct('Dwarf_FDE_header', *fields), + result = struct_parse(Struct(*fields), # Dwarf_FDE_header self.stream, offset) if encoding_modifier == 0: diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py index 2b18a50e..5b28c54d 100755 --- a/elftools/dwarf/die.py +++ b/elftools/dwarf/die.py @@ -231,7 +231,7 @@ def _parse_DIE(self): # Note: here and elsewhere, preserve_stream_pos is used on operations # that manipulate the stream by reading data from it. self.abbrev_code = struct_parse( - structs.Dwarf_uleb128(''), self.stream, self.offset) + structs.Dwarf_uleb128, self.stream, self.offset) # This may be a null entry if self.abbrev_code == 0: @@ -277,7 +277,7 @@ def _resolve_indirect(self): # Returns (form, raw_value, length). 
structs = self.cu.structs length = 1 - real_form_code = struct_parse(structs.Dwarf_uleb128(''), self.stream) # Numeric form code + real_form_code = struct_parse(structs.Dwarf_uleb128, self.stream) # Numeric form code while True: try: real_form = DW_FORM_raw2name[real_form_code] # Form name or exception if bogus code @@ -326,7 +326,7 @@ def _translate_attr_value(self, form, raw_value): base_offset = _get_base_offset(self.cu, 'DW_AT_str_offsets_base') offset_size = 4 if self.cu.structs.dwarf_format == 32 else 8 with preserve_stream_pos(stream): - str_offset = struct_parse(self.cu.structs.Dwarf_offset(''), stream, base_offset + raw_value*offset_size) + str_offset = struct_parse(self.cu.structs.Dwarf_offset, stream, base_offset + raw_value*offset_size) value = self.dwarfinfo.get_string_from_table(str_offset) elif form == 'DW_FORM_loclistx' and translate_indirect: value = _resolve_via_offset_table(self.dwarfinfo.debug_loclists_sec.stream, self.cu, raw_value, 'DW_AT_loclists_base') diff --git a/elftools/dwarf/dwarf_expr.py b/elftools/dwarf/dwarf_expr.py index f05f739b..508bb212 100644 --- a/elftools/dwarf/dwarf_expr.py +++ b/elftools/dwarf/dwarf_expr.py @@ -174,7 +174,7 @@ def parse_noargs(): return lambda stream: [] def parse_op_addr(): - return lambda stream: [struct_parse(structs.Dwarf_target_addr(''), + return lambda stream: [struct_parse(structs.Dwarf_target_addr, stream)] def parse_arg_struct(arg_struct): @@ -187,48 +187,48 @@ def parse_arg_struct2(arg1_struct, arg2_struct): # ULEB128, then an expression of that length def parse_nestedexpr(): def parse(stream): - size = struct_parse(structs.Dwarf_uleb128(''), stream) + size = struct_parse(structs.Dwarf_uleb128, stream) nested_expr_blob = read_blob(stream, size) return [DWARFExprParser(structs).parse_expr(nested_expr_blob)] return parse # ULEB128, then a blob of that size def parse_blob(): - return lambda stream: [read_blob(stream, struct_parse(structs.Dwarf_uleb128(''), stream))] + return lambda stream: 
[read_blob(stream, struct_parse(structs.Dwarf_uleb128, stream))] # ULEB128 with datatype DIE offset, then byte, then a blob of that size def parse_typedblob(): - return lambda stream: [struct_parse(structs.Dwarf_uleb128(''), stream), read_blob(stream, struct_parse(structs.Dwarf_uint8(''), stream))] + return lambda stream: [struct_parse(structs.Dwarf_uleb128, stream), read_blob(stream, struct_parse(structs.Dwarf_uint8, stream))] # https://yurydelendik.github.io/webassembly-dwarf/ # Byte, then variant: 0, 1, 2 => uleb128, 3 => uint32 def parse_wasmloc(): def parse(stream): - op = struct_parse(structs.Dwarf_uint8(''), stream) + op = struct_parse(structs.Dwarf_uint8, stream) if 0 <= op <= 2: - return [op, struct_parse(structs.Dwarf_uleb128(''), stream)] + return [op, struct_parse(structs.Dwarf_uleb128, stream)] elif op == 3: - return [op, struct_parse(structs.Dwarf_uint32(''), stream)] + return [op, struct_parse(structs.Dwarf_uint32, stream)] else: raise DWARFError("Unknown operation code in DW_OP_WASM_location: %d" % (op,)) return parse add('DW_OP_addr', parse_op_addr()) - add('DW_OP_addrx', parse_arg_struct(structs.Dwarf_uleb128(''))) - add('DW_OP_const1u', parse_arg_struct(structs.Dwarf_uint8(''))) - add('DW_OP_const1s', parse_arg_struct(structs.Dwarf_int8(''))) - add('DW_OP_const2u', parse_arg_struct(structs.Dwarf_uint16(''))) - add('DW_OP_const2s', parse_arg_struct(structs.Dwarf_int16(''))) - add('DW_OP_const4u', parse_arg_struct(structs.Dwarf_uint32(''))) - add('DW_OP_const4s', parse_arg_struct(structs.Dwarf_int32(''))) - add('DW_OP_const8u', parse_arg_struct(structs.Dwarf_uint64(''))) - add('DW_OP_const8s', parse_arg_struct(structs.Dwarf_int64(''))) - add('DW_OP_constu', parse_arg_struct(structs.Dwarf_uleb128(''))) - add('DW_OP_consts', parse_arg_struct(structs.Dwarf_sleb128(''))) - add('DW_OP_pick', parse_arg_struct(structs.Dwarf_uint8(''))) - add('DW_OP_plus_uconst', parse_arg_struct(structs.Dwarf_uleb128(''))) - add('DW_OP_bra', 
parse_arg_struct(structs.Dwarf_int16(''))) - add('DW_OP_skip', parse_arg_struct(structs.Dwarf_int16(''))) + add('DW_OP_addrx', parse_arg_struct(structs.Dwarf_uleb128)) + add('DW_OP_const1u', parse_arg_struct(structs.Dwarf_uint8)) + add('DW_OP_const1s', parse_arg_struct(structs.Dwarf_int8)) + add('DW_OP_const2u', parse_arg_struct(structs.Dwarf_uint16)) + add('DW_OP_const2s', parse_arg_struct(structs.Dwarf_int16)) + add('DW_OP_const4u', parse_arg_struct(structs.Dwarf_uint32)) + add('DW_OP_const4s', parse_arg_struct(structs.Dwarf_int32)) + add('DW_OP_const8u', parse_arg_struct(structs.Dwarf_uint64)) + add('DW_OP_const8s', parse_arg_struct(structs.Dwarf_int64)) + add('DW_OP_constu', parse_arg_struct(structs.Dwarf_uleb128)) + add('DW_OP_consts', parse_arg_struct(structs.Dwarf_sleb128)) + add('DW_OP_pick', parse_arg_struct(structs.Dwarf_uint8)) + add('DW_OP_plus_uconst', parse_arg_struct(structs.Dwarf_uleb128)) + add('DW_OP_bra', parse_arg_struct(structs.Dwarf_int16)) + add('DW_OP_skip', parse_arg_struct(structs.Dwarf_int16)) for opname in [ 'DW_OP_deref', 'DW_OP_dup', 'DW_OP_drop', 'DW_OP_over', 'DW_OP_swap', 'DW_OP_swap', 'DW_OP_rot', 'DW_OP_xderef', @@ -245,40 +245,40 @@ def parse(stream): for n in range(0, 32): add('DW_OP_lit%s' % n, parse_noargs()) add('DW_OP_reg%s' % n, parse_noargs()) - add('DW_OP_breg%s' % n, parse_arg_struct(structs.Dwarf_sleb128(''))) - - add('DW_OP_fbreg', parse_arg_struct(structs.Dwarf_sleb128(''))) - add('DW_OP_regx', parse_arg_struct(structs.Dwarf_uleb128(''))) - add('DW_OP_bregx', parse_arg_struct2(structs.Dwarf_uleb128(''), - structs.Dwarf_sleb128(''))) - add('DW_OP_piece', parse_arg_struct(structs.Dwarf_uleb128(''))) - add('DW_OP_bit_piece', parse_arg_struct2(structs.Dwarf_uleb128(''), - structs.Dwarf_uleb128(''))) - add('DW_OP_deref_size', parse_arg_struct(structs.Dwarf_int8(''))) - add('DW_OP_xderef_size', parse_arg_struct(structs.Dwarf_int8(''))) - add('DW_OP_call2', parse_arg_struct(structs.Dwarf_uint16(''))) - add('DW_OP_call4', 
parse_arg_struct(structs.Dwarf_uint32(''))) - add('DW_OP_call_ref', parse_arg_struct(structs.Dwarf_offset(''))) + add('DW_OP_breg%s' % n, parse_arg_struct(structs.Dwarf_sleb128)) + + add('DW_OP_fbreg', parse_arg_struct(structs.Dwarf_sleb128)) + add('DW_OP_regx', parse_arg_struct(structs.Dwarf_uleb128)) + add('DW_OP_bregx', parse_arg_struct2(structs.Dwarf_uleb128, + structs.Dwarf_sleb128)) + add('DW_OP_piece', parse_arg_struct(structs.Dwarf_uleb128)) + add('DW_OP_bit_piece', parse_arg_struct2(structs.Dwarf_uleb128, + structs.Dwarf_uleb128)) + add('DW_OP_deref_size', parse_arg_struct(structs.Dwarf_int8)) + add('DW_OP_xderef_size', parse_arg_struct(structs.Dwarf_int8)) + add('DW_OP_call2', parse_arg_struct(structs.Dwarf_uint16)) + add('DW_OP_call4', parse_arg_struct(structs.Dwarf_uint32)) + add('DW_OP_call_ref', parse_arg_struct(structs.Dwarf_offset)) add('DW_OP_implicit_value', parse_blob()) add('DW_OP_entry_value', parse_nestedexpr()) add('DW_OP_const_type', parse_typedblob()) - add('DW_OP_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''), - structs.Dwarf_uleb128(''))) - add('DW_OP_deref_type', parse_arg_struct2(structs.Dwarf_uint8(''), - structs.Dwarf_uleb128(''))) - add('DW_OP_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset(''), - structs.Dwarf_sleb128(''))) - add('DW_OP_convert', parse_arg_struct(structs.Dwarf_uleb128(''))) + add('DW_OP_regval_type', parse_arg_struct2(structs.Dwarf_uleb128, + structs.Dwarf_uleb128)) + add('DW_OP_deref_type', parse_arg_struct2(structs.Dwarf_uint8, + structs.Dwarf_uleb128)) + add('DW_OP_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset, + structs.Dwarf_sleb128)) + add('DW_OP_convert', parse_arg_struct(structs.Dwarf_uleb128)) add('DW_OP_GNU_entry_value', parse_nestedexpr()) add('DW_OP_GNU_const_type', parse_typedblob()) - add('DW_OP_GNU_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''), - structs.Dwarf_uleb128(''))) - add('DW_OP_GNU_deref_type', parse_arg_struct2(structs.Dwarf_uint8(''), - 
structs.Dwarf_uleb128(''))) - add('DW_OP_GNU_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset(''), - structs.Dwarf_sleb128(''))) - add('DW_OP_GNU_parameter_ref', parse_arg_struct(structs.Dwarf_offset(''))) - add('DW_OP_GNU_convert', parse_arg_struct(structs.Dwarf_uleb128(''))) + add('DW_OP_GNU_regval_type', parse_arg_struct2(structs.Dwarf_uleb128, + structs.Dwarf_uleb128)) + add('DW_OP_GNU_deref_type', parse_arg_struct2(structs.Dwarf_uint8, + structs.Dwarf_uleb128)) + add('DW_OP_GNU_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset, + structs.Dwarf_sleb128)) + add('DW_OP_GNU_parameter_ref', parse_arg_struct(structs.Dwarf_offset)) + add('DW_OP_GNU_convert', parse_arg_struct(structs.Dwarf_uleb128)) add('DW_OP_WASM_location', parse_wasmloc()) return table diff --git a/elftools/dwarf/dwarf_util.py b/elftools/dwarf/dwarf_util.py index 3dc5681d..0ece1c05 100644 --- a/elftools/dwarf/dwarf_util.py +++ b/elftools/dwarf/dwarf_util.py @@ -8,7 +8,7 @@ #------------------------------------------------------------------------------- import os -from ..construct.macros import UBInt32, UBInt64, ULInt32, ULInt64, Array +from construct import Array from ..common.exceptions import DWARFError from ..common.utils import preserve_stream_pos, struct_parse @@ -38,7 +38,7 @@ def _resolve_via_offset_table(stream, cu, index, base_attribute_name): offset_size = 4 if cu.structs.dwarf_format == 32 else 8 with preserve_stream_pos(stream): - return base_offset + struct_parse(cu.structs.Dwarf_offset(''), stream, base_offset + index*offset_size) + return base_offset + struct_parse(cu.structs.Dwarf_offset, stream, base_offset + index*offset_size) def _iter_CUs_in_section(stream, structs, parser): """Iterates through the list of CU sections in loclists or rangelists. Almost identical structures there. 
@@ -54,7 +54,7 @@ def _iter_CUs_in_section(stream, structs, parser): header = struct_parse(parser, stream, offset) if header.offset_count > 0: offset_parser = structs.Dwarf_uint64 if header.is64 else structs.Dwarf_uint32 - header['offsets'] = struct_parse(Array(header.offset_count, offset_parser('')), stream) + header['offsets'] = struct_parse(Array(header.offset_count, offset_parser), stream) else: header['offsets'] = False yield header diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 40614628..062e5a93 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -10,7 +10,7 @@ from collections import namedtuple from bisect import bisect_right -from ..construct.lib.container import Container +from construct.lib import Container from ..common.exceptions import DWARFError from ..common.utils import (struct_parse, dwarf_assert, parse_cstring_from_stream) @@ -393,7 +393,7 @@ def get_addr(self, cu, addr_index): raise DWARFError('The file does not contain a debug_addr section for indirect address access') # Selectors are not supported, but no assert on that. TODO? cu_addr_base = _get_base_offset(cu, 'DW_AT_addr_base') - return struct_parse(cu.structs.Dwarf_target_addr(''), self.debug_addr_sec.stream, cu_addr_base + addr_index*cu.header.address_size) + return struct_parse(cu.structs.Dwarf_target_addr, self.debug_addr_sec.stream, cu_addr_base + addr_index*cu.header.address_size) #------ PRIVATE ------# @@ -457,7 +457,7 @@ def _parse_CU_at_offset(self, offset): # instance suitable for this CU and use it to parse the rest. 
# initial_length = struct_parse( - self.structs.Dwarf_uint32(''), self.debug_info_sec.stream, offset) + self.structs.Dwarf_uint32, self.debug_info_sec.stream, offset) dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32 diff --git a/elftools/dwarf/enums.py b/elftools/dwarf/enums.py index bd8db2d5..b00d703d 100644 --- a/elftools/dwarf/enums.py +++ b/elftools/dwarf/enums.py @@ -6,8 +6,6 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from ..construct import Pass - ENUM_DW_TAG = dict( DW_TAG_null = 0x00, @@ -102,8 +100,6 @@ DW_TAG_APPLE_property = 0x4200, DW_TAG_hi_user = 0xffff, - - _default_ = Pass, ) @@ -320,8 +316,6 @@ DW_AT_APPLE_property = 0x3fed, DW_AT_APPLE_objc_direct = 0x3fee, DW_AT_APPLE_sdk = 0x3fef, - - _default_ = Pass, ) @@ -376,7 +370,6 @@ DW_FORM_GNU_str_index = 0x1f02, DW_FORM_GNU_ref_alt = 0x1f20, DW_FORM_GNU_strp_alt = 0x1f21, - _default_ = Pass, ) # Inverse mapping for ENUM_DW_FORM diff --git a/elftools/dwarf/lineprogram.py b/elftools/dwarf/lineprogram.py index dbde7baf..77e6db2e 100644 --- a/elftools/dwarf/lineprogram.py +++ b/elftools/dwarf/lineprogram.py @@ -144,7 +144,7 @@ def add_entry_old_state(cmd, args, is_extended=False): offset = self.program_start_offset while offset < self.program_end_offset: opcode = struct_parse( - self.structs.Dwarf_uint8(''), + self.structs.Dwarf_uint8, self.stream, offset) @@ -171,9 +171,9 @@ def add_entry_old_state(cmd, args, is_extended=False): elif opcode == 0: # Extended opcode: start with a zero byte, followed by # instruction size and the instruction itself. 
- inst_len = struct_parse(self.structs.Dwarf_uleb128(''), + inst_len = struct_parse(self.structs.Dwarf_uleb128, self.stream) - ex_opcode = struct_parse(self.structs.Dwarf_uint8(''), + ex_opcode = struct_parse(self.structs.Dwarf_uint8, self.stream) if ex_opcode == DW_LNE_end_sequence: @@ -183,7 +183,7 @@ def add_entry_old_state(cmd, args, is_extended=False): # reset state state = LineState(self.header['default_is_stmt']) elif ex_opcode == DW_LNE_set_address: - operand = struct_parse(self.structs.Dwarf_target_addr(''), + operand = struct_parse(self.structs.Dwarf_target_addr, self.stream) state.address = operand add_entry_old_state(ex_opcode, [operand], is_extended=True) @@ -193,7 +193,7 @@ def add_entry_old_state(cmd, args, is_extended=False): self['file_entry'].append(operand) add_entry_old_state(ex_opcode, [operand], is_extended=True) elif ex_opcode == DW_LNE_set_discriminator: - operand = struct_parse(self.structs.Dwarf_uleb128(''), + operand = struct_parse(self.structs.Dwarf_uleb128, self.stream) state.discriminator = operand else: @@ -207,23 +207,23 @@ def add_entry_old_state(cmd, args, is_extended=False): if opcode == DW_LNS_copy: add_entry_new_state(opcode, []) elif opcode == DW_LNS_advance_pc: - operand = struct_parse(self.structs.Dwarf_uleb128(''), + operand = struct_parse(self.structs.Dwarf_uleb128, self.stream) address_addend = ( operand * self.header['minimum_instruction_length']) state.address += address_addend add_entry_old_state(opcode, [address_addend]) elif opcode == DW_LNS_advance_line: - operand = struct_parse(self.structs.Dwarf_sleb128(''), + operand = struct_parse(self.structs.Dwarf_sleb128, self.stream) state.line += operand elif opcode == DW_LNS_set_file: - operand = struct_parse(self.structs.Dwarf_uleb128(''), + operand = struct_parse(self.structs.Dwarf_uleb128, self.stream) state.file = operand add_entry_old_state(opcode, [operand]) elif opcode == DW_LNS_set_column: - operand = struct_parse(self.structs.Dwarf_uleb128(''), + operand = 
struct_parse(self.structs.Dwarf_uleb128, self.stream) state.column = operand add_entry_old_state(opcode, [operand]) @@ -240,7 +240,7 @@ def add_entry_old_state(cmd, args, is_extended=False): state.address += address_addend add_entry_old_state(opcode, [address_addend]) elif opcode == DW_LNS_fixed_advance_pc: - operand = struct_parse(self.structs.Dwarf_uint16(''), + operand = struct_parse(self.structs.Dwarf_uint16, self.stream) state.address += operand add_entry_old_state(opcode, [operand]) @@ -251,7 +251,7 @@ def add_entry_old_state(cmd, args, is_extended=False): state.epilogue_begin = True add_entry_old_state(opcode, []) elif opcode == DW_LNS_set_isa: - operand = struct_parse(self.structs.Dwarf_uleb128(''), + operand = struct_parse(self.structs.Dwarf_uleb128, self.stream) state.isa = operand add_entry_old_state(opcode, [operand]) diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index ffdfde8b..e4bad7ac 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -207,9 +207,9 @@ def _parse_location_list_from_stream(self): while True: entry_offset = self.stream.tell() begin_offset = struct_parse( - self.structs.Dwarf_target_addr(''), self.stream) + self.structs.Dwarf_target_addr, self.stream) end_offset = struct_parse( - self.structs.Dwarf_target_addr(''), self.stream) + self.structs.Dwarf_target_addr, self.stream) if begin_offset == 0 and end_offset == 0: # End of list - we're done. 
break @@ -220,8 +220,8 @@ def _parse_location_list_from_stream(self): else: # Location list entry expr_len = struct_parse( - self.structs.Dwarf_uint16(''), self.stream) - loc_expr = [struct_parse(self.structs.Dwarf_uint8(''), + self.structs.Dwarf_uint16, self.stream) + loc_expr = [struct_parse(self.structs.Dwarf_uint8, self.stream) for i in range(expr_len)] entry_length = self.stream.tell() - entry_offset diff --git a/elftools/dwarf/namelut.py b/elftools/dwarf/namelut.py index 7999be88..dcb3daf6 100755 --- a/elftools/dwarf/namelut.py +++ b/elftools/dwarf/namelut.py @@ -10,10 +10,12 @@ import collections from collections import OrderedDict from collections.abc import Mapping + +from ..common.construct_utils import CStringBytes from ..common.utils import struct_parse from bisect import bisect_right import math -from ..construct import CString, Struct, If +from construct import If, CString, Struct NameLUTEntry = collections.namedtuple('NameLUTEntry', 'cu_ofs die_ofs') @@ -163,9 +165,10 @@ def _get_entries(self): # an offset field containing zero (and no following string). Because # of sequential parsing, every next entry may be that terminator. # So, field "name" is conditional. - entry_struct = Struct("Dwarf_offset_name_pair", - self._structs.Dwarf_offset('die_ofs'), - If(lambda ctx: ctx['die_ofs'], CString('name'))) + entry_struct = Struct( # Dwarf_offset_name_pair + 'die_ofs' / self._structs.Dwarf_offset, + 'name' / If(lambda ctx: ctx['die_ofs'], CStringBytes) + ) # each run of this loop will fetch one CU worth of entries. 
while offset < self._size: diff --git a/elftools/dwarf/ranges.py b/elftools/dwarf/ranges.py index 4491918f..fa6b06f2 100644 --- a/elftools/dwarf/ranges.py +++ b/elftools/dwarf/ranges.py @@ -179,9 +179,9 @@ def _parse_range_list_from_stream(self, cu): while True: entry_offset = self.stream.tell() begin_offset = struct_parse( - self.structs.Dwarf_target_addr(''), self.stream) + self.structs.Dwarf_target_addr, self.stream) end_offset = struct_parse( - self.structs.Dwarf_target_addr(''), self.stream) + self.structs.Dwarf_target_addr, self.stream) if begin_offset == 0 and end_offset == 0: # End of list - we're done. break diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index 8fa6b58a..cc28c69b 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -7,16 +7,16 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from logging.config import valid_ident -from ..construct import ( - UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64, - SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64, - Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray, - CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence, - String, Switch, Value - ) -from ..common.construct_utils import (RepeatUntilExcluding, ULEB128, SLEB128, - StreamOffset) +from construct import ( + Int8ub, Int16ub, Int24ub, Int32ub, Int64ub, Int8ul, Int16ul, Int24ul, Int32ul, Int64ul, + Int8sb, Int16sb, Int24sb, Int32sb, Int64sb, Int8sl, Int16sl, Int24sl, Int32sl, Int64sl, + Adapter, ConstructError, If, Enum, Array, PrefixedArray, Bytes, IfThenElse, Construct, + Struct, Switch, Computed, Padding, NullTerminated, GreedyBytes, Tell, RepeatUntil, + VarInt +) +from ..common.construct_utils import ( + SLEB128, EmbeddableStruct, Embed, CStringBytes, exclude_last_value +) from .enums import * @@ -119,31 +119,33 @@ def 
initial_length_field_size(self): def _create_structs(self): if self.little_endian: - self.Dwarf_uint8 = ULInt8 - self.Dwarf_uint16 = ULInt16 - self.Dwarf_uint32 = ULInt32 - self.Dwarf_uint64 = ULInt64 - self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64 - self.Dwarf_length = ULInt32 if self.dwarf_format == 32 else ULInt64 - self.Dwarf_target_addr = ( - ULInt32 if self.address_size == 4 else ULInt64) - self.Dwarf_int8 = SLInt8 - self.Dwarf_int16 = SLInt16 - self.Dwarf_int32 = SLInt32 - self.Dwarf_int64 = SLInt64 + self.Dwarf_uint8 = Int8ul + self.Dwarf_uint16 = Int16ul + self.Dwarf_uint24 = Int24ul + self.Dwarf_uint32 = Int32ul + self.Dwarf_uint64 = Int64ul + self.Dwarf_offset = Int32ul if self.dwarf_format == 32 else Int64ul + self.Dwarf_length = Int32ul if self.dwarf_format == 32 else Int64ul + self.Dwarf_target_addr = Int32ul if self.address_size == 4 else Int64ul + self.Dwarf_int8 = Int8sl + self.Dwarf_int16 = Int16sl + self.Dwarf_int24 = Int24sl + self.Dwarf_int32 = Int32sl + self.Dwarf_int64 = Int64sl else: - self.Dwarf_uint8 = UBInt8 - self.Dwarf_uint16 = UBInt16 - self.Dwarf_uint32 = UBInt32 - self.Dwarf_uint64 = UBInt64 - self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64 - self.Dwarf_length = UBInt32 if self.dwarf_format == 32 else UBInt64 - self.Dwarf_target_addr = ( - UBInt32 if self.address_size == 4 else UBInt64) - self.Dwarf_int8 = SBInt8 - self.Dwarf_int16 = SBInt16 - self.Dwarf_int32 = SBInt32 - self.Dwarf_int64 = SBInt64 + self.Dwarf_uint8 = Int8ub + self.Dwarf_uint16 = Int16ub + self.Dwarf_uint24 = Int24ub + self.Dwarf_uint32 = Int32ub + self.Dwarf_uint64 = Int64ub + self.Dwarf_offset = Int32ub if self.dwarf_format == 32 else Int64ub + self.Dwarf_length = Int32ub if self.dwarf_format == 32 else Int64ub + self.Dwarf_target_addr = Int32ub if self.address_size == 4 else Int64ub + self.Dwarf_int8 = Int8sb + self.Dwarf_int16 = Int16sb + self.Dwarf_int24 = Int24sb + self.Dwarf_int32 = Int32sb + self.Dwarf_int64 = 
Int64sb self._create_initial_length() self._create_leb128() @@ -163,49 +165,49 @@ def _create_structs(self): self._create_gnu_debugaltlink() def _create_initial_length(self): - def _InitialLength(name): - # Adapts a Struct that parses forward a full initial length field. - # Only if the first word is the continuation value, the second - # word is parsed from the stream. - return _InitialLengthAdapter( - Struct(name, - self.Dwarf_uint32('first'), - If(lambda ctx: ctx.first == 0xFFFFFFFF, - self.Dwarf_uint64('second'), - elsevalue=None))) - self.Dwarf_initial_length = _InitialLength + # Adapts a Struct that parses forward a full initial length field. + # Only if the first word is the continuation value, the second + # word is parsed from the stream. + self.Dwarf_initial_length = _InitialLengthAdapter( + Struct( + 'first' / self.Dwarf_uint32, + 'second' / If(lambda ctxt: ctxt.first == 0xFFFFFFFF, + self.Dwarf_uint64 + ) + ) + ) def _create_leb128(self): - self.Dwarf_uleb128 = ULEB128 + self.Dwarf_uleb128 = VarInt self.Dwarf_sleb128 = SLEB128 def _create_cu_header(self): - dwarfv4_CU_header = Struct('', - self.Dwarf_offset('debug_abbrev_offset'), - self.Dwarf_uint8('address_size') - ) + dwarfv4_CU_header = Struct( + 'debug_abbrev_offset' / self.Dwarf_offset, + 'address_size' / self.Dwarf_uint8 + ).compile() # DWARFv5 reverses the order of address_size and debug_abbrev_offset. 
# DWARFv5 7.5.1.1 - dwarfv5_CP_CU_header = Struct('', - self.Dwarf_uint8('address_size'), - self.Dwarf_offset('debug_abbrev_offset') - ) + dwarfv5_CP_CU_header = Struct( + 'address_size' / self.Dwarf_uint8, + 'debug_abbrev_offset' / self.Dwarf_offset + ).compile() # DWARFv5 7.5.1.2 - dwarfv5_SS_CU_header = Struct('', - self.Dwarf_uint8('address_size'), - self.Dwarf_offset('debug_abbrev_offset'), - self.Dwarf_uint64('dwo_id') - ) + dwarfv5_SS_CU_header = Struct( + 'address_size' / self.Dwarf_uint8, + 'debug_abbrev_offset' / self.Dwarf_offset, + 'dwo_id' / self.Dwarf_uint64 + ).compile() # DWARFv5 7.5.1.3 - dwarfv5_TS_CU_header = Struct('', - self.Dwarf_uint8('address_size'), - self.Dwarf_offset('debug_abbrev_offset'), - self.Dwarf_uint64('type_signature'), - self.Dwarf_offset('type_offset') - ) - dwarfv5_CU_header = Struct('', - Enum(self.Dwarf_uint8('unit_type'), **ENUM_DW_UT), - Embed(Switch('', lambda ctx: ctx.unit_type, + dwarfv5_TS_CU_header = Struct( + 'address_size' / self.Dwarf_uint8, + 'debug_abbrev_offset' / self.Dwarf_offset, + 'type_signature' / self.Dwarf_uint64, + 'type_offset' / self.Dwarf_offset + ).compile() + dwarfv5_CU_header = EmbeddableStruct( + 'unit_type' / Enum(self.Dwarf_uint8, **ENUM_DW_UT), + Embed(Switch(lambda ctx: ctx.unit_type, { 'DW_UT_compile' : dwarfv5_CP_CU_header, 'DW_UT_partial' : dwarfv5_CP_CU_header, @@ -213,48 +215,55 @@ def _create_cu_header(self): 'DW_UT_split_compile' : dwarfv5_SS_CU_header, 'DW_UT_type' : dwarfv5_TS_CU_header, 'DW_UT_split_type' : dwarfv5_TS_CU_header, - }))) - self.Dwarf_CU_header = Struct('Dwarf_CU_header', - self.Dwarf_initial_length('unit_length'), - self.Dwarf_uint16('version'), - IfThenElse('', lambda ctx: ctx['version'] >= 5, - Embed(dwarfv5_CU_header), - Embed(dwarfv4_CU_header), + })) + ) + self.Dwarf_CU_header = EmbeddableStruct( + 'unit_length' / self.Dwarf_initial_length, + 'version' / self.Dwarf_uint16, + Embed(IfThenElse(lambda ctx: ctx['version'] >= 5, + dwarfv5_CU_header, + 
dwarfv4_CU_header, )) + ) def _create_abbrev_declaration(self): - self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry', - Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG), - Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN), - RepeatUntilExcluding( - lambda obj, ctx: - obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null', - Struct('attr_spec', - Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT), - Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM), - If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const', - self.Dwarf_sleb128('value'))))) + self.Dwarf_abbrev_declaration = Struct( # Dwarf_abbrev_entry + 'tag' / Enum(self.Dwarf_uleb128, **ENUM_DW_TAG), + 'children_flag' / Enum(self.Dwarf_uint8, **ENUM_DW_CHILDREN), + 'attr_spec' / RepeatUntil( + exclude_last_value(lambda obj, lst, ctx: obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null'), + Struct( + 'name' / Enum(self.Dwarf_uleb128, **ENUM_DW_AT), + 'form' / Enum(self.Dwarf_uleb128, **ENUM_DW_FORM), + 'value' / If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const', + self.Dwarf_sleb128 + ) + ) + ) + ) def _create_debugsup(self): # We don't care about checksums, for now. 
- self.Dwarf_debugsup = Struct('Elf_debugsup', - self.Dwarf_int16('version'), - self.Dwarf_uint8('is_supplementary'), - CString('sup_filename')) + self.Dwarf_debugsup = Struct( + 'version' / self.Dwarf_int16, + 'is_supplementary' / self.Dwarf_uint8, + 'sup_filename' / CStringBytes + ) def _create_gnu_debugaltlink(self): - self.Dwarf_debugaltlink = Struct('Elf_debugaltlink', - CString("sup_filename"), - String("sup_checksum", length=20)) + self.Dwarf_debugaltlink = Struct( + 'sup_filename' / CStringBytes, + 'sup_checksum' / Bytes(20) + ) def _create_dw_form(self): self.Dwarf_dw_form = dict( - DW_FORM_addr=self.Dwarf_target_addr(''), - DW_FORM_addrx=self.Dwarf_uleb128(''), - DW_FORM_addrx1=self.Dwarf_uint8(''), - DW_FORM_addrx2=self.Dwarf_uint16(''), - # DW_FORM_addrx3=self.Dwarf_uint24(''), # TODO - DW_FORM_addrx4=self.Dwarf_uint32(''), + DW_FORM_addr=self.Dwarf_target_addr, + DW_FORM_addrx=self.Dwarf_uleb128, + DW_FORM_addrx1=self.Dwarf_uint8, + DW_FORM_addrx2=self.Dwarf_uint16, + DW_FORM_addrx3=self.Dwarf_uint24, + DW_FORM_addrx4=self.Dwarf_uint32, DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8), DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16), @@ -262,283 +271,294 @@ def _create_dw_form(self): DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128), # All DW_FORM_data forms are assumed to be unsigned - DW_FORM_data1=self.Dwarf_uint8(''), - DW_FORM_data2=self.Dwarf_uint16(''), - DW_FORM_data4=self.Dwarf_uint32(''), - DW_FORM_data8=self.Dwarf_uint64(''), - DW_FORM_data16=Array(16, self.Dwarf_uint8('')), # Used for hashes and such, not for integers - DW_FORM_sdata=self.Dwarf_sleb128(''), - DW_FORM_udata=self.Dwarf_uleb128(''), - - DW_FORM_string=CString(''), - DW_FORM_strp=self.Dwarf_offset(''), - DW_FORM_strp_sup=self.Dwarf_offset(''), - DW_FORM_line_strp=self.Dwarf_offset(''), - DW_FORM_strx1=self.Dwarf_uint8(''), - DW_FORM_strx2=self.Dwarf_uint16(''), - # DW_FORM_strx3=self.Dwarf_uint24(''), # TODO - DW_FORM_strx4=self.Dwarf_uint64(''), - 
DW_FORM_flag=self.Dwarf_uint8(''), - - DW_FORM_ref=self.Dwarf_uint32(''), - DW_FORM_ref1=self.Dwarf_uint8(''), - DW_FORM_ref2=self.Dwarf_uint16(''), - DW_FORM_ref4=self.Dwarf_uint32(''), - DW_FORM_ref_sup4=self.Dwarf_uint32(''), - DW_FORM_ref8=self.Dwarf_uint64(''), - DW_FORM_ref_sup8=self.Dwarf_uint64(''), - DW_FORM_ref_udata=self.Dwarf_uleb128(''), - DW_FORM_ref_addr=self.Dwarf_target_addr('') if self.dwarf_version == 2 else self.Dwarf_offset(''), - - DW_FORM_indirect=self.Dwarf_uleb128(''), + DW_FORM_data1=self.Dwarf_uint8, + DW_FORM_data2=self.Dwarf_uint16, + DW_FORM_data4=self.Dwarf_uint32, + DW_FORM_data8=self.Dwarf_uint64, + DW_FORM_data16=Array(16, self.Dwarf_uint8), # Used for hashes and such, not for integers + DW_FORM_sdata=self.Dwarf_sleb128, + DW_FORM_udata=self.Dwarf_uleb128, + + DW_FORM_string=CStringBytes, + DW_FORM_strp=self.Dwarf_offset, + DW_FORM_strp_sup=self.Dwarf_offset, + DW_FORM_line_strp=self.Dwarf_offset, + DW_FORM_strx1=self.Dwarf_uint8, + DW_FORM_strx2=self.Dwarf_uint16, + DW_FORM_strx3=self.Dwarf_uint24, + DW_FORM_strx4=self.Dwarf_uint64, + DW_FORM_flag=self.Dwarf_uint8, + + DW_FORM_ref=self.Dwarf_uint32, + DW_FORM_ref1=self.Dwarf_uint8, + DW_FORM_ref2=self.Dwarf_uint16, + DW_FORM_ref4=self.Dwarf_uint32, + DW_FORM_ref_sup4=self.Dwarf_uint32, + DW_FORM_ref8=self.Dwarf_uint64, + DW_FORM_ref_sup8=self.Dwarf_uint64, + DW_FORM_ref_udata=self.Dwarf_uleb128, + DW_FORM_ref_addr=self.Dwarf_target_addr if self.dwarf_version == 2 else self.Dwarf_offset, + + DW_FORM_indirect=self.Dwarf_uleb128, # New forms in DWARFv4 - DW_FORM_flag_present = StaticField('', 0), - DW_FORM_sec_offset = self.Dwarf_offset(''), - DW_FORM_exprloc = self._make_block_struct(self.Dwarf_uleb128), - DW_FORM_ref_sig8 = self.Dwarf_uint64(''), + DW_FORM_flag_present=Bytes(0), + DW_FORM_sec_offset=self.Dwarf_offset, + DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128), + DW_FORM_ref_sig8=self.Dwarf_uint64, - DW_FORM_GNU_strp_alt=self.Dwarf_offset(''), - 
DW_FORM_GNU_ref_alt=self.Dwarf_offset(''), - DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''), + DW_FORM_GNU_strp_alt=self.Dwarf_offset, + DW_FORM_GNU_ref_alt=self.Dwarf_offset, + DW_AT_GNU_all_call_sites=self.Dwarf_uleb128, # New forms in DWARFv5 - DW_FORM_loclistx=self.Dwarf_uleb128(''), - DW_FORM_rnglistx=self.Dwarf_uleb128('') + DW_FORM_loclistx=self.Dwarf_uleb128, + DW_FORM_rnglistx=self.Dwarf_uleb128 ) def _create_aranges_header(self): - self.Dwarf_aranges_header = Struct("Dwarf_aranges_header", - self.Dwarf_initial_length('unit_length'), - self.Dwarf_uint16('version'), - self.Dwarf_offset('debug_info_offset'), # a little tbd - self.Dwarf_uint8('address_size'), - self.Dwarf_uint8('segment_size') + self.Dwarf_aranges_header = Struct( + 'unit_length' / self.Dwarf_initial_length, + 'version' / self.Dwarf_uint16, + 'debug_info_offset' / self.Dwarf_offset, # a little tbd + 'address_size' / self.Dwarf_uint8, + 'segment_size' / self.Dwarf_uint8 ) def _create_nameLUT_header(self): - self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header", - self.Dwarf_initial_length('unit_length'), - self.Dwarf_uint16('version'), - self.Dwarf_offset('debug_info_offset'), - self.Dwarf_length('debug_info_length') + self.Dwarf_nameLUT_header = Struct( + 'unit_length' / self.Dwarf_initial_length, + 'version' / self.Dwarf_uint16, + 'debug_info_offset' / self.Dwarf_offset, + 'debug_info_length' / self.Dwarf_length ) def _create_string_offsets_table_header(self): self.Dwarf_string_offsets_table_header = Struct( - "Dwarf_string_offets_table_header", - self.Dwarf_initial_length('unit_length'), - self.Dwarf_uint16('version'), - self.Dwarf_uint16('padding'), + 'unit_length' / self.Dwarf_initial_length, + 'version' / self.Dwarf_uint16, + Padding(2) ) def _create_address_table_header(self): - self.Dwarf_address_table_header = Struct("Dwarf_address_table_header", - self.Dwarf_initial_length('unit_length'), - self.Dwarf_uint16('version'), - self.Dwarf_uint8('address_size'), - 
self.Dwarf_uint8('segment_selector_size'), + self.Dwarf_address_table_header = Struct( + 'unit_length' /self.Dwarf_initial_length, + 'version' / self.Dwarf_uint16, + 'address_size' / self.Dwarf_uint8, + 'segment_selector_size' / self.Dwarf_uint8, ) def _create_lineprog_header(self): # A file entry is terminated by a NULL byte, so we don't want to parse # past it. Therefore an If is used. - self.Dwarf_lineprog_file_entry = Struct('file_entry', - CString('name'), - If(lambda ctx: len(ctx.name) != 0, - Embed(Struct('', - self.Dwarf_uleb128('dir_index'), - self.Dwarf_uleb128('mtime'), - self.Dwarf_uleb128('length'))))) + self.Dwarf_lineprog_file_entry = EmbeddableStruct( + 'name' / CStringBytes, + Embed(If(lambda ctx: len(ctx.name) != 0, + Struct( + 'dir_index' / self.Dwarf_uleb128, + 'mtime' / self.Dwarf_uleb128, + 'length' / self.Dwarf_uleb128 + ) + )) + ) class FormattedEntry(Construct): # Generates a parser based on a previously parsed piece, - # similar to deprecared Dynamic. + # similar to deprecated Dynamic. # Strings are resolved later, since it potentially requires # looking at another section. 
- def __init__(self, name, structs, format_field): - Construct.__init__(self, name) + def __init__(self, structs, format_field): + Construct.__init__(self) self.structs = structs self.format_field = format_field - def _parse(self, stream, context): - # Somewhat tricky technique here, explicitly writing back to the context - if self.format_field + "_parser" in context: - parser = context[self.format_field + "_parser"] - else: - fields = tuple( - Rename(f.content_type, self.structs.Dwarf_dw_form[f.form]) - for f in context[self.format_field]) - parser = Struct('formatted_entry', *fields) - context[self.format_field + "_parser"] = parser - return parser._parse(stream, context) + def _parse(self, stream, context, path): + fields = [] + for f in context['_'][self.format_field]: + fields.append(f.content_type / self.structs.Dwarf_dw_form[f.form]) + parser = Struct(*fields) + return parser._parse(stream, context, path) ver5 = lambda ctx: ctx.version >= 5 - self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header', - self.Dwarf_initial_length('unit_length'), - self.Dwarf_uint16('version'), - If(ver5, - self.Dwarf_uint8("address_size"), - None), - If(ver5, - self.Dwarf_uint8("segment_selector_size"), - None), - self.Dwarf_offset('header_length'), - self.Dwarf_uint8('minimum_instruction_length'), - If(lambda ctx: ctx.version >= 4, - self.Dwarf_uint8("maximum_operations_per_instruction"), - 1), - self.Dwarf_uint8('default_is_stmt'), - self.Dwarf_int8('line_base'), - self.Dwarf_uint8('line_range'), - self.Dwarf_uint8('opcode_base'), - Array(lambda ctx: ctx.opcode_base - 1, - self.Dwarf_uint8('standard_opcode_lengths')), - If(ver5, - PrefixedArray( - Struct('directory_entry_format', - Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT), - Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)), - self.Dwarf_uint8("directory_entry_format_count"))), - If(ver5, # Name deliberately doesn't match the legacy object, since the format can't be made compatible - PrefixedArray( - 
FormattedEntry('directories', self, "directory_entry_format"), - self.Dwarf_uleb128('directories_count'))), - If(ver5, - PrefixedArray( - Struct('file_name_entry_format', - Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT), - Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)), - self.Dwarf_uint8("file_name_entry_format_count"))), - If(ver5, - PrefixedArray( - FormattedEntry('file_names', self, "file_name_entry_format"), - self.Dwarf_uleb128('file_names_count'))), - # Legacy directories/files - DWARF < 5 only - If(lambda ctx: ctx.version < 5, - RepeatUntilExcluding( - lambda obj, ctx: obj == b'', - CString('include_directory'))), - If(lambda ctx: ctx.version < 5, - RepeatUntilExcluding( - lambda obj, ctx: len(obj.name) == 0, - self.Dwarf_lineprog_file_entry)) # array name is file_entry + self.Dwarf_lineprog_header = EmbeddableStruct( + 'unit_length' / self.Dwarf_initial_length, + 'version' / self.Dwarf_uint16, + 'address_size' / If(ver5, self.Dwarf_uint8), + 'segment_selector_size' / If(ver5, self.Dwarf_uint8), + 'header_length' / self.Dwarf_offset, + 'minimum_instruction_length' / self.Dwarf_uint8, + 'maximum_operations_per_instruction' / IfThenElse(lambda ctx: ctx.version >= 4, + self.Dwarf_uint8, + Computed(1) + ), + 'default_is_stmt' / self.Dwarf_uint8, + 'line_base' / self.Dwarf_int8, + 'line_range' / self.Dwarf_uint8, + 'opcode_base' / self.Dwarf_uint8, + 'standard_opcode_lengths' / Array(lambda ctx: ctx.opcode_base - 1, self.Dwarf_uint8), + Embed(IfThenElse(ver5, + Struct( # Names deliberately don't match the legacy objects, since the format can't be made compatible + 'directory_entry_format' / PrefixedArray( + self.Dwarf_uint8, + Struct( + 'content_type' / Enum(self.Dwarf_uleb128, **ENUM_DW_LNCT), + 'form' / Enum(self.Dwarf_uleb128, **ENUM_DW_FORM) + ) + ), + 'directories' / PrefixedArray( + self.Dwarf_uleb128, + FormattedEntry(self, 'directory_entry_format'), + ), + 'file_name_entry_format' / PrefixedArray( + self.Dwarf_uint8, + Struct( + 
'content_type' / Enum(self.Dwarf_uleb128, **ENUM_DW_LNCT), + 'form' / Enum(self.Dwarf_uleb128, **ENUM_DW_FORM) + ) + ), + 'file_names' / PrefixedArray( + self.Dwarf_uleb128, + FormattedEntry(self, 'file_name_entry_format') + ) + ), + # Legacy directories/files - DWARF < 5 only + Struct( + 'include_directory' / RepeatUntil( + exclude_last_value(lambda obj, lst, ctx: obj == b''), + NullTerminated(GreedyBytes) + ), + 'file_entry' / RepeatUntil( + exclude_last_value(lambda obj, lst, ctx: len(obj.name) == 0), + self.Dwarf_lineprog_file_entry + ) + ) + )), ) def _create_callframe_entry_headers(self): - self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', - self.Dwarf_initial_length('length'), - self.Dwarf_offset('CIE_id'), - self.Dwarf_uint8('version'), - CString('augmentation'), - self.Dwarf_uleb128('code_alignment_factor'), - self.Dwarf_sleb128('data_alignment_factor'), - self.Dwarf_uleb128('return_address_register')) + self.Dwarf_CIE_header = Struct( + 'length' / self.Dwarf_initial_length, + 'CIE_id' / self.Dwarf_offset, + 'version' / self.Dwarf_uint8, + 'augmentation' / CStringBytes, + 'code_alignment_factor' / self.Dwarf_uleb128, + 'data_alignment_factor' / self.Dwarf_sleb128, + 'return_address_register' / self.Dwarf_uleb128) self.EH_CIE_header = self.Dwarf_CIE_header # The CIE header was modified in DWARFv4. 
if self.dwarf_version == 4: - self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', - self.Dwarf_initial_length('length'), - self.Dwarf_offset('CIE_id'), - self.Dwarf_uint8('version'), - CString('augmentation'), - self.Dwarf_uint8('address_size'), - self.Dwarf_uint8('segment_size'), - self.Dwarf_uleb128('code_alignment_factor'), - self.Dwarf_sleb128('data_alignment_factor'), - self.Dwarf_uleb128('return_address_register')) - - self.Dwarf_FDE_header = Struct('Dwarf_FDE_header', - self.Dwarf_initial_length('length'), - self.Dwarf_offset('CIE_pointer'), - self.Dwarf_target_addr('initial_location'), - self.Dwarf_target_addr('address_range')) + self.Dwarf_CIE_header = Struct( + 'length' / self.Dwarf_initial_length, + 'CIE_id' / self.Dwarf_offset, + 'version' / self.Dwarf_uint8, + 'augmentation' / CStringBytes, + 'address_size' / self.Dwarf_uint8, + 'segment_size' / self.Dwarf_uint8, + 'code_alignment_factor' / self.Dwarf_uleb128, + 'data_alignment_factor' / self.Dwarf_sleb128, + 'return_address_register' / self.Dwarf_uleb128 + ) + + self.Dwarf_FDE_header = Struct( + 'length' / self.Dwarf_initial_length, + 'CIE_pointer' / self.Dwarf_offset, + 'initial_location' / self.Dwarf_target_addr, + 'address_range' / self.Dwarf_target_addr + ) def _make_block_struct(self, length_field): """ Create a struct for DW_FORM_block """ return PrefixedArray( - subcon=self.Dwarf_uint8('elem'), - length_field=length_field('')) + length_field, + self.Dwarf_uint8, + ) def _create_loclists_parsers(self): """ Create a struct for debug_loclists CU header, DWARFv5, 7,29 """ - self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header', - StreamOffset('cu_offset'), - self.Dwarf_initial_length('unit_length'), - Value('is64', lambda ctx: ctx.is64), - StreamOffset('offset_after_length'), - self.Dwarf_uint16('version'), - self.Dwarf_uint8('address_size'), - self.Dwarf_uint8('segment_selector_size'), - self.Dwarf_uint32('offset_count'), - StreamOffset('offset_table_offset')) - - cld = 
self.Dwarf_loclists_counted_location_description = PrefixedArray(self.Dwarf_uint8('loc_expr'), self.Dwarf_uleb128('')) - - self.Dwarf_loclists_entries = RepeatUntilExcluding( - lambda obj, ctx: obj.entry_type == 'DW_LLE_end_of_list', - Struct('entry', - StreamOffset('entry_offset'), - Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_LLE), - Embed(Switch('', lambda ctx: ctx.entry_type, + self.Dwarf_loclists_CU_header = Struct( + 'cu_offset' / Tell, + 'unit_length' / self.Dwarf_initial_length, + 'is64' / Computed(lambda ctx: ctx.is64), + 'offset_after_length' / Tell, + 'version' / self.Dwarf_uint16, + 'address_size' / self.Dwarf_uint8, + 'segment_selector_size' / self.Dwarf_uint8, + 'offset_count' / self.Dwarf_uint32, + 'offset_table_offset' / Tell + ) + + cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(self.Dwarf_uleb128, self.Dwarf_uint8) + + self.Dwarf_loclists_entries = RepeatUntil( + exclude_last_value(lambda obj, list, ctx: obj.entry_type == 'DW_LLE_end_of_list'), + EmbeddableStruct( + 'entry_offset' / Tell, + 'entry_type' / Enum(self.Dwarf_uint8, **ENUM_DW_LLE), + Embed(Switch(lambda ctx: ctx.entry_type, { - 'DW_LLE_end_of_list' : Struct('end_of_list'), - 'DW_LLE_base_addressx' : Struct('base_addressx', self.Dwarf_uleb128('index')), - 'DW_LLE_startx_endx' : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index'), cld), - 'DW_LLE_startx_length' : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length'), cld), - 'DW_LLE_offset_pair' : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset'), cld), - 'DW_LLE_default_location' : Struct('default_location', cld), - 'DW_LLE_base_address' : Struct('base_address', self.Dwarf_target_addr('address')), - 'DW_LLE_start_end' : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address'), cld), - 'DW_LLE_start_length' : Struct('start_length', 
self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'), cld), + 'DW_LLE_end_of_list' : Struct(), + 'DW_LLE_base_addressx' : Struct('index' / self.Dwarf_uleb128), + 'DW_LLE_startx_endx' : Struct('start_index' / self.Dwarf_uleb128, 'end_index' / self.Dwarf_uleb128, 'loc_expr' / cld), + 'DW_LLE_startx_length' : Struct('start_index' / self.Dwarf_uleb128, 'length' / self.Dwarf_uleb128, 'loc_expr' / cld), + 'DW_LLE_offset_pair' : Struct('start_offset' / self.Dwarf_uleb128, 'end_offset' / self.Dwarf_uleb128, 'loc_expr' / cld), + 'DW_LLE_default_location' : Struct('loc_expr' / cld), + 'DW_LLE_base_address' : Struct('address' / self.Dwarf_target_addr), + 'DW_LLE_start_end' : Struct('start_address' / self.Dwarf_target_addr, 'end_address' / self.Dwarf_target_addr, 'loc_expr' / cld), + 'DW_LLE_start_length' : Struct('start_address' / self.Dwarf_target_addr, 'length' / self.Dwarf_uleb128, 'loc_expr' / cld), })), - StreamOffset('entry_end_offset'), - Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset))) + 'entry_end_offset' / Tell, + 'entry_length' / Computed(lambda ctx: ctx.entry_end_offset - ctx.entry_offset) + ) + ) - self.Dwarf_locview_pair = Struct('locview_pair', - StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end')) + self.Dwarf_locview_pair = Struct( + 'entry_offset' / Tell, + 'begin' / self.Dwarf_uleb128, + 'end' / self.Dwarf_uleb128 + ) def _create_rnglists_parsers(self): - self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header', - StreamOffset('cu_offset'), - self.Dwarf_initial_length('unit_length'), - Value('is64', lambda ctx: ctx.is64), - StreamOffset('offset_after_length'), - self.Dwarf_uint16('version'), - self.Dwarf_uint8('address_size'), - self.Dwarf_uint8('segment_selector_size'), - self.Dwarf_uint32('offset_count'), - StreamOffset('offset_table_offset')) - - self.Dwarf_rnglists_entries = RepeatUntilExcluding( - lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list', - 
Struct('entry', - StreamOffset('entry_offset'), - Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_RLE), - Embed(Switch('', lambda ctx: ctx.entry_type, + self.Dwarf_rnglists_CU_header = Struct( + 'cu_offset' / Tell, + 'unit_length' / self.Dwarf_initial_length, + 'is64' / Computed(lambda ctx: ctx.is64), + 'offset_after_length' / Tell, + 'version' / self.Dwarf_uint16, + 'address_size' / self.Dwarf_uint8, + 'segment_selector_size' / self.Dwarf_uint8, + 'offset_count' / self.Dwarf_uint32, + 'offset_table_offset' / Tell + ) + + self.Dwarf_rnglists_entries = RepeatUntil( + exclude_last_value(lambda obj, list, ctx: obj.entry_type == 'DW_RLE_end_of_list'), + EmbeddableStruct( + 'entry_offset' / Tell, + 'entry_type' / Enum(self.Dwarf_uint8, **ENUM_DW_RLE), + Embed(Switch(lambda ctx: ctx.entry_type, { - 'DW_RLE_end_of_list' : Struct('end_of_list'), - 'DW_RLE_base_addressx' : Struct('base_addressx', self.Dwarf_uleb128('index')), - 'DW_RLE_startx_endx' : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index')), - 'DW_RLE_startx_length' : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length')), - 'DW_RLE_offset_pair' : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset')), - 'DW_RLE_base_address' : Struct('base_address', self.Dwarf_target_addr('address')), - 'DW_RLE_start_end' : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address')), - 'DW_RLE_start_length' : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length')) + 'DW_RLE_end_of_list' : Struct(), + 'DW_RLE_base_addressx' : Struct('index' / self.Dwarf_uleb128), + 'DW_RLE_startx_endx' : Struct('start_index' / self.Dwarf_uleb128, 'end_index' / self.Dwarf_uleb128), + 'DW_RLE_startx_length' : Struct('start_index' / self.Dwarf_uleb128, 'length' / self.Dwarf_uleb128), + 'DW_RLE_offset_pair' : Struct('start_offset' / self.Dwarf_uleb128, 'end_offset' / 
self.Dwarf_uleb128), + 'DW_RLE_base_address' : Struct('address' / self.Dwarf_target_addr), + 'DW_RLE_start_end' : Struct('start_address' / self.Dwarf_target_addr, 'end_address' / self.Dwarf_target_addr), + 'DW_RLE_start_length' : Struct('start_address' / self.Dwarf_target_addr, 'length' / self.Dwarf_uleb128) })), - StreamOffset('entry_end_offset'), - Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset))) + 'entry_end_offset' / Tell, + 'entry_length' / Computed(lambda ctx: ctx.entry_end_offset - ctx.entry_offset))) class _InitialLengthAdapter(Adapter): """ A standard Construct adapter that expects a sub-construct as a struct with one or two values (first, second). """ - def _decode(self, obj, context): + def _decode(self, obj, context, path): if obj.first < 0xFFFFFF00: context['is64'] = False return obj.first diff --git a/elftools/ehabi/structs.py b/elftools/ehabi/structs.py index 35ceaf34..c03e83f1 100644 --- a/elftools/ehabi/structs.py +++ b/elftools/ehabi/structs.py @@ -8,7 +8,7 @@ # This code is in the public domain # ------------------------------------------------------------------------------- -from ..construct import UBInt32, ULInt32, Struct +from construct import Int32ub, Int32ul, Struct class EHABIStructs(object): @@ -27,21 +27,19 @@ def __init__(self, little_endian): def _create_structs(self): if self._little_endian: - self.EHABI_uint32 = ULInt32 + self.EHABI_uint32 = Int32ul else: - self.EHABI_uint32 = UBInt32 + self.EHABI_uint32 = Int32ub self._create_exception_handler_index() self._create_exception_handler_table() def _create_exception_handler_index(self): self.EH_index_struct = Struct( - 'EH_index', - self.EHABI_uint32('word0'), - self.EHABI_uint32('word1') + 'word0' / self.EHABI_uint32, + 'word1' / self.EHABI_uint32 ) def _create_exception_handler_table(self): self.EH_table_struct = Struct( - 'EH_table', - self.EHABI_uint32('word0'), + 'word0' / self.EHABI_uint32, ) diff --git a/elftools/elf/enums.py b/elftools/elf/enums.py 
index 103312d5..a217e568 100644 --- a/elftools/elf/enums.py +++ b/elftools/elf/enums.py @@ -7,7 +7,6 @@ # This code is in the public domain #------------------------------------------------------------------------------- from ..common.utils import merge_dicts -from ..construct import Pass # e_ident[EI_CLASS] in the ELF header @@ -28,7 +27,6 @@ ENUM_E_VERSION = dict( EV_NONE=0, EV_CURRENT=1, - _default_=Pass, ) # e_ident[EI_OSABI] in the ELF header @@ -55,7 +53,6 @@ ELFOSABI_ARM=97, ELFOSABI_CELL_LV2=102, ELFOSABI_STANDALONE=255, - _default_=Pass, ) # e_type in the ELF header @@ -67,7 +64,6 @@ ET_CORE=4, ET_LOPROC=0xff00, ET_HIPROC=0xffff, - _default_=Pass, ) # e_machine in the ELF header @@ -269,7 +265,6 @@ # reserved 182 Reserved for future Intel use # reserved 184 Reserved for future ARM use # unknown/reserve? 225 - 242 - _default_=Pass, ) # sh_type in the section header @@ -315,7 +310,6 @@ SHT_HIUSER=0xffffffff, SHT_SUNW_LDYNSYM=0x6ffffff3, SHT_SUNW_syminfo=0x6ffffffc, - _default_=Pass, ) ENUM_SH_TYPE_AMD64 = merge_dicts( @@ -380,7 +374,6 @@ ELFCOMPRESS_HIOS=0x6fffffff, ELFCOMPRESS_LOPROC=0x70000000, ELFCOMPRESS_HIPROC=0x7fffffff, - _default_=Pass, ) # p_type in the program header @@ -408,7 +401,6 @@ PT_GNU_STACK=0x6474e551, PT_GNU_RELRO=0x6474e552, PT_GNU_PROPERTY=0x6474e553, - _default_=Pass, ) ENUM_P_TYPE_ARM = merge_dicts( @@ -441,7 +433,6 @@ STB_HIOS=12, STB_LOPROC=13, STB_HIPROC=15, - _default_=Pass, ) # st_info type in the symbol header @@ -460,7 +451,6 @@ STT_HIOS=12, STT_LOPROC=13, STT_HIPROC=15, - _default_=Pass, ) # visibility from st_other @@ -472,11 +462,9 @@ STV_EXPORTED=4, STV_SINGLETON=5, STV_ELIMINATE=6, - _default_=Pass, ) ENUM_ST_LOCAL = dict( - _default_=Pass, ) # st_shndx @@ -484,7 +472,6 @@ SHN_UNDEF=0, SHN_ABS=0xfff1, SHN_COMMON=0xfff2, - _default_=Pass, ) # d_tag @@ -572,7 +559,6 @@ DT_VERNEEDNUM=0x6fffffff, DT_AUXILIARY=0x7ffffffd, DT_FILTER=0x7fffffff, - _default_=Pass, ) # Above are the dynamic tags which are valid always. 
@@ -733,7 +719,6 @@ R_MIPS_GLOB_DAT=51, R_MIPS_COPY=126, R_MIPS_JUMP_SLOT=127, - _default_=Pass, ) ENUM_RELOC_TYPE_i386 = dict( @@ -780,7 +765,6 @@ R_386_USED_BY_INTEL_200=200, R_386_GNU_VTINHERIT=250, R_386_GNU_VTENTRY=251, - _default_=Pass, ) ENUM_RELOC_TYPE_x64 = dict( @@ -823,7 +807,6 @@ R_X86_64_REX_GOTPCRELX=42, R_X86_64_GNU_VTINHERIT=250, R_X86_64_GNU_VTENTRY=251, - _default_=Pass, ) ENUM_RELOC_TYPE_BPF = dict( @@ -833,7 +816,6 @@ R_BPF_64_ABS32=3, R_BPF_64_NODYLD32=4, R_BPF_64_32=10, - _default_=Pass, ) # https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc @@ -936,7 +918,6 @@ R_LARCH_ADD_ULEB128=107, R_LARCH_SUB_ULEB128=108, R_LARCH_64_PCREL=109, - _default_=Pass, ) ENUM_RELOC_TYPE_S390X = dict( @@ -1008,7 +989,6 @@ SYMINFO_BT_PARENT=0xfffe, SYMINFO_BT_NONE=0xfffd, SYMINFO_BT_EXTERN=0xfffc, - _default_=Pass, ) # Versym section, version dependency index @@ -1017,7 +997,6 @@ VER_NDX_GLOBAL=1, VER_NDX_LORESERVE=0xff00, VER_NDX_ELIMINATE=0xff01, - _default_=Pass, ) # Sunw Syminfo Bound To special values @@ -1026,7 +1005,6 @@ SYMINFO_BT_PARENT=0xfffe, SYMINFO_BT_NONE=0xfffd, SYMINFO_BT_EXTERN=0xfffc, - _default_=Pass, ) # PT_NOTE section types for all ELF types except ET_CORE @@ -1036,7 +1014,6 @@ NT_GNU_BUILD_ID=3, NT_GNU_GOLD_VERSION=4, NT_GNU_PROPERTY_TYPE_0=5, - _default_=Pass, ) # PT_NOTE section types for ET_CORE @@ -1048,7 +1025,6 @@ NT_AUXV=6, NT_SIGINFO=0x53494749, NT_FILE=0x46494c45, - _default_=Pass, ) # Values in GNU .note.ABI-tag notes (n_type=='NT_GNU_ABI_TAG') @@ -1059,7 +1035,6 @@ ELF_NOTE_OS_FREEBSD=3, ELF_NOTE_OS_NETBSD=4, ELF_NOTE_OS_SYLLABLE=5, - _default_=Pass, ) # Values in GNU .note.gnu.property notes (n_type=='NT_GNU_PROPERTY_TYPE_0') @@ -1071,7 +1046,6 @@ GNU_PROPERTY_X86_FEATURE_2_USED=0xc0010001, GNU_PROPERTY_X86_ISA_1_USED=0xc0010002, GNU_PROPERTY_AARCH64_FEATURE_1_AND=0xc0000000, - _default_=Pass, ) ENUM_GNU_PROPERTY_X86_FEATURE_1_FLAGS = dict( @@ -1079,7 +1053,6 @@ GNU_PROPERTY_X86_FEATURE_1_SHSTK=2, 
GNU_PROPERTY_X86_FEATURE_1_LAM_U48=4, GNU_PROPERTY_X86_FEATURE_1_LAM_U57=8, - _default_=Pass ) ENUM_RELOC_TYPE_ARM = dict( diff --git a/elftools/elf/gnuversions.py b/elftools/elf/gnuversions.py index 3ad8dd16..9ed7ec0f 100644 --- a/elftools/elf/gnuversions.py +++ b/elftools/elf/gnuversions.py @@ -6,7 +6,6 @@ # Yann Rouillard (yann@pleiades.fr.eu.org) # This code is in the public domain #------------------------------------------------------------------------------ -from ..construct import CString from ..common.utils import struct_parse, elf_assert from .sections import Section, Symbol diff --git a/elftools/elf/hash.py b/elftools/elf/hash.py index c8d1e17e..9a7fed0a 100644 --- a/elftools/elf/hash.py +++ b/elftools/elf/hash.py @@ -100,8 +100,8 @@ def __init__(self, elffile, start_offset, symboltable): self.elffile.stream, start_offset) # Element sizes in the hash table - self._wordsize = self.elffile.structs.Elf_word('').sizeof() - self._xwordsize = self.elffile.structs.Elf_xword('').sizeof() + self._wordsize = self.elffile.structs.Elf_word.sizeof() + self._xwordsize = self.elffile.structs.Elf_xword.sizeof() self._chain_pos = start_offset + 4 * self._wordsize + \ self.params['bloom_size'] * self._xwordsize + \ self.params['nbuckets'] * self._wordsize diff --git a/elftools/elf/notes.py b/elftools/elf/notes.py index 90006028..4ac2b607 100644 --- a/elftools/elf/notes.py +++ b/elftools/elf/notes.py @@ -7,7 +7,7 @@ # This code is in the public domain #------------------------------------------------------------------------------- from ..common.utils import struct_parse, bytes2hex, roundup, bytes2str -from ..construct import CString +from construct import CString def iter_notes(elffile, offset, size): @@ -27,8 +27,7 @@ def iter_notes(elffile, offset, size): elffile.stream.seek(offset) # n_namesz is 4-byte aligned. 
disk_namesz = roundup(note['n_namesz'], 2) - note['n_name'] = bytes2str( - CString('').parse(elffile.stream.read(disk_namesz))) + note['n_name'] = CString('utf-8').parse(elffile.stream.read(disk_namesz)) offset += disk_namesz desc_data = elffile.stream.read(note['n_descsz']) diff --git a/elftools/elf/relocation.py b/elftools/elf/relocation.py index 5aa55247..45b3b0e4 100644 --- a/elftools/elf/relocation.py +++ b/elftools/elf/relocation.py @@ -14,9 +14,9 @@ from .enums import ( ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, ENUM_RELOC_TYPE_MIPS, ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64, - ENUM_RELOC_TYPE_S390X, ENUM_RELOC_TYPE_BPF, ENUM_RELOC_TYPE_LOONGARCH, - ENUM_D_TAG) -from ..construct import Container + ENUM_RELOC_TYPE_BPF, ENUM_D_TAG, ENUM_RELOC_TYPE_LOONGARCH, + ENUM_RELOC_TYPE_S390X) +from construct.core import Container class Relocation(object): @@ -176,7 +176,7 @@ def iter_relocations(self): # Advance 'base' past the current bitmap (8 == CHAR_BIT). There # are 63 (or 31 for 32-bit ELFs) entries in each bitmap, and # every bit corresponds to an ELF_addr-sized relocation. - base += (8 * self._entrysize - 1) * self._elffile.structs.Elf_addr('').sizeof() + base += (8 * self._entrysize - 1) * self._elffile.structs.Elf_addr.sizeof() # Advance to the next entry relr += self._entrysize @@ -299,13 +299,13 @@ def _do_apply_relocation(self, stream, reloc, symtab): # 0. Find out which struct we're going to be using to read this value # from the stream and write it back. 
if recipe.bytesize == 4: - value_struct = self.elffile.structs.Elf_word('') + value_struct = self.elffile.structs.Elf_word elif recipe.bytesize == 8: - value_struct = self.elffile.structs.Elf_word64('') + value_struct = self.elffile.structs.Elf_word64 elif recipe.bytesize == 1: - value_struct = self.elffile.structs.Elf_byte('') + value_struct = self.elffile.structs.Elf_byte elif recipe.bytesize == 2: - value_struct = self.elffile.structs.Elf_half('') + value_struct = self.elffile.structs.Elf_half else: raise ELFRelocationError('Invalid bytesize %s for relocation' % recipe.bytesize) diff --git a/elftools/elf/sections.py b/elftools/elf/sections.py index 3805962e..46a7b6ee 100644 --- a/elftools/elf/sections.py +++ b/elftools/elf/sections.py @@ -160,7 +160,7 @@ def get_section_index(self, n): The section contains an array of Elf32_word values with one entry for every symbol in the associated symbol table. """ - return struct_parse(self.elffile.structs.Elf_word(''), self.stream, + return struct_parse(self.elffile.structs.Elf_word, self.stream, self['sh_offset'] + n * self['sh_entsize']) @@ -429,7 +429,7 @@ def __init__(self, header, name, elffile, subsection): super(AttributesSection, self).__init__(header, name, elffile) self.subsection = subsection - fv = struct_parse(self.structs.Elf_byte('format_version'), + fv = struct_parse(self.structs.Elf_byte, self.stream, self['sh_offset']) @@ -480,38 +480,34 @@ def __init__(self, structs, stream): struct_parse(structs.Elf_Arm_Attribute_Tag, stream)) if self.tag in ('TAG_FILE', 'TAG_SECTION', 'TAG_SYMBOL'): - self.value = struct_parse(structs.Elf_word('value'), stream) + self.value = struct_parse(structs.Elf_word, stream) if self.tag != 'TAG_FILE': self.extra = [] - s_number = struct_parse(structs.Elf_uleb128('s_number'), stream) + s_number = struct_parse(structs.Elf_uleb128, stream) while s_number != 0: self.extra.append(s_number) - s_number = struct_parse(structs.Elf_uleb128('s_number'), - stream) + s_number = 
struct_parse(structs.Elf_uleb128, stream) elif self.tag in ('TAG_CPU_RAW_NAME', 'TAG_CPU_NAME', 'TAG_CONFORMANCE'): - self.value = struct_parse(structs.Elf_ntbs('value', - encoding='utf-8'), - stream) + self.value = struct_parse(structs.Elf_ntbs, stream) elif self.tag == 'TAG_COMPATIBILITY': - self.value = struct_parse(structs.Elf_uleb128('value'), stream) - self.extra = struct_parse(structs.Elf_ntbs('vendor_name', - encoding='utf-8'), - stream) + self.value = struct_parse(structs.Elf_uleb128, stream) + # vendor_name + self.extra = struct_parse(structs.Elf_ntbs, stream) elif self.tag == 'TAG_ALSO_COMPATIBLE_WITH': self.value = ARMAttribute(structs, stream) - if type(self.value.value) is not str: - nul = struct_parse(structs.Elf_byte('nul'), stream) + if not isinstance(self.value.value, str): + nul = struct_parse(structs.Elf_byte, stream) elf_assert(nul == 0, "Invalid terminating byte %r, expecting NUL." % nul) else: - self.value = struct_parse(structs.Elf_uleb128('value'), stream) + self.value = struct_parse(structs.Elf_uleb128, stream) class ARMAttributesSubsubsection(AttributesSubsubsection): @@ -548,24 +544,23 @@ def __init__(self, structs, stream): struct_parse(structs.Elf_RiscV_Attribute_Tag, stream)) if self.tag in ('TAG_FILE', 'TAG_SECTION', 'TAG_SYMBOL'): - self.value = struct_parse(structs.Elf_word('value'), stream) + self.value = struct_parse(structs.Elf_word, stream) if self.tag != 'TAG_FILE': self.extra = [] - s_number = struct_parse(structs.Elf_uleb128('s_number'), stream) + s_number = struct_parse(structs.Elf_uleb128, stream) while s_number != 0: self.extra.append(s_number) - s_number = struct_parse(structs.Elf_uleb128('s_number'), + s_number = struct_parse(structs.Elf_uleb128, stream) elif self.tag == 'TAG_ARCH': - self.value = struct_parse(structs.Elf_ntbs('value', - encoding='utf-8'), + self.value = struct_parse(structs.Elf_ntbs, stream) else: - self.value = struct_parse(structs.Elf_uleb128('value'), stream) + self.value = 
struct_parse(structs.Elf_uleb128, stream) class RISCVAttributesSubsubsection(AttributesSubsubsection): diff --git a/elftools/elf/segments.py b/elftools/elf/segments.py index 0c318e17..e1073722 100644 --- a/elftools/elf/segments.py +++ b/elftools/elf/segments.py @@ -6,7 +6,7 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from ..construct import CString +from construct import CString from ..common.utils import struct_parse from .constants import SH_FLAGS from .notes import iter_notes @@ -100,7 +100,7 @@ def get_interp_name(self): """ path_offset = self['p_offset'] return struct_parse( - CString('', encoding='utf-8'), + CString('utf-8'), self.stream, stream_pos=path_offset) diff --git a/elftools/elf/structs.py b/elftools/elf/structs.py index 34088456..62918843 100644 --- a/elftools/elf/structs.py +++ b/elftools/elf/structs.py @@ -7,14 +7,15 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from ..construct import ( - UBInt8, UBInt16, UBInt32, UBInt64, - ULInt8, ULInt16, ULInt32, ULInt64, - SBInt32, SLInt32, SBInt64, SLInt64, - Struct, Array, Enum, Padding, BitStruct, BitField, Value, String, CString, - Switch, Field - ) -from ..common.construct_utils import ULEB128 +from construct import ( + Int8ub, Int16ub, Int32ub, Int64ub, + Int8ul, Int16ul, Int32ul, Int64ul, + Int32sb, Int32sl, Int64sb, Int64sl, + Struct, Array, Enum, Padding, BitStruct, + BitsInteger, Computed, CString, Switch, Bytes, + VarInt +) +from ..common.construct_utils import CStringBytes from ..common.utils import roundup from .enums import * @@ -62,25 +63,25 @@ def create_basic_structs(self): initial determining of ELF type. 
""" if self.little_endian: - self.Elf_byte = ULInt8 - self.Elf_half = ULInt16 - self.Elf_word = ULInt32 - self.Elf_word64 = ULInt64 - self.Elf_addr = ULInt32 if self.elfclass == 32 else ULInt64 + self.Elf_byte = Int8ul + self.Elf_half = Int16ul + self.Elf_word = Int32ul + self.Elf_word64 = Int64ul + self.Elf_addr = Int32ul if self.elfclass == 32 else Int64ul self.Elf_offset = self.Elf_addr - self.Elf_sword = SLInt32 - self.Elf_xword = ULInt32 if self.elfclass == 32 else ULInt64 - self.Elf_sxword = SLInt32 if self.elfclass == 32 else SLInt64 + self.Elf_sword = Int32sl + self.Elf_xword = Int32ul if self.elfclass == 32 else Int64ul + self.Elf_sxword = Int32sl if self.elfclass == 32 else Int64sl else: - self.Elf_byte = UBInt8 - self.Elf_half = UBInt16 - self.Elf_word = UBInt32 - self.Elf_word64 = UBInt64 - self.Elf_addr = UBInt32 if self.elfclass == 32 else UBInt64 + self.Elf_byte = Int8ub + self.Elf_half = Int16ub + self.Elf_word = Int32ub + self.Elf_word64 = Int64ub + self.Elf_addr = Int32ub if self.elfclass == 32 else Int64ub self.Elf_offset = self.Elf_addr - self.Elf_sword = SBInt32 - self.Elf_xword = UBInt32 if self.elfclass == 32 else UBInt64 - self.Elf_sxword = SBInt32 if self.elfclass == 32 else SBInt64 + self.Elf_sword = Int32sb + self.Elf_xword = Int32ub if self.elfclass == 32 else Int64ub + self.Elf_sxword = Int32sb if self.elfclass == 32 else Int64sb self._create_ehdr() self._create_leb128() self._create_ntbs() @@ -116,36 +117,36 @@ def create_advanced_structs(self, e_type=None, e_machine=None, e_ident_osabi=Non #-------------------------------- PRIVATE --------------------------------# def _create_ehdr(self): - self.Elf_Ehdr = Struct('Elf_Ehdr', - Struct('e_ident', - Array(4, self.Elf_byte('EI_MAG')), - Enum(self.Elf_byte('EI_CLASS'), **ENUM_EI_CLASS), - Enum(self.Elf_byte('EI_DATA'), **ENUM_EI_DATA), - Enum(self.Elf_byte('EI_VERSION'), **ENUM_E_VERSION), - Enum(self.Elf_byte('EI_OSABI'), **ENUM_EI_OSABI), - self.Elf_byte('EI_ABIVERSION'), + self.Elf_Ehdr 
= Struct( + 'e_ident' / Struct( + 'EI_MAG' / Array(4, self.Elf_byte), + 'EI_CLASS' / Enum(self.Elf_byte, **ENUM_EI_CLASS), + 'EI_DATA' / Enum(self.Elf_byte, **ENUM_EI_DATA), + 'EI_VERSION' / Enum(self.Elf_byte, **ENUM_E_VERSION), + 'EI_OSABI' / Enum(self.Elf_byte, **ENUM_EI_OSABI), + 'EI_ABIVERSION' / self.Elf_byte, Padding(7) ), - Enum(self.Elf_half('e_type'), **ENUM_E_TYPE), - Enum(self.Elf_half('e_machine'), **ENUM_E_MACHINE), - Enum(self.Elf_word('e_version'), **ENUM_E_VERSION), - self.Elf_addr('e_entry'), - self.Elf_offset('e_phoff'), - self.Elf_offset('e_shoff'), - self.Elf_word('e_flags'), - self.Elf_half('e_ehsize'), - self.Elf_half('e_phentsize'), - self.Elf_half('e_phnum'), - self.Elf_half('e_shentsize'), - self.Elf_half('e_shnum'), - self.Elf_half('e_shstrndx'), - ) + 'e_type' / Enum(self.Elf_half, **ENUM_E_TYPE), + 'e_machine' / Enum(self.Elf_half, **ENUM_E_MACHINE), + 'e_version' / Enum(self.Elf_word, **ENUM_E_VERSION), + 'e_entry' / self.Elf_addr, + 'e_phoff' / self.Elf_offset, + 'e_shoff' / self.Elf_offset, + 'e_flags' / self.Elf_word, + 'e_ehsize' / self.Elf_half, + 'e_phentsize' / self.Elf_half, + 'e_phnum' / self.Elf_half, + 'e_shentsize' / self.Elf_half, + 'e_shnum' / self.Elf_half, + 'e_shstrndx' / self.Elf_half, + ).compile() def _create_leb128(self): - self.Elf_uleb128 = ULEB128 + self.Elf_uleb128 = VarInt def _create_ntbs(self): - self.Elf_ntbs = CString + self.Elf_ntbs = CString('utf8') def _create_phdr(self): p_type_dict = ENUM_P_TYPE_BASE @@ -159,27 +160,27 @@ def _create_phdr(self): p_type_dict = ENUM_P_TYPE_RISCV if self.elfclass == 32: - self.Elf_Phdr = Struct('Elf_Phdr', - Enum(self.Elf_word('p_type'), **p_type_dict), - self.Elf_offset('p_offset'), - self.Elf_addr('p_vaddr'), - self.Elf_addr('p_paddr'), - self.Elf_word('p_filesz'), - self.Elf_word('p_memsz'), - self.Elf_word('p_flags'), - self.Elf_word('p_align'), - ) + self.Elf_Phdr = Struct( + 'p_type' / Enum(self.Elf_word, **p_type_dict), + 'p_offset' / self.Elf_offset, + 'p_vaddr' 
/ self.Elf_addr, + 'p_paddr' / self.Elf_addr, + 'p_filesz' / self.Elf_word, + 'p_memsz' / self.Elf_word, + 'p_flags' / self.Elf_word, + 'p_align' / self.Elf_word, + ).compile() else: # 64 - self.Elf_Phdr = Struct('Elf_Phdr', - Enum(self.Elf_word('p_type'), **p_type_dict), - self.Elf_word('p_flags'), - self.Elf_offset('p_offset'), - self.Elf_addr('p_vaddr'), - self.Elf_addr('p_paddr'), - self.Elf_xword('p_filesz'), - self.Elf_xword('p_memsz'), - self.Elf_xword('p_align'), - ) + self.Elf_Phdr = Struct( + 'p_type' / Enum(self.Elf_word, **p_type_dict), + 'p_flags' / self.Elf_word, + 'p_offset' / self.Elf_offset, + 'p_vaddr' / self.Elf_addr, + 'p_paddr' / self.Elf_addr, + 'p_filesz' / self.Elf_xword, + 'p_memsz' / self.Elf_xword, + 'p_align' / self.Elf_xword, + ).compile() def _create_shdr(self): """Section header parsing. @@ -196,18 +197,18 @@ def _create_shdr(self): if self.e_machine == 'EM_RISCV': sh_type_dict = ENUM_SH_TYPE_RISCV - self.Elf_Shdr = Struct('Elf_Shdr', - self.Elf_word('sh_name'), - Enum(self.Elf_word('sh_type'), **sh_type_dict), - self.Elf_xword('sh_flags'), - self.Elf_addr('sh_addr'), - self.Elf_offset('sh_offset'), - self.Elf_xword('sh_size'), - self.Elf_word('sh_link'), - self.Elf_word('sh_info'), - self.Elf_xword('sh_addralign'), - self.Elf_xword('sh_entsize'), - ) + self.Elf_Shdr = Struct( + 'sh_name' / self.Elf_word, + 'sh_type' / Enum(self.Elf_word, **sh_type_dict), + 'sh_flags' / self.Elf_xword, + 'sh_addr' / self.Elf_addr, + 'sh_offset' / self.Elf_offset, + 'sh_size' / self.Elf_xword, + 'sh_link' / self.Elf_word, + 'sh_info' / self.Elf_word, + 'sh_addralign' / self.Elf_xword, + 'sh_entsize' / self.Elf_xword, + ).compile() def _create_chdr(self): # Structure of compressed sections header. 
It is documented in Oracle @@ -215,72 +216,74 @@ def _create_chdr(self): # Interface, Chapter 13 Object File Format, Section Compression: # https://docs.oracle.com/cd/E53394_01/html/E54813/section_compression.html fields = [ - Enum(self.Elf_word('ch_type'), **ENUM_ELFCOMPRESS_TYPE), - self.Elf_xword('ch_size'), - self.Elf_xword('ch_addralign'), + 'ch_type' / Enum(self.Elf_word, **ENUM_ELFCOMPRESS_TYPE), + 'ch_size' / self.Elf_xword, + 'ch_addralign' / self.Elf_xword, ] + if self.elfclass == 64: - fields.insert(1, self.Elf_word('ch_reserved')) - self.Elf_Chdr = Struct('Elf_Chdr', *fields) + fields.insert(1, 'ch_reserved' / self.Elf_word) + + self.Elf_Chdr = Struct(*fields) def _create_rel(self): # r_info is also taken apart into r_info_sym and r_info_type. This is # done in Value to avoid endianity issues while parsing. if self.elfclass == 32: - fields = [self.Elf_xword('r_info'), - Value('r_info_sym', - lambda ctx: (ctx['r_info'] >> 8) & 0xFFFFFF), - Value('r_info_type', - lambda ctx: ctx['r_info'] & 0xFF)] + fields = [ + 'r_info' / self.Elf_xword, + 'r_info_sym' / Computed(lambda ctx: (ctx['r_info'] >> 8) & 0xFFFFFF), + 'r_info_type' / Computed(lambda ctx: ctx['r_info'] & 0xFF) + ] elif self.e_machine == 'EM_MIPS': # ELF64 MIPS fields = [ # The MIPS ELF64 specification # (https://www.linux-mips.org/pub/linux/mips/doc/ABI/elf64-2.4.pdf) # provides a non-standard relocation structure definition. - self.Elf_word('r_sym'), - self.Elf_byte('r_ssym'), - self.Elf_byte('r_type3'), - self.Elf_byte('r_type2'), - self.Elf_byte('r_type'), + 'r_sym' / self.Elf_word, + 'r_ssym' / self.Elf_byte, + 'r_type3' / self.Elf_byte, + 'r_type2' / self.Elf_byte, + 'r_type' / self.Elf_byte, # Synthetize usual fields for compatibility with other # architectures. This allows relocation consumers (including # our readelf tests) to work without worrying about MIPS64 # oddities. 
- Value('r_info_sym', lambda ctx: ctx['r_sym']), - Value('r_info_ssym', lambda ctx: ctx['r_ssym']), - Value('r_info_type', lambda ctx: ctx['r_type']), - Value('r_info_type2', lambda ctx: ctx['r_type2']), - Value('r_info_type3', lambda ctx: ctx['r_type3']), - Value('r_info', - lambda ctx: (ctx['r_sym'] << 32) + 'r_info_sym' / Computed(lambda ctx: ctx['r_sym']), + 'r_info_ssym' / Computed(lambda ctx: ctx['r_ssym']), + 'r_info_type' / Computed(lambda ctx: ctx['r_type']), + 'r_info_type2' / Computed(lambda ctx: ctx['r_type2']), + 'r_info_type3' / Computed(lambda ctx: ctx['r_type3']), + 'r_info' / Computed(lambda ctx: (ctx['r_sym'] << 32) | (ctx['r_ssym'] << 24) | (ctx['r_type3'] << 16) | (ctx['r_type2'] << 8) - | ctx['r_type']), + | ctx['r_type']) ] else: # Other 64 ELFs - fields = [self.Elf_xword('r_info'), - Value('r_info_sym', - lambda ctx: (ctx['r_info'] >> 32) & 0xFFFFFFFF), - Value('r_info_type', - lambda ctx: ctx['r_info'] & 0xFFFFFFFF)] - - self.Elf_Rel = Struct('Elf_Rel', - self.Elf_addr('r_offset'), - *fields) - - fields_and_addend = fields + [self.Elf_sxword('r_addend')] - self.Elf_Rela = Struct('Elf_Rela', - self.Elf_addr('r_offset'), - *fields_and_addend + fields = [ + 'r_info' / self.Elf_xword, + 'r_info_sym' / Computed(lambda ctx: (ctx['r_info'] >> 32) & 0xFFFFFFFF), + 'r_info_type' / Computed(lambda ctx: ctx['r_info'] & 0xFFFFFFFF) + ] + + self.Elf_Rel = Struct( + 'r_offset' / self.Elf_addr, + *fields ) + fields_and_addend = fields + ['r_addend' / self.Elf_sxword] + self.Elf_Rela = Struct( + 'r_offset' / self.Elf_addr, + *fields_and_addend + ) # Can't compile - lambdas + # Elf32_Relr is typedef'd as Elf32_Word, Elf64_Relr as Elf64_Xword # (see the glibc patch, for example: # https://sourceware.org/pipermail/libc-alpha/2021-October/132029.html) # For us, this is the same as self.Elf_addr (or self.Elf_xword). 
- self.Elf_Relr = Struct('Elf_Relr', self.Elf_addr('r_offset')) + self.Elf_Relr = Struct('r_offset' / self.Elf_addr).compile() def _create_dyn(self): d_tag_dict = dict(ENUM_D_TAG_COMMON) @@ -289,107 +292,110 @@ def _create_dyn(self): elif self.e_ident_osabi == 'ELFOSABI_SOLARIS': d_tag_dict.update(ENUM_D_TAG_SOLARIS) - self.Elf_Dyn = Struct('Elf_Dyn', - Enum(self.Elf_sxword('d_tag'), **d_tag_dict), - self.Elf_xword('d_val'), - Value('d_ptr', lambda ctx: ctx['d_val']), - ) + self.Elf_Dyn = Struct( + 'd_tag' / Enum(self.Elf_sxword, **d_tag_dict), + 'd_val' / self.Elf_xword, + 'd_ptr' / Computed(lambda ctx: ctx['d_val']), + ) # Can't compile - lambdas def _create_sym(self): # st_info is hierarchical. To access the type, use # container['st_info']['type'] - st_info_struct = BitStruct('st_info', - Enum(BitField('bind', 4), **ENUM_ST_INFO_BIND), - Enum(BitField('type', 4), **ENUM_ST_INFO_TYPE)) + st_info_struct = BitStruct( + 'bind' / Enum(BitsInteger(4), **ENUM_ST_INFO_BIND), + 'type' / Enum(BitsInteger(4), **ENUM_ST_INFO_TYPE) + ) # st_other is hierarchical. To access the visibility, # use container['st_other']['visibility'] - st_other_struct = BitStruct('st_other', + st_other_struct = BitStruct( # https://openpowerfoundation.org/wp-content/uploads/2016/03/ABI64BitOpenPOWERv1.1_16July2015_pub4.pdf # See 3.4.1 Symbol Values. 
- Enum(BitField('local', 3), **ENUM_ST_LOCAL), + 'local' / Enum(BitsInteger(3), **ENUM_ST_LOCAL), Padding(2), - Enum(BitField('visibility', 3), **ENUM_ST_VISIBILITY)) + 'visibility' / Enum(BitsInteger(3), **ENUM_ST_VISIBILITY) + ) if self.elfclass == 32: - self.Elf_Sym = Struct('Elf_Sym', - self.Elf_word('st_name'), - self.Elf_addr('st_value'), - self.Elf_word('st_size'), - st_info_struct, - st_other_struct, - Enum(self.Elf_half('st_shndx'), **ENUM_ST_SHNDX), + self.Elf_Sym = Struct( + 'st_name' / self.Elf_word, + 'st_value' / self.Elf_addr, + 'st_size' / self.Elf_word, + 'st_info' / st_info_struct, + 'st_other' / st_other_struct, + 'st_shndx' / Enum(self.Elf_half, **ENUM_ST_SHNDX), ) else: - self.Elf_Sym = Struct('Elf_Sym', - self.Elf_word('st_name'), - st_info_struct, - st_other_struct, - Enum(self.Elf_half('st_shndx'), **ENUM_ST_SHNDX), - self.Elf_addr('st_value'), - self.Elf_xword('st_size'), + self.Elf_Sym = Struct( + 'st_name' / self.Elf_word, + 'st_info' / st_info_struct, + 'st_other' / st_other_struct, + 'st_shndx' / Enum(self.Elf_half, **ENUM_ST_SHNDX), + 'st_value' / self.Elf_addr, + 'st_size' / self.Elf_xword, ) def _create_sunw_syminfo(self): - self.Elf_Sunw_Syminfo = Struct('Elf_Sunw_Syminfo', - Enum(self.Elf_half('si_boundto'), **ENUM_SUNW_SYMINFO_BOUNDTO), - self.Elf_half('si_flags'), - ) + self.Elf_Sunw_Syminfo = Struct( + 'si_boundto' / Enum(self.Elf_half, **ENUM_SUNW_SYMINFO_BOUNDTO), + 'si_flags' / self.Elf_half, + ).compile() def _create_gnu_verneed(self): # Structure of "version needed" entries is documented in # Oracle "Linker and Libraries Guide", Chapter 13 Object File Format - self.Elf_Verneed = Struct('Elf_Verneed', - self.Elf_half('vn_version'), - self.Elf_half('vn_cnt'), - self.Elf_word('vn_file'), - self.Elf_word('vn_aux'), - self.Elf_word('vn_next'), - ) - self.Elf_Vernaux = Struct('Elf_Vernaux', - self.Elf_word('vna_hash'), - self.Elf_half('vna_flags'), - self.Elf_half('vna_other'), - self.Elf_word('vna_name'), - 
self.Elf_word('vna_next'), - ) + self.Elf_Verneed = Struct( + 'vn_version' / self.Elf_half, + 'vn_cnt' / self.Elf_half, + 'vn_file' / self.Elf_word, + 'vn_aux' / self.Elf_word, + 'vn_next' / self.Elf_word, + ).compile() + self.Elf_Vernaux = Struct( + 'vna_hash' / self.Elf_word, + 'vna_flags' / self.Elf_half, + 'vna_other' / self.Elf_half, + 'vna_name' / self.Elf_word, + 'vna_next' / self.Elf_word, + ).compile() def _create_gnu_verdef(self): # Structure of "version definition" entries are documented in # Oracle "Linker and Libraries Guide", Chapter 13 Object File Format - self.Elf_Verdef = Struct('Elf_Verdef', - self.Elf_half('vd_version'), - self.Elf_half('vd_flags'), - self.Elf_half('vd_ndx'), - self.Elf_half('vd_cnt'), - self.Elf_word('vd_hash'), - self.Elf_word('vd_aux'), - self.Elf_word('vd_next'), - ) - self.Elf_Verdaux = Struct('Elf_Verdaux', - self.Elf_word('vda_name'), - self.Elf_word('vda_next'), - ) + self.Elf_Verdef = Struct( + 'vd_version' / self.Elf_half, + 'vd_flags' / self.Elf_half, + 'vd_ndx' / self.Elf_half, + 'vd_cnt' / self.Elf_half, + 'vd_hash' / self.Elf_word, + 'vd_aux' / self.Elf_word, + 'vd_next' / self.Elf_word, + ).compile() + self.Elf_Verdaux = Struct( + 'vda_name' / self.Elf_word, + 'vda_next' / self.Elf_word, + ).compile() def _create_gnu_versym(self): # Structure of "version symbol" entries are documented in # Oracle "Linker and Libraries Guide", Chapter 13 Object File Format - self.Elf_Versym = Struct('Elf_Versym', - Enum(self.Elf_half('ndx'), **ENUM_VERSYM), - ) + self.Elf_Versym = Struct( + 'ndx' / Enum(self.Elf_half, **ENUM_VERSYM), + ).compile() def _create_gnu_abi(self): # Structure of GNU ABI notes is documented in # https://code.woboq.org/userspace/glibc/csu/abi-note.S.html - self.Elf_abi = Struct('Elf_abi', - Enum(self.Elf_word('abi_os'), **ENUM_NOTE_ABI_TAG_OS), - self.Elf_word('abi_major'), - self.Elf_word('abi_minor'), - self.Elf_word('abi_tiny'), - ) + self.Elf_abi = Struct( + 'abi_os' / Enum(self.Elf_word, 
**ENUM_NOTE_ABI_TAG_OS), + 'abi_major' / self.Elf_word, + 'abi_minor' / self.Elf_word, + 'abi_tiny' / self.Elf_word, + ).compile() def _create_gnu_debugaltlink(self): - self.Elf_debugaltlink = Struct('Elf_debugaltlink', - CString("sup_filename"), - String("sup_checksum", length=20)) + self.Elf_debugaltlink = Struct( + 'sup_filename' / CStringBytes, + 'sup_checksum' / Bytes(20), + ).compile() def _create_gnu_property(self): # Structure of GNU property notes is documented in @@ -400,7 +406,7 @@ def roundup_padding(ctx): return roundup(ctx.pr_datasz, 3) - ctx.pr_datasz def classify_pr_data(ctx): - if type(ctx.pr_type) is not str: + if not isinstance(ctx.pr_type, str): return None if ctx.pr_type.startswith('GNU_PROPERTY_X86_'): return ('GNU_PROPERTY_X86_*', 4, 0) @@ -408,19 +414,19 @@ def classify_pr_data(ctx): return ('GNU_PROPERTY_AARCH64_*', 4, 0) return (ctx.pr_type, ctx.pr_datasz, self.elfclass) - self.Elf_Prop = Struct('Elf_Prop', - Enum(self.Elf_word('pr_type'), **ENUM_NOTE_GNU_PROPERTY_TYPE), - self.Elf_word('pr_datasz'), - Switch('pr_data', classify_pr_data, { - ('GNU_PROPERTY_STACK_SIZE', 4, 32): self.Elf_word('pr_data'), - ('GNU_PROPERTY_STACK_SIZE', 8, 64): self.Elf_word64('pr_data'), - ('GNU_PROPERTY_X86_*', 4, 0): self.Elf_word('pr_data'), - ('GNU_PROPERTY_AARCH64_*', 4, 0): self.Elf_word('pr_data'), + self.Elf_Prop = Struct( + 'pr_type' / Enum(self.Elf_word, **ENUM_NOTE_GNU_PROPERTY_TYPE), + 'pr_datasz' / self.Elf_word, + 'pr_data' / Switch(classify_pr_data, { + ('GNU_PROPERTY_STACK_SIZE', 4, 32): self.Elf_word, + ('GNU_PROPERTY_STACK_SIZE', 8, 64): self.Elf_word64, + ('GNU_PROPERTY_X86_*', 4, 0): self.Elf_word, + ('GNU_PROPERTY_AARCH64_*', 4, 0): self.Elf_word, }, - default=Field('pr_data', lambda ctx: ctx.pr_datasz) + default=Bytes(lambda ctx: ctx.pr_datasz) ), Padding(roundup_padding) - ) + ) # Can't compile def _create_note(self, e_type=None): # Structure of "PT_NOTE" section @@ -438,98 +444,97 @@ def _create_note(self, e_type=None): 'EM_SPARC', } 
else self.Elf_word - self.Elf_Nhdr = Struct('Elf_Nhdr', - self.Elf_word('n_namesz'), - self.Elf_word('n_descsz'), - Enum(self.Elf_word('n_type'), - **(ENUM_NOTE_N_TYPE if e_type != "ET_CORE" - else ENUM_CORE_NOTE_N_TYPE)), - ) + self.Elf_Nhdr = Struct( + 'n_namesz' / self.Elf_word, + 'n_descsz' / self.Elf_word, + 'n_type' / Enum(self.Elf_word, **(ENUM_NOTE_N_TYPE if e_type != "ET_CORE" else ENUM_CORE_NOTE_N_TYPE)), + ).compile() # A process psinfo structure according to # http://elixir.free-electrons.com/linux/v2.6.35/source/include/linux/elfcore.h#L84 if self.elfclass == 32: - self.Elf_Prpsinfo = Struct('Elf_Prpsinfo', - self.Elf_byte('pr_state'), - String('pr_sname', 1), - self.Elf_byte('pr_zomb'), - self.Elf_byte('pr_nice'), - self.Elf_xword('pr_flag'), - self.Elf_ugid('pr_uid'), - self.Elf_ugid('pr_gid'), - self.Elf_word('pr_pid'), - self.Elf_word('pr_ppid'), - self.Elf_word('pr_pgrp'), - self.Elf_word('pr_sid'), - String('pr_fname', 16), - String('pr_psargs', 80), - ) + self.Elf_Prpsinfo = Struct( + 'pr_state' / self.Elf_byte, + 'pr_sname' / Bytes(1), + 'pr_zomb' / self.Elf_byte, + 'pr_nice' / self.Elf_byte, + 'pr_flag' / self.Elf_xword, + 'pr_uid' / self.Elf_ugid, + 'pr_gid' / self.Elf_ugid, + 'pr_pid' / self.Elf_word, + 'pr_ppid' / self.Elf_word, + 'pr_pgrp' / self.Elf_word, + 'pr_sid' / self.Elf_word, + 'pr_fname' / Bytes(16), + 'pr_psargs' / Bytes(80), + ).compile() else: # 64 - self.Elf_Prpsinfo = Struct('Elf_Prpsinfo', - self.Elf_byte('pr_state'), - String('pr_sname', 1), - self.Elf_byte('pr_zomb'), - self.Elf_byte('pr_nice'), + self.Elf_Prpsinfo = Struct( + 'pr_state' / self.Elf_byte, + 'pr_sname' / Bytes(1), + 'pr_zomb' / self.Elf_byte, + 'pr_nice' / self.Elf_byte, Padding(4), - self.Elf_xword('pr_flag'), - self.Elf_ugid('pr_uid'), - self.Elf_ugid('pr_gid'), - self.Elf_word('pr_pid'), - self.Elf_word('pr_ppid'), - self.Elf_word('pr_pgrp'), - self.Elf_word('pr_sid'), - String('pr_fname', 16), - String('pr_psargs', 80), - ) + 'pr_flag' / self.Elf_xword, 
+ 'pr_uid' / self.Elf_ugid, + 'pr_gid' / self.Elf_ugid, + 'pr_pid' / self.Elf_word, + 'pr_ppid' / self.Elf_word, + 'pr_pgrp' / self.Elf_word, + 'pr_sid' / self.Elf_word, + 'pr_fname' / Bytes(16), + 'pr_psargs' / Bytes(80), + ).compile() # A PT_NOTE of type NT_FILE matching the definition in # https://chromium.googlesource.com/ # native_client/nacl-binutils/+/upstream/master/binutils/readelf.c # Line 15121 - self.Elf_Nt_File = Struct('Elf_Nt_File', - self.Elf_xword("num_map_entries"), - self.Elf_xword("page_size"), - Array(lambda ctx: ctx.num_map_entries, - Struct('Elf_Nt_File_Entry', - self.Elf_addr('vm_start'), - self.Elf_addr('vm_end'), - self.Elf_offset('page_offset'))), - Array(lambda ctx: ctx.num_map_entries, - CString('filename'))) + self.Elf_Nt_File = Struct( + 'num_map_entries' / self.Elf_xword, + 'page_size' / self.Elf_xword, + 'Elf_Nt_File_Entry' / Array(lambda ctx: ctx.num_map_entries, + Struct( + 'vm_start' / self.Elf_addr, + 'vm_end' / self.Elf_addr, + 'page_offset' / self.Elf_offset + ) + ), + 'filename' / Array(lambda ctx: ctx.num_map_entries, + CStringBytes + ) + ) # Can't compile, lambdas def _create_stabs(self): # Structure of one stabs entry, see binutils/bfd/stabs.c # Names taken from https://sourceware.org/gdb/current/onlinedocs/stabs.html#Overview - self.Elf_Stabs = Struct('Elf_Stabs', - self.Elf_word('n_strx'), - self.Elf_byte('n_type'), - self.Elf_byte('n_other'), - self.Elf_half('n_desc'), - self.Elf_word('n_value'), - ) + self.Elf_Stabs = Struct( + 'n_strx' / self.Elf_word, + 'n_type' / self.Elf_byte, + 'n_other' / self.Elf_byte, + 'n_desc' / self.Elf_half, + 'n_value' / self.Elf_word, + ).compile() def _create_attributes_subsection(self): # Structure of a build attributes subsection header. A subsection is # either public to all tools that process the ELF file or private to # the vendor's tools. 
- self.Elf_Attr_Subsection_Header = Struct('Elf_Attr_Subsection', - self.Elf_word('length'), - self.Elf_ntbs('vendor_name', - encoding='utf-8') + self.Elf_Attr_Subsection_Header = Struct( + 'length' / self.Elf_word, + 'vendor_name' / self.Elf_ntbs ) def _create_arm_attributes(self): # Structure of an ARM build attribute tag. - self.Elf_Arm_Attribute_Tag = Struct('Elf_Arm_Attribute_Tag', - Enum(self.Elf_uleb128('tag'), - **ENUM_ATTR_TAG_ARM) + self.Elf_Arm_Attribute_Tag = Struct( + 'tag' / Enum(self.Elf_uleb128, **ENUM_ATTR_TAG_ARM) ) def _create_riscv_attributes(self): # Structure of a RISC-V build attribute tag. - self.Elf_RiscV_Attribute_Tag = Struct('Elf_RiscV_Attribute_Tag', - Enum(self.Elf_uleb128('tag'), - **ENUM_ATTR_TAG_RISCV) + self.Elf_RiscV_Attribute_Tag = Struct( + 'tag' / Enum(self.Elf_uleb128, **ENUM_ATTR_TAG_RISCV) ) def _create_elf_hash(self): @@ -539,21 +544,23 @@ def _create_elf_hash(self): # Section: # https://docs.oracle.com/cd/E53394_01/html/E54813/chapter6-48031.html - self.Elf_Hash = Struct('Elf_Hash', - self.Elf_word('nbuckets'), - self.Elf_word('nchains'), - Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets')), - Array(lambda ctx: ctx['nchains'], self.Elf_word('chains'))) + self.Elf_Hash = Struct( + 'nbuckets' / self.Elf_word, + 'nchains' / self.Elf_word, + 'buckets' / Array(lambda ctx: ctx['nbuckets'], self.Elf_word), + 'chains' / Array(lambda ctx: ctx['nchains'], self.Elf_word) + ) def _create_gnu_hash(self): # Structure of the GNU-style hash table header. 
Documentation for this # table is mostly in the GLIBC source code, a good explanation of the # format can be found in this blog post: # https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ - self.Gnu_Hash = Struct('Gnu_Hash', - self.Elf_word('nbuckets'), - self.Elf_word('symoffset'), - self.Elf_word('bloom_size'), - self.Elf_word('bloom_shift'), - Array(lambda ctx: ctx['bloom_size'], self.Elf_xword('bloom')), - Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets'))) + self.Gnu_Hash = Struct( + 'nbuckets' / self.Elf_word, + 'symoffset' / self.Elf_word, + 'bloom_size' / self.Elf_word, + 'bloom_shift' / self.Elf_word, + 'bloom' / Array(lambda ctx: ctx['bloom_size'], self.Elf_xword), + 'buckets' / Array(lambda ctx: ctx['nbuckets'], self.Elf_word) + ) diff --git a/setup.py b/setup.py index 57dfa0f6..d0b633d4 100644 --- a/setup.py +++ b/setup.py @@ -41,9 +41,12 @@ 'elftools.common', 'elftools.dwarf', 'elftools.ehabi', - 'elftools.construct', 'elftools.construct.lib', ], scripts=['scripts/readelf.py'], - package_data={'elftools': ['py.typed']} + package_data={'elftools': ['py.typed']}, + + install_requires=[ + 'construct>=2.10.70' + ], ) diff --git a/test/run_parser_perf_test.py b/test/run_parser_perf_test.py new file mode 100644 index 00000000..6b9cbb6f --- /dev/null +++ b/test/run_parser_perf_test.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +#------------------------------------------------------------------------------- +# test/run_parser_perf_test.py +# +# Time in-memory DWARF parsing of the dwarfdump test files +# +# Seva Alekseyev (sevaa@sprynet.com) +# This code is in the public domain +# +# This runs and times in-memory firehose DWARF parsing on all files from the dwarfdump autotest. +# The idea was to isolate the performance of the struct parsing logic alone. 
+#------------------------------------------------------------------------------- +from io import BytesIO +import os, sys, time +from utils import is_in_rootdir + +sys.path[0:0] = ['.'] + +from elftools.elf.elffile import ELFFile + +def parse_dwarf(ef): + di = ef.get_dwarf_info() + for cu in di.iter_CUs(): + for die in cu.iter_DIEs(): + # TODO: parse linked objects too + pass + +def slurp(filename): + with open(filename, "rb") as file: + return BytesIO(file.read()) + +def main(): + if not is_in_rootdir(): + print('Error: Please run me from the root dir of pyelftools!', file=sys.stderr) + return 1 + + root = os.path.join('.', 'test', 'testfiles_for_dwarfdump') + filenames = [filename for filename in os.listdir(root) if os.path.splitext(filename)[1] == '.elf'] + fileblobs = [slurp(os.path.join(root, filename)) for filename in filenames] + start_time = time.time() + for stream in fileblobs: + parse_dwarf(ELFFile(stream)) + print("--- %s seconds ---" % (time.time() - start_time)) + + +if __name__ == '__main__': + sys.exit(main()) \ No newline at end of file diff --git a/test/test_dynamic.py b/test/test_dynamic.py index a310d8ae..93bed036 100644 --- a/test/test_dynamic.py +++ b/test/test_dynamic.py @@ -106,7 +106,7 @@ def extract_sunw(filename): seen = set() for tag in dyn.iter_tags(): - if type(tag.entry.d_tag) is str and \ + if isinstance(tag.entry.d_tag, str) and \ tag.entry.d_tag.startswith("DT_SUNW"): seen.add(tag.entry.d_tag)