From 62a4239b0c479dceed2f36be1b4a19e594829b68 Mon Sep 17 00:00:00 2001 From: dipus Date: Wed, 4 Jan 2023 16:43:41 +0100 Subject: [PATCH] Preparing for version 1.2 --- README.md | 4 +- __init__.py | 14 +++- binaryninja_types.py | 2 - golang_parser.py | 135 +++++++++++++++++++++++++++++++++--- plugin.json | 4 +- types.py | 162 ++++++++++++++++++++++++++++++++++++++++--- 6 files changed, 294 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 4211e76..048c80f 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,9 @@ _BinaryNinja plugin to parse GoLang binaries and restore some information, like This plugin will parse a go binary and restore some information like: - Function names by parsing the `.gopclntab` section in the binary. If there is no section named .gopclntab it will try to search for it. -- Recover type information by parsing specific callsbuthe gopclntab and restore the function names extracting the information from the `.gopclntab` section in the binary. +- Comment the function with the filename from which the function comes +- Print the list of files in the binary +- Recover type information and names by parsing specific call sites. The plugin works for all GoLang version from 12 to 119. 
diff --git a/__init__.py b/__init__.py index 7afd94b..3dff754 100644 --- a/__init__.py +++ b/__init__.py @@ -1,17 +1,27 @@ from binaryninja import PluginCommand -from .golang_parser import rename_functions, create_types, parse_go_file +from .golang_parser import rename_functions, create_types, parse_go_file, print_files, comment_functions PluginCommand.register( "golang\\auto-rename functions (gopclntab)", "Automatically rename go functions based on information from gopclntab", rename_functions) +PluginCommand.register( + "golang\\Comment functions with filename (gopclntab)", + "Comment the functions adding the filename where the function was defined", + comment_functions) + PluginCommand.register( "golang\\Apply types", "Automatically apply type information", create_types) +PluginCommand.register( + "golang\\Print file list", + "Print on the console the list of files in the GoLang binary", + print_files) + PluginCommand.register( "golang\\Parse GoLang executable", "Automatically apply all the transformation in the right order", - parse_go_file) \ No newline at end of file + parse_go_file) diff --git a/binaryninja_types.py b/binaryninja_types.py index bed038c..c7b2002 100644 --- a/binaryninja_types.py +++ b/binaryninja_types.py @@ -49,7 +49,5 @@ int64_t gcData; int32_t nameoff; int32_t typeoff; - int64_t name; - int64_t mhdr; }; """) diff --git a/golang_parser.py b/golang_parser.py index 5ce89b4..ea2d5ec 100644 --- a/golang_parser.py +++ b/golang_parser.py @@ -1,10 +1,12 @@ import binaryninja as bn +import struct from binaryninja import Symbol, SymbolType from .binaryninja_types import * from .types import * + NAME = 'Golang Loader Helper' GoFixLogger = bn.Logger(0, NAME) @@ -28,10 +30,12 @@ def sanitize_gotype_name(name): class GoHelper(bn.plugin.BackgroundTaskThread): - def __init__(self, bv: bn.BinaryView): - super().__init__(NAME, True) + def __init__(self, bv: bn.BinaryView, name: str = None): + name = f"{NAME} ({name})" if name else NAME + 
super().__init__(name, True) self.bv = bv self.br = bn.binaryview.BinaryReader(bv) + # Consider caching the table as class variable self.gopclntab = None def init_gopclntab(self): @@ -110,9 +114,16 @@ def init_gopclntab(self): self.gopclntab.nfunctab = self.gopclntab.uintptr(8) self.gopclntab.funcdata = self.gopclntab.raw self.gopclntab.funcnametab = self.gopclntab.raw + self.gopclntab.pctab = self.gopclntab.raw self.gopclntab.functab = self.gopclntab.data_after_offset(8 + self.gopclntab.ptrsize) self.gopclntab.functabsize = (self.gopclntab.nfunctab * 2 + 1) * functabFieldSize + fileoff = struct.unpack("I", + self.gopclntab.functab[self.gopclntab.functabsize:self.gopclntab.functabsize + 4])[0] self.gopclntab.functab = self.gopclntab.functab[:self.gopclntab.functabsize] + self.gopclntab.filetab = self.gopclntab.data_after_offset(fileoff) + self.gopclntab.nfiletab = struct.unpack("I", self.gopclntab.filetab[:4])[0] + self.gopclntab.filetab = self.gopclntab.filetab[:(self.gopclntab.nfiletab + 1) * 4] + else: raise ValueError("Invalid go version") @@ -220,11 +231,76 @@ def rename_functions(self): log_info(f"Created {created} functions") log_info(f"Renamed {renamed - created} functions") log_info(f"Total {renamed} functions") + self.bv.update_analysis_and_wait() def run(self): return self.rename_functions() +class PrintFiles(GoHelper): + + def print_files(self): + try: + self.init_gopclntab() + except ValueError: + log_error("Golang version not supported") + return + + for fidx in range(self.gopclntab.nfiletab): + file_name = self.gopclntab.fileName(fidx) + log_info(file_name.decode('utf-8')) + + def run(self): + return self.print_files() + + +class FunctionCommenter(GoHelper): + + OVERRIDE_COMMENT = True + COMMENT_KEY = "File:" + + def comment_functions(self): + try: + self.init_gopclntab() + except ValueError: + log_error("Golang version not supported") + return + + log_info("Commenting functions based on .gopclntab section") + log_info(f"gopclntab contains 
{self.gopclntab.nfunctab} functions") + + commented = 0 + + for fidx in range(self.gopclntab.nfunctab): + if self.gopclntab.version == GoVersion.ver12: + function = self.gopclntab.go12FuncInfo(fidx) + else: + function = self.gopclntab.funcInfo(fidx) + function_addr = function.entry + + func = self.bv.get_function_at(function_addr) + # Parse only already existing functions + if not func: + continue + + filename = self.gopclntab.pc2filename(function) + if not filename: + continue + + if not self.OVERRIDE_COMMENT and func.comment: + log_debug("Already commented, skipping") + continue + + comment = f"{self.COMMENT_KEY} {filename.decode('utf-8')}" + func.comment = comment + commented += 1 + + log_info(f"Commented {commented} functions") + + def run(self): + return self.comment_functions() + + class TypeParser(GoHelper): TYPES = [ GO_KIND, @@ -245,6 +321,8 @@ def create_types(self): go_version = self.quick_go_version() log_debug(f"Go Version is {go_version}") + already_parsed = set() + for segment_name in ('.rodata', '__rodata'): rodata = self.get_section_by_name(segment_name) if rodata: @@ -280,8 +358,12 @@ def create_types(self): ptr_var.type = bn.Type.pointer(self.bv.arch, golang_type) log_debug(f"Parsing xrefs to {function.name}") for caller_site in function.caller_sites: - - mlil = caller_site.mlil + try: + mlil = caller_site.mlil + except: + log_debug("Unable to get the mlil for instruction") + continue + if not mlil or mlil.operation != bn.MediumLevelILOperation.MLIL_CALL: log_debug(f"Callsite at 0x{mlil.address:x} is not a call, skipping") continue @@ -292,6 +374,8 @@ def create_types(self): # funny enough `not ` will return `True` if go_data_type is None: continue + if param in already_parsed: + log_debug(f"Skipping already parsed at 0x{param:x}") go_data_type.type = golang_type # TODO figure out why sometime the type info are not there @@ -313,13 +397,19 @@ def create_types(self): log_debug(f"Found name at 0x{gotype.resolved_name_addr:x} with value {name}") 
sanitazed_name = sanitize_gotype_name(name) go_data_type.name = f"{sanitazed_name}_type" - # add cross-reference for convenience + # add cross-reference for convenience (both directions) self.bv.add_user_data_ref( gotype.address_off('nameOff'), gotype.resolved_name_addr) + self.bv.add_user_data_ref( + gotype.resolved_name_addr, + gotype.address_off('nameOff') + ) + name_datavar = self.bv.get_data_var_at(gotype.resolved_name_addr) name_datavar.name = f"{go_data_type.name}_name" + already_parsed.add(param) created += 1 log_info(f"Created {created} types") @@ -328,6 +418,22 @@ def run(self): return self.create_types() +class RunAll(bn.plugin.BackgroundTaskThread): + def __init__(self, bv): + super().__init__(NAME, True) + self.bv = bv + self.analysis = [] + self.analysis.append(FunctionRenamer(bv)) + self.analysis.append(FunctionCommenter(bv)) + self.analysis.append(TypeParser(bv)) + + def run(self): + for analysis in self.analysis: + analysis.start() + analysis.join() + log_info(f"Terminated all analysis") + + def rename_functions(bv): helper = FunctionRenamer(bv) return helper.start() @@ -338,9 +444,16 @@ def create_types(bv): return helper.start() -def parse_go_file(bv): - fr = FunctionRenamer(bv) - fr.start() - fr.join() - tp = TypeParser(bv) - return tp.start() +def print_files(bv): + helper = PrintFiles(bv) + return helper.start() + + +def comment_functions(bv): + helper = FunctionCommenter(bv) + return helper.start() + + +def parse_go_file(bv: bn.BinaryView): + ra = RunAll(bv) + return ra.start() diff --git a/plugin.json b/plugin.json index 6346ebe..a3c3828 100644 --- a/plugin.json +++ b/plugin.json @@ -9,7 +9,7 @@ "python3" ], "description": "BinaryNinja plugin to parse GoLang binaries and restore some information, like function names and type information", - "longdescription": "This plugin will parse a go binary and restore information like the functions names, by parsing the .gopclntab, and extract typing information from the executable itself.", + 
"longdescription": "This plugin will parse a go binary and restore information like the functions/file names, by parsing the .gopclntab, and extract typing information from the executable itself.", "license": { "name": "MIT", "text": "Copyright 2023 Jacopo Ferrigno\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE." 
@@ -24,6 +24,6 @@ "Windows": "no special instructions, package manager is recommended", "Linux": "no special instructions, package manager is recommended" }, - "version": "1.1", + "version": "1.2", "minimumbinaryninjaversion": 3946 } \ No newline at end of file diff --git a/types.py b/types.py index 409cb44..064147f 100644 --- a/types.py +++ b/types.py @@ -80,11 +80,11 @@ class FuncEntry: args: int = 0 frame: int = 0 - pcsp: int = 0 + deferreturn: int = 0 pcfile: int = 0 pcln: int = 0 nfuncdata: int = 0 - npcdata: int = 0 + cuOffset: int = 0 version: GoVersion = GoVersion.ver118 @@ -96,7 +96,7 @@ def __init__(self, raw: bytes, ptrsize: int, textStart: int = 0, version: GoVers self.__init_from_raw() def __init_from_raw(self): - fields = ["nameOffset", "args", "frame", "pcsp", "pcfile", "pcln", "nfuncdata", "npcdata"] + fields = ["nameOffset", "args", "frame", "deferreturn", "pcfile", "pcln", "nfuncdata", "cuOffset"] for idx, field in enumerate(fields, 1): value = self.field(idx) @@ -205,8 +205,6 @@ class GoPclnTab: nfiletab: int = 0 - # Ignore maps which are use for caching - def __init__(self, start, end, raw): self.start = start self.end = end @@ -224,7 +222,7 @@ def get_byte_at(self, offset: int) -> int: def offset(self, word: int) -> int: off = 8 + word * self.ptrsize - data = self.raw[off:off +self.ptrsize] + data = self.raw[off:off + self.ptrsize] if self.ptrsize == 8: return struct.unpack("Q", data)[0] @@ -243,7 +241,7 @@ def range(self, start: int, end: int) -> bytes: return self.raw[ostart:oend] def uintptr(self, offset: int) -> int: - value = self.raw[offset:offset +self.ptrsize] + value = self.raw[offset:offset + self.ptrsize] return self.value_to_uintptr(value) def value_to_uintptr(self, value: bytes) -> int: @@ -347,6 +345,153 @@ def go12FuncInfo(self, idx: int) -> FuncEntry: function.resolvedName = self.funcName(function.nameOffset) return function + def fileName(self, idx) -> bytes: + if self.version == GoVersion.ver12: + start = 4 * (idx + 1) + 
offset = struct.unpack("I", self.filetab[start:start + 4])[0] + string_end = self.funcdata.find(0, offset) + string = self.funcdata[offset:string_end] + return string + else: + offset = 0 + for i in range(idx + 1): + string_end = self.filetab.find(0, offset) + string = self.filetab[offset:string_end] + if i == idx: + return string + offset += len(string) + 1 + return b"" + + def pc2filename(self, function: FuncEntry) -> bytes: + """ + func (t *LineTable) go12PCToFile(pc uint64) (file string) { + entry := f.entryPC() + filetab := f.pcfile() + fno := t.pcvalue(filetab, entry, pc) + if t.version == ver12 { + if fno <= 0 { + return "" + } + return t.string(t.binary.Uint32(t.filetab[4*fno:])) + } + // Go ≥ 1.16 + if fno < 0 { // 0 is valid for ≥ 1.16 + return "" + } + cuoff := f.cuOffset() + if fnoff := t.binary.Uint32(t.cutab[(cuoff+uint32(fno))*4:]); fnoff != ^uint32(0) { + return t.stringFrom(t.filetab, fnoff) + } + return "" + } + """ + entry = function.entry + filetab = function.pcfile + targetpc = function.entry + offset = self.pcvalue(filetab, entry, targetpc) + + if self.version == GoVersion.ver12: + if offset <= 0: + return b'' + else: + return self.fileName(offset - 1) + if offset < 0: + return b'' + + compilation_unit_offset = function.cuOffset + start = (compilation_unit_offset + offset) * 4 + offset = struct.unpack("I", self.cutab[start:start + 4])[0] + if offset != self.make_mask(4): + string_end = self.filetab.find(0, offset) + string = self.filetab[offset:string_end] + return string + return b'' + + def pcvalue(self, offset: int, pc: int, targetpc: int) -> int: + """ + func (t *LineTable) pcvalue(off uint32, entry, targetpc uint64) int32 { + p := t.pctab[off:] + + val := int32(-1) + pc := entry + for t.step(&p, &pc, &val, pc == entry) { + if targetpc < pc { + return val + } + } + return -1 + } + """ + val = -1 + offset, pc, val, ok = self.step(self.pctab, offset, pc, val, True) + while ok: + if targetpc < pc: + return val + offset, pc, val, ok = 
self.step(self.pctab, offset, pc, val, False) + return -1 + + def step(self, table: bytes, offset_in_table: int, pc: int, val: int, first: bool) -> (int, int, int, bool): + """ + // step advances to the next pc, value pair in the encoded table. + func (t *LineTable) step(p *[]byte, pc *uint64, val *int32, first bool) bool { + uvdelta := t.readvarint(p) + if uvdelta == 0 && !first { + return false + } + if uvdelta&1 != 0 { + uvdelta = ^(uvdelta >> 1) + } else { + uvdelta >>= 1 + } + vdelta := int32(uvdelta) -> there might be some type/sign shenanigans + pcdelta := t.readvarint(p) * t.quantum + *pc += uint64(pcdelta) + *val += vdelta + return true + } + """ + uvdelta, read = self.read_varint(table, offset_in_table) + offset_in_table += read + + if uvdelta == 0 and not first: + return 0, 0, -1, False + + if uvdelta & 1: + mask = self.make_mask(read) + uvdelta = ~(uvdelta >> 1) & mask + else: + uvdelta = uvdelta >> 1 + + vdelta = uvdelta # There might be some sign parsing/shenanigans in the original code + pcdelta, read = self.read_varint(table, offset_in_table) + offset_in_table += read + pcdelta = pcdelta * self.quantum + + new_pc = pc + pcdelta + new_val = val + vdelta + return offset_in_table, new_pc, new_val, True + + @staticmethod + def read_varint(raw: bytes, offset: int = 0) -> (int, int): + shift = 0 + result = 0 + read = 0 + while True: + i = raw[offset + read] + result |= (i & 0x7f) << shift + shift += 7 + read += 1 + if not (i & 0x80): + break + return result, read + + @staticmethod + def make_mask(bytes_cnt: int) -> int: + mask = 0 + for i in range(bytes_cnt): + mask = mask << 8 | 0xff + return mask + def __repr__(self): excluded = ['raw'] excluded_types = [bytes] @@ -373,7 +518,6 @@ def __repr__(self): return f"{self.__class__.__name__}({nodef_f_repr})" - class GolangTypeKind(IntEnum): INVALID = 0x0 BOOL = 0x1 @@ -469,7 +613,7 @@ def __init_from_raw(self): offset += size def field(self, size=4, offset=0) -> int: - data = self.raw[offset:offset+size] + 
data = self.raw[offset:offset + size] if size == 1: return data[0] if size == 4: