From 3f488f365cfeb43915a444c8fd6a24676d5bcf59 Mon Sep 17 00:00:00 2001 From: Blas Date: Tue, 19 Dec 2023 20:18:36 -0500 Subject: [PATCH 01/49] Update helpers.py --- capa/features/extractors/dnfile/helpers.py | 79 +++++++++++++++++++++- 1 file changed, 76 insertions(+), 3 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index def6cd04a..eee286606 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -299,22 +299,95 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod] # like kernel32.CreateFileA yield DnUnmanagedMethod(token, module, method) +def get_nested_class_table(pe): + nested_class_table = {} + + # Used to find nested classes in typedef + for rid, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): + assert isinstance(nestedclass, dnfile.mdtable.NestedClassRow) + nested_class_table[nestedclass.NestedClass.row_index] = nestedclass.EnclosingClass.row_index + + return nested_class_table + +def get_typeref_table(pe): + typeref_table = [] + + # Used to track values in typeref table + for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): + assert isinstance(typeref, dnfile.mdtable.TypeRefRow) + typeref_table.append((typeref.TypeName, typeref.TypeNamespace, typeref.struct.ResolutionScope_CodedIndex)) + + return typeref_table + +def is_typedef_nested(rid, nested_classes, class_names, typedef, assembled_class_names): + name = typedef.TypeName + space = typedef.TypeNamespace + + if rid in nested_classes: + space = class_names[nested_classes[rid]-1][1] + + enclosing_class = class_names[nested_classes[rid]-1][0] + nested_class = class_names[rid-1][0] + if enclosing_class in assembled_class_names: + enclosing_class = f"{assembled_class_names[enclosing_class]}" + assembled_class_names[nested_class] = enclosing_class + + for i in class_names: + if i[0] == enclosing_class.split('/')[0]: + space = i[1] + + name = (enclosing_class, nested_class) + + assembled_class_names[name[1]] = f"{name[0]}/{name[1]}" + + return space, name, assembled_class_names + +def is_typeref_nested(rid, typeref_table, class_names, typeref, assembled_class_names): + name = typeref.TypeName.split('`')[0] if '`' in typeref.TypeName else typeref.TypeName + space = typeref.TypeNamespace + + # To be corrected + if typeref.struct.ResolutionScope_CodedIndex <= len(typeref_table): + space = typeref_table[typeref.struct.ResolutionScope_CodedIndex-1][1] + + enclosing_class = f"{space}.{name}" + nested_class = f"{typeref.TypeName}" + + name = (enclosing_class, nested_class) + + assembled_class_names[name[1]] = f"{name[0]}/{name[1]}" + + return space, name, assembled_class_names def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: """get .NET types from TypeDef and TypeRef tables""" + nested_class_table = get_nested_class_table(pe) + typedef_class_names = [] + typedef_assembled_class_names = {} + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) + typedef_class_names.append((typedef.TypeName, typedef.TypeNamespace)) + typedef.TypeNamespace, typedef.TypeName, typedef_assembled_class_names = is_typedef_nested(rid, nested_class_table, typedef_class_names, typedef, typedef_assembled_class_names) + typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) yield DnType(typedef_token, typedef.TypeName, namespace=typedef.TypeNamespace) + + typeref_table = get_typeref_table(pe) + typeref_class_names = [] + typeref_assembled_class_names = {} + for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): assert isinstance(typeref, dnfile.mdtable.TypeRefRow) - + + typeref_class_names.append((typeref.TypeName, typeref.TypeNamespace)) + typeref.TypeNamespace, typeref.TypeName, typeref_assembled_class_names = is_typeref_nested(rid, typeref_table, typeref_class_names, typeref, typeref_assembled_class_names) + typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) yield DnType(typeref_token, typeref.TypeName, namespace=typeref.TypeNamespace) - - + def calculate_dotnet_token_value(table: int, rid: int) -> int: return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) From 94346e4fecf0c698d6e280c21317f24b2e10428e Mon Sep 17 00:00:00 2001 From: Blas Date: Wed, 20 Dec 2023 13:05:04 -0500 Subject: [PATCH 02/49] Update helpers.py --- capa/features/extractors/dnfile/helpers.py | 70 ++++++++++++---------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index eee286606..c8ad53d55 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -319,43 +319,50 @@ def get_typeref_table(pe): return typeref_table -def is_typedef_nested(rid, nested_classes, class_names, typedef, assembled_class_names): - name = typedef.TypeName - space = typedef.TypeNamespace +def is_nested_helper(rid, table, class_names, typeX, assembled_class_names, is_typedef): + if is_typedef: + name = typeX.TypeName + space = typeX.TypeNamespace + nested_classes = table - if rid in nested_classes: - space = class_names[nested_classes[rid]-1][1] + if rid in nested_classes: + space = class_names[nested_classes[rid]-1][1] - enclosing_class = class_names[nested_classes[rid]-1][0] - nested_class = class_names[rid-1][0] - if enclosing_class in assembled_class_names: - enclosing_class = f"{assembled_class_names[enclosing_class]}" - assembled_class_names[nested_class] = enclosing_class + enclosing_class = class_names[nested_classes[rid]-1][0] + nested_class = class_names[rid-1][0] + + if enclosing_class in assembled_class_names: + enclosing_class = f"{assembled_class_names[enclosing_class]}" + assembled_class_names[nested_class] = enclosing_class - for i in class_names: - if i[0] == enclosing_class.split('/')[0]: - space = i[1] + for i in class_names: + if i[0] == enclosing_class.split('/')[0]: + space = i[1] - name = (enclosing_class, nested_class) - - assembled_class_names[name[1]] = f"{name[0]}/{name[1]}" - - return space, name, assembled_class_names - -def is_typeref_nested(rid, typeref_table, class_names, typeref, assembled_class_names): - name = typeref.TypeName.split('`')[0] if '`' in typeref.TypeName else typeref.TypeName - space = typeref.TypeNamespace + name = (enclosing_class, nested_class) + assembled_class_names[name[1]] = f"{name[0]}/{name[1]}" + + else: + name = typeX.TypeName.split('`')[0] if '`' in typeX.TypeName else typeX.TypeName + space = typeX.TypeNamespace + typeref_table = table - # To be corrected - if typeref.struct.ResolutionScope_CodedIndex <= len(typeref_table): - space = typeref_table[typeref.struct.ResolutionScope_CodedIndex-1][1] + if typeX.struct.ResolutionScope_CodedIndex <= len(typeref_table): + space = typeref_table[typeX.struct.ResolutionScope_CodedIndex-1][1] - enclosing_class = f"{space}.{name}" - nested_class = f"{typeref.TypeName}" + enclosing_class = typeref_table[typeX.struct.ResolutionScope_CodedIndex-1][0] + nested_class = name - name = (enclosing_class, nested_class) + if enclosing_class in assembled_class_names: + enclosing_class = f"{assembled_class_names[enclosing_class]}" + assembled_class_names[nested_class] = enclosing_class - assembled_class_names[name[1]] = f"{name[0]}/{name[1]}" + for i in class_names: + if i[0] == enclosing_class.split('/')[0]: + space = i[1] + + name = (enclosing_class, nested_class) + assembled_class_names[name[1]] = f"{name[0]}/{name[1]}" return space, name, assembled_class_names @@ -369,12 +376,11 @@ def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: assert isinstance(typedef, dnfile.mdtable.TypeDefRow) typedef_class_names.append((typedef.TypeName, typedef.TypeNamespace)) - typedef.TypeNamespace, typedef.TypeName, typedef_assembled_class_names = is_typedef_nested(rid, nested_class_table, typedef_class_names, typedef, typedef_assembled_class_names) + typedef.TypeNamespace, typedef.TypeName, typedef_assembled_class_names = is_nested_helper(rid, nested_class_table, typedef_class_names, typedef, typedef_assembled_class_names, True) typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) yield DnType(typedef_token, typedef.TypeName, namespace=typedef.TypeNamespace) - typeref_table = get_typeref_table(pe) typeref_class_names = [] typeref_assembled_class_names = {} @@ -383,7 +389,7 @@ def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: assert isinstance(typeref, dnfile.mdtable.TypeRefRow) typeref_class_names.append((typeref.TypeName, typeref.TypeNamespace)) - typeref.TypeNamespace, typeref.TypeName, typeref_assembled_class_names = is_typeref_nested(rid, typeref_table, typeref_class_names, typeref, typeref_assembled_class_names) + typedef.TypeNamespace, typedef.TypeName, typedef_assembled_class_names = is_nested_helper(rid, typeref_table, typeref_class_names, typeref, typeref_assembled_class_names, False) typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) yield DnType(typeref_token, typeref.TypeName, namespace=typeref.TypeNamespace) From 8049bdcecfa2acfc20d345ce491f77038078abc7 Mon Sep 17 00:00:00 2001 From: Blas Date: Wed, 20 Dec 2023 14:23:06 -0500 Subject: [PATCH 03/49] TypeRef correction in helpers.py --- capa/features/extractors/dnfile/helpers.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index c8ad53d55..15ab7229f 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -108,6 +108,10 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: TypeName (index into String heap) TypeNamespace (index into String heap) """ + #typeref_table = get_typeref_table(pe) + #typeref_class_names = [] + #typeref_assembled_class_names = {} + for rid, member_ref in iter_dotnet_table(pe, dnfile.mdtable.MemberRef.number): assert isinstance(member_ref, dnfile.mdtable.MemberRefRow) @@ -131,6 +135,9 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: # remove get_/set_ from MemberRef name member_ref_name = member_ref_name[4:] + #typeref_class_names.append((member_ref.Class.row.TypeName, member_ref.Class.row.TypeNamespace)) + #member_ref.Class.row.TypeNamespace, member_ref.Class.row.TypeName, typeref_assembled_class_names = is_nested_helper(rid, typeref_table, typeref_class_names, member_ref.Class.row, typeref_assembled_class_names, False) + yield DnType( token, member_ref.Class.row.TypeName, @@ -188,6 +195,10 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: TypeNamespace (index into String heap) MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type) """ + #nested_class_table = get_nested_class_table(pe) + #typedef_class_names = [] + #typedef_assembled_class_names = {} + accessor_map: Dict[int, str] = {} for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe): accessor_map[methoddef] = methoddef_access @@ -211,6 +222,9 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: # remove get_/set_ method_name = method_name[4:] + #typedef_class_names.append((method_name, typedef.TypeNamespace)) + #typedef.TypeNamespace, typedef.TypeName, typedef_assembled_class_names = is_nested_helper(idx, nested_class_table, typedef_class_names, typedef, typedef_assembled_class_names, True) + yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method_name, access=access) @@ -381,6 +395,7 @@ def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) yield DnType(typedef_token, typedef.TypeName, namespace=typedef.TypeNamespace) + typeref_table = get_typeref_table(pe) typeref_class_names = [] typeref_assembled_class_names = {} @@ -389,7 +404,7 @@ def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: assert isinstance(typeref, dnfile.mdtable.TypeRefRow) typeref_class_names.append((typeref.TypeName, typeref.TypeNamespace)) - typedef.TypeNamespace, typedef.TypeName, typedef_assembled_class_names = is_nested_helper(rid, typeref_table, typeref_class_names, typeref, typeref_assembled_class_names, False) + typeref.TypeNamespace, typeref.TypeName, typeref_assembled_class_names = is_nested_helper(rid, typeref_table, typeref_class_names, typeref, typeref_assembled_class_names, False) typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) yield DnType(typeref_token, typeref.TypeName, namespace=typeref.TypeNamespace) From 46103b5f541246ef02859a45092752eed7802ee7 Mon Sep 17 00:00:00 2001 From: Blas Date: Wed, 20 Dec 2023 17:58:21 -0500 Subject: [PATCH 04/49] Fixed TypeRef to proper functionality --- capa/features/extractors/dnfile/helpers.py | 87 +++++++++++----------- 1 file changed, 42 insertions(+), 45 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 15ab7229f..bbcee2b83 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -250,6 +250,9 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: logger.debug("TypeDef[0x%X] FieldList[0x%X] row is None", rid, idx) continue token: int = calculate_dotnet_token_value(field.table.number, field.row_index) + + # Do here as well + yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=field.row.Name) @@ -329,57 +332,49 @@ def get_typeref_table(pe): # Used to track values in typeref table for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): assert isinstance(typeref, dnfile.mdtable.TypeRefRow) - typeref_table.append((typeref.TypeName, typeref.TypeNamespace, typeref.struct.ResolutionScope_CodedIndex)) + typeref_table.append((typeref.TypeName, typeref.TypeNamespace, typeref.ResolutionScope.row_index, type(typeref.ResolutionScope.table))) return typeref_table -def is_nested_helper(rid, table, class_names, typeX, assembled_class_names, is_typedef): - if is_typedef: - name = typeX.TypeName - space = typeX.TypeNamespace - nested_classes = table +def is_nested_helper(rid, table, class_names, typeX, assembled_class_names): + name = typeX.TypeName + space = typeX.TypeNamespace + nested_classes = table - if rid in nested_classes: - space = class_names[nested_classes[rid]-1][1] + if rid in nested_classes: + space = class_names[nested_classes[rid]-1][1] - enclosing_class = class_names[nested_classes[rid]-1][0] - nested_class = class_names[rid-1][0] - - if enclosing_class in assembled_class_names: - enclosing_class = f"{assembled_class_names[enclosing_class]}" - assembled_class_names[nested_class] = enclosing_class + enclosing_class = class_names[nested_classes[rid]-1][0] + nested_class = class_names[rid-1][0] + + if enclosing_class in assembled_class_names: + enclosing_class = f"{assembled_class_names[enclosing_class]}" + assembled_class_names[nested_class] = enclosing_class - for i in class_names: - if i[0] == enclosing_class.split('/')[0]: - space = i[1] + for i in class_names: + if i[0] == enclosing_class.split('/')[0]: + space = i[1] - name = (enclosing_class, nested_class) - assembled_class_names[name[1]] = f"{name[0]}/{name[1]}" + name = (enclosing_class, nested_class) + assembled_class_names[name[1]] = f"{name[0]}/{name[1]}" - else: - name = typeX.TypeName.split('`')[0] if '`' in typeX.TypeName else typeX.TypeName - space = typeX.TypeNamespace - typeref_table = table - - if typeX.struct.ResolutionScope_CodedIndex <= len(typeref_table): - space = typeref_table[typeX.struct.ResolutionScope_CodedIndex-1][1] - - enclosing_class = typeref_table[typeX.struct.ResolutionScope_CodedIndex-1][0] - nested_class = name - - if enclosing_class in assembled_class_names: - enclosing_class = f"{assembled_class_names[enclosing_class]}" - assembled_class_names[nested_class] = enclosing_class - - for i in class_names: - if i[0] == enclosing_class.split('/')[0]: - space = i[1] - - name = (enclosing_class, nested_class) - assembled_class_names[name[1]] = f"{name[0]}/{name[1]}" - return space, name, assembled_class_names +def typeref_helper(index, typeref_table, n, name): + # Append the current typeref name + n.append(name) + + if typeref_table[index - 1][3] == dnfile.mdtable.TypeRef: + # Recursively call helper function with enclosing typeref details + typeref_helper(typeref_table[index - 1][2], typeref_table, n, typeref_table[index - 1][0]) + else: + # Document the root enclosing details + n.append(typeref_table[index - 1][0]) + namespace = typeref_table[index - 1][1] + n.append(namespace) + + return tuple(n[::-1]) + def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: """get .NET types from TypeDef and TypeRef tables""" nested_class_table = get_nested_class_table(pe) @@ -390,7 +385,7 @@ def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: assert isinstance(typedef, dnfile.mdtable.TypeDefRow) typedef_class_names.append((typedef.TypeName, typedef.TypeNamespace)) - typedef.TypeNamespace, typedef.TypeName, typedef_assembled_class_names = is_nested_helper(rid, nested_class_table, typedef_class_names, typedef, typedef_assembled_class_names, True) + typedef.TypeNamespace, typedef.TypeName, typedef_assembled_class_names = is_nested_helper(rid, nested_class_table, typedef_class_names, typedef, typedef_assembled_class_names) typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) yield DnType(typedef_token, typedef.TypeName, namespace=typedef.TypeNamespace) @@ -403,9 +398,11 @@ def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): assert isinstance(typeref, dnfile.mdtable.TypeRefRow) - typeref_class_names.append((typeref.TypeName, typeref.TypeNamespace)) - typeref.TypeNamespace, typeref.TypeName, typeref_assembled_class_names = is_nested_helper(rid, typeref_table, typeref_class_names, typeref, typeref_assembled_class_names, False) - + # If the ResolutionScope decodes to a typeRef type, then it is nested + n = [] + if type(typeref.ResolutionScope.table) == dnfile.mdtable.TypeRef: + typeref.TypeName = typeref_helper(typeref.ResolutionScope.row_index, typeref_table, n, typeref.TypeName) + typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) yield DnType(typeref_token, typeref.TypeName, namespace=typeref.TypeNamespace) From b2621c7ff84f512ca1d3d6bb5fbfb2b8b17ab3e9 Mon Sep 17 00:00:00 2001 From: Blas Date: Wed, 20 Dec 2023 17:59:31 -0500 Subject: [PATCH 05/49] Accounts for TypeRef updated tuple --- capa/features/extractors/dnfile/types.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py index 6c6d59927..6500982b7 100644 --- a/capa/features/extractors/dnfile/types.py +++ b/capa/features/extractors/dnfile/types.py @@ -42,7 +42,12 @@ def __repr__(self): return str(self) @staticmethod - def format_name(class_: str, namespace: str = "", member: str = ""): + def format_name(class_: tuple, namespace: str = "", member: str = ""): + if type(class_) == tuple: + if len(class_) > 2: + class_ = class_[0] + "." + "/".join(class_[1:]) + else: + class_ = f"{class_[0]}.{class_[1]}" # like File::OpenRead name: str = f"{class_}::{member}" if member else class_ if namespace: @@ -50,7 +55,6 @@ def format_name(class_: str, namespace: str = "", member: str = ""): name = f"{namespace}.{name}" return name - class DnUnmanagedMethod: def __init__(self, token: int, module: str, method: str): self.token: int = token From 94a8b65b480fa0a5944638b0ce3c3dbfd3737bca Mon Sep 17 00:00:00 2001 From: Blas Date: Wed, 20 Dec 2023 18:36:04 -0500 Subject: [PATCH 06/49] Corrected TypeDef tuple creation in helpers.py --- capa/features/extractors/dnfile/helpers.py | 59 ++++++++++------------ 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index bbcee2b83..fd863831d 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -326,6 +326,29 @@ def get_nested_class_table(pe): return nested_class_table +def get_typedef_class_table(pe): + typedef_class_table = [] + + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): + assert isinstance(typedef, dnfile.mdtable.TypeDefRow) + typedef_class_table.append((typedef.TypeName, typedef.TypeNamespace)) + + return typedef_class_table + +def typedef_helper(index, nested_class_table, typedef_class_table, n, name): + # Append the current typeref name + n.append(name) + + if nested_class_table[index] in nested_class_table: + typedef_helper(nested_class_table[index], nested_class_table, typedef_class_table, n, typedef_class_table[nested_class_table[index]-1][0]) + else: + # Document the root enclosing details + n.append(typedef_class_table[nested_class_table[index]-1][0]) + namespace = typedef_class_table[nested_class_table[index]-1][1] + n.append(namespace) + + return tuple(n[::-1]) + def get_typeref_table(pe): typeref_table = [] @@ -336,30 +359,6 @@ def get_typeref_table(pe): return typeref_table -def is_nested_helper(rid, table, class_names, typeX, assembled_class_names): - name = typeX.TypeName - space = typeX.TypeNamespace - nested_classes = table - - if rid in nested_classes: - space = class_names[nested_classes[rid]-1][1] - - enclosing_class = class_names[nested_classes[rid]-1][0] - nested_class = class_names[rid-1][0] - - if enclosing_class in assembled_class_names: - enclosing_class = f"{assembled_class_names[enclosing_class]}" - assembled_class_names[nested_class] = enclosing_class - - for i in class_names: - if i[0] == enclosing_class.split('/')[0]: - space = i[1] - - name = (enclosing_class, nested_class) - assembled_class_names[name[1]] = f"{name[0]}/{name[1]}" - - return space, name, assembled_class_names - def typeref_helper(index, typeref_table, n, name): # Append the current typeref name n.append(name) @@ -378,22 +377,20 @@ def typeref_helper(index, typeref_table, n, name): def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: """get .NET types from TypeDef and TypeRef tables""" nested_class_table = get_nested_class_table(pe) - typedef_class_names = [] - typedef_assembled_class_names = {} + typedef_class_table = get_typedef_class_table(pe) for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) - typedef_class_names.append((typedef.TypeName, typedef.TypeNamespace)) - typedef.TypeNamespace, typedef.TypeName, typedef_assembled_class_names = is_nested_helper(rid, nested_class_table, typedef_class_names, typedef, typedef_assembled_class_names) - + typedef_name = [] + if rid in nested_class_table: + typedef.TypeName = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) + typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) yield DnType(typedef_token, typedef.TypeName, namespace=typedef.TypeNamespace) typeref_table = get_typeref_table(pe) - typeref_class_names = [] - typeref_assembled_class_names = {} for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): assert isinstance(typeref, dnfile.mdtable.TypeRefRow) From eefdfcfd6dc5d5dbf0c9dcc8d8d7ed6cbe76cb72 Mon Sep 17 00:00:00 2001 From: Blas Date: Thu, 21 Dec 2023 16:35:51 -0500 Subject: [PATCH 07/49] Update types.py --- capa/features/extractors/dnfile/types.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py index 6500982b7..609cdf5c3 100644 --- a/capa/features/extractors/dnfile/types.py +++ b/capa/features/extractors/dnfile/types.py @@ -43,11 +43,12 @@ def __repr__(self): @staticmethod def format_name(class_: tuple, namespace: str = "", member: str = ""): - if type(class_) == tuple: - if len(class_) > 2: - class_ = class_[0] + "." + "/".join(class_[1:]) - else: - class_ = f"{class_[0]}.{class_[1]}" + if len(class_) > 1: + # like ('myclass_outer0', 'myclass_inner0_0') + class_ = "/".join(class_) + else: + # like ('CompilationRelaxationsAttribute',) + class_ = "".join(class_) # like File::OpenRead name: str = f"{class_}::{member}" if member else class_ if namespace: From 36110d5102b640bbbce8259555ace53db765f725 Mon Sep 17 00:00:00 2001 From: Blas Date: Thu, 21 Dec 2023 16:40:18 -0500 Subject: [PATCH 08/49] Update types.py --- capa/features/extractors/dnfile/types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py index 609cdf5c3..935e6120f 100644 --- a/capa/features/extractors/dnfile/types.py +++ b/capa/features/extractors/dnfile/types.py @@ -44,10 +44,10 @@ def __repr__(self): @staticmethod def format_name(class_: tuple, namespace: str = "", member: str = ""): if len(class_) > 1: - # like ('myclass_outer0', 'myclass_inner0_0') + # like Android.Graphics.Bitmap/CompressFormat class_ = "/".join(class_) else: - # like ('CompilationRelaxationsAttribute',) + # like CompilationRelaxationsAttribute class_ = "".join(class_) # like File::OpenRead name: str = f"{class_}::{member}" if member else class_ From 73c8db711099f490a7af20aaaed6ff4ddce10de5 Mon Sep 17 00:00:00 2001 From: Blas Date: Fri, 22 Dec 2023 19:56:11 -0500 Subject: [PATCH 09/49] Create helpers_draft.py --- .../extractors/dnfile/helpers_draft.py | 434 ++++++++++++++++++ 1 file changed, 434 insertions(+) create mode 100644 capa/features/extractors/dnfile/helpers_draft.py diff --git a/capa/features/extractors/dnfile/helpers_draft.py b/capa/features/extractors/dnfile/helpers_draft.py new file mode 100644 index 000000000..93656a7f3 --- /dev/null +++ b/capa/features/extractors/dnfile/helpers_draft.py @@ -0,0 +1,434 @@ +# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. + +from __future__ import annotations + +import logging +from typing import Dict, Tuple, Union, Iterator, Optional + +import dnfile +from dncil.cil.body import CilMethodBody +from dncil.cil.error import MethodBodyFormatError +from dncil.clr.token import Token, StringToken, InvalidToken +from dncil.cil.body.reader import CilMethodBodyReaderBase + +from capa.features.common import FeatureAccess +from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod + +logger = logging.getLogger(__name__) + + +class DnfileMethodBodyReader(CilMethodBodyReaderBase): + def __init__(self, pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow): + self.pe: dnfile.dnPE = pe + self.offset: int = self.pe.get_offset_from_rva(row.Rva) + + def read(self, n: int) -> bytes: + data: bytes = self.pe.get_data(self.pe.get_rva_from_offset(self.offset), n) + self.offset += n + return data + + def tell(self) -> int: + return self.offset + + def seek(self, offset: int) -> int: + self.offset = offset + return self.offset + + +def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Union[dnfile.base.MDTableRow, InvalidToken, str]: + """map generic token to string or table row""" + assert pe.net is not None + assert pe.net.mdtables is not None + + if isinstance(token, StringToken): + user_string: Optional[str] = read_dotnet_user_string(pe, token) + if user_string is None: + return InvalidToken(token.value) + return user_string + + table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(token.table) + if table is None: + # table index is not valid + return InvalidToken(token.value) + + try: + return table.rows[token.rid - 1] + except IndexError: + # table index is valid but row index is not valid + return InvalidToken(token.value) + + +def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) -> Optional[CilMethodBody]: + """read dotnet method body""" + try: + return CilMethodBody(DnfileMethodBodyReader(pe, row)) + except MethodBodyFormatError as e: + logger.debug("failed to parse managed method body @ 0x%08x (%s)", row.Rva, e) + return None + + +def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]: + """read user string from #US stream""" + assert pe.net is not None + + if pe.net.user_strings is None: + # stream may not exist (seen in obfuscated .NET) + logger.debug("#US stream does not exist for stream index 0x%08x", token.rid) + return None + + try: + user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid) + except UnicodeDecodeError as e: + logger.debug("failed to decode #US stream index 0x%08x (%s)", token.rid, e) + return None + + if user_string is None: + return None + + return user_string.value + + +def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: + """get managed imports from MemberRef table + + see https://www.ntcore.com/files/dotnetformat.htm + + 10 - MemberRef Table + Each row represents an imported method + Class (index into the TypeRef, ModuleRef, MethodDef, TypeSpec or TypeDef tables) + Name (index into String heap) + 01 - TypeRef Table + Each row represents an imported class, its namespace and the assembly which contains it + TypeName (index into String heap) + TypeNamespace (index into String heap) + """ + typeref_table = get_typeref_table(pe) + + for rid, member_ref in iter_dotnet_table(pe, dnfile.mdtable.MemberRef.number): + assert isinstance(member_ref, dnfile.mdtable.MemberRefRow) + + if not isinstance(member_ref.Class.row, dnfile.mdtable.TypeRefRow): + # only process class imports from TypeRef table + continue + + token: int = calculate_dotnet_token_value(dnfile.mdtable.MemberRef.number, rid) + access: Optional[str] + + # assume .NET imports starting with get_/set_ are used to access a property + if member_ref.Name.startswith("get_"): + access = FeatureAccess.READ + elif member_ref.Name.startswith("set_"): + access = FeatureAccess.WRITE + else: + access = None + + member_ref_name: str = member_ref.Name + if member_ref_name.startswith(("get_", "set_")): + # remove get_/set_ from MemberRef name + member_ref_name = member_ref_name[4:] + + #typeref_name = [] + #if type(member_ref.Class.table) is dnfile.mdtable.TypeRef: + # if type(member_ref.Class.row.TypeName) is str: + # member_ref.Class.row.TypeNamespace, member_ref.Class.row.TypeName = typeref_helper(member_ref.Class.row_index, typeref_table, typeref_name, member_ref.Class.row.TypeName) + + yield DnType( + token, + member_ref.Class.row.TypeName, + namespace=member_ref.Class.row.TypeNamespace, + member=member_ref_name, + access=access, + ) + + +def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: + """get MethodDef methods used to access properties + + see https://www.ntcore.com/files/dotnetformat.htm + + 24 - MethodSemantics Table + Links Events and Properties to specific methods. For example one Event can be associated to more methods. A property uses this table to associate get/set methods. + Semantics (a 2-byte bitmask of type MethodSemanticsAttributes) + Method (index into the MethodDef table) + Association (index into the Event or Property table; more precisely, a HasSemantics coded index) + """ + for rid, method_semantics in iter_dotnet_table(pe, dnfile.mdtable.MethodSemantics.number): + assert isinstance(method_semantics, dnfile.mdtable.MethodSemanticsRow) + + if method_semantics.Association.row is None: + logger.debug("MethodSemantics[0x%X] Association row is None", rid) + continue + + if isinstance(method_semantics.Association.row, dnfile.mdtable.EventRow): + # ignore events + logger.debug("MethodSemantics[0x%X] ignoring Event", rid) + continue + + if method_semantics.Method.table is None: + logger.debug("MethodSemantics[0x%X] Method table is None", rid) + continue + + token: int = calculate_dotnet_token_value( + method_semantics.Method.table.number, method_semantics.Method.row_index + ) + + if method_semantics.Semantics.msSetter: + yield token, FeatureAccess.WRITE + elif method_semantics.Semantics.msGetter: + yield token, FeatureAccess.READ + + +def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: + """get managed method names from TypeDef table + + see https://www.ntcore.com/files/dotnetformat.htm + + 02 - TypeDef Table + Each row represents a class in the current assembly. + TypeName (index into String heap) + TypeNamespace (index into String heap) + MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type) + """ + nested_class_table = get_nested_class_table(pe) + typedef_class_table = get_typedef_class_table(pe) + + accessor_map: Dict[int, str] = {} + for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe): + accessor_map[methoddef] = methoddef_access + + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): + assert isinstance(typedef, dnfile.mdtable.TypeDefRow) + + for idx, method in enumerate(typedef.MethodList): + if method.table is None: + logger.debug("TypeDef[0x%X] MethodList[0x%X] table is None", rid, idx) + continue + if method.row is None: + logger.debug("TypeDef[0x%X] MethodList[0x%X] row is None", rid, idx) + continue + + token: int = calculate_dotnet_token_value(method.table.number, method.row_index) + access: Optional[str] = accessor_map.get(token) + + method_name: str = method.row.Name + if method_name.startswith(("get_", "set_")): + # remove get_/set_ + method_name = method_name[4:] + + #typedef_name = [] + #if rid in nested_class_table: + # typedef.TypeNamespace, typedef.TypeName = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) + + yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method_name, access=access) + + +def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: + """get fields from TypeDef table + + see https://www.ntcore.com/files/dotnetformat.htm + + 02 - TypeDef Table + Each row represents a class in the current assembly. + TypeName (index into String heap) + TypeNamespace (index into String heap) + FieldList (index into Field table; it marks the first of a contiguous run of Fields owned by this Type) + """ + nested_class_table = get_nested_class_table(pe) + typedef_class_table = get_typedef_class_table(pe) + + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): + assert isinstance(typedef, dnfile.mdtable.TypeDefRow) + + for idx, field in enumerate(typedef.FieldList): + if field.table is None: + logger.debug("TypeDef[0x%X] FieldList[0x%X] table is None", rid, idx) + continue + if field.row is None: + logger.debug("TypeDef[0x%X] FieldList[0x%X] row is None", rid, idx) + continue + + #typedef_name = [] + #if rid in nested_class_table: + # typedef.TypeNamespace, typedef.TypeName = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) + + token: int = calculate_dotnet_token_value(field.table.number, field.row_index) + yield DnType(token, (typedef.TypeName,), namespace=typedef.TypeNamespace, member=field.row.Name) + + +def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: + """get managed methods from MethodDef table""" + for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number): + assert isinstance(method_def, dnfile.mdtable.MethodDefRow) + + if not method_def.ImplFlags.miIL or any((method_def.Flags.mdAbstract, method_def.Flags.mdPinvokeImpl)): + # skip methods that do not have a method body + continue + + body: Optional[CilMethodBody] = read_dotnet_method_body(pe, method_def) + if body is None: + logger.debug("MethodDef[0x%X] method body is None", rid) + continue + + token: int = calculate_dotnet_token_value(dnfile.mdtable.MethodDef.number, rid) + yield token, body + + +def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]: + """get unmanaged imports from ImplMap table + + see https://www.ntcore.com/files/dotnetformat.htm + + 28 - ImplMap Table + ImplMap table holds information about unmanaged methods that can be reached from managed code, using PInvoke dispatch + MemberForwarded (index into the Field or MethodDef table; more precisely, a MemberForwarded coded index) + ImportName (index into the String heap) + ImportScope (index into the ModuleRef table) + """ + for rid, impl_map in iter_dotnet_table(pe, dnfile.mdtable.ImplMap.number): + assert isinstance(impl_map, dnfile.mdtable.ImplMapRow) + + module: str + if impl_map.ImportScope.row is None: + logger.debug("ImplMap[0x%X] ImportScope row is None", rid) + module = "" + else: + module = impl_map.ImportScope.row.Name + method: str = impl_map.ImportName + + member_forward_table: int + if impl_map.MemberForwarded.table is None: + logger.debug("ImplMap[0x%X] MemberForwarded table is None", rid) + continue + else: + member_forward_table = impl_map.MemberForwarded.table.number + member_forward_row: int = impl_map.MemberForwarded.row_index + + # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the + # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded + # MethodDef table token to help us later record native import method calls made from CIL + token: int = calculate_dotnet_token_value(member_forward_table, member_forward_row) + + # like Kernel32.dll + if module and "." in module: + module = module.split(".")[0] + + # like kernel32.CreateFileA + yield DnUnmanagedMethod(token, module, method) + +def get_nested_class_table(pe): + nested_class_table = {} + + # Used to find nested classes in typedef + for rid, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): + assert isinstance(nestedclass, dnfile.mdtable.NestedClassRow) + nested_class_table[nestedclass.NestedClass.row_index] = nestedclass.EnclosingClass.row_index + + return nested_class_table + +def get_typedef_class_table(pe): + typedef_class_table = [] + + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): + assert isinstance(typedef, dnfile.mdtable.TypeDefRow) + typedef_class_table.append((typedef.TypeName, typedef.TypeNamespace)) + + return typedef_class_table + +def get_typeref_table(pe): + typeref_table = [] + + # Used to track values in typeref table + for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): + assert isinstance(typeref, dnfile.mdtable.TypeRefRow) + typeref_table.append((typeref.TypeName, typeref.TypeNamespace, typeref.ResolutionScope.row_index, type(typeref.ResolutionScope.table))) + + return typeref_table + +def typedef_helper(index, nested_class_table, typedef_class_table, n, name): + # Append the current typeref name + n.append(name) + + while nested_class_table[index] in nested_class_table: + name = typedef_class_table[nested_class_table[index]-1][0] + n.append(name) + index = nested_class_table[index] + + # Document the root enclosing details + n.append(typedef_class_table[nested_class_table[index]-1][0]) + namespace = typedef_class_table[nested_class_table[index]-1][1] + + return namespace, tuple(n[::-1]) + +def typeref_helper(index, typeref_table, n, name): + # Append the current typeref name + n.append(name) + + while typeref_table[index - 1][3] is dnfile.mdtable.TypeRef: + # Recursively call helper function with enclosing typeref details + name = typeref_table[index - 1][0] + n.append(name) + index = typeref_table[index - 1][2] + + # Document the root enclosing details + n.append(typeref_table[index - 1][0]) + namespace = typeref_table[index - 1][1] + + return namespace, tuple(n[::-1]) + +def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: + """get .NET types from TypeDef and TypeRef tables""" + nested_class_table = get_nested_class_table(pe) + typedef_class_table = get_typedef_class_table(pe) + + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): + assert isinstance(typedef, dnfile.mdtable.TypeDefRow) + + typedef_name = [] + if rid in nested_class_table: + typedef.TypeNamespace, typedef.TypeName = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) + + typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) + # TODO: + # There is an issue in the `yield DnType` where the value passed through for the name appears to be inconsequential to the final naming of the function + # If a variable `name` is created to store the value of typedef.TypeName, and the value of `name` is modified and passed through to `yield DnType`... + # then the final value displayed by CAPA is still the original and unmodified value of typedef.TypeName, not the intended and modified value of `name` + yield DnType(typedef_token, typedef.TypeName, namespace=typedef.TypeNamespace) + + + typeref_table = get_typeref_table(pe) + + for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): + assert isinstance(typeref, dnfile.mdtable.TypeRefRow) + + # If the ResolutionScope decodes to a typeRef type, then it is nested + typeref_name = [] + if type(typeref.ResolutionScope.table) == dnfile.mdtable.TypeRef: + typeref.TypeNamespace, typeref.TypeName = typeref_helper(typeref.ResolutionScope.row_index, typeref_table, typeref_name, typeref.TypeName) + + typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) + yield DnType(typeref_token, typeref.TypeName, namespace=typeref.TypeNamespace) + +def calculate_dotnet_token_value(table: int, rid: int) -> int: + return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) + + +def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool: + assert pe.net is not None + assert pe.net.Flags is not None + + return not bool(pe.net.Flags.CLR_ILONLY) + + +def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[Tuple[int, dnfile.base.MDTableRow]]: + assert pe.net is not None + assert pe.net.mdtables is not None + + for rid, row in enumerate(pe.net.mdtables.tables.get(table_index, [])): + # .NET tables are 1-indexed + yield rid + 1, row From bfcbb0dcb33adcc90ae967cef19d6fae76cece6f Mon Sep 17 00:00:00 2001 From: Blas Date: Sat, 23 Dec 2023 12:38:29 -0500 Subject: [PATCH 10/49] Update capa/features/extractors/dnfile/helpers.py Co-authored-by: Mike Hunhoff --- capa/features/extractors/dnfile/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index fd863831d..acdbabeae 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -397,7 +397,7 @@ def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: # If the ResolutionScope decodes to a typeRef type, then it is nested n = [] - if type(typeref.ResolutionScope.table) == dnfile.mdtable.TypeRef: + if type(typeref.ResolutionScope.table) is dnfile.mdtable.TypeRef: typeref.TypeName = typeref_helper(typeref.ResolutionScope.row_index, typeref_table, n, typeref.TypeName) typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) From 30267c0a7c0437e0ff063e4b54144a716c8811bd Mon Sep 17 00:00:00 2001 From: Blas Date: Sat, 23 Dec 2023 12:55:42 -0500 Subject: [PATCH 11/49] Update helper functions, variables, and draft further implementations --- capa/features/extractors/dnfile/helpers.py | 95 ++++++++++++---------- 1 file changed, 50 insertions(+), 45 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index acdbabeae..6bbff00ca 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -108,9 +108,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: TypeName (index into String heap) TypeNamespace (index into String heap) """ - #typeref_table = get_typeref_table(pe) - #typeref_class_names = [] - #typeref_assembled_class_names = {} + typeref_table = get_typeref_table(pe) for rid, member_ref in iter_dotnet_table(pe, dnfile.mdtable.MemberRef.number): assert isinstance(member_ref, dnfile.mdtable.MemberRefRow) @@ -135,12 +133,14 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: # remove get_/set_ from MemberRef name member_ref_name = member_ref_name[4:] - #typeref_class_names.append((member_ref.Class.row.TypeName, member_ref.Class.row.TypeNamespace)) - #member_ref.Class.row.TypeNamespace, member_ref.Class.row.TypeName, typeref_assembled_class_names = is_nested_helper(rid, typeref_table, typeref_class_names, member_ref.Class.row, typeref_assembled_class_names, False) + #typeref_name = [] + #if type(member_ref.Class.table) is dnfile.mdtable.TypeRef: + # if type(member_ref.Class.row.TypeName) is str: + # member_ref.Class.row.TypeNamespace, member_ref.Class.row.TypeName = typeref_helper(member_ref.Class.row_index, typeref_table, typeref_name, member_ref.Class.row.TypeName) yield DnType( token, - member_ref.Class.row.TypeName, + (member_ref.Class.row.TypeName,), namespace=member_ref.Class.row.TypeNamespace, member=member_ref_name, access=access, @@ -195,9 +195,8 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: TypeNamespace (index into String heap) MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type) """ - #nested_class_table = get_nested_class_table(pe) - #typedef_class_names = [] - #typedef_assembled_class_names = {} + nested_class_table = get_nested_class_table(pe) + typedef_class_table = get_typedef_class_table(pe) accessor_map: Dict[int, str] = {} for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe): @@ -222,10 +221,11 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: # remove get_/set_ method_name = method_name[4:] - #typedef_class_names.append((method_name, typedef.TypeNamespace)) - #typedef.TypeNamespace, typedef.TypeName, typedef_assembled_class_names = is_nested_helper(idx, nested_class_table, typedef_class_names, typedef, typedef_assembled_class_names, True) + #typedef_name = [] + #if rid in nested_class_table: + # typedef.TypeNamespace, typedef.TypeName = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) - yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method_name, access=access) + yield DnType(token, (typedef.TypeName,), namespace=typedef.TypeNamespace, member=method_name, access=access) def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: @@ -251,9 +251,11 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: continue token: int = calculate_dotnet_token_value(field.table.number, field.row_index) - # Do here as well + #typedef_name = [] + #if rid in nested_class_table: + # typedef.TypeNamespace, typedef.TypeName = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) - yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=field.row.Name) + yield DnType(token, (typedef.TypeName,), namespace=typedef.TypeNamespace, member=field.row.Name) def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: @@ -335,20 +337,6 @@ def get_typedef_class_table(pe): return typedef_class_table -def typedef_helper(index, nested_class_table, typedef_class_table, n, name): - # Append the current typeref name - n.append(name) - - if nested_class_table[index] in nested_class_table: - typedef_helper(nested_class_table[index], nested_class_table, typedef_class_table, n, typedef_class_table[nested_class_table[index]-1][0]) - else: - # Document the root enclosing details - n.append(typedef_class_table[nested_class_table[index]-1][0]) - namespace = typedef_class_table[nested_class_table[index]-1][1] - n.append(namespace) - - return tuple(n[::-1]) - def get_typeref_table(pe): typeref_table = [] @@ -358,21 +346,37 @@ def get_typeref_table(pe): typeref_table.append((typeref.TypeName, typeref.TypeNamespace, typeref.ResolutionScope.row_index, type(typeref.ResolutionScope.table))) return typeref_table - -def typeref_helper(index, typeref_table, n, name): + +def resolve_typedef_nested_classes(index, nested_class_table, typedef_class_table, classes, name): # Append the current typeref name - n.append(name) + classes.append(name) - if typeref_table[index - 1][3] == dnfile.mdtable.TypeRef: - # Recursively call helper function with enclosing typeref details - typeref_helper(typeref_table[index - 1][2], typeref_table, n, typeref_table[index - 1][0]) - else: - # Document the root enclosing details - n.append(typeref_table[index - 1][0]) - namespace = typeref_table[index - 1][1] - n.append(namespace) + while nested_class_table[index] in nested_class_table: + name = typedef_class_table[nested_class_table[index]-1][0] + classes.append(name) + index = nested_class_table[index] - return tuple(n[::-1]) + # Document the root enclosing details + classes.append(typedef_class_table[nested_class_table[index]-1][0]) + namespace = typedef_class_table[nested_class_table[index]-1][1] + + return namespace, tuple(classes[::-1]) + +def resolve_typeref_nested_classes(index, typeref_table, classes, name): + # Append the current typeref name + classes.append(name) + + while typeref_table[index - 1][3] is dnfile.mdtable.TypeRef: + # Recursively call helper function with enclosing typeref details + name = typeref_table[index - 1][0] + classes.append(name) + index = typeref_table[index - 1][2] + + # Document the root enclosing details + classes.append(typeref_table[index - 1][0]) + namespace = typeref_table[index - 1][1] + + return namespace, tuple(classes[::-1]) def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: """get .NET types from TypeDef and TypeRef tables""" @@ -383,9 +387,10 @@ def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: assert isinstance(typedef, dnfile.mdtable.TypeDefRow) typedef_name = [] + # name = (typedef.TypeName,) if rid in nested_class_table: - typedef.TypeName = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) - + typedef.TypeNamespace, typedef.TypeName = resolve_typedef_nested_classes(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) + typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) yield DnType(typedef_token, typedef.TypeName, namespace=typedef.TypeNamespace) @@ -396,9 +401,9 @@ def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: assert isinstance(typeref, dnfile.mdtable.TypeRefRow) # If the ResolutionScope decodes to a typeRef type, then it is nested - n = [] - if type(typeref.ResolutionScope.table) is dnfile.mdtable.TypeRef: - typeref.TypeName = typeref_helper(typeref.ResolutionScope.row_index, typeref_table, n, typeref.TypeName) + typeref_name = [] + if type(typeref.ResolutionScope.table) == dnfile.mdtable.TypeRef: + typeref.TypeNamespace, typeref.TypeName = resolve_typeref_nested_classes(typeref.ResolutionScope.row_index, typeref_table, typeref_name, typeref.TypeName) typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) yield DnType(typeref_token, typeref.TypeName, namespace=typeref.TypeNamespace) From 4f05fc6bd3fe825dae83c0b804d168d788137ad4 Mon Sep 17 00:00:00 2001 From: Blas Date: Tue, 26 Dec 2023 17:01:09 -0500 Subject: [PATCH 12/49] Update helpers.py --- capa/features/extractors/dnfile/helpers.py | 77 ++++++++++++---------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 6bbff00ca..b9827c6a8 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -133,15 +133,17 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: # remove get_/set_ from MemberRef name member_ref_name = member_ref_name[4:] - #typeref_name = [] - #if type(member_ref.Class.table) is dnfile.mdtable.TypeRef: - # if type(member_ref.Class.row.TypeName) is str: - # member_ref.Class.row.TypeNamespace, member_ref.Class.row.TypeName = typeref_helper(member_ref.Class.row_index, typeref_table, typeref_name, member_ref.Class.row.TypeName) + typeref_name = [] + typerefname = (member_ref.Class.row.TypeName,) + typerefnamespace = member_ref.Class.row.TypeNamespace + if type(member_ref.Class.table) is dnfile.mdtable.TypeRef: + if type(member_ref.Class.row.TypeName) is str: + typerefnamespace, typerefname = typeref_helper(member_ref.Class.row_index, typeref_table, typeref_name, member_ref.Class.row.TypeName) yield DnType( token, - (member_ref.Class.row.TypeName,), - namespace=member_ref.Class.row.TypeNamespace, + typerefname, + namespace=typerefnamespace, member=member_ref_name, access=access, ) @@ -221,11 +223,13 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: # remove get_/set_ method_name = method_name[4:] - #typedef_name = [] - #if rid in nested_class_table: - # typedef.TypeNamespace, typedef.TypeName = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) + typedef_name = [] + typedefname = (typedef.TypeName,) + typedefnamespace = typedef.TypeNamespace + if rid in nested_class_table: + typedefnamespace, typedefname = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) - yield DnType(token, (typedef.TypeName,), namespace=typedef.TypeNamespace, member=method_name, access=access) + yield DnType(token, typedefname, namespace=typedefnamespace, member=method_name, access=access) def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: @@ -239,6 +243,9 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: TypeNamespace (index into String heap) FieldList (index into Field table; it marks the first of a contiguous run of Fields owned by this Type) """ + nested_class_table = get_nested_class_table(pe) + typedef_class_table = get_typedef_class_table(pe) + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) @@ -249,13 +256,14 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: if field.row is None: logger.debug("TypeDef[0x%X] FieldList[0x%X] row is None", rid, idx) continue - token: int = calculate_dotnet_token_value(field.table.number, field.row_index) - #typedef_name = [] - #if rid in nested_class_table: - # typedef.TypeNamespace, typedef.TypeName = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) + typedef_name = [] + typedefname = (typedef.TypeName,) + if rid in nested_class_table: + typedef.TypeNamespace, typedefname = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) - yield DnType(token, (typedef.TypeName,), namespace=typedef.TypeNamespace, member=field.row.Name) + token: int = calculate_dotnet_token_value(field.table.number, field.row_index) + yield DnType(token, typedefname, namespace=typedef.TypeNamespace, member=field.row.Name) def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: @@ -347,66 +355,69 @@ def get_typeref_table(pe): return typeref_table -def resolve_typedef_nested_classes(index, nested_class_table, typedef_class_table, classes, name): +def typedef_helper(index, nested_class_table, typedef_class_table, n, name): # Append the current typeref name - classes.append(name) + n.append(name) while nested_class_table[index] in nested_class_table: name = typedef_class_table[nested_class_table[index]-1][0] - classes.append(name) + n.append(name) index = nested_class_table[index] # Document the root enclosing details - classes.append(typedef_class_table[nested_class_table[index]-1][0]) + n.append(typedef_class_table[nested_class_table[index]-1][0]) namespace = typedef_class_table[nested_class_table[index]-1][1] - return namespace, tuple(classes[::-1]) + return namespace, tuple(n[::-1]) -def resolve_typeref_nested_classes(index, typeref_table, classes, name): +def typeref_helper(index, typeref_table, n, name): # Append the current typeref name - classes.append(name) + #n.append(name) while typeref_table[index - 1][3] is dnfile.mdtable.TypeRef: # Recursively call helper function with enclosing typeref details + n.append(name) # Used to be below name = (...) name = typeref_table[index - 1][0] - classes.append(name) index = typeref_table[index - 1][2] # Document the root enclosing details - classes.append(typeref_table[index - 1][0]) + n.append(typeref_table[index - 1][0]) namespace = typeref_table[index - 1][1] - return namespace, tuple(classes[::-1]) + return namespace, tuple(n[::-1]) def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: """get .NET types from TypeDef and TypeRef tables""" nested_class_table = get_nested_class_table(pe) typedef_class_table = get_typedef_class_table(pe) - + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) typedef_name = [] - # name = (typedef.TypeName,) + typedefname = (typedef.TypeName,) + typedefnamespace = typedef.TypeNamespace if rid in nested_class_table: - typedef.TypeNamespace, typedef.TypeName = resolve_typedef_nested_classes(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) - + typedefnamespace, typedefname = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) + typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) - yield DnType(typedef_token, typedef.TypeName, namespace=typedef.TypeNamespace) + yield DnType(typedef_token, typedefname, namespace=typedefnamespace) typeref_table = get_typeref_table(pe) - + for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): assert isinstance(typeref, dnfile.mdtable.TypeRefRow) # If the ResolutionScope decodes to a typeRef type, then it is nested typeref_name = [] + typerefname = (typeref.TypeName,) + typerefnamespace = typeref.TypeNamespace if type(typeref.ResolutionScope.table) == dnfile.mdtable.TypeRef: - typeref.TypeNamespace, typeref.TypeName = resolve_typeref_nested_classes(typeref.ResolutionScope.row_index, typeref_table, typeref_name, typeref.TypeName) + typerefnamespace, typerefname = typeref_helper(typeref.ResolutionScope.row_index, typeref_table, typeref_name, typeref.TypeName) typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) - yield DnType(typeref_token, typeref.TypeName, namespace=typeref.TypeNamespace) + yield DnType(typeref_token, typerefname, namespace=typerefnamespace) def calculate_dotnet_token_value(table: int, rid: int) -> int: return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) From 43e972ab560f3e8661f67b2bd13d50f5eebd6454 Mon Sep 17 00:00:00 2001 From: Blas Date: Tue, 26 Dec 2023 17:03:26 -0500 Subject: [PATCH 13/49] Update types.py --- capa/features/extractors/dnfile/types.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py index 935e6120f..31ab4245d 100644 --- a/capa/features/extractors/dnfile/types.py +++ b/capa/features/extractors/dnfile/types.py @@ -43,12 +43,14 @@ def __repr__(self): @staticmethod def format_name(class_: tuple, namespace: str = "", member: str = ""): - if len(class_) > 1: - # like Android.Graphics.Bitmap/CompressFormat - class_ = "/".join(class_) - else: - # like CompilationRelaxationsAttribute - class_ = "".join(class_) + # Temporarily re-introducing type checking due to issue caused with an uncontrolled `str` types passed through + if type(class_) is tuple: + if len(class_) > 1: + # like Android.Graphics.Bitmap/CompressFormat + class_ = "/".join(class_) + else: + # like CompilationRelaxationsAttribute + class_ = "".join(class_) # like File::OpenRead name: str = f"{class_}::{member}" if member else class_ if namespace: From 7381a1c75fd2c0d13b7daf8a8d84af3362b5744f Mon Sep 17 00:00:00 2001 From: Blas Date: Wed, 27 Dec 2023 10:30:49 -0500 Subject: [PATCH 14/49] Directly access TypeDef and TypeRef tables --- capa/features/extractors/dnfile/helpers.py | 72 ++++++++-------------- 1 file changed, 26 insertions(+), 46 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index b9827c6a8..4fddf85db 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -108,7 +108,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: TypeName (index into String heap) TypeNamespace (index into String heap) """ - typeref_table = get_typeref_table(pe) + typeref_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeRef.number, []) for rid, member_ref in iter_dotnet_table(pe, dnfile.mdtable.MemberRef.number): assert isinstance(member_ref, dnfile.mdtable.MemberRefRow) @@ -198,7 +198,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type) """ nested_class_table = get_nested_class_table(pe) - typedef_class_table = get_typedef_class_table(pe) + typedef_class_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeDef.number, []) accessor_map: Dict[int, str] = {} for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe): @@ -244,7 +244,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: FieldList (index into Field table; it marks the first of a contiguous run of Fields owned by this Type) """ nested_class_table = get_nested_class_table(pe) - typedef_class_table = get_typedef_class_table(pe) + typedef_class_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeDef.number, []) for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) @@ -336,61 +336,42 @@ def get_nested_class_table(pe): return nested_class_table -def get_typedef_class_table(pe): - typedef_class_table = [] - - for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): - assert isinstance(typedef, dnfile.mdtable.TypeDefRow) - typedef_class_table.append((typedef.TypeName, typedef.TypeNamespace)) - - return typedef_class_table - -def get_typeref_table(pe): - typeref_table = [] - - # Used to track values in typeref table - for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): - assert isinstance(typeref, dnfile.mdtable.TypeRefRow) - typeref_table.append((typeref.TypeName, typeref.TypeNamespace, typeref.ResolutionScope.row_index, type(typeref.ResolutionScope.table))) - - return typeref_table - -def typedef_helper(index, nested_class_table, typedef_class_table, n, name): +def typedef_helper(index, nested_class_table, typedef_class_table, typedef_name, name): # Append the current typeref name - n.append(name) + typedef_name.append(name) while nested_class_table[index] in nested_class_table: - name = typedef_class_table[nested_class_table[index]-1][0] - n.append(name) + name = typedef_class_table[nested_class_table[index]-1].TypeName + typedef_name.append(name) index = nested_class_table[index] # Document the root enclosing details - n.append(typedef_class_table[nested_class_table[index]-1][0]) - namespace = typedef_class_table[nested_class_table[index]-1][1] + enclosing_name = typedef_class_table[nested_class_table[index]-1].TypeName + typedef_name.append(enclosing_name) + namespace = typedef_class_table[nested_class_table[index]-1].TypeNamespace - return namespace, tuple(n[::-1]) - -def typeref_helper(index, typeref_table, n, name): - # Append the current typeref name - #n.append(name) + return namespace, tuple(typedef_name[::-1]) - while typeref_table[index - 1][3] is dnfile.mdtable.TypeRef: +def typeref_helper(index, typeref_table, typeref_name, name): + # Not appending the current typeref name to avoid potential duplicate + + while type(typeref_table[index - 1].ResolutionScope.table) is dnfile.mdtable.TypeRef: # Recursively call helper function with enclosing typeref details - n.append(name) # Used to be below name = (...) - name = typeref_table[index - 1][0] - index = typeref_table[index - 1][2] + typeref_name.append(name) + name = typeref_table[index - 1].TypeName + index = typeref_table[index - 1].ResolutionScope.row_index # Document the root enclosing details - n.append(typeref_table[index - 1][0]) - namespace = typeref_table[index - 1][1] + typeref_name.append(typeref_table[index - 1].TypeName) + namespace = typeref_table[index - 1].TypeNamespace - return namespace, tuple(n[::-1]) + return namespace, tuple(typeref_name[::-1]) def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: """get .NET types from TypeDef and TypeRef tables""" nested_class_table = get_nested_class_table(pe) - typedef_class_table = get_typedef_class_table(pe) - + typedef_class_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeDef.number, []) + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) @@ -403,13 +384,12 @@ def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) yield DnType(typedef_token, typedefname, namespace=typedefnamespace) - - typeref_table = get_typeref_table(pe) - + typeref_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeRef.number, []) + for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): assert isinstance(typeref, dnfile.mdtable.TypeRefRow) - # If the ResolutionScope decodes to a typeRef type, then it is nested + # If the ResolutionScope decodes to a typeRef type then it is nested typeref_name = [] typerefname = (typeref.TypeName,) typerefnamespace = typeref.TypeNamespace From b943ebbcace80d4752e991e0870fa7ef4f0568db Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Wed, 27 Dec 2023 12:05:16 -0500 Subject: [PATCH 15/49] Update helpers.py --- capa/features/extractors/dnfile/helpers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 4fddf85db..9ab5dcb81 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -259,11 +259,12 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: typedef_name = [] typedefname = (typedef.TypeName,) + typedefnamespace = typedef.TypeNamespace if rid in nested_class_table: - typedef.TypeNamespace, typedefname = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) + typedefnamespace, typedefname = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) token: int = calculate_dotnet_token_value(field.table.number, field.row_index) - yield DnType(token, typedefname, namespace=typedef.TypeNamespace, member=field.row.Name) + yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name) def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: From 931cd8438c20dc1bc0b310f9196b5b4a31ceb582 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Wed, 27 Dec 2023 12:56:41 -0500 Subject: [PATCH 16/49] Update helpers.py --- capa/features/extractors/dnfile/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 9ab5dcb81..5e3d674fe 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -381,7 +381,7 @@ def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: typedefnamespace = typedef.TypeNamespace if rid in nested_class_table: typedefnamespace, typedefname = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) - + typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) yield DnType(typedef_token, typedefname, namespace=typedefnamespace) From a1ea3f8ac222f3819dc8cc68aa86a96b1bb4c830 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Wed, 27 Dec 2023 12:56:59 -0500 Subject: [PATCH 17/49] Delete capa/features/extractors/dnfile/helpers_draft.py --- .../extractors/dnfile/helpers_draft.py | 434 ------------------ 1 file changed, 434 deletions(-) delete mode 100644 capa/features/extractors/dnfile/helpers_draft.py diff --git a/capa/features/extractors/dnfile/helpers_draft.py b/capa/features/extractors/dnfile/helpers_draft.py deleted file mode 100644 index 93656a7f3..000000000 --- a/capa/features/extractors/dnfile/helpers_draft.py +++ /dev/null @@ -1,434 +0,0 @@ -# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: [package root]/LICENSE.txt -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and limitations under the License. - -from __future__ import annotations - -import logging -from typing import Dict, Tuple, Union, Iterator, Optional - -import dnfile -from dncil.cil.body import CilMethodBody -from dncil.cil.error import MethodBodyFormatError -from dncil.clr.token import Token, StringToken, InvalidToken -from dncil.cil.body.reader import CilMethodBodyReaderBase - -from capa.features.common import FeatureAccess -from capa.features.extractors.dnfile.types import DnType, DnUnmanagedMethod - -logger = logging.getLogger(__name__) - - -class DnfileMethodBodyReader(CilMethodBodyReaderBase): - def __init__(self, pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow): - self.pe: dnfile.dnPE = pe - self.offset: int = self.pe.get_offset_from_rva(row.Rva) - - def read(self, n: int) -> bytes: - data: bytes = self.pe.get_data(self.pe.get_rva_from_offset(self.offset), n) - self.offset += n - return data - - def tell(self) -> int: - return self.offset - - def seek(self, offset: int) -> int: - self.offset = offset - return self.offset - - -def resolve_dotnet_token(pe: dnfile.dnPE, token: Token) -> Union[dnfile.base.MDTableRow, InvalidToken, str]: - """map generic token to string or table row""" - assert pe.net is not None - assert pe.net.mdtables is not None - - if isinstance(token, StringToken): - user_string: Optional[str] = read_dotnet_user_string(pe, token) - if user_string is None: - return InvalidToken(token.value) - return user_string - - table: Optional[dnfile.base.ClrMetaDataTable] = pe.net.mdtables.tables.get(token.table) - if table is None: - # table index is not valid - return InvalidToken(token.value) - - try: - return table.rows[token.rid - 1] - except IndexError: - # table index is valid but row index is not valid - return InvalidToken(token.value) - - -def read_dotnet_method_body(pe: dnfile.dnPE, row: dnfile.mdtable.MethodDefRow) -> Optional[CilMethodBody]: - """read dotnet method body""" - try: - return CilMethodBody(DnfileMethodBodyReader(pe, row)) - except MethodBodyFormatError as e: - logger.debug("failed to parse managed method body @ 0x%08x (%s)", row.Rva, e) - return None - - -def read_dotnet_user_string(pe: dnfile.dnPE, token: StringToken) -> Optional[str]: - """read user string from #US stream""" - assert pe.net is not None - - if pe.net.user_strings is None: - # stream may not exist (seen in obfuscated .NET) - logger.debug("#US stream does not exist for stream index 0x%08x", token.rid) - return None - - try: - user_string: Optional[dnfile.stream.UserString] = pe.net.user_strings.get_us(token.rid) - except UnicodeDecodeError as e: - logger.debug("failed to decode #US stream index 0x%08x (%s)", token.rid, e) - return None - - if user_string is None: - return None - - return user_string.value - - -def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: - """get managed imports from MemberRef table - - see https://www.ntcore.com/files/dotnetformat.htm - - 10 - MemberRef Table - Each row represents an imported method - Class (index into the TypeRef, ModuleRef, MethodDef, TypeSpec or TypeDef tables) - Name (index into String heap) - 01 - TypeRef Table - Each row represents an imported class, its namespace and the assembly which contains it - TypeName (index into String heap) - TypeNamespace (index into String heap) - """ - typeref_table = get_typeref_table(pe) - - for rid, member_ref in iter_dotnet_table(pe, dnfile.mdtable.MemberRef.number): - assert isinstance(member_ref, dnfile.mdtable.MemberRefRow) - - if not isinstance(member_ref.Class.row, dnfile.mdtable.TypeRefRow): - # only process class imports from TypeRef table - continue - - token: int = calculate_dotnet_token_value(dnfile.mdtable.MemberRef.number, rid) - access: Optional[str] - - # assume .NET imports starting with get_/set_ are used to access a property - if member_ref.Name.startswith("get_"): - access = FeatureAccess.READ - elif member_ref.Name.startswith("set_"): - access = FeatureAccess.WRITE - else: - access = None - - member_ref_name: str = member_ref.Name - if member_ref_name.startswith(("get_", "set_")): - # remove get_/set_ from MemberRef name - member_ref_name = member_ref_name[4:] - - #typeref_name = [] - #if type(member_ref.Class.table) is dnfile.mdtable.TypeRef: - # if type(member_ref.Class.row.TypeName) is str: - # member_ref.Class.row.TypeNamespace, member_ref.Class.row.TypeName = typeref_helper(member_ref.Class.row_index, typeref_table, typeref_name, member_ref.Class.row.TypeName) - - yield DnType( - token, - member_ref.Class.row.TypeName, - namespace=member_ref.Class.row.TypeNamespace, - member=member_ref_name, - access=access, - ) - - -def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: - """get MethodDef methods used to access properties - - see https://www.ntcore.com/files/dotnetformat.htm - - 24 - MethodSemantics Table - Links Events and Properties to specific methods. For example one Event can be associated to more methods. A property uses this table to associate get/set methods. - Semantics (a 2-byte bitmask of type MethodSemanticsAttributes) - Method (index into the MethodDef table) - Association (index into the Event or Property table; more precisely, a HasSemantics coded index) - """ - for rid, method_semantics in iter_dotnet_table(pe, dnfile.mdtable.MethodSemantics.number): - assert isinstance(method_semantics, dnfile.mdtable.MethodSemanticsRow) - - if method_semantics.Association.row is None: - logger.debug("MethodSemantics[0x%X] Association row is None", rid) - continue - - if isinstance(method_semantics.Association.row, dnfile.mdtable.EventRow): - # ignore events - logger.debug("MethodSemantics[0x%X] ignoring Event", rid) - continue - - if method_semantics.Method.table is None: - logger.debug("MethodSemantics[0x%X] Method table is None", rid) - continue - - token: int = calculate_dotnet_token_value( - method_semantics.Method.table.number, method_semantics.Method.row_index - ) - - if method_semantics.Semantics.msSetter: - yield token, FeatureAccess.WRITE - elif method_semantics.Semantics.msGetter: - yield token, FeatureAccess.READ - - -def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: - """get managed method names from TypeDef table - - see https://www.ntcore.com/files/dotnetformat.htm - - 02 - TypeDef Table - Each row represents a class in the current assembly. - TypeName (index into String heap) - TypeNamespace (index into String heap) - MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type) - """ - nested_class_table = get_nested_class_table(pe) - typedef_class_table = get_typedef_class_table(pe) - - accessor_map: Dict[int, str] = {} - for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe): - accessor_map[methoddef] = methoddef_access - - for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): - assert isinstance(typedef, dnfile.mdtable.TypeDefRow) - - for idx, method in enumerate(typedef.MethodList): - if method.table is None: - logger.debug("TypeDef[0x%X] MethodList[0x%X] table is None", rid, idx) - continue - if method.row is None: - logger.debug("TypeDef[0x%X] MethodList[0x%X] row is None", rid, idx) - continue - - token: int = calculate_dotnet_token_value(method.table.number, method.row_index) - access: Optional[str] = accessor_map.get(token) - - method_name: str = method.row.Name - if method_name.startswith(("get_", "set_")): - # remove get_/set_ - method_name = method_name[4:] - - #typedef_name = [] - #if rid in nested_class_table: - # typedef.TypeNamespace, typedef.TypeName = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) - - yield DnType(token, typedef.TypeName, namespace=typedef.TypeNamespace, member=method_name, access=access) - - -def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: - """get fields from TypeDef table - - see https://www.ntcore.com/files/dotnetformat.htm - - 02 - TypeDef Table - Each row represents a class in the current assembly. - TypeName (index into String heap) - TypeNamespace (index into String heap) - FieldList (index into Field table; it marks the first of a contiguous run of Fields owned by this Type) - """ - nested_class_table = get_nested_class_table(pe) - typedef_class_table = get_typedef_class_table(pe) - - for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): - assert isinstance(typedef, dnfile.mdtable.TypeDefRow) - - for idx, field in enumerate(typedef.FieldList): - if field.table is None: - logger.debug("TypeDef[0x%X] FieldList[0x%X] table is None", rid, idx) - continue - if field.row is None: - logger.debug("TypeDef[0x%X] FieldList[0x%X] row is None", rid, idx) - continue - - #typedef_name = [] - #if rid in nested_class_table: - # typedef.TypeNamespace, typedef.TypeName = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) - - token: int = calculate_dotnet_token_value(field.table.number, field.row_index) - yield DnType(token, (typedef.TypeName,), namespace=typedef.TypeNamespace, member=field.row.Name) - - -def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: - """get managed methods from MethodDef table""" - for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number): - assert isinstance(method_def, dnfile.mdtable.MethodDefRow) - - if not method_def.ImplFlags.miIL or any((method_def.Flags.mdAbstract, method_def.Flags.mdPinvokeImpl)): - # skip methods that do not have a method body - continue - - body: Optional[CilMethodBody] = read_dotnet_method_body(pe, method_def) - if body is None: - logger.debug("MethodDef[0x%X] method body is None", rid) - continue - - token: int = calculate_dotnet_token_value(dnfile.mdtable.MethodDef.number, rid) - yield token, body - - -def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod]: - """get unmanaged imports from ImplMap table - - see https://www.ntcore.com/files/dotnetformat.htm - - 28 - ImplMap Table - ImplMap table holds information about unmanaged methods that can be reached from managed code, using PInvoke dispatch - MemberForwarded (index into the Field or MethodDef table; more precisely, a MemberForwarded coded index) - ImportName (index into the String heap) - ImportScope (index into the ModuleRef table) - """ - for rid, impl_map in iter_dotnet_table(pe, dnfile.mdtable.ImplMap.number): - assert isinstance(impl_map, dnfile.mdtable.ImplMapRow) - - module: str - if impl_map.ImportScope.row is None: - logger.debug("ImplMap[0x%X] ImportScope row is None", rid) - module = "" - else: - module = impl_map.ImportScope.row.Name - method: str = impl_map.ImportName - - member_forward_table: int - if impl_map.MemberForwarded.table is None: - logger.debug("ImplMap[0x%X] MemberForwarded table is None", rid) - continue - else: - member_forward_table = impl_map.MemberForwarded.table.number - member_forward_row: int = impl_map.MemberForwarded.row_index - - # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the - # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the MemberForwarded - # MethodDef table token to help us later record native import method calls made from CIL - token: int = calculate_dotnet_token_value(member_forward_table, member_forward_row) - - # like Kernel32.dll - if module and "." in module: - module = module.split(".")[0] - - # like kernel32.CreateFileA - yield DnUnmanagedMethod(token, module, method) - -def get_nested_class_table(pe): - nested_class_table = {} - - # Used to find nested classes in typedef - for rid, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): - assert isinstance(nestedclass, dnfile.mdtable.NestedClassRow) - nested_class_table[nestedclass.NestedClass.row_index] = nestedclass.EnclosingClass.row_index - - return nested_class_table - -def get_typedef_class_table(pe): - typedef_class_table = [] - - for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): - assert isinstance(typedef, dnfile.mdtable.TypeDefRow) - typedef_class_table.append((typedef.TypeName, typedef.TypeNamespace)) - - return typedef_class_table - -def get_typeref_table(pe): - typeref_table = [] - - # Used to track values in typeref table - for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): - assert isinstance(typeref, dnfile.mdtable.TypeRefRow) - typeref_table.append((typeref.TypeName, typeref.TypeNamespace, typeref.ResolutionScope.row_index, type(typeref.ResolutionScope.table))) - - return typeref_table - -def typedef_helper(index, nested_class_table, typedef_class_table, n, name): - # Append the current typeref name - n.append(name) - - while nested_class_table[index] in nested_class_table: - name = typedef_class_table[nested_class_table[index]-1][0] - n.append(name) - index = nested_class_table[index] - - # Document the root enclosing details - n.append(typedef_class_table[nested_class_table[index]-1][0]) - namespace = typedef_class_table[nested_class_table[index]-1][1] - - return namespace, tuple(n[::-1]) - -def typeref_helper(index, typeref_table, n, name): - # Append the current typeref name - n.append(name) - - while typeref_table[index - 1][3] is dnfile.mdtable.TypeRef: - # Recursively call helper function with enclosing typeref details - name = typeref_table[index - 1][0] - n.append(name) - index = typeref_table[index - 1][2] - - # Document the root enclosing details - n.append(typeref_table[index - 1][0]) - namespace = typeref_table[index - 1][1] - - return namespace, tuple(n[::-1]) - -def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: - """get .NET types from TypeDef and TypeRef tables""" - nested_class_table = get_nested_class_table(pe) - typedef_class_table = get_typedef_class_table(pe) - - for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): - assert isinstance(typedef, dnfile.mdtable.TypeDefRow) - - typedef_name = [] - if rid in nested_class_table: - typedef.TypeNamespace, typedef.TypeName = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) - - typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) - # TODO: - # There is an issue in the `yield DnType` where the value passed through for the name appears to be inconsequential to the final naming of the function - # If a variable `name` is created to store the value of typedef.TypeName, and the value of `name` is modified and passed through to `yield DnType`... - # then the final value displayed by CAPA is still the original and unmodified value of typedef.TypeName, not the intended and modified value of `name` - yield DnType(typedef_token, typedef.TypeName, namespace=typedef.TypeNamespace) - - - typeref_table = get_typeref_table(pe) - - for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): - assert isinstance(typeref, dnfile.mdtable.TypeRefRow) - - # If the ResolutionScope decodes to a typeRef type, then it is nested - typeref_name = [] - if type(typeref.ResolutionScope.table) == dnfile.mdtable.TypeRef: - typeref.TypeNamespace, typeref.TypeName = typeref_helper(typeref.ResolutionScope.row_index, typeref_table, typeref_name, typeref.TypeName) - - typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) - yield DnType(typeref_token, typeref.TypeName, namespace=typeref.TypeNamespace) - -def calculate_dotnet_token_value(table: int, rid: int) -> int: - return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) - - -def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool: - assert pe.net is not None - assert pe.net.Flags is not None - - return not bool(pe.net.Flags.CLR_ILONLY) - - -def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[Tuple[int, dnfile.base.MDTableRow]]: - assert pe.net is not None - assert pe.net.mdtables is not None - - for rid, row in enumerate(pe.net.mdtables.tables.get(table_index, [])): - # .NET tables are 1-indexed - yield rid + 1, row From 0150b17d9a8cb6ebeb3e699b89a85ec51d46ab38 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Wed, 27 Dec 2023 12:57:29 -0500 Subject: [PATCH 18/49] Update types.py --- capa/features/extractors/dnfile/types.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py index 31ab4245d..66dd16782 100644 --- a/capa/features/extractors/dnfile/types.py +++ b/capa/features/extractors/dnfile/types.py @@ -43,14 +43,10 @@ def __repr__(self): @staticmethod def format_name(class_: tuple, namespace: str = "", member: str = ""): - # Temporarily re-introducing type checking due to issue caused with an uncontrolled `str` types passed through - if type(class_) is tuple: - if len(class_) > 1: - # like Android.Graphics.Bitmap/CompressFormat - class_ = "/".join(class_) - else: - # like CompilationRelaxationsAttribute - class_ = "".join(class_) + if len(class_) > 1: + class_ = "/".join(class_) #class_[0] + "." + "/".join(class_[1:]) + else: + class_ = "".join(class_) #f"{class_[0]}.{class_[1]}" # like File::OpenRead name: str = f"{class_}::{member}" if member else class_ if namespace: From 9966ca3d6fbc351c5856e3cac0f7303c444c2713 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Wed, 27 Dec 2023 12:59:35 -0500 Subject: [PATCH 19/49] Update dotnetfile.py --- capa/features/extractors/dotnetfile.py | 63 +++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index a9d36d299..dcaaadc51 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -90,21 +90,80 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple yield Namespace(namespace), NO_ADDRESS +def get_nested_class_table(pe): + nested_class_table = {} + + # Used to find nested classes in typedef + for rid, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): + assert isinstance(nestedclass, dnfile.mdtable.NestedClassRow) + nested_class_table[nestedclass.NestedClass.row_index] = nestedclass.EnclosingClass.row_index + + return nested_class_table + +def typedef_helper(index, nested_class_table, typedef_class_table, typedef_name, name): + # Append the current typeref name + typedef_name.append(name) + + while nested_class_table[index] in nested_class_table: + name = typedef_class_table[nested_class_table[index]-1].TypeName + typedef_name.append(name) + index = nested_class_table[index] + + # Document the root enclosing details + enclosing_name = typedef_class_table[nested_class_table[index]-1].TypeName + typedef_name.append(enclosing_name) + namespace = typedef_class_table[nested_class_table[index]-1].TypeNamespace + + return namespace, tuple(typedef_name[::-1]) + +def typeref_helper(index, typeref_table, typeref_name, name): + # Not appending the current typeref name to avoid potential duplicate + + while type(typeref_table[index - 1].ResolutionScope.table) is dnfile.mdtable.TypeRef: + # Recursively call helper function with enclosing typeref details + typeref_name.append(name) + name = typeref_table[index - 1].TypeName + index = typeref_table[index - 1].ResolutionScope.row_index + + # Document the root enclosing details + typeref_name.append(typeref_table[index - 1].TypeName) + namespace = typeref_table[index - 1].TypeNamespace + + return namespace, tuple(typeref_name[::-1]) + def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]: """emit class features from TypeRef and TypeDef tables""" + nested_class_table = get_nested_class_table(pe) + typedef_class_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeDef.number, []) + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): # emit internal .NET classes assert isinstance(typedef, dnfile.mdtable.TypeDefRow) + typedef_name = [] + typedefname = (typedef.TypeName,) + typedefnamespace = typedef.TypeNamespace + if rid in nested_class_table: + typedefnamespace, typedefname = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) + token = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) - yield Class(DnType.format_name(typedef.TypeName, namespace=typedef.TypeNamespace)), DNTokenAddress(token) + yield Class(DnType.format_name(typedefname, namespace=typedefnamespace)), DNTokenAddress(token) + + typeref_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeRef.number, []) for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): # emit external .NET classes assert isinstance(typeref, dnfile.mdtable.TypeRefRow) + # If the ResolutionScope decodes to a typeRef type then it is nested + typeref_name = [] + typerefname = (typeref.TypeName,) + typerefnamespace = typeref.TypeNamespace + if type(typeref.ResolutionScope.table) == dnfile.mdtable.TypeRef: + typerefnamespace, typerefname = typeref_helper(typeref.ResolutionScope.row_index, typeref_table, typeref_name, typeref.TypeName) + token = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) - yield Class(DnType.format_name(typeref.TypeName, namespace=typeref.TypeNamespace)), DNTokenAddress(token) + yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token) def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]: From 8f16a572dffa2a739a797aadb79e6c5876dcf0a9 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Tue, 2 Jan 2024 15:45:25 -0500 Subject: [PATCH 20/49] Update types.py comment --- capa/features/extractors/dnfile/types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py index 66dd16782..7da9ddf12 100644 --- a/capa/features/extractors/dnfile/types.py +++ b/capa/features/extractors/dnfile/types.py @@ -44,9 +44,9 @@ def __repr__(self): @staticmethod def format_name(class_: tuple, namespace: str = "", member: str = ""): if len(class_) > 1: - class_ = "/".join(class_) #class_[0] + "." + "/".join(class_[1:]) + class_ = "/".join(class_) # Convert tuple to str, separating items with "/" else: - class_ = "".join(class_) #f"{class_[0]}.{class_[1]}" + class_ = "".join(class_) # Convert tuple to str # like File::OpenRead name: str = f"{class_}::{member}" if member else class_ if namespace: From 625720366e51689425df5e10f6a88afcf6b41ffc Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Tue, 2 Jan 2024 16:27:03 -0500 Subject: [PATCH 21/49] Clean extract_file_class_features in dotnetfile.py --- capa/features/extractors/dotnetfile.py | 54 +++++++------------------- 1 file changed, 13 insertions(+), 41 deletions(-) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index dcaaadc51..4e401b1a3 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -40,6 +40,10 @@ get_dotnet_managed_methods, calculate_dotnet_token_value, get_dotnet_unmanaged_imports, + resolve_nested_typeref_helper, + resolve_nested_typedef_name, + enclosing_and_nested_classes_index_table, + ) logger = logging.getLogger(__name__) @@ -99,52 +103,20 @@ def get_nested_class_table(pe): nested_class_table[nestedclass.NestedClass.row_index] = nestedclass.EnclosingClass.row_index return nested_class_table - -def typedef_helper(index, nested_class_table, typedef_class_table, typedef_name, name): - # Append the current typeref name - typedef_name.append(name) - - while nested_class_table[index] in nested_class_table: - name = typedef_class_table[nested_class_table[index]-1].TypeName - typedef_name.append(name) - index = nested_class_table[index] - - # Document the root enclosing details - enclosing_name = typedef_class_table[nested_class_table[index]-1].TypeName - typedef_name.append(enclosing_name) - namespace = typedef_class_table[nested_class_table[index]-1].TypeNamespace - - return namespace, tuple(typedef_name[::-1]) - -def typeref_helper(index, typeref_table, typeref_name, name): - # Not appending the current typeref name to avoid potential duplicate - - while type(typeref_table[index - 1].ResolutionScope.table) is dnfile.mdtable.TypeRef: - # Recursively call helper function with enclosing typeref details - typeref_name.append(name) - name = typeref_table[index - 1].TypeName - index = typeref_table[index - 1].ResolutionScope.row_index - - # Document the root enclosing details - typeref_name.append(typeref_table[index - 1].TypeName) - namespace = typeref_table[index - 1].TypeNamespace - - return namespace, tuple(typeref_name[::-1]) def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]: """emit class features from TypeRef and TypeDef tables""" - nested_class_table = get_nested_class_table(pe) - typedef_class_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeDef.number, []) + nested_class_table = enclosing_and_nested_classes_index_table(pe) for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): # emit internal .NET classes assert isinstance(typedef, dnfile.mdtable.TypeDefRow) - typedef_name = [] - typedefname = (typedef.TypeName,) - typedefnamespace = typedef.TypeNamespace if rid in nested_class_table: - typedefnamespace, typedefname = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) + typedefnamespace, typedefname = resolve_nested_typedef_name(rid, nested_class_table, typedef.TypeName, pe) + else: + typedefname = (typedef.TypeName,) + typedefnamespace = typedef.TypeNamespace token = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) yield Class(DnType.format_name(typedefname, namespace=typedefnamespace)), DNTokenAddress(token) @@ -156,11 +128,11 @@ def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Cla assert isinstance(typeref, dnfile.mdtable.TypeRefRow) # If the ResolutionScope decodes to a typeRef type then it is nested - typeref_name = [] - typerefname = (typeref.TypeName,) - typerefnamespace = typeref.TypeNamespace if type(typeref.ResolutionScope.table) == dnfile.mdtable.TypeRef: - typerefnamespace, typerefname = typeref_helper(typeref.ResolutionScope.row_index, typeref_table, typeref_name, typeref.TypeName) + typerefnamespace, typerefname = resolve_nested_typeref_helper(typeref.ResolutionScope.row_index, typeref.TypeName, pe) + else: + typerefname = (typeref.TypeName,) + typerefnamespace = typeref.TypeNamespace token = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token) From 74abe419fda5051f44f26e19f651b5a5111b04a2 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Tue, 2 Jan 2024 16:28:30 -0500 Subject: [PATCH 22/49] Cleaned up callers, var names, and other small items --- capa/features/extractors/dnfile/helpers.py | 76 +++++++++++----------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 5e3d674fe..fbe04560b 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -108,8 +108,6 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: TypeName (index into String heap) TypeNamespace (index into String heap) """ - typeref_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeRef.number, []) - for rid, member_ref in iter_dotnet_table(pe, dnfile.mdtable.MemberRef.number): assert isinstance(member_ref, dnfile.mdtable.MemberRefRow) @@ -133,12 +131,11 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: # remove get_/set_ from MemberRef name member_ref_name = member_ref_name[4:] - typeref_name = [] - typerefname = (member_ref.Class.row.TypeName,) - typerefnamespace = member_ref.Class.row.TypeNamespace - if type(member_ref.Class.table) is dnfile.mdtable.TypeRef: - if type(member_ref.Class.row.TypeName) is str: - typerefnamespace, typerefname = typeref_helper(member_ref.Class.row_index, typeref_table, typeref_name, member_ref.Class.row.TypeName) + if type(member_ref.Class.table) is dnfile.mdtable.TypeRef and type(member_ref.Class.row.TypeName) is str: + typerefnamespace, typerefname = resolve_nested_typeref_helper(member_ref.Class.row_index, member_ref.Class.row.TypeName, pe) + else: + typerefname = (member_ref.Class.row.TypeName,) + typerefnamespace = member_ref.Class.row.TypeNamespace yield DnType( token, @@ -197,8 +194,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: TypeNamespace (index into String heap) MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type) """ - nested_class_table = get_nested_class_table(pe) - typedef_class_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeDef.number, []) + nested_class_table = enclosing_and_nested_classes_index_table(pe) accessor_map: Dict[int, str] = {} for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe): @@ -223,11 +219,11 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: # remove get_/set_ method_name = method_name[4:] - typedef_name = [] - typedefname = (typedef.TypeName,) - typedefnamespace = typedef.TypeNamespace if rid in nested_class_table: - typedefnamespace, typedefname = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) + typedefnamespace, typedefname = resolve_nested_typedef_name(rid, nested_class_table, typedef.TypeName, pe) + else: + typedefname = (typedef.TypeName,) + typedefnamespace = typedef.TypeNamespace yield DnType(token, typedefname, namespace=typedefnamespace, member=method_name, access=access) @@ -243,8 +239,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: TypeNamespace (index into String heap) FieldList (index into Field table; it marks the first of a contiguous run of Fields owned by this Type) """ - nested_class_table = get_nested_class_table(pe) - typedef_class_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeDef.number, []) + nested_class_table = enclosing_and_nested_classes_index_table(pe) for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) @@ -257,11 +252,11 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: logger.debug("TypeDef[0x%X] FieldList[0x%X] row is None", rid, idx) continue - typedef_name = [] - typedefname = (typedef.TypeName,) - typedefnamespace = typedef.TypeNamespace if rid in nested_class_table: - typedefnamespace, typedefname = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) + typedefnamespace, typedefname = resolve_nested_typedef_name(rid, nested_class_table, typedef.TypeName, pe) + else: + typedefname = (typedef.TypeName,) + typedefnamespace = typedef.TypeNamespace token: int = calculate_dotnet_token_value(field.table.number, field.row_index) yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name) @@ -327,7 +322,8 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod] # like kernel32.CreateFileA yield DnUnmanagedMethod(token, module, method) -def get_nested_class_table(pe): +def enclosing_and_nested_classes_index_table(pe): + """Build index for EnclosingClass based off the NestedClass row index in the nestedclass table""" nested_class_table = {} # Used to find nested classes in typedef @@ -337,11 +333,16 @@ def get_nested_class_table(pe): return nested_class_table -def typedef_helper(index, nested_class_table, typedef_class_table, typedef_name, name): - # Append the current typeref name +def resolve_nested_typedef_name(index, nested_class_table, name, pe): + """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" + typedef_class_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeDef.number, []) + typedef_name = [] + + # Append the current typedef name typedef_name.append(name) while nested_class_table[index] in nested_class_table: + # Iterate through the typedef table to resolve the nested name name = typedef_class_table[nested_class_table[index]-1].TypeName typedef_name.append(name) index = nested_class_table[index] @@ -353,11 +354,15 @@ def typedef_helper(index, nested_class_table, typedef_class_table, typedef_name, return namespace, tuple(typedef_name[::-1]) -def typeref_helper(index, typeref_table, typeref_name, name): +def resolve_nested_typeref_helper(index, name, pe): + """Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" + typeref_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeRef.number, []) + typeref_name = [] + # Not appending the current typeref name to avoid potential duplicate while type(typeref_table[index - 1].ResolutionScope.table) is dnfile.mdtable.TypeRef: - # Recursively call helper function with enclosing typeref details + # Iterate through the typeref table to resolve the nested name typeref_name.append(name) name = typeref_table[index - 1].TypeName index = typeref_table[index - 1].ResolutionScope.row_index @@ -370,32 +375,29 @@ def typeref_helper(index, typeref_table, typeref_name, name): def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: """get .NET types from TypeDef and TypeRef tables""" - nested_class_table = get_nested_class_table(pe) - typedef_class_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeDef.number, []) + nested_class_table = enclosing_and_nested_classes_index_table(pe) for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) - typedef_name = [] - typedefname = (typedef.TypeName,) - typedefnamespace = typedef.TypeNamespace if rid in nested_class_table: - typedefnamespace, typedefname = typedef_helper(rid, nested_class_table, typedef_class_table, typedef_name, typedef.TypeName) + typedefnamespace, typedefname = resolve_nested_typedef_name(rid, nested_class_table, typedef.TypeName, pe) + else: + typedefname = (typedef.TypeName,) + typedefnamespace = typedef.TypeNamespace typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) yield DnType(typedef_token, typedefname, namespace=typedefnamespace) - typeref_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeRef.number, []) - for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): assert isinstance(typeref, dnfile.mdtable.TypeRefRow) # If the ResolutionScope decodes to a typeRef type then it is nested - typeref_name = [] - typerefname = (typeref.TypeName,) - typerefnamespace = typeref.TypeNamespace if type(typeref.ResolutionScope.table) == dnfile.mdtable.TypeRef: - typerefnamespace, typerefname = typeref_helper(typeref.ResolutionScope.row_index, typeref_table, typeref_name, typeref.TypeName) + typerefnamespace, typerefname = resolve_nested_typeref_helper(typeref.ResolutionScope.row_index, typeref.TypeName, pe) + else: + typerefname = (typeref.TypeName,) + typerefnamespace = typeref.TypeNamespace typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) yield DnType(typeref_token, typerefname, namespace=typerefnamespace) From 66f01c06d2aeaed201525dde719b92c447446aaa Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Tue, 2 Jan 2024 17:07:02 -0500 Subject: [PATCH 23/49] Update dotnetfile.py --- capa/features/extractors/dotnetfile.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index 4e401b1a3..f6f3999aa 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -94,16 +94,6 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple yield Namespace(namespace), NO_ADDRESS -def get_nested_class_table(pe): - nested_class_table = {} - - # Used to find nested classes in typedef - for rid, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): - assert isinstance(nestedclass, dnfile.mdtable.NestedClassRow) - nested_class_table[nestedclass.NestedClass.row_index] = nestedclass.EnclosingClass.row_index - - return nested_class_table - def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]: """emit class features from TypeRef and TypeDef tables""" nested_class_table = enclosing_and_nested_classes_index_table(pe) From f8a97cf72f16af508664d43d86e72eb7eb894c78 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Wed, 3 Jan 2024 12:29:02 -0500 Subject: [PATCH 24/49] Clean up caller logic in dotnetfile.py --- capa/features/extractors/dotnetfile.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index f6f3999aa..1ff69ef87 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -43,7 +43,6 @@ resolve_nested_typeref_helper, resolve_nested_typedef_name, enclosing_and_nested_classes_index_table, - ) logger = logging.getLogger(__name__) @@ -93,7 +92,7 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple # namespace do not have an associated token, so we yield 0x0 yield Namespace(namespace), NO_ADDRESS - + def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]: """emit class features from TypeRef and TypeDef tables""" nested_class_table = enclosing_and_nested_classes_index_table(pe) @@ -102,28 +101,17 @@ def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Cla # emit internal .NET classes assert isinstance(typedef, dnfile.mdtable.TypeDefRow) - if rid in nested_class_table: - typedefnamespace, typedefname = resolve_nested_typedef_name(rid, nested_class_table, typedef.TypeName, pe) - else: - typedefname = (typedef.TypeName,) - typedefnamespace = typedef.TypeNamespace - + typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe) + token = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) yield Class(DnType.format_name(typedefname, namespace=typedefnamespace)), DNTokenAddress(token) - typeref_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeRef.number, []) - for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): # emit external .NET classes assert isinstance(typeref, dnfile.mdtable.TypeRefRow) - # If the ResolutionScope decodes to a typeRef type then it is nested - if type(typeref.ResolutionScope.table) == dnfile.mdtable.TypeRef: - typerefnamespace, typerefname = resolve_nested_typeref_helper(typeref.ResolutionScope.row_index, typeref.TypeName, pe) - else: - typerefname = (typeref.TypeName,) - typerefnamespace = typeref.TypeNamespace - + typerefnamespace, typerefname = resolve_nested_typeref_helper(typeref.ResolutionScope.row_index, typeref, pe) + token = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token) From bb381e588049809fbe4c84640790ccc3b99913dd Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Wed, 3 Jan 2024 12:33:33 -0500 Subject: [PATCH 25/49] Clean up callers and update helper logic in helpers.py --- capa/features/extractors/dnfile/helpers.py | 157 ++++++++++++--------- 1 file changed, 89 insertions(+), 68 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index fbe04560b..9b77e612e 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -131,12 +131,8 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: # remove get_/set_ from MemberRef name member_ref_name = member_ref_name[4:] - if type(member_ref.Class.table) is dnfile.mdtable.TypeRef and type(member_ref.Class.row.TypeName) is str: - typerefnamespace, typerefname = resolve_nested_typeref_helper(member_ref.Class.row_index, member_ref.Class.row.TypeName, pe) - else: - typerefname = (member_ref.Class.row.TypeName,) - typerefnamespace = member_ref.Class.row.TypeNamespace - + typerefnamespace, typerefname = resolve_nested_typeref_helper(member_ref.Class.row_index, member_ref.Class.row, pe) + yield DnType( token, typerefname, @@ -219,12 +215,8 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: # remove get_/set_ method_name = method_name[4:] - if rid in nested_class_table: - typedefnamespace, typedefname = resolve_nested_typedef_name(rid, nested_class_table, typedef.TypeName, pe) - else: - typedefname = (typedef.TypeName,) - typedefnamespace = typedef.TypeNamespace - + typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe) + yield DnType(token, typedefname, namespace=typedefnamespace, member=method_name, access=access) @@ -252,11 +244,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: logger.debug("TypeDef[0x%X] FieldList[0x%X] row is None", rid, idx) continue - if rid in nested_class_table: - typedefnamespace, typedefname = resolve_nested_typedef_name(rid, nested_class_table, typedef.TypeName, pe) - else: - typedefname = (typedef.TypeName,) - typedefnamespace = typedef.TypeNamespace + typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe) token: int = calculate_dotnet_token_value(field.table.number, field.row_index) yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name) @@ -322,57 +310,98 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod] # like kernel32.CreateFileA yield DnUnmanagedMethod(token, module, method) -def enclosing_and_nested_classes_index_table(pe): - """Build index for EnclosingClass based off the NestedClass row index in the nestedclass table""" - nested_class_table = {} - - # Used to find nested classes in typedef - for rid, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): - assert isinstance(nestedclass, dnfile.mdtable.NestedClassRow) - nested_class_table[nestedclass.NestedClass.row_index] = nestedclass.EnclosingClass.row_index - return nested_class_table +def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int): + assert pe.net is not None + assert pe.net.mdtables is not None -def resolve_nested_typedef_name(index, nested_class_table, name, pe): + table = pe.net.mdtables.tables.get(table_index, []) + if table is None: + # table index is not valid + return None + + try: + return table[row_index] + except IndexError: + return None + + +def resolve_nested_typedef_name(nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE): """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" - typedef_class_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeDef.number, []) - typedef_name = [] - # Append the current typedef name - typedef_name.append(name) + if index in nested_class_table: + typedef_name = [] + name = typedef.TypeName - while nested_class_table[index] in nested_class_table: - # Iterate through the typedef table to resolve the nested name - name = typedef_class_table[nested_class_table[index]-1].TypeName + # Append the current typedef name typedef_name.append(name) - index = nested_class_table[index] + + while nested_class_table[index] in nested_class_table: + # Iterate through the typedef table to resolve the nested name + table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index]-1) + if table_row is None: + return typedef.TypeNamespace, tuple(typedef_name[::-1]) + + name = table_row.TypeName + typedef_name.append(name) + index = nested_class_table[index] - # Document the root enclosing details - enclosing_name = typedef_class_table[nested_class_table[index]-1].TypeName - typedef_name.append(enclosing_name) - namespace = typedef_class_table[nested_class_table[index]-1].TypeNamespace + # Document the root enclosing details + table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index]-1) + if table_row is None: + return typedef.TypeNamespace, tuple(typedef_name[::-1]) + + enclosing_name = table_row.TypeName + typedef_name.append(enclosing_name) + + return table_row.TypeNamespace, tuple(typedef_name[::-1]) - return namespace, tuple(typedef_name[::-1]) + else: + return typedef.TypeNamespace, (typedef.TypeName,) + -def resolve_nested_typeref_helper(index, name, pe): +def resolve_nested_typeref_helper(index: int, typeref: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE): """Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" - typeref_table = pe.net.mdtables.tables.get(dnfile.mdtable.TypeRef.number, []) - typeref_name = [] - - # Not appending the current typeref name to avoid potential duplicate - - while type(typeref_table[index - 1].ResolutionScope.table) is dnfile.mdtable.TypeRef: - # Iterate through the typeref table to resolve the nested name - typeref_name.append(name) - name = typeref_table[index - 1].TypeName - index = typeref_table[index - 1].ResolutionScope.row_index + # If the ResolutionScope decodes to a typeRef type then it is nested + if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef): + typeref_name = [] + name = typeref.TypeName + # Not appending the current typeref name to avoid potential duplicate - # Document the root enclosing details - typeref_name.append(typeref_table[index - 1].TypeName) - namespace = typeref_table[index - 1].TypeNamespace + # Validate index + table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, index-1) + if table_row is None: + return typeref.TypeNamespace, (typeref.TypeName,) + + while isinstance(table_row.ResolutionScope.table, dnfile.mdtable.TypeRef): + # Iterate through the typeref table to resolve the nested name + typeref_name.append(name) + name = table_row.TypeName + table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, table_row.ResolutionScope.row_index-1) + if table_row is None: + return typeref.TypeNamespace, tuple(typeref_name[::-1]) + + # Document the root enclosing details + typeref_name.append(table_row.TypeName) - return namespace, tuple(typeref_name[::-1]) + return table_row.TypeNamespace, tuple(typeref_name[::-1]) + else: + return typeref.TypeNamespace, (typeref.TypeName,) + + +def enclosing_and_nested_classes_index_table(pe: dnfile.dnPE): + """Build index for EnclosingClass based off the NestedClass row index in the nestedclass table""" + nested_class_table = {} + + # Used to find nested classes in typedef + for rid, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): + assert isinstance(nestedclass, dnfile.mdtable.NestedClassRow) + nested_class_table[nestedclass.NestedClass.row_index] = nestedclass.EnclosingClass.row_index + + return nested_class_table + + def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: """get .NET types from TypeDef and TypeRef tables""" nested_class_table = enclosing_and_nested_classes_index_table(pe) @@ -380,28 +409,20 @@ def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) - if rid in nested_class_table: - typedefnamespace, typedefname = resolve_nested_typedef_name(rid, nested_class_table, typedef.TypeName, pe) - else: - typedefname = (typedef.TypeName,) - typedefnamespace = typedef.TypeNamespace - + typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe) + typedef_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) yield DnType(typedef_token, typedefname, namespace=typedefnamespace) for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): assert isinstance(typeref, dnfile.mdtable.TypeRefRow) - # If the ResolutionScope decodes to a typeRef type then it is nested - if type(typeref.ResolutionScope.table) == dnfile.mdtable.TypeRef: - typerefnamespace, typerefname = resolve_nested_typeref_helper(typeref.ResolutionScope.row_index, typeref.TypeName, pe) - else: - typerefname = (typeref.TypeName,) - typerefnamespace = typeref.TypeNamespace + typerefnamespace, typerefname = resolve_nested_typeref_helper(typeref.ResolutionScope.row_index, typeref, pe) typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) yield DnType(typeref_token, typerefname, namespace=typerefnamespace) - + + def calculate_dotnet_token_value(table: int, rid: int) -> int: return ((table & 0xFF) << Token.TABLE_SHIFT) | (rid & Token.RID_MASK) From bda8727b87b0f30cdff0a69aa91773d1881dbb08 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Wed, 3 Jan 2024 15:17:50 -0500 Subject: [PATCH 26/49] Linter corrections for types.py --- capa/features/extractors/dnfile/types.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py index 7da9ddf12..22790a03a 100644 --- a/capa/features/extractors/dnfile/types.py +++ b/capa/features/extractors/dnfile/types.py @@ -10,11 +10,11 @@ class DnType: - def __init__(self, token: int, class_: str, namespace: str = "", member: str = "", access: Optional[str] = None): + def __init__(self, token: int, class_: tuple, namespace: str = "", member: str = "", access: Optional[str] = None): self.token: int = token self.access: Optional[str] = access self.namespace: str = namespace - self.class_: str = class_ + self.class_: tuple = class_ if member == ".ctor": member = "ctor" @@ -44,16 +44,17 @@ def __repr__(self): @staticmethod def format_name(class_: tuple, namespace: str = "", member: str = ""): if len(class_) > 1: - class_ = "/".join(class_) # Convert tuple to str, separating items with "/" + class_str = "/".join(class_) # Concat items in tuple, separated by a "/" else: - class_ = "".join(class_) # Convert tuple to str + class_str = "".join(class_) # Convert tuple to str # like File::OpenRead - name: str = f"{class_}::{member}" if member else class_ + name: str = f"{class_str}::{member}" if member else class_str if namespace: # like System.IO.File::OpenRead name = f"{namespace}.{name}" return name + class DnUnmanagedMethod: def __init__(self, token: int, module: str, method: str): self.token: int = token From 8679964c3242ef239ca6ba41147516b26d7b29dd Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Wed, 3 Jan 2024 15:19:00 -0500 Subject: [PATCH 27/49] Linter corrections for dotnetfile.py --- capa/features/extractors/dotnetfile.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index 1ff69ef87..2eecfa6ad 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -38,10 +38,10 @@ is_dotnet_mixed_mode, get_dotnet_managed_imports, get_dotnet_managed_methods, + resolve_nested_typedef_name, calculate_dotnet_token_value, get_dotnet_unmanaged_imports, resolve_nested_typeref_helper, - resolve_nested_typedef_name, enclosing_and_nested_classes_index_table, ) @@ -92,17 +92,17 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple # namespace do not have an associated token, so we yield 0x0 yield Namespace(namespace), NO_ADDRESS - + def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]: """emit class features from TypeRef and TypeDef tables""" nested_class_table = enclosing_and_nested_classes_index_table(pe) - + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): # emit internal .NET classes assert isinstance(typedef, dnfile.mdtable.TypeDefRow) typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe) - + token = calculate_dotnet_token_value(dnfile.mdtable.TypeDef.number, rid) yield Class(DnType.format_name(typedefname, namespace=typedefnamespace)), DNTokenAddress(token) @@ -111,7 +111,7 @@ def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Cla assert isinstance(typeref, dnfile.mdtable.TypeRefRow) typerefnamespace, typerefname = resolve_nested_typeref_helper(typeref.ResolutionScope.row_index, typeref, pe) - + token = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token) From 531a35e0ef4d3159efcc8e8b451202933f79f4f7 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Wed, 3 Jan 2024 15:20:17 -0500 Subject: [PATCH 28/49] Linter corrections and caller functions cleanup for helpers.py --- capa/features/extractors/dnfile/helpers.py | 73 +++++++++++----------- 1 file changed, 38 insertions(+), 35 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 9b77e612e..fd62d2602 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -131,8 +131,10 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: # remove get_/set_ from MemberRef name member_ref_name = member_ref_name[4:] - typerefnamespace, typerefname = resolve_nested_typeref_helper(member_ref.Class.row_index, member_ref.Class.row, pe) - + typerefnamespace, typerefname = resolve_nested_typeref_helper( + member_ref.Class.row_index, member_ref.Class.row, pe + ) + yield DnType( token, typerefname, @@ -191,7 +193,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type) """ nested_class_table = enclosing_and_nested_classes_index_table(pe) - + accessor_map: Dict[int, str] = {} for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe): accessor_map[methoddef] = methoddef_access @@ -216,7 +218,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: method_name = method_name[4:] typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe) - + yield DnType(token, typedefname, namespace=typedefnamespace, member=method_name, access=access) @@ -232,7 +234,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: FieldList (index into Field table; it marks the first of a contiguous run of Fields owned by this Type) """ nested_class_table = enclosing_and_nested_classes_index_table(pe) - + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) @@ -243,9 +245,9 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: if field.row is None: logger.debug("TypeDef[0x%X] FieldList[0x%X] row is None", rid, idx) continue - + typedefnamespace, typedefname = resolve_nested_typedef_name(nested_class_table, rid, typedef, pe) - + token: int = calculate_dotnet_token_value(field.table.number, field.row_index) yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name) @@ -315,47 +317,48 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int): assert pe.net is not None assert pe.net.mdtables is not None - table = pe.net.mdtables.tables.get(table_index, []) - if table is None: - # table index is not valid + if row_index - 1 <= 0: return None - + try: - return table[row_index] + table = pe.net.mdtables.tables.get(table_index, []) + return table[row_index - 1] except IndexError: return None -def resolve_nested_typedef_name(nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE): +def resolve_nested_typedef_name( + nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE +): """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" - + if index in nested_class_table: typedef_name = [] name = typedef.TypeName - + # Append the current typedef name typedef_name.append(name) - + while nested_class_table[index] in nested_class_table: # Iterate through the typedef table to resolve the nested name - table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index]-1) + table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index]) if table_row is None: return typedef.TypeNamespace, tuple(typedef_name[::-1]) - + name = table_row.TypeName typedef_name.append(name) index = nested_class_table[index] - + # Document the root enclosing details - table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index]-1) + table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index]) if table_row is None: return typedef.TypeNamespace, tuple(typedef_name[::-1]) - + enclosing_name = table_row.TypeName typedef_name.append(enclosing_name) - + return table_row.TypeNamespace, tuple(typedef_name[::-1]) - + else: return typedef.TypeNamespace, (typedef.TypeName,) @@ -367,25 +370,25 @@ def resolve_nested_typeref_helper(index: int, typeref: dnfile.mdtable.TypeDefRow typeref_name = [] name = typeref.TypeName # Not appending the current typeref name to avoid potential duplicate - + # Validate index - table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, index-1) + table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, index) if table_row is None: return typeref.TypeNamespace, (typeref.TypeName,) - + while isinstance(table_row.ResolutionScope.table, dnfile.mdtable.TypeRef): # Iterate through the typeref table to resolve the nested name typeref_name.append(name) name = table_row.TypeName - table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, table_row.ResolutionScope.row_index-1) + table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, table_row.ResolutionScope.row_index) if table_row is None: return typeref.TypeNamespace, tuple(typeref_name[::-1]) - + # Document the root enclosing details typeref_name.append(table_row.TypeName) - + return table_row.TypeNamespace, tuple(typeref_name[::-1]) - + else: return typeref.TypeNamespace, (typeref.TypeName,) @@ -393,9 +396,9 @@ def resolve_nested_typeref_helper(index: int, typeref: dnfile.mdtable.TypeDefRow def enclosing_and_nested_classes_index_table(pe: dnfile.dnPE): """Build index for EnclosingClass based off the NestedClass row index in the nestedclass table""" nested_class_table = {} - + # Used to find nested classes in typedef - for rid, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): + for _rid, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): assert isinstance(nestedclass, dnfile.mdtable.NestedClassRow) nested_class_table[nestedclass.NestedClass.row_index] = nestedclass.EnclosingClass.row_index @@ -405,7 +408,7 @@ def enclosing_and_nested_classes_index_table(pe: dnfile.dnPE): def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: """get .NET types from TypeDef and TypeRef tables""" nested_class_table = enclosing_and_nested_classes_index_table(pe) - + for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) @@ -416,9 +419,9 @@ def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): assert isinstance(typeref, dnfile.mdtable.TypeRefRow) - + typerefnamespace, typerefname = resolve_nested_typeref_helper(typeref.ResolutionScope.row_index, typeref, pe) - + typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) yield DnType(typeref_token, typerefname, namespace=typerefnamespace) From b0c90de7cebffd5b54ff3526551e411814115351 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:32:17 -0500 Subject: [PATCH 29/49] Update capa/features/extractors/dnfile/helpers.py Co-authored-by: Mike Hunhoff --- capa/features/extractors/dnfile/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index fd62d2602..6fbeccac2 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -363,7 +363,7 @@ def resolve_nested_typedef_name( return typedef.TypeNamespace, (typedef.TypeName,) -def resolve_nested_typeref_helper(index: int, typeref: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE): +def resolve_nested_typeref_name(index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE): """Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" # If the ResolutionScope decodes to a typeRef type then it is nested if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef): From ba0ecbd96f826912445f0837b975615f6b609ca3 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:32:49 -0500 Subject: [PATCH 30/49] Update capa/features/extractors/dnfile/helpers.py Co-authored-by: Mike Hunhoff --- capa/features/extractors/dnfile/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 6fbeccac2..13a9580e1 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -365,7 +365,7 @@ def resolve_nested_typedef_name( def resolve_nested_typeref_name(index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE): """Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" - # If the ResolutionScope decodes to a typeRef type then it is nested + # If the ResolutionScope decodes to a TypeRef type then it is nested if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef): typeref_name = [] name = typeref.TypeName From f97f7f57d2cbe034f9f7f1d3a629ec580d0d86a7 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:32:58 -0500 Subject: [PATCH 31/49] Update capa/features/extractors/dnfile/helpers.py Co-authored-by: Mike Hunhoff --- capa/features/extractors/dnfile/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 13a9580e1..2d5d5d56b 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -398,7 +398,7 @@ def enclosing_and_nested_classes_index_table(pe: dnfile.dnPE): nested_class_table = {} # Used to find nested classes in typedef - for _rid, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): + for _, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): assert isinstance(nestedclass, dnfile.mdtable.NestedClassRow) nested_class_table[nestedclass.NestedClass.row_index] = nestedclass.EnclosingClass.row_index From 807fc1fe83e06800384779b4f5553fe9d33375d0 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:33:08 -0500 Subject: [PATCH 32/49] Update capa/features/extractors/dnfile/helpers.py Co-authored-by: Mike Hunhoff --- capa/features/extractors/dnfile/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 2d5d5d56b..d5f1840e5 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -394,7 +394,7 @@ def resolve_nested_typeref_name(index: int, typeref: dnfile.mdtable.TypeRefRow, def enclosing_and_nested_classes_index_table(pe: dnfile.dnPE): - """Build index for EnclosingClass based off the NestedClass row index in the nestedclass table""" + """Build index for EnclosingClass based off the NestedClass row index in the NestedClass table""" nested_class_table = {} # Used to find nested classes in typedef From abccf7d85bf4644b490f5ff6850b4a3c0f55d901 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:33:17 -0500 Subject: [PATCH 33/49] Update capa/features/extractors/dnfile/helpers.py Co-authored-by: Mike Hunhoff --- capa/features/extractors/dnfile/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index d5f1840e5..fc24c3a34 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -330,7 +330,7 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int): def resolve_nested_typedef_name( nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE ): - """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" + """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeDef name as a tuple""" if index in nested_class_table: typedef_name = [] From a1b9319476596994ae5d39356a62fde26587a1fb Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:33:22 -0500 Subject: [PATCH 34/49] Update capa/features/extractors/dnfile/helpers.py Co-authored-by: Mike Hunhoff --- capa/features/extractors/dnfile/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index fc24c3a34..358653ebf 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -369,7 +369,7 @@ def resolve_nested_typeref_name(index: int, typeref: dnfile.mdtable.TypeRefRow, if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef): typeref_name = [] name = typeref.TypeName - # Not appending the current typeref name to avoid potential duplicate + # Not appending the current TypeRef name to avoid potential duplicate # Validate index table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, index) From d9800b7032126bbc94ca81910b60cbabbc762b4c Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:33:30 -0500 Subject: [PATCH 35/49] Update capa/features/extractors/dnfile/helpers.py Co-authored-by: Mike Hunhoff --- capa/features/extractors/dnfile/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 358653ebf..b378bc188 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -377,7 +377,7 @@ def resolve_nested_typeref_name(index: int, typeref: dnfile.mdtable.TypeRefRow, return typeref.TypeNamespace, (typeref.TypeName,) while isinstance(table_row.ResolutionScope.table, dnfile.mdtable.TypeRef): - # Iterate through the typeref table to resolve the nested name + # Iterate through the TypeRef table to resolve the nested name typeref_name.append(name) name = table_row.TypeName table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, table_row.ResolutionScope.row_index) From 9ccdd01a511d9ae340178ca5662ae62ac5350c9a Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:33:38 -0500 Subject: [PATCH 36/49] Update capa/features/extractors/dnfile/helpers.py Co-authored-by: Mike Hunhoff --- capa/features/extractors/dnfile/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index b378bc188..8ad796b9d 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -340,7 +340,7 @@ def resolve_nested_typedef_name( typedef_name.append(name) while nested_class_table[index] in nested_class_table: - # Iterate through the typedef table to resolve the nested name + # Iterate through the TypeDef table to resolve the nested name table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index]) if table_row is None: return typedef.TypeNamespace, tuple(typedef_name[::-1]) From d84f2b10b2c38ea5eb7f6951d3ad6978945e9679 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:34:09 -0500 Subject: [PATCH 37/49] Update capa/features/extractors/dnfile/helpers.py Co-authored-by: Mike Hunhoff --- capa/features/extractors/dnfile/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index 8ad796b9d..dacf7fd09 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -397,7 +397,7 @@ def enclosing_and_nested_classes_index_table(pe: dnfile.dnPE): """Build index for EnclosingClass based off the NestedClass row index in the NestedClass table""" nested_class_table = {} - # Used to find nested classes in typedef + # Used to find nested classes in TypeDef table for _, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): assert isinstance(nestedclass, dnfile.mdtable.NestedClassRow) nested_class_table[nestedclass.NestedClass.row_index] = nestedclass.EnclosingClass.row_index From 62533f7e8ee2d2f31b81d5b8c3d1a7fbf45d6621 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:37:20 -0500 Subject: [PATCH 38/49] Update helpers.py --- capa/features/extractors/dnfile/helpers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index dacf7fd09..b0ff3b009 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -192,7 +192,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: TypeNamespace (index into String heap) MethodList (index into MethodDef table; it marks the first of a contiguous run of Methods owned by this Type) """ - nested_class_table = enclosing_and_nested_classes_index_table(pe) + nested_class_table = get_dotnet_nested_class_table_index(pe) accessor_map: Dict[int, str] = {} for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe): @@ -233,7 +233,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: TypeNamespace (index into String heap) FieldList (index into Field table; it marks the first of a contiguous run of Fields owned by this Type) """ - nested_class_table = enclosing_and_nested_classes_index_table(pe) + nested_class_table = get_dotnet_nested_class_table_index(pe) for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) @@ -393,7 +393,7 @@ def resolve_nested_typeref_name(index: int, typeref: dnfile.mdtable.TypeRefRow, return typeref.TypeNamespace, (typeref.TypeName,) -def enclosing_and_nested_classes_index_table(pe: dnfile.dnPE): +def get_dotnet_nested_class_table_index(pe: dnfile.dnPE): """Build index for EnclosingClass based off the NestedClass row index in the NestedClass table""" nested_class_table = {} @@ -407,7 +407,7 @@ def enclosing_and_nested_classes_index_table(pe: dnfile.dnPE): def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: """get .NET types from TypeDef and TypeRef tables""" - nested_class_table = enclosing_and_nested_classes_index_table(pe) + nested_class_table = get_dotnet_nested_class_table_index(pe) for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): assert isinstance(typedef, dnfile.mdtable.TypeDefRow) From 1dd923aefffd49b32883ffbea2b7a575b2845ad6 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:37:48 -0500 Subject: [PATCH 39/49] Update dotnetfile.py --- capa/features/extractors/dotnetfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index 2eecfa6ad..b73431d23 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -42,7 +42,7 @@ calculate_dotnet_token_value, get_dotnet_unmanaged_imports, resolve_nested_typeref_helper, - enclosing_and_nested_classes_index_table, + get_dotnet_nested_class_table_index, ) logger = logging.getLogger(__name__) @@ -95,7 +95,7 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]: """emit class features from TypeRef and TypeDef tables""" - nested_class_table = enclosing_and_nested_classes_index_table(pe) + nested_class_table = get_dotnet_nested_class_table_index(pe) for rid, typedef in iter_dotnet_table(pe, dnfile.mdtable.TypeDef.number): # emit internal .NET classes From b71c8ea3d7640680828f863f611fb370432beec8 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 13:26:48 -0500 Subject: [PATCH 40/49] Update tuple type in types.py --- capa/features/extractors/dnfile/types.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py index 22790a03a..242c45519 100644 --- a/capa/features/extractors/dnfile/types.py +++ b/capa/features/extractors/dnfile/types.py @@ -7,14 +7,14 @@ # See the License for the specific language governing permissions and limitations under the License. from typing import Optional - +from typing import Tuple class DnType: - def __init__(self, token: int, class_: tuple, namespace: str = "", member: str = "", access: Optional[str] = None): + def __init__(self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None): self.token: int = token self.access: Optional[str] = access self.namespace: str = namespace - self.class_: tuple = class_ + self.class_: Tuple[str, ...] = class_ if member == ".ctor": member = "ctor" @@ -42,7 +42,7 @@ def __repr__(self): return str(self) @staticmethod - def format_name(class_: tuple, namespace: str = "", member: str = ""): + def format_name(class_: Tuple[str, ...], namespace: str = "", member: str = ""): if len(class_) > 1: class_str = "/".join(class_) # Concat items in tuple, separated by a "/" else: From 500ded3de0a20bfb2dbd70337f381923c654533e Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 13:38:25 -0500 Subject: [PATCH 41/49] Update dotnetfile.py --- capa/features/extractors/dotnetfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index b73431d23..d8ef5db77 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -41,7 +41,7 @@ resolve_nested_typedef_name, calculate_dotnet_token_value, get_dotnet_unmanaged_imports, - resolve_nested_typeref_helper, + resolve_nested_typeref_name, get_dotnet_nested_class_table_index, ) @@ -110,7 +110,7 @@ def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Cla # emit external .NET classes assert isinstance(typeref, dnfile.mdtable.TypeRefRow) - typerefnamespace, typerefname = resolve_nested_typeref_helper(typeref.ResolutionScope.row_index, typeref, pe) + typerefnamespace, typerefname = resolve_nested_typeref_name(typeref.ResolutionScope.row_index, typeref, pe) token = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token) From 465cb35f1cd2dd20b0f7ab1b2a63cde58efd00c4 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 13:51:37 -0500 Subject: [PATCH 42/49] Update return value annotations in helpers.py --- capa/features/extractors/dnfile/helpers.py | 30 ++++++++++++---------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index b0ff3b009..ba27bd36c 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -131,7 +131,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: # remove get_/set_ from MemberRef name member_ref_name = member_ref_name[4:] - typerefnamespace, typerefname = resolve_nested_typeref_helper( + typerefnamespace, typerefname = resolve_nested_typeref_name( member_ref.Class.row_index, member_ref.Class.row, pe ) @@ -313,7 +313,7 @@ def get_dotnet_unmanaged_imports(pe: dnfile.dnPE) -> Iterator[DnUnmanagedMethod] yield DnUnmanagedMethod(token, module, method) -def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int): +def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> Optional[dnfile.base.MDTableRow]: assert pe.net is not None assert pe.net.mdtables is not None @@ -329,8 +329,8 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int): def resolve_nested_typedef_name( nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE -): - """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeDef name as a tuple""" +) -> tuple[str, Tuple[str, ...]]: + """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" if index in nested_class_table: typedef_name = [] @@ -340,7 +340,7 @@ def resolve_nested_typedef_name( typedef_name.append(name) while nested_class_table[index] in nested_class_table: - # Iterate through the TypeDef table to resolve the nested name + # Iterate through the typedef table to resolve the nested name table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeDef.number, nested_class_table[index]) if table_row is None: return typedef.TypeNamespace, tuple(typedef_name[::-1]) @@ -363,13 +363,15 @@ def resolve_nested_typedef_name( return typedef.TypeNamespace, (typedef.TypeName,) -def resolve_nested_typeref_name(index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE): +def resolve_nested_typeref_name( + index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE +) -> tuple[str, Tuple[str, ...]]: """Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" - # If the ResolutionScope decodes to a TypeRef type then it is nested + # If the ResolutionScope decodes to a typeRef type then it is nested if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef): typeref_name = [] name = typeref.TypeName - # Not appending the current TypeRef name to avoid potential duplicate + # Not appending the current typeref name to avoid potential duplicate # Validate index table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, index) @@ -377,7 +379,7 @@ def resolve_nested_typeref_name(index: int, typeref: dnfile.mdtable.TypeRefRow, return typeref.TypeNamespace, (typeref.TypeName,) while isinstance(table_row.ResolutionScope.table, dnfile.mdtable.TypeRef): - # Iterate through the TypeRef table to resolve the nested name + # Iterate through the typeref table to resolve the nested name typeref_name.append(name) name = table_row.TypeName table_row = get_dotnet_table_row(pe, dnfile.mdtable.TypeRef.number, table_row.ResolutionScope.row_index) @@ -393,12 +395,12 @@ def resolve_nested_typeref_name(index: int, typeref: dnfile.mdtable.TypeRefRow, return typeref.TypeNamespace, (typeref.TypeName,) -def get_dotnet_nested_class_table_index(pe: dnfile.dnPE): - """Build index for EnclosingClass based off the NestedClass row index in the NestedClass table""" +def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> dict: + """Build index for EnclosingClass based off the NestedClass row index in the nestedclass table""" nested_class_table = {} - # Used to find nested classes in TypeDef table - for _, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): + # Used to find nested classes in typedef + for _rid, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): assert isinstance(nestedclass, dnfile.mdtable.NestedClassRow) nested_class_table[nestedclass.NestedClass.row_index] = nestedclass.EnclosingClass.row_index @@ -420,7 +422,7 @@ def get_dotnet_types(pe: dnfile.dnPE) -> Iterator[DnType]: for rid, typeref in iter_dotnet_table(pe, dnfile.mdtable.TypeRef.number): assert isinstance(typeref, dnfile.mdtable.TypeRefRow) - typerefnamespace, typerefname = resolve_nested_typeref_helper(typeref.ResolutionScope.row_index, typeref, pe) + typerefnamespace, typerefname = resolve_nested_typeref_name(typeref.ResolutionScope.row_index, typeref, pe) typeref_token: int = calculate_dotnet_token_value(dnfile.mdtable.TypeRef.number, rid) yield DnType(typeref_token, typerefname, namespace=typerefnamespace) From c3b8e2627ca3992793d39c3b4c5742ea034aa5d5 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 13:54:41 -0500 Subject: [PATCH 43/49] Linting update types.py --- capa/features/extractors/dnfile/types.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py index 242c45519..44208baf6 100644 --- a/capa/features/extractors/dnfile/types.py +++ b/capa/features/extractors/dnfile/types.py @@ -6,8 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Optional -from typing import Tuple +from typing import Tuple, Optional class DnType: def __init__(self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None): From 89e7878dfad85c6ec56598144d0087e50ecfc41b Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 13:55:17 -0500 Subject: [PATCH 44/49] Linting update dotnetfile.py --- capa/features/extractors/dotnetfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index d8ef5db77..4c9b41507 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -39,9 +39,9 @@ get_dotnet_managed_imports, get_dotnet_managed_methods, resolve_nested_typedef_name, + resolve_nested_typeref_name, calculate_dotnet_token_value, get_dotnet_unmanaged_imports, - resolve_nested_typeref_name, get_dotnet_nested_class_table_index, ) From ee6f7455f58a606c0f9214d594cd895c40b30de2 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Thu, 4 Jan 2024 15:14:35 -0500 Subject: [PATCH 45/49] Added unit tests to fixtures.py --- tests/fixtures.py | 112 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/tests/fixtures.py b/tests/fixtures.py index 950c439a8..a06308a1c 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -393,6 +393,10 @@ def get_data_path_by_name(name) -> Path: return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_" elif name.startswith("1038a2"): return CD / "data" / "1038a23daad86042c66bfe6c9d052d27048de9653bde5750dc0f240c792d9ac8.elf_" + elif name.startswith("nested_typedef"): + return CD / "data" / "dotnet" / "dd9098ff91717f4906afe9dafdfa2f52.exe_" + elif name.startswith("nested_typeref"): + return CD / "data" / "dotnet" / "2c7d60f77812607dec5085973ff76cea.dll_" else: raise ValueError(f"unexpected sample fixture: {name}") @@ -1274,6 +1278,114 @@ def parametrize(params, values, **kwargs): ), # MemberRef method False, ), + ( + "nested_typedef", + "file", + capa.features.common.Class("mynamespace.myclass_outer0"), + True, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("mynamespace.myclass_outer1"), + True, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("mynamespace.myclass_outer0/myclass_inner0_0"), + True, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("mynamespace.myclass_outer0/myclass_inner0_1"), + True, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("mynamespace.myclass_outer1/myclass_inner1_0"), + True, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("mynamespace.myclass_outer1/myclass_inner1_1"), + True, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("mynamespace.myclass_outer1/myclass_inner1_0/myclass_inner_inner"), + True, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("myclass_inner_inner"), + False, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("myclass_inner1_0"), + False, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("myclass_inner1_1"), + False, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("myclass_inner0_0"), + False, + ), + ( + "nested_typedef", + "file", + capa.features.common.Class("myclass_inner0_1"), + False, + ), + ( + "nested_typeref", + "file", + capa.features.file.Import("Android.OS.Build/VERSION::SdkInt"), + True, + ), + ( + "nested_typeref", + "file", + capa.features.file.Import("Android.Media.Image/Plane::Buffer"), + True, + ), + ( + "nested_typeref", + "file", + capa.features.file.Import("Android.Provider.Telephony/Sent/Sent::ContentUri"), + True, + ), + ( + "nested_typeref", + "file", + capa.features.file.Import("Android.OS.Build::SdkInt"), + False, + ), + ( + "nested_typeref", + "file", + capa.features.file.Import("Plane::Buffer"), + False, + ), + ( + "nested_typeref", + "file", + capa.features.file.Import("Sent::ContentUri"), + False, + ), ], # order tests by (file, item) # so that our LRU cache is most effective. From 3194caa1d3ea77258e2774529720bedc221484cf Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Fri, 5 Jan 2024 10:08:35 -0500 Subject: [PATCH 46/49] Update types.py --- capa/features/extractors/dnfile/types.py | 1 + 1 file changed, 1 insertion(+) diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py index 44208baf6..fab3196e8 100644 --- a/capa/features/extractors/dnfile/types.py +++ b/capa/features/extractors/dnfile/types.py @@ -8,6 +8,7 @@ from typing import Tuple, Optional + class DnType: def __init__(self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None): self.token: int = token From aff5a13f41cc1b73ad4dd28e5ad67f4214fc6534 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Fri, 5 Jan 2024 10:15:24 -0500 Subject: [PATCH 47/49] Linting fix for types.py --- capa/features/extractors/dnfile/types.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py index fab3196e8..4afcc81e1 100644 --- a/capa/features/extractors/dnfile/types.py +++ b/capa/features/extractors/dnfile/types.py @@ -10,7 +10,9 @@ class DnType: - def __init__(self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None): + def __init__( + self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None + ): self.token: int = token self.access: Optional[str] = access self.namespace: str = namespace From 38ee13d44f118c9d884f6ee133b6bf4a812a3d76 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Fri, 5 Jan 2024 10:17:33 -0500 Subject: [PATCH 48/49] Update CHANGELOG.md --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f6d1776b..7d981fca6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ - binja: add support for forwarded exports #1646 @xusheng6 - binja: add support for symtab names #1504 @xusheng6 - add com class/interface features #322 @Aayush-goel-04 +- dotnet: emit enclosing class information for nested classes #1780 #1913 @bkojusner @mike-hunhoff ### Breaking Changes @@ -57,7 +58,6 @@ - data-manipulation/compression/create-cabinet-on-windows michael.hunhoff@mandiant.com jakub.jozwiak@mandiant.com - data-manipulation/compression/extract-cabinet-on-windows jakub.jozwiak@mandiant.com - lib/create-file-decompression-interface-context-on-windows jakub.jozwiak@mandiant.com -- ### Bug Fixes - ghidra: fix `ints_to_bytes` performance #1761 @mike-hunhoff @@ -1626,4 +1626,4 @@ Download a standalone binary below and checkout the readme [here on GitHub](http ### Raw diffs - [capa v1.0.0...v1.1.0](https://github.com/mandiant/capa/compare/v1.0.0...v1.1.0) - - [capa-rules v1.0.0...v1.1.0](https://github.com/mandiant/capa-rules/compare/v1.0.0...v1.1.0) \ No newline at end of file + - [capa-rules v1.0.0...v1.1.0](https://github.com/mandiant/capa-rules/compare/v1.0.0...v1.1.0) From 3c6c82d5d6ebff32f9dd9fed49eba36634a6a5f7 Mon Sep 17 00:00:00 2001 From: Blas <25017260+bkojusner@users.noreply.github.com> Date: Fri, 5 Jan 2024 11:30:39 -0500 Subject: [PATCH 49/49] Small changes to return types in helpers.py --- capa/features/extractors/dnfile/helpers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index ba27bd36c..e4bdfa011 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -329,7 +329,7 @@ def get_dotnet_table_row(pe: dnfile.dnPE, table_index: int, row_index: int) -> O def resolve_nested_typedef_name( nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE -) -> tuple[str, Tuple[str, ...]]: +) -> Tuple[str, Tuple[str, ...]]: """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" if index in nested_class_table: @@ -365,7 +365,7 @@ def resolve_nested_typedef_name( def resolve_nested_typeref_name( index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE -) -> tuple[str, Tuple[str, ...]]: +) -> Tuple[str, Tuple[str, ...]]: """Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" # If the ResolutionScope decodes to a typeRef type then it is nested if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef): @@ -395,12 +395,12 @@ def resolve_nested_typeref_name( return typeref.TypeNamespace, (typeref.TypeName,) -def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> dict: +def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]: """Build index for EnclosingClass based off the NestedClass row index in the nestedclass table""" nested_class_table = {} # Used to find nested classes in typedef - for _rid, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): + for _, nestedclass in iter_dotnet_table(pe, dnfile.mdtable.NestedClass.number): assert isinstance(nestedclass, dnfile.mdtable.NestedClassRow) nested_class_table[nestedclass.NestedClass.row_index] = nestedclass.EnclosingClass.row_index