From bb98da2b2bf2b40df4fcadde323956adf629e17e Mon Sep 17 00:00:00 2001 From: Sam Byass Date: Wed, 22 Sep 2021 16:46:12 +0100 Subject: [PATCH] Arm64 improvements - throw helpers, literals, etc. --- .../ARM64/Arm64CallThrowHelperAction.cs | 87 ++++++++ .../Arm64UnknownGlobalToConstantAction.cs | 39 ++++ .../Arm64UnmanagedLiteralToConstantAction.cs | 39 ++++ .../ARM64/BaseArm64ConditionalJumpAction.cs | 13 +- .../Base/AbstractExceptionThrowerAction.cs | 58 +++++ .../Important/CallExceptionThrowerFunction.cs | 45 +--- .../AsmAnalyzerArmV8A.InstructionChecks.cs | 93 +++++--- Cpp2IL.Core/Analysis/AsmAnalyzerArmV8a.cs | 28 +-- Cpp2IL.Core/Analysis/AsmAnalyzerBase.cs | 8 +- Cpp2IL.Core/Analysis/MethodUtils.cs | 204 ++++++++++-------- .../1RenameLocalsPostProcessor.cs | 3 + Cpp2IL.Core/Extensions.cs | 4 +- Cpp2IL.Core/Utils.cs | 72 ++++++- 13 files changed, 496 insertions(+), 197 deletions(-) create mode 100644 Cpp2IL.Core/Analysis/Actions/ARM64/Arm64CallThrowHelperAction.cs create mode 100644 Cpp2IL.Core/Analysis/Actions/ARM64/Arm64UnknownGlobalToConstantAction.cs create mode 100644 Cpp2IL.Core/Analysis/Actions/ARM64/Arm64UnmanagedLiteralToConstantAction.cs create mode 100644 Cpp2IL.Core/Analysis/Actions/Base/AbstractExceptionThrowerAction.cs diff --git a/Cpp2IL.Core/Analysis/Actions/ARM64/Arm64CallThrowHelperAction.cs b/Cpp2IL.Core/Analysis/Actions/ARM64/Arm64CallThrowHelperAction.cs new file mode 100644 index 00000000..11eea2a3 --- /dev/null +++ b/Cpp2IL.Core/Analysis/Actions/ARM64/Arm64CallThrowHelperAction.cs @@ -0,0 +1,87 @@ +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Linq; +using Cpp2IL.Core.Analysis.Actions.Base; +using Cpp2IL.Core.Analysis.ResultModels; +using Gee.External.Capstone.Arm64; +using LibCpp2IL; +using Mono.Cecil; + +namespace Cpp2IL.Core.Analysis.Actions.ARM64 +{ + public class Arm64CallThrowHelperAction : AbstractExceptionThrowerAction + { + private static readonly ConcurrentDictionary _exceptionsThrownByAddress = new(); + private static readonly List _checkedAddresses = new(); + + public static bool IsThrowHelper(long pointer, int depth = 0) + { + if (depth >= 5) + return false; + + if (_exceptionsThrownByAddress.ContainsKey(pointer)) + return true; + + if (_checkedAddresses.Contains(pointer)) + return false; + + _checkedAddresses.Add(pointer); + + //This will only return up to the first branch, because it's an unmanaged function, but that's fine for these purposes + var funcBody = Utils.GetArm64MethodBodyAtVirtualAddress((ulong)pointer, false); + + var registerPages = new Dictionary(); + foreach (var arm64Instruction in funcBody.Where(i => i.Mnemonic is "adrp")) + { + registerPages[arm64Instruction.Details.Operands[0].Register.Name.ToLowerInvariant()] = arm64Instruction.Details.Operands[1].Immediate; + } + + var registerAddresses = new Dictionary(); + foreach (var arm64Instruction in funcBody.Where(i => i.Mnemonic is "add" && i.Details.Operands.Length == 3)) + { + var regName = arm64Instruction.Details.Operands[1].RegisterSafe()?.Name; + if (regName != null && registerPages.TryGetValue(regName, out var page) && arm64Instruction.Details.Operands[2].IsImmediate()) + { + var destName = arm64Instruction.Details.Operands[0].RegisterSafe()?.Name; + registerAddresses[destName ?? "invalid"] = page + arm64Instruction.Details.Operands[2].Immediate; + } + } + + foreach (var potentialLiteralAddress in registerAddresses.Values) + { + if (Utils.TryGetLiteralAt(LibCpp2IlMain.Binary!, (ulong)LibCpp2IlMain.Binary!.MapVirtualAddressToRaw((ulong)potentialLiteralAddress)) is not { } literal) + continue; + if (Utils.TryLookupTypeDefKnownNotGeneric($"System.{literal}") is not { } exceptionType) + continue; + + Logger.VerboseNewline($"Identified direct exception thrower: 0x{pointer:X} throws {exceptionType.FullName}. Instructions were {string.Join(", ", funcBody.Select(i => $"0x{i.Address:X} {i.Mnemonic}"))}", "Analyze"); + _exceptionsThrownByAddress.TryAdd(pointer, exceptionType); + return true; + } + + //Check for inherited exception throwers. + foreach (var nextPtr in + from i in funcBody + where i.Mnemonic is "b" or "bl" && i.Details.Operands[0].IsImmediate() + select i.Details.Operands[0].Immediate + into nextPtr + where IsThrowHelper(nextPtr, depth + 1) + select nextPtr) + { + _exceptionsThrownByAddress.TryAdd(pointer, _exceptionsThrownByAddress[nextPtr]); + return true; + } + + return false; + } + + public static TypeDefinition? GetExceptionThrown(long ptr) => _exceptionsThrownByAddress.TryGetValue(ptr, out var ex) ? ex : null; + + public Arm64CallThrowHelperAction(MethodAnalysis context, Arm64Instruction instruction) : base(context, instruction) + { + var functionAddress = instruction.Details.Operands[0].Immediate; + _exceptionType = _exceptionsThrownByAddress[functionAddress]; + } + } +} \ No newline at end of file diff --git a/Cpp2IL.Core/Analysis/Actions/ARM64/Arm64UnknownGlobalToConstantAction.cs b/Cpp2IL.Core/Analysis/Actions/ARM64/Arm64UnknownGlobalToConstantAction.cs new file mode 100644 index 00000000..28c0d410 --- /dev/null +++ b/Cpp2IL.Core/Analysis/Actions/ARM64/Arm64UnknownGlobalToConstantAction.cs @@ -0,0 +1,39 @@ +using Cpp2IL.Core.Analysis.Actions.Base; +using Cpp2IL.Core.Analysis.ResultModels; +using Gee.External.Capstone.Arm64; +using Mono.Cecil.Cil; + +namespace Cpp2IL.Core.Analysis.Actions.ARM64 +{ + public class Arm64UnknownGlobalToConstantAction : BaseAction + { + private UnknownGlobalAddr? _globalAddr; + private ConstantDefinition? _constantMade; + + public Arm64UnknownGlobalToConstantAction(MethodAnalysis context, Arm64Instruction instruction, ulong globalAddress) : base(context, instruction) + { + var destReg = Utils.GetRegisterNameNew(instruction.Details.Operands[0].RegisterSafe()?.Id ?? Arm64RegisterId.Invalid); + + if(string.IsNullOrEmpty(destReg)) + return; + + _globalAddr = new(globalAddress); + _constantMade = context.MakeConstant(typeof(Il2CppString), _globalAddr, reg: destReg); + } + + public override Instruction[] ToILInstructions(MethodAnalysis context, ILProcessor processor) + { + throw new System.NotImplementedException(); + } + + public override string? ToPsuedoCode() + { + throw new System.NotImplementedException(); + } + + public override string ToTextSummary() + { + return $"Loads {_globalAddr} into new constant {_constantMade}"; + } + } +} \ No newline at end of file diff --git a/Cpp2IL.Core/Analysis/Actions/ARM64/Arm64UnmanagedLiteralToConstantAction.cs b/Cpp2IL.Core/Analysis/Actions/ARM64/Arm64UnmanagedLiteralToConstantAction.cs new file mode 100644 index 00000000..90875b51 --- /dev/null +++ b/Cpp2IL.Core/Analysis/Actions/ARM64/Arm64UnmanagedLiteralToConstantAction.cs @@ -0,0 +1,39 @@ +using Cpp2IL.Core.Analysis.Actions.Base; +using Cpp2IL.Core.Analysis.ResultModels; +using Gee.External.Capstone.Arm64; +using Mono.Cecil.Cil; + +namespace Cpp2IL.Core.Analysis.Actions.ARM64 +{ + public class Arm64UnmanagedLiteralToConstantAction : BaseAction + { + private readonly ConstantDefinition? _constantMade; + public readonly Il2CppString? Il2CppString; + + public Arm64UnmanagedLiteralToConstantAction(MethodAnalysis context, Arm64Instruction instruction, string literal, ulong address) : base(context, instruction) + { + var destReg = Utils.GetRegisterNameNew(instruction.Details.Operands[0].RegisterSafe()?.Id ?? Arm64RegisterId.Invalid); + + if(string.IsNullOrEmpty(destReg)) + return; + + Il2CppString = new(literal, address); + _constantMade = context.MakeConstant(typeof(Il2CppString), Il2CppString, reg: destReg); + } + + public override Instruction[] ToILInstructions(MethodAnalysis context, ILProcessor processor) + { + throw new System.NotImplementedException(); + } + + public override string? ToPsuedoCode() + { + throw new System.NotImplementedException(); + } + + public override string ToTextSummary() + { + return $"Loads il2cpp string {Il2CppString} into new constant {_constantMade}"; + } + } +} \ No newline at end of file diff --git a/Cpp2IL.Core/Analysis/Actions/ARM64/BaseArm64ConditionalJumpAction.cs b/Cpp2IL.Core/Analysis/Actions/ARM64/BaseArm64ConditionalJumpAction.cs index 82c2d615..204d3ecb 100644 --- a/Cpp2IL.Core/Analysis/Actions/ARM64/BaseArm64ConditionalJumpAction.cs +++ b/Cpp2IL.Core/Analysis/Actions/ARM64/BaseArm64ConditionalJumpAction.cs @@ -1,4 +1,5 @@ -using Cpp2IL.Core.Analysis.Actions.Base; +using System; +using Cpp2IL.Core.Analysis.Actions.Base; using Cpp2IL.Core.Analysis.ResultModels; using Gee.External.Capstone.Arm64; @@ -13,7 +14,15 @@ protected BaseArm64ConditionalJumpAction(MethodAnalysis contex protected sealed override bool IsImplicitNRE() { - //TODO + var body = Utils.GetArm64MethodBodyAtVirtualAddress(JumpTarget); + + for (var i = 0; i < Math.Min(3, body.Count); i++) + { + if (body[i].Mnemonic is "b" or "bl" && body[i].Details.Operands[0].IsImmediate() && Arm64CallThrowHelperAction.IsThrowHelper(body[i].Details.Operands[0].Immediate)) + if (Arm64CallThrowHelperAction.GetExceptionThrown(body[i].Details.Operands[0].Immediate)?.Name == "NullReferenceException") + return true; + } + return false; } } diff --git a/Cpp2IL.Core/Analysis/Actions/Base/AbstractExceptionThrowerAction.cs b/Cpp2IL.Core/Analysis/Actions/Base/AbstractExceptionThrowerAction.cs new file mode 100644 index 00000000..3bd4a4c1 --- /dev/null +++ b/Cpp2IL.Core/Analysis/Actions/Base/AbstractExceptionThrowerAction.cs @@ -0,0 +1,58 @@ +using System.Linq; +using Cpp2IL.Core.Analysis.ResultModels; +using Mono.Cecil; +using Mono.Cecil.Cil; +using Mono.Cecil.Rocks; +using Instruction = Iced.Intel.Instruction; + +namespace Cpp2IL.Core.Analysis.Actions.Base +{ + public abstract class AbstractExceptionThrowerAction : BaseAction + { + protected TypeDefinition? _exceptionType; + + protected AbstractExceptionThrowerAction(MethodAnalysis context, T instruction) : base(context, instruction) + { + } + + public sealed override Mono.Cecil.Cil.Instruction[] ToILInstructions(MethodAnalysis context, ILProcessor processor) + { + if (_exceptionType == null) + throw new TaintedInstructionException(); + + var ctor = _exceptionType.GetConstructors().FirstOrDefault(c => !c.HasParameters); + + if (ctor == null) + { + var exceptionCtor = Utils.ExceptionReference.GetConstructors().First(c => c.HasParameters && c.Parameters.Count == 1 && c.Parameters[0].ParameterType.Name == "String"); + return new[] + { + processor.Create(OpCodes.Ldstr, $"Exception of type {_exceptionType.FullName}, but couldn't find a no-arg ctor"), + processor.Create(OpCodes.Newobj, processor.ImportReference(exceptionCtor)), + processor.Create(OpCodes.Throw) + }; + } + + return new[] + { + processor.Create(OpCodes.Newobj, processor.ImportReference(ctor)), + processor.Create(OpCodes.Throw) + }; + } + + public sealed override string? ToPsuedoCode() + { + return $"throw new {_exceptionType}()"; + } + + public sealed override string ToTextSummary() + { + return $"[!] Constructs and throws an exception of kind {_exceptionType}\n"; + } + + public sealed override bool IsImportant() + { + return true; + } + } +} \ No newline at end of file diff --git a/Cpp2IL.Core/Analysis/Actions/x86/Important/CallExceptionThrowerFunction.cs b/Cpp2IL.Core/Analysis/Actions/x86/Important/CallExceptionThrowerFunction.cs index 7c7ce95c..3a3b90fe 100644 --- a/Cpp2IL.Core/Analysis/Actions/x86/Important/CallExceptionThrowerFunction.cs +++ b/Cpp2IL.Core/Analysis/Actions/x86/Important/CallExceptionThrowerFunction.cs @@ -12,10 +12,9 @@ namespace Cpp2IL.Core.Analysis.Actions.x86.Important { - public class CallExceptionThrowerFunction : BaseAction + public class CallExceptionThrowerFunction : AbstractExceptionThrowerAction { - private static readonly ConcurrentDictionary ExceptionThrowers = new ConcurrentDictionary(); - private TypeDefinition? _exceptionType; + private static readonly ConcurrentDictionary ExceptionThrowers = new(); internal static void Reset() => ExceptionThrowers.Clear(); @@ -116,45 +115,5 @@ public CallExceptionThrowerFunction(MethodAnalysis context, Instruc if(_exceptionType != null) context.MakeLocal(_exceptionType, reg: "rax"); } - - public override Mono.Cecil.Cil.Instruction[] ToILInstructions(MethodAnalysis context, ILProcessor processor) - { - if (_exceptionType == null) - throw new TaintedInstructionException(); - - var ctor = _exceptionType.GetConstructors().FirstOrDefault(c => !c.HasParameters); - - if (ctor == null) - { - var exceptionCtor = Utils.ExceptionReference.GetConstructors().First(c => c.HasParameters && c.Parameters.Count == 1 && c.Parameters[0].ParameterType.Name == "String"); - return new[] - { - processor.Create(OpCodes.Ldstr, $"Exception of type {_exceptionType.FullName}, but couldn't find a no-arg ctor"), - processor.Create(OpCodes.Newobj, processor.ImportReference(exceptionCtor)), - processor.Create(OpCodes.Throw) - }; - } - - return new[] - { - processor.Create(OpCodes.Newobj, processor.ImportReference(ctor)), - processor.Create(OpCodes.Throw) - }; - } - - public override string? ToPsuedoCode() - { - return $"throw new {_exceptionType}()"; - } - - public override string ToTextSummary() - { - return $"[!] Constructs and throws an exception of kind {_exceptionType}\n"; - } - - public override bool IsImportant() - { - return true; - } } } \ No newline at end of file diff --git a/Cpp2IL.Core/Analysis/AsmAnalyzerArmV8A.InstructionChecks.cs b/Cpp2IL.Core/Analysis/AsmAnalyzerArmV8A.InstructionChecks.cs index 5019be95..48e5ca36 100644 --- a/Cpp2IL.Core/Analysis/AsmAnalyzerArmV8A.InstructionChecks.cs +++ b/Cpp2IL.Core/Analysis/AsmAnalyzerArmV8A.InstructionChecks.cs @@ -1,7 +1,5 @@ using System; -using System.Diagnostics; using Cpp2IL.Core.Analysis.Actions.ARM64; -using Cpp2IL.Core.Analysis.Actions.x86; using Cpp2IL.Core.Analysis.ResultModels; using Gee.External.Capstone.Arm64; using LibCpp2IL; @@ -63,15 +61,36 @@ private void CheckForSingleOpInstruction(Arm64Instruction instruction) if (SharedState.MethodsByAddress.TryGetValue(jumpTarget, out var managedFunctionBeingCalled)) { Analysis.Actions.Add(new Arm64ManagedFunctionCallAction(Analysis, instruction)); - } else if (jumpTarget == _keyFunctionAddresses.il2cpp_object_new || jumpTarget == _keyFunctionAddresses.il2cpp_vm_object_new || jumpTarget == _keyFunctionAddresses.il2cpp_codegen_object_new) + } + else if (jumpTarget == _keyFunctionAddresses.il2cpp_object_new || jumpTarget == _keyFunctionAddresses.il2cpp_vm_object_new || jumpTarget == _keyFunctionAddresses.il2cpp_codegen_object_new) { Analysis.Actions.Add(new Arm64NewObjectAction(Analysis, instruction)); } + else if (jumpTarget < Utils.GetAddressOfNextFunctionStart((ulong)instruction.Address) && jumpTarget > (ulong)instruction.Address) + { + //Jumping over an instruction, may need to expand function to include jumpTarget. + } + else if (Arm64CallThrowHelperAction.IsThrowHelper((long)jumpTarget)) + { + Analysis.Actions.Add(new Arm64CallThrowHelperAction(Analysis, instruction)); + break; //Skip adding a return lower down. + } //If we're a b, we need a return too if (instruction.Mnemonic == "b") Analysis.Actions.Add(new Arm64ReturnAction(Analysis, instruction)); break; + case "br": + case "blr": + //Branch to register + + //This part is TODO + //because we need to know what's in the register first (e.g. virtual function) + + //We need a ret if br + if(instruction.Mnemonic == "br") + Analysis.Actions.Add(new Arm64ReturnAction(Analysis, instruction)); + break; } } @@ -141,42 +160,58 @@ private void CheckForTwoOpInstruction(Arm64Instruction instruction) case "ldr" when t0 is Arm64OperandType.Register && t1 is Arm64OperandType.Memory && memVar is ConstantDefinition { Value: long pageAddress } && memoryOffset < 0x4000: //Combined with adrp to load a global. The adrp loads the page, and this adds an additional offset to resolve a specific memory value. var globalAddress = (ulong)(pageAddress + memoryOffset); - if (LibCpp2IlMain.GetAnyGlobalByAddress(globalAddress) is not { IsValid: true } global) + MetadataUsage global = null; + if (LibCpp2IlMain.GetAnyGlobalByAddress(globalAddress) is { IsValid: true } global2) + global = global2; + else { //Try pointer to global try { var possiblePtr = LibCpp2IlMain.Binary!.ReadClassAtVirtualAddress(globalAddress); - if (LibCpp2IlMain.GetAnyGlobalByAddress(possiblePtr) is { IsValid: true } global2) - global = global2; - else - break; + if (LibCpp2IlMain.GetAnyGlobalByAddress(possiblePtr) is { IsValid: true } global3) + global = global3; } catch (Exception) { - break; + //Nothing } } - //Have a global here. - switch (global.Type) + if (global != null) { - case MetadataUsageType.Type: - case MetadataUsageType.TypeInfo: - Analysis.Actions.Add(new Arm64MetadataUsageTypeToRegisterAction(Analysis, instruction)); - break; - case MetadataUsageType.MethodDef: - Analysis.Actions.Add(new Arm64MetadataUsageMethodDefToRegisterAction(Analysis, instruction)); - break; - case MetadataUsageType.MethodRef: - Analysis.Actions.Add(new Arm64MetadataUsageMethodRefToRegisterAction(Analysis, instruction)); - break; - case MetadataUsageType.FieldInfo: - Analysis.Actions.Add(new Arm64MetadataUsageFieldToRegisterAction(Analysis, instruction)); - break; - case MetadataUsageType.StringLiteral: - Analysis.Actions.Add(new Arm64MetadataUsageLiteralToRegisterAction(Analysis, instruction)); - break; + //Have a global here. + switch (global.Type) + { + case MetadataUsageType.Type: + case MetadataUsageType.TypeInfo: + Analysis.Actions.Add(new Arm64MetadataUsageTypeToRegisterAction(Analysis, instruction)); + break; + case MetadataUsageType.MethodDef: + Analysis.Actions.Add(new Arm64MetadataUsageMethodDefToRegisterAction(Analysis, instruction)); + break; + case MetadataUsageType.MethodRef: + Analysis.Actions.Add(new Arm64MetadataUsageMethodRefToRegisterAction(Analysis, instruction)); + break; + case MetadataUsageType.FieldInfo: + Analysis.Actions.Add(new Arm64MetadataUsageFieldToRegisterAction(Analysis, instruction)); + break; + case MetadataUsageType.StringLiteral: + Analysis.Actions.Add(new Arm64MetadataUsageLiteralToRegisterAction(Analysis, instruction)); + break; + } + } + + //Unknown global or string + var potentialLiteral = Utils.TryGetLiteralAt(LibCpp2IlMain.Binary!, (ulong)LibCpp2IlMain.Binary!.MapVirtualAddressToRaw(globalAddress)); + if (potentialLiteral != null && instruction.Details.Operands[0].RegisterSafe()?.Name[0] != 'v') + { + Analysis.Actions.Add(new Arm64UnmanagedLiteralToConstantAction(Analysis, instruction, potentialLiteral, globalAddress)); + } + else + { + //Unknown global + Analysis.Actions.Add(new Arm64UnknownGlobalToConstantAction(Analysis, instruction, globalAddress)); } break; @@ -188,7 +223,7 @@ private void CheckForTwoOpInstruction(Arm64Instruction instruction) //Move generic analyzed op to another reg Analysis.Actions.Add(new Arm64RegCopyAction(Analysis, instruction)); break; - case "str" when t0 is Arm64OperandType.Register && t1 is Arm64OperandType.Memory && var0 is {} && memVar is LocalDefinition: + case "str" when t0 is Arm64OperandType.Register && t1 is Arm64OperandType.Memory && var0 is { } && memVar is LocalDefinition: //Field write from register. //Unlike a bunch of other instructions, source is operand 0, destination is operand 1. Analysis.Actions.Add(new Arm64RegisterToFieldAction(Analysis, instruction)); @@ -233,7 +268,7 @@ private void CheckForThreeOpInstruction(Arm64Instruction instruction) var memVar = Analysis.GetOperandInRegister(Utils.GetRegisterNameNew(memoryBase)); var mnemonic = instruction.Mnemonic; - + switch (mnemonic) { case "orr" when r1Name is "xzr" && t2 == Arm64OperandType.Immediate && imm2 != 0: diff --git a/Cpp2IL.Core/Analysis/AsmAnalyzerArmV8a.cs b/Cpp2IL.Core/Analysis/AsmAnalyzerArmV8a.cs index 18fa9070..6111662a 100644 --- a/Cpp2IL.Core/Analysis/AsmAnalyzerArmV8a.cs +++ b/Cpp2IL.Core/Analysis/AsmAnalyzerArmV8a.cs @@ -3,7 +3,6 @@ using System.Linq; using System.Text; using Cpp2IL.Core.Analysis.PostProcessActions; -using Gee.External.Capstone; using Gee.External.Capstone.Arm64; using LibCpp2IL; using Mono.Cecil; @@ -12,36 +11,13 @@ namespace Cpp2IL.Core.Analysis { public partial class AsmAnalyzerArmV8A : AsmAnalyzerBase { - private static List _allKnownFunctionStarts; - - static AsmAnalyzerArmV8A() - { - _allKnownFunctionStarts = LibCpp2IlMain.TheMetadata!.methodDefs.Select(m => m.MethodPointer).Concat(LibCpp2IlMain.Binary!.ConcreteGenericImplementationsByAddress.Keys).ToList(); - //Sort in ascending order - _allKnownFunctionStarts.Sort(); - } - private static List DisassembleInstructions(MethodDefinition definition) { var baseAddress = definition.AsUnmanaged().MethodPointer; - //We can't use CppMethodBodyBytes to get the byte array, because ARMv7 doesn't have filler bytes like x86 does. - //So we can't work out the end of the method. - //But we can find the start of the next one! - var rawStartOfNextMethod = LibCpp2IlMain.Binary!.MapVirtualAddressToRaw(_allKnownFunctionStarts.FirstOrDefault(a => a > baseAddress)); - var rawStart = LibCpp2IlMain.Binary.MapVirtualAddressToRaw(baseAddress); - if (rawStartOfNextMethod < rawStart) - rawStartOfNextMethod = LibCpp2IlMain.Binary.RawLength; - - var bytes = LibCpp2IlMain.Binary.GetRawBinaryContent().Skip((int)rawStart).Take((int)(rawStartOfNextMethod - rawStart)).ToArray(); - - var disassembler = CapstoneDisassembler.CreateArm64Disassembler(LibCpp2IlMain.Binary.IsBigEndian ? Arm64DisassembleMode.BigEndian : Arm64DisassembleMode.LittleEndian); - disassembler.EnableInstructionDetails = true; - disassembler.DisassembleSyntax = DisassembleSyntax.Intel; - - return disassembler.Disassemble(bytes, (long)baseAddress).ToList(); + return Utils.GetArm64MethodBodyAtVirtualAddress(baseAddress); } - + private string FunctionArgumentDump; public AsmAnalyzerArmV8A(ulong methodPointer, IEnumerable instructions, BaseKeyFunctionAddresses keyFunctionAddresses) : base(methodPointer, instructions, keyFunctionAddresses) diff --git a/Cpp2IL.Core/Analysis/AsmAnalyzerBase.cs b/Cpp2IL.Core/Analysis/AsmAnalyzerBase.cs index b13fe612..50b05c21 100644 --- a/Cpp2IL.Core/Analysis/AsmAnalyzerBase.cs +++ b/Cpp2IL.Core/Analysis/AsmAnalyzerBase.cs @@ -26,7 +26,7 @@ public abstract class AsmAnalyzerBase : IAsmAnalyzer internal AsmAnalyzerBase(ulong methodPointer, IEnumerable instructions, BaseKeyFunctionAddresses keyFunctionAddresses) { - _keyFunctionAddresses = keyFunctionAddresses; + _keyFunctionAddresses = keyFunctionAddresses ?? throw new ArgumentNullException(nameof(keyFunctionAddresses)); _instructions = new(); CppAssembly = LibCpp2IlMain.Binary!; @@ -175,7 +175,7 @@ public StringBuilder BuildILToString() success = false; break; } - builder.Append('\n'); + builder.Append("\n\t"); } catch (TaintedInstructionException e) { @@ -188,7 +188,7 @@ public StringBuilder BuildILToString() success = false; break; } - builder.Append('\n'); + builder.Append("\n\t"); } catch (Exception e) { @@ -201,7 +201,7 @@ public StringBuilder BuildILToString() success = false; break; } - builder.Append('\n'); + builder.Append("\n\t"); } } } diff --git a/Cpp2IL.Core/Analysis/MethodUtils.cs b/Cpp2IL.Core/Analysis/MethodUtils.cs index 18fe5380..e045f3af 100644 --- a/Cpp2IL.Core/Analysis/MethodUtils.cs +++ b/Cpp2IL.Core/Analysis/MethodUtils.cs @@ -5,6 +5,7 @@ using Cpp2IL.Core.Analysis.Actions.Base; using Cpp2IL.Core.Analysis.Actions.x86.Important; using Cpp2IL.Core.Analysis.ResultModels; +using Cpp2IL.Core.Exceptions; using Iced.Intel; using LibCpp2IL; using LibCpp2IL.Metadata; @@ -16,7 +17,7 @@ namespace Cpp2IL.Core.Analysis { - public class MethodUtils + public static class MethodUtils { private static readonly string[] NON_FP_REGISTERS_BY_IDX = {"rcx", "rdx", "r8", "r9"}; @@ -40,10 +41,16 @@ public static bool CheckParameters(T associatedInstruction, MethodReference m if (beingCalledOn == null) beingCalledOn = method.DeclaringType; - return LibCpp2IlMain.Binary!.is32Bit ? CheckParameters32(associatedInstruction, method, context, isInstance, beingCalledOn, out arguments) : CheckParameters64(method, context, isInstance, out arguments, beingCalledOn, failOnLeftoverArgs); + return LibCpp2IlMain.Binary!.InstructionSet switch + { + InstructionSet.X86_32 => CheckParameters32(associatedInstruction, method, context, isInstance, beingCalledOn, out arguments), + InstructionSet.X86_64 => CheckParameters64(method, context, isInstance, out arguments, beingCalledOn, failOnLeftoverArgs), + InstructionSet.ARM64 => CheckParametersArmV8(method, context, isInstance, out arguments), + _ => throw new UnsupportedInstructionSetException(), + }; } - private static IAnalysedOperand? GetValueFromAppropriateReg(bool? isFloatingPoint, string fpReg, string normalReg, MethodAnalysis context) + private static IAnalysedOperand? GetValueFromAppropriateX64Reg(bool? isFloatingPoint, string fpReg, string normalReg, MethodAnalysis context) { if(isFloatingPoint == true) if (context.GetOperandInRegister(fpReg) is { } fpVal) @@ -52,6 +59,77 @@ public static bool CheckParameters(T associatedInstruction, MethodReference m return context.GetOperandInRegister(normalReg); } + private static bool CheckSingleParamNew(IAnalysedOperand arg, TypeReference parameterType) + { + switch (arg) + { + case ConstantDefinition cons when cons.Type.FullName != parameterType.ToString(): //Constant type mismatch + if (parameterType.Resolve()?.IsEnum == true && cons.Type.IsPrimitive) + break; //Forgive primitive => enum coercion. + if (parameterType.IsPrimitive && cons.Type.IsPrimitive) + break; //Forgive primitive coercion. + if (parameterType.FullName is "System.String" or "System.Object" && cons.Value is string) + break; //Forgive unmanaged string literal as managed string or object param + if (parameterType.IsPrimitive && cons.Value is Il2CppString cppString) + { + //Il2CppString contains any unknown global address that looks vaguely like a string + //We try and re-interpret it here, most commonly as a floating point value, as integer constants are usually immediate values. + var primitiveLength = Utils.GetSizeOfObject(parameterType); + var newValue = primitiveLength switch + { + 8 => LibCpp2IlMain.Binary!.ReadClassAtVirtualAddress(cppString.Address), + 4 => LibCpp2IlMain.Binary!.ReadClassAtVirtualAddress(cppString.Address), + 1 => LibCpp2IlMain.Binary!.ReadClassAtVirtualAddress(cppString.Address), + _ => throw new Exception($"'string' -> primitive: Not implemented: Size {primitiveLength}, type {parameterType}") + }; + + //Reinterpret floating-point bytes + cons.Value = parameterType.Name switch + { + "Single" => BitConverter.ToSingle(BitConverter.GetBytes((uint)newValue), 0), + "Double" => BitConverter.ToDouble(BitConverter.GetBytes(newValue), 0), + _ => newValue + }; + + //Correct type + cons.Type = Type.GetType(parameterType.FullName!)!; + break; + } + + if (parameterType.IsPrimitive && cons.Value is UnknownGlobalAddr unknownGlobalAddr) + { + //Try get unknown global values as a constant + Utils.CoerceUnknownGlobalValue(parameterType, unknownGlobalAddr, cons); + break; + } + + if (typeof(MemberReference).IsAssignableFrom(cons.Type) && parameterType.Name == "IntPtr") + break; //We allow this, because an IntPtr is usually a type or, more commonly, method pointer. + if (typeof(FieldReference).IsAssignableFrom(cons.Type) && parameterType.Name == "RuntimeFieldHandle") + break; //These are the same struct - we represent it as a FieldReference but it's actually a runtime field handle. + if (typeof(TypeReference).IsAssignableFrom(cons.Type) && parameterType.Name == "RuntimeTypeHandle") + break; //These are the same struct - we represent it as a TypeReference but it's actually a runtime type handle. + return false; + case LocalDefinition local: + if (parameterType.IsArray && local.Type?.IsArray != true) + return false; //Absolutely do not forgive parameters which expect an array, and we're passing a single value + if (local.Type != null && parameterType.Resolve().IsAssignableFrom(local.Type)) + //Basic "Success" condition, parameter type matches type of local + break; + if (parameterType.IsPrimitive && local.Type?.IsPrimitive == true) + break; //Forgive primitive coercion. + if (local.Type?.IsArray == true && parameterType.Resolve().IsAssignableFrom(Utils.ArrayReference)) + break; //Forgive IEnumerables etc + if (local.Type is GenericParameter && parameterType is GenericParameter && local.Type.Name == parameterType.Name) + break; //Unknown generic params which share a name. Not sure this is needed. + if (local.KnownInitialValue is 0) + break; //Literal null value. This is ok. + return false; + } + + return true; + } + private static bool CheckParameters64(MethodReference method, MethodAnalysis context, bool isInstance, [NotNullWhen(true)] out List? arguments, TypeReference beingCalledOn, bool failOnLeftoverArgs = true) { arguments = null; @@ -60,9 +138,9 @@ private static bool CheckParameters64(MethodReference method, MethodAnalysis< if (!isInstance) actualArgs.Add(context.GetOperandInRegister("rcx") ?? context.GetOperandInRegister("xmm0")); - actualArgs.Add(GetValueFromAppropriateReg(method.Parameters.GetValueSafely(0)?.ParameterType?.ShouldBeInFloatingPointRegister(), "xmm1", "rdx", context)); - actualArgs.Add(GetValueFromAppropriateReg(method.Parameters.GetValueSafely(1)?.ParameterType?.ShouldBeInFloatingPointRegister(), "xmm2", "r8", context)); - actualArgs.Add(GetValueFromAppropriateReg(method.Parameters.GetValueSafely(2)?.ParameterType?.ShouldBeInFloatingPointRegister(), "xmm3", "r9", context)); + actualArgs.Add(GetValueFromAppropriateX64Reg(method.Parameters.GetValueSafely(0)?.ParameterType?.ShouldBeInFloatingPointRegister(), "xmm1", "rdx", context)); + actualArgs.Add(GetValueFromAppropriateX64Reg(method.Parameters.GetValueSafely(1)?.ParameterType?.ShouldBeInFloatingPointRegister(), "xmm2", "r8", context)); + actualArgs.Add(GetValueFromAppropriateX64Reg(method.Parameters.GetValueSafely(2)?.ParameterType?.ShouldBeInFloatingPointRegister(), "xmm3", "r9", context)); if (actualArgs.FindLast(a => a is ConstantDefinition {Value: MethodReference _}) is ConstantDefinition {Value: MethodReference actualGenericMethod}) { @@ -120,69 +198,8 @@ private static bool CheckParameters64(MethodReference method, MethodAnalysis< arg = loc; } - switch (arg) - { - //We assert parameter type to be non-null in all of these cases, because we've null-checked the default value further up. - - case ConstantDefinition cons when cons.Type.FullName != parameterType!.ToString(): //Constant type mismatch - if (parameterType.Resolve()?.IsEnum == true && cons.Type.IsPrimitive) - break; //Forgive primitive => enum coercion. - if (parameterType.IsPrimitive && cons.Type.IsPrimitive) - break; //Forgive primitive coercion. - if ((parameterType.FullName == "System.String" || parameterType.FullName == "System.Object") && cons.Value is string) - break; - if (parameterType.IsPrimitive && cons.Value is Il2CppString cppString) - { - var primitiveLength = Utils.GetSizeOfObject(parameterType); - ulong newValue; - if (primitiveLength == 8) - newValue = LibCpp2IlMain.Binary!.ReadClassAtVirtualAddress(cppString.Address); - else if (primitiveLength == 4) - newValue = LibCpp2IlMain.Binary!.ReadClassAtVirtualAddress(cppString.Address); - else if (primitiveLength == 1) - newValue = LibCpp2IlMain.Binary!.ReadClassAtVirtualAddress(cppString.Address); - else - throw new Exception($"'string' -> primitive: Not implemented: Size {primitiveLength}, type {parameterType}"); - - if (parameterType.Name == "Single") - cons.Value = BitConverter.ToSingle(BitConverter.GetBytes((uint) newValue), 0); - else if (parameterType.Name == "Double") - cons.Value = BitConverter.ToDouble(BitConverter.GetBytes(newValue), 0); - else - cons.Value = newValue; - - cons.Type = Type.GetType(parameterType.FullName!)!; - break; - } - - if (parameterType.IsPrimitive && cons.Value is UnknownGlobalAddr unknownGlobalAddr) - { - Utils.CoerceUnknownGlobalValue(parameterType, unknownGlobalAddr, cons); - break; - } - if (typeof(MemberReference).IsAssignableFrom(cons.Type) && parameterType.Name == "IntPtr") - break; //We allow this, because an IntPtr is usually a type or, more commonly, method pointer. - if (typeof(FieldReference).IsAssignableFrom(cons.Type) && parameterType.Name == "RuntimeFieldHandle") - break; //These are the same struct - we represent it as a FieldReference but it's actually a runtime field handle. - if (typeof(TypeReference).IsAssignableFrom(cons.Type) && parameterType.Name == "RuntimeTypeHandle") - break; //These are the same struct - we represent it as a TypeReference but it's actually a runtime type handle. - return false; - case LocalDefinition local: - if (parameterType.IsArray && local.Type?.IsArray != true) - return false; //Fail. Array<->non array is non-forgivable. - if(local.Type != null && parameterType!.Resolve().IsAssignableFrom(local.Type)) - //"Success" condition, all matches - break; - if (parameterType!.IsPrimitive && local.Type?.IsPrimitive == true) - break; //Forgive primitive coercion. - if (local.Type?.IsArray == true && parameterType.Resolve().IsAssignableFrom(Utils.ArrayReference)) - break; - if (local.Type is GenericParameter && parameterType is GenericParameter && local.Type.Name == parameterType.Name) - break; - if (local.KnownInitialValue is int i && i == 0) - break; //Null. - return false; - } + if (!CheckSingleParamNew(arg, parameterType)) + return false; //todo handle value types (Structs) @@ -224,7 +241,7 @@ private static bool CheckParameters32(T associatedInstruction, MethodReferenc } var value = context.Stack.Peek(); - if (CheckSingleParameter(value, parameterData.ParameterType)) + if (CheckSingleParamNew(value, parameterData.ParameterType)) { //This parameter is fine, move on. listToRePush.Add(context.Stack.Pop()); @@ -240,7 +257,7 @@ private static bool CheckParameters32(T associatedInstruction, MethodReferenc var fieldsToCheck = structTypeDef?.Fields.Where(f => !f.IsStatic).ToList(); - if (structTypeDef != null && context.Stack.Count >= fieldsToCheck.Count) + if (structTypeDef != null && context.Stack.Count >= fieldsToCheck!.Count) { //We have enough stack entries to fill the fields. var listOfStackArgs = new List(); @@ -255,7 +272,7 @@ private static bool CheckParameters32(T associatedInstruction, MethodReferenc { var structField = fieldsToCheck[i]; var actualArg = listOfStackArgs[i]; - allStructFieldsMatch &= CheckSingleParameter(actualArg, structField.FieldType); + allStructFieldsMatch &= CheckSingleParamNew(actualArg, structField.FieldType); } if (allStructFieldsMatch) @@ -265,10 +282,10 @@ private static bool CheckParameters32(T associatedInstruction, MethodReferenc //as its used as the arguments //Allocate an instance of the struct - var allocateInstanceAction = AbstractNewObjAction.Make(context, associatedInstruction, structTypeDef); + var allocateInstanceAction = AbstractNewObjAction.Make(context, associatedInstruction, structTypeDef); context.Actions.Add(allocateInstanceAction); - var instanceLocal = allocateInstanceAction.LocalReturned; + var instanceLocal = allocateInstanceAction.LocalReturned!; //Set the fields from the operands for (var i = 0; i < listOfStackArgs.Count; i++) @@ -278,7 +295,7 @@ private static bool CheckParameters32(T associatedInstruction, MethodReferenc var stackArg = listOfStackArgs[i]; if (stackArg is LocalDefinition local) //I'm sorry for what the next line contains. - context.Actions.Add((BaseAction)(object) new RegToFieldAction((MethodAnalysis)(object) context, (Instruction) (object) associatedInstruction, FieldUtils.FieldBeingAccessedData.FromDirectField(associatedField), (LocalDefinition)(object) instanceLocal!, (LocalDefinition)(object) local)); + context.Actions.Add((BaseAction)(object) new RegToFieldAction((MethodAnalysis)(object) context, (Instruction) (object) associatedInstruction!, FieldUtils.FieldBeingAccessedData.FromDirectField(associatedField), instanceLocal, local)); else { //TODO Constants @@ -310,26 +327,37 @@ private static bool CheckParameters32(T associatedInstruction, MethodReferenc return true; } - private static bool CheckSingleParameter(IAnalysedOperand analyzedOperand, TypeReference expectedType) + private static bool CheckParametersArmV8(MethodReference method, MethodAnalysis context, bool isInstance, [NotNullWhen(true)] out List? arguments) { - switch (analyzedOperand) + //See MethodAnalysis#HandleArm64Parameters for a detailed explanation of how this works. + arguments = null; + + var xCount = isInstance ? 1 : 0; + var vCount = 0; + + var ret = new List(); + foreach (var parameterDefinition in method.Parameters) { - case ConstantDefinition cons when cons.Type.FullName != expectedType.ToString(): //Constant type mismatch - //In the case of a constant, check if we can re-interpret. + //Floating point -> v reg, else -> x reg + var reg = parameterDefinition.ParameterType.ShouldBeInFloatingPointRegister() ? $"v{vCount++}" : $"x{xCount++}"; - if (expectedType.ToString() == "System.Boolean" && cons.Value is ulong constantNumber) - { - //Reinterpret as bool. - cons.Type = typeof(bool); - cons.Value = constantNumber == 1UL; - return true; - } + if (reg[^1] >= '8') + return false; //TODO stack support. Probably not needed often. + + var operand = context.GetOperandInRegister(reg); + if (operand == null) + //Missing an arg - instant fail. return false; - case LocalDefinition local when local.Type == null || !expectedType.Resolve().IsAssignableFrom(local.Type): //Local type mismatch - return false; + + if (!CheckSingleParamNew(operand, parameterDefinition.ParameterType)) + //Mismatched type. + return false; + + ret.Add(operand); } + arguments = ret; return true; } diff --git a/Cpp2IL.Core/Analysis/PostProcessActions/1RenameLocalsPostProcessor.cs b/Cpp2IL.Core/Analysis/PostProcessActions/1RenameLocalsPostProcessor.cs index dacb1c7c..6a122e29 100644 --- a/Cpp2IL.Core/Analysis/PostProcessActions/1RenameLocalsPostProcessor.cs +++ b/Cpp2IL.Core/Analysis/PostProcessActions/1RenameLocalsPostProcessor.cs @@ -72,6 +72,9 @@ public override void PostProcess(MethodAnalysis analysis) //lower first character nameBase = $"{char.ToLower(nameBase[0])}{nameBase[1..]}"; + if (nameBase.Contains("k__BackingField")) + nameBase = nameBase[1..nameBase.IndexOf(">", StringComparison.Ordinal)]; + if (nameBase.Contains("`")) nameBase = nameBase[..nameBase.IndexOf("`", StringComparison.Ordinal)]; diff --git a/Cpp2IL.Core/Extensions.cs b/Cpp2IL.Core/Extensions.cs index 78a13150..a5a3c674 100644 --- a/Cpp2IL.Core/Extensions.cs +++ b/Cpp2IL.Core/Extensions.cs @@ -68,9 +68,7 @@ public static Dictionary Clone(this Dictionary original) public static T[] SubArray(this T[] data, int index, int length) { - var result = new T[length]; - Array.Copy(data, index, result, 0, length); - return result; + return data.Skip(index).Take(length).ToArray(); } public static T RemoveAndReturn(this List data, int index) diff --git a/Cpp2IL.Core/Utils.cs b/Cpp2IL.Core/Utils.cs index 3c3fdc7e..f7751ada 100644 --- a/Cpp2IL.Core/Utils.cs +++ b/Cpp2IL.Core/Utils.cs @@ -5,7 +5,9 @@ using System.Linq; using System.Text; using System.Text.RegularExpressions; +using Cpp2IL.Core.Analysis; using Cpp2IL.Core.Analysis.ResultModels; +using Gee.External.Capstone; using Gee.External.Capstone.Arm; using Gee.External.Capstone.Arm64; using Iced.Intel; @@ -461,6 +463,9 @@ public static MethodReference MakeGeneric(this MethodReference self, params Type public static string? TryGetLiteralAt(Il2CppBinary theDll, ulong rawAddr) { + if (theDll.RawLength <= (long)rawAddr) + return null; + var c = Convert.ToChar(theDll.GetByteAtRawAddress(rawAddr)); if (char.IsLetterOrDigit(c) || char.IsPunctuation(c) || char.IsSymbol(c) || char.IsWhiteSpace(c)) { @@ -939,7 +944,7 @@ public static ulong GetAddressOfInstruction(T t) return t switch { - Iced.Intel.Instruction x86 => x86.IP, + Instruction x86 => x86.IP, ArmInstruction arm => (ulong)arm.Address, Arm64Instruction arm64 => (ulong)arm64.Address, _ => throw new($"Unsupported instruction type {t.GetType()}"), @@ -953,11 +958,74 @@ public static ulong GetAddressOfNextInstruction(T t) return t switch { - Iced.Intel.Instruction x86 => x86.NextIP, + Instruction x86 => x86.NextIP, ArmInstruction arm => (ulong)(arm.Address + 4), Arm64Instruction arm64 => (ulong)(arm64.Address + 4), _ => throw new($"Unsupported instruction type {t.GetType()}"), }; } + + private static List? _allKnownFunctionStarts; + private static CapstoneArm64Disassembler? _arm64Disassembler; + + private static void InitArm64Decompilation() + { + _allKnownFunctionStarts = LibCpp2IlMain.TheMetadata!.methodDefs.Select(m => m.MethodPointer).Concat(LibCpp2IlMain.Binary!.ConcreteGenericImplementationsByAddress.Keys).ToList(); + //Sort in ascending order + _allKnownFunctionStarts.Sort(); + + var disassembler = CapstoneDisassembler.CreateArm64Disassembler(LibCpp2IlMain.Binary.IsBigEndian ? Arm64DisassembleMode.BigEndian : Arm64DisassembleMode.LittleEndian); + disassembler.EnableInstructionDetails = true; + disassembler.EnableSkipDataMode = true; + disassembler.DisassembleSyntax = DisassembleSyntax.Intel; + _arm64Disassembler = disassembler; + } + + public static ulong GetAddressOfNextFunctionStart(ulong current) + { + if(_allKnownFunctionStarts == null) + InitArm64Decompilation(); + + return _allKnownFunctionStarts!.FirstOrDefault(a => a > current); + } + + public static List GetArm64MethodBodyAtVirtualAddress(ulong virtAddress, bool managed = true) + { + if(_allKnownFunctionStarts == null) + InitArm64Decompilation(); + + //We can't use CppMethodBodyBytes to get the byte array, because ARMv7 doesn't have filler bytes like x86 does. + //So we can't work out the end of the method. + //But we can find the start of the next one! (If managed) + if (managed) + { + var rawStartOfNextMethod = LibCpp2IlMain.Binary!.MapVirtualAddressToRaw(GetAddressOfNextFunctionStart(virtAddress)); + var rawStart = LibCpp2IlMain.Binary.MapVirtualAddressToRaw(virtAddress); + if (rawStartOfNextMethod < rawStart) + rawStartOfNextMethod = LibCpp2IlMain.Binary.RawLength; + + byte[] bytes = LibCpp2IlMain.Binary.GetRawBinaryContent().Skip((int)rawStart).Take((int)(rawStartOfNextMethod - rawStart)).ToArray(); + + return _arm64Disassembler!.Disassemble(bytes, (long)virtAddress).ToList(); + } + + //Unmanaged function, look for first b or bl + var pos = (int) LibCpp2IlMain.Binary!.MapVirtualAddressToRaw(virtAddress); + var allBytes = LibCpp2IlMain.Binary.GetRawBinaryContent(); + List ret = new(); + + var keepGoing = true; + while (keepGoing) + { + //All arm64 instructions are 4 bytes + ret.AddRange(_arm64Disassembler!.Disassemble(allBytes.Skip(pos).Take(4).ToArray(), (long)virtAddress)); + virtAddress += 4; + pos += 4; + + keepGoing = !ret.Any(i => i.Mnemonic is "b" or "bl"); + } + + return ret; + } } } \ No newline at end of file