diff --git a/Il2CppInspector/Il2CppBinaryARM.cs b/Il2CppInspector/Il2CppBinaryARM.cs index 3c19dc3..ae0f4cc 100644 --- a/Il2CppInspector/Il2CppBinaryARM.cs +++ b/Il2CppInspector/Il2CppBinaryARM.cs @@ -6,6 +6,7 @@ */ using System; +using System.Collections.Generic; using System.Linq; namespace Il2CppInspector @@ -16,6 +17,165 @@ namespace Il2CppInspector public Il2CppBinaryARM(IFileFormatReader stream, uint codeRegistration, uint metadataRegistration) : base(stream, codeRegistration, metadataRegistration) { } + // Thumb 2 Supplement Reference Manual: http://class.ece.iastate.edu/cpre288/resources/docs/Thumb-2SupplementReferenceManual.pdf + + // Section 3.1 + private uint getNextThumbInstruction(IFileFormatReader image) { + // Assume 16-bit + uint inst = image.ReadUInt16(); + + // Is 32-bit? (top three bits are 0b111 and bits 11-12 are not 0b00) + if (inst.Bits(13, 3) == 0b111) + if (inst.Bits(11, 2) != 0b00) + inst = (inst << 16) + image.ReadUInt16(); + + return inst; + } + + // Page 4-166 (MOVS encoding T1, MOVW encoding T3), Page 4-171 (MOVT) + // In Thumb, an 8-byte MOV instruction is MOVW followed by MOVT + private enum Thumb : uint + { + MovW = 0b100100, + MovT = 0b101100 + } + + private (uint reg_d, ushort imm)? getMovImm(uint inst, Thumb movType) { + uint reg_d, imm; + + // Encoding T1 + if (inst.Bits(11, 21) == 0b00100 && movType == Thumb.MovW) { + imm = inst.Bits(0, 8); + reg_d = inst.Bits(8, 3); + return (reg_d, (ushort) imm); + } + + // Encoding T3 + if (inst.Bits(20, 6) != (uint) movType || inst.Bits(27, 5) != 0b11110 || inst.Bits(15, 1) != 0) + return null; + + imm = (inst.Bits(16, 4) << 12) + (inst.Bits(26, 1) << 11) + (inst.Bits(12, 3) << 8) + inst.Bits(0, 8); + reg_d = inst.Bits(8, 4); + return (reg_d, (ushort) imm); + } + + // Section 4.6.4 (ADD encoding T2; 16-bit only, so every bit above the opcode must be zero) + private (uint reg_dn, uint reg_m)?
getAddReg(uint inst) { + if (inst.Bits(8, 24) != 0b_0100_0100) + return null; + + var reg_dn = (inst.Bits(7, 1) << 3) + inst.Bits(0, 3); + var reg_m = inst.Bits(3, 4); + return (reg_dn, reg_m); + } + + // Section 4.6.43 (LDR encoding T1; 16-bit only, so every bit above the opcode must be zero; imm5 is a word offset, so the byte offset is imm5 * 4) + private (uint reg_n, uint reg_t, ushort imm)? getLdrImm(uint inst) { + if (inst.Bits(11, 21) != 0b_01101) + return null; + + var reg_n = inst.Bits(3, 3); + var reg_t = inst.Bits(0, 3); + var imm = inst.Bits(6, 5) << 2; + return (reg_n, reg_t, (ushort) imm); + } + + // Section 4.6.12 (B.W encoding T4; for encoding T3, flip bit 12) + private bool isBW(uint inst) => inst.Bits(27, 5) == 0b11110 && inst.Bits(14, 2) == 0b10 && inst.Bits(12, 1) == 1; + + // Sweep a Thumb function and return the register values at the end (register number => value), or null if an unexpected instruction is found + private Dictionary<uint, uint> sweepThumbForAddressLoads(List<uint> func, uint baseAddress, IFileFormatReader image) { + // List of registers and addresses loaded into them + var regs = new Dictionary<uint, uint>(); + + // Program counter is R15 in ARM + // https://www.scss.tcd.ie/~waldroj/3d1/arm_arm.pdf states: + // For a Thumb instruction, the value read is the address of the instruction plus 4 bytes + regs.Add(15, baseAddress + 4); + + // Iterate each instruction + foreach (var inst in func) { + + var accepted = false; + + // Is it a MOVW? + if (getMovImm(inst, Thumb.MovW) is (uint movw_reg_d, ushort movw_imm)) { + if (regs.ContainsKey(movw_reg_d)) + regs[movw_reg_d] = movw_imm; // Clears top 16 bits + else + regs.Add(movw_reg_d, movw_imm); + + accepted = true; + } + + // Is it a MOVT? + if (getMovImm(inst, Thumb.MovT) is (uint movt_reg_d, ushort movt_imm)) { + if (regs.ContainsKey(movt_reg_d)) + regs[movt_reg_d] |= (uint) movt_imm << 16; + else + regs.Add(movt_reg_d, (uint) movt_imm << 16); + + accepted = true; + } + + // Is it a pointer de-reference (LDR Rt, [Rn, #imm])?
+ if (getLdrImm(inst) is (uint ldr_reg_n, uint ldr_reg_t, ushort ldr_imm)) { + // The code below works in the generic case for all Rt, Rn and #imm, + // but for our scan we want to restrict it such that Rt == Rn and #imm == 0 + // otherwise we might pick up functions we don't want + if (ldr_reg_n == ldr_reg_t && ldr_imm == 0) + + if (regs.ContainsKey(ldr_reg_n)) { + var offset = (regs[ldr_reg_n] & 0xffff_fffe) + ldr_imm; + var value = image.ReadUInt32(image.MapVATR(offset)); + if (regs.ContainsKey(ldr_reg_t)) + regs[ldr_reg_t] = value; + else + regs.Add(ldr_reg_t, value); + + accepted = true; + } + } + + // Is it an ADD Rdn, Rm? + if (getAddReg(inst) is (uint add_reg_dn, uint add_reg_m)) { + if (regs.ContainsKey(add_reg_dn) && regs.ContainsKey(add_reg_m)) { + regs[add_reg_dn] += regs[add_reg_m]; + + accepted = true; + } + } + + // Is it the end (B.W)? + if (isBW(inst)) + accepted = true; + + // In our scan, we will ONLY accept one of the above instructions + if (!accepted) + return null; + + // Advance program counter which we need to calculate ADDs with PC as operand correctly + regs[15] += inst.Bits(29, 3) == 0b111 ?
4u : 2u; + } + return regs; + } + + // Get a Thumb function that ends in B.W + private List<uint> getThumbFunctionAtFileOffset(IFileFormatReader image, uint loc, uint maxLength) { + // Read a function that ends in a hard branch (B.W) or reaches maxLength instructions + var func = new List<uint>(); + uint inst; + + image.Position = loc; + + do { + inst = getNextThumbInstruction(image); + func.Add(inst); + } while (!isBW(inst) && func.Count < maxLength); + + return func; + } + protected override (ulong, ulong) ConsiderCode(IFileFormatReader image, uint loc) { // Assembly bytes to search for at start of each function ulong metadataRegistration, codeRegistration; @@ -29,6 +189,7 @@ namespace Il2CppInspector if (new byte[] { 0x1c, 0x0, 0x9f, 0xe5, 0x1c, 0x10, 0x9f, 0xe5, 0x1c, 0x20, 0x9f, 0xe5 }.SequenceEqual(buff)) { // Get offset to all addresses + // The +8 is because in ARM, PC always contains the address of the currently executing instruction + 8 var offset = image.ReadUInt32(loc + 0x24) + loc + 0xc + 8; // Get pointer to Il2CppCodegenRegistration(void) @@ -68,52 +229,54 @@ namespace Il2CppInspector return (codeRegistration, metadataRegistration); } - // ARMv7 Thumb (T1) metadata v23 - // void Il2CppCodeRegistration() + // Thumb-2 + // We use a method similar to the linear sweep in Il2CppBinaryARM64; see the comments there for details + loc &= 0xffff_fffe; image.Position = loc; - // Check for ADD Rx, PC in relevant parts of function - buff = image.ReadBytes(0x20); - if (buff[0x0C] == 0x79 && buff[0x0D] == 0x44 && // ADD R1, PC - buff[0x16] == 0x78 && buff[0x17] == 0x44 && // ADD R0, PC - buff[0x1E] == 0x7A && buff[0x1F] == 0x44) // ADD R2, PC - { - // Follow path to metadata pointer - var ppMetadata = decodeMovImm32(buff) + loc + 0x10; - image.Position = ppMetadata; - metadataRegistration = image.ReadUInt32(); + // Load function into memory + // In practice, the longest function length we need is not generally longer than 11 instructions + var func = getThumbFunctionAtFileOffset(image, loc,
11); - // Follow path to code pointer - var pCode = decodeMovImm32(buff.Skip(8).Take(4).Concat(buff.Skip(14).Take(4)).ToArray()); - codeRegistration = pCode + loc + 0x1A + image.GlobalOffset; + // Don't accept functions longer than 11 instructions (in that case, the last instruction read won't be a B.W) + if (!isBW(func[^1])) + return (0, 0); - return (codeRegistration, metadataRegistration); + // Get a list of registers and values in them at the end of the function + var regs = sweepThumbForAddressLoads(func, (uint) image.GlobalOffset + loc, image); + if (regs == null) + return (0, 0); + + uint r0, r1; + + // Is it the Il2CppCodeRegistration.cpp initializer? + // R0-R3 + PC will be set and they will be the only registers set + // R2 and R3 must be zero + if (regs.Count == 5 && regs.TryGetValue(0, out _) && regs.TryGetValue(1, out r1) + && regs.TryGetValue(2, out uint r2) && regs.TryGetValue(3, out uint r3)) { + + if (r2 == 0 && r3 == 0) { + // Load up the function whose address is in R1 + func = getThumbFunctionAtFileOffset(image, image.MapVATR(r1 & 0xffff_fffe), 11); + + if (!isBW(func[^1])) + return (0, 0); + + regs = sweepThumbForAddressLoads(func, r1 & 0xffff_fffe, image); + } } - // ARMv7 Thumb (T1) - // http://liris.cnrs.fr/~mmrissa/lib/exe/fetch.php?media=armv7-a-r-manual.pdf - A8.8.106 - // http://armconverter.com/hextoarm/ - var bytes = new byte[] { 0x2d, 0xe9, 0x00, 0x48, 0xeb, 0x46 }; - image.Position = loc; - buff = image.ReadBytes(6); - if (!bytes.SequenceEqual(buff)) - return (0, 0); - bytes = new byte[] { 0x00, 0x23, 0x00, 0x22, 0xbd, 0xe8, 0x00, 0x48 }; - image.Position += 0x10; - buff = image.ReadBytes(8); - if (!bytes.SequenceEqual(buff)) - return (0, 0); - image.Position = loc + 6; - image.Position = (image.MapVATR(decodeMovImm32(image.ReadBytes(8))) & 0xfffffffc) + 0x0e; - metadataRegistration = decodeMovImm32(image.ReadBytes(8)); - codeRegistration = decodeMovImm32(image.ReadBytes(8)); - return (codeRegistration, metadataRegistration); - } + //
Is it Il2CppCodegenRegistration(void)? + // In v21 and later, R0-R2 + PC will be set and they will be the only registers set + // Pre-v21, R0-R1 + PC will be the only registers set (regs is null here if the sweep of the function in R1 was rejected) - private uint decodeMovImm32(byte[] asm) { - ushort low = (ushort) (asm[2] + ((asm[3] & 0x70) << 4) + ((asm[1] & 0x04) << 9) + ((asm[0] & 0x0f) << 12)); - ushort high = (ushort) (asm[6] + ((asm[7] & 0x70) << 4) + ((asm[5] & 0x04) << 9) + ((asm[4] & 0x0f) << 12)); - return (uint) ((high << 16) + low); + if (image.Version >= 21 && regs?.Count == 4 && regs.TryGetValue(0, out r0) && regs.TryGetValue(1, out r1) && regs.TryGetValue(2, out uint _)) + return (r0 & 0xffff_fffe, r1 & 0xffff_fffe); + + if (image.Version < 21 && regs?.Count == 3 && regs.TryGetValue(0, out r0) && regs.TryGetValue(1, out r1)) + return (r0 & 0xffff_fffe, r1 & 0xffff_fffe); + + return (0, 0); } } -} +} \ No newline at end of file