use different late binding and image scan implementations

This commit is contained in:
LukeFZ
2023-11-29 21:34:49 +01:00
parent 8ffc7e0021
commit a24fcb50bd
2 changed files with 323 additions and 221 deletions

View File

@@ -173,6 +173,30 @@ namespace Il2CppInspector
// Synonyms: copying, piracy, theft, stealing, infringement of copyright // Synonyms: copying, piracy, theft, stealing, infringement of copyright
BinaryImage.Position = 0; BinaryImage.Position = 0;
var words = BinaryImage.ReadArray<ulong>(0, (int)BinaryImage.Length / (BinaryImage.Bits / 8));
var usages = new List<MetadataUsage>();
for (uint i = 0; i < words.Length; i++)
{
var metadataValue = words[i];
if (metadataValue < uint.MaxValue)
{
var encodedToken = (uint)metadataValue;
var usage = MetadataUsage.FromEncodedIndex(this, encodedToken);
if (usage.Type > 0
&& usage.Type <= MetadataUsageType.MethodRef
&& metadataValue == (((uint)usage.Type << 29) | ((uint)usage.SourceIndex << 1)) + 1
&& BinaryImage.TryMapFileOffsetToVA(i * ((uint)BinaryImage.Bits / 8), out var va))
{
usages.Add(MetadataUsage.FromEncodedIndex(this, encodedToken, va));
}
}
}
return usages;
/*BinaryImage.Position = 0;
var sequenceLength = 0; var sequenceLength = 0;
var threshold = 6000; // current versions of mscorlib generate about 6000-7000 metadata usages var threshold = 6000; // current versions of mscorlib generate about 6000-7000 metadata usages
var usagesCount = 0; var usagesCount = 0;
@@ -222,7 +246,7 @@ namespace Il2CppInspector
} }
Console.WriteLine("Late binding metadata usage block could not be auto-detected - metadata usage references will not be available for this project"); Console.WriteLine("Late binding metadata usage block could not be auto-detected - metadata usage references will not be available for this project");
return null; return null;*/
} }
// Thumb instruction pointers have the bottom bit set to signify a switch from ARM to Thumb when jumping // Thumb instruction pointers have the bottom bit set to signify a switch from ARM to Thumb when jumping

View File

@@ -1,220 +1,298 @@
/* /*
Copyright 2020-2021 Katy Coe - http://www.djkaty.com - https://github.com/djkaty Copyright 2020-2021 Katy Coe - http://www.djkaty.com - https://github.com/djkaty
All rights reserved. All rights reserved.
*/ */
using System; using System;
using System.Collections.Generic; using System.Buffers;
using System.Linq; using System.Collections.Generic;
using System.Text; using System.Linq;
using System.Text;
namespace Il2CppInspector
{ namespace Il2CppInspector
partial class Il2CppBinary {
{ partial class Il2CppBinary
// Find a sequence of bytes {
// Adapted from https://stackoverflow.com/a/332667 // Boyer-Moore-Horspool
private int FindBytes(byte[] blob, byte[] signature, int requiredAlignment = 1, int startOffset = 0) { public IEnumerable<uint> FindAllBytes(byte[] blob, byte[] signature, uint requiredAlignment = 1)
var firstMatchByte = Array.IndexOf(blob, signature[0], startOffset); {
var test = new byte[signature.Length]; var badBytes = ArrayPool<uint>.Shared.Rent(256);
while (firstMatchByte >= 0 && firstMatchByte <= blob.Length - signature.Length) { var signatureLength = (uint) signature.Length;
Buffer.BlockCopy(blob, firstMatchByte, test, 0, signature.Length);
if (test.SequenceEqual(signature) && firstMatchByte % requiredAlignment == 0) for (uint i = 0; i < 256; i++)
return firstMatchByte; {
badBytes[(int)i] = signatureLength;
firstMatchByte = Array.IndexOf(blob, signature[0], firstMatchByte + 1); }
}
return -1; var lastSignatureIndex = signatureLength - 1;
}
for (uint i = 0; i < lastSignatureIndex; i++)
// Find all occurrences of a sequence of bytes, using word alignment by default {
private IEnumerable<uint> FindAllBytes(byte[] blob, byte[] signature, int alignment = 0) { badBytes[signature[(int)i]] = lastSignatureIndex - i;
var offset = 0; }
while (offset != -1) {
offset = FindBytes(blob, signature, alignment != 0 ? alignment : Image.Bits / 8, offset); var blobLength = blob.Length;
if (offset != -1) {
yield return (uint) offset; var currentIndex = 0u;
offset += Image.Bits / 8;
} while (currentIndex <= blobLength - signatureLength)
} {
} for (uint i = lastSignatureIndex; blob[currentIndex + i] == signature[(int)i]; i--)
{
// Find strings if (i == 0)
private IEnumerable<uint> FindAllStrings(byte[] blob, string str) => FindAllBytes(blob, Encoding.ASCII.GetBytes(str), 1); {
yield return currentIndex;
// Find 32-bit words break;
private IEnumerable<uint> FindAllDWords(byte[] blob, uint word) => FindAllBytes(blob, BitConverter.GetBytes(word), 4); }
}
// Find 64-bit words
private IEnumerable<uint> FindAllQWords(byte[] blob, ulong word) => FindAllBytes(blob, BitConverter.GetBytes(word), 8); currentIndex += badBytes[blob[currentIndex + lastSignatureIndex]];
// Find words for the current binary size var alignment = currentIndex % requiredAlignment;
private IEnumerable<uint> FindAllWords(byte[] blob, ulong word) if (alignment != 0)
=> Image.Bits switch { currentIndex += requiredAlignment - alignment;
32 => FindAllDWords(blob, (uint) word), }
64 => FindAllQWords(blob, word),
_ => throw new InvalidOperationException("Invalid architecture bit size") ArrayPool<uint>.Shared.Return(badBytes);
}; }
// Find all valid virtual address pointers to a virtual address // Find strings
private IEnumerable<ulong> FindAllMappedWords(byte[] blob, ulong va) { private IEnumerable<uint> FindAllStrings(byte[] blob, string str) => FindAllBytes(blob, Encoding.ASCII.GetBytes(str), 1);
var fileOffsets = FindAllWords(blob, va);
foreach (var offset in fileOffsets) // Find 32-bit words
if (Image.TryMapFileOffsetToVA(offset, out va)) private IEnumerable<uint> FindAllDWords(byte[] blob, uint word) => FindAllBytes(blob, BitConverter.GetBytes(word), 4);
yield return va;
} // Find 64-bit words
private IEnumerable<uint> FindAllQWords(byte[] blob, ulong word) => FindAllBytes(blob, BitConverter.GetBytes(word), 8);
// Find all valid virtual address pointers to a set of virtual addresses
private IEnumerable<ulong> FindAllMappedWords(byte[] blob, IEnumerable<ulong> va) => va.SelectMany(a => FindAllMappedWords(blob, a)); // Find words for the current binary size
private IEnumerable<uint> FindAllWords(byte[] blob, ulong word)
// Find all valid pointer chains to a set of virtual addresses with the specified number of indirections => Image.Bits switch {
private IEnumerable<ulong> FindAllPointerChains(byte[] blob, IEnumerable<ulong> va, int indirections) { 32 => FindAllDWords(blob, (uint) word),
IEnumerable<ulong> vas = va; 64 => FindAllQWords(blob, word),
for (int i = 0; i < indirections; i++) _ => throw new InvalidOperationException("Invalid architecture bit size")
vas = FindAllMappedWords(blob, vas); };
return vas;
} // Find all valid virtual address pointers to a virtual address
private IEnumerable<ulong> FindAllMappedWords(byte[] blob, ulong va) {
// Scan the image for the needed data structures var fileOffsets = FindAllWords(blob, va);
private (ulong, ulong) ImageScan(Metadata metadata) { foreach (var offset in fileOffsets)
Image.Position = 0; if (Image.TryMapFileOffsetToVA(offset, out va))
var imageBytes = Image.ReadBytes((int) Image.Length); yield return va;
}
var ptrSize = (uint) Image.Bits / 8;
ulong codeRegistration = 0; // Find all valid virtual address pointers to a set of virtual addresses
IEnumerable<ulong> vas; private IEnumerable<ulong> FindAllMappedWords(byte[] blob, IEnumerable<ulong> va) => va.SelectMany(a => FindAllMappedWords(blob, a));
// Find CodeRegistration // Find all valid pointer chains to a set of virtual addresses with the specified number of indirections
// >= 24.2 private IEnumerable<ulong> FindAllPointerChains(byte[] blob, ulong va, int indirections) {
if (metadata.Version >= 24.2) { foreach (var vas in FindAllMappedWords(blob, va))
{
// < 27: mscorlib.dll is always the first CodeGenModule if (indirections == 1)
// >= 27: mscorlib.dll is always the last CodeGenModule (Assembly-CSharp.dll is always the first but non-Unity builds don't have this DLL) {
// NOTE: winrt.dll + other DLLs can come after mscorlib.dll so we can't use its location to get an accurate module count yield return vas;
var offsets = FindAllStrings(imageBytes, "mscorlib.dll\0"); }
vas = offsets.Select(o => Image.MapFileOffsetToVA(o)); else
{
// Unwind from string pointer -> CodeGenModule -> CodeGenModules + x foreach (var foundPointer in FindAllPointerChains(blob, vas, indirections - 1))
vas = FindAllPointerChains(imageBytes, vas, 2); {
IEnumerable<ulong> codeRegVas = null; yield return foundPointer;
}
// We'll work back one pointer width at a time trying to find the first CodeGenModule }
// Let's hope there aren't more than 200 DLLs in any given application :) }
var maxCodeGenModules = 200;
//IEnumerable<ulong> vas = va;
for (int backtrack = 0; backtrack < maxCodeGenModules && (codeRegVas?.Count() ?? 0) != 1; backtrack++) { //for (int i = 0; i < indirections; i++)
// Unwind from CodeGenModules + x -> CodeRegistration + y // vas = FindAllMappedWords(blob, vas);
codeRegVas = FindAllMappedWords(imageBytes, vas); //return vas;
}
// The previous word must be the number of CodeGenModules
if (codeRegVas.Count() == 1) { // Scan the image for the needed data structures
var codeGenModuleCount = Image.ReadMappedWord(codeRegVas.First() - ptrSize); private (ulong, ulong) ImageScan(Metadata metadata) {
Image.Position = 0;
// Basic validity check var imageBytes = Image.ReadBytes((int) Image.Length);
if (codeGenModuleCount <= 0 || codeGenModuleCount > maxCodeGenModules)
codeRegVas = Enumerable.Empty<ulong>(); var ptrSize = (uint) Image.Bits / 8;
} ulong codeRegistration = 0;
IEnumerable<ulong> vas;
// Move to the previous CodeGenModule if the above fails
vas = vas.Select(va => va - ptrSize); // Find CodeRegistration
} // >= 24.2
if (metadata.Version >= 24.2) {
if (!codeRegVas.Any())
return (0, 0); // < 27: mscorlib.dll is always the first CodeGenModule
// >= 27: mscorlib.dll is always the last CodeGenModule (Assembly-CSharp.dll is always the first but non-Unity builds don't have this DLL)
if (codeRegVas.Count() > 1) // NOTE: winrt.dll + other DLLs can come after mscorlib.dll so we can't use its location to get an accurate module count
throw new InvalidOperationException("More than one valid pointer chain found during data heuristics"); ulong FindCodeRegistration()
{
// pCodeGenModules is the last field in CodeRegistration so we subtract the size of one pointer from the struct size var imagesCount = Metadata.Images.Length;
codeRegistration = codeRegVas.First() - ((ulong) metadata.Sizeof(typeof(Il2CppCodeRegistration), Image.Version, Image.Bits / 8) - ptrSize);
foreach (var offset in FindAllStrings(imageBytes, "mscorlib.dll\0"))
// In v24.3, windowsRuntimeFactoryTable collides with codeGenModules. So far no samples have had windowsRuntimeFactoryCount > 0; {
// if this changes we'll have to get smarter about disambiguating these two. if (!Image.TryMapFileOffsetToVA(offset, out var va))
var cr = Image.ReadMappedObject<Il2CppCodeRegistration>(codeRegistration); continue;
if (Image.Version == 24.2 && cr.interopDataCount == 0) { // Unwind from string pointer -> CodeGenModule -> CodeGenModules + x
Image.Version = 24.3; foreach (var potentialCodeGenModules in FindAllPointerChains(imageBytes, va, 2))
codeRegistration -= ptrSize * 2; // two extra words for WindowsRuntimeFactory {
} if (metadata.Version >= 27)
{
if (Image.Version == 27 && cr.reversePInvokeWrapperCount > 0x30000) for (int i = imagesCount - 1; i >= 0; i--)
{ {
// If reversePInvokeWrapperCount is a pointer, then it's because we're actually on 27.1 and there's a genericAdjustorThunks pointer interfering. foreach (var potentialCodeRegistrationPtr in FindAllPointerChains(imageBytes,
// We need to bump version to 27.1 and back up one more pointer. potentialCodeGenModules - (ulong) i * ptrSize, 1))
Image.Version = 27.1; {
codeRegistration -= ptrSize; var expectedImageCountPtr = potentialCodeRegistrationPtr - ptrSize;
} var expectedImageCount = ptrSize == 4 ? Image.ReadMappedInt32(expectedImageCountPtr) : Image.ReadMappedInt64(expectedImageCountPtr);
} if (expectedImageCount == imagesCount)
return potentialCodeRegistrationPtr;
// Find CodeRegistration }
// <= 24.1 }
else { }
// The first item in CodeRegistration is the total number of method pointers else
vas = FindAllMappedWords(imageBytes, (ulong) metadata.Methods.Count(m => (uint) m.methodIndex != 0xffff_ffff)); {
for (int i = 0; i < imagesCount; i++)
if (!vas.Any()) {
return (0, 0); foreach (var potentialCodeRegistrationPtr in FindAllPointerChains(imageBytes,
potentialCodeGenModules - (ulong)i * ptrSize, 1))
// The count of method pointers will be followed some bytes later by {
// the count of custom attribute generators; the distance between them return potentialCodeRegistrationPtr;
// depends on the il2cpp version so we just use ReadMappedObject to simplify the math }
foreach (var va in vas) { }
var cr = Image.ReadMappedObject<Il2CppCodeRegistration>(va); }
}
if (cr.customAttributeCount == metadata.AttributeTypeRanges.Length) }
codeRegistration = va;
} return 0;
}
if (codeRegistration == 0)
return (0, 0); /*
} // We'll work back one pointer width at a time trying to find the first CodeGenModule
// Let's hope there aren't more than 200 DLLs in any given application :)
// Find MetadataRegistration var maxCodeGenModules = 200;
// >= 19
var metadataRegistration = 0ul; for (int backtrack = 0; backtrack < maxCodeGenModules && (codeRegVas?.Count() ?? 0) != 1; backtrack++) {
// Unwind from CodeGenModules + x -> CodeRegistration + y
// Find TypeDefinitionsSizesCount (4th last field) then work back to the start of the struct codeRegVas = FindAllMappedWords(imageBytes, vas);
// This saves us from guessing where metadataUsagesCount is later
var mrSize = (ulong) metadata.Sizeof(typeof(Il2CppMetadataRegistration), Image.Version, Image.Bits / 8); // The previous word must be the number of CodeGenModules
vas = FindAllMappedWords(imageBytes, (ulong) metadata.Types.Length).Select(a => a - mrSize + ptrSize * 4); if (codeRegVas.Count() == 1) {
var codeGenModuleCount = Image.ReadMappedWord(codeRegVas.First() - ptrSize);
// >= 19 && < 27
if (Image.Version < 27) // Basic validity check
foreach (var va in vas) { if (codeGenModuleCount <= 0 || codeGenModuleCount > maxCodeGenModules)
var mr = Image.ReadMappedObject<Il2CppMetadataRegistration>(va); codeRegVas = Enumerable.Empty<ulong>();
if (mr.metadataUsagesCount == (ulong) metadata.MetadataUsageLists.Length) }
metadataRegistration = va;
} // Move to the previous CodeGenModule if the above fails
vas = vas.Select(va => va - ptrSize);
// plagiarism. noun - https://www.lexico.com/en/definition/plagiarism }
// the practice of taking someone else's work or ideas and passing them off as one's own.
// Synonyms: copying, piracy, theft, stealing, infringement of copyright if (!codeRegVas.Any())
return (0, 0);
// >= 27
else { if (codeRegVas.Count() > 1)
// We're going to just sanity check all of the fields throw new InvalidOperationException("More than one valid pointer chain found during data heuristics");
// All counts should be under a certain threshold */
// All pointers should be mappable to the binary
var codeRegVa = FindCodeRegistration();
var mrFieldCount = mrSize / (ulong) (Image.Bits / 8);
foreach (var va in vas) { if (codeRegVa == 0)
var mrWords = Image.ReadMappedWordArray(va, (int) mrFieldCount); return (0, 0);
// Even field indices are counts, odd field indices are pointers
bool ok = true; // pCodeGenModules is the last field in CodeRegistration so we subtract the size of one pointer from the struct size
for (var i = 0; i < mrWords.Length && ok; i++) { codeRegistration = codeRegVa - ((ulong) metadata.Sizeof(typeof(Il2CppCodeRegistration), Image.Version, Image.Bits / 8) - ptrSize);
ok = i % 2 == 0 ? mrWords[i] < 0x30000 : Image.TryMapVATR((ulong) mrWords[i], out _);
} // In v24.3, windowsRuntimeFactoryTable collides with codeGenModules. So far no samples have had windowsRuntimeFactoryCount > 0;
if (ok) // if this changes we'll have to get smarter about disambiguating these two.
metadataRegistration = va; var cr = Image.ReadMappedObject<Il2CppCodeRegistration>(codeRegistration);
}
} if (Image.Version == 24.2 && cr.interopDataCount == 0) {
if (metadataRegistration == 0) Image.Version = 24.3;
return (0, 0); codeRegistration -= ptrSize * 2; // two extra words for WindowsRuntimeFactory
}
return (codeRegistration, metadataRegistration);
} if (Image.Version == 27 && cr.reversePInvokeWrapperCount > 0x30000)
} {
} // If reversePInvokeWrapperCount is a pointer, then it's because we're actually on 27.1 and there's a genericAdjustorThunks pointer interfering.
// We need to bump version to 27.1 and back up one more pointer.
Image.Version = 27.1;
codeRegistration -= ptrSize;
}
}
// Find CodeRegistration
// <= 24.1
else {
// The first item in CodeRegistration is the total number of method pointers
vas = FindAllMappedWords(imageBytes, (ulong) metadata.Methods.Count(m => (uint) m.methodIndex != 0xffff_ffff));
if (!vas.Any())
return (0, 0);
// The count of method pointers will be followed some bytes later by
// the count of custom attribute generators; the distance between them
// depends on the il2cpp version so we just use ReadMappedObject to simplify the math
foreach (var va in vas) {
var cr = Image.ReadMappedObject<Il2CppCodeRegistration>(va);
if (cr.customAttributeCount == metadata.AttributeTypeRanges.Length)
codeRegistration = va;
}
if (codeRegistration == 0)
return (0, 0);
}
// Find MetadataRegistration
// >= 19
var metadataRegistration = 0ul;
// Find TypeDefinitionsSizesCount (4th last field) then work back to the start of the struct
// This saves us from guessing where metadataUsagesCount is later
var mrSize = (ulong) metadata.Sizeof(typeof(Il2CppMetadataRegistration), Image.Version, Image.Bits / 8);
var typesLength = (ulong) metadata.Types.Length;
vas = FindAllMappedWords(imageBytes, typesLength).Select(a => a - mrSize + ptrSize * 4);
// >= 19 && < 27
if (Image.Version < 27)
foreach (var va in vas) {
var mr = Image.ReadMappedObject<Il2CppMetadataRegistration>(va);
if (mr.metadataUsagesCount == (ulong) metadata.MetadataUsageLists.Length)
metadataRegistration = va;
}
// plagiarism. noun - https://www.lexico.com/en/definition/plagiarism
// the practice of taking someone else's work or ideas and passing them off as one's own.
// Synonyms: copying, piracy, theft, stealing, infringement of copyright
// >= 27
else {
// We're going to just sanity check all of the fields
// All counts should be under a certain threshold
// All pointers should be mappable to the binary
var mrFieldCount = mrSize / (ulong) (Image.Bits / 8);
foreach (var va in vas) {
var mrWords = Image.ReadMappedWordArray(va, (int) mrFieldCount);
// Even field indices are counts, odd field indices are pointers
bool ok = true;
for (var i = 0; i < mrWords.Length && ok; i++) {
ok = i % 2 == 0 || Image.TryMapVATR((ulong) mrWords[i], out _);
}
if (ok)
metadataRegistration = va;
}
}
if (metadataRegistration == 0)
return (0, 0);
return (codeRegistration, metadataRegistration);
}
}
}