ELF: Use RLE histograms to determine encryption stripe size

This commit is contained in:
Katy Coe
2020-12-15 19:35:03 +01:00
parent 1476be577a
commit e4e6ab0438

View File

@@ -369,49 +369,57 @@ namespace Il2CppInspector
if (xorKeyCandidateStriped != 0x00) { if (xorKeyCandidateStriped != 0x00) {
// Some files may use a striped encryption whereby alternate blocks are encrypted and un-encrypted // Some files may use a striped encryption whereby alternate blocks are encrypted and un-encrypted
// The first part of each section is always encrypted. Scan for the first unencrypted block and find its size // The first part of each section is always encrypted.
// Limit ourselves to maxSearchLength. If no stripe has been found by then, the whole section is probably encrypted
// We refer to issue #96 where the code uses striped encryption in 4KB blocks // We refer to issue #96 where the code uses striped encryption in 4KB blocks
// We perform heuristics for block of size blockSize below // We perform heuristics for block of size blockSize below
var start = conv.Int(sectionByName[".text"].sh_offset); const int blockSize = 0x100;
var length = conv.Int(sectionByName[".text"].sh_size); const int maxBrokenRun = 4;
var blockSize = 0x100; const int minMultiplierInValid = 6;
var maxSearchLength = 128 * 1024; const int minTotalValidInBucket = 0x10;
var firstUnencrypted = 0xffffffff;
var stripeSize = 0xffffffff;
// At least this many instructions per block must pass the threshold to be considered unencrypted // Take all of the instructions from the code section starting on a VA block boundary and determine which are valid
var threshold = (blockSize / 4) * 0.8; var startSkip = 0;
// A stripe of encryption or non-encryption is considered to have ended when this many blocks in the opposite state are found
var maxBlocksInARow = 4;
// Align start position to search block size
if (conv.Int(sectionByName[".text"].sh_addr) % blockSize != 0) if (conv.Int(sectionByName[".text"].sh_addr) % blockSize != 0)
start += blockSize - conv.Int(sectionByName[".text"].sh_addr) % blockSize; startSkip = blockSize - conv.Int(sectionByName[".text"].sh_addr) % blockSize;
var probablyEncryptedCount = 0; var insts = ReadArray<uint>(conv.Long(sectionByName[".text"].sh_offset) + startSkip, (conv.Int(sectionByName[".text"].sh_size) - startSkip) / 4);
var probablyUnencryptedCount = 0; var instsValid = insts.Select(i => Bits == 32? isCommonARMv7(i) : isCommonARMv8A(i)).ToList();
for (var pos = start; pos < start + maxSearchLength && stripeSize == 0xffffffff; pos += blockSize) { // Use RLE to produce frequency distribution of number of consecutive valid and invalid instructions,
var size = Math.Min(blockSize, start + length - pos); // allowing for maxBrokenRun breaks in valid instructions in a row before considering a run to have ended
var dwords = ReadArray<uint>(pos, size / 4); var freqValid = new SortedDictionary<int, int>();
var runLength = 0;
var brokenRun = 0;
foreach (var i in instsValid) {
if (i) {
runLength = runLength + brokenRun + 1;
brokenRun = 0;
} else if (runLength > 0) {
brokenRun++;
var commonInstructions = dwords.Count(w => Bits == 32? isCommonARMv7(w) : isCommonARMv8A(w)); if (brokenRun > maxBrokenRun) {
var probablyEncrypted = commonInstructions < threshold; if (freqValid.ContainsKey(runLength))
freqValid[runLength]++;
// Increment one or the other; reset the other one to zero else
probablyEncryptedCount = probablyEncrypted? probablyEncryptedCount + 1 : 0; freqValid[runLength] = 1;
probablyUnencryptedCount = probablyEncryptedCount == 0 ? probablyUnencryptedCount + 1 : 0; runLength = 0;
}
if (probablyUnencryptedCount >= maxBlocksInARow && firstUnencrypted == 0xffffffff) }
firstUnencrypted = (uint) (pos - (probablyUnencryptedCount - 1) * blockSize);
if (probablyEncryptedCount >= maxBlocksInARow && firstUnencrypted != 0xffffffff)
stripeSize = (uint) (pos - firstUnencrypted - (probablyEncryptedCount - 1) * blockSize);
} }
// Create a histogram of how often each range of valid instruction counts occurred
// The uses of 4 refer to the size of an ARM instruction
var histValid = freqValid.GroupBy(f => f.Key - (f.Key % (blockSize / 4)))
.Select(f => new {
Key = f.Key * 4,
Value = f.Sum(x => x.Value)
}).ToDictionary(x => x.Key, x => x.Value);
// Find first point in the histogram where the number of valid instructions suddenly spikes
var stripeSize = (uint) histValid.Zip(histValid.Skip(1), (p,c) => (p,c))
.FirstOrDefault(x => x.c.Value >= x.p.Value * minMultiplierInValid && x.c.Value >= minTotalValidInBucket).c.Key;
// Select the key // Select the key
// If more than one key candidates are the same, select the most common candidate // If more than one key candidates are the same, select the most common candidate
@@ -533,7 +541,7 @@ namespace Il2CppInspector
var length = conv.Int(section.sh_size); var length = conv.Int(section.sh_size);
// Non-striped // Non-striped
if (stripeSize == 0xffffffff) { if (stripeSize == 0) {
xorRange(start, length, xorValue); xorRange(start, length, xorValue);
return; return;
} }