From 6c6f72618aae6f9fb8be81597a76dc6446323e00 Mon Sep 17 00:00:00 2001 From: Katy Coe Date: Sat, 4 Jul 2020 17:10:09 +0200 Subject: [PATCH] C++: Split CppType, CppField, CppTypes into separate files --- Il2CppInspector.Common/Cpp/CppField.cs | 74 ++++ Il2CppInspector.Common/Cpp/CppType.cs | 538 +------------------------ Il2CppInspector.Common/Cpp/CppTypes.cs | 489 ++++++++++++++++++++++ 3 files changed, 564 insertions(+), 537 deletions(-) create mode 100644 Il2CppInspector.Common/Cpp/CppField.cs create mode 100644 Il2CppInspector.Common/Cpp/CppTypes.cs diff --git a/Il2CppInspector.Common/Cpp/CppField.cs b/Il2CppInspector.Common/Cpp/CppField.cs new file mode 100644 index 0000000..65ecb43 --- /dev/null +++ b/Il2CppInspector.Common/Cpp/CppField.cs @@ -0,0 +1,74 @@ +/* + Copyright 2020 Katy Coe - http://www.djkaty.com - https://github.com/djkaty + + All rights reserved. +*/ + +namespace Il2CppInspector.Cpp +{ + // A field in a C++ type + public class CppField + { + // The name of the field + public string Name { get; set; } + + // The offset of the field into the type + public int Offset { get; set; } + + // The offset of the field into the type in bytes + public int OffsetBytes => Offset / 8; + + // The size of the field + public int Size => (BitfieldSize > 0 ? BitfieldSize : Type.Size); + + public int SizeBytes => (Size / 8) + (Size % 8 > 0 ? 1 : 0); + + // The size of the field in bits + public int BitfieldSize { get; set; } + + // The LSB of the bitfield + public int BitfieldLSB => Offset % 8; + + // The MSB of the bitfield + public int BitfieldMSB => BitfieldLSB + Size - 1; + + // The type of the field + public CppType Type { get; set; } + + // C++ representation of field + public virtual string ToString(string format = "") { + var offset = format == "o" ? $"/* 0x{OffsetBytes:x2} - 0x{OffsetBytes + SizeBytes - 1:x2} (0x{SizeBytes:x2}) */" : ""; + + var field = Type switch { + // nested anonymous types + CppComplexType t when string.IsNullOrEmpty(t.Name) => (format == "o"? "\n" : "") + t.ToString(format)[..^1] + (Name.Length > 0? " " + Name : ""), + // function pointers + CppFnPtrType t when string.IsNullOrEmpty(t.Name) => (format == "o"? " " : "") + t.FieldToString(Name), + // regular fields + _ => $"{(format == "o"? " ":"")}{Type.Name} {Name}" + (BitfieldSize > 0? $" : {BitfieldSize}" : "") + }; + + var suffix = ""; + + // arrays + if (Type is CppArrayType a) + suffix += "[" + a.Length + "]"; + + // bitfields + if (BitfieldSize > 0 && format == "o") + suffix += $" /* bits {BitfieldLSB} - {BitfieldMSB} */"; + + return offset + field + suffix; + } + public override string ToString() => ToString(); + } + + // An enum key and value pair + public class CppEnumField : CppField + { + // The value of this key name + public ulong Value { get; set; } + + public override string ToString(string format = "") => Name + " = " + Value; + } +} diff --git a/Il2CppInspector.Common/Cpp/CppType.cs b/Il2CppInspector.Common/Cpp/CppType.cs index 5c2059b..f178028 100644 --- a/Il2CppInspector.Common/Cpp/CppType.cs +++ b/Il2CppInspector.Common/Cpp/CppType.cs @@ -1,5 +1,5 @@ /* - Copyright 2020 Katy Coe - http://www.hearthcode.org - http://www.djkaty.com + Copyright 2020 Katy Coe - http://www.djkaty.com - https://github.com/djkaty All rights reserved. */ @@ -279,540 +279,4 @@ namespace Il2CppInspector.Cpp return sb.ToString(); } } - - // A field in a C++ type - public class CppField - { - // The name of the field - public string Name { get; set; } - - // The offset of the field into the type - public int Offset { get; set; } - - // The offset of the field into the type in bytes - public int OffsetBytes => Offset / 8; - - // The size of the field - public int Size => (BitfieldSize > 0 ? BitfieldSize : Type.Size); - - public int SizeBytes => (Size / 8) + (Size % 8 > 0 ? 1 : 0); - - // The size of the field in bits - public int BitfieldSize { get; set; } - - // The LSB of the bitfield - public int BitfieldLSB => Offset % 8; - - // The MSB of the bitfield - public int BitfieldMSB => BitfieldLSB + Size - 1; - - // The type of the field - public CppType Type { get; set; } - - // C++ representation of field - public virtual string ToString(string format = "") { - var offset = format == "o" ? $"/* 0x{OffsetBytes:x2} - 0x{OffsetBytes + SizeBytes - 1:x2} (0x{SizeBytes:x2}) */" : ""; - - var field = Type switch { - // nested anonymous types - CppComplexType t when string.IsNullOrEmpty(t.Name) => (format == "o"? "\n" : "") + t.ToString(format)[..^1] + (Name.Length > 0? " " + Name : ""), - // function pointers - CppFnPtrType t when string.IsNullOrEmpty(t.Name) => (format == "o"? " " : "") + t.FieldToString(Name), - // regular fields - _ => $"{(format == "o"? " ":"")}{Type.Name} {Name}" + (BitfieldSize > 0? $" : {BitfieldSize}" : "") - }; - - var suffix = ""; - - // arrays - if (Type is CppArrayType a) - suffix += "[" + a.Length + "]"; - - // bitfields - if (BitfieldSize > 0 && format == "o") - suffix += $" /* bits {BitfieldLSB} - {BitfieldMSB} */"; - - return offset + field + suffix; - } - public override string ToString() => ToString(); - } - - // An enum key and value pair - public class CppEnumField : CppField - { - // The value of this key name - public ulong Value { get; set; } - - public override string ToString(string format = "") => Name + " = " + Value; - } - - // A collection of C++ types - public class CppTypes : IEnumerable - { - // All of the types - public Dictionary Types { get; } - - public CppType this[string s] => Types[s]; - - public IEnumerator GetEnumerator() => Types.Values.GetEnumerator(); - IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); - - // Architecture width in bits (32/64) - to determine pointer sizes - public int WordSize { get; } - - private static readonly List primitiveTypes = new List { - new CppType("uint8_t", 8), - new CppType("uint16_t", 16), - new CppType("uint32_t", 32), - new CppType("uint64_t", 64), - new CppType("int8_t", 8), - new CppType("int16_t", 16), - new CppType("int32_t", 32), - new CppType("int64_t", 64), - new CppType("char", 8), - new CppType("int", 32), - new CppType("float", 32), - new CppType("double", 64), - new CppType("bool", 8), - new CppType("void", 0) - }; - - public CppTypes(int wordSize) { - if (wordSize != 32 && wordSize != 64) - throw new ArgumentOutOfRangeException("Architecture word size must be 32 or 64-bit to generate C++ data"); - - WordSize = wordSize; - Types = primitiveTypes.ToDictionary(t => t.Name, t => t); - - // This is all compiler-dependent, let's hope for the best! - Add(new CppType("long", WordSize)); - Add(new CppType("intptr_t", WordSize)); - Add(new CppType("uintptr_t", WordSize)); - Add(new CppType("size_t", WordSize)); - } - - // Parse a block of C++ source code, adding any types found - public void AddFromDeclarationText(string text) { - using StringReader lines = new StringReader(text); - - var rgxExternDecl = new Regex(@"struct (\S+);"); - var rgxTypedefForwardDecl = new Regex(@"typedef struct (\S+) (\S+);"); - var rgxTypedefFnPtr = new Regex(@"typedef\s+(?:struct )?" + CppFnPtrType.Regex + ";"); - var rgxTypedef = new Regex(@"typedef (\S+?)\s*\**\s*(\S+);"); - var rgxFieldFnPtr = new Regex(CppFnPtrType.Regex + @";"); - var rgxField = new Regex(@"^(?:struct |enum )?(\S+?)\s*\**\s*((?:\S|\s*,\s*)+)(?:\s*:\s*([0-9]+))?;"); - var rgxEnumValue = new Regex(@"^\s*([A-Za-z0-9_]+)(?:\s*=\s*(.+?))?,?\s*$"); - - var rgxStripKeywords = new Regex(@"\b(?:const|unsigned|volatile)\b"); - var rgxCompressPtrs = new Regex(@"\*\s+\*"); - - var rgxArrayField = new Regex(@"(\S+?)\[([0-9]+)\]"); - - var rgxAlignment = new Regex(@"__attribute__\(\(aligned\(([0-9]+)\)\)\)"); - var rgxIsBitDirective = new Regex(@"#ifdef\s+IS_(32|64)BIT"); - var rgxSingleLineComment = new Regex(@"/\*.*?\*/"); - - var currentType = new Stack(); - bool falseIfBlock = false; - bool inComment = false; - bool inMethod = false; - var nextEnumValue = 0ul; - string line; - - while ((line = lines.ReadLine()) != null) { - - // Remove comments - if (line.Contains("//")) - line = line.Substring(0, line.IndexOf("//", StringComparison.Ordinal)); - - // End of multi-line comment? - if (line.Contains("*/") && inComment) { - inComment = false; - line = line.Substring(line.IndexOf("*/", StringComparison.Ordinal) + 2); - } - - if (inComment) { - Debug.WriteLine($"[COMMENT ] {line}"); - continue; - } - - // Remove all single-line comments - line = rgxSingleLineComment.Replace(line, ""); - - // Start of multi-line comment? - if (line.Contains("/*") && !inComment) { - inComment = true; - line = line.Substring(0, line.IndexOf("/*")); - } - - // Ignore global variables - if (line.StartsWith("const ") && currentType.Count == 0) { - Debug.WriteLine($"[GLOBAL ] {line}"); - continue; - } - - // Ignore methods - // Note: This is a very lazy way of processing early version IL2CPP headers - if (line != "}" && inMethod) { - Debug.WriteLine($"[METHOD ] {line}"); - continue; - } - - if (line == "}" && inMethod) { - inMethod = false; - - Debug.WriteLine($"[METHOD END ] {line}"); - continue; - } - - if (line.StartsWith("static inline ")) { - inMethod = true; - - Debug.WriteLine($"[METHOD START ] {line}"); - continue; - } - - // Remove keywords we don't care about - line = rgxStripKeywords.Replace(line, ""); - - // Remove whitespace in multiple indirections - line = rgxCompressPtrs.Replace(line, "**"); - - // Process __attribute((aligned(x))) - var alignment = 0; - var alignmentMatch = rgxAlignment.Match(line); - if (alignmentMatch.Success) { - alignment = int.Parse(alignmentMatch.Groups[1].Captures[0].ToString()); - line = rgxAlignment.Replace(line, ""); - } - - line = line.Trim(); - - // Ignore blank lines - if (line.Length == 0) - continue; - - // Process #ifs before anything else - // Doesn't handle nesting but we probably don't need to (use a Stack if we do) - var ifdef = rgxIsBitDirective.Match(line); - if (ifdef.Success) { - var bits = int.Parse(ifdef.Groups[1].Captures[0].ToString()); - if (bits != WordSize) - falseIfBlock = true; - - Debug.WriteLine($"[IF ] {line}"); - continue; - } - if (line == "#else") { - falseIfBlock = !falseIfBlock; - - Debug.WriteLine($"[ELSE ] {line}"); - continue; - } - if (line == "#endif") { - falseIfBlock = false; - - Debug.WriteLine($"[ENDIF ] {line}"); - continue; - } - - if (falseIfBlock) { - Debug.WriteLine($"[FALSE ] {line}"); - continue; - } - - // External declaration - // struct ; - // NOTE: Unfortunately we're not going to ever know the size of this type - var externDecl = rgxExternDecl.Match(line); - if (externDecl.Success) { - var declType = externDecl.Groups[1].Captures[0].ToString(); - - Types.Add(declType, new CppComplexType(CompoundType.Struct) {Name = declType}); - - Debug.WriteLine($"[EXTERN DECL ] {line}"); - continue; - } - - // Forward declaration - // typedef struct - var typedef = rgxTypedefForwardDecl.Match(line); - if (typedef.Success) { - var alias = typedef.Groups[2].Captures[0].ToString(); - var declType = typedef.Groups[1].Captures[0].ToString(); - - // Sometimes we might get multiple forward declarations for the same type - if (!Types.ContainsKey(declType)) - Types.Add(declType, new CppComplexType(CompoundType.Struct) {Name = declType}); - - // Sometimes the alias might be the same name as the type (this is usually the case) - if (!Types.ContainsKey(alias)) - Types.Add(alias, Types[declType].AsAlias(alias)); - - Debug.WriteLine($"[FORWARD DECL ] {line}"); - continue; - } - - // Function pointer - // typedef (*)(); - typedef = rgxTypedefFnPtr.Match(line); - if (typedef.Success) { - var alias = typedef.Groups[2].Captures[0].ToString(); - - var fnPtrType = CppFnPtrType.FromSignature(this, line); - fnPtrType.Name = alias; - - Types.Add(alias, fnPtrType); - - Debug.WriteLine($"[TYPEDEF FNPTR] {line} -- Adding method pointer typedef to {alias}"); - continue; - } - - // Alias - // typedef [*..] ; - typedef = rgxTypedef.Match(line); - if (typedef.Success) { - var alias = typedef.Groups[2].Captures[0].ToString(); - var existingType = typedef.Groups[1].Captures[0].ToString(); - - // Potential multiple indirection - var type = Types[existingType]; - var pointers = line.Count(c => c == '*'); - for (int i = 0; i < pointers; i++) - type = type.AsPointer(WordSize); - - Types.Add(alias, type.AsAlias(alias)); - - Debug.WriteLine($"[TYPEDEF {(pointers > 0? "PTR":"VAL")} ] {line} -- Adding typedef from {type.Name} to {alias}"); - continue; - } - - // Start of struct - // typedef struct - if ((line.StartsWith("typedef struct") || line.StartsWith("struct ")) && line.IndexOf(";", StringComparison.Ordinal) == -1 - && currentType.Count == 0) { - currentType.Push(new CppComplexType(CompoundType.Struct)); - - if (line.StartsWith("struct ")) - currentType.Peek().Name = line.Split(' ')[1]; - - Debug.WriteLine($"\n[STRUCT START ] {line}"); - continue; - } - - // Start of union - // typedef union - if (line.StartsWith("typedef union") && line.IndexOf(";", StringComparison.Ordinal) == -1) { - currentType.Push(new CppComplexType(CompoundType.Union)); - - Debug.WriteLine($"\n[UNION START ] {line}"); - continue; - } - - // Start of enum - // typedef enum - if (line.StartsWith("typedef enum") && line.IndexOf(";", StringComparison.Ordinal) == -1) { - currentType.Push(new CppComplexType(CompoundType.Enum)); - nextEnumValue = 0; - - Debug.WriteLine($"\n[ENUM START ] {line}"); - continue; - } - - // Nested complex field - // struct - // union - var words = line.Split(' '); - if ((words[0] == "union" || words[0] == "struct") && words.Length <= 2) { - currentType.Push(new CppComplexType(words[0] == "struct"? CompoundType.Struct : CompoundType.Union)); - - Debug.WriteLine($"[FIELD START ] {line}"); - continue; - } - - // End of already named struct - if (line == "};" && currentType.Count == 1) { - var ct = currentType.Pop(); - if (!Types.ContainsKey(ct.Name)) - Types.Add(ct.Name, ct); - else - ((CppComplexType) Types[ct.Name]).Fields = ct.Fields; - - Debug.WriteLine($"[STRUCT END ] {line} -- {ct.Name}\n"); - continue; - } - - // End of complex field, complex type or enum - // end of [typedef] struct/union/enum - if (line.StartsWith("}") && line.EndsWith(";")) { - var name = line[1..^1].Trim(); - var ct = currentType.Pop(); - - // End of top-level typedef, so it's a type name - if (currentType.Count == 0) { - ct.Name = name; - - if (!Types.ContainsKey(name)) - Types.Add(name, ct); - - // We will have to copy the type data if the type was forward declared, - // because other types are already referencing it; replacing it in the - // collection will not replace the references to the empty version in - // other types - else { - ((CppComplexType) Types[name]).Fields = ct.Fields; - } - - Debug.WriteLine($"[STRUCT END ] {line} -- {name}\n"); - } - - // Otherwise it's a field name in the current type - else { - var parent = currentType.Peek(); - parent.AddField(new CppField { Name = name, Type = ct }); - - Debug.WriteLine($"[FIELD END ] {line} -- {ct.Name} {name}"); - } - continue; - } - - // Function pointer field - var fieldFnPtr = rgxFieldFnPtr.Match(line); - if (fieldFnPtr.Success) { - var fnPtrType = CppFnPtrType.FromSignature(this, line); - - var name = fieldFnPtr.Groups[2].Captures[0].ToString(); - - var ct = currentType.Peek(); - ct.AddField(new CppField {Name = name, Type = fnPtrType}, alignment); - - Debug.WriteLine($"[FIELD FNPTR ] {line} -- {name}"); - continue; - } - - // Pointer or value field - var field = rgxField.Match(line); - - if (field.Success) { - var names = field.Groups[2].Captures[0].ToString(); - var typeName = field.Groups[1].Captures[0].ToString(); - - // Multiple fields can be separated by commas - foreach (var fieldName in names.Split(',')) { - string name = fieldName.Trim(); - - // Array - var array = rgxArrayField.Match(name); - int arraySize = 0; - if (array.Success && array.Groups[2].Captures.Count > 0) { - arraySize = int.Parse(array.Groups[2].Captures[0].ToString()); - name = array.Groups[1].Captures[0].ToString(); - } - - // Bitfield - int bitfield = 0; - if (field.Groups[3].Captures.Count > 0) - bitfield = int.Parse(field.Groups[3].Captures[0].ToString()); - - // Potential multiple indirection - var type = Types[typeName]; - var pointers = line.Count(c => c == '*'); - for (int i = 0; i < pointers; i++) - type = type.AsPointer(WordSize); - - var ct = currentType.Peek(); - - if (arraySize > 0) - type = type.AsArray(arraySize); - - ct.AddField(new CppField {Name = name, Type = type, BitfieldSize = bitfield}, alignment); - - if (bitfield == 0) - Debug.WriteLine($"[FIELD {(pointers > 0 ? "PTR" : "VAL")} ] {line} -- {name}"); - else - Debug.WriteLine($"[BITFIELD ] {line} -- {name} : {bitfield}"); - } - continue; - } - - // Enum value field - var enumValue = rgxEnumValue.Match(line); - if (enumValue.Success) { - var name = enumValue.Groups[1].Captures[0].ToString(); - - var value = nextEnumValue++; - if (enumValue.Groups[2].Captures.Count > 0) { - // Convert the text to a ulong even if it's hexadecimal with a 0x prefix - var valueText = enumValue.Groups[2].Captures[0].ToString(); - var conv = new System.ComponentModel.UInt64Converter(); - - // Handle bit shift operator - var values = valueText.Split("<<").Select(t => (ulong) conv.ConvertFromInvariantString(t.Trim())).ToArray(); - value = values.Length == 1 ? values[0] : values[0] << (int)values[1]; - nextEnumValue = value + 1; - } - - var ct = currentType.Peek(); - ct.AddField(new CppEnumField {Name = name, Type = WordSize == 32 ? Types["uint32_t"] : Types["uint64_t"], Value = value}); - - Debug.WriteLine($"[ENUM VALUE ] {line} -- {name} = {value}"); - continue; - } - - // Make sure we're not ignoring anything we shouldn't - Debug.WriteLine($"[IGNORE ] {line}"); - - // Block opens - if (line == "{") - continue; - - // Global variables - if (line.StartsWith("static")) - continue; - - // Pragma directives - if (line.StartsWith("#pragma")) - continue; - - // Imports - if (line.StartsWith("extern")) - continue; - - throw new InvalidOperationException("Could not understand C++ code: " + line); - } - } - - // Get a type from its name, handling pointer types - public CppType GetType(string typeName) { - var baseName = typeName.Replace("*", ""); - var indirectionCount = typeName.Length - baseName.Length; - - var type = Types[baseName.Trim()]; - for (int i = 0; i < indirectionCount; i++) - type = type.AsPointer(WordSize); - - return type; - } - - // Add a type externally - public void Add(CppType type) => Types.Add(type.Name, type); - - // Generate a populated CppTypes object from a set of Unity headers - public static CppTypes FromUnityVersion(UnityVersion version, int wordSize = 32) - => FromUnityHeaders(UnityHeader.GetHeaderForVersion(version), wordSize); - - public static CppTypes FromUnityHeaders(UnityHeader header, int wordSize = 32) { - var cppTypes = new CppTypes(wordSize); - - // Add junk from config files we haven't included - cppTypes.Add(new CppType("Il2CppIManagedObjectHolder")); - cppTypes.Add(new CppType("Il2CppIUnknown")); - - // Process Unity headers - var headers = header.GetHeaderText(); - cppTypes.AddFromDeclarationText(headers); - - return cppTypes; - } - } } diff --git a/Il2CppInspector.Common/Cpp/CppTypes.cs b/Il2CppInspector.Common/Cpp/CppTypes.cs new file mode 100644 index 0000000..718369d --- /dev/null +++ b/Il2CppInspector.Common/Cpp/CppTypes.cs @@ -0,0 +1,489 @@ +/* + Copyright 2020 Katy Coe - http://www.djkaty.com - https://github.com/djkaty + + All rights reserved. +*/ + +using System; +using System.Collections; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Linq; +using System.Text.RegularExpressions; +using Il2CppInspector.Cpp.UnityHeaders; + +namespace Il2CppInspector.Cpp +{ + // A collection of C++ types + public class CppTypes : IEnumerable + { + // All of the types + public Dictionary Types { get; } + + public CppType this[string s] => Types[s]; + + public IEnumerator GetEnumerator() => Types.Values.GetEnumerator(); + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + + // Architecture width in bits (32/64) - to determine pointer sizes + public int WordSize { get; } + + private static readonly List primitiveTypes = new List { + new CppType("uint8_t", 8), + new CppType("uint16_t", 16), + new CppType("uint32_t", 32), + new CppType("uint64_t", 64), + new CppType("int8_t", 8), + new CppType("int16_t", 16), + new CppType("int32_t", 32), + new CppType("int64_t", 64), + new CppType("char", 8), + new CppType("int", 32), + new CppType("float", 32), + new CppType("double", 64), + new CppType("bool", 8), + new CppType("void", 0) + }; + + public CppTypes(int wordSize) { + if (wordSize != 32 && wordSize != 64) + throw new ArgumentOutOfRangeException("Architecture word size must be 32 or 64-bit to generate C++ data"); + + WordSize = wordSize; + Types = primitiveTypes.ToDictionary(t => t.Name, t => t); + + // This is all compiler-dependent, let's hope for the best! + Add(new CppType("long", WordSize)); + Add(new CppType("intptr_t", WordSize)); + Add(new CppType("uintptr_t", WordSize)); + Add(new CppType("size_t", WordSize)); + } + + #region Code parser + // Parse a block of C++ source code, adding any types found + public void AddFromDeclarationText(string text) { + using StringReader lines = new StringReader(text); + + var rgxExternDecl = new Regex(@"struct (\S+);"); + var rgxTypedefForwardDecl = new Regex(@"typedef struct (\S+) (\S+);"); + var rgxTypedefFnPtr = new Regex(@"typedef\s+(?:struct )?" + CppFnPtrType.Regex + ";"); + var rgxTypedef = new Regex(@"typedef (\S+?)\s*\**\s*(\S+);"); + var rgxFieldFnPtr = new Regex(CppFnPtrType.Regex + @";"); + var rgxField = new Regex(@"^(?:struct |enum )?(\S+?)\s*\**\s*((?:\S|\s*,\s*)+)(?:\s*:\s*([0-9]+))?;"); + var rgxEnumValue = new Regex(@"^\s*([A-Za-z0-9_]+)(?:\s*=\s*(.+?))?,?\s*$"); + + var rgxStripKeywords = new Regex(@"\b(?:const|unsigned|volatile)\b"); + var rgxCompressPtrs = new Regex(@"\*\s+\*"); + + var rgxArrayField = new Regex(@"(\S+?)\[([0-9]+)\]"); + + var rgxAlignment = new Regex(@"__attribute__\(\(aligned\(([0-9]+)\)\)\)"); + var rgxIsBitDirective = new Regex(@"#ifdef\s+IS_(32|64)BIT"); + var rgxSingleLineComment = new Regex(@"/\*.*?\*/"); + + var currentType = new Stack(); + bool falseIfBlock = false; + bool inComment = false; + bool inMethod = false; + var nextEnumValue = 0ul; + string line; + + while ((line = lines.ReadLine()) != null) { + + // Remove comments + if (line.Contains("//")) + line = line.Substring(0, line.IndexOf("//", StringComparison.Ordinal)); + + // End of multi-line comment? + if (line.Contains("*/") && inComment) { + inComment = false; + line = line.Substring(line.IndexOf("*/", StringComparison.Ordinal) + 2); + } + + if (inComment) { + Debug.WriteLine($"[COMMENT ] {line}"); + continue; + } + + // Remove all single-line comments + line = rgxSingleLineComment.Replace(line, ""); + + // Start of multi-line comment? + if (line.Contains("/*") && !inComment) { + inComment = true; + line = line.Substring(0, line.IndexOf("/*")); + } + + // Ignore global variables + if (line.StartsWith("const ") && currentType.Count == 0) { + Debug.WriteLine($"[GLOBAL ] {line}"); + continue; + } + + // Ignore methods + // Note: This is a very lazy way of processing early version IL2CPP headers + if (line != "}" && inMethod) { + Debug.WriteLine($"[METHOD ] {line}"); + continue; + } + + if (line == "}" && inMethod) { + inMethod = false; + + Debug.WriteLine($"[METHOD END ] {line}"); + continue; + } + + if (line.StartsWith("static inline ")) { + inMethod = true; + + Debug.WriteLine($"[METHOD START ] {line}"); + continue; + } + + // Remove keywords we don't care about + line = rgxStripKeywords.Replace(line, ""); + + // Remove whitespace in multiple indirections + line = rgxCompressPtrs.Replace(line, "**"); + + // Process __attribute((aligned(x))) + var alignment = 0; + var alignmentMatch = rgxAlignment.Match(line); + if (alignmentMatch.Success) { + alignment = int.Parse(alignmentMatch.Groups[1].Captures[0].ToString()); + line = rgxAlignment.Replace(line, ""); + } + + line = line.Trim(); + + // Ignore blank lines + if (line.Length == 0) + continue; + + // Process #ifs before anything else + // Doesn't handle nesting but we probably don't need to (use a Stack if we do) + var ifdef = rgxIsBitDirective.Match(line); + if (ifdef.Success) { + var bits = int.Parse(ifdef.Groups[1].Captures[0].ToString()); + if (bits != WordSize) + falseIfBlock = true; + + Debug.WriteLine($"[IF ] {line}"); + continue; + } + if (line == "#else") { + falseIfBlock = !falseIfBlock; + + Debug.WriteLine($"[ELSE ] {line}"); + continue; + } + if (line == "#endif") { + falseIfBlock = false; + + Debug.WriteLine($"[ENDIF ] {line}"); + continue; + } + + if (falseIfBlock) { + Debug.WriteLine($"[FALSE ] {line}"); + continue; + } + + // External declaration + // struct ; + // NOTE: Unfortunately we're not going to ever know the size of this type + var externDecl = rgxExternDecl.Match(line); + if (externDecl.Success) { + var declType = externDecl.Groups[1].Captures[0].ToString(); + + Types.Add(declType, new CppComplexType(CompoundType.Struct) {Name = declType}); + + Debug.WriteLine($"[EXTERN DECL ] {line}"); + continue; + } + + // Forward declaration + // typedef struct + var typedef = rgxTypedefForwardDecl.Match(line); + if (typedef.Success) { + var alias = typedef.Groups[2].Captures[0].ToString(); + var declType = typedef.Groups[1].Captures[0].ToString(); + + // Sometimes we might get multiple forward declarations for the same type + if (!Types.ContainsKey(declType)) + Types.Add(declType, new CppComplexType(CompoundType.Struct) {Name = declType}); + + // Sometimes the alias might be the same name as the type (this is usually the case) + if (!Types.ContainsKey(alias)) + Types.Add(alias, Types[declType].AsAlias(alias)); + + Debug.WriteLine($"[FORWARD DECL ] {line}"); + continue; + } + + // Function pointer + // typedef (*)(); + typedef = rgxTypedefFnPtr.Match(line); + if (typedef.Success) { + var alias = typedef.Groups[2].Captures[0].ToString(); + + var fnPtrType = CppFnPtrType.FromSignature(this, line); + fnPtrType.Name = alias; + + Types.Add(alias, fnPtrType); + + Debug.WriteLine($"[TYPEDEF FNPTR] {line} -- Adding method pointer typedef to {alias}"); + continue; + } + + // Alias + // typedef [*..] ; + typedef = rgxTypedef.Match(line); + if (typedef.Success) { + var alias = typedef.Groups[2].Captures[0].ToString(); + var existingType = typedef.Groups[1].Captures[0].ToString(); + + // Potential multiple indirection + var type = Types[existingType]; + var pointers = line.Count(c => c == '*'); + for (int i = 0; i < pointers; i++) + type = type.AsPointer(WordSize); + + Types.Add(alias, type.AsAlias(alias)); + + Debug.WriteLine($"[TYPEDEF {(pointers > 0? "PTR":"VAL")} ] {line} -- Adding typedef from {type.Name} to {alias}"); + continue; + } + + // Start of struct + // typedef struct + if ((line.StartsWith("typedef struct") || line.StartsWith("struct ")) && line.IndexOf(";", StringComparison.Ordinal) == -1 + && currentType.Count == 0) { + currentType.Push(new CppComplexType(CompoundType.Struct)); + + if (line.StartsWith("struct ")) + currentType.Peek().Name = line.Split(' ')[1]; + + Debug.WriteLine($"\n[STRUCT START ] {line}"); + continue; + } + + // Start of union + // typedef union + if (line.StartsWith("typedef union") && line.IndexOf(";", StringComparison.Ordinal) == -1) { + currentType.Push(new CppComplexType(CompoundType.Union)); + + Debug.WriteLine($"\n[UNION START ] {line}"); + continue; + } + + // Start of enum + // typedef enum + if (line.StartsWith("typedef enum") && line.IndexOf(";", StringComparison.Ordinal) == -1) { + currentType.Push(new CppComplexType(CompoundType.Enum)); + nextEnumValue = 0; + + Debug.WriteLine($"\n[ENUM START ] {line}"); + continue; + } + + // Nested complex field + // struct + // union + var words = line.Split(' '); + if ((words[0] == "union" || words[0] == "struct") && words.Length <= 2) { + currentType.Push(new CppComplexType(words[0] == "struct"? CompoundType.Struct : CompoundType.Union)); + + Debug.WriteLine($"[FIELD START ] {line}"); + continue; + } + + // End of already named struct + if (line == "};" && currentType.Count == 1) { + var ct = currentType.Pop(); + if (!Types.ContainsKey(ct.Name)) + Types.Add(ct.Name, ct); + else + ((CppComplexType) Types[ct.Name]).Fields = ct.Fields; + + Debug.WriteLine($"[STRUCT END ] {line} -- {ct.Name}\n"); + continue; + } + + // End of complex field, complex type or enum + // end of [typedef] struct/union/enum + if (line.StartsWith("}") && line.EndsWith(";")) { + var name = line[1..^1].Trim(); + var ct = currentType.Pop(); + + // End of top-level typedef, so it's a type name + if (currentType.Count == 0) { + ct.Name = name; + + if (!Types.ContainsKey(name)) + Types.Add(name, ct); + + // We will have to copy the type data if the type was forward declared, + // because other types are already referencing it; replacing it in the + // collection will not replace the references to the empty version in + // other types + else { + ((CppComplexType) Types[name]).Fields = ct.Fields; + } + + Debug.WriteLine($"[STRUCT END ] {line} -- {name}\n"); + } + + // Otherwise it's a field name in the current type + else { + var parent = currentType.Peek(); + parent.AddField(new CppField { Name = name, Type = ct }); + + Debug.WriteLine($"[FIELD END ] {line} -- {ct.Name} {name}"); + } + continue; + } + + // Function pointer field + var fieldFnPtr = rgxFieldFnPtr.Match(line); + if (fieldFnPtr.Success) { + var fnPtrType = CppFnPtrType.FromSignature(this, line); + + var name = fieldFnPtr.Groups[2].Captures[0].ToString(); + + var ct = currentType.Peek(); + ct.AddField(new CppField {Name = name, Type = fnPtrType}, alignment); + + Debug.WriteLine($"[FIELD FNPTR ] {line} -- {name}"); + continue; + } + + // Pointer or value field + var field = rgxField.Match(line); + + if (field.Success) { + var names = field.Groups[2].Captures[0].ToString(); + var typeName = field.Groups[1].Captures[0].ToString(); + + // Multiple fields can be separated by commas + foreach (var fieldName in names.Split(',')) { + string name = fieldName.Trim(); + + // Array + var array = rgxArrayField.Match(name); + int arraySize = 0; + if (array.Success && array.Groups[2].Captures.Count > 0) { + arraySize = int.Parse(array.Groups[2].Captures[0].ToString()); + name = array.Groups[1].Captures[0].ToString(); + } + + // Bitfield + int bitfield = 0; + if (field.Groups[3].Captures.Count > 0) + bitfield = int.Parse(field.Groups[3].Captures[0].ToString()); + + // Potential multiple indirection + var type = Types[typeName]; + var pointers = line.Count(c => c == '*'); + for (int i = 0; i < pointers; i++) + type = type.AsPointer(WordSize); + + var ct = currentType.Peek(); + + if (arraySize > 0) + type = type.AsArray(arraySize); + + ct.AddField(new CppField {Name = name, Type = type, BitfieldSize = bitfield}, alignment); + + if (bitfield == 0) + Debug.WriteLine($"[FIELD {(pointers > 0 ? "PTR" : "VAL")} ] {line} -- {name}"); + else + Debug.WriteLine($"[BITFIELD ] {line} -- {name} : {bitfield}"); + } + continue; + } + + // Enum value field + var enumValue = rgxEnumValue.Match(line); + if (enumValue.Success) { + var name = enumValue.Groups[1].Captures[0].ToString(); + + var value = nextEnumValue++; + if (enumValue.Groups[2].Captures.Count > 0) { + // Convert the text to a ulong even if it's hexadecimal with a 0x prefix + var valueText = enumValue.Groups[2].Captures[0].ToString(); + var conv = new System.ComponentModel.UInt64Converter(); + + // Handle bit shift operator + var values = valueText.Split("<<").Select(t => (ulong) conv.ConvertFromInvariantString(t.Trim())).ToArray(); + value = values.Length == 1 ? values[0] : values[0] << (int)values[1]; + nextEnumValue = value + 1; + } + + var ct = currentType.Peek(); + ct.AddField(new CppEnumField {Name = name, Type = WordSize == 32 ? Types["uint32_t"] : Types["uint64_t"], Value = value}); + + Debug.WriteLine($"[ENUM VALUE ] {line} -- {name} = {value}"); + continue; + } + + // Make sure we're not ignoring anything we shouldn't + Debug.WriteLine($"[IGNORE ] {line}"); + + // Block opens + if (line == "{") + continue; + + // Global variables + if (line.StartsWith("static")) + continue; + + // Pragma directives + if (line.StartsWith("#pragma")) + continue; + + // Imports + if (line.StartsWith("extern")) + continue; + + throw new InvalidOperationException("Could not understand C++ code: " + line); + } + } + #endregion + + // Get a type from its name, handling pointer types + public CppType GetType(string typeName) { + var baseName = typeName.Replace("*", ""); + var indirectionCount = typeName.Length - baseName.Length; + + var type = Types[baseName.Trim()]; + for (int i = 0; i < indirectionCount; i++) + type = type.AsPointer(WordSize); + + return type; + } + + // Add a type externally + public void Add(CppType type) => Types.Add(type.Name, type); + + // Generate a populated CppTypes object from a set of Unity headers + public static CppTypes FromUnityVersion(UnityVersion version, int wordSize = 32) + => FromUnityHeaders(UnityHeader.GetHeaderForVersion(version), wordSize); + + public static CppTypes FromUnityHeaders(UnityHeader header, int wordSize = 32) { + var cppTypes = new CppTypes(wordSize); + + // Add junk from config files we haven't included + cppTypes.Add(new CppType("Il2CppIManagedObjectHolder")); + cppTypes.Add(new CppType("Il2CppIUnknown")); + + // Process Unity headers + var headers = header.GetHeaderText(); + cppTypes.AddFromDeclarationText(headers); + + return cppTypes; + } + } +}