C++: Finish initial draft of header parsing

This commit is contained in:
Katy Coe
2020-06-25 15:42:37 +02:00
parent d8aaba375e
commit ee2f851a1a
2 changed files with 242 additions and 42 deletions

View File

@@ -19,6 +19,13 @@ using Il2CppInspector.Reflection;
namespace Il2CppInspector.Cpp
{
// Compound type
public enum CompoundType
{
Struct,
Union
}
// A type with no fields
public class CppType
{
@@ -26,58 +33,103 @@ namespace Il2CppInspector.Cpp
public string Name { get; internal set; }
// The size of the C++ type in bytes
public int Size { get; protected set; }
public virtual int Size { get; protected set; }
public CppType(string name, int size) {
Name = name;
Size = size;
}
public override string ToString() => Name + " // Size: " + Size;
}
// A struct, union or class type (type with fields)
public class CppComplexType : CppType
{
// The compound type
public CompoundType CompoundType;
// Dictionary of byte offset in the type to each field
// Unions and bitfields can have more than one field at the same offset
public Dictionary<int, List<CppField>> Fields { get; } = new Dictionary<int, List<CppField>>();
public Dictionary<int, List<CppField>> Fields { get; internal set; } = new Dictionary<int, List<CppField>>();
public CppComplexType() : base("", 0) {}
public CppComplexType(CompoundType compoundType) : base("", 0) => CompoundType = compoundType;
// Size can't be calculated lazily (as we go along adding fields) because of forward declarations
public override int Size =>
CompoundType == CompoundType.Union
// Union size is the size of the largest element in the union
? Fields.Values.SelectMany(f => f).Select(f => f.Size).Max()
: Fields.Values.SelectMany(f => f).Select(f => f.Size).Sum();
// Add a field to the type. Returns the offset of the field in the type
public int AddField(CppField field) {
// TODO: Use InheritanceStyleEnum to determine whether the field is embedded or a pointer
field.Offset = Size;
field.Offset = CompoundType == CompoundType.Struct ? Size : 0;
if (Fields.ContainsKey(Size))
Fields[Size].Add(field);
if (Fields.ContainsKey(field.Offset))
Fields[field.Offset].Add(field);
else
Fields.Add(Size, new List<CppField> { field });
Fields.Add(field.Offset, new List<CppField> { field });
Size += field.Type.Size;
return Size;
}
// Summarize all field names and offsets
public override string ToString() {
var sb = new StringBuilder();
sb.Append(CompoundType == CompoundType.Struct ? "struct " : "union ");
sb.AppendLine(Name + (Name.Length > 0? " ":"") + "{");
foreach (var field in Fields.Values.SelectMany(f => f))
sb.AppendLine(" " + field);
sb.Append($"}}; // Size: 0x{Size:x2}");
return sb.ToString();
}
}
// A field in a C++ type
public struct CppField
{
// The type collection this belongs to
public CppTypes CppTypes { get; set; }
// The name of the field
public string Name { get; set; }
// The offset of the field into the type
public int Offset { get; set; }
// The size of the field (this will differ from the type size if the field is a pointer)
public int Size => IsPointer? CppTypes.WordSize : Type.Size;
// The type of the field
public CppType Type { get; set; }
// This type will be wrong (by design) for function pointers, pointers to pointers etc.
// and we only want it to calculate offsets so we hide this
internal CppType Type { get; set; }
// True if the field is a pointer
internal bool IsPointer { get; set; }
// C++ representation of field (might be incorrect due to the above)
public override string ToString() => $"/* 0x{Offset:x2} - 0x{Offset + Size - 1:x2} (0x{Size:x2}) */ " + Name +
// nested anonymous types
(Type.Name == "" ? "\n" + Type : "");
}
// A collection of C++ types
public class CppTypes
public class CppTypes : IEnumerable<CppType>
{
// All of the types
private Dictionary<string, CppType> types { get; }
public Dictionary<string, CppType> Types { get; }
public CppType this[string s] => types[s];
public CppType this[string s] => Types[s];
public IEnumerator<CppType> GetEnumerator() => Types.Values.GetEnumerator();
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
// Architecture width in bytes (4 bytes for 32-bit or 8 bytes for 64-bit, to determine pointer sizes)
public int WordSize { get; }
@@ -91,44 +143,109 @@ namespace Il2CppInspector.Cpp
new CppType("int16_t", 2),
new CppType("int32_t", 4),
new CppType("int64_t", 8),
new CppType("char", 1)
new CppType("char", 1),
new CppType("int", 4),
new CppType("float", 4),
new CppType("double", 8)
};
public CppTypes(int wordSize) {
WordSize = wordSize;
types = primitiveTypes.ToDictionary(t => t.Name, t => (CppType) t);
Types = primitiveTypes.ToDictionary(t => t.Name, t => t);
// This is all compiler-dependent, let's hope for the best!
types.Add("uintptr_t", new CppType("uintptr_t", WordSize));
types.Add("size_t", new CppType("size_t", WordSize));
Types.Add("uintptr_t", new CppType("uintptr_t", WordSize));
Types.Add("size_t", new CppType("size_t", WordSize));
Types.Add("void", new CppType("void", WordSize));
}
// Parse a block of C++ source code, adding any types found
public void AddFromDeclarationText(string text) {
using StringReader lines = new StringReader(text);
var rgxExternDecl = new Regex(@"struct (\S+);");
var rgxTypedefForwardDecl = new Regex(@"typedef struct (\S+) (\S+);");
var rgxTypedefFnPtr = new Regex(@"typedef\s+(?:struct )?\S+\s*\(\s*\*(\S+)\)\s*\(.*\);");
var rgxTypedefPtr = new Regex(@"typedef (\S+)\s*\*\s*(\S+);");
var rgxTypedef = new Regex(@"typedef (\S+) (\S+);");
var rgxTypedefFnPtr = new Regex(@"typedef\s+\S+\s*\(\s*\*(\S+)\)\s*\(.*\);");
var rgxFieldFnPtr = new Regex(@"\S+\s*\(\s*\*(\S+)\)\s*\(.*\);");
var rgxFieldPtr = new Regex(@"^(?:struct )?(\S+)\s*\*\s*(\S+);");
var rgxFieldVal = new Regex(@"^(?:struct )?(\S+)\s+(\S+);");
var currentType = new Stack<CppComplexType>();
bool inEnum = false;
string line;
// TODO: Bitfields
// TODO: Arrays
// TODO: Alignment directives
// TODO: unsigned
// TODO: enum prefix in field (Il2CppWindowsRuntimeTypeName)
// TODO: comma-separated fields
// TODO: #ifdef IS_32BIT
// TODO: volatile
while ((line = lines.ReadLine()) != null) {
// Sanitize
line = line.Trim();
line = line.Replace(" const ", " ");
line = line.Replace("const*", "*");
if (line.StartsWith("const "))
line = line.Substring(6);
line = line.Replace("**", "*"); // we don't care about pointers to pointers
line = line.Replace("* *", "*"); // pointer to const pointer
// External declaration
// struct <external-type>;
// NOTE: Unfortunately we're not going to ever know the size of this type
var externDecl = rgxExternDecl.Match(line);
if (externDecl.Success) {
var declType = externDecl.Groups[1].Captures[0].ToString();
Types.Add(declType, new CppComplexType(CompoundType.Struct) {Name = declType});
Debug.WriteLine($"[EXTERN DECL ] {line}");
continue;
}
// Forward declaration
// typedef struct <struct-type> <alias>
var typedef = rgxTypedefForwardDecl.Match(line);
if (typedef.Success) {
// We're lazy so we're just going to hope that the struct and alias names are the same
var alias = typedef.Groups[2].Captures[0].ToString();
var declType = typedef.Groups[1].Captures[0].ToString();
Debug.Assert(alias == declType);
// Sometimes we might get multiple forward declarations for the same type
if (!Types.ContainsKey(alias))
Types.Add(alias, new CppComplexType(CompoundType.Struct) {Name = alias});
Debug.WriteLine($"[FORWARD DECL ] {line}");
continue;
}
// Function pointer
// typedef <retType> (*<alias>)(<args>);
var typedef = rgxTypedefFnPtr.Match(line);
typedef = rgxTypedefFnPtr.Match(line);
if (typedef.Success) {
var alias = typedef.Groups[1].Captures[0].ToString();
types.Add(alias, types["uintptr_t"]);
Types.Add(alias, Types["uintptr_t"]);
Debug.WriteLine($"[TYPEDEF PTR ] {line} -- Adding method pointer typedef to {alias}");
Debug.WriteLine($"[TYPEDEF FNPTR] {line} -- Adding method pointer typedef to {alias}");
continue;
}
// Pointer alias
// typedef <targetType>* <alias>;
typedef = rgxTypedefPtr.Match(line);
if (typedef.Success) {
var alias = typedef.Groups[2].Captures[0].ToString();
Types.Add(alias, Types["uintptr_t"]);
Debug.WriteLine($"[TYPEDEF PTR ] {line} -- Adding pointer typedef to {alias}");
continue;
}
@@ -139,25 +256,40 @@ namespace Il2CppInspector.Cpp
var alias = typedef.Groups[2].Captures[0].ToString();
var existingType = typedef.Groups[1].Captures[0].ToString();
// Pointers
if (existingType.Contains("*")) {
// TODO: Typedef pointers
}
// Regular aliases
else {
types.Add(alias, types[existingType]);
Types.Add(alias, Types[existingType]);
Debug.WriteLine($"[TYPEDEF ALIAS] {line} -- Adding typedef from {existingType} to {alias}");
continue;
}
Debug.WriteLine($"[TYPEDEF ALIAS] {line} -- Adding typedef from {existingType} to {alias}");
continue;
}
// Start of struct
// typedef struct <optional-type-name>
if (line.StartsWith("typedef struct") && line.IndexOf(";", StringComparison.Ordinal) == -1) {
currentType.Push(new CppComplexType());
if ((line.StartsWith("typedef struct") || line.StartsWith("struct ")) && line.IndexOf(";", StringComparison.Ordinal) == -1
&& currentType.Count == 0) {
currentType.Push(new CppComplexType(CompoundType.Struct));
Debug.WriteLine($"[STRUCT START ] {line}");
if (line.StartsWith("struct "))
currentType.Peek().Name = line.Split(' ')[1];
Debug.WriteLine($"\n[STRUCT START ] {line}");
continue;
}
// Start of union
// typedef union <optional-type-name>
if (line.StartsWith("typedef union") && line.IndexOf(";", StringComparison.Ordinal) == -1) {
currentType.Push(new CppComplexType(CompoundType.Union));
Debug.WriteLine($"\n[UNION START ] {line}");
continue;
}
// Start of enum
// typedef enum <optional-type-name>
if (line.StartsWith("typedef enum") && line.IndexOf(";", StringComparison.Ordinal) == -1) {
inEnum = true;
Debug.WriteLine($"\n[ENUM START ] {line}");
continue;
}
@@ -166,38 +298,102 @@ namespace Il2CppInspector.Cpp
// union <optional-type-name>
var words = line.Split(' ');
if ((words[0] == "union" || words[0] == "struct") && words.Length <= 2) {
currentType.Push(new CppComplexType());
currentType.Push(new CppComplexType(words[0] == "struct"? CompoundType.Struct : CompoundType.Union));
Debug.WriteLine($"[FIELD START ] {line}");
continue;
}
// End of complex field or complex type
// end of [typedef] struct/union
if (line.StartsWith("}") && line.EndsWith(";") && currentType.Count > 0) {
// End of already named struct
if (line == "};" && currentType.Count == 1) {
var ct = currentType.Pop();
if (!Types.ContainsKey(ct.Name))
Types.Add(ct.Name, ct);
else
((CppComplexType) Types[ct.Name]).Fields = ct.Fields;
Debug.WriteLine($"[STRUCT END ] {line} -- {ct.Name}\n");
continue;
}
// End of complex field, complex type or enum
// end of [typedef] struct/union/enum
if (line.StartsWith("}") && line.EndsWith(";")) {
// We need this to avoid false positives on field matches below
if (currentType.Count == 0 && !inEnum) {
Debug.WriteLine($"[IGNORE ] {line}");
continue;
}
var name = line[1..^1].Trim();
if (inEnum) {
Types.Add(name, new CppType(name, WordSize));
inEnum = false;
Debug.WriteLine($"[ENUM END ] {line} -- {name}\n");
continue;
}
var ct = currentType.Pop();
// End of top-level typedef, so it's a type name
if (currentType.Count == 0) {
ct.Name = name;
types.Add(ct.Name, ct);
Debug.WriteLine($"[STRUCT END ] {line} -- {name}");
if (!Types.ContainsKey(name))
Types.Add(name, ct);
// We will have to copy the type data if the type was forward declared,
// because other types are already referencing it; replacing it in the
// collection will not replace the references to the empty version in
// other types
else {
((CppComplexType) Types[name]).Fields = ct.Fields;
}
Debug.WriteLine($"[STRUCT END ] {line} -- {name}\n");
}
// Otherwise it's a field name in the current type
else {
var parent = currentType.Peek();
parent.AddField(new CppField { Name = name, Type = ct });
parent.AddField(new CppField { CppTypes = this, Name = name, Type = ct });
Debug.WriteLine($"[FIELD END ] {line} -- {ct.Name} {name}");
}
continue;
}
Debug.WriteLine("[IGNORE ] " + line);
// Function pointer field
var fieldFnPtr = rgxFieldFnPtr.Match(line);
if (fieldFnPtr.Success) {
var name = fieldFnPtr.Groups[1].Captures[0].ToString();
var ct = currentType.Peek();
ct.AddField(new CppField() {CppTypes = this, Name = name, Type = Types["uintptr_t"], IsPointer = false});
Debug.WriteLine($"[FIELD FNPTR ] {line} -- {name}");
continue;
}
// Pointer or value field
var fieldPtr = rgxFieldPtr.Match(line);
var fieldVal = rgxFieldVal.Match(line);
if (fieldPtr.Success || fieldVal.Success) {
var fieldMatch = fieldPtr.Success ? fieldPtr : fieldVal;
var name = fieldMatch.Groups[2].Captures[0].ToString();
var type = fieldMatch.Groups[1].Captures[0].ToString();
var ct = currentType.Peek();
ct.AddField(new CppField {CppTypes = this, Name = name, Type = Types[type], IsPointer = fieldPtr.Success});
Debug.WriteLine($"[FIELD {(fieldPtr.Success? "PTR":"VAL")} ] {line} -- {name}");
continue;
}
Debug.WriteLine($"[IGNORE ] {line}");
}
}

View File

@@ -5,6 +5,7 @@
*/
using System;
using System.Diagnostics;
using System.IO;
using Il2CppInspector.Cpp;
using Il2CppInspector.Outputs.UnityHeaders;
@@ -22,6 +23,9 @@ namespace Il2CppInspector
var cppTypes = CppTypes.FromUnityHeaders(new UnityVersion("2019.3.1f1"));
foreach (var cppType in cppTypes.Types)
Debug.WriteLine(cppType.Key + ":\n" + cppType.Value + "\n");
throw new NotImplementedException();
}
}