Struct reading and disassembly script overhaul, various misc. loading fixes, bump to .NET 9 (#13)

* Bump projects to .net 9 and update nugets

* add VersionedSerialization + source generator

* migrate versioning to StructVersion class, add handling/detection for 29.2/31.2

* add new struct definitions

* rename serialization methods and add BinaryObjectStreamReader for interop

* Rework metadata struct loading to use new struct versioning

* move 29/31.1/.2 to use tags (-2022,-2023) instead of minor versions

* fix metadata usage validity checks

* rework code registration offsetting a bit and add second 29/31.1 condition

* tweak .1 condition (again)

* 29/31.2 was a psyop

* also remove 29.2 from the readme

* remove loading of packed dlls - this was a very unsafe feature

* support auto-recovering type indices from type handles
fixes loading of memory-dumped v29+ libraries since those replacee their class indices on load with a pointer to the corresponding type

* support loading PEs without an export table

* also read UnresolvedVirtualCallCount on regular v31

* Disable plugin loading for now

* Overhaul disassembler script + add Binary Ninja target (#12)

* Overhaul diassembler scripts:
- No longer defines top level functions
- Split into three classes: StatusHandler (like before), DisassemblerInterface (for interfacing with the used program API), ScriptContext (for definiting general functions that use the disassembler interface)
- Add type annotations to all class methods and remove 2.7 compatibility stuff (Ghidra now supports Python 3 so this is unnecessary anymore)
- Disassembler backends are now responsible for launching metadata/script processing, to better support disassembler differences
- String handling is back in the base ScriptContext class, disassembler interfaces opt into the fake string segment creation and fall back to the old method if it isn't supported

* Add Binary Ninja disassembler script backend
This uses the new backend-controlled execution to launch metadata processing on a background thread to keep the ui responsive

* make binary ninja script use own _BINARYNINJA_ define and add define helpers to header

* Update README to account for new script and binary ninja backend

* implement fake string segment functions for binary ninja but don't advertise support

* also cache API function types in binary ninja backend

* fix ida script and disable folders again

* Fix metadata usage issues caused by it being a value type now

* make TryMapVATR overrideable and implement it for ELFs

* Make field offset reading use TryMapVATR to reduce exceptions

* Fix NRE in Assembly ctor on < v24.2

* Update actions workflow to produce cross-platform CLI binaries, update readme to reflect .net 9 changes

* workflow: only restore packages for projects that are being built

* workflow: tweak caching and fix gui compilation

* workflow: remove double .zip in CLI artifact name

* 29/31.2 don't actually exist, this logic is not needed
This commit is contained in:
Luke
2024-11-14 14:32:11 +01:00
committed by GitHub
parent 5b0476fcc5
commit b05c03964a
130 changed files with 5117 additions and 4371 deletions

View File

@@ -1,106 +1,124 @@
# Ghidra-specific implementation
from ghidra.app.cmd.function import ApplyFunctionSignatureCmd
from ghidra.app.script import GhidraScriptUtil
from ghidra.app.util.cparser.C import CParserUtils
from ghidra.program.model.data import ArrayDataType
from ghidra.program.model.symbol import SourceType
from ghidra.program.model.symbol import RefType
from ghidra.app.cmd.label import DemanglerCmd
xrefs = currentProgram.getReferenceManager()
#try:
# from typing import TYPE_CHECKING
# if TYPE_CHECKING:
# from ..shared_base import BaseStatusHandler, BaseDisassemblerInterface, ScriptContext
# import json
# import os
# import sys
# from datetime import datetime
#except:
# pass
def set_name(addr, name):
if not name.startswith("_ZN"):
createLabel(toAddr(addr), name, True)
return
cmd = DemanglerCmd(currentAddress.getAddress(hex(addr)), name)
if not cmd.applyTo(currentProgram, monitor):
print("Failed to apply demangled name to %s at %s due %s, falling back to mangled" % (name, hex(addr), cmd.getStatusMsg()))
createLabel(toAddr(addr), name, True)
class GhidraDisassemblerInterface(BaseDisassemblerInterface):
supports_fake_string_segment = False
def make_function(start, end = None):
addr = toAddr(start)
# Don't override existing functions
fn = getFunctionAt(addr)
if fn is None:
# Create new function if none exists
createFunction(addr, None)
def get_script_directory(self) -> str:
return getSourceFile().getParentFile().toString()
def make_array(addr, numItems, cppType):
if cppType.startswith('struct '):
cppType = cppType[7:]
t = getDataTypes(cppType)[0]
a = ArrayDataType(t, numItems, t.getLength())
addr = toAddr(addr)
removeDataAt(addr)
createData(addr, a)
def on_start(self):
self.xrefs = currentProgram.getReferenceManager()
def define_code(code):
# Code declarations are not supported in Ghidra
# This only affects string literals for metadata version < 19
# TODO: Replace with creating a DataType for enums
# Check that the user has parsed the C headers first
if len(getDataTypes('Il2CppObject')) == 0:
print('STOP! You must import the generated C header file (%TYPE_HEADER_RELATIVE_PATH%) before running this script.')
print('See https://github.com/djkaty/Il2CppInspector/blob/master/README.md#adding-metadata-to-your-ghidra-workflow for instructions.')
sys.exit()
# Ghidra sets the image base for ELF to 0x100000 for some reason
# https://github.com/NationalSecurityAgency/ghidra/issues/1020
# Make sure that the base address is 0
# Without this, Ghidra may not analyze the binary correctly and you will just waste your time
# If 0 doesn't work for you, replace it with the base address from the output of the CLI or GUI
if currentProgram.getExecutableFormat().endswith('(ELF)'):
currentProgram.setImageBase(toAddr(0), True)
# Don't trigger decompiler
setAnalysisOption(currentProgram, "Call Convention ID", "false")
def on_finish(self):
pass
def define_function(self, address: int, end: int | None = None):
address = toAddr(address)
# Don't override existing functions
fn = getFunctionAt(address)
if fn is None:
# Create new function if none exists
createFunction(address, None)
def define_data_array(self, address: int, type: str, count: int):
if type.startswith('struct '):
type = type[7:]
t = getDataTypes(type)[0]
a = ArrayDataType(t, count, t.getLength())
address = toAddr(address)
removeDataAt(address)
createData(address, a)
def set_data_type(self, address: int, type: str):
if type.startswith('struct '):
type = type[7:]
try:
t = getDataTypes(type)[0]
address = toAddr(address)
removeDataAt(address)
createData(address, t)
except:
print("Failed to set type: %s" % type)
def set_function_type(self, address: int, type: str):
make_function(address)
typeSig = CParserUtils.parseSignature(None, currentProgram, type)
ApplyFunctionSignatureCmd(toAddr(address), typeSig, SourceType.USER_DEFINED, False, True).applyTo(currentProgram)
def set_data_comment(self, address: int, cmt: str):
setEOLComment(toAddr(address), cmt)
def set_function_comment(self, address: int, cmt: str):
setPlateComment(toAddr(address), cmt)
def set_data_name(self, address: int, name: str):
address = toAddr(address)
if len(name) > 2000:
print("Name length exceeds 2000 characters, skipping (%s)" % name)
return
if not name.startswith("_ZN"):
createLabel(address, name, True)
return
cmd = DemanglerCmd(address, name)
if not cmd.applyTo(currentProgram, monitor):
print(f"Failed to apply demangled name to {name} at {address} due {cmd.getStatusMsg()}, falling back to mangled")
createLabel(address, name, True)
def set_function_name(self, address: int, name: str):
return self.set_data_name(address, name)
def add_cross_reference(self, from_address: int, to_address: int):
self.xrefs.addMemoryReference(toAddr(from_address), toAddr(to_address), RefType.DATA, SourceType.USER_DEFINED, 0)
def import_c_typedef(self, type_def: str):
# Code declarations are not supported in Ghidra
# This only affects string literals for metadata version < 19
# TODO: Replace with creating a DataType for enums
pass
class GhidraStatusHandler(BaseStatusHandler):
pass
def set_function_type(addr, sig):
make_function(addr)
typeSig = CParserUtils.parseSignature(None, currentProgram, sig)
ApplyFunctionSignatureCmd(toAddr(addr), typeSig, SourceType.USER_DEFINED, False, True).applyTo(currentProgram)
def set_type(addr, cppType):
if cppType.startswith('struct '):
cppType = cppType[7:]
try:
t = getDataTypes(cppType)[0]
addr = toAddr(addr)
removeDataAt(addr)
createData(addr, t)
except:
print("Failed to set type: %s" % cppType)
def set_comment(addr, text):
setEOLComment(toAddr(addr), text)
def set_header_comment(addr, text):
setPlateComment(toAddr(addr), text)
def script_prologue(status):
# Check that the user has parsed the C headers first
if len(getDataTypes('Il2CppObject')) == 0:
print('STOP! You must import the generated C header file (%TYPE_HEADER_RELATIVE_PATH%) before running this script.')
print('See https://github.com/djkaty/Il2CppInspector/blob/master/README.md#adding-metadata-to-your-ghidra-workflow for instructions.')
sys.exit()
# Ghidra sets the image base for ELF to 0x100000 for some reason
# https://github.com/NationalSecurityAgency/ghidra/issues/1020
# Make sure that the base address is 0
# Without this, Ghidra may not analyze the binary correctly and you will just waste your time
# If 0 doesn't work for you, replace it with the base address from the output of the CLI or GUI
if currentProgram.getExecutableFormat().endswith('(ELF)'):
currentProgram.setImageBase(toAddr(0), True)
# Don't trigger decompiler
setAnalysisOption(currentProgram, "Call Convention ID", "false")
def get_script_directory(): return getSourceFile().getParentFile().toString()
def script_epilogue(status): pass
def add_function_to_group(addr, group): pass
def add_xref(addr, to):
xrefs.addMemoryReference(currentAddress.getAddress(hex(addr)), currentAddress.getAddress(hex(to)), RefType.DATA, SourceType.USER_DEFINED, 0)
def process_string_literals(status, data):
for d in jsonData['stringLiterals']:
define_string(d)
# I don't know how to make inline strings in Ghidra
# Just revert back original impl
addr = parse_address(d)
set_name(addr, d['name'])
set_type(addr, r'struct String *')
set_comment(addr, d['string'])
status.update_progress()
class StatusHandler(BaseStatusHandler): pass
status = GhidraStatusHandler()
backend = GhidraDisassemblerInterface()
context = ScriptContext(backend, status)
context.process()