Struct reading and disassembly script overhaul, various misc. loading fixes, bump to .NET 9 (#13)
* Bump projects to .net 9 and update nugets * add VersionedSerialization + source generator * migrate versioning to StructVersion class, add handling/detection for 29.2/31.2 * add new struct definitions * rename serialization methods and add BinaryObjectStreamReader for interop * Rework metadata struct loading to use new struct versioning * move 29/31.1/.2 to use tags (-2022,-2023) instead of minor versions * fix metadata usage validity checks * rework code registration offsetting a bit and add second 29/31.1 condition * tweak .1 condition (again) * 29/31.2 was a psyop * also remove 29.2 from the readme * remove loading of packed dlls - this was a very unsafe feature * support auto-recovering type indices from type handles fixes loading of memory-dumped v29+ libraries since those replace their class indices on load with a pointer to the corresponding type * support loading PEs without an export table * also read UnresolvedVirtualCallCount on regular v31 * Disable plugin loading for now * Overhaul disassembler script + add Binary Ninja target (#12) * Overhaul disassembler scripts: - No longer defines top level functions - Split into three classes: StatusHandler (like before), DisassemblerInterface (for interfacing with the used program API), ScriptContext (for defining general functions that use the disassembler interface) - Add type annotations to all class methods and remove 2.7 compatibility stuff (Ghidra now supports Python 3 so this is no longer necessary) - Disassembler backends are now responsible for launching metadata/script processing, to better support disassembler differences - String handling is back in the base ScriptContext class, disassembler interfaces opt into the fake string segment creation and fall back to the old method if it isn't supported * Add Binary Ninja disassembler script backend This uses the new backend-controlled execution to launch metadata processing on a background thread to keep the ui responsive * make binary ninja script use 
own _BINARYNINJA_ define and add define helpers to header * Update README to account for new script and binary ninja backend * implement fake string segment functions for binary ninja but don't advertise support * also cache API function types in binary ninja backend * fix ida script and disable folders again * Fix metadata usage issues caused by it being a value type now * make TryMapVATR overrideable and implement it for ELFs * Make field offset reading use TryMapVATR to reduce exceptions * Fix NRE in Assembly ctor on < v24.2 * Update actions workflow to produce cross-platform CLI binaries, update readme to reflect .net 9 changes * workflow: only restore packages for projects that are being built * workflow: tweak caching and fix gui compilation * workflow: remove double .zip in CLI artifact name * 29/31.2 don't actually exist, this logic is not needed
This commit is contained in:
@@ -1,106 +1,124 @@
|
||||
# Ghidra-specific implementation
|
||||
from ghidra.app.cmd.function import ApplyFunctionSignatureCmd
|
||||
from ghidra.app.script import GhidraScriptUtil
|
||||
from ghidra.app.util.cparser.C import CParserUtils
|
||||
from ghidra.program.model.data import ArrayDataType
|
||||
from ghidra.program.model.symbol import SourceType
|
||||
from ghidra.program.model.symbol import RefType
|
||||
from ghidra.app.cmd.label import DemanglerCmd
|
||||
|
||||
# Reference manager of the current program; add_xref() records its memory references through it.
xrefs = currentProgram.getReferenceManager()
|
||||
#try:
|
||||
# from typing import TYPE_CHECKING
|
||||
# if TYPE_CHECKING:
|
||||
# from ..shared_base import BaseStatusHandler, BaseDisassemblerInterface, ScriptContext
|
||||
# import json
|
||||
# import os
|
||||
# import sys
|
||||
# from datetime import datetime
|
||||
#except:
|
||||
# pass
|
||||
|
||||
def set_name(addr, name):
    """Label ``addr`` with ``name``.

    Names starting with ``_ZN`` are first handed to ``DemanglerCmd``; if
    that command cannot be applied, a message is printed and the name is
    used as a plain label instead. Every other name is applied directly
    as a label.
    """
    if name.startswith("_ZN"):
        demangler = DemanglerCmd(currentAddress.getAddress(hex(addr)), name)
        if demangler.applyTo(currentProgram, monitor):
            # The demangler command succeeded; no plain label is created
            return
        print("Failed to apply demangled name to %s at %s due %s, falling back to mangled" % (name, hex(addr), demangler.getStatusMsg()))

    createLabel(toAddr(addr), name, True)
|
||||
# Ghidra implementation of the disassembler interface.
# The flag below is False: this backend does not opt into fake string segment creation.
class GhidraDisassemblerInterface(BaseDisassemblerInterface):
|
||||
supports_fake_string_segment = False
|
||||
|
||||
def make_function(start, end = None):
    """Create a function at ``start`` unless one is already defined there.

    ``end`` is accepted but not used by this implementation.
    """
    entry = toAddr(start)

    # An existing function is left untouched
    if getFunctionAt(entry) is not None:
        return

    createFunction(entry, None)
|
||||
def get_script_directory(self) -> str:
    """Return the parent directory of the script's source file as a string."""
    source_file = getSourceFile()
    parent_dir = source_file.getParentFile()
    return parent_dir.toString()
|
||||
|
||||
def make_array(addr, numItems, cppType):
    """Define an array of ``numItems`` elements of ``cppType`` at ``addr``.

    A leading ``'struct '`` is removed from ``cppType`` before the type is
    looked up; the first match returned by ``getDataTypes`` is used as the
    element type. Any data already defined at the address is removed first.
    """
    prefix = 'struct '
    type_name = cppType[len(prefix):] if cppType.startswith(prefix) else cppType

    element = getDataTypes(type_name)[0]
    array_type = ArrayDataType(element, numItems, element.getLength())

    location = toAddr(addr)
    removeDataAt(location)
    createData(location, array_type)
|
||||
def on_start(self):
|
||||
self.xrefs = currentProgram.getReferenceManager()
|
||||
|
||||
def define_code(code):
|
||||
# Code declarations are not supported in Ghidra
|
||||
# This only affects string literals for metadata version < 19
|
||||
# TODO: Replace with creating a DataType for enums
|
||||
# Check that the user has parsed the C headers first
|
||||
if len(getDataTypes('Il2CppObject')) == 0:
|
||||
print('STOP! You must import the generated C header file (%TYPE_HEADER_RELATIVE_PATH%) before running this script.')
|
||||
print('See https://github.com/djkaty/Il2CppInspector/blob/master/README.md#adding-metadata-to-your-ghidra-workflow for instructions.')
|
||||
sys.exit()
|
||||
|
||||
# Ghidra sets the image base for ELF to 0x100000 for some reason
|
||||
# https://github.com/NationalSecurityAgency/ghidra/issues/1020
|
||||
# Make sure that the base address is 0
|
||||
# Without this, Ghidra may not analyze the binary correctly and you will just waste your time
|
||||
# If 0 doesn't work for you, replace it with the base address from the output of the CLI or GUI
|
||||
if currentProgram.getExecutableFormat().endswith('(ELF)'):
|
||||
currentProgram.setImageBase(toAddr(0), True)
|
||||
|
||||
# Don't trigger decompiler
|
||||
setAnalysisOption(currentProgram, "Call Convention ID", "false")
|
||||
|
||||
def on_finish(self):
    """Do nothing; this backend performs no work in this hook."""
    return None
|
||||
|
||||
def define_function(self, address: int, end: int | None = None):
    """Create a function at ``address`` unless one already exists there.

    ``end`` is accepted but not used by this implementation.
    """
    entry = toAddr(address)

    # Only create a function when none is defined yet, so existing ones are kept
    if getFunctionAt(entry) is None:
        createFunction(entry, None)
|
||||
|
||||
def define_data_array(self, address: int, type: str, count: int):
    """Define an array of ``count`` elements of ``type`` at ``address``.

    A leading ``'struct '`` is dropped from ``type`` before lookup, and the
    first match returned by ``getDataTypes`` becomes the element type.
    Data already defined at the address is removed before the array is
    created.
    """
    type_name = type.removeprefix('struct ')

    element = getDataTypes(type_name)[0]
    array_type = ArrayDataType(element, count, element.getLength())

    location = toAddr(address)
    removeDataAt(location)
    createData(location, array_type)
|
||||
|
||||
def set_data_type(self, address: int, type: str):
    """Apply the data type named ``type`` to ``address`` on a best-effort basis.

    A leading ``'struct '`` is dropped from ``type`` before lookup, and the
    first match returned by ``getDataTypes`` is used. Data already defined
    at the address is removed before the new type is applied. If any step
    fails, a message is printed and the method returns normally.
    """
    type_name = type.removeprefix('struct ')

    try:
        data_type = getDataTypes(type_name)[0]
        location = toAddr(address)
        removeDataAt(location)
        createData(location, data_type)
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt and SystemExit are not swallowed here.
        # NOTE(review): assumes Java exceptions surface as Exception
        # subclasses in the Python 3 runtime used by Ghidra - confirm.
        print("Failed to set type: %s" % type_name)
|
||||
|
||||
def set_function_type(self, address: int, type: str):
    """Apply the C function signature ``type`` to the function at ``address``.

    A function is created at ``address`` first if none exists. The
    signature text is then parsed with ``CParserUtils.parseSignature`` and
    applied with ``ApplyFunctionSignatureCmd``.
    """
    # Use this interface's own define_function() instead of the module-level
    # make_function() helper, so the method does not depend on a free
    # function being present in the script.
    self.define_function(address)

    signature = CParserUtils.parseSignature(None, currentProgram, type)
    command = ApplyFunctionSignatureCmd(
        toAddr(address), signature, SourceType.USER_DEFINED, False, True
    )
    command.applyTo(currentProgram)
|
||||
|
||||
def set_data_comment(self, address: int, cmt: str):
    """Set ``cmt`` as the end-of-line comment at ``address``."""
    location = toAddr(address)
    setEOLComment(location, cmt)
|
||||
|
||||
def set_function_comment(self, address: int, cmt: str):
    """Set ``cmt`` as the plate comment at ``address``."""
    location = toAddr(address)
    setPlateComment(location, cmt)
|
||||
|
||||
def set_data_name(self, address: int, name: str):
    """Label ``address`` with ``name``.

    Names longer than 2000 characters are reported and skipped. Names
    starting with ``_ZN`` are first handed to ``DemanglerCmd``; if that
    command cannot be applied, a message is printed and the name is used
    as a plain label instead. All other names are applied directly as a
    label.
    """
    address = toAddr(address)

    if len(name) > 2000:
        print("Name length exceeds 2000 characters, skipping (%s)" % name)
        return

    if name.startswith("_ZN"):
        cmd = DemanglerCmd(address, name)
        if cmd.applyTo(currentProgram, monitor):
            # The demangler command succeeded; no plain label is created
            return
        print(f"Failed to apply demangled name to {name} at {address} due {cmd.getStatusMsg()}, falling back to mangled")

    createLabel(address, name, True)
|
||||
|
||||
def set_function_name(self, address: int, name: str):
    """Name the function at ``address`` by delegating to ``set_data_name``.

    Returns whatever ``set_data_name`` returns.
    """
    outcome = self.set_data_name(address, name)
    return outcome
|
||||
|
||||
def add_cross_reference(self, from_address: int, to_address: int):
    """Add a user-defined ``RefType.DATA`` memory reference between two addresses.

    The reference goes from ``from_address`` to ``to_address`` and is
    registered through ``self.xrefs``.
    """
    source = toAddr(from_address)
    target = toAddr(to_address)
    self.xrefs.addMemoryReference(source, target, RefType.DATA, SourceType.USER_DEFINED, 0)
|
||||
|
||||
def import_c_typedef(self, type_def: str):
    """Accept a C type definition and do nothing with it.

    Code declarations are not supported in Ghidra. This only affects
    string literals for metadata version < 19.
    """
    # TODO: Replace with creating a DataType for enums
    return None
|
||||
|
||||
class GhidraStatusHandler(BaseStatusHandler):
    """Status handler for Ghidra; adds nothing to ``BaseStatusHandler``."""
|
||||
|
||||
def set_function_type(addr, sig):
    """Apply the C function signature ``sig`` to the function at ``addr``.

    ``make_function`` is called first so that a function exists at the
    address. The signature is then parsed with
    ``CParserUtils.parseSignature`` and applied with
    ``ApplyFunctionSignatureCmd``.
    """
    make_function(addr)

    signature = CParserUtils.parseSignature(None, currentProgram, sig)
    command = ApplyFunctionSignatureCmd(toAddr(addr), signature, SourceType.USER_DEFINED, False, True)
    command.applyTo(currentProgram)
|
||||
|
||||
def set_type(addr, cppType):
    """Apply the data type named ``cppType`` to ``addr`` on a best-effort basis.

    A leading ``'struct '`` is removed before lookup, and the first match
    returned by ``getDataTypes`` is used. Data already defined at the
    address is removed before the new type is applied. If any step fails,
    a message is printed and the function returns normally.
    """
    prefix = 'struct '
    type_name = cppType[len(prefix):] if cppType.startswith(prefix) else cppType

    try:
        data_type = getDataTypes(type_name)[0]
        location = toAddr(addr)
        removeDataAt(location)
        createData(location, data_type)
    except:
        # Catch-all kept as in the original: any failure is only reported
        print("Failed to set type: %s" % type_name)
|
||||
|
||||
def set_comment(addr, text):
    """Set ``text`` as the end-of-line comment at ``addr``."""
    location = toAddr(addr)
    setEOLComment(location, text)
|
||||
|
||||
def set_header_comment(addr, text):
    """Set ``text`` as the plate comment at ``addr``."""
    location = toAddr(addr)
    setPlateComment(location, text)
|
||||
|
||||
def script_prologue(status):
    """Check prerequisites and prepare the current program.

    Exits through ``sys.exit()`` when no ``Il2CppObject`` data type is
    found, rebases ELF programs to address 0, and sets the
    "Call Convention ID" analysis option to "false". ``status`` is
    accepted but not used here.
    """
    # Check that the user has parsed the C headers first
    # NOTE(review): `sys` is not imported in the visible part of this file;
    # presumably it is provided by the shared script preamble - confirm.
    header_types = getDataTypes('Il2CppObject')
    if len(header_types) == 0:
        for message in (
            'STOP! You must import the generated C header file (%TYPE_HEADER_RELATIVE_PATH%) before running this script.',
            'See https://github.com/djkaty/Il2CppInspector/blob/master/README.md#adding-metadata-to-your-ghidra-workflow for instructions.',
        ):
            print(message)
        sys.exit()

    # Ghidra sets the image base for ELF to 0x100000 for some reason
    # https://github.com/NationalSecurityAgency/ghidra/issues/1020
    # Make sure that the base address is 0
    # Without this, Ghidra may not analyze the binary correctly and you will just waste your time
    # If 0 doesn't work for you, replace it with the base address from the output of the CLI or GUI
    executable_format = currentProgram.getExecutableFormat()
    if executable_format.endswith('(ELF)'):
        currentProgram.setImageBase(toAddr(0), True)

    # Don't trigger decompiler
    setAnalysisOption(currentProgram, "Call Convention ID", "false")
|
||||
|
||||
def get_script_directory():
    """Return the parent directory of the script's source file as a string."""
    source_file = getSourceFile()
    return source_file.getParentFile().toString()
|
||||
|
||||
def script_epilogue(status):
    """Do nothing; ``status`` is accepted but not used."""
    return None
|
||||
def add_function_to_group(addr, group):
    """Do nothing; both arguments are accepted but not used."""
    return None
|
||||
def add_xref(addr, to):
    """Add a user-defined ``RefType.DATA`` memory reference from ``addr`` to ``to``.

    Both addresses are resolved from their ``hex()`` text through
    ``currentAddress.getAddress``, and the reference is registered with
    the module-level ``xrefs`` reference manager.
    """
    source = currentAddress.getAddress(hex(addr))
    target = currentAddress.getAddress(hex(to))
    xrefs.addMemoryReference(source, target, RefType.DATA, SourceType.USER_DEFINED, 0)
|
||||
|
||||
def process_string_literals(status, data):
    """Define, name, type and comment every entry of ``jsonData['stringLiterals']``.

    ``status.update_progress()`` is called once per processed entry.
    """
    # NOTE(review): the loop reads the global `jsonData`, not the `data`
    # parameter - confirm whether callers always pass the same object.
    literals = jsonData['stringLiterals']
    for literal in literals:
        define_string(literal)

        # I don't know how to make inline strings in Ghidra
        # Just revert back original impl
        location = parse_address(literal)
        set_name(location, literal['name'])
        set_type(location, r'struct String *')
        set_comment(location, literal['string'])

        status.update_progress()
|
||||
|
||||
class StatusHandler(BaseStatusHandler):
    """Status handler that adds nothing to ``BaseStatusHandler``."""
|
||||
# Script entry point: build the Ghidra status handler and disassembler backend,
# wrap them in a ScriptContext and start processing.
status = GhidraStatusHandler()
|
||||
backend = GhidraDisassemblerInterface()
|
||||
# ScriptContext takes the backend first, then the status handler
context = ScriptContext(backend, status)
|
||||
context.process()
|
||||
Reference in New Issue
Block a user