Overhaul disassembler script + add Binary Ninja target (#12)

* Overhaul diassembler scripts:
- No longer defines top level functions
- Split into three classes: StatusHandler (like before), DisassemblerInterface (for interfacing with the used program API), ScriptContext (for definiting general functions that use the disassembler interface)
- Add type annotations to all class methods and remove 2.7 compatibility stuff (Ghidra now supports Python 3 so this is unnecessary anymore)
- Disassembler backends are now responsible for launching metadata/script processing, to better support disassembler differences
- String handling is back in the base ScriptContext class, disassembler interfaces opt into the fake string segment creation and fall back to the old method if it isn't supported

* Add Binary Ninja disassembler script backend
This uses the new backend-controlled execution to launch metadata processing on a background thread to keep the ui responsive

* make binary ninja script use own _BINARYNINJA_ define and add define helpers to header

* Update README to account for new script and binary ninja backend

* implement fake string segment functions for binary ninja but don't advertise support

* also cache API function types in binary ninja backend

* fix ida script and disable folders again
This commit is contained in:
Luke
2024-11-08 23:31:40 +01:00
committed by GitHub
parent 792268f52f
commit 5715760e8b
9 changed files with 912 additions and 510 deletions

View File

@@ -1,106 +1,124 @@
# Ghidra-specific implementation
from ghidra.app.cmd.function import ApplyFunctionSignatureCmd
from ghidra.app.script import GhidraScriptUtil
from ghidra.app.util.cparser.C import CParserUtils
from ghidra.program.model.data import ArrayDataType
from ghidra.program.model.symbol import SourceType
from ghidra.program.model.symbol import RefType
from ghidra.app.cmd.label import DemanglerCmd
xrefs = currentProgram.getReferenceManager()
#try:
# from typing import TYPE_CHECKING
# if TYPE_CHECKING:
# from ..shared_base import BaseStatusHandler, BaseDisassemblerInterface, ScriptContext
# import json
# import os
# import sys
# from datetime import datetime
#except:
# pass
def set_name(addr, name):
if not name.startswith("_ZN"):
createLabel(toAddr(addr), name, True)
return
cmd = DemanglerCmd(currentAddress.getAddress(hex(addr)), name)
if not cmd.applyTo(currentProgram, monitor):
print("Failed to apply demangled name to %s at %s due %s, falling back to mangled" % (name, hex(addr), cmd.getStatusMsg()))
createLabel(toAddr(addr), name, True)
class GhidraDisassemblerInterface(BaseDisassemblerInterface):
supports_fake_string_segment = False
def make_function(start, end = None):
addr = toAddr(start)
# Don't override existing functions
fn = getFunctionAt(addr)
if fn is None:
# Create new function if none exists
createFunction(addr, None)
def get_script_directory(self) -> str:
return getSourceFile().getParentFile().toString()
def make_array(addr, numItems, cppType):
if cppType.startswith('struct '):
cppType = cppType[7:]
t = getDataTypes(cppType)[0]
a = ArrayDataType(t, numItems, t.getLength())
addr = toAddr(addr)
removeDataAt(addr)
createData(addr, a)
def on_start(self):
self.xrefs = currentProgram.getReferenceManager()
def define_code(code):
# Code declarations are not supported in Ghidra
# This only affects string literals for metadata version < 19
# TODO: Replace with creating a DataType for enums
# Check that the user has parsed the C headers first
if len(getDataTypes('Il2CppObject')) == 0:
print('STOP! You must import the generated C header file (%TYPE_HEADER_RELATIVE_PATH%) before running this script.')
print('See https://github.com/djkaty/Il2CppInspector/blob/master/README.md#adding-metadata-to-your-ghidra-workflow for instructions.')
sys.exit()
# Ghidra sets the image base for ELF to 0x100000 for some reason
# https://github.com/NationalSecurityAgency/ghidra/issues/1020
# Make sure that the base address is 0
# Without this, Ghidra may not analyze the binary correctly and you will just waste your time
# If 0 doesn't work for you, replace it with the base address from the output of the CLI or GUI
if currentProgram.getExecutableFormat().endswith('(ELF)'):
currentProgram.setImageBase(toAddr(0), True)
# Don't trigger decompiler
setAnalysisOption(currentProgram, "Call Convention ID", "false")
def on_finish(self):
pass
def define_function(self, address: int, end: int | None = None):
address = toAddr(address)
# Don't override existing functions
fn = getFunctionAt(address)
if fn is None:
# Create new function if none exists
createFunction(address, None)
def define_data_array(self, address: int, type: str, count: int):
if type.startswith('struct '):
type = type[7:]
t = getDataTypes(type)[0]
a = ArrayDataType(t, count, t.getLength())
address = toAddr(address)
removeDataAt(address)
createData(address, a)
def set_data_type(self, address: int, type: str):
if type.startswith('struct '):
type = type[7:]
try:
t = getDataTypes(type)[0]
address = toAddr(address)
removeDataAt(address)
createData(address, t)
except:
print("Failed to set type: %s" % type)
def set_function_type(self, address: int, type: str):
make_function(address)
typeSig = CParserUtils.parseSignature(None, currentProgram, type)
ApplyFunctionSignatureCmd(toAddr(address), typeSig, SourceType.USER_DEFINED, False, True).applyTo(currentProgram)
def set_data_comment(self, address: int, cmt: str):
setEOLComment(toAddr(address), cmt)
def set_function_comment(self, address: int, cmt: str):
setPlateComment(toAddr(address), cmt)
def set_data_name(self, address: int, name: str):
address = toAddr(address)
if len(name) > 2000:
print("Name length exceeds 2000 characters, skipping (%s)" % name)
return
if not name.startswith("_ZN"):
createLabel(address, name, True)
return
cmd = DemanglerCmd(address, name)
if not cmd.applyTo(currentProgram, monitor):
print(f"Failed to apply demangled name to {name} at {address} due {cmd.getStatusMsg()}, falling back to mangled")
createLabel(address, name, True)
def set_function_name(self, address: int, name: str):
return self.set_data_name(address, name)
def add_cross_reference(self, from_address: int, to_address: int):
self.xrefs.addMemoryReference(toAddr(from_address), toAddr(to_address), RefType.DATA, SourceType.USER_DEFINED, 0)
def import_c_typedef(self, type_def: str):
# Code declarations are not supported in Ghidra
# This only affects string literals for metadata version < 19
# TODO: Replace with creating a DataType for enums
pass
class GhidraStatusHandler(BaseStatusHandler):
pass
def set_function_type(addr, sig):
make_function(addr)
typeSig = CParserUtils.parseSignature(None, currentProgram, sig)
ApplyFunctionSignatureCmd(toAddr(addr), typeSig, SourceType.USER_DEFINED, False, True).applyTo(currentProgram)
def set_type(addr, cppType):
if cppType.startswith('struct '):
cppType = cppType[7:]
try:
t = getDataTypes(cppType)[0]
addr = toAddr(addr)
removeDataAt(addr)
createData(addr, t)
except:
print("Failed to set type: %s" % cppType)
def set_comment(addr, text):
setEOLComment(toAddr(addr), text)
def set_header_comment(addr, text):
setPlateComment(toAddr(addr), text)
def script_prologue(status):
# Check that the user has parsed the C headers first
if len(getDataTypes('Il2CppObject')) == 0:
print('STOP! You must import the generated C header file (%TYPE_HEADER_RELATIVE_PATH%) before running this script.')
print('See https://github.com/djkaty/Il2CppInspector/blob/master/README.md#adding-metadata-to-your-ghidra-workflow for instructions.')
sys.exit()
# Ghidra sets the image base for ELF to 0x100000 for some reason
# https://github.com/NationalSecurityAgency/ghidra/issues/1020
# Make sure that the base address is 0
# Without this, Ghidra may not analyze the binary correctly and you will just waste your time
# If 0 doesn't work for you, replace it with the base address from the output of the CLI or GUI
if currentProgram.getExecutableFormat().endswith('(ELF)'):
currentProgram.setImageBase(toAddr(0), True)
# Don't trigger decompiler
setAnalysisOption(currentProgram, "Call Convention ID", "false")
def get_script_directory(): return getSourceFile().getParentFile().toString()
def script_epilogue(status): pass
def add_function_to_group(addr, group): pass
def add_xref(addr, to):
xrefs.addMemoryReference(currentAddress.getAddress(hex(addr)), currentAddress.getAddress(hex(to)), RefType.DATA, SourceType.USER_DEFINED, 0)
def process_string_literals(status, data):
for d in jsonData['stringLiterals']:
define_string(d)
# I don't know how to make inline strings in Ghidra
# Just revert back original impl
addr = parse_address(d)
set_name(addr, d['name'])
set_type(addr, r'struct String *')
set_comment(addr, d['string'])
status.update_progress()
class StatusHandler(BaseStatusHandler): pass
status = GhidraStatusHandler()
backend = GhidraDisassemblerInterface()
context = ScriptContext(backend, status)
context.process()