Struct reading and disassembly script overhaul, various misc. loading fixes, bump to .NET 9 (#13)

* Bump projects to .net 9 and update nugets

* add VersionedSerialization + source generator

* migrate versioning to StructVersion class, add handling/detection for 29.2/31.2

* add new struct definitions

* rename serialization methods and add BinaryObjectStreamReader for interop

* Rework metadata struct loading to use new struct versioning

* move 29/31.1/.2 to use tags (-2022,-2023) instead of minor versions

* fix metadata usage validity checks

* rework code registration offsetting a bit and add second 29/31.1 condition

* tweak .1 condition (again)

* 29/31.2 was a psyop

* also remove 29.2 from the readme

* remove loading of packed dlls - this was a very unsafe feature

* support auto-recovering type indices from type handles
fixes loading of memory-dumped v29+ libraries since those replace their class indices on load with a pointer to the corresponding type

* support loading PEs without an export table

* also read UnresolvedVirtualCallCount on regular v31

* Disable plugin loading for now

* Overhaul disassembler script + add Binary Ninja target (#12)

* Overhaul disassembler scripts:
- No longer defines top level functions
- Split into three classes: StatusHandler (like before), DisassemblerInterface (for interfacing with the used program API), ScriptContext (for defining general functions that use the disassembler interface)
- Add type annotations to all class methods and remove 2.7 compatibility stuff (Ghidra now supports Python 3, so this is no longer necessary)
- Disassembler backends are now responsible for launching metadata/script processing, to better support disassembler differences
- String handling is back in the base ScriptContext class, disassembler interfaces opt into the fake string segment creation and fall back to the old method if it isn't supported

* Add Binary Ninja disassembler script backend
This uses the new backend-controlled execution to launch metadata processing on a background thread to keep the ui responsive

* make binary ninja script use own _BINARYNINJA_ define and add define helpers to header

* Update README to account for new script and binary ninja backend

* implement fake string segment functions for binary ninja but don't advertise support

* also cache API function types in binary ninja backend

* fix ida script and disable folders again

* Fix metadata usage issues caused by it being a value type now

* make TryMapVATR overrideable and implement it for ELFs

* Make field offset reading use TryMapVATR to reduce exceptions

* Fix NRE in Assembly ctor on < v24.2

* Update actions workflow to produce cross-platform CLI binaries, update readme to reflect .net 9 changes

* workflow: only restore packages for projects that are being built

* workflow: tweak caching and fix gui compilation

* workflow: remove double .zip in CLI artifact name

* 29/31.2 don't actually exist, this logic is not needed
This commit is contained in:
Luke
2024-11-14 14:32:11 +01:00
committed by GitHub
parent 5b0476fcc5
commit b05c03964a
130 changed files with 5117 additions and 4371 deletions

View File

@@ -0,0 +1,284 @@
from binaryninja import *
#try:
# from typing import TYPE_CHECKING
# if TYPE_CHECKING:
# from ..shared_base import BaseStatusHandler, BaseDisassemblerInterface, ScriptContext
# import json
# import os
# import sys
# from datetime import datetime
#except:
# pass
CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
    """Disassembler backend that applies script output to a Binary Ninja view.

    All state except the status handler is initialised in ``on_start``, so
    no other method may be used before ``on_start`` has run.
    """

    # The fake string segment functions below are implemented, however the
    # write API does not seem to work properly here (possibly a bug), so
    # support is not advertised for now.
    supports_fake_string_segment: bool = False

    # add_function_to_group() is a no-op while this is False. The original
    # implementation returned unconditionally before doing any work.
    # NOTE(review): the reason for disabling it is not visible in this file.
    _components_enabled: bool = False

    _status: BaseStatusHandler
    _view: BinaryView
    _undo_id: str
    _components: dict[str, Component]
    _type_cache: dict[str, Type]
    _function_type_cache: dict[str, Type]
    _address_size: int
    _endianness: Literal["little", "big"]

    def __init__(self, status: BaseStatusHandler):
        """Store the status handler used to report progress."""
        self._status = status

    def _get_or_create_type(self, type: str) -> Type:
        """Resolve a C type name to a Binary Ninja type, caching the result.

        A leading ``struct `` or ``class `` keyword is dropped. A trailing
        ``*`` produces a pointer to the recursively resolved base type.
        Other names are looked up in the view and, if unknown there, parsed
        as a type string.
        """
        for keyword in ("struct ", "class "):
            if type.startswith(keyword):
                type = type[len(keyword):]
                break

        try:
            return self._type_cache[type]
        except KeyError:
            pass

        if type.endswith("*"):
            pointee = self._get_or_create_type(type[:-1].strip())
            parsed = PointerType.create(self._view.arch, pointee)  # type: ignore
        else:
            parsed = self._view.get_type_by_name(type)
            if parsed is None:
                # Only the first element of the returned pair is the type.
                parsed, _ = self._view.parse_type_string(type)

        self._type_cache[type] = parsed
        return parsed

    def _set_data_var_type(self, address: int, data_type: Type):
        """Retype the data variable at *address*, defining it if missing."""
        var = self._view.get_data_var_at(address)
        if var is None:
            self._view.define_user_data_var(address, data_type)
        else:
            var.type = data_type

    def _resolve_symbol_name(self, name: str) -> str:
        """Return the name to apply for *name*, demangling ``_Z`` symbols.

        Falls back to the unmodified input when demangling does not yield
        a list of name parts.
        """
        if not name.startswith("_Z"):
            return name

        _, demangled = demangle_gnu3(self._view.arch, name, self._view)
        # On failure the demangler hands back the mangled input as a plain
        # string; joining that as a name list would insert "::" between
        # every character, so keep the original name instead.
        if isinstance(demangled, str) or not demangled:
            return name
        return get_qualified_name(demangled)

    def get_script_directory(self) -> str:
        """Return the directory containing this script."""
        return CURRENT_PATH

    def on_start(self):
        """Prepare the view and import the types declared in ``il2cpp.h``.

        Begins an undo action, puts analysis on hold, resets all caches and
        then parses ``il2cpp.h`` from the script directory. If parsing
        fails the errors are logged and no types are imported.
        """
        # `bv` is not defined in this file; it is expected to be provided
        # by the environment that executes the script.
        self._view = bv  # type: ignore
        self._undo_id = self._view.begin_undo_actions()
        self._view.set_analysis_hold(True)
        self._components = {}
        self._type_cache = {}
        self._function_type_cache = {}
        self._address_size = self._view.address_size
        self._endianness = "little" if self._view.endianness == Endianness.LittleEndian else "big"

        self._status.update_step("Parsing header")

        header_path = os.path.join(self.get_script_directory(), "il2cpp.h")
        # Explicit encoding so decoding does not depend on the host locale.
        with open(header_path, "r", encoding="utf-8") as f:
            header_source = f.read()

        platform = self._view.platform
        if platform is None:
            platform = Platform["windows-x86_64"]

        parsed_types, errors = TypeParser.default.parse_types_from_source(
            header_source,
            "il2cpp.h",
            platform,
            self._view,
            [
                "--target=x86_64-pc-linux",
                "-x", "c++",
                "-D_BINARYNINJA_=1",
            ],
        )

        if parsed_types is None:
            log_error("Failed to import header")
            log_error(errors)
            return

        self._status.update_step("Importing header types", len(parsed_types.types))

        def import_progress_func(progress: int, total: int):
            # Each callback counts as one processed item; returning True
            # lets the import continue.
            self._status.update_progress(1)
            return True

        self._view.define_user_types(
            [(x.name, x.type) for x in parsed_types.types],
            import_progress_func,
        )

    def on_finish(self):
        """Commit the undo action, release the analysis hold and re-analyse."""
        self._view.commit_undo_actions(self._undo_id)
        self._view.set_analysis_hold(False)
        self._view.update_analysis()

    def define_function(self, address: int, end: int | None = None):
        """Create a function at *address* unless one already exists.

        *end* is accepted for interface compatibility and is not used.
        """
        if self._view.get_function_at(address) is None:
            self._view.create_user_function(address)

    def define_data_array(self, address: int, type: str, count: int):
        """Define an array of *count* elements of *type* at *address*."""
        element_type = self._get_or_create_type(type)
        self._set_data_var_type(address, ArrayType.create(element_type, count))

    def set_data_type(self, address: int, type: str):
        """Apply *type* to the data variable at *address*."""
        self._set_data_var_type(address, self._get_or_create_type(type))

    def set_function_type(self, address: int, type: str):
        """Apply the signature *type* to the function at *address*, if any.

        Uses the type cached by ``cache_function_types`` when available,
        otherwise assigns the signature string with ``this`` quoted in
        backticks.
        """
        function = self._view.get_function_at(address)
        if function is None:
            return

        if type in self._function_type_cache:
            function.type = self._function_type_cache[type]  # type: ignore
        else:
            function.type = type.replace("this", "`this`")

    def set_data_comment(self, address: int, cmt: str):
        """Attach the comment *cmt* to *address*."""
        self._view.set_comment_at(address, cmt)

    def set_function_comment(self, address: int, cmt: str):
        """Set the comment of the function at *address*, if one exists."""
        function = self._view.get_function_at(address)
        if function is None:
            return
        function.comment = cmt

    def set_data_name(self, address: int, name: str):
        """Name the data variable at *address*, demangling ``_Z`` symbols."""
        var = self._view.get_data_var_at(address)
        if var is None:
            return
        var.name = self._resolve_symbol_name(name)

    def set_function_name(self, address: int, name: str):
        """Name the function at *address*, demangling ``_Z`` symbols.

        The demangled type is intentionally not applied: it does not work
        because the generated types are not namespaced.
        """
        function = self._view.get_function_at(address)
        if function is None:
            return
        function.name = self._resolve_symbol_name(name)

    def add_cross_reference(self, from_address: int, to_address: int):
        """Add a user data reference from *from_address* to *to_address*."""
        self._view.add_user_data_ref(from_address, to_address)

    def import_c_typedef(self, type_def: str):
        """Define a user type from the C declaration *type_def*."""
        self._view.define_user_type(None, type_def)

    # optional
    def _get_or_create_component(self, name: str):
        """Return the component for the "/"-separated path *name*.

        Missing parent components are created recursively, and every
        created component is cached under its full path.
        """
        if name in self._components:
            return self._components[name]

        parent_name, separator, child_name = name.rpartition("/")
        if separator:
            # Recurse on the parent path (not the full name) so that the
            # recursion terminates once no separator is left.
            parent = self._get_or_create_component(parent_name)
            component = self._view.create_component(child_name, parent)
        else:
            component = self._view.create_component(name)

        self._components[name] = component
        return component

    def add_function_to_group(self, address: int, group: str):
        """Add the function at *address* to the component *group*.

        Does nothing unless ``_components_enabled`` is set.
        """
        if not self._components_enabled:
            return

        function = self._view.get_function_at(address)
        if function is None:
            return
        self._get_or_create_component(group).add_function(function)

    def cache_function_types(self, signatures: list[str]):
        """Parse *signatures* in one batch and cache the resulting types.

        Duplicate signatures are parsed once. Every ``this`` in the source
        text is replaced with ``_this`` before parsing.
        """
        function_sigs = list(set(signatures))
        if not function_sigs:
            return

        typestr = ";\n".join(function_sigs).replace("this", "_this") + ";"
        res = self._view.parse_types_from_string(typestr)
        # NOTE(review): the pairing below assumes the parser returns exactly
        # one function per signature, in declaration order - confirm.
        for function_sig, function in zip(function_sigs, res.functions.values()):  # type: ignore
            self._function_type_cache[function_sig] = function

    # only required if supports_fake_string_segment == True
    def create_fake_segment(self, name: str, size: int) -> int:
        """Add a segment and section of *size* bytes after the last mapped range.

        The start is rounded up to a multiple of 0x1000. Returns the start
        address of the new segment.
        """
        page_size = 0x1000
        segment_start = self._view.mapped_address_ranges[-1].end
        misalignment = segment_start % page_size
        if misalignment != 0:
            segment_start += page_size - misalignment

        self._view.add_user_segment(segment_start, size, 0, 0, SegmentFlag.SegmentContainsData)
        self._view.add_user_section(name, segment_start, size, SectionSemantics.ReadOnlyDataSectionSemantics)
        return segment_start

    def write_string(self, address: int, value: str):
        """Write *value* followed by a NUL byte at *address*."""
        self._view.write(address, value.encode() + b"\x00")

    def write_address(self, address: int, value: int):
        """Write *value* at *address* using the view's address size and endianness."""
        self._view.write(address, value.to_bytes(self._address_size, self._endianness))
class BinaryNinjaStatusHandler(BaseStatusHandler):
    """Status handler that reports progress through a background task thread."""

    def __init__(self, thread: BackgroundTaskThread):
        """Start the timers and remember the thread whose progress text is updated."""
        started = datetime.now()
        self._thread = thread
        self.step = "Initializing"
        self.max_items = 0
        self.current_items = 0
        self.start_time = started
        self.step_start_time = started
        # datetime.min makes the first update() pass the rate limit.
        self.last_updated_time = datetime.min

    def initialize(self):
        """Nothing to set up for this handler."""
        pass

    def update(self):
        """Refresh the progress text, at most once every half second.

        Raises:
            RuntimeError: if ``was_cancelled()`` reports a cancellation.
        """
        if self.was_cancelled():
            raise RuntimeError("Cancelled script.")

        current_time = datetime.now()
        since_last_update = current_time - self.last_updated_time
        if since_last_update.total_seconds() < 0.5:
            return

        self.last_updated_time = current_time
        step_time = current_time - self.step_start_time
        total_time = current_time - self.start_time
        self._thread.progress = f"Processing IL2CPP metadata: {self.step} ({self.current_items}/{self.max_items}), elapsed: {step_time} ({total_time})"

    def update_step(self, step, max_items=0):
        """Begin a new step, resetting its item counter and timer."""
        self.step, self.max_items, self.current_items = step, max_items, 0
        self.step_start_time = datetime.now()
        # Reset the rate limit so the new step is shown immediately.
        self.last_updated_time = datetime.min
        self.update()

    def update_progress(self, new_progress=1):
        """Advance the item counter by *new_progress* and refresh the text."""
        self.current_items = self.current_items + new_progress
        self.update()

    def was_cancelled(self):
        """Always report that the script was not cancelled."""
        return False

    def close(self):
        """Nothing to clean up for this handler."""
        pass
# Entry point
class Il2CppTask(BackgroundTaskThread):
    """Background task that runs the script's metadata processing."""

    def __init__(self):
        """Create the task with its initial progress text."""
        super().__init__("Processing IL2CPP metadata...", False)

    def run(self):
        """Wire up the status handler and backend, then process the metadata."""
        status_handler = BinaryNinjaStatusHandler(self)
        disassembler = BinaryNinjaDisassemblerInterface(status_handler)
        ScriptContext(disassembler, status_handler).process()


# Start processing as soon as the script is executed.
task = Il2CppTask()
task.start()