Struct reading and disassembly script overhaul, various misc. loading fixes, bump to .NET 9 (#13)

* Bump projects to .net 9 and update nugets * add VersionedSerialization + source generator * migrate versioning to StructVersion class, add handling/detection for 29.2/31.2 * add new struct definitions * rename serialization methods and add BinaryObjectStreamReader for interop * Rework metadata struct loading to use new struct versioning * move 29/31.1/.2 to use tags (-2022,-2023) instead of minor versions * fix metadata usage validity checks * rework code registration offsetting a bit and add second 29/31.1 condition * tweak .1 condition (again) * 29/31.2 was a psyop * also remove 29.2 from the readme * remove loading of packed dlls - this was a very unsafe feature * support auto-recovering type indices from type handles fixes loading of memory-dumped v29+ libraries since those replacee their class indices on load with a pointer to the corresponding type * support loading PEs without an export table * also read UnresolvedVirtualCallCount on regular v31 * Disable plugin loading for now * Overhaul disassembler script + add Binary Ninja target (#12) * Overhaul diassembler scripts: - No longer defines top level functions - Split into three classes: StatusHandler (like before), DisassemblerInterface (for interfacing with the used program API), ScriptContext (for definiting general functions that use the disassembler interface) - Add type annotations to all class methods and remove 2.7 compatibility stuff (Ghidra now supports Python 3 so this is unnecessary anymore) - Disassembler backends are now responsible for launching metadata/script processing, to better support disassembler differences - String handling is back in the base ScriptContext class, disassembler interfaces opt into the fake string segment creation and fall back to the old method if it isn't supported * Add Binary Ninja disassembler script backend This uses the new backend-controlled execution to launch metadata processing on a background thread to keep the ui responsive * make binary ninja script use own _BINARYNINJA_ define and add define helpers to header * Update README to account for new script and binary ninja backend * implement fake string segment functions for binary ninja but don't advertise support * also cache API function types in binary ninja backend * fix ida script and disable folders again * Fix metadata usage issues caused by it being a value type now * make TryMapVATR overrideable and implement it for ELFs * Make field offset reading use TryMapVATR to reduce exceptions * Fix NRE in Assembly ctor on < v24.2 * Update actions workflow to produce cross-platform CLI binaries, update readme to reflect .net 9 changes * workflow: only restore packages for projects that are being built * workflow: tweak caching and fix gui compilation * workflow: remove double .zip in CLI artifact name * 29/31.2 don't actually exist, this logic is not needed
2024-11-14 14:32:11 +01:00
parent 5b0476fcc5
commit b05c03964a
130 changed files with 5117 additions and 4371 deletions
--- a/Il2CppInspector.Common/Outputs/ScriptResources/Targets/BinaryNinja.py
+++ b/Il2CppInspector.Common/Outputs/ScriptResources/Targets/BinaryNinja.py
@@ -0,0 +1,284 @@
+from binaryninja import *
+
+#try:
+#	from typing import TYPE_CHECKING
+#	if TYPE_CHECKING:
+#		from ..shared_base import BaseStatusHandler, BaseDisassemblerInterface, ScriptContext
+#		import json
+#		import os
+#		import sys
+#		from datetime import datetime
+#except:
+#	pass
+
+CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
+
+class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
+	# this is implemented, 
+	# however the write API does not seem to work properly here (possibly a bug), 
+	# so this is disabled for now
+	supports_fake_string_segment: bool = False
+
+	_status: BaseStatusHandler
+	
+	_view: BinaryView
+	_undo_id: str
+	_components: dict[str, Component]
+	_type_cache: dict[str, Type]
+	_function_type_cache: dict[str, Type]
+
+	_address_size: int
+	_endianness: Literal["little", "big"]
+
+	def __init__(self, status: BaseStatusHandler):
+		self._status = status
+
+	def _get_or_create_type(self, type: str) -> Type:
+		if type.startswith("struct "):
+			type = type[len("struct "):]
+		elif type.startswith("class "):
+			type = type[len("class "):]
+
+		if type in self._type_cache:
+			return self._type_cache[type]
+		
+		if type.endswith("*"):
+			base_type = self._get_or_create_type(type[:-1].strip())
+
+			parsed = PointerType.create(self._view.arch, base_type)  # type: ignore
+		else:
+			parsed = self._view.get_type_by_name(type)
+			if parsed is None:
+				parsed, errors = self._view.parse_type_string(type)
+
+		self._type_cache[type] = parsed
+		return parsed
+
+	def get_script_directory(self) -> str:
+		return CURRENT_PATH
+
+	def on_start(self):
+		self._view = bv # type: ignore
+		self._undo_id = self._view.begin_undo_actions()
+		self._view.set_analysis_hold(True)
+		self._components = {}
+		self._type_cache = {}
+		self._function_type_cache = {}
+
+		self._address_size = self._view.address_size
+		self._endianness = "little" if self._view.endianness == Endianness.LittleEndian else "big"
+		
+		self._status.update_step("Parsing header")
+
+		with open(os.path.join(self.get_script_directory(), "il2cpp.h"), "r") as f:
+			parsed_types, errors = TypeParser.default.parse_types_from_source(
+				f.read(),
+				"il2cpp.h",
+				self._view.platform if self._view.platform is not None else Platform["windows-x86_64"],
+				self._view,
+				[
+					"--target=x86_64-pc-linux",
+					"-x", "c++",
+					"-D_BINARYNINJA_=1"
+				]
+			)
+
+			if parsed_types is None:
+				log_error("Failed to import header")
+				log_error(errors)
+				return
+
+		self._status.update_step("Importing header types", len(parsed_types.types))
+
+		def import_progress_func(progress: int, total: int):
+			self._status.update_progress(1)
+			return True
+
+		self._view.define_user_types([(x.name, x.type) for x in parsed_types.types], import_progress_func)
+
+	def on_finish(self):
+		self._view.commit_undo_actions(self._undo_id)
+		self._view.set_analysis_hold(False)
+		self._view.update_analysis()
+
+	def define_function(self, address: int, end: int | None = None):
+		if self._view.get_function_at(address) is not None:
+			return
+		
+		self._view.create_user_function(address)
+
+	def define_data_array(self, address: int, type: str, count: int):
+		parsed_type = self._get_or_create_type(type)
+		array_type = ArrayType.create(parsed_type, count)
+		var = self._view.get_data_var_at(address)
+		if var is None:
+			self._view.define_user_data_var(address, array_type)
+		else:
+			var.type = array_type
+
+	def set_data_type(self, address: int, type: str):
+		var = self._view.get_data_var_at(address)
+		dtype = self._get_or_create_type(type)
+		if var is None:
+			self._view.define_user_data_var(address, dtype)
+		else:
+			var.type = dtype
+
+	def set_function_type(self, address: int, type: str):
+		function = self._view.get_function_at(address)
+		if function is None:
+			return
+		
+		if type in self._function_type_cache:
+			function.type = self._function_type_cache[type] # type: ignore
+		else:
+			#log_info(f"skipping function type setting for {address}, {type}")
+			#pass
+			function.type = type.replace("this", "`this`")
+
+	def set_data_comment(self, address: int, cmt: str):
+		self._view.set_comment_at(address, cmt)
+
+	def set_function_comment(self, address: int, cmt: str):
+		function = self._view.get_function_at(address)
+		if function is None:
+			return
+
+		function.comment = cmt	
+
+	def set_data_name(self, address: int, name: str):
+		var = self._view.get_data_var_at(address)
+		if var is None:
+			return
+		
+		if name.startswith("_Z"):
+			type, demangled = demangle_gnu3(self._view.arch, name, self._view)
+			var.name = get_qualified_name(demangled)
+		else:
+			var.name = name
+
+	def set_function_name(self, address: int, name: str):
+		function = self._view.get_function_at(address)
+		if function is None:
+			return
+
+		if name.startswith("_Z"):
+			type, demangled = demangle_gnu3(self._view.arch, name, self._view)
+			function.name = get_qualified_name(demangled)
+			#function.type = type - this does not work due to the generated types not being namespaced. :(
+		else:
+			function.name = name
+
+	def add_cross_reference(self, from_address: int, to_address: int):
+		self._view.add_user_data_ref(from_address, to_address)
+
+	def import_c_typedef(self, type_def: str): 
+		self._view.define_user_type(None, type_def)
+
+	# optional
+	def _get_or_create_component(self, name: str):
+		if name in self._components:
+			return self._components[name]
+	
+		current = name
+		if current.count("/") != 0:
+			split_idx = current.rindex("/")
+			parent, child = current[:split_idx], current[split_idx:]
+			parent = self._get_or_create_component(name)
+			component = self._view.create_component(child, parent)
+		else:
+			component = self._view.create_component(name)
+
+		self._components[name] = component
+		return component
+
+	def add_function_to_group(self, address: int, group: str):
+		return
+		function = self._view.get_function_at(address)
+		if function is None:
+			return
+		
+		self._get_or_create_component(group).add_function(function)
+
+	def cache_function_types(self, signatures: list[str]):
+		function_sigs = set(signatures)
+		if len(function_sigs) == 0:
+			return
+		
+		typestr = ";\n".join(function_sigs).replace("this", "_this") + ";"
+		res = self._view.parse_types_from_string(typestr)
+		for function_sig, function in zip(function_sigs, res.functions.values()): # type: ignore
+			self._function_type_cache[function_sig] = function
+
+	# only required if supports_fake_string_segment == True
+	def create_fake_segment(self, name: str, size: int) -> int: 
+		last_end_addr = self._view.mapped_address_ranges[-1].end
+		if last_end_addr % 0x1000 != 0: 
+			last_end_addr += (0x1000 - (last_end_addr % 0x1000))
+
+		self._view.add_user_segment(last_end_addr, size, 0, 0, SegmentFlag.SegmentContainsData)
+		self._view.add_user_section(name, last_end_addr, size, SectionSemantics.ReadOnlyDataSectionSemantics)
+		return last_end_addr
+	
+	def write_string(self, address: int, value: str):
+		self._view.write(address, value.encode() + b"\x00")
+
+	def write_address(self, address: int, value: int):
+		self._view.write(address, value.to_bytes(self._address_size, self._endianness))
+
+
+class BinaryNinjaStatusHandler(BaseStatusHandler):
+	def __init__(self, thread: BackgroundTaskThread):
+		self.step = "Initializing"
+		self.max_items = 0
+		self.current_items = 0
+		self.start_time = datetime.now()
+		self.step_start_time = self.start_time
+		self.last_updated_time = datetime.min
+		self._thread = thread
+	
+	def initialize(self): pass
+
+	def update(self):
+		if self.was_cancelled():
+			raise RuntimeError("Cancelled script.")
+
+		current_time = datetime.now()
+		if 0.5 > (current_time - self.last_updated_time).total_seconds():
+			return
+
+		self.last_updated_time = current_time
+
+		step_time = current_time - self.step_start_time
+		total_time = current_time - self.start_time
+		self._thread.progress = f"Processing IL2CPP metadata: {self.step} ({self.current_items}/{self.max_items}), elapsed: {step_time} ({total_time})"
+
+	def update_step(self, step, max_items = 0):
+		self.step = step
+		self.max_items = max_items
+		self.current_items = 0
+		self.step_start_time = datetime.now()
+		self.last_updated_time = datetime.min
+		self.update()
+
+	def update_progress(self, new_progress = 1):
+		self.current_items += new_progress
+		self.update()
+
+	def was_cancelled(self): return False
+
+	def close(self):
+		pass
+
+# Entry point
+class Il2CppTask(BackgroundTaskThread):
+	def __init__(self):
+		BackgroundTaskThread.__init__(self, "Processing IL2CPP metadata...", False)
+
+	def run(self):
+		status = BinaryNinjaStatusHandler(self)
+		backend = BinaryNinjaDisassemblerInterface(status)
+		context = ScriptContext(backend, status)
+		context.process()
+
+Il2CppTask().start()