From ec76447122d92967b14db1246002408120417d26 Mon Sep 17 00:00:00 2001 From: Luke <17146677+LukeFZ@users.noreply.github.com> Date: Sat, 25 Jan 2025 14:21:56 +0100 Subject: [PATCH] Fix misc. issues in the generated disassembler scripts (#17) * attempt to fix ghidra script issues * add ghidra python 3 note to readme * fix binary ninja script erroring on function type parsing * fix ida script not skipping function creation on pe binaries * fix writing of multibyte strings breaking fake string segment * also adjust binja write_string impl --- .../ScriptResources/Targets/BinaryNinja.py | 56 ++++++++++++------- .../Outputs/ScriptResources/Targets/Ghidra.py | 4 +- .../Outputs/ScriptResources/Targets/IDA.py | 7 ++- .../Outputs/ScriptResources/shared_base.py | 13 +++-- README.md | 2 + 5 files changed, 53 insertions(+), 29 deletions(-) diff --git a/Il2CppInspector.Common/Outputs/ScriptResources/Targets/BinaryNinja.py b/Il2CppInspector.Common/Outputs/ScriptResources/Targets/BinaryNinja.py index 10af829..fc2ada7 100644 --- a/Il2CppInspector.Common/Outputs/ScriptResources/Targets/BinaryNinja.py +++ b/Il2CppInspector.Common/Outputs/ScriptResources/Targets/BinaryNinja.py @@ -8,6 +8,8 @@ from binaryninja import * # import os # import sys # from datetime import datetime +# from typing import Literal +# bv: BinaryView = None # type: ignore #except: # pass @@ -30,6 +32,12 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface): _address_size: int _endianness: Literal["little", "big"] + TYPE_PARSER_OPTIONS = [ + "--target=x86_64-pc-linux", + "-x", "c++", + "-D_BINARYNINJA_=1" + ] + def __init__(self, status: BaseStatusHandler): self._status = status @@ -54,6 +62,22 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface): self._type_cache[type] = parsed return parsed + def _parse_type_source(self, types: str, filename: str | None = None): + parsed_types, errors = TypeParser.default.parse_types_from_source( + types, + filename if filename else "types.hpp", + self._view.platform if self._view.platform is not None else Platform["windows-x86_64"], + self._view, + self.TYPE_PARSER_OPTIONS + ) + + if parsed_types is None: + log_error("Failed to import types.") + log_error(errors) + return None + + return parsed_types + def get_script_directory(self) -> str: return CURRENT_PATH @@ -71,21 +95,8 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface): self._status.update_step("Parsing header") with open(os.path.join(self.get_script_directory(), "il2cpp.h"), "r") as f: - parsed_types, errors = TypeParser.default.parse_types_from_source( - f.read(), - "il2cpp.h", - self._view.platform if self._view.platform is not None else Platform["windows-x86_64"], - self._view, - [ - "--target=x86_64-pc-linux", - "-x", "c++", - "-D_BINARYNINJA_=1" - ] - ) - + parsed_types = self._parse_type_source(f.read(), "il2cpp.hpp") if parsed_types is None: - log_error("Failed to import header") - log_error(errors) return self._status.update_step("Importing header types", len(parsed_types.types)) @@ -206,9 +217,14 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface): return typestr = ";\n".join(function_sigs).replace("this", "_this") + ";" - res = self._view.parse_types_from_string(typestr) - for function_sig, function in zip(function_sigs, res.functions.values()): # type: ignore - self._function_type_cache[function_sig] = function + parsed_types = self._parse_type_source(typestr, "cached_types.hpp") + if parsed_types is None: + return + + # bv.parse_types_from_source returns a dict in the functions field. + # TypeParser.parse_types_from_source does not. + for function_sig, function in zip(function_sigs, parsed_types.functions): + self._function_type_cache[function_sig] = function.type # only required if supports_fake_string_segment == True def create_fake_segment(self, name: str, size: int) -> int: @@ -220,8 +236,10 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface): self._view.add_user_section(name, last_end_addr, size, SectionSemantics.ReadOnlyDataSectionSemantics) return last_end_addr - def write_string(self, address: int, value: str): - self._view.write(address, value.encode() + b"\x00") + def write_string(self, address: int, value: str) -> int: + encoded = value.encode() + b"\x00" + self._view.write(address, encoded) + return len(encoded) def write_address(self, address: int, value: int): self._view.write(address, value.to_bytes(self._address_size, self._endianness)) diff --git a/Il2CppInspector.Common/Outputs/ScriptResources/Targets/Ghidra.py b/Il2CppInspector.Common/Outputs/ScriptResources/Targets/Ghidra.py index 8544895..df0e4a2 100644 --- a/Il2CppInspector.Common/Outputs/ScriptResources/Targets/Ghidra.py +++ b/Il2CppInspector.Common/Outputs/ScriptResources/Targets/Ghidra.py @@ -5,6 +5,7 @@ from ghidra.program.model.data import ArrayDataType from ghidra.program.model.symbol import SourceType from ghidra.program.model.symbol import RefType from ghidra.app.cmd.label import DemanglerCmd +from ghidra.app.services import DataTypeManagerService #try: # from typing import TYPE_CHECKING @@ -77,8 +78,7 @@ class GhidraDisassemblerInterface(BaseDisassemblerInterface): print("Failed to set type: %s" % type) def set_function_type(self, address: int, type: str): - make_function(address) - typeSig = CParserUtils.parseSignature(None, currentProgram, type) + typeSig = CParserUtils.parseSignature(DataTypeManagerService@None, currentProgram, type) ApplyFunctionSignatureCmd(toAddr(address), typeSig, SourceType.USER_DEFINED, False, True).applyTo(currentProgram) def set_data_comment(self, address: int, cmt: str): diff --git a/Il2CppInspector.Common/Outputs/ScriptResources/Targets/IDA.py b/Il2CppInspector.Common/Outputs/ScriptResources/Targets/IDA.py index d73ba13..ca7f055 100644 --- a/Il2CppInspector.Common/Outputs/ScriptResources/Targets/IDA.py +++ b/Il2CppInspector.Common/Outputs/ScriptResources/Targets/IDA.py @@ -106,8 +106,8 @@ class IDADisassemblerInterface(BaseDisassemblerInterface): ida_typeinf.set_c_macros(original_macros) # Skip make_function on Windows GameAssembly.dll files due to them predefining all functions through pdata which makes the method very slow - skip_make_function = ida_segment.get_segm_by_name(".pdata") is not None - if skip_make_function: + self._skip_function_creation = ida_segment.get_segm_by_name(".pdata") is not None + if self._skip_function_creation: print(".pdata section found, skipping function boundaries") if FOLDERS_AVAILABLE: @@ -202,11 +202,12 @@ class IDADisassemblerInterface(BaseDisassemblerInterface): return start - def write_string(self, address: int, value: str): + def write_string(self, address: int, value: str) -> int: encoded_string = value.encode() + b'\x00' string_length = len(encoded_string) ida_bytes.put_bytes(address, encoded_string) ida_bytes.create_strlit(address, string_length, ida_nalt.STRTYPE_C) + return string_length def write_address(self, address: int, value: int): if self._is_32_bit: diff --git a/Il2CppInspector.Common/Outputs/ScriptResources/shared_base.py b/Il2CppInspector.Common/Outputs/ScriptResources/shared_base.py index e5ca8da..8693fee 100644 --- a/Il2CppInspector.Common/Outputs/ScriptResources/shared_base.py +++ b/Il2CppInspector.Common/Outputs/ScriptResources/shared_base.py @@ -64,7 +64,7 @@ class BaseDisassemblerInterface(abc.ABC): # only required if supports_fake_string_segment == True def create_fake_segment(self, name: str, size: int) -> int: return 0 - def write_string(self, address: int, value: str): pass + def write_string(self, address: int, value: str) -> int: pass def write_address(self, address: int, value: int): pass class ScriptContext: @@ -191,11 +191,11 @@ class ScriptContext: self.define_string(d) ref_addr = self.parse_address(d) - self._backend.write_string(current_string_address, d["string"]) + written_string_length = self._backend.write_string(current_string_address, d["string"]) self._backend.set_data_type(ref_addr, r'const char* const') self._backend.write_address(ref_addr, current_string_address) - current_string_address += len(d["string"]) + 1 + current_string_address += written_string_length self._status.update_progress() else: for d in metadata['stringLiterals']: @@ -286,5 +286,8 @@ class ScriptContext: end_time = datetime.now() print(f"Took: {end_time - start_time}") - except RuntimeError: pass - finally: self._status.shutdown() \ No newline at end of file + except RuntimeError: + pass + + finally: + self._status.shutdown() \ No newline at end of file diff --git a/README.md b/README.md index 8307125..6c33eaf 100644 --- a/README.md +++ b/README.md @@ -354,6 +354,8 @@ Example IDA C++ decompilation after applying Il2CppInspector (initialization cod ### Adding metadata to your Ghidra workflow +**NOTE:** The Ghidra script currently requires Python 3 support in Ghidra, meaning you have to use v11.3+ (or install the latest dev version if 11.3 is not out yet), as well as the `PyGhidra` launch mode. + Run Il2CppInspector with the `-p` switch to choose the Ghidra script output file, and `-t Ghidra` (case-sensitive) to specify Ghidra as the script type you wish to produce. Three files are generated by Il2CppInspector for Ghidra: the Python script itself, a JSON metadata file and a C++ type header file. These files must be present for the script to run successfully.