Fix misc. issues in the generated disassembler scripts (#17)

* attempt to fix ghidra script issues

* add ghidra python 3 note to readme

* fix binary ninja script erroring on function type parsing

* fix ida script not skipping function creation on pe binaries

* fix writing of multibyte strings breaking fake string segment

* also adjust binja write_string impl
This commit is contained in:
Luke
2025-01-25 14:21:56 +01:00
committed by GitHub
parent 4e46c29cee
commit ec76447122
5 changed files with 53 additions and 29 deletions

View File

@@ -8,6 +8,8 @@ from binaryninja import *
# import os # import os
# import sys # import sys
# from datetime import datetime # from datetime import datetime
# from typing import Literal
# bv: BinaryView = None # type: ignore
#except: #except:
# pass # pass
@@ -30,6 +32,12 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
_address_size: int _address_size: int
_endianness: Literal["little", "big"] _endianness: Literal["little", "big"]
TYPE_PARSER_OPTIONS = [
"--target=x86_64-pc-linux",
"-x", "c++",
"-D_BINARYNINJA_=1"
]
def __init__(self, status: BaseStatusHandler): def __init__(self, status: BaseStatusHandler):
self._status = status self._status = status
@@ -54,6 +62,22 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
self._type_cache[type] = parsed self._type_cache[type] = parsed
return parsed return parsed
def _parse_type_source(self, types: str, filename: str | None = None):
parsed_types, errors = TypeParser.default.parse_types_from_source(
types,
filename if filename else "types.hpp",
self._view.platform if self._view.platform is not None else Platform["windows-x86_64"],
self._view,
self.TYPE_PARSER_OPTIONS
)
if parsed_types is None:
log_error("Failed to import types.")
log_error(errors)
return None
return parsed_types
def get_script_directory(self) -> str: def get_script_directory(self) -> str:
return CURRENT_PATH return CURRENT_PATH
@@ -71,21 +95,8 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
self._status.update_step("Parsing header") self._status.update_step("Parsing header")
with open(os.path.join(self.get_script_directory(), "il2cpp.h"), "r") as f: with open(os.path.join(self.get_script_directory(), "il2cpp.h"), "r") as f:
parsed_types, errors = TypeParser.default.parse_types_from_source( parsed_types = self._parse_type_source(f.read(), "il2cpp.hpp")
f.read(),
"il2cpp.h",
self._view.platform if self._view.platform is not None else Platform["windows-x86_64"],
self._view,
[
"--target=x86_64-pc-linux",
"-x", "c++",
"-D_BINARYNINJA_=1"
]
)
if parsed_types is None: if parsed_types is None:
log_error("Failed to import header")
log_error(errors)
return return
self._status.update_step("Importing header types", len(parsed_types.types)) self._status.update_step("Importing header types", len(parsed_types.types))
@@ -206,9 +217,14 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
return return
typestr = ";\n".join(function_sigs).replace("this", "_this") + ";" typestr = ";\n".join(function_sigs).replace("this", "_this") + ";"
res = self._view.parse_types_from_string(typestr) parsed_types = self._parse_type_source(typestr, "cached_types.hpp")
for function_sig, function in zip(function_sigs, res.functions.values()): # type: ignore if parsed_types is None:
self._function_type_cache[function_sig] = function return
# bv.parse_types_from_string returns a dict in the functions field.
# TypeParser.parse_types_from_source does not, so iterate the entries directly and read each entry's .type.
for function_sig, function in zip(function_sigs, parsed_types.functions):
self._function_type_cache[function_sig] = function.type
# only required if supports_fake_string_segment == True # only required if supports_fake_string_segment == True
def create_fake_segment(self, name: str, size: int) -> int: def create_fake_segment(self, name: str, size: int) -> int:
@@ -220,8 +236,10 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
self._view.add_user_section(name, last_end_addr, size, SectionSemantics.ReadOnlyDataSectionSemantics) self._view.add_user_section(name, last_end_addr, size, SectionSemantics.ReadOnlyDataSectionSemantics)
return last_end_addr return last_end_addr
def write_string(self, address: int, value: str): def write_string(self, address: int, value: str) -> int:
self._view.write(address, value.encode() + b"\x00") encoded = value.encode() + b"\x00"
self._view.write(address, encoded)
return len(encoded)
def write_address(self, address: int, value: int): def write_address(self, address: int, value: int):
self._view.write(address, value.to_bytes(self._address_size, self._endianness)) self._view.write(address, value.to_bytes(self._address_size, self._endianness))

View File

@@ -5,6 +5,7 @@ from ghidra.program.model.data import ArrayDataType
from ghidra.program.model.symbol import SourceType from ghidra.program.model.symbol import SourceType
from ghidra.program.model.symbol import RefType from ghidra.program.model.symbol import RefType
from ghidra.app.cmd.label import DemanglerCmd from ghidra.app.cmd.label import DemanglerCmd
from ghidra.app.services import DataTypeManagerService
#try: #try:
# from typing import TYPE_CHECKING # from typing import TYPE_CHECKING
@@ -77,8 +78,7 @@ class GhidraDisassemblerInterface(BaseDisassemblerInterface):
print("Failed to set type: %s" % type) print("Failed to set type: %s" % type)
def set_function_type(self, address: int, type: str): def set_function_type(self, address: int, type: str):
make_function(address) typeSig = CParserUtils.parseSignature(DataTypeManagerService@None, currentProgram, type)
typeSig = CParserUtils.parseSignature(None, currentProgram, type)
ApplyFunctionSignatureCmd(toAddr(address), typeSig, SourceType.USER_DEFINED, False, True).applyTo(currentProgram) ApplyFunctionSignatureCmd(toAddr(address), typeSig, SourceType.USER_DEFINED, False, True).applyTo(currentProgram)
def set_data_comment(self, address: int, cmt: str): def set_data_comment(self, address: int, cmt: str):

View File

@@ -106,8 +106,8 @@ class IDADisassemblerInterface(BaseDisassemblerInterface):
ida_typeinf.set_c_macros(original_macros) ida_typeinf.set_c_macros(original_macros)
# Skip make_function on Windows GameAssembly.dll files due to them predefining all functions through pdata which makes the method very slow # Skip make_function on Windows GameAssembly.dll files due to them predefining all functions through pdata which makes the method very slow
skip_make_function = ida_segment.get_segm_by_name(".pdata") is not None self._skip_function_creation = ida_segment.get_segm_by_name(".pdata") is not None
if skip_make_function: if self._skip_function_creation:
print(".pdata section found, skipping function boundaries") print(".pdata section found, skipping function boundaries")
if FOLDERS_AVAILABLE: if FOLDERS_AVAILABLE:
@@ -202,11 +202,12 @@ class IDADisassemblerInterface(BaseDisassemblerInterface):
return start return start
def write_string(self, address: int, value: str): def write_string(self, address: int, value: str) -> int:
encoded_string = value.encode() + b'\x00' encoded_string = value.encode() + b'\x00'
string_length = len(encoded_string) string_length = len(encoded_string)
ida_bytes.put_bytes(address, encoded_string) ida_bytes.put_bytes(address, encoded_string)
ida_bytes.create_strlit(address, string_length, ida_nalt.STRTYPE_C) ida_bytes.create_strlit(address, string_length, ida_nalt.STRTYPE_C)
return string_length
def write_address(self, address: int, value: int): def write_address(self, address: int, value: int):
if self._is_32_bit: if self._is_32_bit:

View File

@@ -64,7 +64,7 @@ class BaseDisassemblerInterface(abc.ABC):
# only required if supports_fake_string_segment == True # only required if supports_fake_string_segment == True
def create_fake_segment(self, name: str, size: int) -> int: return 0 def create_fake_segment(self, name: str, size: int) -> int: return 0
def write_string(self, address: int, value: str): pass def write_string(self, address: int, value: str) -> int: pass
def write_address(self, address: int, value: int): pass def write_address(self, address: int, value: int): pass
class ScriptContext: class ScriptContext:
@@ -191,11 +191,11 @@ class ScriptContext:
self.define_string(d) self.define_string(d)
ref_addr = self.parse_address(d) ref_addr = self.parse_address(d)
self._backend.write_string(current_string_address, d["string"]) written_string_length = self._backend.write_string(current_string_address, d["string"])
self._backend.set_data_type(ref_addr, r'const char* const') self._backend.set_data_type(ref_addr, r'const char* const')
self._backend.write_address(ref_addr, current_string_address) self._backend.write_address(ref_addr, current_string_address)
current_string_address += len(d["string"]) + 1 current_string_address += written_string_length
self._status.update_progress() self._status.update_progress()
else: else:
for d in metadata['stringLiterals']: for d in metadata['stringLiterals']:
@@ -286,5 +286,8 @@ class ScriptContext:
end_time = datetime.now() end_time = datetime.now()
print(f"Took: {end_time - start_time}") print(f"Took: {end_time - start_time}")
except RuntimeError: pass except RuntimeError:
finally: self._status.shutdown() pass
finally:
self._status.shutdown()

View File

@@ -354,6 +354,8 @@ Example IDA C++ decompilation after applying Il2CppInspector (initialization cod
### Adding metadata to your Ghidra workflow ### Adding metadata to your Ghidra workflow
**NOTE:** The Ghidra script currently requires Python 3 support in Ghidra. This means you must use Ghidra v11.3 or later (or install the latest development build if 11.3 has not been released yet) and start Ghidra in the `PyGhidra` launch mode.
Run Il2CppInspector with the `-p` switch to choose the Ghidra script output file, and `-t Ghidra` (case-sensitive) to specify Ghidra as the script type you wish to produce. Run Il2CppInspector with the `-p` switch to choose the Ghidra script output file, and `-t Ghidra` (case-sensitive) to specify Ghidra as the script type you wish to produce.
Three files are generated by Il2CppInspector for Ghidra: the Python script itself, a JSON metadata file and a C++ type header file. These files must be present for the script to run successfully. Three files are generated by Il2CppInspector for Ghidra: the Python script itself, a JSON metadata file and a C++ type header file. These files must be present for the script to run successfully.