Fix misc. issues in the generated disassembler scripts (#17)

* attempt to fix ghidra script issues

* add ghidra python 3 note to readme

* fix binary ninja script erroring on function type parsing

* fix ida script not skipping function creation on pe binaries

* fix writing of multibyte strings breaking fake string segment

* also adjust binja write_string impl
This commit is contained in:
Luke
2025-01-25 14:21:56 +01:00
committed by GitHub
parent 4e46c29cee
commit ec76447122
5 changed files with 53 additions and 29 deletions

View File

@@ -8,6 +8,8 @@ from binaryninja import *
# import os
# import sys
# from datetime import datetime
# from typing import Literal
# bv: BinaryView = None # type: ignore
#except:
# pass
@@ -30,6 +32,12 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
_address_size: int
_endianness: Literal["little", "big"]
# Option list passed as the last argument of
# TypeParser.default.parse_types_from_source by _parse_type_source, so every
# type-parsing call in this interface shares the same settings.
# NOTE(review): these look like Clang-style flags (target triple, C++ language
# mode, a _BINARYNINJA_ preprocessor define) — confirm against the Binary Ninja
# TypeParser documentation.
TYPE_PARSER_OPTIONS = [
"--target=x86_64-pc-linux",
"-x", "c++",
"-D_BINARYNINJA_=1"
]
def __init__(self, status: BaseStatusHandler):
self._status = status
@@ -54,6 +62,22 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
self._type_cache[type] = parsed
return parsed
def _parse_type_source(self, types: str, filename: str | None = None):
    """Run the default Binary Ninja type parser over a block of type source.

    Args:
        types: Source text containing the declarations to parse.
        filename: Name reported to the parser for this source; when empty or
            ``None`` the name ``"types.hpp"`` is used.

    Returns:
        The parser's result object, or ``None`` if parsing failed (the
        failure and the parser's error list are logged via ``log_error``).
    """
    source_name = filename or "types.hpp"

    # Use the view's own platform when it has one; otherwise fall back to
    # the windows-x86_64 platform.
    platform = self._view.platform
    if platform is None:
        platform = Platform["windows-x86_64"]

    result, errors = TypeParser.default.parse_types_from_source(
        types,
        source_name,
        platform,
        self._view,
        self.TYPE_PARSER_OPTIONS,
    )

    if result is not None:
        return result

    log_error("Failed to import types.")
    log_error(errors)
    return None
def get_script_directory(self) -> str:
return CURRENT_PATH
@@ -71,21 +95,8 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
self._status.update_step("Parsing header")
with open(os.path.join(self.get_script_directory(), "il2cpp.h"), "r") as f:
parsed_types, errors = TypeParser.default.parse_types_from_source(
f.read(),
"il2cpp.h",
self._view.platform if self._view.platform is not None else Platform["windows-x86_64"],
self._view,
[
"--target=x86_64-pc-linux",
"-x", "c++",
"-D_BINARYNINJA_=1"
]
)
parsed_types = self._parse_type_source(f.read(), "il2cpp.hpp")
if parsed_types is None:
log_error("Failed to import header")
log_error(errors)
return
self._status.update_step("Importing header types", len(parsed_types.types))
@@ -206,9 +217,14 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
return
typestr = ";\n".join(function_sigs).replace("this", "_this") + ";"
res = self._view.parse_types_from_string(typestr)
for function_sig, function in zip(function_sigs, res.functions.values()): # type: ignore
self._function_type_cache[function_sig] = function
parsed_types = self._parse_type_source(typestr, "cached_types.hpp")
if parsed_types is None:
return
# bv.parse_types_from_source returns a dict in the functions field.
# TypeParser.parse_types_from_source does not.
for function_sig, function in zip(function_sigs, parsed_types.functions):
self._function_type_cache[function_sig] = function.type
# only required if supports_fake_string_segment == True
def create_fake_segment(self, name: str, size: int) -> int:
@@ -220,8 +236,10 @@ class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
self._view.add_user_section(name, last_end_addr, size, SectionSemantics.ReadOnlyDataSectionSemantics)
return last_end_addr
def write_string(self, address: int, value: str):
self._view.write(address, value.encode() + b"\x00")
def write_string(self, address: int, value: str) -> int:
    """Write ``value`` followed by a NUL byte into the view at ``address``.

    Returns:
        The number of bytes written, counted on the encoded data (terminator
        included) rather than on the ``str`` length, so multibyte
        characters are accounted for.
    """
    payload = value.encode() + b"\x00"
    self._view.write(address, payload)
    return len(payload)
def write_address(self, address: int, value: int):
self._view.write(address, value.to_bytes(self._address_size, self._endianness))

View File

@@ -5,6 +5,7 @@ from ghidra.program.model.data import ArrayDataType
from ghidra.program.model.symbol import SourceType
from ghidra.program.model.symbol import RefType
from ghidra.app.cmd.label import DemanglerCmd
from ghidra.app.services import DataTypeManagerService
#try:
# from typing import TYPE_CHECKING
@@ -77,8 +78,7 @@ class GhidraDisassemblerInterface(BaseDisassemblerInterface):
print("Failed to set type: %s" % type)
# Apply the signature string `type` to the function at `address`.
# Calls make_function(address) first, parses `type` with
# CParserUtils.parseSignature against currentProgram, then applies the result
# with ApplyFunctionSignatureCmd using SourceType.USER_DEFINED.
# NOTE(review): this listing shows two consecutive `typeSig` assignments; only
# the value from the second one reaches ApplyFunctionSignatureCmd.
def set_function_type(self, address: int, type: str):
make_function(address)
typeSig = CParserUtils.parseSignature(None, currentProgram, type)
# NOTE(review): `DataTypeManagerService@None` looks like a typed-null cast for
# the first argument, presumably so the intended parseSignature overload is
# selected under PyGhidra — confirm against the JPype/PyGhidra docs.
typeSig = CParserUtils.parseSignature(DataTypeManagerService@None, currentProgram, type)
ApplyFunctionSignatureCmd(toAddr(address), typeSig, SourceType.USER_DEFINED, False, True).applyTo(currentProgram)
def set_data_comment(self, address: int, cmt: str):

View File

@@ -106,8 +106,8 @@ class IDADisassemblerInterface(BaseDisassemblerInterface):
ida_typeinf.set_c_macros(original_macros)
# Skip make_function on Windows GameAssembly.dll files: they predefine all functions through .pdata, which makes make_function very slow
skip_make_function = ida_segment.get_segm_by_name(".pdata") is not None
if skip_make_function:
self._skip_function_creation = ida_segment.get_segm_by_name(".pdata") is not None
if self._skip_function_creation:
print(".pdata section found, skipping function boundaries")
if FOLDERS_AVAILABLE:
@@ -202,11 +202,12 @@ class IDADisassemblerInterface(BaseDisassemblerInterface):
return start
# Store `value` as a NUL-terminated string at `address` and return the number
# of bytes written (terminator included).
# NOTE(review): two `def write_string` header lines appear back to back in this
# listing; they differ only in the `-> int` return annotation.
def write_string(self, address: int, value: str):
def write_string(self, address: int, value: str) -> int:
encoded_string = value.encode() + b'\x00'
# Length is taken from the encoded bytes, not from the str, so multibyte
# characters count with their full encoded size.
string_length = len(encoded_string)
ida_bytes.put_bytes(address, encoded_string)
# Define a string literal of type STRTYPE_C over the bytes just written.
ida_bytes.create_strlit(address, string_length, ida_nalt.STRTYPE_C)
return string_length
def write_address(self, address: int, value: int):
if self._is_32_bit:

View File

@@ -64,7 +64,7 @@ class BaseDisassemblerInterface(abc.ABC):
# only required if supports_fake_string_segment == True
def create_fake_segment(self, name: str, size: int) -> int: return 0
def write_string(self, address: int, value: str): pass
def write_string(self, address: int, value: str) -> int:
    """Default stub for writing a string into the fake string segment.

    Only required if supports_fake_string_segment == True; backends that
    support it override this method.

    Args:
        address: Location the string should be written to.
        value: The string to write.

    Returns:
        The number of bytes written. The caller advances its current string
        address by this amount, so the stub returns ``0`` (nothing written)
        instead of ``None``, matching the ``-> int`` annotation and the
        ``create_fake_segment`` stub.
    """
    return 0
def write_address(self, address: int, value: int): pass
class ScriptContext:
@@ -191,11 +191,11 @@ class ScriptContext:
self.define_string(d)
ref_addr = self.parse_address(d)
self._backend.write_string(current_string_address, d["string"])
written_string_length = self._backend.write_string(current_string_address, d["string"])
self._backend.set_data_type(ref_addr, r'const char* const')
self._backend.write_address(ref_addr, current_string_address)
current_string_address += len(d["string"]) + 1
current_string_address += written_string_length
self._status.update_progress()
else:
for d in metadata['stringLiterals']:
@@ -286,5 +286,8 @@ class ScriptContext:
end_time = datetime.now()
print(f"Took: {end_time - start_time}")
except RuntimeError: pass
finally: self._status.shutdown()
except RuntimeError:
pass
finally:
self._status.shutdown()

View File

@@ -354,6 +354,8 @@ Example IDA C++ decompilation after applying Il2CppInspector (initialization cod
### Adding metadata to your Ghidra workflow
**NOTE:** The Ghidra script currently requires Python 3 support in Ghidra. Use Ghidra v11.3 or later (or the latest development build if 11.3 has not been released yet) and start Ghidra in the `PyGhidra` launch mode.
Run Il2CppInspector with the `-p` switch to choose the Ghidra script output file, and `-t Ghidra` (case-sensitive) to specify Ghidra as the script type you wish to produce.
Three files are generated by Il2CppInspector for Ghidra: the Python script itself, a JSON metadata file and a C++ type header file. These files must be present for the script to run successfully.