fix and enable binary ninja fake string segment support

This commit is contained in:
LukeFZ
2025-04-23 16:52:05 +02:00
parent c12429bf97
commit d70db09901

View File

@@ -1,302 +1,330 @@
from binaryninja import * from binaryninja import (
BinaryView,
Component,
Type,
PointerType,
TypeParser,
Platform,
Endianness,
ArrayType,
BackgroundTaskThread,
demangle_gnu3,
get_qualified_name,
SegmentFlag,
SectionSemantics,
)
from binaryninja.log import log_error
#try: # try:
# from typing import TYPE_CHECKING # from typing import TYPE_CHECKING
# if TYPE_CHECKING: # if TYPE_CHECKING:
# from ..shared_base import BaseStatusHandler, BaseDisassemblerInterface, ScriptContext # from ..shared_base import BaseStatusHandler, BaseDisassemblerInterface, ScriptContext
# import json # import json
# import os # import os
# import sys # import sys
# from datetime import datetime # from datetime import datetime
# from typing import Literal # from typing import Literal
# bv: BinaryView = None # type: ignore # bv: BinaryView = None # type: ignore
#except: # except:
# pass # pass
# Absolute directory of this script (symlinks resolved). The backend reports it
# as its script directory and loads "il2cpp.h" from there.
CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
    """Binary Ninja implementation of the disassembler interface.

    Every operation is forwarded to the ``BinaryView`` captured in
    ``on_start``. Parsed types and function signatures are memoised in
    per-run caches so each one is only resolved once.
    """

    supports_fake_string_segment: bool = True

    _status: BaseStatusHandler
    _view: BinaryView
    _undo_id: str
    _components: dict[str, Component]
    _type_cache: dict[str, Type]
    _function_type_cache: dict[str, Type]
    _address_size: int
    _endianness: Literal["little", "big"]

    # Arguments handed to the type parser for every header/signature parse.
    TYPE_PARSER_OPTIONS = ["--target=x86_64-pc-linux", "-x", "c++", "-D_BINARYNINJA_=1"]

    def __init__(self, status: BaseStatusHandler):
        """Store the status handler; the view itself is bound in ``on_start``."""
        self._status = status

    def _get_or_create_type(self, type: str) -> Type:
        """Resolve a C type name to a ``Type``, caching the result.

        A leading ``struct `` / ``class `` keyword is dropped, a trailing
        ``*`` is resolved recursively into a pointer to the base type, and
        anything the view does not know by name is parsed from the string.
        """
        if type.startswith("struct "):
            type = type[len("struct "):]
        elif type.startswith("class "):
            type = type[len("class "):]

        cached = self._type_cache.get(type)
        if cached is not None:
            return cached

        if type.endswith("*"):
            base_type = self._get_or_create_type(type[:-1].strip())
            parsed = PointerType.create(self._view.arch, base_type)  # type: ignore
        else:
            parsed = self._view.get_type_by_name(type)
            if parsed is None:
                # Second tuple element is not needed here.
                parsed, _ = self._view.parse_type_string(type)

        self._type_cache[type] = parsed
        return parsed

    def _parse_type_source(self, types: str, filename: str | None = None):
        """Parse C/C++ source text with the default type parser.

        Uses the view's platform, falling back to ``windows-x86_64`` when the
        view has none. Logs the parser errors and returns ``None`` on failure.
        """
        platform = self._view.platform
        if platform is None:
            platform = Platform["windows-x86_64"]

        parsed_types, errors = TypeParser.default.parse_types_from_source(
            types,
            filename if filename else "types.hpp",
            platform,
            self._view,
            self.TYPE_PARSER_OPTIONS,
        )

        if parsed_types is None:
            log_error("Failed to import types.")
            log_error(errors)
            return None

        return parsed_types

    def get_script_directory(self) -> str:
        """Return the directory this script lives in."""
        return CURRENT_PATH

    def on_start(self):
        """Bind the view, open an undo group, hold analysis and import il2cpp.h."""
        # `bv` is not defined in this file; it is expected to be provided by
        # the environment the script runs in.
        self._view = bv  # type: ignore
        self._undo_id = self._view.begin_undo_actions()
        self._view.set_analysis_hold(True)
        self._components = {}
        self._type_cache = {}
        self._function_type_cache = {}
        self._address_size = self._view.address_size
        self._endianness = (
            "little" if self._view.endianness == Endianness.LittleEndian else "big"
        )

        self._status.update_step("Parsing header")

        with open(os.path.join(self.get_script_directory(), "il2cpp.h"), "r") as f:
            parsed_types = self._parse_type_source(f.read(), "il2cpp.hpp")

        if parsed_types is None:
            return

        self._status.update_step("Importing header types", len(parsed_types.types))

        def import_progress_func(progress: int, total: int):
            # Each callback advances the status by one item; returning True
            # lets the import continue.
            self._status.update_progress(1)
            return True

        self._view.define_user_types(
            [(x.name, x.type) for x in parsed_types.types], import_progress_func
        )

    def on_finish(self):
        """Commit the undo group, release the analysis hold and re-analyse."""
        self._view.commit_undo_actions(self._undo_id)
        self._view.set_analysis_hold(False)
        self._view.update_analysis()

    def define_function(self, address: int, end: int | None = None):
        """Create a user function at ``address`` unless one already exists.

        ``end`` is accepted for interface compatibility and is not used.
        """
        if self._view.get_function_at(address) is not None:
            return

        self._view.create_user_function(address)

    def define_data_array(self, address: int, type: str, count: int):
        """Type the data at ``address`` as an array of ``count`` ``type`` items."""
        element_type = self._get_or_create_type(type)
        array_type = ArrayType.create(element_type, count)
        var = self._view.get_data_var_at(address)
        if var is None:
            self._view.define_user_data_var(address, array_type)
        else:
            var.type = array_type

    def set_data_type(self, address: int, type: str):
        """Set (or create) the data variable at ``address`` with type ``type``."""
        var = self._view.get_data_var_at(address)
        dtype = self._get_or_create_type(type)
        if var is None:
            self._view.define_user_data_var(address, dtype)
        else:
            var.type = dtype

    def set_function_type(self, address: int, type: str):
        """Apply a signature to the function at ``address``, if there is one.

        A signature pre-parsed by ``cache_function_types`` is used when
        available; otherwise the raw string is assigned with ``this`` quoted
        in backticks.
        """
        function = self._view.get_function_at(address)
        if function is None:
            return

        cached = self._function_type_cache.get(type)
        if cached is not None:
            function.type = cached  # type: ignore
        else:
            function.type = type.replace("this", "`this`")

    def set_data_comment(self, address: int, cmt: str):
        """Attach ``cmt`` as a comment at ``address``."""
        self._view.set_comment_at(address, cmt)

    def set_function_comment(self, address: int, cmt: str):
        """Set the comment of the function at ``address``, if there is one."""
        function = self._view.get_function_at(address)
        if function is None:
            return

        function.comment = cmt

    def _demangle_name(self, name: str) -> str:
        """Return the qualified form of an Itanium-mangled (``_Z``) name.

        Names without the ``_Z`` prefix are returned unchanged.
        """
        if not name.startswith("_Z"):
            return name

        _, demangled = demangle_gnu3(self._view.arch, name, self._view)
        # NOTE(review): demangle_gnu3 appears to hand back the mangled string
        # itself when demangling fails; joining a str would insert "::"
        # between every character, so use it as-is in that case.
        if isinstance(demangled, str):
            return demangled
        return get_qualified_name(demangled)

    def set_data_name(self, address: int, name: str):
        """Rename the data variable at ``address``, demangling ``_Z`` names."""
        var = self._view.get_data_var_at(address)
        if var is None:
            return

        var.name = self._demangle_name(name)

    def set_function_name(self, address: int, name: str):
        """Rename the function at ``address``, demangling ``_Z`` names.

        The demangled type is deliberately not applied: the generated types
        are not namespaced, so it does not resolve.
        """
        function = self._view.get_function_at(address)
        if function is None:
            return

        function.name = self._demangle_name(name)

    def add_cross_reference(self, from_address: int, to_address: int):
        """Add a user data reference from ``from_address`` to ``to_address``."""
        self._view.add_user_data_ref(from_address, to_address)

    def import_c_typedef(self, type_def: str):
        """Define a user type from the C declaration ``type_def``."""
        self._view.define_user_type(None, type_def)

    # optional
    def _get_or_create_component(self, name: str):
        """Return the component for a ``/``-separated path, creating parents.

        Components are cached by their full path.
        """
        existing = self._components.get(name)
        if existing is not None:
            return existing

        if "/" in name:
            # Split "a/b/c" into parent path "a/b" and leaf "c"; the separator
            # belongs to neither, and the recursion must walk up the parent
            # path (recursing on `name` itself would never terminate).
            parent_path, _, child = name.rpartition("/")
            parent = self._get_or_create_component(parent_path)
            component = self._view.create_component(child, parent)
        else:
            component = self._view.create_component(name)

        self._components[name] = component
        return component

    def add_function_to_group(self, address: int, group: str):
        """Add the function at ``address`` to component ``group`` (disabled)."""
        # NOTE(review): grouping is switched off by this unconditional return;
        # the code below is kept for when it is re-enabled.
        return

        function = self._view.get_function_at(address)
        if function is None:
            return

        self._get_or_create_component(group).add_function(function)

    def cache_function_types(self, signatures: list[str]):
        """Parse all distinct signatures in one pass and cache their types."""
        # De-duplicate while keeping first-seen order so the pairing with the
        # parser output below is deterministic.
        function_sigs = list(dict.fromkeys(signatures))
        if not function_sigs:
            return

        typestr = ";\n".join(function_sigs).replace("this", "_this") + ";"
        parsed_types = self._parse_type_source(typestr, "cached_types.hpp")
        if parsed_types is None:
            return

        # bv.parse_types_from_source returns a dict in the functions field.
        # TypeParser.parse_types_from_source does not, so results are paired
        # with the signatures by position.
        for function_sig, function in zip(function_sigs, parsed_types.functions):
            self._function_type_cache[function_sig] = function.type

    # only required if supports_fake_string_segment == True
    def create_fake_segment(self, name: str, size: int) -> int:
        """Map ``size`` zero bytes after the last mapped range and name them.

        The start address is rounded up to a 0x1000 boundary. A memory region
        ``mem_<name>`` backs the data and a read-only data section ``name``
        covers it. Returns the start address.
        """
        page_size = 0x1000
        last_end_addr = self._view.mapped_address_ranges[-1].end
        misalignment = last_end_addr % page_size
        if misalignment != 0:
            last_end_addr += page_size - misalignment

        self._view.memory_map.add_memory_region(
            f"mem_{name}",
            last_end_addr,
            bytes(size),
            SegmentFlag.SegmentContainsData | SegmentFlag.SegmentReadable,
        )

        self._view.add_user_section(
            name, last_end_addr, size, SectionSemantics.ReadOnlyDataSectionSemantics
        )
        return last_end_addr

    def write_string(self, address: int, value: str) -> int:
        """Write ``value`` NUL-terminated at ``address``; return bytes written."""
        encoded = value.encode() + b"\x00"
        self._view.write(address, encoded)
        return len(encoded)

    def write_address(self, address: int, value: int):
        """Write ``value`` as a pointer-sized integer in the view's byte order."""
        self._view.write(address, value.to_bytes(self._address_size, self._endianness))
class BinaryNinjaStatusHandler(BaseStatusHandler):
    """Reports script progress through a background task's progress text."""

    def __init__(self, thread: BackgroundTaskThread):
        """Start the clocks and remember the task whose progress is updated."""
        self.step = "Initializing"
        self.max_items = 0
        self.current_items = 0
        self.start_time = datetime.now()
        self.step_start_time = self.start_time
        # datetime.min guarantees the first update() is never throttled.
        self.last_updated_time = datetime.min
        self._thread = thread

    def initialize(self):
        """No set-up is needed for this handler."""
        pass

    def update(self):
        """Refresh the progress text, at most once every half second.

        Raises:
            RuntimeError: if ``was_cancelled()`` reports cancellation.
        """
        if self.was_cancelled():
            raise RuntimeError("Cancelled script.")

        current_time = datetime.now()
        since_last_update = (current_time - self.last_updated_time).total_seconds()
        if since_last_update < 0.5:
            return

        self.last_updated_time = current_time

        step_time = current_time - self.step_start_time
        total_time = current_time - self.start_time
        self._thread.progress = f"Processing IL2CPP metadata: {self.step} ({self.current_items}/{self.max_items}), elapsed: {step_time} ({total_time})"

    def update_step(self, step, max_items=0):
        """Begin a new step: reset counters and its timer, then refresh."""
        self.step = step
        self.max_items = max_items
        self.current_items = 0
        self.step_start_time = datetime.now()
        # Reset the throttle so the new step shows up immediately.
        self.last_updated_time = datetime.min
        self.update()

    def update_progress(self, new_progress=1):
        """Advance the current step by ``new_progress`` items and refresh."""
        self.current_items += new_progress
        self.update()

    def was_cancelled(self):
        """Cancellation is not supported by this handler; always ``False``."""
        return False

    def close(self):
        """Nothing to release."""
        pass
# Entry point
class Il2CppTask(BackgroundTaskThread):
    """Background task that runs the IL2CPP metadata import."""

    def __init__(self):
        # The trailing False is the task's second positional argument
        # (presumably "can cancel" — confirm against the Binary Ninja API).
        BackgroundTaskThread.__init__(self, "Processing IL2CPP metadata...", False)

    def run(self):
        """Wire the status handler and backend together and run the script."""
        status = BinaryNinjaStatusHandler(self)
        backend = BinaryNinjaDisassemblerInterface(status)
        context = ScriptContext(backend, status)
        context.process()


Il2CppTask().start()