fix and enable binary ninja fake string segment support

This commit is contained in:
LukeFZ
2025-04-23 16:52:05 +02:00
parent c12429bf97
commit d70db09901

View File

@@ -1,302 +1,330 @@
from binaryninja import * from binaryninja import (
BinaryView,
Component,
Type,
PointerType,
TypeParser,
Platform,
Endianness,
ArrayType,
BackgroundTaskThread,
demangle_gnu3,
get_qualified_name,
SegmentFlag,
SectionSemantics,
)
from binaryninja.log import log_error
#try: # try:
# from typing import TYPE_CHECKING # from typing import TYPE_CHECKING
# if TYPE_CHECKING: # if TYPE_CHECKING:
# from ..shared_base import BaseStatusHandler, BaseDisassemblerInterface, ScriptContext # from ..shared_base import BaseStatusHandler, BaseDisassemblerInterface, ScriptContext
# import json # import json
# import os # import os
# import sys # import sys
# from datetime import datetime # from datetime import datetime
# from typing import Literal # from typing import Literal
# bv: BinaryView = None # type: ignore # bv: BinaryView = None # type: ignore
#except: # except:
# pass # pass
CURRENT_PATH = os.path.dirname(os.path.realpath(__file__)) CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface): class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
# this is implemented, supports_fake_string_segment: bool = True
# however the write API does not seem to work properly here (possibly a bug),
# so this is disabled for now
supports_fake_string_segment: bool = False
_status: BaseStatusHandler _status: BaseStatusHandler
_view: BinaryView
_undo_id: str
_components: dict[str, Component]
_type_cache: dict[str, Type]
_function_type_cache: dict[str, Type]
_address_size: int _view: BinaryView
_endianness: Literal["little", "big"] _undo_id: str
_components: dict[str, Component]
_type_cache: dict[str, Type]
_function_type_cache: dict[str, Type]
TYPE_PARSER_OPTIONS = [ _address_size: int
"--target=x86_64-pc-linux", _endianness: Literal["little", "big"]
"-x", "c++",
"-D_BINARYNINJA_=1"
]
def __init__(self, status: BaseStatusHandler): TYPE_PARSER_OPTIONS = ["--target=x86_64-pc-linux", "-x", "c++", "-D_BINARYNINJA_=1"]
self._status = status
def _get_or_create_type(self, type: str) -> Type: def __init__(self, status: BaseStatusHandler):
if type.startswith("struct "): self._status = status
type = type[len("struct "):]
elif type.startswith("class "):
type = type[len("class "):]
if type in self._type_cache: def _get_or_create_type(self, type: str) -> Type:
return self._type_cache[type] if type.startswith("struct "):
type = type[len("struct ") :]
if type.endswith("*"): elif type.startswith("class "):
base_type = self._get_or_create_type(type[:-1].strip()) type = type[len("class ") :]
parsed = PointerType.create(self._view.arch, base_type) # type: ignore if type in self._type_cache:
else: return self._type_cache[type]
parsed = self._view.get_type_by_name(type)
if parsed is None:
parsed, errors = self._view.parse_type_string(type)
self._type_cache[type] = parsed if type.endswith("*"):
return parsed base_type = self._get_or_create_type(type[:-1].strip())
def _parse_type_source(self, types: str, filename: str | None = None): parsed = PointerType.create(self._view.arch, base_type) # type: ignore
parsed_types, errors = TypeParser.default.parse_types_from_source( else:
types, parsed = self._view.get_type_by_name(type)
filename if filename else "types.hpp", if parsed is None:
self._view.platform if self._view.platform is not None else Platform["windows-x86_64"], parsed, errors = self._view.parse_type_string(type)
self._view,
self.TYPE_PARSER_OPTIONS
)
if parsed_types is None: self._type_cache[type] = parsed
log_error("Failed to import types.") return parsed
log_error(errors)
return None
return parsed_types
def get_script_directory(self) -> str: def _parse_type_source(self, types: str, filename: str | None = None):
return CURRENT_PATH parsed_types, errors = TypeParser.default.parse_types_from_source(
types,
filename if filename else "types.hpp",
self._view.platform
if self._view.platform is not None
else Platform["windows-x86_64"],
self._view,
self.TYPE_PARSER_OPTIONS,
)
def on_start(self): if parsed_types is None:
self._view = bv # type: ignore log_error("Failed to import types.")
self._undo_id = self._view.begin_undo_actions() log_error(errors)
self._view.set_analysis_hold(True) return None
self._components = {}
self._type_cache = {}
self._function_type_cache = {}
self._address_size = self._view.address_size return parsed_types
self._endianness = "little" if self._view.endianness == Endianness.LittleEndian else "big"
self._status.update_step("Parsing header")
with open(os.path.join(self.get_script_directory(), "il2cpp.h"), "r") as f: def get_script_directory(self) -> str:
parsed_types = self._parse_type_source(f.read(), "il2cpp.hpp") return CURRENT_PATH
if parsed_types is None:
return
self._status.update_step("Importing header types", len(parsed_types.types)) def on_start(self):
self._view = bv # type: ignore
self._undo_id = self._view.begin_undo_actions()
self._view.set_analysis_hold(True)
self._components = {}
self._type_cache = {}
self._function_type_cache = {}
def import_progress_func(progress: int, total: int): self._address_size = self._view.address_size
self._status.update_progress(1) self._endianness = (
return True "little" if self._view.endianness == Endianness.LittleEndian else "big"
)
self._view.define_user_types([(x.name, x.type) for x in parsed_types.types], import_progress_func) self._status.update_step("Parsing header")
def on_finish(self): with open(os.path.join(self.get_script_directory(), "il2cpp.h"), "r") as f:
self._view.commit_undo_actions(self._undo_id) parsed_types = self._parse_type_source(f.read(), "il2cpp.hpp")
self._view.set_analysis_hold(False) if parsed_types is None:
self._view.update_analysis() return
def define_function(self, address: int, end: int | None = None): self._status.update_step("Importing header types", len(parsed_types.types))
if self._view.get_function_at(address) is not None:
return
self._view.create_user_function(address)
def define_data_array(self, address: int, type: str, count: int): def import_progress_func(progress: int, total: int):
parsed_type = self._get_or_create_type(type) self._status.update_progress(1)
array_type = ArrayType.create(parsed_type, count) return True
var = self._view.get_data_var_at(address)
if var is None:
self._view.define_user_data_var(address, array_type)
else:
var.type = array_type
def set_data_type(self, address: int, type: str): self._view.define_user_types(
var = self._view.get_data_var_at(address) [(x.name, x.type) for x in parsed_types.types], import_progress_func
dtype = self._get_or_create_type(type) )
if var is None:
self._view.define_user_data_var(address, dtype)
else:
var.type = dtype
def set_function_type(self, address: int, type: str): def on_finish(self):
function = self._view.get_function_at(address) self._view.commit_undo_actions(self._undo_id)
if function is None: self._view.set_analysis_hold(False)
return self._view.update_analysis()
if type in self._function_type_cache:
function.type = self._function_type_cache[type] # type: ignore
else:
#log_info(f"skipping function type setting for {address}, {type}")
#pass
function.type = type.replace("this", "`this`")
def set_data_comment(self, address: int, cmt: str): def define_function(self, address: int, end: int | None = None):
self._view.set_comment_at(address, cmt) if self._view.get_function_at(address) is not None:
return
def set_function_comment(self, address: int, cmt: str): self._view.create_user_function(address)
function = self._view.get_function_at(address)
if function is None:
return
function.comment = cmt def define_data_array(self, address: int, type: str, count: int):
parsed_type = self._get_or_create_type(type)
array_type = ArrayType.create(parsed_type, count)
var = self._view.get_data_var_at(address)
if var is None:
self._view.define_user_data_var(address, array_type)
else:
var.type = array_type
def set_data_name(self, address: int, name: str): def set_data_type(self, address: int, type: str):
var = self._view.get_data_var_at(address) var = self._view.get_data_var_at(address)
if var is None: dtype = self._get_or_create_type(type)
return if var is None:
self._view.define_user_data_var(address, dtype)
if name.startswith("_Z"): else:
type, demangled = demangle_gnu3(self._view.arch, name, self._view) var.type = dtype
var.name = get_qualified_name(demangled)
else:
var.name = name
def set_function_name(self, address: int, name: str): def set_function_type(self, address: int, type: str):
function = self._view.get_function_at(address) function = self._view.get_function_at(address)
if function is None: if function is None:
return return
if name.startswith("_Z"): if type in self._function_type_cache:
type, demangled = demangle_gnu3(self._view.arch, name, self._view) function.type = self._function_type_cache[type] # type: ignore
function.name = get_qualified_name(demangled) else:
#function.type = type - this does not work due to the generated types not being namespaced. :( # log_info(f"skipping function type setting for {address}, {type}")
else: # pass
function.name = name function.type = type.replace("this", "`this`")
def add_cross_reference(self, from_address: int, to_address: int): def set_data_comment(self, address: int, cmt: str):
self._view.add_user_data_ref(from_address, to_address) self._view.set_comment_at(address, cmt)
def import_c_typedef(self, type_def: str): def set_function_comment(self, address: int, cmt: str):
self._view.define_user_type(None, type_def) function = self._view.get_function_at(address)
if function is None:
return
# optional function.comment = cmt
def _get_or_create_component(self, name: str):
if name in self._components:
return self._components[name]
current = name
if current.count("/") != 0:
split_idx = current.rindex("/")
parent, child = current[:split_idx], current[split_idx:]
parent = self._get_or_create_component(name)
component = self._view.create_component(child, parent)
else:
component = self._view.create_component(name)
self._components[name] = component def set_data_name(self, address: int, name: str):
return component var = self._view.get_data_var_at(address)
if var is None:
return
def add_function_to_group(self, address: int, group: str): if name.startswith("_Z"):
return type, demangled = demangle_gnu3(self._view.arch, name, self._view)
function = self._view.get_function_at(address) var.name = get_qualified_name(demangled)
if function is None: else:
return var.name = name
self._get_or_create_component(group).add_function(function)
def cache_function_types(self, signatures: list[str]): def set_function_name(self, address: int, name: str):
function_sigs = set(signatures) function = self._view.get_function_at(address)
if len(function_sigs) == 0: if function is None:
return return
typestr = ";\n".join(function_sigs).replace("this", "_this") + ";"
parsed_types = self._parse_type_source(typestr, "cached_types.hpp")
if parsed_types is None:
return
# bv.parse_types_from_source returns a dict in the functions field. if name.startswith("_Z"):
# TypeParser.parse_types_from_source does not. type, demangled = demangle_gnu3(self._view.arch, name, self._view)
for function_sig, function in zip(function_sigs, parsed_types.functions): function.name = get_qualified_name(demangled)
self._function_type_cache[function_sig] = function.type # function.type = type - this does not work due to the generated types not being namespaced. :(
else:
function.name = name
# only required if supports_fake_string_segment == True def add_cross_reference(self, from_address: int, to_address: int):
def create_fake_segment(self, name: str, size: int) -> int: self._view.add_user_data_ref(from_address, to_address)
last_end_addr = self._view.mapped_address_ranges[-1].end
if last_end_addr % 0x1000 != 0:
last_end_addr += (0x1000 - (last_end_addr % 0x1000))
self._view.add_user_segment(last_end_addr, size, 0, 0, SegmentFlag.SegmentContainsData) def import_c_typedef(self, type_def: str):
self._view.add_user_section(name, last_end_addr, size, SectionSemantics.ReadOnlyDataSectionSemantics) self._view.define_user_type(None, type_def)
return last_end_addr
def write_string(self, address: int, value: str) -> int:
encoded = value.encode() + b"\x00"
self._view.write(address, encoded)
return len(encoded)
def write_address(self, address: int, value: int): # optional
self._view.write(address, value.to_bytes(self._address_size, self._endianness)) def _get_or_create_component(self, name: str):
if name in self._components:
return self._components[name]
current = name
if current.count("/") != 0:
split_idx = current.rindex("/")
parent, child = current[:split_idx], current[split_idx:]
parent = self._get_or_create_component(name)
component = self._view.create_component(child, parent)
else:
component = self._view.create_component(name)
self._components[name] = component
return component
def add_function_to_group(self, address: int, group: str):
    """Attach the function at ``address`` to the component named ``group``.

    Grouping is currently switched off: the method returns on its first
    statement, so nothing below the ``return`` ever runs. The disabled
    logic is kept in place so it can be re-enabled by deleting that
    single statement.
    """
    # Unconditional early exit - everything after this line is unreachable.
    return

    target = self._view.get_function_at(address)
    if target is not None:
        self._get_or_create_component(group).add_function(target)
def cache_function_types(self, signatures: list[str]):
    """Parse function signatures in one batch and cache the resulting types.

    Duplicate signatures are collapsed, the remaining ones are joined into a
    single source text (every occurrence of ``this`` is rewritten to
    ``_this``) and handed to ``_parse_type_source``. Each parsed function's
    ``type`` is stored in ``_function_type_cache`` under the original,
    unmodified signature string.

    Returns ``None`` in all cases; nothing is cached when ``signatures`` is
    empty or when parsing yields ``None``.
    """
    unique_sigs = set(signatures)
    if not unique_sigs:
        return

    source = ";\n".join(unique_sigs).replace("this", "_this") + ";"
    result = self._parse_type_source(source, "cached_types.hpp")
    if result is None:
        return

    # bv.parse_types_from_source returns a dict in the functions field.
    # TypeParser.parse_types_from_source does not, so signatures and parsed
    # functions are paired purely by position; this relies on the parser
    # emitting functions in the order they appear in the source text.
    self._function_type_cache.update(
        (sig, parsed.type) for sig, parsed in zip(unique_sigs, result.functions)
    )
# only required if supports_fake_string_segment == True
def create_fake_segment(self, name: str, size: int) -> int:
    """Append a zero-filled region to the view and return its start address.

    The start address is the ``end`` of the last entry in the view's
    ``mapped_address_ranges``, rounded up to the next multiple of 0x1000
    when it is not already aligned. A memory region named ``mem_<name>``
    backed by ``size`` zero bytes is added there with the data and readable
    flags, and a user section called ``name`` with read-only data semantics
    is laid over the same range.
    """
    alignment = 0x1000
    base = self._view.mapped_address_ranges[-1].end
    overshoot = base % alignment
    if overshoot:
        # Round up to the next aligned boundary.
        base += alignment - overshoot

    region_flags = SegmentFlag.SegmentContainsData | SegmentFlag.SegmentReadable
    # bytes(size) yields `size` zero bytes used as the region's backing data.
    self._view.memory_map.add_memory_region(
        f"mem_{name}", base, bytes(size), region_flags
    )
    self._view.add_user_section(
        name, base, size, SectionSemantics.ReadOnlyDataSectionSemantics
    )
    return base
def write_string(self, address: int, value: str) -> int:
    """Write ``value`` as a NUL-terminated byte string at ``address``.

    The text is encoded with ``str.encode``'s default encoding (UTF-8) and
    a single zero byte is appended before the write.

    Returns the number of bytes in the encoded payload, terminator included.
    """
    payload = b"".join((value.encode(), b"\x00"))
    self._view.write(address, payload)
    return len(payload)
def write_address(self, address: int, value: int):
    """Write ``value`` at ``address`` as a fixed-width integer.

    The width comes from ``self._address_size`` and the byte order from
    ``self._endianness``; ``int.to_bytes`` raises ``OverflowError`` if
    ``value`` does not fit in that width or is negative.
    """
    width = self._address_size
    byte_order = self._endianness
    raw = value.to_bytes(width, byte_order)
    self._view.write(address, raw)
class BinaryNinjaStatusHandler(BaseStatusHandler): class BinaryNinjaStatusHandler(BaseStatusHandler):
def __init__(self, thread: BackgroundTaskThread): def __init__(self, thread: BackgroundTaskThread):
self.step = "Initializing" self.step = "Initializing"
self.max_items = 0 self.max_items = 0
self.current_items = 0 self.current_items = 0
self.start_time = datetime.now() self.start_time = datetime.now()
self.step_start_time = self.start_time self.step_start_time = self.start_time
self.last_updated_time = datetime.min self.last_updated_time = datetime.min
self._thread = thread self._thread = thread
def initialize(self): pass
def update(self): def initialize(self):
if self.was_cancelled(): pass
raise RuntimeError("Cancelled script.")
current_time = datetime.now() def update(self):
if 0.5 > (current_time - self.last_updated_time).total_seconds(): if self.was_cancelled():
return raise RuntimeError("Cancelled script.")
self.last_updated_time = current_time current_time = datetime.now()
if 0.5 > (current_time - self.last_updated_time).total_seconds():
return
step_time = current_time - self.step_start_time self.last_updated_time = current_time
total_time = current_time - self.start_time
self._thread.progress = f"Processing IL2CPP metadata: {self.step} ({self.current_items}/{self.max_items}), elapsed: {step_time} ({total_time})"
def update_step(self, step, max_items = 0): step_time = current_time - self.step_start_time
self.step = step total_time = current_time - self.start_time
self.max_items = max_items self._thread.progress = f"Processing IL2CPP metadata: {self.step} ({self.current_items}/{self.max_items}), elapsed: {step_time} ({total_time})"
self.current_items = 0
self.step_start_time = datetime.now()
self.last_updated_time = datetime.min
self.update()
def update_progress(self, new_progress = 1): def update_step(self, step, max_items=0):
self.current_items += new_progress self.step = step
self.update() self.max_items = max_items
self.current_items = 0
self.step_start_time = datetime.now()
self.last_updated_time = datetime.min
self.update()
def was_cancelled(self): return False def update_progress(self, new_progress=1):
self.current_items += new_progress
self.update()
def was_cancelled(self):
return False
def close(self):
pass
def close(self):
pass
# Entry point # Entry point
class Il2CppTask(BackgroundTaskThread): class Il2CppTask(BackgroundTaskThread):
def __init__(self): def __init__(self):
BackgroundTaskThread.__init__(self, "Processing IL2CPP metadata...", False) BackgroundTaskThread.__init__(self, "Processing IL2CPP metadata...", False)
def run(self): def run(self):
status = BinaryNinjaStatusHandler(self) status = BinaryNinjaStatusHandler(self)
backend = BinaryNinjaDisassemblerInterface(status) backend = BinaryNinjaDisassemblerInterface(status)
context = ScriptContext(backend, status) context = ScriptContext(backend, status)
context.process() context.process()
Il2CppTask().start()
Il2CppTask().start()