Overhaul disassembler script + add Binary Ninja target (#12)

* Overhaul diassembler scripts:
- No longer defines top level functions
- Split into three classes: StatusHandler (like before), DisassemblerInterface (for interfacing with the used program API), ScriptContext (for definiting general functions that use the disassembler interface)
- Add type annotations to all class methods and remove 2.7 compatibility stuff (Ghidra now supports Python 3 so this is unnecessary anymore)
- Disassembler backends are now responsible for launching metadata/script processing, to better support disassembler differences
- String handling is back in the base ScriptContext class, disassembler interfaces opt into the fake string segment creation and fall back to the old method if it isn't supported

* Add Binary Ninja disassembler script backend
This uses the new backend-controlled execution to launch metadata processing on a background thread to keep the ui responsive

* make binary ninja script use own _BINARYNINJA_ define and add define helpers to header

* Update README to account for new script and binary ninja backend

* implement fake string segment functions for binary ninja but don't advertise support

* also cache API function types in binary ninja backend

* fix ida script and disable folders again
This commit is contained in:
Luke
2024-11-08 23:31:40 +01:00
committed by GitHub
parent 792268f52f
commit 5715760e8b
9 changed files with 912 additions and 510 deletions

View File

@@ -0,0 +1,284 @@
from binaryninja import *
#try:
# from typing import TYPE_CHECKING
# if TYPE_CHECKING:
# from ..shared_base import BaseStatusHandler, BaseDisassemblerInterface, ScriptContext
# import json
# import os
# import sys
# from datetime import datetime
#except:
# pass
CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
class BinaryNinjaDisassemblerInterface(BaseDisassemblerInterface):
# this is implemented,
# however the write API does not seem to work properly here (possibly a bug),
# so this is disabled for now
supports_fake_string_segment: bool = False
_status: BaseStatusHandler
_view: BinaryView
_undo_id: str
_components: dict[str, Component]
_type_cache: dict[str, Type]
_function_type_cache: dict[str, Type]
_address_size: int
_endianness: Literal["little", "big"]
def __init__(self, status: BaseStatusHandler):
self._status = status
def _get_or_create_type(self, type: str) -> Type:
if type.startswith("struct "):
type = type[len("struct "):]
elif type.startswith("class "):
type = type[len("class "):]
if type in self._type_cache:
return self._type_cache[type]
if type.endswith("*"):
base_type = self._get_or_create_type(type[:-1].strip())
parsed = PointerType.create(self._view.arch, base_type) # type: ignore
else:
parsed = self._view.get_type_by_name(type)
if parsed is None:
parsed, errors = self._view.parse_type_string(type)
self._type_cache[type] = parsed
return parsed
def get_script_directory(self) -> str:
return CURRENT_PATH
def on_start(self):
self._view = bv # type: ignore
self._undo_id = self._view.begin_undo_actions()
self._view.set_analysis_hold(True)
self._components = {}
self._type_cache = {}
self._function_type_cache = {}
self._address_size = self._view.address_size
self._endianness = "little" if self._view.endianness == Endianness.LittleEndian else "big"
self._status.update_step("Parsing header")
with open(os.path.join(self.get_script_directory(), "il2cpp.h"), "r") as f:
parsed_types, errors = TypeParser.default.parse_types_from_source(
f.read(),
"il2cpp.h",
self._view.platform if self._view.platform is not None else Platform["windows-x86_64"],
self._view,
[
"--target=x86_64-pc-linux",
"-x", "c++",
"-D_BINARYNINJA_=1"
]
)
if parsed_types is None:
log_error("Failed to import header")
log_error(errors)
return
self._status.update_step("Importing header types", len(parsed_types.types))
def import_progress_func(progress: int, total: int):
self._status.update_progress(1)
return True
self._view.define_user_types([(x.name, x.type) for x in parsed_types.types], import_progress_func)
def on_finish(self):
self._view.commit_undo_actions(self._undo_id)
self._view.set_analysis_hold(False)
self._view.update_analysis()
def define_function(self, address: int, end: int | None = None):
if self._view.get_function_at(address) is not None:
return
self._view.create_user_function(address)
def define_data_array(self, address: int, type: str, count: int):
parsed_type = self._get_or_create_type(type)
array_type = ArrayType.create(parsed_type, count)
var = self._view.get_data_var_at(address)
if var is None:
self._view.define_user_data_var(address, array_type)
else:
var.type = array_type
def set_data_type(self, address: int, type: str):
var = self._view.get_data_var_at(address)
dtype = self._get_or_create_type(type)
if var is None:
self._view.define_user_data_var(address, dtype)
else:
var.type = dtype
def set_function_type(self, address: int, type: str):
function = self._view.get_function_at(address)
if function is None:
return
if type in self._function_type_cache:
function.type = self._function_type_cache[type] # type: ignore
else:
#log_info(f"skipping function type setting for {address}, {type}")
#pass
function.type = type.replace("this", "`this`")
def set_data_comment(self, address: int, cmt: str):
self._view.set_comment_at(address, cmt)
def set_function_comment(self, address: int, cmt: str):
function = self._view.get_function_at(address)
if function is None:
return
function.comment = cmt
def set_data_name(self, address: int, name: str):
var = self._view.get_data_var_at(address)
if var is None:
return
if name.startswith("_Z"):
type, demangled = demangle_gnu3(self._view.arch, name, self._view)
var.name = get_qualified_name(demangled)
else:
var.name = name
def set_function_name(self, address: int, name: str):
function = self._view.get_function_at(address)
if function is None:
return
if name.startswith("_Z"):
type, demangled = demangle_gnu3(self._view.arch, name, self._view)
function.name = get_qualified_name(demangled)
#function.type = type - this does not work due to the generated types not being namespaced. :(
else:
function.name = name
def add_cross_reference(self, from_address: int, to_address: int):
self._view.add_user_data_ref(from_address, to_address)
def import_c_typedef(self, type_def: str):
self._view.define_user_type(None, type_def)
# optional
def _get_or_create_component(self, name: str):
if name in self._components:
return self._components[name]
current = name
if current.count("/") != 0:
split_idx = current.rindex("/")
parent, child = current[:split_idx], current[split_idx:]
parent = self._get_or_create_component(name)
component = self._view.create_component(child, parent)
else:
component = self._view.create_component(name)
self._components[name] = component
return component
def add_function_to_group(self, address: int, group: str):
return
function = self._view.get_function_at(address)
if function is None:
return
self._get_or_create_component(group).add_function(function)
def cache_function_types(self, signatures: list[str]):
function_sigs = set(signatures)
if len(function_sigs) == 0:
return
typestr = ";\n".join(function_sigs).replace("this", "_this") + ";"
res = self._view.parse_types_from_string(typestr)
for function_sig, function in zip(function_sigs, res.functions.values()): # type: ignore
self._function_type_cache[function_sig] = function
# only required if supports_fake_string_segment == True
def create_fake_segment(self, name: str, size: int) -> int:
last_end_addr = self._view.mapped_address_ranges[-1].end
if last_end_addr % 0x1000 != 0:
last_end_addr += (0x1000 - (last_end_addr % 0x1000))
self._view.add_user_segment(last_end_addr, size, 0, 0, SegmentFlag.SegmentContainsData)
self._view.add_user_section(name, last_end_addr, size, SectionSemantics.ReadOnlyDataSectionSemantics)
return last_end_addr
def write_string(self, address: int, value: str):
self._view.write(address, value.encode() + b"\x00")
def write_address(self, address: int, value: int):
self._view.write(address, value.to_bytes(self._address_size, self._endianness))
class BinaryNinjaStatusHandler(BaseStatusHandler):
def __init__(self, thread: BackgroundTaskThread):
self.step = "Initializing"
self.max_items = 0
self.current_items = 0
self.start_time = datetime.now()
self.step_start_time = self.start_time
self.last_updated_time = datetime.min
self._thread = thread
def initialize(self): pass
def update(self):
if self.was_cancelled():
raise RuntimeError("Cancelled script.")
current_time = datetime.now()
if 0.5 > (current_time - self.last_updated_time).total_seconds():
return
self.last_updated_time = current_time
step_time = current_time - self.step_start_time
total_time = current_time - self.start_time
self._thread.progress = f"Processing IL2CPP metadata: {self.step} ({self.current_items}/{self.max_items}), elapsed: {step_time} ({total_time})"
def update_step(self, step, max_items = 0):
self.step = step
self.max_items = max_items
self.current_items = 0
self.step_start_time = datetime.now()
self.last_updated_time = datetime.min
self.update()
def update_progress(self, new_progress = 1):
self.current_items += new_progress
self.update()
def was_cancelled(self): return False
def close(self):
pass
# Entry point
class Il2CppTask(BackgroundTaskThread):
def __init__(self):
BackgroundTaskThread.__init__(self, "Processing IL2CPP metadata...", False)
def run(self):
status = BinaryNinjaStatusHandler(self)
backend = BinaryNinjaDisassemblerInterface(status)
context = ScriptContext(backend, status)
context.process()
Il2CppTask().start()

View File

@@ -1,106 +1,124 @@
# Ghidra-specific implementation
from ghidra.app.cmd.function import ApplyFunctionSignatureCmd
from ghidra.app.script import GhidraScriptUtil
from ghidra.app.util.cparser.C import CParserUtils
from ghidra.program.model.data import ArrayDataType
from ghidra.program.model.symbol import SourceType
from ghidra.program.model.symbol import RefType
from ghidra.app.cmd.label import DemanglerCmd
xrefs = currentProgram.getReferenceManager()
#try:
# from typing import TYPE_CHECKING
# if TYPE_CHECKING:
# from ..shared_base import BaseStatusHandler, BaseDisassemblerInterface, ScriptContext
# import json
# import os
# import sys
# from datetime import datetime
#except:
# pass
def set_name(addr, name):
if not name.startswith("_ZN"):
createLabel(toAddr(addr), name, True)
return
cmd = DemanglerCmd(currentAddress.getAddress(hex(addr)), name)
if not cmd.applyTo(currentProgram, monitor):
print("Failed to apply demangled name to %s at %s due %s, falling back to mangled" % (name, hex(addr), cmd.getStatusMsg()))
createLabel(toAddr(addr), name, True)
class GhidraDisassemblerInterface(BaseDisassemblerInterface):
supports_fake_string_segment = False
def make_function(start, end = None):
addr = toAddr(start)
# Don't override existing functions
fn = getFunctionAt(addr)
if fn is None:
# Create new function if none exists
createFunction(addr, None)
def get_script_directory(self) -> str:
return getSourceFile().getParentFile().toString()
def make_array(addr, numItems, cppType):
if cppType.startswith('struct '):
cppType = cppType[7:]
t = getDataTypes(cppType)[0]
a = ArrayDataType(t, numItems, t.getLength())
addr = toAddr(addr)
removeDataAt(addr)
createData(addr, a)
def on_start(self):
self.xrefs = currentProgram.getReferenceManager()
def define_code(code):
# Code declarations are not supported in Ghidra
# This only affects string literals for metadata version < 19
# TODO: Replace with creating a DataType for enums
# Check that the user has parsed the C headers first
if len(getDataTypes('Il2CppObject')) == 0:
print('STOP! You must import the generated C header file (%TYPE_HEADER_RELATIVE_PATH%) before running this script.')
print('See https://github.com/djkaty/Il2CppInspector/blob/master/README.md#adding-metadata-to-your-ghidra-workflow for instructions.')
sys.exit()
# Ghidra sets the image base for ELF to 0x100000 for some reason
# https://github.com/NationalSecurityAgency/ghidra/issues/1020
# Make sure that the base address is 0
# Without this, Ghidra may not analyze the binary correctly and you will just waste your time
# If 0 doesn't work for you, replace it with the base address from the output of the CLI or GUI
if currentProgram.getExecutableFormat().endswith('(ELF)'):
currentProgram.setImageBase(toAddr(0), True)
# Don't trigger decompiler
setAnalysisOption(currentProgram, "Call Convention ID", "false")
def on_finish(self):
pass
def define_function(self, address: int, end: int | None = None):
address = toAddr(address)
# Don't override existing functions
fn = getFunctionAt(address)
if fn is None:
# Create new function if none exists
createFunction(address, None)
def define_data_array(self, address: int, type: str, count: int):
if type.startswith('struct '):
type = type[7:]
t = getDataTypes(type)[0]
a = ArrayDataType(t, count, t.getLength())
address = toAddr(address)
removeDataAt(address)
createData(address, a)
def set_data_type(self, address: int, type: str):
if type.startswith('struct '):
type = type[7:]
try:
t = getDataTypes(type)[0]
address = toAddr(address)
removeDataAt(address)
createData(address, t)
except:
print("Failed to set type: %s" % type)
def set_function_type(self, address: int, type: str):
make_function(address)
typeSig = CParserUtils.parseSignature(None, currentProgram, type)
ApplyFunctionSignatureCmd(toAddr(address), typeSig, SourceType.USER_DEFINED, False, True).applyTo(currentProgram)
def set_data_comment(self, address: int, cmt: str):
setEOLComment(toAddr(address), cmt)
def set_function_comment(self, address: int, cmt: str):
setPlateComment(toAddr(address), cmt)
def set_data_name(self, address: int, name: str):
address = toAddr(address)
if len(name) > 2000:
print("Name length exceeds 2000 characters, skipping (%s)" % name)
return
if not name.startswith("_ZN"):
createLabel(address, name, True)
return
cmd = DemanglerCmd(address, name)
if not cmd.applyTo(currentProgram, monitor):
print(f"Failed to apply demangled name to {name} at {address} due {cmd.getStatusMsg()}, falling back to mangled")
createLabel(address, name, True)
def set_function_name(self, address: int, name: str):
return self.set_data_name(address, name)
def add_cross_reference(self, from_address: int, to_address: int):
self.xrefs.addMemoryReference(toAddr(from_address), toAddr(to_address), RefType.DATA, SourceType.USER_DEFINED, 0)
def import_c_typedef(self, type_def: str):
# Code declarations are not supported in Ghidra
# This only affects string literals for metadata version < 19
# TODO: Replace with creating a DataType for enums
pass
class GhidraStatusHandler(BaseStatusHandler):
pass
def set_function_type(addr, sig):
make_function(addr)
typeSig = CParserUtils.parseSignature(None, currentProgram, sig)
ApplyFunctionSignatureCmd(toAddr(addr), typeSig, SourceType.USER_DEFINED, False, True).applyTo(currentProgram)
def set_type(addr, cppType):
if cppType.startswith('struct '):
cppType = cppType[7:]
try:
t = getDataTypes(cppType)[0]
addr = toAddr(addr)
removeDataAt(addr)
createData(addr, t)
except:
print("Failed to set type: %s" % cppType)
def set_comment(addr, text):
setEOLComment(toAddr(addr), text)
def set_header_comment(addr, text):
setPlateComment(toAddr(addr), text)
def script_prologue(status):
# Check that the user has parsed the C headers first
if len(getDataTypes('Il2CppObject')) == 0:
print('STOP! You must import the generated C header file (%TYPE_HEADER_RELATIVE_PATH%) before running this script.')
print('See https://github.com/djkaty/Il2CppInspector/blob/master/README.md#adding-metadata-to-your-ghidra-workflow for instructions.')
sys.exit()
# Ghidra sets the image base for ELF to 0x100000 for some reason
# https://github.com/NationalSecurityAgency/ghidra/issues/1020
# Make sure that the base address is 0
# Without this, Ghidra may not analyze the binary correctly and you will just waste your time
# If 0 doesn't work for you, replace it with the base address from the output of the CLI or GUI
if currentProgram.getExecutableFormat().endswith('(ELF)'):
currentProgram.setImageBase(toAddr(0), True)
# Don't trigger decompiler
setAnalysisOption(currentProgram, "Call Convention ID", "false")
def get_script_directory(): return getSourceFile().getParentFile().toString()
def script_epilogue(status): pass
def add_function_to_group(addr, group): pass
def add_xref(addr, to):
xrefs.addMemoryReference(currentAddress.getAddress(hex(addr)), currentAddress.getAddress(hex(to)), RefType.DATA, SourceType.USER_DEFINED, 0)
def process_string_literals(status, data):
for d in jsonData['stringLiterals']:
define_string(d)
# I don't know how to make inline strings in Ghidra
# Just revert back original impl
addr = parse_address(d)
set_name(addr, d['name'])
set_type(addr, r'struct String *')
set_comment(addr, d['string'])
status.update_progress()
class StatusHandler(BaseStatusHandler): pass
status = GhidraStatusHandler()
backend = GhidraDisassemblerInterface()
context = ScriptContext(backend, status)
context.process()

View File

@@ -8,6 +8,8 @@ import ida_nalt
import ida_ida
import ida_ua
import ida_segment
import ida_funcs
import ida_xref
try: # 7.7+
import ida_srclang
@@ -23,206 +25,205 @@ try:
except ImportError:
FOLDERS_AVAILABLE = False
cached_genflags = 0
skip_make_function = False
func_dirtree = None
is_32_bit = False
fake_segments_base = None
def script_prologue(status):
global cached_genflags, skip_make_function, func_dirtree, is_32_bit, fake_segments_base
# Disable autoanalysis
cached_genflags = ida_ida.inf_get_genflags()
ida_ida.inf_set_genflags(cached_genflags & ~ida_ida.INFFL_AUTO)
# Unload type libraries we know to cause issues - like the c++ linux one
PLATFORMS = ["x86", "x64", "arm", "arm64"]
PROBLEMATIC_TYPELIBS = ["gnulnx"]
for lib in PROBLEMATIC_TYPELIBS:
for platform in PLATFORMS:
ida_typeinf.del_til(f"{lib}_{platform}")
# Set name mangling to GCC 3.x and display demangled as default
ida_ida.inf_set_demnames(ida_ida.DEMNAM_GCC3 | ida_ida.DEMNAM_NAME)
status.update_step('Processing Types')
if IDACLANG_AVAILABLE:
header_path = os.path.join(get_script_directory(), "%TYPE_HEADER_RELATIVE_PATH%")
ida_srclang.set_parser_argv("clang", "-target x86_64-pc-linux -x c++ -D_IDACLANG_=1") # -target required for 8.3+
ida_srclang.parse_decls_with_parser("clang", None, header_path, True)
else:
original_macros = ida_typeinf.get_c_macros()
ida_typeinf.set_c_macros(original_macros + ";_IDA_=1")
ida_typeinf.idc_parse_types(os.path.join(get_script_directory(), "%TYPE_HEADER_RELATIVE_PATH%"), ida_typeinf.PT_FILE)
ida_typeinf.set_c_macros(original_macros)
# Skip make_function on Windows GameAssembly.dll files due to them predefining all functions through pdata which makes the method very slow
skip_make_function = ida_segment.get_segm_by_name(".pdata") is not None
if skip_make_function:
print(".pdata section found, skipping function boundaries")
if FOLDERS_AVAILABLE:
func_dirtree = ida_dirtree.get_std_dirtree(ida_dirtree.DIRTREE_FUNCS)
is_32_bit = ida_ida.inf_is_32bit_exactly()
def script_epilogue(status):
# Reenable auto-analysis
global cached_genflags
ida_ida.inf_set_genflags(cached_genflags)
# Utility methods
def set_name(addr, name):
ida_name.set_name(addr, name, ida_name.SN_NOWARN | ida_name.SN_NOCHECK | ida_name.SN_FORCE)
def make_function(start, end = None):
global skip_make_function
if skip_make_function:
return
ida_bytes.del_items(start, ida_bytes.DELIT_SIMPLE, 12) # Undefine x bytes which should hopefully be enough for the first instruction
ida_ua.create_insn(start) # Create instruction at start
if not ida_funcs.add_func(start, end if end is not None else ida_idaapi.BADADDR): # This fails if the function doesn't start with an instruction
print(f"failed to mark function {hex(start)}-{hex(end) if end is not None else '???'} as function")
TYPE_CACHE = {}
def get_type(typeName):
if typeName not in TYPE_CACHE:
info = ida_typeinf.idc_parse_decl(None, typeName, ida_typeinf.PT_RAWARGS)
if info is None:
print(f"Failed to create type {typeName}.")
return None
TYPE_CACHE[typeName] = info[1:]
return TYPE_CACHE[typeName]
#try:
# from typing import TYPE_CHECKING
# if TYPE_CHECKING:
# from ..shared_base import BaseStatusHandler, BaseDisassemblerInterface, ScriptContext
# import json
# import os
# from datetime import datetime
#except:
# pass
TINFO_DEFINITE = 0x0001 # These only exist in idc for some reason, so we redefine it here
DEFAULT_TIL: "til_t" = None # type: ignore
def set_type(addr, cppType):
cppType += ';'
class IDADisassemblerInterface(BaseDisassemblerInterface):
supports_fake_string_segment = True
info = get_type(cppType)
if info is None:
return
_status: BaseStatusHandler
if ida_typeinf.apply_type(None, info[0], info[1], addr, TINFO_DEFINITE) is None:
print(f"set_type({hex(addr)}, {cppType}); failed!")
def set_function_type(addr, sig):
set_type(addr, sig)
def make_array(addr, numItems, cppType):
set_type(addr, cppType)
flags = ida_bytes.get_flags(addr)
if ida_bytes.is_struct(flags):
opinfo = ida_nalt.opinfo_t()
ida_bytes.get_opcode(opinfo, addr, 0, flags)
entrySize = ida_bytes.get_data_elsize(addr, flags, opinfo)
tid = opinfo.tid
else:
entrySize = ida_bytes.get_item_size(addr)
tid = ida_idaapi.BADADDR
ida_bytes.create_data(addr, flags, numItems * entrySize, tid)
def define_code(code):
ida_typeinf.idc_parse_types(code)
def set_comment(addr, comment, repeatable = True):
ida_bytes.set_cmt(addr, comment, repeatable)
def set_header_comment(addr, comment):
func = ida_funcs.get_func(addr)
if func is None:
return
ida_funcs.set_func_cmt(func, comment, True)
def get_script_directory():
return os.path.dirname(os.path.realpath(__file__))
folders = []
def add_function_to_group(addr, group):
global func_dirtree, folders
return
if not FOLDERS_AVAILABLE:
return
if group not in folders:
folders.append(group)
func_dirtree.mkdir(group)
name = ida_funcs.get_func_name(addr)
func_dirtree.rename(name, f"{group}/{name}")
def add_xref(addr, to):
ida_xref.add_dref(addr, to, ida_xref.XREF_USER | ida_xref.dr_I)
def write_string(addr, string):
encoded_string = string.encode() + b'\x00'
string_length = len(encoded_string)
ida_bytes.put_bytes(addr, encoded_string)
ida_bytes.create_strlit(addr, string_length, ida_nalt.STRTYPE_C)
def write_address(addr, value):
global is_32_bit
if is_32_bit:
ida_bytes.put_dword(addr, value)
else:
ida_bytes.put_qword(addr, value)
def create_fake_segment(name, size):
global is_32_bit
start = ida_ida.inf_get_max_ea()
end = start + size
ida_segment.add_segm(0, start, end, name, "DATA")
segment = ida_segment.get_segm_by_name(name)
segment.bitness = 1 if is_32_bit else 2
segment.perm = ida_segment.SEGPERM_READ
segment.update()
return start
def process_string_literals(status, data):
total_string_length = 0
for d in data['stringLiterals']:
total_string_length += len(d["string"]) + 1
_type_cache: dict
_folders: list
aligned_length = total_string_length + (4096 - (total_string_length % 4096))
segment_base = create_fake_segment(".fake_strings", aligned_length)
_function_dirtree: "ida_dirtree.dirtree_t"
_cached_genflags: int
_skip_function_creation: bool
_is_32_bit: bool
_fake_segments_base: int
current_string_address = segment_base
for d in data['stringLiterals']:
define_string(d)
def __init__(self, status: BaseStatusHandler):
self._status = status
self._type_cache = {}
self._folders = []
ref_addr = parse_address(d)
write_string(current_string_address, d["string"])
write_address(ref_addr, current_string_address)
set_type(ref_addr, r'const char* const')
self._cached_genflags = 0
self._skip_function_creation = False
self._is_32_bit = False
self._fake_segments_base = 0
current_string_address += len(d["string"]) + 1
status.update_progress()
def _get_type(self, type: str):
if type not in self._type_cache:
info = ida_typeinf.idc_parse_decl(DEFAULT_TIL, type, ida_typeinf.PT_RAWARGS)
if info is None:
print(f"Failed to create type {type}.")
return None
self._type_cache[type] = info[1:]
return self._type_cache[type]
def get_script_directory(self) -> str:
return os.path.dirname(os.path.realpath(__file__))
def on_start(self):
# Disable autoanalysis
self._cached_genflags = ida_ida.inf_get_genflags()
ida_ida.inf_set_genflags(self._cached_genflags & ~ida_ida.INFFL_AUTO)
# Unload type libraries we know to cause issues - like the c++ linux one
PLATFORMS = ["x86", "x64", "arm", "arm64"]
PROBLEMATIC_TYPELIBS = ["gnulnx"]
for lib in PROBLEMATIC_TYPELIBS:
for platform in PLATFORMS:
ida_typeinf.del_til(f"{lib}_{platform}")
# Set name mangling to GCC 3.x and display demangled as default
ida_ida.inf_set_demnames(ida_ida.DEMNAM_GCC3 | ida_ida.DEMNAM_NAME)
self._status.update_step('Processing Types')
if IDACLANG_AVAILABLE:
header_path = os.path.join(self.get_script_directory(), "%TYPE_HEADER_RELATIVE_PATH%")
ida_srclang.set_parser_argv("clang", "-target x86_64-pc-linux -x c++ -D_IDACLANG_=1") # -target required for 8.3+
ida_srclang.parse_decls_with_parser("clang", None, header_path, True)
else:
original_macros = ida_typeinf.get_c_macros()
ida_typeinf.set_c_macros(original_macros + ";_IDA_=1")
ida_typeinf.idc_parse_types(os.path.join(self.get_script_directory(), "%TYPE_HEADER_RELATIVE_PATH%"), ida_typeinf.PT_FILE)
ida_typeinf.set_c_macros(original_macros)
# Skip make_function on Windows GameAssembly.dll files due to them predefining all functions through pdata which makes the method very slow
skip_make_function = ida_segment.get_segm_by_name(".pdata") is not None
if skip_make_function:
print(".pdata section found, skipping function boundaries")
if FOLDERS_AVAILABLE:
self._function_dirtree = ida_dirtree.get_std_dirtree(ida_dirtree.DIRTREE_FUNCS)
self._is_32_bit = ida_ida.inf_is_32bit_exactly()
def on_finish(self):
ida_ida.inf_set_genflags(self._cached_genflags)
def define_function(self, address: int, end: int | None = None):
if self._skip_function_creation:
return
ida_bytes.del_items(address, ida_bytes.DELIT_SIMPLE, 12) # Undefine x bytes which should hopefully be enough for the first instruction
ida_ua.create_insn(address) # Create instruction at start
if not ida_funcs.add_func(address, end if end is not None else ida_idaapi.BADADDR): # This fails if the function doesn't start with an instruction
print(f"failed to mark function {hex(address)}-{hex(end) if end is not None else '???'} as function")
def define_data_array(self, address: int, type: str, count: int):
self.set_data_type(address, type)
flags = ida_bytes.get_flags(address)
if ida_bytes.is_struct(flags):
opinfo = ida_nalt.opinfo_t()
ida_bytes.get_opinfo(opinfo, address, 0, flags)
entrySize = ida_bytes.get_data_elsize(address, flags, opinfo)
tid = opinfo.tid
else:
entrySize = ida_bytes.get_item_size(address)
tid = ida_idaapi.BADADDR
ida_bytes.create_data(address, flags, count * entrySize, tid)
def set_data_type(self, address: int, type: str):
type += ';'
info = self._get_type(type)
if info is None:
return
if ida_typeinf.apply_type(DEFAULT_TIL, info[0], info[1], address, TINFO_DEFINITE) is None:
print(f"set_type({hex(address)}, {type}); failed!")
def set_function_type(self, address: int, type: str):
self.set_data_type(address, type)
def set_data_comment(self, address: int, cmt: str):
ida_bytes.set_cmt(address, cmt, False)
def set_function_comment(self, address: int, cmt: str):
func = ida_funcs.get_func(address)
if func is None:
return
ida_funcs.set_func_cmt(func, cmt, True)
def set_data_name(self, address: int, name: str):
ida_name.set_name(address, name, ida_name.SN_NOWARN | ida_name.SN_NOCHECK | ida_name.SN_FORCE)
def set_function_name(self, address: int, name: str):
self.set_data_name(address, name)
def add_cross_reference(self, from_address: int, to_address: int):
ida_xref.add_dref(from_address, to_address, ida_xref.XREF_USER | ida_xref.dr_I)
def import_c_typedef(self, type_def: str):
ida_typeinf.idc_parse_types(type_def, 0)
# optional
def add_function_to_group(self, address: int, group: str):
if not FOLDERS_AVAILABLE or True: # enable at your own risk - this is slow
return
if group not in self._folders:
self._folders.append(group)
self._function_dirtree.mkdir(group)
name = ida_funcs.get_func_name(address)
self._function_dirtree.rename(name, f"{group}/{name}")
# only required if supports_fake_string_segment == True
def create_fake_segment(self, name: str, size: int) -> int:
start = ida_ida.inf_get_max_ea()
end = start + size
ida_segment.add_segm(0, start, end, name, "DATA")
segment = ida_segment.get_segm_by_name(name)
segment.bitness = 1 if self._is_32_bit else 2
segment.perm = ida_segment.SEGPERM_READ
segment.update()
return start
def write_string(self, address: int, value: str):
encoded_string = value.encode() + b'\x00'
string_length = len(encoded_string)
ida_bytes.put_bytes(address, encoded_string)
ida_bytes.create_strlit(address, string_length, ida_nalt.STRTYPE_C)
def write_address(self, address: int, value: int):
if self._is_32_bit:
ida_bytes.put_dword(address, value)
else:
ida_bytes.put_qword(address, value)
# Status handler
class StatusHandler(BaseStatusHandler):
class IDAStatusHandler(BaseStatusHandler):
def __init__(self):
self.step = "Initializing"
self.max_items = 0
self.current_items = 0
self.start_time = datetime.datetime.now()
self.start_time = datetime.now()
self.step_start_time = self.start_time
self.last_updated_time = datetime.datetime.min
self.last_updated_time = datetime.min
def initialize(self):
ida_kernwin.show_wait_box("Processing")
@@ -231,7 +232,7 @@ class StatusHandler(BaseStatusHandler):
if self.was_cancelled():
raise RuntimeError("Cancelled script.")
current_time = datetime.datetime.now()
current_time = datetime.now()
if 0.5 > (current_time - self.last_updated_time).total_seconds():
return
@@ -254,8 +255,8 @@ Elapsed: {step_time} ({total_time})
self.step = step
self.max_items = max_items
self.current_items = 0
self.step_start_time = datetime.datetime.now()
self.last_updated_time = datetime.datetime.min
self.step_start_time = datetime.now()
self.last_updated_time = datetime.min
self.update()
def update_progress(self, new_progress = 1):
@@ -265,5 +266,10 @@ Elapsed: {step_time} ({total_time})
def was_cancelled(self):
return ida_kernwin.user_cancelled()
def close(self):
ida_kernwin.hide_wait_box()
def shutdown(self):
ida_kernwin.hide_wait_box()
status = IDAStatusHandler()
backend = IDADisassemblerInterface(status)
context = ScriptContext(backend, status)
context.process()