Ghidra script improvements (#7)

* Always set image base to 0 for Ghidra

* Script improvements

Everything here is for Ghidra:
* Use `%` formatting instead of f-strings (Ghidra still uses Python 2.7, whereas f-strings were added in 3.6)
* Handle errors when applying type (Ghidra throws exception unlike IDA)
* Don't trigger decompiler (analysis will be faster)
* Revert back string literals

* Set image base to zero only for ELF

I don't know about PE with Ghidra

* Update README for disassemblers

* IDA 7.6 required due to `ida_ida.inf_is_32bit_exactly()`

* Ghidra no longer launches the decompiler for the whole binary

* Set image base to 0 in script

Remove %IMAGE_BASE% since we don't use it anymore

* Create XREFs for Ghidra

Now you can jump from `MethodInfo` to actual method

* Fix demangler for Ghidra

Ghidra's demangler can process only functions in auto analysis. Now both `TypeInfo` and `MethodInfo` are displayed properly

---------

Co-authored-by: commonuserlol <commonuserlol@users.noreply.github.com>
This commit is contained in:
commonuserlol
2024-08-10 22:00:41 +03:00
committed by GitHub
parent 61087849bd
commit 665e70324f
5 changed files with 74 additions and 35 deletions

View File

@@ -61,8 +61,7 @@ namespace Il2CppInspector.Outputs
.Replace("%SCRIPTFILENAME%", Path.GetFileName(outputFile))
.Replace("%TYPE_HEADER_RELATIVE_PATH%", typeHeaderRelativePath.ToEscapedString())
.Replace("%JSON_METADATA_RELATIVE_PATH%", jsonMetadataRelativePath.ToEscapedString())
.Replace("%TARGET_UNITY_VERSION%", model.UnityHeaders.ToString())
.Replace("%IMAGE_BASE%", model.Package.BinaryImage.ImageBase.ToAddressString());
.Replace("%TARGET_UNITY_VERSION%", model.UnityHeaders.ToString());
File.WriteAllText(outputFile, script);
}

View File

@@ -4,9 +4,19 @@ from ghidra.app.script import GhidraScriptUtil
from ghidra.app.util.cparser.C import CParserUtils
from ghidra.program.model.data import ArrayDataType
from ghidra.program.model.symbol import SourceType
from ghidra.program.model.symbol import RefType
from ghidra.app.cmd.label import DemanglerCmd
xrefs = currentProgram.getReferenceManager()
def set_name(addr, name):
    """Label the location `addr` with `name`.

    Names that start with "_ZN" are handed to Ghidra's DemanglerCmd so the
    demangled form is applied. Every other name, and any name the demangler
    fails on, is applied verbatim with createLabel.

    addr -- integer address of the location to name
    name -- symbol name, possibly mangled
    """
    # toAddr() takes the integer directly. This avoids a string round-trip
    # through hex() (which appends 'L' to Python 2 longs) and does not
    # depend on currentAddress being set.
    address = toAddr(addr)

    if not name.startswith("_ZN"):
        createLabel(address, name, True)
        return

    cmd = DemanglerCmd(address, name)
    if not cmd.applyTo(currentProgram, monitor):
        print("Failed to apply demangled name to %s at %s due %s, falling back to mangled" % (name, hex(addr), cmd.getStatusMsg()))
        # Keep the mangled name so the location is still labelled
        createLabel(address, name, True)
def make_function(start, end = None):
addr = toAddr(start)
@@ -41,10 +51,13 @@ def set_type(addr, cppType):
if cppType.startswith('struct '):
cppType = cppType[7:]
t = getDataTypes(cppType)[0]
addr = toAddr(addr)
removeDataAt(addr)
createData(addr, t)
try:
t = getDataTypes(cppType)[0]
addr = toAddr(addr)
removeDataAt(addr)
createData(addr, t)
except:
print("Failed to set type: %s" % cppType)
def set_comment(addr, text):
setEOLComment(toAddr(addr), text)
@@ -61,15 +74,33 @@ def script_prologue(status):
# Ghidra sets the image base for ELF to 0x100000 for some reason
# https://github.com/NationalSecurityAgency/ghidra/issues/1020
# Make sure that the base address is 0
# Without this, Ghidra may not analyze the binary correctly and you will just waste your time
# If 0 doesn't work for you, replace it with the base address from the output of the CLI or GUI
if currentProgram.getExecutableFormat().endswith('(ELF)'):
currentProgram.setImageBase(toAddr(%IMAGE_BASE%), True)
currentProgram.setImageBase(toAddr(0), True)
# Don't trigger decompiler
setAnalysisOption(currentProgram, "Call Convention ID", "false")
def get_script_directory(): return getSourceFile().getParentFile().toString()
def script_epilogue(status): pass
def add_function_to_group(addr, group): pass
def add_xref(addr, to): pass
def create_fake_segment(name, size): pass
def write_string(addr, string): pass
def write_address(addr, value): pass
def add_xref(addr, to):
    """Add a user-defined DATA memory reference from `addr` to `to`.

    addr -- integer address the reference originates from
    to   -- integer address the reference points at
    """
    # toAddr() converts the integers directly; no hex() string round-trip
    # (which appends 'L' to Python 2 longs) and no reliance on currentAddress.
    xrefs.addMemoryReference(toAddr(addr), toAddr(to), RefType.DATA, SourceType.USER_DEFINED, 0)
def process_string_literals(status, data):
    """Define, name, type and comment every entry in data['stringLiterals'].

    status -- progress reporter; update_progress() is called once per literal
    data   -- mapping with a 'stringLiterals' list; each entry provides
              'name' and 'string' plus whatever parse_address() reads
    """
    # Iterate the `data` argument rather than a module-level global so the
    # function works on whatever the caller passes in.
    for d in data['stringLiterals']:
        define_string(d)

        # Inline strings are not created for Ghidra; each literal is instead
        # named, typed as `struct String *` and given its text as an EOL comment.
        addr = parse_address(d)
        set_name(addr, d['name'])
        set_type(addr, r'struct String *')
        set_comment(addr, d['string'])

        status.update_progress()
class StatusHandler(BaseStatusHandler): pass

View File

@@ -193,6 +193,26 @@ def create_fake_segment(name, size):
return start
def process_string_literals(status, data):
    """Copy every string literal into a ".fake_strings" segment and point its slot at the copy.

    status -- progress reporter; update_progress() is called once per literal
    data   -- mapping with a 'stringLiterals' list; each entry provides
              'string' plus whatever parse_address()/define_string() read
    """
    literals = data['stringLiterals']

    # One slot per character plus one extra byte per string
    # (presumably the terminator - confirm against write_string).
    required = sum(len(entry["string"]) + 1 for entry in literals)

    # Pad up to the next multiple of 4096; an already-aligned size still
    # gains a further 4096.
    padded = required + (4096 - (required % 4096))

    cursor = create_fake_segment(".fake_strings", padded)

    for entry in literals:
        define_string(entry)

        text = entry["string"]
        target = parse_address(entry)

        write_string(cursor, text)
        write_address(target, cursor)
        set_type(target, r'const char* const')

        # Advance past this string and its extra byte
        cursor += len(text) + 1
        status.update_progress()
# Status handler
class StatusHandler(BaseStatusHandler):

View File

@@ -96,25 +96,7 @@ def process_json(jsonData, status):
if 'virtualAddress' in jsonData['stringLiterals'][0]:
status.update_step('Processing string literals (V19+)', len(jsonData['stringLiterals']))
total_string_length = 0
for d in jsonData['stringLiterals']:
total_string_length += len(d["string"]) + 1
aligned_length = total_string_length + (4096 - (total_string_length % 4096))
segment_base = create_fake_segment(".fake_strings", aligned_length)
current_string_address = segment_base
for d in jsonData['stringLiterals']:
define_string(d)
ref_addr = parse_address(d)
write_string(current_string_address, d["string"])
write_address(ref_addr, current_string_address)
set_type(ref_addr, r'const char* const')
current_string_address += len(d["string"]) + 1
status.update_progress()
process_string_literals(status, jsonData)
# String literals for version < 19
else:
@@ -195,6 +177,6 @@ try:
script_epilogue(status)
status.update_step('Script execution complete.')
print(f"Took: {datetime.datetime.now() - start_time}")
print("Took: %s" % (datetime.datetime.now() - start_time))
except RuntimeError: pass
finally: status.close()