Ghidra script improvements (#7)

* Always set image base to 0 for Ghidra

* Script improvements

Everything here is for Ghidra:
* Use `%` formatting instead of f-strings (Ghidra still uses Python 2.7, whereas f-strings were added in 3.6)
* Handle errors when applying a type (Ghidra throws an exception, unlike IDA)
* Don't trigger decompiler (analysis will be faster)
* Revert back string literals

* Set image base to zero only for ELF

I don't know about PE with Ghidra

* Update README for disassemblers

* IDA 7.6 is required due to `ida_ida.inf_is_32bit_exactly()`

* Ghidra no longer launches the decompiler for the whole binary

* Set image base to 0 in script

Remove %IMAGE_BASE% since we don't use it anymore

* Create XREFs for Ghidra

Now you can jump from `MethodInfo` to actual method

* Fix demangler for Ghidra

Ghidra's demangler can only process functions during auto analysis. Now both `TypeInfo` and `MethodInfo` are displayed properly

---------

Co-authored-by: commonuserlol <commonuserlol@users.noreply.github.com>
This commit is contained in:
commonuserlol
2024-08-10 22:00:41 +03:00
committed by GitHub
parent 61087849bd
commit 665e70324f
5 changed files with 74 additions and 35 deletions

View File

@@ -4,9 +4,19 @@ from ghidra.app.script import GhidraScriptUtil
from ghidra.app.util.cparser.C import CParserUtils
from ghidra.program.model.data import ArrayDataType
from ghidra.program.model.symbol import SourceType
from ghidra.program.model.symbol import RefType
from ghidra.app.cmd.label import DemanglerCmd
# Reference manager of the current program; add_xref() uses it to create memory references
xrefs = currentProgram.getReferenceManager()
def set_name(addr, name):
    """Label the address offset `addr` with `name`, demangling it when possible.

    Names that do not start with "_ZN" are applied verbatim as a label.
    Names that do are handed to Ghidra's DemanglerCmd; if the command fails,
    a message is printed and the mangled name is applied as a plain label.
    """
    if not name.startswith("_ZN"):
        createLabel(toAddr(addr), name, True)
        return

    # toAddr() takes the integer offset directly. This avoids a round-trip
    # through hex() (which appends "L" to long values on Python 2) and does
    # not depend on currentAddress being set.
    cmd = DemanglerCmd(toAddr(addr), name)
    if not cmd.applyTo(currentProgram, monitor):
        print("Failed to apply demangled name to %s at %s due %s, falling back to mangled" % (name, hex(addr), cmd.getStatusMsg()))
        createLabel(toAddr(addr), name, True)
def make_function(start, end = None):
addr = toAddr(start)
@@ -41,10 +51,13 @@ def set_type(addr, cppType):
if cppType.startswith('struct '):
cppType = cppType[7:]
t = getDataTypes(cppType)[0]
addr = toAddr(addr)
removeDataAt(addr)
createData(addr, t)
try:
t = getDataTypes(cppType)[0]
addr = toAddr(addr)
removeDataAt(addr)
createData(addr, t)
except:
print("Failed to set type: %s" % cppType)
def set_comment(addr, text):
    """Set `text` as the end-of-line comment at the address offset `addr`."""
    target = toAddr(addr)
    setEOLComment(target, text)
@@ -61,15 +74,33 @@ def script_prologue(status):
# Ghidra sets the image base for ELF to 0x100000 for some reason
# https://github.com/NationalSecurityAgency/ghidra/issues/1020
# Make sure that the base address is 0
# Without this, Ghidra may not analyze the binary correctly and you will just waste your time
# If 0 doesn't work for you, replace it with the base address from the output of the CLI or GUI
if currentProgram.getExecutableFormat().endswith('(ELF)'):
currentProgram.setImageBase(toAddr(%IMAGE_BASE%), True)
currentProgram.setImageBase(toAddr(0), True)
# Don't trigger decompiler
setAnalysisOption(currentProgram, "Call Convention ID", "false")
def get_script_directory():
    """Return the parent directory of this script's source file as a string."""
    script_file = getSourceFile()
    return script_file.getParentFile().toString()
def script_epilogue(status):
    """No-op in this script; `status` is ignored and None is returned."""
    return None
def add_function_to_group(addr, group):
    """No-op in this script; both arguments are ignored and None is returned."""
    return None
def create_fake_segment(name, size):
    """No-op in this script; arguments are ignored."""
    pass


def write_string(addr, string):
    """No-op in this script; arguments are ignored."""
    pass


def write_address(addr, value):
    """No-op in this script; arguments are ignored."""
    pass


def add_xref(addr, to):
    """Add a user-defined DATA memory reference from offset `addr` to offset `to`.

    Both arguments are integer address offsets. They are converted with
    toAddr() rather than by formatting through hex(), which appends "L" to
    long values on Python 2, and without depending on currentAddress being set.
    """
    # The last argument is the operand index passed to addMemoryReference.
    xrefs.addMemoryReference(toAddr(addr), toAddr(to), RefType.DATA, SourceType.USER_DEFINED, 0)
def process_string_literals(status, data):
    """Name, type and comment every string literal entry, reporting progress.

    For each entry the address is obtained with parse_address(), the entry's
    'name' is applied as the symbol name, the data type is set to
    `struct String *`, and the entry's 'string' value becomes the comment.
    status.update_progress() is called once per entry.
    """
    # NOTE(review): the `data` parameter is unused; entries are read from a
    # `jsonData` name that is not defined in this function - confirm against
    # the caller whether `data` should be used instead.
    for d in jsonData['stringLiterals']:
        # Inline strings are not created here (no known way to do so in
        # Ghidra), so each literal is handled individually instead.
        addr = parse_address(d)
        set_name(addr, d['name'])
        set_type(addr, r'struct String *')
        set_comment(addr, d['string'])
        status.update_progress()
class StatusHandler(BaseStatusHandler):
    """Status handler for this script; adds nothing to BaseStatusHandler."""