diff --git a/Il2CppInspector.Common/Outputs/PythonScript.cs b/Il2CppInspector.Common/Outputs/PythonScript.cs index 9be54838..48cf5aa4 100644 --- a/Il2CppInspector.Common/Outputs/PythonScript.cs +++ b/Il2CppInspector.Common/Outputs/PythonScript.cs @@ -61,8 +61,7 @@ public void WriteScriptToFile(string outputFile, string target, string existingT .Replace("%SCRIPTFILENAME%", Path.GetFileName(outputFile)) .Replace("%TYPE_HEADER_RELATIVE_PATH%", typeHeaderRelativePath.ToEscapedString()) .Replace("%JSON_METADATA_RELATIVE_PATH%", jsonMetadataRelativePath.ToEscapedString()) - .Replace("%TARGET_UNITY_VERSION%", model.UnityHeaders.ToString()) - .Replace("%IMAGE_BASE%", model.Package.BinaryImage.ImageBase.ToAddressString()); + .Replace("%TARGET_UNITY_VERSION%", model.UnityHeaders.ToString()); File.WriteAllText(outputFile, script); } diff --git a/Il2CppInspector.Common/Outputs/ScriptResources/Targets/Ghidra.py b/Il2CppInspector.Common/Outputs/ScriptResources/Targets/Ghidra.py index 73dd487d..ce897927 100644 --- a/Il2CppInspector.Common/Outputs/ScriptResources/Targets/Ghidra.py +++ b/Il2CppInspector.Common/Outputs/ScriptResources/Targets/Ghidra.py @@ -4,9 +4,19 @@ from ghidra.app.util.cparser.C import CParserUtils from ghidra.program.model.data import ArrayDataType from ghidra.program.model.symbol import SourceType +from ghidra.program.model.symbol import RefType +from ghidra.app.cmd.label import DemanglerCmd + +xrefs = currentProgram.getReferenceManager() def set_name(addr, name): - createLabel(toAddr(addr), name, True) + if not name.startswith("_ZN"): + createLabel(toAddr(addr), name, True) + return + cmd = DemanglerCmd(toAddr(addr), name) + if not cmd.applyTo(currentProgram, monitor): + print("Failed to apply demangled name to %s at %s due %s, falling back to mangled" % (name, hex(addr), cmd.getStatusMsg())) + createLabel(toAddr(addr), name, True) def make_function(start, end = None): addr = toAddr(start) @@ -41,10 +51,13 @@ def set_type(addr, 
cppType): if cppType.startswith('struct '): cppType = cppType[7:] - t = getDataTypes(cppType)[0] - addr = toAddr(addr) - removeDataAt(addr) - createData(addr, t) + try: + t = getDataTypes(cppType)[0] + addr = toAddr(addr) + removeDataAt(addr) + createData(addr, t) + except: + print("Failed to set type: %s" % cppType) def set_comment(addr, text): setEOLComment(toAddr(addr), text) @@ -61,15 +74,33 @@ def script_prologue(status): # Ghidra sets the image base for ELF to 0x100000 for some reason # https://github.com/NationalSecurityAgency/ghidra/issues/1020 + # Make sure that the base address is 0 + # Without this, Ghidra may not analyze the binary correctly and you will just waste your time + # If 0 doesn't work for you, replace it with the base address from the output of the CLI or GUI if currentProgram.getExecutableFormat().endswith('(ELF)'): - currentProgram.setImageBase(toAddr(%IMAGE_BASE%), True) + currentProgram.setImageBase(toAddr(0), True) + + # Don't trigger decompiler + setAnalysisOption(currentProgram, "Call Convention ID", "false") def get_script_directory(): return getSourceFile().getParentFile().toString() def script_epilogue(status): pass def add_function_to_group(addr, group): pass -def add_xref(addr, to): pass -def create_fake_segment(name, size): pass -def write_string(addr, string): pass -def write_address(addr, value): pass +def add_xref(addr, to): + xrefs.addMemoryReference(toAddr(addr), toAddr(to), RefType.DATA, SourceType.USER_DEFINED, 0) + +def process_string_literals(status, data): + for d in data['stringLiterals']: + define_string(d) + + # I don't know how to make inline strings in Ghidra + # Just revert back original impl + addr = parse_address(d) + set_name(addr, d['name']) + set_type(addr, r'struct String *') + set_comment(addr, d['string']) + + status.update_progress() + class StatusHandler(BaseStatusHandler): pass diff --git a/Il2CppInspector.Common/Outputs/ScriptResources/Targets/IDA.py 
b/Il2CppInspector.Common/Outputs/ScriptResources/Targets/IDA.py index 67de37f2..ace0f7f9 100644 --- a/Il2CppInspector.Common/Outputs/ScriptResources/Targets/IDA.py +++ b/Il2CppInspector.Common/Outputs/ScriptResources/Targets/IDA.py @@ -193,6 +193,26 @@ def create_fake_segment(name, size): return start +def process_string_literals(status, data): + total_string_length = 0 + for d in data['stringLiterals']: + total_string_length += len(d["string"]) + 1 + + aligned_length = total_string_length + (4096 - (total_string_length % 4096)) + segment_base = create_fake_segment(".fake_strings", aligned_length) + + current_string_address = segment_base + for d in data['stringLiterals']: + define_string(d) + + ref_addr = parse_address(d) + write_string(current_string_address, d["string"]) + write_address(ref_addr, current_string_address) + set_type(ref_addr, r'const char* const') + + current_string_address += len(d["string"]) + 1 + status.update_progress() + # Status handler class StatusHandler(BaseStatusHandler): diff --git a/Il2CppInspector.Common/Outputs/ScriptResources/shared-main.py b/Il2CppInspector.Common/Outputs/ScriptResources/shared-main.py index 13728cf0..ee02b631 100644 --- a/Il2CppInspector.Common/Outputs/ScriptResources/shared-main.py +++ b/Il2CppInspector.Common/Outputs/ScriptResources/shared-main.py @@ -96,25 +96,7 @@ def process_json(jsonData, status): if 'virtualAddress' in jsonData['stringLiterals'][0]: status.update_step('Processing string literals (V19+)', len(jsonData['stringLiterals'])) - total_string_length = 0 - for d in jsonData['stringLiterals']: - total_string_length += len(d["string"]) + 1 - - aligned_length = total_string_length + (4096 - (total_string_length % 4096)) - segment_base = create_fake_segment(".fake_strings", aligned_length) - - current_string_address = segment_base - for d in jsonData['stringLiterals']: - define_string(d) - - ref_addr = parse_address(d) - write_string(current_string_address, d["string"]) - write_address(ref_addr, 
current_string_address) - set_type(ref_addr, r'const char* const') - - current_string_address += len(d["string"]) + 1 - status.update_progress() - + process_string_literals(status, jsonData) # String literals for version < 19 else: @@ -195,6 +177,6 @@ def process_json(jsonData, status): script_epilogue(status) status.update_step('Script execution complete.') - print(f"Took: {datetime.datetime.now() - start_time}") + print("Took: %s" % (datetime.datetime.now() - start_time)) except RuntimeError: pass finally: status.close() diff --git a/README.md b/README.md index aeb99321..fa195bd4 100644 --- a/README.md +++ b/README.md @@ -20,9 +20,9 @@ This is a continuation of [Il2CppInspector, by djkaty](https://github.com/djkaty - Made ValueTypes use their non-boxed variants when used as the this parameter - Added labeling of FieldInfo/FieldRva MetadataUsages and their respective values as comments - Implemented name mangling to properly display generics and other normally-replaced characters + - Much faster processing compared to the old version * Overhauled IDA script: - Added a progress indicator box with the current step, progress, and elapsed time - - Much faster processing compared to the old version - Automatic disabling and re-enabling of autoanalysis - Automatic unloading of conflicting type libraries - Addition of custom fake string segment to show string literal contents in decompiler @@ -321,6 +321,10 @@ The `--seperate-attributes` switch directs Il2CppInspector to put assembly-level ### Adding metadata to your IDA workflow +**NOTE:** IDA 7.6+ is required, but 7.7 is recommended. + +**NOTE:** Run the script as soon as possible after IDA loads the binary into the database. + Simply run Il2CppInspector with the `-p` switch to choose the IDA script output file. Load your binary file into IDA, press Alt+F7 and select the generated script. Observe the Output Window while IDA analyzes the file - this may take a long time. 
Three files are generated by Il2CppInspector for IDAPython: the Python script itself, a JSON metadata file and a C++ type header file (this is `cpp/appdata/il2cpp-types.h` by default if you used the CLI, or `il2cpp.h` if you used the GUI). These files must be present for the script to run successfully. @@ -356,9 +360,11 @@ Three files are generated by Il2CppInspector for Ghidra: the Python script itsel If you know which version of Unity the binary was compiled with, you can improve the output by specifying this with `--unity-version`, for example `--unity-version 2019.3.1f1`. You can also supply any asset file from the application to detect the Unity version with `--unity-version-from-asset`. Otherwise Il2CppInspector will make an educated guess based on the contents of the binary. -**NOTE:** For best results, choose No when Ghidra asks if you would like to perform auto-analysis when the binary is first loaded. If you receive a `Conflicting data exists at address` error when running the script below, re-load the binary into the project and choose No at the auto-analysis prompt. +**NOTE:** Always choose No when Ghidra asks whether to perform auto-analysis. + +**NOTE:** For ELF files, set the image base to zero (`0x00000000`) in the load options for the binary. For compatibility reasons, executing the Ghidra Python script on an ELF file will change the file's image base to zero for you if necessary, however if the current image base is non-zero this may take a very long time to complete. Other file formats will retain the same image base. -**NOTE:** To significantly speed up analysis for ELF files, set the image base to zero (`0x00000000`) in the load options for the binary. For compatibility reasons, executing the Ghidra Python script on an ELF file will change the file's image base to zero for you if necessary, however if the current image base is non-zero this may take a very long time to complete. Other file formats will retain the same image base. 
+**NOTE:** To improve analysis time, from the _Code Browser_, choose _Edit -> Tool options -> Auto Analysis_ and change _Max Threads_ to your CPU core count. Click _OK_ and restart _Code Browser_. To import metadata into an existing Ghidra project: @@ -373,6 +379,7 @@ To import metadata into an existing Ghidra project: 6. If you have used scripts from Il2CppInspector for other binaries, ensure the Python files are named differently, or disable use of the previous script folder(s). 7. Click Refresh to make the script appear in _Script Manager_. 8. Right-click the script and choose _Run_. This may take a while to complete. +9. After the initial auto-analysis is done, run the script again to ensure correct decompilation. ![Ghidra import help](docs/Ghidra_Guide.png)