| 1 | + | #This script labels Go stripped binaries in Ghidra based on GoReSym output. |
| 2 | + | #Script requests GoReSym json output file and uses its results to rename |
| 3 | + | #user functions (including standard functions if available) and labels |
| 4 | + | #runtime_pclntab and runtime_firstmoduledata based on TabMeta.VA and ModuleMeta.VA. |
| 5 | + | #Script was tested and works with both Jython/Python2.7 in Ghidra and Python3 in Ghidrathon. |
| 6 | + | #@category Analysis |
| 7 | + | |
| 8 | + | from ghidra.program.model.symbol import SourceType |
| 9 | + | |
| 10 | + | import collections |
| 11 | + | import json |
| 12 | + | import re |
| 13 | + | |
# Offset estimation strategies. Each string is both the option text offered
# through askChoice and the key under which the simulation stores its results.

# No estimation is done, the offset stays at 0
CHOICE_EST_NONE = "no estimation"
# Maps Ghidra's "entry" function to the longest-named _rt0_ function found by GoReSym
CHOICE_EST_ENTRY_ADDR = "entry to _rt0_ function mapping"
# Derives the offset from TabMeta VA, useful only when a pclntab memory block is present
CHOICE_EST_PCLNTAB = "pclntab to TabMeta VA mapping"
# Matches function names already known to Ghidra against functions found by GoReSym
CHOICE_EST_KNOWN_FUNCS = "known function names mapping"
# All available choices, "no estimation" first so it is simulated before the others
CHOICES = [CHOICE_EST_NONE, CHOICE_EST_ENTRY_ADDR, CHOICE_EST_PCLNTAB, CHOICE_EST_KNOWN_FUNCS]
| 24 | + | |
def iterable(obj):
    """Report whether obj can be iterated over.

    None is always reported as not iterable, which lets callers pass JSON
    null values straight through. Any other object is probed with iter().

    Returns True when iter(obj) succeeds, False when obj is None or iter()
    raises TypeError.
    """
    if obj is not None:
        try:
            iter(obj)
            return True
        except TypeError:
            pass
    return False
| 35 | + | |
def extract_funcs(hints):
    """Merge the user and standard function lists of the GoReSym output.

    Reads hints["UserFunctions"] and hints["StdFunctions"], in that order,
    and concatenates whichever of them is iterable. A section that is not
    iterable (for example None) is skipped.

    Returns a new list with the collected entries.
    """
    collected = []
    for section in ("UserFunctions", "StdFunctions"):
        entries = hints[section]
        if not iterable(entries):
            continue
        collected.extend(entries)
    return collected
| 42 | + | |
def _entry_addr_estimator(hints):
    """Estimate the offset by pairing Ghidra's "entry" function with an _rt0_ function.

    Among the hints["StdFunctions"] entries whose FullName contains "_rt0_",
    the one with the longest FullName is selected. The estimate is the entry
    point offset of the first Ghidra function named "entry" minus the Start
    of that selected function.

    Returns the estimated offset, or None when StdFunctions is not iterable,
    no _rt0_ function exists, or Ghidra has no function named "entry".
    """
    std_funcs = hints["StdFunctions"]
    if not iterable(std_funcs):
        print("No StdFunctions present, required for entry addr estimation")
        return None

    # StdFunctions should have _rt0_[ARCH]_[OS](_lib)?, this maps to entry
    rt0_funcs = [f for f in std_funcs if "_rt0_" in f["FullName"]]
    if not rt0_funcs:
        print("No rt0 functions found")
        return None

    # The longest name is taken as the best candidate
    rt0 = max(rt0_funcs, key=lambda f: len(f["FullName"]))

    for ghidra_func in currentProgram.getFunctionManager().getFunctions(True):
        if ghidra_func.getName() != "entry":
            continue
        return ghidra_func.getEntryPoint().getOffset() - rt0["Start"]

    return None
| 63 | + | |
def _pclntab_estimator(hints):
    """Estimate the offset from the text block start and TabMeta VA.

    The program's "Executable Format" metadata selects the memory block
    names under which a pclntab is expected. When one of those blocks
    exists, the estimate is the start offset of the text block (".text",
    or "__TEXT" for Mach-O) minus hints["TabMeta"]["VA"].

    NOTE(review): the subtraction uses the text block start rather than the
    start of the pclntab block that was found - confirm this is intended.

    Returns the estimated offset, or None when no pclntab block is present
    or the text block cannot be found.
    """
    # Expected pclntab name per exec format
    exec_fmt = currentProgram.getMetadata()["Executable Format"]
    block_name = ".text"
    if exec_fmt == "Executable and Linking Format (ELF)":
        # https://github.com/mandiant/GoReSym/blob/0c729523ed542f24b091e433204fbc6b02c88b31/objfile/elf.go#L89
        pclntabs = [".gopclntab"]
    elif exec_fmt == "Portable Executable (PE)":
        # https://github.com/mandiant/GoReSym/blob/0c729523ed542f24b091e433204fbc6b02c88b31/objfile/pe.go#L130
        pclntabs = ["runtime.pclntab", "runtime.epclntab"]
    elif exec_fmt == "Mac OS X Mach-O":
        # https://github.com/mandiant/GoReSym/blob/0c729523ed542f24b091e433204fbc6b02c88b31/objfile/macho.go#L111
        pclntabs = ["__gopclntab"]
        block_name = "__TEXT"
    else:
        # Try legacy
        # https://github.com/mandiant/GoReSym/blob/0c729523ed542f24b091e433204fbc6b02c88b31/objfile/pe.go#L147
        pclntabs = ["pclntab", "epclntab"]

    memory = currentProgram.getMemory()

    # Check if pclntab is present
    for p in pclntabs:
        if memory.getBlock(p) is None:
            continue

        # With pclntab, offset should be a matter of TabMeta VA
        tmva = hints["TabMeta"]["VA"]
        text = memory.getBlock(block_name)
        if text is None:
            # Without the text block there is nothing to measure against;
            # report "no estimate" so the caller falls back instead of
            # failing on a None dereference.
            print("No {} block found, required for pclntab estimation".format(block_name))
            return None
        return text.getStart().getOffset() - tmva

    return None
| 96 | + | |
def _func_map_estimator(hints):
    """Estimate the offset by matching Ghidra function names with GoReSym names.

    For every Ghidra function whose name also appears among the GoReSym
    functions, the difference between the Ghidra entry point offset and the
    GoReSym Start is recorded. The most frequent difference is the estimate.
    Functions named "entry" or containing "FUN_" are not considered.

    Returns the most common offset, or None when no name matched.
    """
    offs = []

    # Try to guess with known function names. rename_funcs writes names with
    # ' ' replaced by '_' (Ghidra rejects ' ' in symbol names), so the lookup
    # is keyed the same way; otherwise names containing spaces never match.
    grs_funcs = {f["FullName"].replace(" ", "_"): f for f in extract_funcs(hints)}

    # Match function lists and collect possible offsets
    fm = currentProgram.getFunctionManager()
    for f in fm.getFunctions(True):
        name = f.getName()
        if "FUN_" in name or name == "entry":
            continue
        match = grs_funcs.get(name)
        if match is not None:
            offs.append(f.getEntryPoint().getOffset() - match["Start"])

    if not offs:
        return None

    # Count given offsets, most counts is our estimate
    return collections.Counter(offs).most_common(1)[0][0]
| 116 | + | |
def estimate_offset(choice, hints):
    """Run the estimator that belongs to the chosen strategy.

    choice is one of the CHOICE_EST_* strategy strings and hints is the
    parsed GoReSym output handed to the estimator.

    Returns the estimator's result, or 0 when the strategy has no estimator
    (such as "no estimation") or the estimator returned None.
    """
    strategies = {
        CHOICE_EST_KNOWN_FUNCS: _func_map_estimator,
        CHOICE_EST_PCLNTAB: _pclntab_estimator,
        CHOICE_EST_ENTRY_ADDR: _entry_addr_estimator,
    }

    estimator = strategies.get(choice)
    if estimator is None:
        # No estimation
        return 0

    estimate = estimator(hints)
    return 0 if estimate is None else estimate
| 133 | + | |
def ask_offset(default):
    """Ask the user for the memory offset to apply to function entrypoints.

    default is the offset estimated by the chosen strategy; it is shown in
    the prompt and pre-filled as the answer.

    Returns the entered offset as an integer. Plain decimal input is parsed
    as before; input that is not valid decimal is parsed again with base 0,
    so prefixed values such as 0x1000 or -0x1000 are accepted.

    Raises ValueError when the input cannot be parsed either way.
    """
    msg = "Please specify optional memory offset for entrypoints, script estimated {} with chosen strategy".format(default)
    offset = askString("Optional offset", msg, str(default))
    try:
        return int(offset)
    except ValueError:
        # Base 0 makes int() honour 0x/0o/0b prefixes, so offsets copied
        # from a listing in hexadecimal work too.
        return int(offset, 0)
| 138 | + | |
def rename_funcs(items, offset, simulate=False):
    """Rename or create Ghidra functions from GoReSym function entries.

    Each entry provides "Start" and "FullName". The function located at
    Start + offset is renamed to FullName (with ' ' replaced by '_'); when
    no function exists at that address, one is created with that name.

    items: iterable of function entries; anything not iterable is ignored.
    offset: value added to every Start before the address lookup.
    simulate: when true, only count what would be renamed or created
        without changing the program.

    Returns a (renamed, created) tuple of counters. An entry that raises is
    reported with print and skipped; the counters are incremented before
    the rename/create call, so a failing call is still counted.
    """
    if not iterable(items):
        return 0, 0

    fm = currentProgram.getFunctionManager()
    rename_counter, create_counter = 0, 0
    for func in items:
        # Reset per entry so the error handler below never reads an unbound
        # name or a stale value left over from the previous entry.
        addr, name = None, None
        try:
            addr = func["Start"]
            # ' ' is considered as invalid char
            # https://github.com/NationalSecurityAgency/ghidra/blob/c19276091f274a9ef0850c904c743f61c850854e/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/symbol/SymbolUtilities.java#L104
            name = func["FullName"].replace(" ", "_")
            entrypoint = toAddr(addr+offset)

            f = fm.getFunctionAt(entrypoint)
            if f is None:
                create_counter += 1
                if not simulate:
                    print("Creating new func at {}".format(entrypoint))
                    createFunction(entrypoint, name)
            else:
                rename_counter += 1
                if not simulate:
                    print("Renaming func {} to {}".format(f.getName(), name))
                    f.setName(name, SourceType.USER_DEFINED)
        except Exception as e:
            # hex() itself fails when the entry had no usable Start
            try:
                where = hex(addr)
            except TypeError:
                where = addr
            print(u"error renaming {} at {}: {}".format(name, where, e))

    return rename_counter, create_counter
| 167 | + | |
def annotate(items, offset):
    """Create a label for every entry that carries a non-zero VA.

    Each entry provides "VA" and "Str". The label text is Str with ' '
    replaced by '_' and it is placed at VA + offset. Entries whose VA is 0
    are skipped, and nothing happens when items is not iterable.
    """
    if not iterable(items):
        return

    for item in items:
        va = item["VA"]
        if va == 0:
            continue
        address = toAddr(va + offset)
        # ' ' is considered as invalid char
        label = item["Str"].replace(" ", "_")
        createLabel(address, label, True)
| 176 | + | |
def offset_estimation_sim(hints):
    """Simulate every offset estimation strategy and suggest the best one.

    For each strategy in CHOICES the offset is estimated and a simulated
    rename (no changes are made to the program) counts how many functions
    would be renamed and created with it.

    Returns (choices_dict, suggested). choices_dict maps each strategy to a
    dict with "renamed", "created" and "offset"; suggested is the strategy
    with the most renames.
    """
    choices_dict, suggested = {}, None

    # The function list does not depend on the strategy, build it once
    hfuncs = extract_funcs(hints)

    # Since CHOICES contains "no estimation" as first strategy,
    # we will perform a rename without offset first and then try
    # all other strategies. A later strategy replaces the suggestion
    # only when it has strictly more renames, so ties keep the earlier one
    for choice in CHOICES:
        estoff = estimate_offset(choice, hints)
        renamed, created = rename_funcs(hfuncs, estoff, True)
        print("{} renamed {} and created {} functions".format(choice, renamed, created))

        choices_dict[choice] = {"renamed": renamed, "created": created, "offset": estoff}
        if suggested is None or choices_dict[suggested]["renamed"] < renamed:
            suggested = choice

    return choices_dict, suggested
| 197 | + | |
| 198 | + | |
# Ask for the GoReSym output file and parse it
grsfile = askFile("GoReSym output file", "Choose GoReSym output file")
with open(grsfile.getAbsolutePath(), "rb") as fp:
    hints = json.load(fp)

# Simulate every strategy; the estimated offsets come back in choices_dict
choices_dict, suggested = offset_estimation_sim(hints)
offset = choices_dict[suggested]["offset"]

# The user is only prompted when some strategy got more renames than
# "no estimation"; otherwise the offset of "no estimation" is used as is
if suggested != CHOICE_EST_NONE:
    msg = "Please choose offset estimation strategy, script got most renames with [{}]".format(suggested)
    choice = askChoice("Offset estimation", msg, CHOICES, suggested)
    # The offset of the chosen strategy is the default, the user may override it
    offset = ask_offset(choices_dict[choice]["offset"])

# Rename/create functions with the selected offset
for section in ("UserFunctions", "StdFunctions"):
    rename_funcs(hints[section], offset)

# Labels are placed with offset 0, i.e. at the addresses GoReSym reported
annotate(hints["Interfaces"], 0)
meta_labels = [
    {"VA": hints["TabMeta"]["VA"], "Str": "runtime_pclntab"},
    {"VA": hints["ModuleMeta"]["VA"], "Str": "runtime_firstmoduledata"},
]
annotate(meta_labels, 0)
| 225 | + | |