diff options
| author | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2023-11-26 11:56:49 +0100 |
|---|---|---|
| committer | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2023-11-26 11:56:49 +0100 |
| commit | 47894bb5d2e425f28d992aee6331b89b85b2058d (patch) | |
| tree | fd08c28c447fbb95e9d8d4122514227f9a48d0ad | |
| parent | a4bf627c2440cbea392e27f138b07fa22cd9e6f1 (diff) | |
| download | focaccia-47894bb5d2e425f28d992aee6331b89b85b2058d.tar.gz focaccia-47894bb5d2e425f28d992aee6331b89b85b2058d.zip | |
Standardize X86 register names
Add some infrastructure for flexible register name matching (e.g. using 'PC' to look up RIP):

- `Arch.to_regname` tries to look up a register's standard name from an arbitrary string.
- `ArchX86` overrides `to_regname` to resolve alias names for registers. Currently just 'PC' for 'RIP'.
- `ProgramState.read` and `ProgramState.set` use `to_regname` to make register access more convenient.

Add all flags with their standard abbreviations to `x86.regnames`.

Implement a full RFLAGS decomposition into its individual flags in `x86`. Replace the hacks in `run.py` and `miasm_test.py` with this more complete solution.

Co-authored-by: Theofilos Augoustis <theofilos.augoustis@gmail.com>
Co-authored-by: Nicola Crivellin <nicola.crivellin98@gmail.com>
| -rw-r--r-- | arch/arch.py | 18 | ||||
| -rw-r--r-- | arch/x86.py | 98 | ||||
| -rw-r--r-- | miasm_test.py | 51 | ||||
| -rw-r--r-- | run.py | 47 | ||||
| -rw-r--r-- | snapshot.py | 48 | ||||
| -rw-r--r-- | symbolic.py | 2 |
6 files changed, 142 insertions, 122 deletions
diff --git a/arch/arch.py b/arch/arch.py index a46439e..ba94631 100644 --- a/arch/arch.py +++ b/arch/arch.py @@ -1,7 +1,23 @@ +from typing import Iterable + class Arch(): - def __init__(self, archname: str, regnames: list[str]): + def __init__(self, archname: str, regnames: Iterable[str]): self.archname = archname self.regnames = set(regnames) + def to_regname(self, name: str) -> str | None: + """Transform a string into a standard register name. + + Override to implement things like name aliases etc. + + :param name: The possibly non-standard name to look up. + :return: The 'corrected' register name, or None if `name` cannot be + transformed into a register name. + """ + name = name.upper() + if name in self.regnames: + return name + return None + def __eq__(self, other): return self.regnames == other.regnames diff --git a/arch/x86.py b/arch/x86.py index 2b27315..01c1631 100644 --- a/arch/x86.py +++ b/arch/x86.py @@ -3,31 +3,81 @@ from .arch import Arch # Names of registers in the architexture -regnames = ['PC', - 'RAX', - 'RBX', - 'RCX', - 'RDX', - 'RSI', - 'RDI', - 'RBP', - 'RSP', - 'R8', - 'R9', - 'R10', - 'R11', - 'R12', - 'R13', - 'R14', - 'R15', - 'RFLAGS', - 'flag ZF', - 'flag CF', - 'flag OF', - 'flag SF', - 'flag PF', - 'flag DF'] +regnames = [ + 'RIP', + 'RAX', + 'RBX', + 'RCX', + 'RDX', + 'RSI', + 'RDI', + 'RBP', + 'RSP', + 'R8', + 'R9', + 'R10', + 'R11', + 'R12', + 'R13', + 'R14', + 'R15', + 'RFLAGS', + # FLAGS + 'CF', 'PF', 'AF', 'ZF', 'SF', 'TF', 'IF', 'DF', 'OF', 'IOPL', 'NT', + # EFLAGS + 'RF', 'VM', 'AC', 'VIF', 'VIP', 'ID', +] + +# A dictionary mapping aliases to standard register names. +regname_aliases = { + 'PC': 'RIP', +} + +def decompose_rflags(rflags: int) -> dict[str, int]: + """Decompose the RFLAGS register's value into its separate flags. + + Uses flag name abbreviation conventions from + `https://en.wikipedia.org/wiki/FLAGS_register`. + + :param rflags: The RFLAGS register value. 
+ :return: A dictionary mapping Miasm's flag names to their values. + """ + return { + # FLAGS + 'CF': rflags & 0x0001, + # 0x0002 reserved + 'PF': rflags & 0x0004, + # 0x0008 reserved + 'AF': rflags & 0x0010, + # 0x0020 reserved + 'ZF': rflags & 0x0040, + 'SF': rflags & 0x0080, + 'TF': rflags & 0x0100, + 'IF': rflags & 0x0200, + 'DF': rflags & 0x0400, + 'OF': rflags & 0x0800, + 'IOPL': rflags & 0x3000, + 'NT': rflags & 0x4000, + + # EFLAGS + 'RF': rflags & 0x00010000, + 'VM': rflags & 0x00020000, + 'AC': rflags & 0x00040000, + 'VIF': rflags & 0x00080000, + 'VIP': rflags & 0x00100000, + 'ID': rflags & 0x00200000, + } class ArchX86(Arch): def __init__(self): super().__init__("X86", regnames) + + def to_regname(self, name: str) -> str | None: + """The X86 override of the standard register name lookup. + + Applies certain register name aliases. + """ + reg = super().to_regname(name) + if reg in regname_aliases: + return regname_aliases[reg] + return reg diff --git a/miasm_test.py b/miasm_test.py index 4bd3062..7ec76a9 100644 --- a/miasm_test.py +++ b/miasm_test.py @@ -1,6 +1,5 @@ import sys - -import IPython +from typing import Any from miasm.arch.x86.sem import Lifter_X86_64 from miasm.analysis.machine import Machine @@ -31,37 +30,17 @@ def print_state(state: SymbolicState): print(f'{str(reg):10s} = {val}') print('=' * 80) -def decompose_rflags(rflags: int) -> dict[str, int]: - """Decompose the RFLAGS register's value into its separate flags. +def flag_names_to_miasm(regs: dict[str, Any]) -> dict: + """Convert standard flag names to Miasm's names. - :param rflags: The RFLAGS register value. - :return: A dictionary mapping Miasm's flag names to their values. + :param regs: Modified in-place. + :return: Returns `regs`. 
""" - return { - # FLAGS - 'cf': rflags & 0x0001, - # reserved 0x0002 - 'pf': rflags & 0x0004, - # reserved 0x0008 - 'af': rflags & 0x0010, - # reserved 0x0020 - 'zf': rflags & 0x0040, - 'nf': rflags & 0x0080, # I think NF (Negative Flag) == SF (Sign Flag)? - 'tf': rflags & 0x0100, - 'i_f': rflags & 0x0200, - 'df': rflags & 0x0400, - 'of': rflags & 0x0800, - 'iopl_f': rflags & 0x3000, - 'nt': rflags & 0x4000, - - # EFLAGS - 'rf': rflags & 0x00010000, - 'vm': rflags & 0x00020000, - 'ac': rflags & 0x00040000, - 'vif': rflags & 0x00080000, - 'vip': rflags & 0x00100000, - 'i_d': rflags & 0x00200000, - } + regs['NF'] = regs.pop('SF') + regs['I_F'] = regs.pop('IF') + regs['IOPL_F'] = regs.pop('IOPL') + regs['I_D'] = regs.pop('ID') + return regs def disasm_elf(addr, mdis: disasmEngine) -> AsmCFG: """Try to disassemble all contents of an ELF file. @@ -118,14 +97,16 @@ def create_state(target: LLDBConcreteTarget) -> MiasmProgramState: mem = [] # Query and store register state - rflags = decompose_rflags(target.read_register('rflags')) + rflags = target.read_register('rflags') + rflags = flag_names_to_miasm(x86.decompose_rflags(rflags)) for reg in machine.mn.regs.all_regs_ids_no_alias: + regname = reg.name.upper() # Make flag names upper case, too try: - conc_val = target.read_register(reg.name) + conc_val = target.read_register(regname) regs[reg] = ExprInt(conc_val, reg.size) except SimConcreteRegisterError: - if reg.name in rflags: - regs[reg] = ExprInt(rflags[reg.name], reg.size) + if regname in rflags: + regs[reg] = ExprInt(rflags[regname], reg.size) # Query and store memory state for mapping in target.get_mappings(): diff --git a/run.py b/run.py index 6aca4d2..768a73d 100644 --- a/run.py +++ b/run.py @@ -20,46 +20,17 @@ class SnapshotBuilder: self.states = [] self.regnames = set(arch.regnames) - @staticmethod - def parse_flags(flag_reg: int): - flags = {'ZF': 0, - 'CF': 0, - 'OF': 0, - 'SF': 0, - 'PF': 0, - 'DF': 0} - - # CF (Carry flag) Bit 0 - # PF (Parity flag) Bit 2 
- # ZF (Zero flag) Bit 6 - # SF (Sign flag) Bit 7 - # TF (Trap flag) Bit 8 - # IF (Interrupt enable flag) Bit 9 - # DF (Direction flag) Bit 10 - # OF (Overflow flag) Bit 11 - flags['CF'] = int(0 != flag_reg & 1) - flags['ZF'] = int(0 != flag_reg & (1 << 6)) - flags['OF'] = int(0 != flag_reg & (1 << 11)) - flags['SF'] = int(0 != flag_reg & (1 << 7)) - flags['DF'] = int(0 != flag_reg & (1 << 10)) - flags['PF'] = int(0 != flag_reg & (1 << 1)) - return flags - - def create_snapshot(self, frame): + def create_snapshot(self, frame: lldb.SBFrame): state = ProgramState(self.arch) state.set('PC', frame.GetPC()) - for reg in frame.GetRegisters(): - for sub_reg in reg: - # Set the register's value in the current snapshot - regname = sub_reg.GetName().upper() - if regname in self.regnames: - regval = int(sub_reg.GetValue(), base=16) - if regname == 'RFLAGS': - flags = SnapshotBuilder.parse_flags(regval) - for flag, val in flags.items(): - state.set(f'flag {flag}', val) - else: - state.set(regname, regval) + for regname in self.arch.regnames: + reg = frame.FindRegister(regname) + regval = int(reg.GetValue(), base=16) + state.set(regname, regval) + if regname == 'RFLAGS': + flags = x86.decompose_rflags(regval) + for flag_name, val in flags.items(): + state.set(flag_name, val) return state def __call__(self, frame): diff --git a/snapshot.py b/snapshot.py index d5136ad..01c6446 100644 --- a/snapshot.py +++ b/snapshot.py @@ -1,38 +1,40 @@ from arch.arch import Arch -class ProgramState(): +class ProgramState: """A snapshot of the program's state.""" def __init__(self, arch: Arch): self.arch = arch - dict_t = dict[str, int] - self.regs = dict_t({ reg: None for reg in arch.regnames }) - self.has_backwards = False - self.matched = False + dict_t = dict[str, int | None] + self.regs: dict_t = { reg: None for reg in arch.regnames } - def set_backwards(self): - self.has_backwards = True + def read(self, reg: str) -> int: + """Read a register's value. 
+ + :raise KeyError: If `reg` is not a register name. + :raise ValueError: If the register has no value. + """ + regname = self.arch.to_regname(reg) + if regname is None: + raise KeyError(f'Not a register name: {reg}') + + assert(regname in self.regs) + regval = self.regs[regname] + if regval is None: + raise ValueError(f'Unable to read value of register {reg} (aka.' + f' {regname}): The register contains no value.') + return regval def set(self, reg: str, value: int): """Assign a value to a register. - :raises RuntimeError: if the register already has a value. + :raise KeyError: If `reg` is not a register name. """ - assert(reg in self.arch.regnames) - - if self.regs[reg] != None: - raise RuntimeError("Reassigning register") - self.regs[reg] = value - - def as_repr(self, reg: str): - """Get a representational string of a register's value.""" - assert(reg in self.arch.regnames) + regname = self.arch.to_regname(reg) + if regname is None: + raise KeyError(f'Not a register name: {regname}') - value = self.regs[reg] - if value is not None: - return hex(value) - else: - return "<none>" + self.regs[regname] = value def __repr__(self): - return self.regs.__repr__() + return repr(self.regs) diff --git a/symbolic.py b/symbolic.py index a8d45d0..53e1bbf 100644 --- a/symbolic.py +++ b/symbolic.py @@ -10,7 +10,7 @@ from lldb_target import LLDBConcreteTarget def symbolize_state(state: angr.SimState, arch: Arch = x86.ArchX86(), - exclude: list[str] = ['PC', 'RBP', 'RSP'], + exclude: list[str] = ['RIP', 'RBP', 'RSP'], stack_name: str = 'stack', stack_size: int = 0x10) \ -> angr.SimState: |