diff options
| author | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2024-10-14 12:10:00 +0200 |
|---|---|---|
| committer | Theofilos Augoustis <theofilos.augoustis@gmail.com> | 2024-10-14 12:10:00 +0200 |
| commit | a514b34d6f708ee80c4f0df91fefa9871d87ad39 (patch) | |
| tree | 0596e7ffdd2b18a1e7977a49b55afb6f46976f6a | |
| parent | aa946a8b14b7970c3c8f52626b82068cdf39cf94 (diff) | |
| download | focaccia-ta/develop.tar.gz focaccia-ta/develop.zip | |
Implement online verification of symbolic backend ta/develop
Co-authored-by: Theofilos Augoustis <theofilos.augoustis@gmail.com> Co-authored-by: Nicola Crivellin <nicola.crivellin98@gmail.com>
| -rw-r--r-- | focaccia/arch/aarch64.py | 108 | ||||
| -rw-r--r-- | focaccia/arch/arch.py | 62 | ||||
| -rw-r--r-- | focaccia/arch/x86.py | 175 | ||||
| -rw-r--r-- | focaccia/snapshot.py | 30 | ||||
| -rw-r--r-- | focaccia/symbolic.py | 62 | ||||
| -rw-r--r-- | test/test_snapshot.py | 74 | ||||
| -rw-r--r-- | tools/_qemu_tool.py | 7 |
7 files changed, 412 insertions, 106 deletions
diff --git a/focaccia/arch/aarch64.py b/focaccia/arch/aarch64.py index c0efc1a..2e510b9 100644 --- a/focaccia/arch/aarch64.py +++ b/focaccia/arch/aarch64.py @@ -1,32 +1,100 @@ """Description of 64-bit ARM.""" -from typing import Literal -from .arch import Arch +from .arch import Arch, RegisterDescription as _Reg archname = 'aarch64' -regnames = [ - 'PC', 'SP', 'LR', - 'CPSR', +registers = [ + _Reg(('R0', 0, 64), ('X0', 0, 64), ('W0', 0, 32)), + _Reg(('R1', 0, 64), ('X1', 0, 64), ('W1', 0, 32)), + _Reg(('R2', 0, 64), ('X2', 0, 64), ('W2', 0, 32)), + _Reg(('R3', 0, 64), ('X3', 0, 64), ('W3', 0, 32)), + _Reg(('R4', 0, 64), ('X4', 0, 64), ('W4', 0, 32)), + _Reg(('R5', 0, 64), ('X5', 0, 64), ('W5', 0, 32)), + _Reg(('R6', 0, 64), ('X6', 0, 64), ('W6', 0, 32)), + _Reg(('R7', 0, 64), ('X7', 0, 64), ('W7', 0, 32)), + _Reg(('R8', 0, 64), ('X8', 0, 64), ('W8', 0, 32)), + _Reg(('R9', 0, 64), ('X9', 0, 64), ('W9', 0, 32)), + _Reg(('R10', 0, 64), ('X10', 0, 64), ('W10', 0, 32)), + _Reg(('R11', 0, 64), ('X11', 0, 64), ('W11', 0, 32)), + _Reg(('R12', 0, 64), ('X12', 0, 64), ('W12', 0, 32)), + _Reg(('R13', 0, 64), ('X13', 0, 64), ('W13', 0, 32)), + _Reg(('R14', 0, 64), ('X14', 0, 64), ('W14', 0, 32)), + _Reg(('R15', 0, 64), ('X15', 0, 64), ('W15', 0, 32)), + _Reg(('R16', 0, 64), ('X16', 0, 64), ('W16', 0, 32)), + _Reg(('R17', 0, 64), ('X17', 0, 64), ('W17', 0, 32)), + _Reg(('R18', 0, 64), ('X18', 0, 64), ('W18', 0, 32)), + _Reg(('R19', 0, 64), ('X19', 0, 64), ('W19', 0, 32)), + _Reg(('R20', 0, 64), ('X20', 0, 64), ('W20', 0, 32)), + _Reg(('R21', 0, 64), ('X21', 0, 64), ('W21', 0, 32)), + _Reg(('R22', 0, 64), ('X22', 0, 64), ('W22', 0, 32)), + _Reg(('R23', 0, 64), ('X23', 0, 64), ('W23', 0, 32)), + _Reg(('R24', 0, 64), ('X24', 0, 64), ('W24', 0, 32)), + _Reg(('R25', 0, 64), ('X25', 0, 64), ('W25', 0, 32)), + _Reg(('R26', 0, 64), ('X26', 0, 64), ('W26', 0, 32)), + _Reg(('R27', 0, 64), ('X27', 0, 64), ('W27', 0, 32)), + _Reg(('R28', 0, 64), ('X28', 0, 64), ('W28', 0, 32)), + _Reg(('R29', 0, 64), ('X29', 0, 64), ('W29', 0, 32)), + _Reg(('R30', 0, 64), ('X30', 0, 64), ('W30', 0, 32), ('LR', 0, 64)), - 'X0', 'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', - 'X10', 'X11', 'X12', 'X13', 'X14', 'X15', 'X16', 'X17', 'X18', 'X19', - 'X20', 'X21', 'X22', 'X23', 'X24', 'X25', 'X26', 'X27', 'X28', 'X29', - 'X30', + _Reg(('RZR', 0, 64), ('XZR', 0, 64), ('WZR', 0, 32)), + _Reg(('SP', 0, 64), ('RSP', 0, 64)), + _Reg(('PC', 0, 64)), - 'Q0', 'Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6', 'Q7', 'Q8', 'Q9', - 'Q10', 'Q11', 'Q12', 'Q13', 'Q14', 'Q15', + _Reg(('V0', 0, 128), ('Q0', 0, 128), ('D0', 0, 64), ('S0', 0, 32), ('H0', 0, 16), ('B0', 0, 8)), + _Reg(('V1', 0, 128), ('Q1', 0, 128), ('D1', 0, 64), ('S1', 0, 32), ('H1', 0, 16), ('B1', 0, 8)), + _Reg(('V2', 0, 128), ('Q2', 0, 128), ('D2', 0, 64), ('S2', 0, 32), ('H2', 0, 16), ('B2', 0, 8)), + _Reg(('V3', 0, 128), ('Q3', 0, 128), ('D3', 0, 64), ('S3', 0, 32), ('H3', 0, 16), ('B3', 0, 8)), + _Reg(('V4', 0, 128), ('Q4', 0, 128), ('D4', 0, 64), ('S4', 0, 32), ('H4', 0, 16), ('B4', 0, 8)), + _Reg(('V5', 0, 128), ('Q5', 0, 128), ('D5', 0, 64), ('S5', 0, 32), ('H5', 0, 16), ('B5', 0, 8)), + _Reg(('V6', 0, 128), ('Q6', 0, 128), ('D6', 0, 64), ('S6', 0, 32), ('H6', 0, 16), ('B6', 0, 8)), + _Reg(('V7', 0, 128), ('Q7', 0, 128), ('D7', 0, 64), ('S7', 0, 32), ('H7', 0, 16), ('B7', 0, 8)), + _Reg(('V8', 0, 128), ('Q8', 0, 128), ('D8', 0, 64), ('S8', 0, 32), ('H8', 0, 16), ('B8', 0, 8)), + _Reg(('V9', 0, 128), ('Q9', 0, 128), ('D9', 0, 64), ('S9', 0, 32), ('H9', 0, 16), ('B9', 0, 8)), + _Reg(('V10', 0, 128), ('Q10', 0, 128), ('D10', 0, 64), ('S10', 0, 32), ('H10', 0, 16), ('B10', 0, 8)), + _Reg(('V11', 0, 128), ('Q11', 0, 128), ('D11', 0, 64), ('S11', 0, 32), ('H11', 0, 16), ('B11', 0, 8)), + _Reg(('V12', 0, 128), ('Q12', 0, 128), ('D12', 0, 64), ('S12', 0, 32), ('H12', 0, 16), ('B12', 0, 8)), + _Reg(('V13', 0, 128), ('Q13', 0, 128), ('D13', 0, 64), ('S13', 0, 32), ('H13', 0, 16), ('B13', 0, 8)), + _Reg(('V14', 0, 128), ('Q14', 0, 128), ('D14', 0, 64), ('S14', 0, 32), ('H14', 0, 16), ('B14', 0, 8)), + _Reg(('V15', 0, 128), ('Q15', 0, 128), ('D15', 0, 64), ('S15', 0, 32), ('H15', 0, 16), ('B15', 0, 8)), + _Reg(('V16', 0, 128), ('Q16', 0, 128), ('D16', 0, 64), ('S16', 0, 32), ('H16', 0, 16), ('B16', 0, 8)), + _Reg(('V17', 0, 128), ('Q17', 0, 128), ('D17', 0, 64), ('S17', 0, 32), ('H17', 0, 16), ('B17', 0, 8)), + _Reg(('V18', 0, 128), ('Q18', 0, 128), ('D18', 0, 64), ('S18', 0, 32), ('H18', 0, 16), ('B18', 0, 8)), + _Reg(('V19', 0, 128), ('Q19', 0, 128), ('D19', 0, 64), ('S19', 0, 32), ('H19', 0, 16), ('B19', 0, 8)), + _Reg(('V20', 0, 128), ('Q20', 0, 128), ('D20', 0, 64), ('S20', 0, 32), ('H20', 0, 16), ('B20', 0, 8)), + _Reg(('V21', 0, 128), ('Q21', 0, 128), ('D21', 0, 64), ('S21', 0, 32), ('H21', 0, 16), ('B21', 0, 8)), + _Reg(('V22', 0, 128), ('Q22', 0, 128), ('D22', 0, 64), ('S22', 0, 32), ('H22', 0, 16), ('B22', 0, 8)), + _Reg(('V23', 0, 128), ('Q23', 0, 128), ('D23', 0, 64), ('S23', 0, 32), ('H23', 0, 16), ('B23', 0, 8)), + _Reg(('V24', 0, 128), ('Q24', 0, 128), ('D24', 0, 64), ('S24', 0, 32), ('H24', 0, 16), ('B24', 0, 8)), + _Reg(('V25', 0, 128), ('Q25', 0, 128), ('D25', 0, 64), ('S25', 0, 32), ('H25', 0, 16), ('B25', 0, 8)), + _Reg(('V26', 0, 128), ('Q26', 0, 128), ('D26', 0, 64), ('S26', 0, 32), ('H26', 0, 16), ('B26', 0, 8)), + _Reg(('V27', 0, 128), ('Q27', 0, 128), ('D27', 0, 64), ('S27', 0, 32), ('H27', 0, 16), ('B27', 0, 8)), + _Reg(('V28', 0, 128), ('Q28', 0, 128), ('D28', 0, 64), ('S28', 0, 32), ('H28', 0, 16), ('B28', 0, 8)), + _Reg(('V29', 0, 128), ('Q29', 0, 128), ('D29', 0, 64), ('S29', 0, 32), ('H29', 0, 16), ('B29', 0, 8)), + _Reg(('V30', 0, 128), ('Q30', 0, 128), ('D30', 0, 64), ('S30', 0, 32), ('H30', 0, 16), ('B30', 0, 8)), + _Reg(('V31', 0, 128), ('Q31', 0, 128), ('D31', 0, 64), ('S31', 0, 32), ('H31', 0, 16), ('B31', 0, 8)), - 'V0', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', - 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', - 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'V29', - 'V30', 'V31', - - 'N', 'Z', 'C', 'V', 'Q', - 'SSBS', 'PAN', 'DIT', 'GE', - 'E', 'A', 'I', 'F', 'M', + _Reg(('CPSR', 0, 64), + ('N', 31, 32), + ('Z', 30, 31), + ('C', 29, 30), + ('V', 28, 29), + ('Q', 27, 28), + ('SSBS', 23, 24), + ('PAN', 22, 23), + ('DIT', 21, 22), + ('GE', 16, 20), + ('E', 9, 10), + ('A', 8, 9), + ('I', 7, 8), + ('F', 6, 7), + ('M', 0, 4), + ), ] +# Names of registers in the architecture +regnames = [desc.base.base_reg for desc in registers] + def decompose_cpsr(cpsr: int) -> dict[str, int]: """Extract individual flag values from the CPSR register.""" return { @@ -52,4 +120,4 @@ def decompose_cpsr(cpsr: int) -> dict[str, int]: class ArchAArch64(Arch): def __init__(self, endianness: Arch.Endianness): - super().__init__(archname, regnames, 64, endianness) + super().__init__(archname, registers, 64, endianness) diff --git a/focaccia/arch/arch.py b/focaccia/arch/arch.py index dee7288..ce5e532 100644 --- a/focaccia/arch/arch.py +++ b/focaccia/arch/arch.py @@ -1,32 +1,82 @@ -from typing import Iterable, Literal +from typing import Literal + +class RegisterAccessor: + def __init__(self, regname: str, start_bit: int, end_bit: int): + """An accessor that describes a range of bits. + + Builds a bit range [start_bit, end_bit), meaning `end_bit` is excluded + from the range. + + Example: An object `RegisterAccessor(0, 1)` accesses exactly the first + bit of a value. `RegisterAccessor(0, 0)` is invalid as it references + a range of zero bits. + + :param start_bit: First bit included in the range. This is the least + significant bit in the range. + :param end_bit: First bit *not* included in the range. This is the most + significant bit of the range. + """ + assert(start_bit < end_bit) + self.base_reg = regname + self.start = start_bit + self.end = end_bit + + self.num_bits = end_bit - start_bit + self.mask = 0 + for i in range(start_bit, end_bit): + self.mask |= 1 << i + + def __repr__(self) -> str: + return f'{self.base_reg}[{self.start}:{self.end - 1}]' + +class RegisterDescription: + def __init__(self, base: tuple[str, int, int], *subsets: tuple[str, int, int]): + self.base = RegisterAccessor(*base) + self.subsets = [(name, RegisterAccessor(base[0], s, e)) for name, s, e in subsets] class Arch(): Endianness = Literal['little', 'big'] def __init__(self, archname: str, - regnames: Iterable[str], + registers: list[RegisterDescription], ptr_size: int, endianness: Endianness = 'little'): self.archname = archname - self.regnames = set(name.upper() for name in regnames) self.ptr_size = ptr_size self.endianness: Literal['little', 'big'] = endianness + self._accessors = {} + for desc in registers: + self._accessors[desc.base.base_reg.upper()] = desc.base + self._accessors |= {name: acc for name, acc in desc.subsets} + + self.regnames = set(desc.base.base_reg.upper() for desc in registers) + """Names of the architecture's base registers.""" + + self.all_regnames = set(self._accessors.keys()) + """Names of the architecture's registers, including register aliases.""" + def to_regname(self, name: str) -> str | None: """Transform a string into a standard register name. - Override to implement things like name aliases etc. - :param name: The possibly non-standard name to look up. :return: The 'corrected' register name, or None if `name` cannot be transformed into a register name. """ name = name.upper() - if name in self.regnames: + if name in self._accessors: return name return None + def get_reg_accessor(self, regname: str) -> RegisterAccessor | None: + """Get an accessor for a register name, which may be an alias. + + Is used internally by ProgramState to access aliased registers. + """ + _regname = self.to_regname(regname) + return self._accessors.get(_regname, None) + def __eq__(self, other): return self.archname == other.archname diff --git a/focaccia/arch/x86.py b/focaccia/arch/x86.py index f35783b..fefab37 100644 --- a/focaccia/arch/x86.py +++ b/focaccia/arch/x86.py @@ -1,53 +1,112 @@ """Architecture-specific configuration.""" -from .arch import Arch +from .arch import Arch, RegisterDescription as _Reg archname = 'x86_64' -# Names of registers in the architecture -regnames = [ - # 8-bit - 'AL', 'CL', 'DL', 'BL', 'SPL', 'BPL', 'SIL', 'DIL', - 'AH', 'CH', 'DH', 'BH', - - # 16-bit - 'IP', - 'AX', 'BX', 'CX', 'DX', 'SI', 'DI', 'BP', 'SP', - - # 32-bit - 'EIP', - 'EAX', 'EBX', 'ECX', 'EDX', 'ESI', 'EDI', 'EBP', 'ESP', - 'EFLAGS', - - # 64-bit - 'RIP', - 'RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP', 'RSP', - 'R8', 'R9', 'R10', 'R11', 'R12', 'R13', 'R14', 'R15', - 'RFLAGS', - - # x87 float registers - 'ST0', 'ST1', 'ST2', 'ST3', 'ST4', 'ST5', 'ST6', 'ST7', - - # Vector registers - 'MM0', 'MM1', 'MM2', 'MM3', 'MM4', 'MM5', 'MM6', 'MM7', - - 'XMM0', 'XMM1', 'XMM2', 'XMM3', 'XMM4', 'XMM5', 'XMM6', 'XMM7', - 'XMM8', 'XMM9', 'XMM10', 'XMM11', 'XMM12', 'XMM13', 'XMM14', 'XMM15', - - 'YMM0', 'YMM1', 'YMM2', 'YMM3', 'YMM4', 'YMM5', 'YMM6', 'YMM7', - 'YMM8', 'YMM9', 'YMM10', 'YMM11', 'YMM12', 'YMM13', 'YMM14', 'YMM15', +registers = [ + # General-purpose registers + _Reg(('RIP', 0, 64), ('EIP', 0, 32), ('IP', 0, 16)), + _Reg(('RAX', 0, 64), ('EAX', 0, 32), ('AX', 0, 16), ('AL', 0, 8), ('AH', 8, 16)), + _Reg(('RBX', 0, 64), ('EBX', 0, 32), ('BX', 0, 16), ('BL', 0, 8), ('BH', 8, 16)), + _Reg(('RCX', 0, 64), ('ECX', 0, 32), ('CX', 0, 16), ('CL', 0, 8), ('CH', 8, 16)), + _Reg(('RDX', 0, 64), ('EDX', 0, 32), ('DX', 0, 16), ('DL', 0, 8), ('DH', 8, 16)), + _Reg(('RSI', 0, 64), ('ESI', 0, 32), ('SI', 0, 16), ('SIL', 0, 8)), + _Reg(('RDI', 0, 64), ('EDI', 0, 32), ('DI', 0, 16), ('DIL', 0, 8)), + _Reg(('RBP', 0, 64), ('EBP', 0, 32), ('BP', 0, 16), ('BPL', 0, 8)), + _Reg(('RSP', 0, 64), ('ESP', 0, 32), ('SP', 0, 16), ('SPL', 0, 8)), + _Reg(('R8', 0, 64)), + _Reg(('R9', 0, 64)), + _Reg(('R10', 0, 64)), + _Reg(('R11', 0, 64)), + _Reg(('R12', 0, 64)), + _Reg(('R13', 0, 64)), + _Reg(('R14', 0, 64)), + _Reg(('R15', 0, 64)), + + # RFLAGS + _Reg(('RFLAGS', 0, 64), ('EFLAGS', 0, 32), ('FLAGS', 0, 16), + ('CF', 0, 1), + ('PF', 2, 3), + ('AF', 4, 5), + ('ZF', 6, 7), + ('SF', 7, 8), + ('TF', 8, 9), + ('IF', 9, 10), + ('DF', 10, 11), + ('OF', 11, 12), + ('IOPL', 12, 14), + ('NT', 14, 15), + ('MD', 15, 16), + + ('RF', 16, 17), + ('VM', 17, 18), + ('AC', 18, 19), + ('VIF', 19, 20), + ('VIP', 20, 21), + ('ID', 21, 22), + ('AI', 31, 32), + ), # Segment registers - 'CS', 'DS', 'SS', 'ES', 'FS', 'GS', - 'FS_BASE', 'GS_BASE', - - # FLAGS - 'CF', 'PF', 'AF', 'ZF', 'SF', 'TF', 'IF', 'DF', 'OF', 'IOPL', 'NT', + _Reg(('CS', 0, 16)), + _Reg(('DS', 0, 16)), + _Reg(('SS', 0, 16)), + _Reg(('ES', 0, 16)), + _Reg(('FS', 0, 16)), + _Reg(('GS', 0, 16)), + _Reg(('FS_BASE', 0, 64)), + _Reg(('GS_BASE', 0, 64)), + + # x87 floating-point registers + _Reg(('ST0', 0, 80)), + _Reg(('ST1', 0, 80)), + _Reg(('ST2', 0, 80)), + _Reg(('ST3', 0, 80)), + _Reg(('ST4', 0, 80)), + _Reg(('ST5', 0, 80)), + _Reg(('ST6', 0, 80)), + _Reg(('ST7', 0, 80)), - # EFLAGS - 'RF', 'VM', 'AC', 'VIF', 'VIP', 'ID', + # Vector registers + _Reg(('ZMM0', 0, 512), ('YMM0', 0, 256), ('XMM0', 0, 128), ('MM0', 0, 64)), + _Reg(('ZMM1', 0, 512), ('YMM1', 0, 256), ('XMM1', 0, 128), ('MM1', 0, 64)), + _Reg(('ZMM2', 0, 512), ('YMM2', 0, 256), ('XMM2', 0, 128), ('MM2', 0, 64)), + _Reg(('ZMM3', 0, 512), ('YMM3', 0, 256), ('XMM3', 0, 128), ('MM3', 0, 64)), + _Reg(('ZMM4', 0, 512), ('YMM4', 0, 256), ('XMM4', 0, 128), ('MM4', 0, 64)), + _Reg(('ZMM5', 0, 512), ('YMM5', 0, 256), ('XMM5', 0, 128), ('MM5', 0, 64)), + _Reg(('ZMM6', 0, 512), ('YMM6', 0, 256), ('XMM6', 0, 128), ('MM6', 0, 64)), + _Reg(('ZMM7', 0, 512), ('YMM7', 0, 256), ('XMM7', 0, 128), ('MM7', 0, 64)), + _Reg(('ZMM8', 0, 512), ('YMM8', 0, 256), ('XMM8', 0, 128)), + _Reg(('ZMM9', 0, 512), ('YMM9', 0, 256), ('XMM9', 0, 128)), + _Reg(('ZMM10', 0, 512), ('YMM10', 0, 256), ('XMM10', 0, 128)), + _Reg(('ZMM11', 0, 512), ('YMM11', 0, 256), ('XMM11', 0, 128)), + _Reg(('ZMM12', 0, 512), ('YMM12', 0, 256), ('XMM12', 0, 128)), + _Reg(('ZMM13', 0, 512), ('YMM13', 0, 256), ('XMM13', 0, 128)), + _Reg(('ZMM14', 0, 512), ('YMM14', 0, 256), ('XMM14', 0, 128)), + _Reg(('ZMM15', 0, 512), ('YMM15', 0, 256), ('XMM15', 0, 128)), + + _Reg(('ZMM16', 0, 512), ('YMM16', 0, 256), ('XMM16', 0, 128)), + _Reg(('ZMM17', 0, 512), ('YMM17', 0, 256), ('XMM17', 0, 128)), + _Reg(('ZMM18', 0, 512), ('YMM18', 0, 256), ('XMM18', 0, 128)), + _Reg(('ZMM19', 0, 512), ('YMM19', 0, 256), ('XMM19', 0, 128)), + _Reg(('ZMM20', 0, 512), ('YMM20', 0, 256), ('XMM20', 0, 128)), + _Reg(('ZMM21', 0, 512), ('YMM21', 0, 256), ('XMM21', 0, 128)), + _Reg(('ZMM22', 0, 512), ('YMM22', 0, 256), ('XMM22', 0, 128)), + _Reg(('ZMM23', 0, 512), ('YMM23', 0, 256), ('XMM23', 0, 128)), + _Reg(('ZMM24', 0, 512), ('YMM24', 0, 256), ('XMM24', 0, 128)), + _Reg(('ZMM25', 0, 512), ('YMM25', 0, 256), ('XMM25', 0, 128)), + _Reg(('ZMM26', 0, 512), ('YMM26', 0, 256), ('XMM26', 0, 128)), + _Reg(('ZMM27', 0, 512), ('YMM27', 0, 256), ('XMM27', 0, 128)), + _Reg(('ZMM28', 0, 512), ('YMM28', 0, 256), ('XMM28', 0, 128)), + _Reg(('ZMM29', 0, 512), ('YMM29', 0, 256), ('XMM29', 0, 128)), + _Reg(('ZMM30', 0, 512), ('YMM30', 0, 256), ('XMM30', 0, 128)), + _Reg(('ZMM31', 0, 512), ('YMM31', 0, 256), ('XMM31', 0, 128)), ] +# Names of registers in the architecture +regnames = [desc.base.base_reg for desc in registers] + # A dictionary mapping aliases to standard register names. regname_aliases = { 'PC': 'RIP', @@ -100,33 +159,33 @@ def compose_rflags(rflags: dict[str, int]) -> int: """ return ( # FLAGS - (0x0001 if rflags['CF'] else 0) | + (0x0001 if rflags.get('CF', 0) else 0) | # 0x0002 reserved - (0x0004 if rflags['PF'] else 0) | + (0x0004 if rflags.get('PF', 0) else 0) | # 0x0008 reserved - (0x0010 if rflags['AF'] else 0) | + (0x0010 if rflags.get('AF', 0) else 0) | # 0x0020 reserved - (0x0040 if rflags['ZF'] else 0) | - (0x0080 if rflags['SF'] else 0) | - (0x0100 if rflags['TF'] else 0) | - (0x0200 if rflags['IF'] else 0) | - (0x0400 if rflags['DF'] else 0) | - (0x0800 if rflags['OF'] else 0) | - (0x3000 if rflags['IOPL'] else 0) | - (0x4000 if rflags['NT'] else 0) | + (0x0040 if rflags.get('ZF', 0) else 0) | + (0x0080 if rflags.get('SF', 0) else 0) | + (0x0100 if rflags.get('TF', 0) else 0) | + (0x0200 if rflags.get('IF', 0) else 0) | + (0x0400 if rflags.get('DF', 0) else 0) | + (0x0800 if rflags.get('OF', 0) else 0) | + (0x3000 if rflags.get('IOPL', 0) else 0) | + (0x4000 if rflags.get('NT', 0) else 0) | # EFLAGS - (0x00010000 if rflags['RF'] else 0) | - (0x00020000 if rflags['VM'] else 0) | - (0x00040000 if rflags['AC'] else 0) | - (0x00080000 if rflags['VIF'] else 0) | - (0x00100000 if rflags['VIP'] else 0) | - (0x00200000 if rflags['ID'] else 0) + (0x00010000 if rflags.get('RF', 0) else 0) | + (0x00020000 if rflags.get('VM', 0) else 0) | + (0x00040000 if rflags.get('AC', 0) else 0) | + (0x00080000 if rflags.get('VIF', 0) else 0) | + (0x00100000 if rflags.get('VIP', 0) else 0) | + (0x00200000 if rflags.get('ID', 0) else 0) ) class ArchX86(Arch): def __init__(self): - super().__init__(archname, regnames, 64) + super().__init__(archname, registers, 64) def to_regname(self, name: str) -> str | None: """The X86 override of the standard register name lookup. diff --git a/focaccia/snapshot.py b/focaccia/snapshot.py index 506cf27..1945d71 100644 --- a/focaccia/snapshot.py +++ b/focaccia/snapshot.py @@ -119,30 +119,40 @@ class ProgramState(ReadableProgramState): :raise RegisterAccessError: If `reg` is not a register name, or if the register has no value. """ - regname = self.arch.to_regname(reg) - if regname is None: + acc = self.arch.get_reg_accessor(reg) + if acc is None: raise RegisterAccessError(reg, f'Not a register name: {reg}') - assert(regname in self.regs) - regval = self.regs[regname] + assert(acc.base_reg in self.regs) + regval = self.regs[acc.base_reg] if regval is None: raise RegisterAccessError( - regname, + acc.base_reg, f'[In ProgramState.read_register]: Unable to read value of' - f' register {reg} (a.k.a. {regname}): The register is not set.' + f' register {reg} (a.k.a. {acc}): The register is not set.' f' Full state: {self}') - return regval + + return (regval & acc.mask) >> acc.start def set_register(self, reg: str, value: int): """Assign a value to a register. :raise RegisterAccessError: If `reg` is not a register name. """ - regname = self.arch.to_regname(reg) - if regname is None: + acc = self.arch.get_reg_accessor(reg) + if acc is None: raise RegisterAccessError(reg, f'Not a register name: {reg}') - self.regs[regname] = value + assert(acc.base_reg in self.regs) + base_reg_size = self.arch.get_reg_accessor(acc.base_reg).num_bits + + val = self.regs[acc.base_reg] + if val is None: + val = 0 + val &= (~acc.mask & ((1 << base_reg_size) - 1)) # Clear bits in range + val |= (value << acc.start) & acc.mask # Set bits in range + + self.regs[acc.base_reg] = val def read_memory(self, addr: int, size: int) -> bytes: """Read a number of bytes from memory. diff --git a/focaccia/symbolic.py b/focaccia/symbolic.py index 0f381a4..9aeff56 100644 --- a/focaccia/symbolic.py +++ b/focaccia/symbolic.py @@ -89,6 +89,15 @@ class Instruction: _instr = machine.mn.dis(asm, arch.ptr_size) return Instruction(_instr, machine, arch, None) + @staticmethod + def from_string(s: str, arch: Arch, offset: int = 0, length: int = 0) -> Instruction: + machine = make_machine(arch) + assert(machine.mn is not None) + _instr = machine.mn.fromstring(s, LocationDB(), arch.ptr_size) + _instr.offset = offset + _instr.l = length + return Instruction(_instr, machine, arch, None) + def to_bytecode(self) -> bytes: """Assemble the instruction to byte code.""" assert(self.machine.mn is not None) @@ -312,7 +321,7 @@ class SymbolicTransform: return list(accessed_mem) - def eval_register_transforms(self, conc_state: ProgramState) \ + def eval_register_transforms(self, conc_state: ReadableProgramState) \ -> dict[str, int]: """Calculate register transformations when applied to a concrete state. @@ -329,7 +338,7 @@ class SymbolicTransform: res[regname] = eval_symbol(expr, conc_state) return res - def eval_memory_transforms(self, conc_state: ProgramState) \ + def eval_memory_transforms(self, conc_state: ReadableProgramState) \ -> dict[int, bytes]: """Calculate memory transformations when applied to a concrete state. @@ -357,13 +366,13 @@ class SymbolicTransform: """ from miasm.expression.parser import str_to_expr as parse - def decode_inst(obj: Iterable[int], arch: Arch): - b = b''.join(i.to_bytes(1) for i in obj) + def decode_inst(obj: list, arch: Arch): + length, text = obj try: - return Instruction.from_bytecode(b, arch) + return Instruction.from_string(text, arch, offset=0, length=length) except Exception as err: - warn(f'[In SymbolicTransform.from_json] Unable to disassemble' - f' bytes {obj}: {err}.') + warn(f'[In SymbolicTransform.from_json] Unable to parse' + f' instruction string "{text}": {err}.') return None arch = supported_architectures[data['arch']] @@ -388,10 +397,10 @@ class SymbolicTransform: """Serialize a symbolic transformation as a JSON object.""" def encode_inst(inst: Instruction): try: - return [int(b) for b in inst.to_bytecode()] + return [inst.length, inst.to_string()] except Exception as err: - warn(f'[In SymbolicTransform.to_json] Unable to assemble' - f' "{inst}" to bytecode: {err}. This instruction will not' + warn(f'[In SymbolicTransform.to_json] Unable to serialize' + f' "{inst}" as string: {err}. This instruction will not' f' be serialized.') return None @@ -644,9 +653,40 @@ def collect_symbolic_trace(env: TraceEnvironment, new_pc = pc + instruction.length else: new_pc = int(new_pc) - strace.append(SymbolicTransform(modified, [instruction], arch, pc, new_pc)) + transform = SymbolicTransform(modified, [instruction], arch, pc, new_pc) + strace.append(transform) + + # Predict next concrete state. + # We verify the symbolic execution backend on the fly for some + # additional protection from bugs in the backend. + predicted_regs = transform.eval_register_transforms(lldb_state) + predicted_mems = transform.eval_memory_transforms(lldb_state) # Step forward target.step() + if target.is_exited(): + break + + # Verify last generated transform by comparing concrete state against + # predicted values. + assert(len(strace) > 0) + for reg, val in predicted_regs.items(): + conc_val = lldb_state.read_register(reg) + if conc_val != val: + warn(f'Symbolic execution backend generated false equation for' + f' [{hex(instruction.addr)}]: {instruction}:' + f' Predicted {reg} = {hex(val)}, but the' + f' concrete state has value {reg} = {hex(conc_val)}.' + f'\nFaulty transformation: {transform}') + for addr, data in predicted_mems.items(): + conc_data = lldb_state.read_memory(addr, len(data)) + if conc_data != data: + warn(f'Symbolic execution backend generated false equation for' + f' [{hex(instruction.addr)}]: {instruction}: Predicted' + f' mem[{hex(addr)}:{hex(addr+len(data))}] = {data},' + f' but the concrete state has value' + f' mem[{hex(addr)}:{hex(addr+len(data))}] = {conc_data}.' + f'\nFaulty transformation: {transform}') + raise Exception() return Trace(strace, env) diff --git a/test/test_snapshot.py b/test/test_snapshot.py new file mode 100644 index 0000000..ddad410 --- /dev/null +++ b/test/test_snapshot.py @@ -0,0 +1,74 @@ +import unittest + +from focaccia.arch import x86 +from focaccia.snapshot import ProgramState, RegisterAccessError + +class TestProgramState(unittest.TestCase): + def setUp(self): + self.arch = x86.ArchX86() + + def test_register_access_empty_state(self): + state = ProgramState(self.arch) + for reg in x86.regnames: + self.assertRaises(RegisterAccessError, state.read_register, reg) + + def test_register_read_write(self): + state = ProgramState(self.arch) + for reg in x86.regnames: + state.set_register(reg, 0x42) + for reg in x86.regnames: + val = state.read_register(reg) + self.assertEqual(val, 0x42) + + def test_register_aliases_empty_state(self): + state = ProgramState(self.arch) + for reg in self.arch.all_regnames: + self.assertRaises(RegisterAccessError, state.read_register, reg) + + def test_register_aliases_read_write(self): + state = ProgramState(self.arch) + for reg in ['EAX', 'EBX', 'ECX', 'EDX']: + state.set_register(reg, 0xa0ff0) + + for reg in ['AH', 'BH', 'CH', 'DH']: + self.assertEqual(state.read_register(reg), 0xf, reg) + for reg in ['AL', 'BL', 'CL', 'DL']: + self.assertEqual(state.read_register(reg), 0xf0, reg) + for reg in ['AX', 'BX', 'CX', 'DX']: + self.assertEqual(state.read_register(reg), 0x0ff0, reg) + for reg in ['EAX', 'EBX', 'ECX', 'EDX', + 'RAX', 'RBX', 'RCX', 'RDX']: + self.assertEqual(state.read_register(reg), 0xa0ff0, reg) + + def test_flag_aliases(self): + flags = ['CF', 'PF', 'AF', 'ZF', 'SF', 'TF', 'IF', 'DF', 'OF', + 'IOPL', 'NT', 'RF', 'VM', 'AC', 'VIF', 'VIP', 'ID'] + state = ProgramState(self.arch) + + state.set_register('RFLAGS', 0) + for flag in flags: + self.assertEqual(state.read_register(flag), 0) + + state.set_register('RFLAGS', + x86.compose_rflags({'ZF': 1, 'PF': 1, 'OF': 0})) + self.assertEqual(state.read_register('ZF'), 1, self.arch.get_reg_accessor('ZF')) + self.assertEqual(state.read_register('PF'), 1) + self.assertEqual(state.read_register('OF'), 0) + self.assertEqual(state.read_register('AF'), 0) + self.assertEqual(state.read_register('ID'), 0) + self.assertEqual(state.read_register('SF'), 0) + + for flag in flags: + state.set_register(flag, 1) + for flag in flags: + self.assertEqual(state.read_register(flag), 1) + + state.set_register('OF', 1) + state.set_register('AF', 1) + state.set_register('SF', 1) + self.assertEqual(state.read_register('OF'), 1) + self.assertEqual(state.read_register('AF'), 1) + self.assertEqual(state.read_register('SF'), 1) + +if __name__ == '__main__': + unittest.main() diff --git a/tools/_qemu_tool.py b/tools/_qemu_tool.py index e3341ad..b365d39 100644 --- a/tools/_qemu_tool.py +++ b/tools/_qemu_tool.py @@ -58,18 +58,23 @@ class GDBProgramState(ReadableProgramState): } def read_register(self, reg: str) -> int: + if reg == 'RFLAGS': + reg = 'EFLAGS' + try: val = self._frame.read_register(reg.lower()) size = val.type.sizeof * 8 # For vector registers, we need to apply architecture-specific # logic because GDB's interface is not consistent. - if size > 64: # Value is a vector + if size >= 128: # Value is a vector if self.arch.archname not in self.read_vector_reg: raise NotImplementedError( f'Reading vector registers is not implemented for' f' architecture {self.arch.archname}.') return self.read_vector_reg[self.arch.archname](val, size) + elif size < 64: + return int(val.cast(gdb.lookup_type('unsigned int'))) # For non-vector values, just return the 64-bit value return int(val.cast(gdb.lookup_type('unsigned long'))) except ValueError as err: |