diff --git a/angr/analyses/decompiler/ail_simplifier.py b/angr/analyses/decompiler/ail_simplifier.py index 0ec41c0a72d..ae6094354bf 100644 --- a/angr/analyses/decompiler/ail_simplifier.py +++ b/angr/analyses/decompiler/ail_simplifier.py @@ -18,7 +18,6 @@ BinaryOp, ) -from ...errors import SimMemoryMissingError from ...engines.light import SpOffset from ...code_location import CodeLocation from ...analyses.reaching_definitions.external_codeloc import ExternalCodeLocation @@ -35,7 +34,7 @@ if TYPE_CHECKING: from ailment.manager import Manager - from angr.analyses.reaching_definitions import ReachingDefinitionsAnalysis + from angr.analyses.reaching_definitions import ReachingDefinitionsModel _l = logging.getLogger(__name__) @@ -103,7 +102,7 @@ def __init__( ): self.func = func self.func_graph = func_graph if func_graph is not None else func.graph - self._reaching_definitions: Optional[ReachingDefinitionsAnalysis] = None + self._reaching_definitions: Optional["ReachingDefinitionsModel"] = None self._propagator = None self._remove_dead_memdefs = remove_dead_memdefs @@ -191,7 +190,7 @@ def _handler(node): AILGraphWalker(self.func_graph, _handler, replace_nodes=True).walk() self.blocks = {} - def _compute_reaching_definitions(self) -> "ReachingDefinitionsAnalysis": + def _compute_reaching_definitions(self) -> "ReachingDefinitionsModel": # Computing reaching definitions or return the cached one if self._reaching_definitions is not None: return self._reaching_definitions @@ -199,17 +198,12 @@ def _compute_reaching_definitions(self) -> "ReachingDefinitionsAnalysis": subject=self.func, func_graph=self.func_graph, # init_context=(), <-- in case of fire break glass - observe_all=True, # observe_callback=self._simplify_function_rd_observe_callback + observe_all=False, use_callee_saved_regs_at_return=self._use_callee_saved_regs_at_return, - ) + ).model self._reaching_definitions = rd return rd - @staticmethod - # pylint:disable=unused-argument - def 
_simplify_function_rd_observe_callback(ob_type, **kwargs): - return ob_type == "node" or (ob_type == "insn" and kwargs.get("op_type", None) == OP_BEFORE) - def _compute_propagation(self): # Propagate expressions or return the existing result if self._propagator is not None: @@ -772,7 +766,7 @@ def _unify_local_variables(self) -> bool: # ensure the expression that we want to replace with is still up-to-date replace_with_original_def = self._find_atom_def_at(replace_with, rd, def_.codeloc) if replace_with_original_def is not None and not self._check_atom_last_def( - replace_with, used_expr.size, u.ins_addr, rd, replace_with_original_def + replace_with, u, rd, replace_with_original_def ): all_uses_replaced = False continue @@ -813,26 +807,18 @@ def _unify_local_variables(self) -> bool: @staticmethod def _find_atom_def_at(atom, rd, codeloc: CodeLocation) -> Optional[Definition]: if isinstance(atom, Register): - observ = rd.observed_results[("insn", codeloc.ins_addr, OP_BEFORE)] - try: - reg_vals = observ.register_definitions.load(atom.reg_offset, size=atom.size) - defs = list(observ.extract_defs_from_mv(reg_vals)) - return defs[0] if len(defs) == 1 else None - except SimMemoryMissingError: - pass + defs = rd.get_defs(atom, codeloc, OP_BEFORE) + return next(iter(defs)) if len(defs) == 1 else None + return None @staticmethod - def _check_atom_last_def(atom, size, ins_addr, rd, the_def) -> bool: + def _check_atom_last_def(atom, codeloc, rd, the_def) -> bool: if isinstance(atom, Register): - observ = rd.observed_results[("insn", ins_addr, OP_BEFORE)] - try: - reg_vals = observ.register_definitions.load(atom.reg_offset, size=size) - for existing_def in observ.extract_defs_from_mv(reg_vals): - if existing_def.codeloc != the_def.codeloc: - return False - except SimMemoryMissingError: - pass + defs = rd.get_defs(atom, codeloc, OP_BEFORE) + for d in defs: + if d.codeloc != the_def.codeloc: + return False return True @@ -961,10 +947,9 @@ def _fold_call_exprs(self) -> bool: 
defsite_defs_per_atom = defaultdict(set) for dd in defsite_all_expr_uses: defsite_defs_per_atom[dd.atom].add(dd) - usesite_rdstate = rd.observed_results[("stmt", (u.block_addr, u.block_idx, u.stmt_idx), 0)] usesite_expr_def_outdated = False for defsite_expr_atom, defsite_expr_uses in defsite_defs_per_atom.items(): - usesite_expr_uses = set(usesite_rdstate.get_definitions(defsite_expr_atom)) + usesite_expr_uses = set(rd.get_defs(defsite_expr_atom, u, OP_BEFORE)) if not usesite_expr_uses: # the atom is not defined at the use site - it's fine continue diff --git a/angr/analyses/decompiler/block_simplifier.py b/angr/analyses/decompiler/block_simplifier.py index e155dbba055..aa1a5f1cd98 100644 --- a/angr/analyses/decompiler/block_simplifier.py +++ b/angr/analyses/decompiler/block_simplifier.py @@ -123,15 +123,19 @@ def _compute_propagation(self, block): def _compute_reaching_definitions(self, block): def observe_callback(ob_type, addr=None, op_type=None, **kwargs) -> bool: # pylint:disable=unused-argument - return ob_type == "stmt" or ob_type == "node" and addr == block.addr and op_type == OP_AFTER + return ob_type == "node" and addr == block.addr and op_type == OP_AFTER if self._reaching_definitions is None: - self._reaching_definitions = self.project.analyses[ReachingDefinitionsAnalysis].prep()( - subject=block, - track_tmps=True, - stack_pointer_tracker=self._stack_pointer_tracker, - observe_all=False, - observe_callback=observe_callback, + self._reaching_definitions = ( + self.project.analyses[ReachingDefinitionsAnalysis] + .prep()( + subject=block, + track_tmps=True, + stack_pointer_tracker=self._stack_pointer_tracker, + observe_all=False, + observe_callback=observe_callback, + ) + .model ) return self._reaching_definitions diff --git a/angr/analyses/propagator/engine_ail.py b/angr/analyses/propagator/engine_ail.py index 865e55a7a95..ef8ccfc524e 100644 --- a/angr/analyses/propagator/engine_ail.py +++ b/angr/analyses/propagator/engine_ail.py @@ -5,13 +5,13 @@ import 
claripy from ailment import Stmt, Expr -from angr.errors import SimMemoryMissingError from angr.knowledge_plugins.propagations.prop_value import PropValue, Detail from angr.analyses.reaching_definitions.external_codeloc import ExternalCodeLocation +from angr.knowledge_plugins.key_definitions.atoms import Register from ...utils.constants import is_alignment_mask from ...engines.light import SimEngineLightAILMixin from ...sim_variable import SimStackVariable, SimMemoryVariable -from ..reaching_definitions.reaching_definitions import OP_BEFORE, OP_AFTER +from ..reaching_definitions.reaching_definitions import OP_BEFORE from .engine_base import SimEnginePropagatorBase if TYPE_CHECKING: @@ -378,26 +378,17 @@ def _test_concatenation(pv: PropValue): reg_defat = None if self._reaching_definitions is not None: codeloc = self._codeloc() - key = "stmt", (codeloc.block_addr, codeloc.block_idx, codeloc.stmt_idx), OP_BEFORE - if key in self._reaching_definitions.observed_results: - o = self._reaching_definitions.observed_results[key] - try: - mv = o.register_definitions.load(expr.reg_offset, size=expr.size) - except SimMemoryMissingError: - mv = None - if mv is not None: - reg_defs = o.extract_defs_from_mv(mv) - reg_defat_codelocs = {reg_def.codeloc for reg_def in reg_defs} - if len(reg_defat_codelocs) == 1: - reg_defat = next(iter(reg_defat_codelocs)) - defat_key = "stmt", (reg_defat.block_addr, reg_defat.block_idx, reg_defat.stmt_idx), OP_BEFORE - if defat_key not in self._reaching_definitions.observed_results: - # the observation point does not exist. probably it's because te observation point is in a - # callee function. - reg_defat = None - if isinstance(reg_defat, ExternalCodeLocation): - # there won't be an observed result for external code location. 
give up - reg_defat = None + reg_defat_defs = self._reaching_definitions.get_defs( + Register(expr.reg_offset, expr.size), codeloc, OP_BEFORE + ) + reg_defat_codelocs = {reg_def.codeloc for reg_def in reg_defat_defs} + if len(reg_defat_codelocs) == 1: + reg_defat = next(iter(reg_defat_codelocs)) + if reg_defat.stmt_idx is None: + # the observation point is in a callee function + reg_defat = None + if isinstance(reg_defat, ExternalCodeLocation): + reg_defat = None if new_expr is not None: # check if this new_expr uses any expression that has been overwritten @@ -1139,36 +1130,17 @@ def is_using_outdated_def( l.warning("Unknown where the expression is defined. Assume the definition is out-dated.") return True, False - key_defat = "stmt", (expr_defat.block_addr, expr_defat.block_idx, expr_defat.stmt_idx), OP_AFTER - if key_defat not in self._reaching_definitions.observed_results: - l.warning( - "Required reaching definition state at instruction address %#x is not found. Assume the definition is " - "out-dated.", - expr_defat.ins_addr, - ) - return True, False - - key_currloc = "stmt", (current_loc.block_addr, current_loc.block_idx, current_loc.stmt_idx), OP_BEFORE - if key_currloc not in self._reaching_definitions.observed_results: - l.warning( - "Required reaching definition state at instruction address %#x is not found. 
Assume the definition is " - "out-dated.", - current_loc.ins_addr, - ) - return True, False - from .outdated_definition_walker import OutdatedDefinitionWalker # pylint:disable=import-outside-toplevel walker = OutdatedDefinitionWalker( expr, expr_defat, - self._reaching_definitions.observed_results[key_defat], current_loc, - self._reaching_definitions.observed_results[key_currloc], self.state, self.arch, avoid=avoid, extract_offset_to_sp=self.extract_offset_to_sp, + rda=self._reaching_definitions, ) walker.walk_expression(expr) return walker.out_dated, walker.has_avoid diff --git a/angr/analyses/propagator/outdated_definition_walker.py b/angr/analyses/propagator/outdated_definition_walker.py index 4bb1bff22e4..0e35805b55e 100644 --- a/angr/analyses/propagator/outdated_definition_walker.py +++ b/angr/analyses/propagator/outdated_definition_walker.py @@ -1,15 +1,16 @@ +# pylint:disable=consider-using-in from typing import Optional, Callable, TYPE_CHECKING from ailment import Block, Stmt, Expr, AILBlockWalker -from ...errors import SimMemoryMissingError from ...code_location import CodeLocation +from ...knowledge_plugins.key_definitions.constants import OP_BEFORE, OP_AFTER +from ...knowledge_plugins.key_definitions import atoms if TYPE_CHECKING: from archinfo import Arch from .propagator import PropagatorAILState - from angr.storage.memory_mixins.paged_memory.pages.multi_values import MultiValues - from angr.knowledge_plugins.key_definitions import LiveDefinitions + from angr.analyses.reaching_definitions import ReachingDefinitionsModel class OutdatedDefinitionWalker(AILBlockWalker): @@ -21,20 +22,17 @@ def __init__( self, expr, expr_defat: CodeLocation, - livedefs_defat: "LiveDefinitions", current_loc: CodeLocation, - livedefs_currentloc: "LiveDefinitions", state: "PropagatorAILState", arch: "Arch", avoid: Optional[Expr.Expression] = None, extract_offset_to_sp: Callable = None, + rda: "ReachingDefinitionsModel" = None, ): super().__init__() self.expr = expr 
self.expr_defat = expr_defat - self.livedefs_defat = livedefs_defat self.current_loc = current_loc - self.livedefs_currentloc = livedefs_currentloc self.state = state self.avoid = avoid self.arch = arch @@ -45,6 +43,7 @@ def __init__( self.expr_handlers[Expr.VEXCCallExpression] = self._handle_VEXCCallExpression self.out_dated = False self.has_avoid = False + self.rda = rda # pylint:disable=unused-argument def _handle_Tmp(self, expr_idx: int, expr: Expr.Tmp, stmt_idx: int, stmt: Stmt.Assignment, block: Optional[Block]): @@ -63,19 +62,8 @@ def _handle_Register( self.has_avoid = True # is the used register still alive at this point? - try: - reg_vals: "MultiValues" = self.livedefs_defat.register_definitions.load(expr.reg_offset, size=expr.size) - defs_defat = list(self.livedefs_defat.extract_defs_from_mv(reg_vals)) - except SimMemoryMissingError: - defs_defat = [] - - try: - reg_vals: "MultiValues" = self.livedefs_currentloc.register_definitions.load( - expr.reg_offset, size=expr.size - ) - defs_currentloc = list(self.livedefs_currentloc.extract_defs_from_mv(reg_vals)) - except SimMemoryMissingError: - defs_currentloc = [] + defs_defat = self.rda.get_defs(atoms.Register(expr.reg_offset, expr.size), self.expr_defat, OP_AFTER) + defs_currentloc = self.rda.get_defs(atoms.Register(expr.reg_offset, expr.size), self.current_loc, OP_BEFORE) codelocs_defat = {def_.codeloc for def_ in defs_defat} codelocs_currentloc = {def_.codeloc for def_ in defs_currentloc} @@ -83,31 +71,19 @@ def _handle_Register( self.out_dated = True def _handle_Load(self, expr_idx: int, expr: Expr.Load, stmt_idx: int, stmt: Stmt.Statement, block: Optional[Block]): - if self.avoid is not None and ( # pylint:disable=consider-using-in - expr == self.avoid or expr.addr == self.avoid - ): + if self.avoid is not None and (expr == self.avoid or expr.addr == self.avoid): self.has_avoid = True if isinstance(expr.addr, Expr.StackBaseOffset): sp_offset = self.extract_offset_to_sp(expr.addr) if sp_offset is not 
None: - stack_addr = self.livedefs_defat.stack_offset_to_stack_addr(sp_offset) - try: - mem_vals: "MultiValues" = self.livedefs_defat.stack_definitions.load( - stack_addr, size=expr.size, endness=expr.endness - ) - defs_defat = list(self.livedefs_defat.extract_defs_from_mv(mem_vals)) - except SimMemoryMissingError: - defs_defat = [] - - try: - mem_vals: "MultiValues" = self.livedefs_currentloc.stack_definitions.load( - stack_addr, size=expr.size, endness=expr.endness - ) - defs_currentloc = list(self.livedefs_defat.extract_defs_from_mv(mem_vals)) - except SimMemoryMissingError: - defs_currentloc = [] + defs_defat = self.rda.get_defs( + atoms.MemoryLocation(atoms.SpOffset(expr.bits, sp_offset), expr.size), self.expr_defat, OP_AFTER + ) + defs_currentloc = self.rda.get_defs( + atoms.MemoryLocation(atoms.SpOffset(expr.bits, sp_offset), expr.size), self.current_loc, OP_BEFORE + ) codelocs_defat = {def_.codeloc for def_ in defs_defat} codelocs_currentloc = {def_.codeloc for def_ in defs_currentloc} @@ -126,21 +102,9 @@ def _handle_Load(self, expr_idx: int, expr: Expr.Load, stmt_idx: int, stmt: Stmt elif isinstance(expr.addr, Expr.Const): mem_addr = expr.addr.value - try: - mem_vals: "MultiValues" = self.livedefs_defat.memory_definitions.load( - mem_addr, size=expr.size, endness=expr.endness - ) - defs_defat = list(self.livedefs_defat.extract_defs_from_mv(mem_vals)) - except SimMemoryMissingError: - defs_defat = [] - try: - mem_vals: "MultiValues" = self.livedefs_currentloc.memory_definitions.load( - mem_addr, size=expr.size, endness=expr.endness - ) - defs_currentloc = list(self.livedefs_defat.extract_defs_from_mv(mem_vals)) - except SimMemoryMissingError: - defs_currentloc = [] + defs_defat = self.rda.get_defs(atoms.MemoryLocation(mem_addr, expr.size), self.expr_defat, OP_AFTER) + defs_currentloc = self.rda.get_defs(atoms.MemoryLocation(mem_addr, expr.size), self.current_loc, OP_BEFORE) codelocs_defat = {def_.codeloc for def_ in defs_defat} codelocs_currentloc = 
{def_.codeloc for def_ in defs_currentloc} diff --git a/angr/analyses/reaching_definitions/engine_ail.py b/angr/analyses/reaching_definitions/engine_ail.py index 58478307c41..0f9e2311a93 100644 --- a/angr/analyses/reaching_definitions/engine_ail.py +++ b/angr/analyses/reaching_definitions/engine_ail.py @@ -100,20 +100,26 @@ def _external_codeloc(self): def _set_codeloc(self): # TODO do we want a better mechanism to specify context updates? - self.state.move_codelocs( - CodeLocation( - self.block.addr, - self.stmt_idx, - ins_addr=self.ins_addr, - block_idx=self.block.idx, - context=self.state.codeloc.context, - ) + new_codeloc = CodeLocation( + self.block.addr, + self.stmt_idx, + ins_addr=self.ins_addr, + block_idx=self.block.idx, + context=self.state.codeloc.context, ) + self.state.move_codelocs(new_codeloc) + self.state.analysis.model.at_new_stmt(new_codeloc) # # AIL statement handlers # + def _process_Stmt(self, whitelist=None): + super()._process_Stmt(whitelist=whitelist) + + if self.state.analysis: + self.state.analysis.model.complete_loc() + def _handle_Stmt(self, stmt): if self.state.analysis: self.state.analysis.stmt_observe(self.stmt_idx, stmt, self.block, self.state, OP_BEFORE) diff --git a/angr/analyses/reaching_definitions/engine_vex.py b/angr/analyses/reaching_definitions/engine_vex.py index 2485b024408..129a1fe2dc2 100644 --- a/angr/analyses/reaching_definitions/engine_vex.py +++ b/angr/analyses/reaching_definitions/engine_vex.py @@ -90,9 +90,11 @@ def _external_codeloc(self): def _set_codeloc(self): # TODO do we want a better mechanism to specify context updates? 
- self.state.move_codelocs( - CodeLocation(self.block.addr, self.stmt_idx, ins_addr=self.ins_addr, context=self.state.codeloc.context) + new_codeloc = CodeLocation( + self.block.addr, self.stmt_idx, ins_addr=self.ins_addr, context=self.state.codeloc.context ) + self.state.move_codelocs(new_codeloc) + self.state.analysis.model.at_new_stmt(new_codeloc) # # VEX statement handlers diff --git a/angr/analyses/reaching_definitions/rd_state.py b/angr/analyses/reaching_definitions/rd_state.py index a32a9695a39..9e12c057401 100644 --- a/angr/analyses/reaching_definitions/rd_state.py +++ b/angr/analyses/reaching_definitions/rd_state.py @@ -318,6 +318,10 @@ def _initialize_function(self, cc: SimCC, func_addr: int, rtoc_value: Optional[i sp = self.annotate_with_def(self._initial_stack_pointer(), sp_def) self.register_definitions.store(self.arch.sp_offset, sp) + ex_loc = ExternalCodeLocation(call_string) + if self.analysis is not None: + self.analysis.model.at_new_stmt(ex_loc) + if cc is not None: prototype = self.analysis.kb.functions[func_addr].prototype if prototype is not None: @@ -328,20 +332,20 @@ def _initialize_function(self, cc: SimCC, func_addr: int, rtoc_value: Optional[i # FIXME: implement reg_offset handling in SimRegArg reg_offset = self.arch.registers[arg.reg_name][0] reg_atom = Register(reg_offset, self.arch.bytes) - reg_def = Definition( - reg_atom, ExternalCodeLocation(call_string), tags={ParameterTag(function=func_addr)} - ) + reg_def = Definition(reg_atom, ex_loc, tags={ParameterTag(function=func_addr)}) self.all_definitions.add(reg_def) + if self.analysis is not None: + self.analysis.model.add_def(reg_def, ex_loc) reg = self.annotate_with_def(self.top(self.arch.bits), reg_def) self.register_definitions.store(reg_offset, reg) # initialize stack parameters elif isinstance(arg, SimStackArg): ml_atom = MemoryLocation(SpOffset(self.arch.bits, arg.stack_offset), arg.size) - ml_def = Definition( - ml_atom, ExternalCodeLocation(call_string), 
tags={ParameterTag(function=func_addr)} - ) + ml_def = Definition(ml_atom, ex_loc, tags={ParameterTag(function=func_addr)}) self.all_definitions.add(ml_def) + if self.analysis is not None: + self.analysis.model.add_def(ml_def, ex_loc) ml = self.annotate_with_def(self.top(self.arch.bits), ml_def) stack_address = self.get_stack_address(self.stack_address(arg.stack_offset)) self.stack_definitions.store(stack_address, ml, endness=self.arch.memory_endness) @@ -354,15 +358,19 @@ def _initialize_function(self, cc: SimCC, func_addr: int, rtoc_value: Optional[i raise TypeError("rtoc_value must be provided on PPC64.") offset, size = self.arch.registers["rtoc"] rtoc_atom = Register(offset, size) - rtoc_def = Definition(rtoc_atom, ExternalCodeLocation(call_string), tags={InitialValueTag()}) + rtoc_def = Definition(rtoc_atom, ex_loc, tags={InitialValueTag()}) self.all_definitions.add(rtoc_def) + if self.analysis is not None: + self.analysis.model.add_def(rtoc_def, ex_loc) rtoc = self.annotate_with_def(claripy.BVV(rtoc_value, self.arch.bits), rtoc_def) self.register_definitions.store(offset, rtoc) elif self.arch.name.startswith("MIPS64"): offset, size = self.arch.registers["t9"] t9_atom = Register(offset, size) - t9_def = Definition(t9_atom, ExternalCodeLocation(call_string), tags={InitialValueTag()}) + t9_def = Definition(t9_atom, ex_loc, tags={InitialValueTag()}) self.all_definitions.add(t9_def) + if self.analysis is not None: + self.analysis.model.add_def(t9_def, ex_loc) t9 = self.annotate_with_def(claripy.BVV(func_addr, self.arch.bits), t9_def) self.register_definitions.store(offset, t9) elif self.arch.name.startswith("MIPS"): @@ -370,12 +378,17 @@ def _initialize_function(self, cc: SimCC, func_addr: int, rtoc_value: Optional[i l.warning("func_addr must not be None to initialize a function in mips") t9_offset = self.arch.registers["t9"][0] t9_atom = Register(t9_offset, self.arch.bytes) - t9_def = Definition(t9_atom, ExternalCodeLocation(call_string), 
tags={InitialValueTag()}) + t9_def = Definition(t9_atom, ex_loc, tags={InitialValueTag()}) self.all_definitions.add(t9_def) + if self.analysis is not None: + self.analysis.model.add_def(t9_def, ex_loc) t9 = self.annotate_with_def(claripy.BVV(func_addr, self.arch.bits), t9_def) self.register_definitions.store(t9_offset, t9) - def copy(self) -> "ReachingDefinitionsState": + if self.analysis is not None: + self.analysis.model.complete_loc() + + def copy(self, discard_tmpdefs=False) -> "ReachingDefinitionsState": rd = ReachingDefinitionsState( self.codeloc, self.arch, @@ -383,7 +396,7 @@ def copy(self) -> "ReachingDefinitionsState": track_tmps=self._track_tmps, track_consts=self._track_consts, analysis=self.analysis, - live_definitions=self.live_definitions.copy(), + live_definitions=self.live_definitions.copy(discard_tmpdefs=discard_tmpdefs), canonical_size=self._canonical_size, heap_allocator=self.heap_allocator, environment=self._environment, @@ -412,9 +425,13 @@ def kill_definitions(self, atom: Atom) -> None: Overwrite existing definitions w.r.t 'atom' with a dummy definition instance. A dummy definition will not be removed during simplification. 
""" + existing_defs = set(self.live_definitions.get_definitions(atom)) self.live_definitions.kill_definitions(atom) + for def_ in existing_defs: + self.analysis.model.kill_def(def_) + def kill_and_add_definition( self, atom: Atom, @@ -427,6 +444,7 @@ def kill_and_add_definition( override_codeloc: Optional[CodeLocation] = None, ) -> Tuple[Optional[MultiValues], Set[Definition]]: codeloc = override_codeloc or self.codeloc + existing_defs = set(self.live_definitions.get_definitions(atom)) mv = self.live_definitions.kill_and_add_definition( atom, codeloc, data, dummy=dummy, tags=tags, endness=endness, annotated=annotated ) @@ -493,16 +511,19 @@ def kill_and_add_definition( else: defs = set() + for def_ in existing_defs: + self.analysis.model.kill_def(def_) + for def_ in defs: + self.analysis.model.add_def(def_, codeloc) + return mv, defs def add_use(self, atom: Atom, expr: Optional[Any] = None) -> None: self.codeloc_uses.update(self.get_definitions(atom)) - self.live_definitions.add_use(atom, self.codeloc, expr=expr) def add_use_by_def(self, definition: Definition, expr: Optional[Any] = None) -> None: self.codeloc_uses.add(definition) - self.live_definitions.add_use_by_def(definition, self.codeloc, expr=expr) def add_tmp_use(self, tmp: int, expr: Optional[Any] = None) -> None: diff --git a/angr/analyses/reaching_definitions/reaching_definitions.py b/angr/analyses/reaching_definitions/reaching_definitions.py index d37b6c15bae..316daa1515d 100644 --- a/angr/analyses/reaching_definitions/reaching_definitions.py +++ b/angr/analyses/reaching_definitions/reaching_definitions.py @@ -6,6 +6,7 @@ import pyvex from angr.analyses import ForwardAnalysis +from angr.analyses.reaching_definitions.external_codeloc import ExternalCodeLocation from ...block import Block from ...knowledge_plugins.cfg.cfg_node import CFGNode from ...codenode import CodeNode @@ -20,7 +21,7 @@ from .engine_ail import SimEngineRDAIL from .engine_vex import SimEngineRDVEX from .rd_state import 
ReachingDefinitionsState -from .subject import Subject +from .subject import Subject, SubjectType from .function_handler import FunctionHandler, FunctionCallRelationships from .dep_graph import DepGraph @@ -144,6 +145,10 @@ def __init__( self._node_iterations: DefaultDict[int, int] = defaultdict(int) + self.model: ReachingDefinitionsModel = ReachingDefinitionsModel( + func_addr=self.subject.content.addr if isinstance(self.subject.content, Function) else None + ) + self._engine_vex = SimEngineRDVEX( self.project, functions=self.kb.functions, @@ -157,9 +162,6 @@ def __init__( ) self._visited_blocks: Set[Any] = visited_blocks or set() - self.model: ReachingDefinitionsModel = ReachingDefinitionsModel( - func_addr=self.subject.content.addr if isinstance(self.subject.content, Function) else None - ) self.function_calls: Dict[CodeLocation, FunctionCallRelationships] = {} self._analyze() @@ -401,23 +403,36 @@ def _run_on_node(self, node, state: ReachingDefinitionsState): block_key = node.addr elif isinstance(node, CFGNode): if node.is_simprocedure or node.is_syscall: - return False, state.copy() + return False, state.copy(discard_tmpdefs=True) block = node.block engine = self._engine_vex block_key = node.addr else: l.warning("Unsupported node type %s.", node.__class__) - return False, state.copy() + return False, state.copy(discard_tmpdefs=True) self.node_observe(node.addr, state, OP_BEFORE) - state = state.copy() + if self.subject.type == SubjectType.Function: + node_parents = [ + CodeLocation(pred.addr, 0, block_idx=pred.idx if isinstance(pred, ailment.Block) else None) + for pred in self._graph_visitor.predecessors(node) + ] + if node.addr == self.subject.content.addr: + node_parents += [ExternalCodeLocation()] + self.model.at_new_block( + CodeLocation(block.addr, 0, block_idx=block.idx if isinstance(block, ailment.Block) else None), + node_parents, + ) + + state = state.copy(discard_tmpdefs=True) state = engine.process( state, block=block, fail_fast=self._fail_fast, 
visited_blocks=self._visited_blocks, dep_graph=self._dep_graph, + model=self.model, ) self._node_iterations[block_key] += 1 diff --git a/angr/knowledge_plugins/key_definitions/live_definitions.py b/angr/knowledge_plugins/key_definitions/live_definitions.py index 77f6f3af682..f6100779201 100644 --- a/angr/knowledge_plugins/key_definitions/live_definitions.py +++ b/angr/knowledge_plugins/key_definitions/live_definitions.py @@ -196,7 +196,7 @@ def __repr__(self): ctnt += ", %d tmpdefs" % len(self.tmps) return "<%s>" % ctnt - def copy(self) -> "LiveDefinitions": + def copy(self, discard_tmpdefs=False) -> "LiveDefinitions": rd = LiveDefinitions( self.arch, track_tmps=self.track_tmps, @@ -205,12 +205,12 @@ def copy(self) -> "LiveDefinitions": stack_definitions=self.stack_definitions.copy(), heap_definitions=self.heap_definitions.copy(), memory_definitions=self.memory_definitions.copy(), - tmps=self.tmps.copy(), + tmps=self.tmps.copy() if not discard_tmpdefs else None, register_uses=self.register_uses.copy(), stack_uses=self.stack_uses.copy(), heap_uses=self.heap_uses.copy(), memory_uses=self.memory_uses.copy(), - tmp_uses=self.tmp_uses.copy(), + tmp_uses=self.tmp_uses.copy() if not discard_tmpdefs else None, ) return rd @@ -600,8 +600,15 @@ def get_register_definitions(self, reg_offset: int, size: int, endness=None) -> size=size, endness=endness, ) - except SimMemoryMissingError: - return + except SimMemoryMissingError as ex: + # load values and stop at the missing location + if ex.missing_addr > reg_offset: + values: MultiValues = self.register_definitions.load( + reg_offset, size=ex.missing_addr - reg_offset, endness=endness + ) + else: + # nothing we can do + return yield from LiveDefinitions.extract_defs_from_mv(values) def get_stack_values(self, stack_offset: int, size: int, endness: str) -> Optional[MultiValues]: diff --git a/angr/knowledge_plugins/key_definitions/liveness.py b/angr/knowledge_plugins/key_definitions/liveness.py new file mode 100644 index 
# patch header (kept verbatim): 00000000000..87d7fcbf0f9 --- /dev/null +++ b/angr/knowledge_plugins/key_definitions/liveness.py @@ -0,0 +1,94 @@
from typing import DefaultDict, Optional, List, Set, Tuple, TYPE_CHECKING

from collections import defaultdict

from .constants import ObservationPointType, OP_BEFORE, OP_AFTER

if TYPE_CHECKING:
    from .definition import Definition
    from angr.code_location import CodeLocation


LocationType = Tuple[int, Optional[int], Optional[int]]  # block addr, block ID, stmt ID
LocationWithPosType = Tuple[
    int, Optional[int], Optional[int], ObservationPointType
]  # block addr, block ID, stmt ID, before/after


class Liveness:
    """
    This class stores liveness information for each definition.

    Two indexes are maintained while an analysis walks statements:

    - ``def_to_liveness`` maps a definition to every location at which it was recorded as live.
    - ``loc_to_defs`` maps a location plus an observation position (before/after) to the set of
      definitions live there.

    ``curr_live_defs`` and ``curr_loc`` hold the working state for the statement currently being processed.
    """

    def __init__(self):
        self.curr_live_defs: Set["Definition"] = set()
        self.curr_loc: Optional[LocationType] = None
        self.def_to_liveness: DefaultDict["Definition", Set[LocationType]] = defaultdict(set)
        self.loc_to_defs: DefaultDict[LocationWithPosType, Set["Definition"]] = defaultdict(set)
        # highest statement index seen so far for each (block addr, block ID)
        self._node_max_stmt_id: DefaultDict[Tuple[int, Optional[int]], int] = defaultdict(int)

    def add_def(self, d: "Definition", code_loc: "CodeLocation") -> None:
        """
        Mark definition `d` as live, and record `code_loc` as a location where it is live.
        """
        loc = (code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx)
        self.curr_live_defs.add(d)
        self.def_to_liveness[d].add(loc)

    def kill_def(self, d: "Definition") -> None:
        """
        Remove definition `d` from the current live set. Locations already recorded for it are kept.
        """
        self.curr_live_defs.discard(d)

    def complete_loc(self) -> None:
        """
        Record the current live set as the OP_AFTER state of the current location. Does nothing when no
        location has been entered yet.
        """
        if self.curr_loc is not None:
            for live_def in self.curr_live_defs:
                self.def_to_liveness[live_def].add(self.curr_loc)
            self.loc_to_defs[self.curr_loc + (OP_AFTER,)] |= self.curr_live_defs

    def at_new_stmt(self, code_loc: "CodeLocation") -> None:
        """
        Only support moving from a statement to the next statement within one basic block.

        The previous location is completed first, then `code_loc` becomes the current location.
        """
        self.complete_loc()
        self.curr_loc = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx
        if code_loc.stmt_idx is not None:
            node = code_loc.block_addr, code_loc.block_idx
            # .get() keeps this a pure read; an absent node counts as "max statement index 0"
            if code_loc.stmt_idx > self._node_max_stmt_id.get(node, 0):
                self._node_max_stmt_id[node] = code_loc.stmt_idx

    def at_new_block(self, code_loc: "CodeLocation", pred_codelocs: List["CodeLocation"]) -> None:
        """
        Only support moving to a new block from one or more blocks.

        The OP_BEFORE state of the new block is extended with the OP_AFTER state recorded at the last known
        statement of every predecessor; the result becomes the current live set.
        """
        loc = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx
        key = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx, OP_BEFORE
        # indexing (not .get) on purpose: the block-start entry must exist after this call
        block_start_defs = self.loc_to_defs[key]
        for pred_codeloc in pred_codelocs:
            if pred_codeloc.stmt_idx is None:
                # external code location
                pred_max_stmt_id = None
            else:
                pred_max_stmt_id = self._node_max_stmt_id.get((pred_codeloc.block_addr, pred_codeloc.block_idx), 0)
            pred_key = pred_codeloc.block_addr, pred_codeloc.block_idx, pred_max_stmt_id, OP_AFTER
            # read without inserting: a predecessor that has not been processed yet contributes nothing and
            # should not leave an empty placeholder entry behind
            pred_defs = self.loc_to_defs.get(pred_key)
            if not pred_defs:
                continue
            for pred_def in pred_defs:
                self.def_to_liveness[pred_def].add(loc)
            block_start_defs |= pred_defs

        self.curr_live_defs = set(block_start_defs)
        self.curr_loc = loc

    def find_defs_at(self, code_loc: "CodeLocation", op: int = OP_BEFORE) -> Set["Definition"]:
        """
        Get the definitions recorded as live right before (`op == OP_BEFORE`) or right after (any other `op`)
        the statement at `code_loc`.

        "Before statement N" is looked up as "after statement N - 1"; for statement 0 the block-start state is
        used. The returned set may be the internally stored one, so callers must treat it as read-only.

        :return: The set of live definitions, or an empty set if nothing was recorded for that location.
        """
        if op == OP_BEFORE:
            if code_loc.stmt_idx is None:
                # A location without a statement index (e.g. an external code location) has no preceding
                # statement and no block-start state is ever stored for it, so nothing can be reported.
                # Without this guard `stmt_idx - 1` below raises TypeError.
                return set()
            if code_loc.stmt_idx != 0:
                loc = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx - 1, OP_AFTER
            else:
                loc = code_loc.block_addr, code_loc.block_idx, 0, OP_BEFORE
        else:
            loc = code_loc.block_addr, code_loc.block_idx, code_loc.stmt_idx, OP_AFTER
        found = self.loc_to_defs.get(loc)
        return set() if found is None else found

    def copy(self) -> "Liveness":
        """
        Create an independent copy of this object.

        The per-key sets are copied as well: they are mutated in place by add_def(), complete_loc() and
        at_new_block(), so sharing them would let changes to the copy leak into the original.
        Definition objects and location tuples themselves are shared.
        """
        o = Liveness()
        o.curr_live_defs = self.curr_live_defs.copy()
        o.curr_loc = self.curr_loc
        o.def_to_liveness = defaultdict(set, ((d, locs.copy()) for d, locs in self.def_to_liveness.items()))
        o.loc_to_defs = defaultdict(set, ((loc, defs.copy()) for loc, defs in self.loc_to_defs.items()))
        o._node_max_stmt_id = self._node_max_stmt_id.copy()
        return o


# patch header (kept verbatim): diff --git a/angr/knowledge_plugins/key_definitions/rd_model.py
b/angr/knowledge_plugins/key_definitions/rd_model.py index fdd5b5180fc..bd99c4a6d12 100644 --- a/angr/knowledge_plugins/key_definitions/rd_model.py +++ b/angr/knowledge_plugins/key_definitions/rd_model.py @@ -1,13 +1,14 @@ -from typing import Dict, Tuple, Set, Union, Optional, TYPE_CHECKING, overload - -from angr.knowledge_plugins.key_definitions.constants import ObservationPointType -from angr.code_location import CodeLocation +from typing import Dict, List, Tuple, Set, Union, Optional, TYPE_CHECKING, overload +from .atoms import Atom, Register, MemoryLocation, SpOffset from .uses import Uses from .live_definitions import LiveDefinitions +from .liveness import Liveness +from .constants import OP_BEFORE, ObservationPointType if TYPE_CHECKING: from angr.knowledge_plugins.key_definitions.definition import Definition + from angr.code_location import CodeLocation # TODO: Make ReachingDefinitionsModel serializable @@ -23,6 +24,7 @@ def __init__(self, func_addr: Optional[int] = None): ] = {} self.all_definitions: Set["Definition"] = set() self.all_uses = Uses() + self.liveness = Liveness() def __repr__(self): return "".format( @@ -30,11 +32,70 @@ def __repr__(self): len(self.observed_results), ) + def add_def(self, d: "Definition", codeloc: "CodeLocation") -> None: + self.liveness.add_def(d, codeloc) + + def kill_def(self, d: "Definition") -> None: + self.liveness.kill_def(d) + + def at_new_stmt(self, codeloc: "CodeLocation") -> None: + self.liveness.at_new_stmt(codeloc) + + def at_new_block(self, code_loc: "CodeLocation", pred_codelocs: List["CodeLocation"]) -> None: + self.liveness.at_new_block(code_loc, pred_codelocs) + + def complete_loc(self) -> None: + self.liveness.complete_loc() + + def find_defs_at(self, code_loc: "CodeLocation", op: int = OP_BEFORE) -> Set["Definition"]: + return self.liveness.find_defs_at(code_loc, op=op) + + def get_defs(self, atom: Atom, code_loc: "CodeLocation", op: int) -> Set["Definition"]: + all_defs = 
self.liveness.find_defs_at(code_loc, op=op) + defs = None + if isinstance(atom, Register): + defs = { + d + for d in all_defs + if isinstance(d.atom, Register) + and d.atom.reg_offset <= atom.reg_offset < d.atom.reg_offset + d.atom.size + } + elif isinstance(atom, MemoryLocation): + if isinstance(atom.addr, int): + defs = { + d + for d in all_defs + if isinstance(d.atom, MemoryLocation) + and isinstance(d.atom.addr, int) + and ( + d.atom.addr <= atom.addr < d.atom.addr + d.size + or atom.addr <= d.atom.addr < atom.addr + atom.size + ) + } + elif isinstance(atom.addr, SpOffset): + defs = { + d + for d in all_defs + if isinstance(d.atom, MemoryLocation) + and isinstance(d.atom.addr, SpOffset) + and ( + d.atom.addr.offset <= atom.addr.offset < d.atom.addr.offset + d.size + or atom.addr.offset <= d.atom.addr.offset < atom.addr.offset + atom.size + ) + } + + if defs is None: + # unsupported for now + defs = set() + + return defs + def copy(self) -> "ReachingDefinitionsModel": new = ReachingDefinitionsModel(self.func_addr) new.observed_results = self.observed_results.copy() new.all_definitions = self.all_definitions.copy() new.all_uses = self.all_uses.copy() + new.liveness = self.liveness.copy() return new def merge(self, model: "ReachingDefinitionsModel"): @@ -47,9 +108,10 @@ def merge(self, model: "ReachingDefinitionsModel"): self.observed_results[k] = merged self.all_definitions.union(model.all_definitions) self.all_uses.merge(model.all_uses) + # TODO: Merge self.liveness def get_observation_by_insn( - self, ins_addr: Union[int, CodeLocation], kind: ObservationPointType + self, ins_addr: Union[int, "CodeLocation"], kind: ObservationPointType ) -> Optional[LiveDefinitions]: if isinstance(ins_addr, int): return self.observed_results.get(("insn", ins_addr, kind), None) @@ -58,7 +120,7 @@ def get_observation_by_insn( return self.observed_results.get(("insn", ins_addr.ins_addr, kind)) def get_observation_by_node( - self, node_addr: Union[int, CodeLocation], kind: 
ObservationPointType + self, node_addr: Union[int, "CodeLocation"], kind: ObservationPointType ) -> Optional[LiveDefinitions]: if isinstance(node_addr, int): return self.observed_results.get(("node", node_addr, kind), None) @@ -66,7 +128,7 @@ def get_observation_by_node( return self.observed_results.get(("node", node_addr.block_addr, kind)) @overload - def get_observation_by_stmt(self, codeloc: CodeLocation, kind: ObservationPointType) -> Optional[LiveDefinitions]: + def get_observation_by_stmt(self, codeloc: "CodeLocation", kind: ObservationPointType) -> Optional[LiveDefinitions]: ... @overload diff --git a/angr/utils/timing.py b/angr/utils/timing.py index dfa347a9802..d460fc5e911 100644 --- a/angr/utils/timing.py +++ b/angr/utils/timing.py @@ -1,14 +1,16 @@ -# pylint:disable=no-member +# pylint:disable=no-member,global-statement +import os import time from functools import wraps from collections import defaultdict -TIMING = False -PRINT = False +TIMING = os.environ.get("TIMING", "").lower() not in {"", "no", "0", "false"} +PRINT = os.environ.get("PRINT", "").lower() not in {"", "no", "0", "false"} TIME_DISTRIBUTION = False total_time = defaultdict(float) time_distribution = defaultdict(list) +depth = 0 def timethis(func): @@ -19,18 +21,23 @@ def timed_func(*args, **kwargs): def _t(): return time.perf_counter_ns() / 1000000 + global depth + depth += 1 + start = _t() r = func(*args, **kwargs) millisec = _t() - start sec = millisec / 1000 if PRINT: + indent = " " * ((depth - 1) * 2) if sec > 1.0: - print(f"[timing] {func.__name__} took {sec:f} seconds ({millisec:f} milliseconds).") + print(f"[timing] {indent}{func.__name__} took {sec:f} seconds ({millisec:f} milliseconds).") else: - print(f"[timing] {func.__name__} took {millisec:f} milliseconds.") + print(f"[timing] {indent}{func.__name__} took {millisec:f} milliseconds.") total_time[func] += millisec if TIME_DISTRIBUTION: time_distribution[func].append(millisec) + depth -= 1 return r else: return func(*args, 
**kwargs)