Skip to content

Commit

Permalink
CFGFast: Avoid overlapping THUMB and ARM nodes. (angr#3073)
Browse files Browse the repository at this point in the history
* CFGFast: Avoid overlapping THUMB and ARM nodes.

* Lint the code.
  • Loading branch information
ltfish authored Jan 10, 2022
1 parent 4b04d56 commit f43d110
Showing 1 changed file with 69 additions and 12 deletions.
81 changes: 69 additions & 12 deletions angr/analyses/cfg/cfg_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,17 @@


class ContinueScanningNotification(RuntimeError):
    """
    Notification raised by _next_code_addr_core() to signal that no code address was found and that
    _next_code_addr_core() should be invoked again.
    """


class ARMDecodingMode:
"""
Enums indicating decoding mode for ARM code.
"""
ARM = 0
THUMB = 1

Expand Down Expand Up @@ -242,13 +249,19 @@ def clear_updated_functions(self):


class FunctionEdge:
    """
    Base class for all types of edges in functions' transition graphs.

    Concrete edge types override apply(); the implementation here only raises.
    """

    __slots__ = ('src_func_addr', 'stmt_idx', 'ins_addr')

    def apply(self, cfg):
        """
        Hook that concrete edge types override.

        :param cfg: Not used by the base implementation.
        :raises NotImplementedError: Always, on the base class.
        """
        raise NotImplementedError


class FunctionTransitionEdge(FunctionEdge):
"""
Describes a transition edge in functions' transition graphs.
"""

__slots__ = ('src_node', 'dst_addr', 'src_func_addr', 'to_outside', 'dst_func_addr', 'is_exception', )

Expand Down Expand Up @@ -283,6 +296,9 @@ def apply(self, cfg):


class FunctionCallEdge(FunctionEdge):
"""
Describes a call edge in functions' transition graphs.
"""

__slots__ = ('src_node', 'dst_addr', 'ret_addr', 'syscall')

Expand All @@ -307,6 +323,9 @@ def apply(self, cfg):


class FunctionFakeRetEdge(FunctionEdge):
"""
Describes a FakeReturn (also called fall-through) edge in functions' transition graphs.
"""

__slots__ = ('src_node', 'dst_addr', 'confirmed')

Expand All @@ -326,6 +345,9 @@ def apply(self, cfg):


class FunctionReturnEdge(FunctionEdge):
"""
Describes a return (from a function call or a syscall) edge in functions' transition graphs.
"""

__slots__ = ('ret_from_addr', 'ret_to_addr', 'dst_func_addr')

Expand Down Expand Up @@ -1260,6 +1282,30 @@ def _post_process_successors(self, irsb, successors):
lambda tpl: tpl[3],
)

# make sure we don't jump to the beginning of another function with a different mode
filtered_successors = [ ]
for successor in successors:
addr_v = successor[2]
if isinstance(addr_v, pyvex.expr.Const):
addr = addr_v.con.value
elif isinstance(addr_v, int):
addr = addr_v
else:
# do nothing
filtered_successors.append(successor)
continue
if addr % 2 == 1:
# THUMB mode - test if there is an existing ARM function
addr_to_test = addr - 1
else:
# ARM mode - test if there is an existing THUMB function
addr_to_test = addr + 1
if self.functions.contains_addr(addr_to_test):
# oops. skip it
continue
filtered_successors.append(successor)
successors = filtered_successors

return successors

def _post_job_handling(self, job, new_jobs, successors):
Expand Down Expand Up @@ -1345,6 +1391,7 @@ def _job_queue_empty(self):
# if this is ARM and addr % 4 != 0, it has to be THUMB
if is_arm_arch(self.project.arch):
if addr % 2 == 0 and addr % 4 != 0:
# it's not aligned by 4, so it's definitely not ARM mode
addr |= 1
else:
# load 8 bytes and test with THUMB-mode prologues
Expand All @@ -1355,14 +1402,15 @@ def _job_queue_empty(self):
if any(re.match(prolog, bytes_prefix) for prolog in self.project.arch.thumb_prologs):
addr |= 1

# another heuristics: take a look at the closest function. if it's THUMB mode, this address should
# be THUMB, too.
func = self.functions.floor_func(addr)
if func is None:
func = self.functions.ceiling_func(addr)
if func is not None and func.addr % 2 == 1:
addr |= 1
# print(f"GUESSING: {hex(addr)} because of function {repr(func)}.")
if addr % 2 == 0:
# another heuristics: take a look at the closest function. if it's THUMB mode, this address
# should be THUMB, too.
func = self.functions.floor_func(addr)
if func is None:
func = self.functions.ceiling_func(addr)
if func is not None and func.addr % 2 == 1:
addr |= 1
# print(f"GUESSING: {hex(addr)} because of function {repr(func)}.")

job = CFGJob(addr, addr, "Ijk_Boring", last_addr=None, job_type=CFGJob.JOB_TYPE_COMPLETE_SCANNING)
self._insert_job(job)
Expand Down Expand Up @@ -3072,6 +3120,7 @@ def _remove_redundant_overlapping_blocks(self, function_alignment: int=16, is_ar
removed_nodes = set()

a = None # it always hold the very recent non-removed node
is_arm = is_arm_arch(self.project.arch)

for i in range(len(sorted_nodes)): # pylint:disable=consider-using-enumerate

Expand All @@ -3087,12 +3136,20 @@ def _remove_redundant_overlapping_blocks(self, function_alignment: int=16, is_ar
# skip all removed nodes
continue

if a.addr <= b.addr and \
(a.addr + a.size > b.addr):
# handle ARM vs THUMB...
if is_arm:
a_real_addr = a.addr & 0xffff_fffe
b_real_addr = b.addr & 0xffff_fffe
else:
a_real_addr = a.addr
b_real_addr = b.addr

if a_real_addr <= b_real_addr < a_real_addr + a.size:
# They are overlapping

try:
block = self.project.factory.fresh_block(a.addr, b.addr - a.addr, backup_state=self._base_state)
block = self.project.factory.fresh_block(a.addr, b_real_addr - a_real_addr,
backup_state=self._base_state)
except SimTranslationError:
a = b
continue
Expand Down Expand Up @@ -3837,7 +3894,7 @@ def _generate_cfgnode(self, cfg_job, current_function_addr):
return None, None, None, None

distance = VEX_IRSB_MAX_SIZE
# if there is exception handling code, check the distance between `addr` and the cloest ending address
# if there is exception handling code, check the distance between `addr` and the closest ending address
if self._exception_handling_by_endaddr:
next_end = next(self._exception_handling_by_endaddr.irange(minimum=real_addr), None)
if next_end is not None:
Expand Down

0 comments on commit f43d110

Please sign in to comment.