Skip to content

Commit 81b2d4d

Browse files
committed
[WARP] Allow multi-round matching to resolve matching function dependencies
Fixes some possible nondeterminism due to parallelized matching
1 parent 58cc21e commit 81b2d4d

File tree

2 files changed

+135
-72
lines changed

2 files changed

+135
-72
lines changed

plugins/warp/src/matcher.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,10 @@ pub struct MatcherSettings {
312312
///
313313
/// This is set to [MatcherSettings::MAXIMUM_POSSIBLE_FUNCTIONS_DEFAULT] by default.
314314
pub maximum_possible_functions: Option<u64>,
315+
/// The maximum number of matching rounds to run; consecutive rounds are run until no newly matched functions are found.
316+
///
317+
/// This is set to [MatcherSettings::MAXIMUM_MATCHING_ROUNDS_DEFAULT] by default.
318+
pub maximum_matching_rounds: Option<u64>,
315319
}
316320

317321
impl MatcherSettings {
@@ -342,6 +346,9 @@ impl MatcherSettings {
342346
pub const MAXIMUM_POSSIBLE_FUNCTIONS_SETTING: &'static str =
343347
"warp.matcher.maximumPossibleFunctions";
344348
pub const MAXIMUM_POSSIBLE_FUNCTIONS_DEFAULT: u64 = 1000;
349+
pub const MAXIMUM_MATCHING_ROUNDS_SETTING: &'static str = "warp.matcher.maximumMatchingRounds";
350+
pub const MAXIMUM_MATCHING_ROUNDS_DEFAULT: u64 = 0;
351+
345352
/// Populates the [MatcherSettings] to the current Binary Ninja settings instance.
346353
///
347354
/// Call this once when you initialize so that the settings exist.
@@ -426,6 +433,18 @@ impl MatcherSettings {
426433
Self::MAXIMUM_POSSIBLE_FUNCTIONS_SETTING,
427434
&maximum_possible_functions_props.to_string(),
428435
);
436+
437+
let maximum_matching_rounds_props = json!({
438+
"title" : "Maximum Matching Rounds",
439+
"type" : "number",
440+
"default" : Self::MAXIMUM_MATCHING_ROUNDS_DEFAULT,
441+
"description" : "The maximum number of matching rounds to run, consecutive rounds are ran until no new matched functions are found. A value of 0 will disable this check.",
442+
"ignore" : [],
443+
});
444+
bn_settings.register_setting_json(
445+
Self::MAXIMUM_MATCHING_ROUNDS_SETTING,
446+
&maximum_matching_rounds_props.to_string(),
447+
);
429448
}
430449

431450
/// Retrieve matcher settings from [`BNSettings`].
@@ -463,6 +482,14 @@ impl MatcherSettings {
463482
len => settings.maximum_possible_functions = Some(len),
464483
}
465484
}
485+
if bn_settings.contains(Self::MAXIMUM_MATCHING_ROUNDS_SETTING) {
486+
match bn_settings
487+
.get_integer_with_opts(Self::MAXIMUM_MATCHING_ROUNDS_SETTING, query_opts)
488+
{
489+
0 => settings.maximum_matching_rounds = None,
490+
len => settings.maximum_matching_rounds = Some(len),
491+
}
492+
}
466493
settings
467494
}
468495
}
@@ -477,6 +504,7 @@ impl Default for MatcherSettings {
477504
trivial_function_adjacent_allowed:
478505
MatcherSettings::TRIVIAL_FUNCTION_ADJACENT_ALLOWED_DEFAULT,
479506
maximum_possible_functions: Some(MatcherSettings::MAXIMUM_POSSIBLE_FUNCTIONS_DEFAULT),
507+
maximum_matching_rounds: Some(MatcherSettings::MAXIMUM_MATCHING_ROUNDS_DEFAULT),
480508
}
481509
}
482510
}

plugins/warp/src/plugin/workflow.rs

Lines changed: 107 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use crate::cache::{
33
cached_function_guid, insert_cached_function_match, try_cached_function_guid,
44
try_cached_function_match,
55
};
6+
use crate::container::{Container, SourceId};
67
use crate::convert::{platform_to_target, to_bn_symbol_at_address, to_bn_type};
78
use crate::matcher::{Matcher, MatcherSettings};
89
use crate::plugin::settings::PluginSettings;
@@ -15,14 +16,13 @@ use binaryninja::function::Function as BNFunction;
1516
use binaryninja::rc::Ref as BNRef;
1617
use binaryninja::settings::{QueryOptions, Settings};
1718
use binaryninja::workflow::{activity, Activity, AnalysisContext, Workflow, WorkflowBuilder};
19+
use dashmap::DashSet;
1820
use itertools::Itertools;
1921
use rayon::iter::IntoParallelIterator;
2022
use rayon::iter::ParallelIterator;
2123
use std::cmp::Ordering;
2224
use std::collections::HashMap;
23-
use std::sync::atomic::AtomicUsize;
24-
use std::sync::{Arc, Mutex};
25-
use std::time::{Duration, Instant};
25+
use std::time::Instant;
2626
use warp::r#type::class::function::{Location, RegisterLocation, StackLocation};
2727
use warp::signature::function::{Function, FunctionGUID};
2828
use warp::target::Target;
@@ -120,6 +120,23 @@ pub fn run_matcher(view: &BinaryView) {
120120
let _ = get_warp_ignore_tag_type(view);
121121
view.file().forget_undo_actions(&undo_id);
122122

123+
let filter_functions = |functions: &mut Vec<Function>| {
124+
// We sort primarily by symbol, then by type, so we can deduplicate in-place.
125+
functions.sort_unstable_by(|a, b| match a.symbol.cmp(&b.symbol) {
126+
Ordering::Equal => match (&a.ty, &b.ty) {
127+
(None, None) => Ordering::Equal,
128+
(None, Some(_)) => Ordering::Less,
129+
(Some(_), None) => Ordering::Greater,
130+
// TODO: We still need to order the types, probably can't do this in place.
131+
// TODO: Once Type can be ordered, we can remove this entire explicit match stmt.
132+
(Some(_), Some(_)) => Ordering::Equal,
133+
},
134+
other => other,
135+
});
136+
// This removes consecutive duplicates efficiently
137+
functions.dedup_by(|a, b| a.symbol == b.symbol && a.ty == b.ty);
138+
};
139+
123140
// Then we want to actually find matching functions.
124141
let background_task = BackgroundTask::new("Matching on WARP functions...", true);
125142
let start = Instant::now();
@@ -135,88 +152,106 @@ pub fn run_matcher(view: &BinaryView) {
135152
return;
136153
};
137154

138-
// TODO: Target gets cloned a lot.
139-
// TODO: Containers might both match on the same function. What should we do?
140-
let matched_count = AtomicUsize::new(0);
141-
for_cached_containers(|container| {
142-
if background_task.is_cancelled() {
155+
let matcher_results: DashSet<u64> = DashSet::new();
156+
let match_for_guid = |target, container: &dyn Container, sources: Vec<SourceId>, guid| {
157+
let mut matched_functions: Vec<Function> = sources
158+
.iter()
159+
.flat_map(|source| {
160+
container
161+
.functions_with_guid(target, source, &guid)
162+
.unwrap_or_default()
163+
})
164+
.collect();
165+
166+
// NOTE: See the comment in `match_function_from_constraints` about this fast fail.
167+
if matcher
168+
.settings
169+
.maximum_possible_functions
170+
.is_some_and(|max| max < matched_functions.len() as u64)
171+
{
172+
log::warn!(
173+
"Skipping {}, too many possible functions: {}",
174+
guid,
175+
matched_functions.len()
176+
);
143177
return;
144178
}
145179

146-
for (target, guids) in &function_set.guids_by_target {
147-
let function_guid_with_sources = container
148-
.sources_with_function_guids(target, guids)
149-
.unwrap_or_default();
150-
151-
function_guid_with_sources
152-
.into_par_iter()
153-
.for_each(|(guid, sources)| {
154-
let mut matched_functions: Vec<Function> = sources
155-
.iter()
156-
.flat_map(|source| {
157-
container
158-
.functions_with_guid(target, source, &guid)
159-
.unwrap_or_default()
160-
})
161-
.collect();
162-
163-
// We sort primarily by symbol, then by type, so we can deduplicate in-place.
164-
matched_functions.sort_unstable_by(|a, b| match a.symbol.cmp(&b.symbol) {
165-
Ordering::Equal => match (&a.ty, &b.ty) {
166-
(None, None) => Ordering::Equal,
167-
(None, Some(_)) => Ordering::Less,
168-
(Some(_), None) => Ordering::Greater,
169-
// TODO: We still need to order the types, probably cant do this in place.
170-
// TODO: Once Type can be ordered, we can remove this entire explicit match stmt.
171-
(Some(_), Some(_)) => Ordering::Equal,
172-
},
173-
other => other,
180+
// Filter out duplicate functions for matching.
181+
filter_functions(&mut matched_functions);
182+
183+
let functions = function_set
184+
.functions_by_target_and_guid
185+
.get(&(guid, target.clone()))
186+
.expect("Function guid not found");
187+
188+
for function in functions {
189+
// Match on all the possible functions
190+
if let Some(matched_function) =
191+
matcher.match_function_from_constraints(function, &matched_functions)
192+
{
193+
// Because we can do multiple rounds of matching at once, we only want to insert a function
194+
// match if we have not already done so in a previous round.
195+
// TODO: What if the new round changes the matched function metadata? Unlikely but possible.
196+
if matcher_results.insert(function.start()) {
197+
// We were able to find a match, add it to the match cache and then mark the function
198+
// as requiring updates; this is so that we know about it in the applier activity.
199+
insert_cached_function_match(function, Some(matched_function));
200+
}
201+
}
202+
}
203+
};
204+
205+
// NOTE: Because matching can depend on other functions having matched, we run multiple
207+
// rounds of matching until it stabilizes (i.e. no more newly matched functions). There are other
208+
// ways to get the same behavior that may take less time, such as a work list that pushes callers
209+
// back into the work list when a function matches. On top of that, you could order the functions
210+
// to be matched bottom-up, with a reverse post-order sort.
210+
211+
// TODO: Target gets cloned a lot.
212+
// TODO: Containers might both match on the same function. What should we do?
213+
let maximum_rounds = matcher.settings.maximum_matching_rounds.unwrap_or(100);
214+
let mut final_matched_round = 1;
215+
for matched_round in 1..=maximum_rounds {
216+
let bg_task_text = format!("Matching on WARP functions... ({} rounds)", matched_round);
217+
background_task.set_progress_text(&bg_task_text);
218+
let matched_count_before = matcher_results.len();
219+
220+
for_cached_containers(|container| {
221+
if background_task.is_cancelled() {
222+
return;
223+
}
224+
225+
for (target, guids) in &function_set.guids_by_target {
226+
let function_guid_with_sources = container
227+
.sources_with_function_guids(target, guids)
228+
.unwrap_or_default();
229+
230+
function_guid_with_sources
231+
.into_par_iter()
232+
.for_each(|(guid, sources)| {
233+
match_for_guid(target, container, sources, guid);
174234
});
175-
// This removes consecutive duplicates efficiently
176-
matched_functions.dedup_by(|a, b| a.symbol == b.symbol && a.ty == b.ty);
177-
178-
// NOTE: See the comment in `match_function_from_constraints` about this fast fail.
179-
if matcher
180-
.settings
181-
.maximum_possible_functions
182-
.is_some_and(|max| max < matched_functions.len() as u64)
183-
{
184-
log::warn!(
185-
"Skipping {}, too many possible functions: {}",
186-
guid,
187-
matched_functions.len()
188-
);
189-
return;
190-
}
235+
}
236+
});
191237

192-
let functions = function_set
193-
.functions_by_target_and_guid
194-
.get(&(guid, target.clone()))
195-
.expect("Function guid not found");
196-
197-
for function in functions {
198-
// Match on all the possible functions
199-
if let Some(matched_function) =
200-
matcher.match_function_from_constraints(function, &matched_functions)
201-
{
202-
matched_count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
203-
// We were able to find a match, add it to the match cache and then mark the function
204-
// as requiring updates; this is so that we know about it in the applier activity.
205-
insert_cached_function_match(function, Some(matched_function));
206-
}
207-
}
208-
});
238+
final_matched_round = matched_round;
239+
// If the number of matches did not increase we can stop matching.
240+
let matched_count_after = matcher_results.len();
241+
if matched_count_after == 0 || matched_count_after == matched_count_before {
242+
break;
209243
}
210-
});
244+
}
211245

212246
if background_task.is_cancelled() {
213247
log::info!("Matcher was cancelled by user, you may run it again by running the 'Run Matcher' command.");
214248
}
215249

216250
log::info!(
217-
"Function matching took {:.3} seconds and matched {} functions",
251+
"Function matching took {:.3} seconds and matched {} functions after {} rounds",
218252
start.elapsed().as_secs_f64(),
219-
matched_count.load(std::sync::atomic::Ordering::Relaxed)
253+
matcher_results.len(),
254+
final_matched_round
220255
);
221256
background_task.finish();
222257

0 commit comments

Comments
 (0)