@@ -3,6 +3,7 @@ use crate::cache::{
33 cached_function_guid, insert_cached_function_match, try_cached_function_guid,
44 try_cached_function_match,
55} ;
6+ use crate :: container:: { Container , SourceId } ;
67use crate :: convert:: { platform_to_target, to_bn_symbol_at_address, to_bn_type} ;
78use crate :: matcher:: { Matcher , MatcherSettings } ;
89use crate :: plugin:: settings:: PluginSettings ;
@@ -15,14 +16,13 @@ use binaryninja::function::Function as BNFunction;
1516use binaryninja:: rc:: Ref as BNRef ;
1617use binaryninja:: settings:: { QueryOptions , Settings } ;
1718use binaryninja:: workflow:: { activity, Activity , AnalysisContext , Workflow , WorkflowBuilder } ;
19+ use dashmap:: DashSet ;
1820use itertools:: Itertools ;
1921use rayon:: iter:: IntoParallelIterator ;
2022use rayon:: iter:: ParallelIterator ;
2123use std:: cmp:: Ordering ;
2224use std:: collections:: HashMap ;
23- use std:: sync:: atomic:: AtomicUsize ;
24- use std:: sync:: { Arc , Mutex } ;
25- use std:: time:: { Duration , Instant } ;
25+ use std:: time:: Instant ;
2626use warp:: r#type:: class:: function:: { Location , RegisterLocation , StackLocation } ;
2727use warp:: signature:: function:: { Function , FunctionGUID } ;
2828use warp:: target:: Target ;
@@ -120,6 +120,23 @@ pub fn run_matcher(view: &BinaryView) {
120120 let _ = get_warp_ignore_tag_type ( view) ;
121121 view. file ( ) . forget_undo_actions ( & undo_id) ;
122122
123+ let filter_functions = |functions : & mut Vec < Function > | {
124+ // We sort primarily by symbol, then by type, so we can deduplicate in-place.
125+ functions. sort_unstable_by ( |a, b| match a. symbol . cmp ( & b. symbol ) {
126+ Ordering :: Equal => match ( & a. ty , & b. ty ) {
127+ ( None , None ) => Ordering :: Equal ,
128+ ( None , Some ( _) ) => Ordering :: Less ,
129+ ( Some ( _) , None ) => Ordering :: Greater ,
130+ // TODO: We still need to order the types, probably can't do this in place.
131+ // TODO: Once Type can be ordered, we can remove this entire explicit match stmt.
132+ ( Some ( _) , Some ( _) ) => Ordering :: Equal ,
133+ } ,
134+ other => other,
135+ } ) ;
136+ // This removes consecutive duplicates efficiently
137+ functions. dedup_by ( |a, b| a. symbol == b. symbol && a. ty == b. ty ) ;
138+ } ;
139+
123140 // Then we want to actually find matching functions.
124141 let background_task = BackgroundTask :: new ( "Matching on WARP functions..." , true ) ;
125142 let start = Instant :: now ( ) ;
@@ -135,88 +152,106 @@ pub fn run_matcher(view: &BinaryView) {
135152 return ;
136153 } ;
137154
138- // TODO: Target gets cloned a lot.
139- // TODO: Containers might both match on the same function. What should we do?
140- let matched_count = AtomicUsize :: new ( 0 ) ;
141- for_cached_containers ( |container| {
142- if background_task. is_cancelled ( ) {
155+ let matcher_results: DashSet < u64 > = DashSet :: new ( ) ;
156+ let match_for_guid = |target, container : & dyn Container , sources : Vec < SourceId > , guid| {
157+ let mut matched_functions: Vec < Function > = sources
158+ . iter ( )
159+ . flat_map ( |source| {
160+ container
161+ . functions_with_guid ( target, source, & guid)
162+ . unwrap_or_default ( )
163+ } )
164+ . collect ( ) ;
165+
166+ // NOTE: See the comment in `match_function_from_constraints` about this fast fail.
167+ if matcher
168+ . settings
169+ . maximum_possible_functions
170+ . is_some_and ( |max| max < matched_functions. len ( ) as u64 )
171+ {
172+ log:: warn!(
173+ "Skipping {}, too many possible functions: {}" ,
174+ guid,
175+ matched_functions. len( )
176+ ) ;
143177 return ;
144178 }
145179
146- for ( target, guids) in & function_set. guids_by_target {
147- let function_guid_with_sources = container
148- . sources_with_function_guids ( target, guids)
149- . unwrap_or_default ( ) ;
150-
151- function_guid_with_sources
152- . into_par_iter ( )
153- . for_each ( |( guid, sources) | {
154- let mut matched_functions: Vec < Function > = sources
155- . iter ( )
156- . flat_map ( |source| {
157- container
158- . functions_with_guid ( target, source, & guid)
159- . unwrap_or_default ( )
160- } )
161- . collect ( ) ;
162-
163- // We sort primarily by symbol, then by type, so we can deduplicate in-place.
164- matched_functions. sort_unstable_by ( |a, b| match a. symbol . cmp ( & b. symbol ) {
165- Ordering :: Equal => match ( & a. ty , & b. ty ) {
166- ( None , None ) => Ordering :: Equal ,
167- ( None , Some ( _) ) => Ordering :: Less ,
168- ( Some ( _) , None ) => Ordering :: Greater ,
169- // TODO: We still need to order the types, probably cant do this in place.
170- // TODO: Once Type can be ordered, we can remove this entire explicit match stmt.
171- ( Some ( _) , Some ( _) ) => Ordering :: Equal ,
172- } ,
173- other => other,
180+ // Filter out duplicate functions for matching.
181+ filter_functions ( & mut matched_functions) ;
182+
183+ let functions = function_set
184+ . functions_by_target_and_guid
185+ . get ( & ( guid, target. clone ( ) ) )
186+ . expect ( "Function guid not found" ) ;
187+
188+ for function in functions {
189+ // Match on all the possible functions
190+ if let Some ( matched_function) =
191+ matcher. match_function_from_constraints ( function, & matched_functions)
192+ {
193+ // Because we can do multiple rounds of matching at once, we only want to insert a function
194+ // match if we have not already done so in a previous round.
195+ // TODO: What if the new round changes the matched function metadata? Unlikely but possible.
196+ if matcher_results. insert ( function. start ( ) ) {
197+ // We were able to find a match, add it to the match cache and then mark the function
198+ // as requiring updates; this is so that we know about it in the applier activity.
199+ insert_cached_function_match ( function, Some ( matched_function) ) ;
200+ }
201+ }
202+ }
203+ } ;
204+
205+ // NOTE: Because matching a function can depend on other functions having matched, we run multiple
206+ // rounds of matching until the result stabilizes (i.e. a round produces no newly matched functions).
207+ // There are other ways to get the same behavior that may take less time, such as a work list that
208+ // pushes callers back onto the list whenever a function matches. On top of that, you could order
209+ // the functions bottom-up, with a reverse post-order sort.
210+
211+ // TODO: Target gets cloned a lot.
212+ // TODO: Containers might both match on the same function. What should we do?
213+ let maximum_rounds = matcher. settings . maximum_matching_rounds . unwrap_or ( 100 ) ;
214+ let mut final_matched_round = 1 ;
215+ for matched_round in 1 ..=maximum_rounds {
216+ let bg_task_text = format ! ( "Matching on WARP functions... ({} rounds)" , matched_round) ;
217+ background_task. set_progress_text ( & bg_task_text) ;
218+ let matched_count_before = matcher_results. len ( ) ;
219+
220+ for_cached_containers ( |container| {
221+ if background_task. is_cancelled ( ) {
222+ return ;
223+ }
224+
225+ for ( target, guids) in & function_set. guids_by_target {
226+ let function_guid_with_sources = container
227+ . sources_with_function_guids ( target, guids)
228+ . unwrap_or_default ( ) ;
229+
230+ function_guid_with_sources
231+ . into_par_iter ( )
232+ . for_each ( |( guid, sources) | {
233+ match_for_guid ( target, container, sources, guid) ;
174234 } ) ;
175- // This removes consecutive duplicates efficiently
176- matched_functions. dedup_by ( |a, b| a. symbol == b. symbol && a. ty == b. ty ) ;
177-
178- // NOTE: See the comment in `match_function_from_constraints` about this fast fail.
179- if matcher
180- . settings
181- . maximum_possible_functions
182- . is_some_and ( |max| max < matched_functions. len ( ) as u64 )
183- {
184- log:: warn!(
185- "Skipping {}, too many possible functions: {}" ,
186- guid,
187- matched_functions. len( )
188- ) ;
189- return ;
190- }
235+ }
236+ } ) ;
191237
192- let functions = function_set
193- . functions_by_target_and_guid
194- . get ( & ( guid, target. clone ( ) ) )
195- . expect ( "Function guid not found" ) ;
196-
197- for function in functions {
198- // Match on all the possible functions
199- if let Some ( matched_function) =
200- matcher. match_function_from_constraints ( function, & matched_functions)
201- {
202- matched_count. fetch_add ( 1 , std:: sync:: atomic:: Ordering :: Relaxed ) ;
203- // We were able to find a match, add it to the match cache and then mark the function
204- // as requiring updates; this is so that we know about it in the applier activity.
205- insert_cached_function_match ( function, Some ( matched_function) ) ;
206- }
207- }
208- } ) ;
238+ final_matched_round = matched_round;
239+ // If the number of matches did not increase we can stop matching.
240+ let matched_count_after = matcher_results. len ( ) ;
241+ if matched_count_after == 0 || matched_count_after == matched_count_before {
242+ break ;
209243 }
210- } ) ;
244+ }
211245
212246 if background_task. is_cancelled ( ) {
213247 log:: info!( "Matcher was cancelled by user, you may run it again by running the 'Run Matcher' command." ) ;
214248 }
215249
216250 log:: info!(
217- "Function matching took {:.3} seconds and matched {} functions" ,
251+ "Function matching took {:.3} seconds and matched {} functions after {} rounds " ,
218252 start. elapsed( ) . as_secs_f64( ) ,
219- matched_count. load( std:: sync:: atomic:: Ordering :: Relaxed )
253+ matcher_results. len( ) ,
254+ final_matched_round
220255 ) ;
221256 background_task. finish ( ) ;
222257
0 commit comments