@@ -126,11 +126,23 @@ pub fn parquet_to_datafile(
126126 . or_insert ( distinct_count as i64 ) ;
127127 }
128128 (
129- Value :: LongInt ( _min) ,
130- Value :: LongInt ( _max) ,
131- Some ( Value :: LongInt ( _current_min) ) ,
132- Some ( Value :: LongInt ( _current_max) ) ,
133- ) => ( ) ,
129+ Value :: LongInt ( min) ,
130+ Value :: LongInt ( max) ,
131+ Some ( Value :: LongInt ( current_min) ) ,
132+ Some ( Value :: LongInt ( current_max) ) ,
133+ ) => {
134+ distinct_counts
135+ . entry ( id)
136+ . and_modify ( |x| {
137+ * x += estimate_distinct_count (
138+ & [ current_min, current_max] ,
139+ & [ & min, & max] ,
140+ * x,
141+ distinct_count as i64 ,
142+ ) ;
143+ } )
144+ . or_insert ( distinct_count as i64 ) ;
145+ }
134146 ( _, _, None , None ) => {
135147 distinct_counts. entry ( id) . or_insert ( distinct_count as i64 ) ;
136148 }
@@ -322,6 +334,23 @@ fn range_overlap<T: Ord + Sub + Copy>(
322334 overlap_end - overlap_start
323335}
324336
/// Converts an integer statistic into an `f64` for statistical calculations.
///
/// The standard library offers `From<i32> for f64` but no `From<i64> for f64`,
/// because not every `i64` is exactly representable as an `f64`. This trait
/// supplies one uniform conversion for both widths so generic code can accept
/// either type behind a single bound.
///
/// The conversion is a plain `as` cast:
/// * `i32` values always convert exactly.
/// * `i64` values whose magnitude exceeds 2^53 are rounded to the nearest
///   representable `f64` (ties to even), so the result may be approximate.
trait ToF64 {
    /// Returns `self` as an `f64`, rounding if the value is not exactly
    /// representable.
    fn to_f64(self) -> f64;
}

impl ToF64 for i32 {
    /// Exact: every `i32` fits in the 53-bit mantissa of an `f64`.
    fn to_f64(self) -> f64 {
        self as f64
    }
}

impl ToF64 for i64 {
    /// Lossy above 2^53 in magnitude: the cast rounds to the nearest `f64`.
    fn to_f64(self) -> f64 {
        self as f64
    }
}
353+
325354/// Estimates the number of new distinct values when merging two sets of statistics.
326355///
327356/// This function assumes uniform distribution of distinct values within their respective ranges
@@ -362,13 +391,13 @@ fn estimate_distinct_count<T>(
362391 new_distinct_count : i64 ,
363392) -> i64
364393where
365- T : Ord + Sub < Output = T > + Copy + Into < f64 > + Default ,
394+ T : Ord + Sub < Output = T > + Copy + Default + ToF64 ,
366395{
367- let new_range_size: f64 = ( * new_range[ 1 ] - * new_range[ 0 ] ) . into ( ) ;
368- let current_range_size: f64 = ( * old_range[ 1 ] - * old_range[ 0 ] ) . into ( ) ;
396+ let new_range_size = ( * new_range[ 1 ] - * new_range[ 0 ] ) . to_f64 ( ) ;
397+ let current_range_size = ( * old_range[ 1 ] - * old_range[ 0 ] ) . to_f64 ( ) ;
369398 let overlap = range_overlap ( old_range, new_range) ;
370399 let overlap_size: f64 = if overlap >= T :: default ( ) {
371- overlap. into ( )
400+ overlap. to_f64 ( )
372401 } else {
373402 0.0
374403 } ;
0 commit comments