Skip to content

Commit ec54547

Browse files
committed
add Long support
1 parent 86a420a commit ec54547

File tree

1 file changed

+38
-9
lines changed

1 file changed

+38
-9
lines changed

iceberg-rust/src/file_format/parquet.rs

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,23 @@ pub fn parquet_to_datafile(
126126
.or_insert(distinct_count as i64);
127127
}
128128
(
129-
Value::LongInt(_min),
130-
Value::LongInt(_max),
131-
Some(Value::LongInt(_current_min)),
132-
Some(Value::LongInt(_current_max)),
133-
) => (),
129+
Value::LongInt(min),
130+
Value::LongInt(max),
131+
Some(Value::LongInt(current_min)),
132+
Some(Value::LongInt(current_max)),
133+
) => {
134+
distinct_counts
135+
.entry(id)
136+
.and_modify(|x| {
137+
*x += estimate_distinct_count(
138+
&[current_min, current_max],
139+
&[&min, &max],
140+
*x,
141+
distinct_count as i64,
142+
);
143+
})
144+
.or_insert(distinct_count as i64);
145+
}
134146
(_, _, None, None) => {
135147
distinct_counts.entry(id).or_insert(distinct_count as i64);
136148
}
@@ -322,6 +334,23 @@ fn range_overlap<T: Ord + Sub + Copy>(
322334
overlap_end - overlap_start
323335
}
324336

337+
/// Helper trait to convert numeric types to f64 for statistical calculations.
///
/// Used as a bound on the generic parameter of `estimate_distinct_count`, in place of
/// `Into<f64>`, so that integer types without a lossless `From` conversion to `f64`
/// can be accepted. Implementations in this file use an `as` cast, so the conversion
/// is infallible but is not guaranteed to be exact for every implementor.
338+
trait ToF64 {
339+
fn to_f64(self) -> f64;
340+
}
341+
342+
// i32 -> f64 is exact: every 32-bit integer fits within f64's 53-bit significand.
impl ToF64 for i32 {
343+
fn to_f64(self) -> f64 {
344+
self as f64
345+
}
346+
}
346+
}
347+
348+
// i64 -> f64 may round: the `as` cast yields the nearest representable f64, so
// magnitudes above 2^53 can lose low-order bits.
// NOTE(review): presumably acceptable because callers only need an estimate — confirm.
impl ToF64 for i64 {
349+
fn to_f64(self) -> f64 {
350+
self as f64
351+
}
352+
}
352+
}
353+
325354
/// Estimates the number of new distinct values when merging two sets of statistics.
326355
///
327356
/// This function assumes uniform distribution of distinct values within their respective ranges
@@ -362,13 +391,13 @@ fn estimate_distinct_count<T>(
362391
new_distinct_count: i64,
363392
) -> i64
364393
where
365-
T: Ord + Sub<Output = T> + Copy + Into<f64> + Default,
394+
T: Ord + Sub<Output = T> + Copy + Default + ToF64,
366395
{
367-
let new_range_size: f64 = (*new_range[1] - *new_range[0]).into();
368-
let current_range_size: f64 = (*old_range[1] - *old_range[0]).into();
396+
let new_range_size = (*new_range[1] - *new_range[0]).to_f64();
397+
let current_range_size = (*old_range[1] - *old_range[0]).to_f64();
369398
let overlap = range_overlap(old_range, new_range);
370399
let overlap_size: f64 = if overlap >= T::default() {
371-
overlap.into()
400+
overlap.to_f64()
372401
} else {
373402
0.0
374403
};

0 commit comments

Comments
 (0)