Skip to content

Commit f5242cd

Browse files
authored
feat: Add SnapshotSummaries (#1085)
## Which issue does this PR close? - Related to #724 ## What changes are included in this PR? This is the building block for implementing snapshot summaries. Most of the implementation code is built on @barronw's very nice commit, and most of the added lines are tests. A follow-up PR will integrate it with the rest of the codebase. ## Are these changes tested? Yes, unit tests
1 parent eaa36e6 commit f5242cd

File tree

4 files changed

+831
-1
lines changed

4 files changed

+831
-1
lines changed

crates/iceberg/src/spec/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ mod name_mapping;
2424
mod partition;
2525
mod schema;
2626
mod snapshot;
27+
mod snapshot_summary;
2728
mod sort;
2829
mod statistic_file;
2930
mod table_metadata;

crates/iceberg/src/spec/partition.rs

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@
2020
*/
2121
use std::sync::Arc;
2222

23+
use itertools::Itertools;
2324
use serde::{Deserialize, Serialize};
2425
use typed_builder::TypedBuilder;
2526

2627
use super::transform::Transform;
2728
use super::{NestedField, Schema, SchemaRef, StructType};
29+
use crate::spec::Struct;
2830
use crate::{Error, ErrorKind, Result};
2931

3032
pub(crate) const UNPARTITIONED_LAST_ASSIGNED_ID: i32 = 999;
@@ -152,6 +154,30 @@ impl PartitionSpec {
152154

153155
true
154156
}
157+
158+
/// Renders the partition tuple `data` as a `/`-separated path of `name=value` segments,
/// one segment per field of this spec, in the order the fields appear in `self.fields`.
///
/// For each field, the position in `data` is checked with `is_null_at_index`: a null
/// position is handed to the transform as `None`, any other position as `Some(&value)`.
/// The value text is produced by the field's transform via `to_human_string`, using the
/// type found at the same position in the partition type derived from `schema`.
///
/// # Panics
///
/// Panics if `partition_type` does not succeed for `schema` (its result is unwrapped).
/// NOTE(review): the positional indexing into `data.fields()` and `field_types` presumably
/// relies on both having an entry for every field of this spec — confirm against callers.
pub(crate) fn partition_to_path(&self, data: &Struct, schema: SchemaRef) -> String {
159+
// NOTE(review): unwrap without a message; a failure here surfaces as a bare panic.
let partition_type = self.partition_type(&schema).unwrap();
160+
let field_types = partition_type.fields();
161+
162+
// Build one `name=value` segment per partition field, then join them with `/`.
self.fields
163+
.iter()
164+
.enumerate()
165+
.map(|(i, field)| {
166+
// Null positions are passed to the transform as `None` rather than read from `data`.
let value = if data.is_null_at_index(i) {
167+
None
168+
} else {
169+
Some(&data.fields()[i])
170+
};
171+
format!(
172+
"{}={}",
173+
field.name,
174+
field
175+
.transform
176+
.to_human_string(&field_types[i].field_type, value)
177+
)
178+
})
179+
.join("/")
180+
}
155181
}
156182

157183
/// Reference to [`UnboundPartitionSpec`].
@@ -660,7 +686,7 @@ impl CorePartitionSpecValidator for UnboundPartitionSpecBuilder {
660686
#[cfg(test)]
661687
mod tests {
662688
use super::*;
663-
use crate::spec::{PrimitiveType, Type};
689+
use crate::spec::{Literal, PrimitiveType, Type};
664690

665691
#[test]
666692
fn test_partition_spec() {
@@ -1733,4 +1759,30 @@ mod tests {
17331759
assert_eq!(1002, spec.fields[1].field_id);
17341760
assert!(!spec.has_sequential_ids());
17351761
}
1762+
1763+
// Verifies `partition_to_path` on a spec with two identity-transformed fields (`id`, `name`):
// the segments appear in field order, joined by `/`, each formatted as `name=value`.
#[test]
1764+
fn test_partition_to_path() {
1765+
// Two required columns: an int `id` and a string `name`.
let schema = Schema::builder()
1766+
.with_fields(vec![
1767+
NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
1768+
NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(),
1769+
])
1770+
.build()
1771+
.unwrap();
1772+
1773+
// Partition on both columns with the identity transform, keeping the column names.
let spec = PartitionSpec::builder(schema.clone())
1774+
.add_partition_field("id", "id", Transform::Identity)
1775+
.unwrap()
1776+
.add_partition_field("name", "name", Transform::Identity)
1777+
.unwrap()
1778+
.build()
1779+
.unwrap();
1780+
1781+
// Partition tuple with one non-null value per partition field, in spec order.
let data = Struct::from_iter([Some(Literal::int(42)), Some(Literal::string("alice"))]);
1782+
1783+
// The expected path shows the string value wrapped in double quotes.
assert_eq!(
1784+
spec.partition_to_path(&data, schema.into()),
1785+
"id=42/name=\"alice\""
1786+
);
1787+
}
17361788
}

0 commit comments

Comments
 (0)