Skip to content

Commit e36d834

Browse files
committed
Squash builder
1 parent 0ef9304 commit e36d834

File tree

12 files changed

+2181
-108
lines changed

12 files changed

+2181
-108
lines changed

crates/catalog/glue/src/catalog.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,9 @@ impl Catalog for GlueCatalog {
355355
}
356356
};
357357

358-
let metadata = TableMetadataBuilder::from_table_creation(creation)?.build()?;
358+
let metadata = TableMetadataBuilder::from_table_creation(creation)?
359+
.build()?
360+
.metadata;
359361
let metadata_location = create_metadata_location(&location, 0)?;
360362

361363
self.file_io

crates/catalog/glue/src/schema.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,9 @@ mod tests {
198198
.location("my_location".to_string())
199199
.schema(schema)
200200
.build();
201-
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?.build()?;
201+
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?
202+
.build()?
203+
.metadata;
202204

203205
Ok(metadata)
204206
}

crates/catalog/glue/src/utils.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,9 @@ mod tests {
299299
.location("my_location".to_string())
300300
.schema(schema)
301301
.build();
302-
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?.build()?;
302+
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?
303+
.build()?
304+
.metadata;
303305

304306
Ok(metadata)
305307
}

crates/catalog/hms/src/catalog.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,9 @@ impl Catalog for HmsCatalog {
346346
}
347347
};
348348

349-
let metadata = TableMetadataBuilder::from_table_creation(creation)?.build()?;
349+
let metadata = TableMetadataBuilder::from_table_creation(creation)?
350+
.build()?
351+
.metadata;
350352
let metadata_location = create_metadata_location(&location, 0)?;
351353

352354
self.file_io

crates/catalog/memory/src/catalog.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,9 @@ impl Catalog for MemoryCatalog {
194194
}
195195
};
196196

197-
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?.build()?;
197+
let metadata = TableMetadataBuilder::from_table_creation(table_creation)?
198+
.build()?
199+
.metadata;
198200
let metadata_location = format!(
199201
"{}/metadata/{}-{}.metadata.json",
200202
&location,

crates/catalog/sql/src/catalog.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -699,7 +699,9 @@ impl Catalog for SqlCatalog {
699699
}
700700
};
701701

702-
let tbl_metadata = TableMetadataBuilder::from_table_creation(tbl_creation)?.build()?;
702+
let tbl_metadata = TableMetadataBuilder::from_table_creation(tbl_creation)?
703+
.build()?
704+
.metadata;
703705
let tbl_metadata_location = format!(
704706
"{}/metadata/0-{}.metadata.json",
705707
location.clone(),

crates/iceberg/src/catalog/mod.rs

+48-5
Original file line numberDiff line numberDiff line change
@@ -447,8 +447,46 @@ impl TableUpdate {
447447
/// Applies the update to the table metadata builder.
448448
pub fn apply(self, builder: TableMetadataBuilder) -> Result<TableMetadataBuilder> {
449449
match self {
450-
TableUpdate::AssignUuid { uuid } => builder.assign_uuid(uuid),
451-
_ => unimplemented!(),
450+
TableUpdate::AssignUuid { uuid } => Ok(builder.assign_uuid(uuid)),
451+
TableUpdate::AddSchema {
452+
schema,
453+
last_column_id,
454+
} => {
455+
if let Some(last_column_id) = last_column_id {
456+
if builder.last_column_id() > last_column_id {
457+
return Err(Error::new(
458+
ErrorKind::DataInvalid,
459+
format!(
460+
"Invalid last column ID: {last_column_id} < {} (previous last column ID)",
461+
builder.last_column_id()
462+
),
463+
));
464+
}
465+
};
466+
Ok(builder.add_schema(schema))
467+
}
468+
TableUpdate::SetCurrentSchema { schema_id } => builder.set_current_schema(schema_id),
469+
TableUpdate::AddSpec { spec } => builder.add_partition_spec(spec),
470+
TableUpdate::SetDefaultSpec { spec_id } => builder.set_default_partition_spec(spec_id),
471+
TableUpdate::AddSortOrder { sort_order } => builder.add_sort_order(sort_order),
472+
TableUpdate::SetDefaultSortOrder { sort_order_id } => {
473+
builder.set_default_sort_order(sort_order_id)
474+
}
475+
TableUpdate::AddSnapshot { snapshot } => builder.add_snapshot(snapshot),
476+
TableUpdate::SetSnapshotRef {
477+
ref_name,
478+
reference,
479+
} => builder.set_ref(&ref_name, reference),
480+
TableUpdate::RemoveSnapshots { snapshot_ids } => {
481+
Ok(builder.remove_snapshots(&snapshot_ids))
482+
}
483+
TableUpdate::RemoveSnapshotRef { ref_name } => Ok(builder.remove_ref(&ref_name)),
484+
TableUpdate::SetLocation { location } => Ok(builder.set_location(location)),
485+
TableUpdate::SetProperties { updates } => builder.set_properties(updates),
486+
TableUpdate::RemoveProperties { removals } => Ok(builder.remove_properties(&removals)),
487+
TableUpdate::UpgradeFormatVersion { format_version } => {
488+
builder.upgrade_format_version(format_version)
489+
}
452490
}
453491
}
454492
}
@@ -1221,16 +1259,21 @@ mod tests {
12211259
let table_metadata = TableMetadataBuilder::from_table_creation(table_creation)
12221260
.unwrap()
12231261
.build()
1224-
.unwrap();
1225-
let table_metadata_builder = TableMetadataBuilder::new(table_metadata);
1262+
.unwrap()
1263+
.metadata;
1264+
let table_metadata_builder = TableMetadataBuilder::new_from_metadata(
1265+
table_metadata,
1266+
Some("s3://db/table/metadata/metadata1.gz.json".to_string()),
1267+
);
12261268

12271269
let uuid = uuid::Uuid::new_v4();
12281270
let update = TableUpdate::AssignUuid { uuid };
12291271
let updated_metadata = update
12301272
.apply(table_metadata_builder)
12311273
.unwrap()
12321274
.build()
1233-
.unwrap();
1275+
.unwrap()
1276+
.metadata;
12341277
assert_eq!(updated_metadata.uuid(), uuid);
12351278
}
12361279
}

crates/iceberg/src/spec/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ mod schema;
2525
mod snapshot;
2626
mod sort;
2727
mod table_metadata;
28+
mod table_metadata_builder;
2829
mod transform;
2930
mod values;
3031
mod view_metadata;

crates/iceberg/src/spec/partition.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,6 @@ impl BoundPartitionSpec {
147147
}
148148

149149
/// Get the highest field id in the partition spec.
150-
/// If the partition spec is unpartitioned, it returns the last unpartitioned last assigned id (999).
151150
pub fn highest_field_id(&self) -> Option<i32> {
152151
self.fields.iter().map(|f| f.field_id).max()
153152
}
@@ -182,6 +181,11 @@ impl BoundPartitionSpec {
182181

183182
true
184183
}
184+
185+
/// Change the spec id of the partition spec
186+
pub fn with_spec_id(self, spec_id: i32) -> Self {
187+
Self { spec_id, ..self }
188+
}
185189
}
186190

187191
impl SchemalessPartitionSpec {

crates/iceberg/src/spec/schema.rs

+18
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,24 @@ impl Schema {
376376
pub fn accessor_by_field_id(&self, field_id: i32) -> Option<Arc<StructAccessor>> {
377377
self.field_id_to_accessor.get(&field_id).cloned()
378378
}
379+
380+
/// Check if this schema is identical to another schema semantically - excluding schema id.
381+
pub(crate) fn is_same_schema(&self, other: &SchemaRef) -> bool {
382+
self.as_struct().eq(other.as_struct())
383+
&& self.identifier_field_ids().eq(other.identifier_field_ids())
384+
}
385+
386+
/// Change the schema id of this schema.
387+
// This is redundant with the `with_schema_id` method on the builder, but useful
388+
// as it is infallible in contrast to the builder `build()` method.
389+
pub(crate) fn with_schema_id(self, schema_id: SchemaId) -> Self {
390+
Self { schema_id, ..self }
391+
}
392+
393+
/// Return a HashMap mapping field ids to field names.
394+
pub(crate) fn field_id_to_name_map(&self) -> &HashMap<i32, String> {
395+
&self.id_to_name
396+
}
379397
}
380398

381399
impl Display for Schema {

crates/iceberg/src/spec/table_metadata.rs

+17-96
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,13 @@ use serde_repr::{Deserialize_repr, Serialize_repr};
3030
use uuid::Uuid;
3131

3232
use super::snapshot::SnapshotReference;
33+
pub use super::table_metadata_builder::{TableMetadataBuildResult, TableMetadataBuilder};
3334
use super::{
34-
BoundPartitionSpec, BoundPartitionSpecRef, SchemaId, SchemaRef, SchemalessPartitionSpecRef,
35-
Snapshot, SnapshotRef, SnapshotRetention, SortOrder, SortOrderRef, DEFAULT_PARTITION_SPEC_ID,
35+
BoundPartitionSpecRef, SchemaId, SchemaRef, SchemalessPartitionSpecRef, Snapshot, SnapshotRef,
36+
SnapshotRetention, SortOrder, SortOrderRef, DEFAULT_PARTITION_SPEC_ID,
3637
};
3738
use crate::error::{timestamp_ms_to_utc, Result};
38-
use crate::{Error, ErrorKind, TableCreation};
39+
use crate::{Error, ErrorKind};
3940

4041
static MAIN_BRANCH: &str = "main";
4142
pub(crate) static ONE_MINUTE_MS: i64 = 60_000;
@@ -165,6 +166,17 @@ pub struct TableMetadata {
165166
}
166167

167168
impl TableMetadata {
169+
/// Convert this Table Metadata into a builder for modification.
170+
///
171+
/// `current_file_location` is the location where the current version
172+
/// of the metadata file is stored. This is used to update the metadata log.
173+
/// If `current_file_location` is `None`, the metadata log will not be updated.
174+
/// Passing `None` should only be done when stage-creating tables.
175+
#[must_use]
176+
pub fn into_builder(self, current_file_location: Option<String>) -> TableMetadataBuilder {
177+
TableMetadataBuilder::new_from_metadata(self, current_file_location)
178+
}
179+
168180
/// Returns format version of this metadata.
169181
#[inline]
170182
pub fn format_version(&self) -> FormatVersion {
@@ -539,98 +551,6 @@ impl TableMetadata {
539551
}
540552
}
541553

542-
/// Manipulating table metadata.
543-
pub struct TableMetadataBuilder(TableMetadata);
544-
545-
impl TableMetadataBuilder {
546-
/// Creates a new table metadata builder from the given table metadata.
547-
pub fn new(origin: TableMetadata) -> Self {
548-
Self(origin)
549-
}
550-
551-
/// Creates a new table metadata builder from the given table creation.
552-
pub fn from_table_creation(table_creation: TableCreation) -> Result<Self> {
553-
let TableCreation {
554-
name: _,
555-
location,
556-
schema,
557-
partition_spec,
558-
sort_order,
559-
properties,
560-
} = table_creation;
561-
562-
let schema: Arc<super::Schema> = Arc::new(schema);
563-
let unpartition_spec = BoundPartitionSpec::unpartition_spec(schema.clone());
564-
let partition_specs = match partition_spec {
565-
Some(_) => {
566-
return Err(Error::new(
567-
ErrorKind::FeatureUnsupported,
568-
"Can't create table with partition spec now",
569-
))
570-
}
571-
None => HashMap::from([(
572-
unpartition_spec.spec_id(),
573-
Arc::new(unpartition_spec.clone().into_schemaless()),
574-
)]),
575-
};
576-
577-
let sort_orders = match sort_order {
578-
Some(_) => {
579-
return Err(Error::new(
580-
ErrorKind::FeatureUnsupported,
581-
"Can't create table with sort order now",
582-
))
583-
}
584-
None => HashMap::from([(
585-
SortOrder::UNSORTED_ORDER_ID,
586-
Arc::new(SortOrder::unsorted_order()),
587-
)]),
588-
};
589-
590-
let mut table_metadata = TableMetadata {
591-
format_version: FormatVersion::V2,
592-
table_uuid: Uuid::now_v7(),
593-
location: location.ok_or_else(|| {
594-
Error::new(
595-
ErrorKind::DataInvalid,
596-
"Can't create table without location",
597-
)
598-
})?,
599-
last_sequence_number: 0,
600-
last_updated_ms: Utc::now().timestamp_millis(),
601-
last_column_id: schema.highest_field_id(),
602-
current_schema_id: schema.schema_id(),
603-
schemas: HashMap::from([(schema.schema_id(), schema)]),
604-
partition_specs,
605-
default_spec: BoundPartitionSpecRef::new(unpartition_spec),
606-
last_partition_id: 0,
607-
properties,
608-
current_snapshot_id: None,
609-
snapshots: Default::default(),
610-
snapshot_log: vec![],
611-
sort_orders,
612-
metadata_log: vec![],
613-
default_sort_order_id: SortOrder::UNSORTED_ORDER_ID,
614-
refs: Default::default(),
615-
};
616-
617-
table_metadata.try_normalize()?;
618-
619-
Ok(Self(table_metadata))
620-
}
621-
622-
/// Changes uuid of table metadata.
623-
pub fn assign_uuid(mut self, uuid: Uuid) -> Result<Self> {
624-
self.0.table_uuid = uuid;
625-
Ok(self)
626-
}
627-
628-
/// Returns the new table metadata after changes.
629-
pub fn build(self) -> Result<TableMetadata> {
630-
Ok(self.0)
631-
}
632-
}
633-
634554
pub(super) mod _serde {
635555
use std::borrow::BorrowMut;
636556
/// This is a helper module that defines types to help with serialization/deserialization.
@@ -2308,7 +2228,8 @@ mod tests {
23082228
let table_metadata = TableMetadataBuilder::from_table_creation(table_creation)
23092229
.unwrap()
23102230
.build()
2311-
.unwrap();
2231+
.unwrap()
2232+
.metadata;
23122233
assert_eq!(table_metadata.location, "s3://db/table");
23132234
assert_eq!(table_metadata.schemas.len(), 1);
23142235
assert_eq!(

0 commit comments

Comments
 (0)