@@ -39,7 +39,7 @@ use crate::{ensure_data_valid, Error, ErrorKind};
39
39
pub type SchemaId = i32 ;
40
40
/// Reference to [`Schema`].
41
41
pub type SchemaRef = Arc < Schema > ;
42
- const DEFAULT_SCHEMA_ID : SchemaId = 0 ;
42
+ pub ( crate ) const DEFAULT_SCHEMA_ID : SchemaId = 0 ; // Schema id 0, now visible crate-wide. NOTE(review): presumably the id used when no explicit schema id is supplied — confirm against the builder.
43
43
44
44
/// Defines schema in iceberg.
45
45
#[ derive( Debug , Serialize , Deserialize , Clone ) ]
@@ -133,7 +133,7 @@ impl SchemaBuilder {
133
133
let r#struct = StructType :: new ( self . fields ) ;
134
134
let id_to_field = index_by_id ( & r#struct) ?;
135
135
let highest_field_id =
136
- highest_field_id. unwrap_or ( get_highest_schema_id_for_struct ( & r#struct ) ) ;
136
+ highest_field_id. unwrap_or ( id_to_field . keys ( ) . max ( ) . cloned ( ) . unwrap_or ( 0 ) ) ;
137
137
138
138
Self :: validate_identifier_ids (
139
139
& r#struct,
@@ -367,13 +367,18 @@ impl Schema {
367
367
self . id_to_name . get ( & field_id) . map ( String :: as_str)
368
368
}
369
369
370
+ /// Returns a reference to this schema's map from field id to field name.
371
+ pub ( crate ) fn field_id_to_name_map ( & self ) -> & HashMap < i32 , String > {
372
+ & self . id_to_name
373
+ }
374
+
370
375
/// Returns the accessor (for retrieving data in a struct) stored under `field_id`, or `None` when no accessor is stored for that id.
371
376
pub fn accessor_by_field_id ( & self , field_id : i32 ) -> Option < Arc < StructAccessor > > {
372
377
self . field_id_to_accessor . get ( & field_id) . cloned ( ) // clones the `Arc` handle found in the map
373
378
}
374
379
375
380
/// Returns `true` when `other` has an equal struct type and equal identifier field ids; the schema id is not compared.
376
- pub fn is_same_schema ( & self , other : & SchemaRef ) -> bool {
381
+ pub ( crate ) fn is_same_schema ( & self , other : & SchemaRef ) -> bool {
377
382
self . as_struct ( ) . eq ( other. as_struct ( ) ) // the struct types must be equal
378
383
&& self . identifier_field_ids ( ) . eq ( other. identifier_field_ids ( ) ) // and so must the identifier field ids
379
384
}
@@ -981,58 +986,6 @@ impl SchemaVisitor for PruneColumn {
981
986
}
982
987
}
983
988
984
- /// Returns the highest field id recorded while visiting the struct `s`; the result is 0 when no larger id is recorded.
985
- /// Walks `s` with `visit_struct`, which is expected to descend into nested fields.
986
- pub fn get_highest_schema_id_for_struct ( s : & StructType ) -> i32 {
987
- let mut agg = HighestFieldIdAggregator :: new ( ) ;
988
- visit_struct ( s, & mut agg) . unwrap_or_default ( ) ; // an error from the visit is discarded; the maximum recorded so far is still returned
989
- agg. highest_field_id
990
- }
991
-
992
- struct HighestFieldIdAggregator { // Visitor state used to find the largest field id in a type tree.
993
- highest_field_id : i32 , // largest field id seen so far
994
- }
995
-
996
- impl HighestFieldIdAggregator {
997
- fn new ( ) -> Self { // Creates an aggregator that has not seen any field yet.
998
- Self {
999
- highest_field_id : 0 , // running maximum starts at 0, so 0 is the result when no larger id is visited
1000
- }
1001
- }
1002
- }
1003
-
1004
- impl SchemaVisitor for HighestFieldIdAggregator { // Visitor callbacks that fold every field id they are handed into `highest_field_id`.
1005
- type T = ( ) ; // callbacks produce no value; the result lives in `self`
1006
-
1007
- fn schema ( & mut self , _schema : & Schema , _value : Self :: T ) -> Result < Self :: T > { // no id is recorded here
1008
- Ok ( ( ) )
1009
- }
1010
-
1011
- fn field ( & mut self , field : & NestedFieldRef , _value : Self :: T ) -> Result < Self :: T > { // records the id of the visited field
1012
- self . highest_field_id = std:: cmp:: max ( self . highest_field_id , field. id ) ;
1013
- Ok ( ( ) )
1014
- }
1015
-
1016
- fn r#struct ( & mut self , _struct : & StructType , _results : Vec < Self :: T > ) -> Result < Self :: T > { // no id is recorded here
1017
- Ok ( ( ) )
1018
- }
1019
-
1020
- fn list ( & mut self , list : & ListType , _value : Self :: T ) -> Result < Self :: T > { // records the id of the list's element field
1021
- self . highest_field_id = std:: cmp:: max ( self . highest_field_id , list. element_field . id ) ;
1022
- Ok ( ( ) )
1023
- }
1024
-
1025
- fn map ( & mut self , map : & MapType , _key_value : Self :: T , _value : Self :: T ) -> Result < Self :: T > { // records the ids of the map's key and value fields
1026
- self . highest_field_id = std:: cmp:: max ( self . highest_field_id , map. key_field . id ) ;
1027
- self . highest_field_id = std:: cmp:: max ( self . highest_field_id , map. value_field . id ) ;
1028
- Ok ( ( ) )
1029
- }
1030
-
1031
- fn primitive ( & mut self , _p : & PrimitiveType ) -> Result < Self :: T > { // no id is recorded here
1032
- Ok ( ( ) )
1033
- }
1034
- }
1035
-
1036
989
struct ReassignFieldIds {
1037
990
next_field_id : i32 ,
1038
991
old_to_new_id : HashMap < i32 , i32 > ,
0 commit comments