@@ -827,193 +827,6 @@ get_parquet_stat_as_datum!(min);
827
827
828
828
// Expands the `get_parquet_stat_as_datum!` macro for `max`; the macro definition lies outside
// this chunk, so see it for what the expansion generates.
get_parquet_stat_as_datum ! ( max) ;
829
829
830
- /// Utilities to deal with [arrow_array::builder] types in the Iceberg context.
831
- pub ( crate ) mod builder {
832
- use arrow_array:: builder:: * ;
833
- use arrow_array:: cast:: AsArray ;
834
- use arrow_array:: types:: * ;
835
- use arrow_array:: { ArrayRef , Datum as ArrowDatum } ;
836
- use arrow_schema:: { DataType , TimeUnit } ;
837
- use ordered_float:: OrderedFloat ;
838
-
839
- use crate :: spec:: { Literal , PrimitiveLiteral } ;
840
- use crate :: { Error , ErrorKind } ;
841
-
842
- /// A helper wrapping [ArrayBuilder] for building arrays without declaring the inner type at
843
- /// compile-time when types are determined dynamically (e.g. based on some column type).
844
- /// A [DataType] is given at construction time which is used to later downcast the inner array
845
- /// and provided values.
846
- pub ( crate ) struct AnyArrayBuilder {
847
- data_type : DataType ,
848
- inner : Box < dyn ArrayBuilder > ,
849
- }
850
-
851
- impl AnyArrayBuilder {
852
- pub ( crate ) fn new ( data_type : & DataType ) -> Self {
853
- Self {
854
- data_type : data_type. clone ( ) ,
855
- inner : make_builder ( data_type, 0 ) ,
856
- }
857
- }
858
-
859
- pub ( crate ) fn finish ( & mut self ) -> ArrayRef {
860
- self . inner . finish ( )
861
- }
862
-
863
- /// Append an [[arrow_array::Datum]] value.
864
- pub ( crate ) fn append_datum ( & mut self , value : & dyn ArrowDatum ) -> crate :: Result < ( ) > {
865
- let ( array, is_scalar) = value. get ( ) ;
866
- assert ! ( is_scalar, "Can only append scalar datum" ) ;
867
-
868
- match array. data_type ( ) {
869
- DataType :: Boolean => self
870
- . builder :: < BooleanBuilder > ( ) ?
871
- . append_value ( array. as_boolean ( ) . value ( 0 ) ) ,
872
- DataType :: Int32 => self
873
- . builder :: < Int32Builder > ( ) ?
874
- . append_value ( array. as_primitive :: < Int32Type > ( ) . value ( 0 ) ) ,
875
- DataType :: Int64 => self
876
- . builder :: < Int64Builder > ( ) ?
877
- . append_value ( array. as_primitive :: < Int64Type > ( ) . value ( 0 ) ) ,
878
- DataType :: Float32 => self
879
- . builder :: < Float32Builder > ( ) ?
880
- . append_value ( array. as_primitive :: < Float32Type > ( ) . value ( 0 ) ) ,
881
- DataType :: Float64 => self
882
- . builder :: < Float64Builder > ( ) ?
883
- . append_value ( array. as_primitive :: < Float64Type > ( ) . value ( 0 ) ) ,
884
- DataType :: Decimal128 ( _, _) => self
885
- . builder :: < Decimal128Builder > ( ) ?
886
- . append_value ( array. as_primitive :: < Decimal128Type > ( ) . value ( 0 ) ) ,
887
- DataType :: Date32 => self
888
- . builder :: < Date32Builder > ( ) ?
889
- . append_value ( array. as_primitive :: < Date32Type > ( ) . value ( 0 ) ) ,
890
- DataType :: Time64 ( TimeUnit :: Microsecond ) => self
891
- . builder :: < Time64MicrosecondBuilder > ( ) ?
892
- . append_value ( array. as_primitive :: < Time64MicrosecondType > ( ) . value ( 0 ) ) ,
893
- DataType :: Timestamp ( TimeUnit :: Microsecond , _) => self
894
- . builder :: < TimestampMicrosecondBuilder > ( ) ?
895
- . append_value ( array. as_primitive :: < TimestampMicrosecondType > ( ) . value ( 0 ) ) ,
896
- DataType :: Timestamp ( TimeUnit :: Nanosecond , _) => self
897
- . builder :: < TimestampNanosecondBuilder > ( ) ?
898
- . append_value ( array. as_primitive :: < TimestampNanosecondType > ( ) . value ( 0 ) ) ,
899
- DataType :: Utf8 => self
900
- . builder :: < StringBuilder > ( ) ?
901
- . append_value ( array. as_string :: < i32 > ( ) . value ( 0 ) ) ,
902
- DataType :: FixedSizeBinary ( _) => self
903
- . builder :: < BinaryBuilder > ( ) ?
904
- . append_value ( array. as_fixed_size_binary ( ) . value ( 0 ) ) ,
905
- DataType :: LargeBinary => self
906
- . builder :: < LargeBinaryBuilder > ( ) ?
907
- . append_value ( array. as_binary :: < i64 > ( ) . value ( 0 ) ) ,
908
- _ => {
909
- return Err ( Error :: new (
910
- ErrorKind :: FeatureUnsupported ,
911
- format ! ( "Cannot append data type: {:?}" , array. data_type( ) , ) ,
912
- ) ) ;
913
- }
914
- }
915
- Ok ( ( ) )
916
- }
917
-
918
- /// Append a literal with the provided [DataType]. We're not solely relying on the literal to
919
- /// infer the type because [Literal] values do not specify the expected type of builder. E.g.,
920
- /// a [PrimitiveLiteral::Long] may go into an array builder for longs but also for timestamps.
921
- pub ( crate ) fn append_literal ( & mut self , value : & Literal ) -> crate :: Result < ( ) > {
922
- let Some ( primitive) = value. as_primitive_literal ( ) else {
923
- return Err ( Error :: new (
924
- ErrorKind :: FeatureUnsupported ,
925
- "Expected primitive type" ,
926
- ) ) ;
927
- } ;
928
-
929
- match ( & self . data_type , primitive. clone ( ) ) {
930
- ( DataType :: Boolean , PrimitiveLiteral :: Boolean ( value) ) => {
931
- self . builder :: < BooleanBuilder > ( ) ?. append_value ( value)
932
- }
933
- ( DataType :: Int32 , PrimitiveLiteral :: Int ( value) ) => {
934
- self . builder :: < Int32Builder > ( ) ?. append_value ( value)
935
- }
936
- ( DataType :: Int64 , PrimitiveLiteral :: Long ( value) ) => {
937
- self . builder :: < Int64Builder > ( ) ?. append_value ( value)
938
- }
939
- ( DataType :: Float32 , PrimitiveLiteral :: Float ( OrderedFloat ( value) ) ) => {
940
- self . builder :: < Float32Builder > ( ) ?. append_value ( value)
941
- }
942
- ( DataType :: Float64 , PrimitiveLiteral :: Double ( OrderedFloat ( value) ) ) => {
943
- self . builder :: < Float64Builder > ( ) ?. append_value ( value)
944
- }
945
- ( DataType :: Utf8 , PrimitiveLiteral :: String ( value) ) => {
946
- self . builder :: < StringBuilder > ( ) ?. append_value ( value)
947
- }
948
- ( DataType :: FixedSizeBinary ( _) , PrimitiveLiteral :: Binary ( value) ) => self
949
- . builder :: < FixedSizeBinaryBuilder > ( ) ?
950
- . append_value ( value) ?,
951
- ( DataType :: LargeBinary , PrimitiveLiteral :: Binary ( value) ) => {
952
- self . builder :: < LargeBinaryBuilder > ( ) ?. append_value ( value)
953
- }
954
- ( _, _) => {
955
- return Err ( Error :: new (
956
- ErrorKind :: FeatureUnsupported ,
957
- format ! (
958
- "Builder of type {:?} does not accept literal {:?}" ,
959
- self . data_type, primitive
960
- ) ,
961
- ) ) ;
962
- }
963
- }
964
-
965
- Ok ( ( ) )
966
- }
967
-
968
- /// Append a null value for the provided [DataType].
969
- pub ( crate ) fn append_null ( & mut self ) -> crate :: Result < ( ) > {
970
- match self . data_type {
971
- DataType :: Boolean => self . builder :: < BooleanBuilder > ( ) ?. append_null ( ) ,
972
- DataType :: Int32 => self . builder :: < Int32Builder > ( ) ?. append_null ( ) ,
973
- DataType :: Int64 => self . builder :: < Int64Builder > ( ) ?. append_null ( ) ,
974
- DataType :: Float32 => self . builder :: < Float32Builder > ( ) ?. append_null ( ) ,
975
- DataType :: Float64 => self . builder :: < Float64Builder > ( ) ?. append_null ( ) ,
976
- DataType :: Decimal128 ( _, _) => self . builder :: < Decimal128Builder > ( ) ?. append_null ( ) ,
977
- DataType :: Date32 => self . builder :: < Date32Builder > ( ) ?. append_null ( ) ,
978
- DataType :: Time64 ( TimeUnit :: Microsecond ) => {
979
- self . builder :: < Time64MicrosecondBuilder > ( ) ?. append_null ( )
980
- }
981
- DataType :: Timestamp ( TimeUnit :: Microsecond , _) => {
982
- self . builder :: < TimestampMicrosecondBuilder > ( ) ?. append_null ( )
983
- }
984
- DataType :: Timestamp ( TimeUnit :: Nanosecond , _) => {
985
- self . builder :: < TimestampNanosecondBuilder > ( ) ?. append_null ( )
986
- }
987
- DataType :: Utf8 => self . builder :: < StringBuilder > ( ) ?. append_null ( ) ,
988
- DataType :: FixedSizeBinary ( _) => {
989
- self . builder :: < FixedSizeBinaryBuilder > ( ) ?. append_null ( )
990
- }
991
- DataType :: LargeBinary => self . builder :: < LargeBinaryBuilder > ( ) ?. append_null ( ) ,
992
- _ => {
993
- return Err ( Error :: new (
994
- ErrorKind :: FeatureUnsupported ,
995
- format ! (
996
- "Cannot append null values for data type: {:?}" ,
997
- self . data_type
998
- ) ,
999
- ) )
1000
- }
1001
- }
1002
- Ok ( ( ) )
1003
- }
1004
-
1005
- /// Cast the `inner` builder to a specific type or return [Error].
1006
- fn builder < T : ArrayBuilder > ( & mut self ) -> crate :: Result < & mut T > {
1007
- self . inner . as_any_mut ( ) . downcast_mut :: < T > ( ) . ok_or_else ( || {
1008
- Error :: new (
1009
- ErrorKind :: Unexpected ,
1010
- "Failed to cast builder to expected type" ,
1011
- )
1012
- } )
1013
- }
1014
- }
1015
- }
1016
-
1017
830
impl TryFrom < & ArrowSchema > for crate :: spec:: Schema {
1018
831
type Error = Error ;
1019
832
0 commit comments