@@ -71,7 +71,7 @@ impl EqualityDeleteWriterConfig {
71
71
|field| {
72
72
// Only primitive types are allowed to be used for identifier field ids
73
73
if field. is_nullable ( )
74
- || ! field. data_type ( ) . is_primitive ( )
74
+ || field. data_type ( ) . is_nested ( )
75
75
|| matches ! (
76
76
field. data_type( ) ,
77
77
DataType :: Float16 | DataType :: Float32 | DataType :: Float64
@@ -169,13 +169,14 @@ mod test {
169
169
use std:: sync:: Arc ;
170
170
171
171
use arrow_array:: types:: Int32Type ;
172
- use arrow_array:: { ArrayRef , Int32Array , RecordBatch , StructArray } ;
172
+ use arrow_array:: { ArrayRef , BooleanArray , Int32Array , Int64Array , RecordBatch , StructArray } ;
173
173
use arrow_schema:: DataType ;
174
174
use arrow_select:: concat:: concat_batches;
175
175
use itertools:: Itertools ;
176
176
use parquet:: arrow:: arrow_reader:: ParquetRecordBatchReaderBuilder ;
177
177
use parquet:: file:: properties:: WriterProperties ;
178
178
use tempfile:: TempDir ;
179
+ use uuid:: Uuid ;
179
180
180
181
use crate :: arrow:: { arrow_schema_to_schema, schema_to_arrow_schema} ;
181
182
use crate :: io:: { FileIO , FileIOBuilder } ;
@@ -500,4 +501,159 @@ mod test {
500
501
501
502
Ok ( ( ) )
502
503
}
504
+
505
+ #[ tokio:: test]
506
+ async fn test_equality_delete_with_primitive_type ( ) -> Result < ( ) , anyhow:: Error > {
507
+ let temp_dir = TempDir :: new ( ) . unwrap ( ) ;
508
+ let file_io = FileIOBuilder :: new_fs_io ( ) . build ( ) . unwrap ( ) ;
509
+ let location_gen =
510
+ MockLocationGenerator :: new ( temp_dir. path ( ) . to_str ( ) . unwrap ( ) . to_string ( ) ) ;
511
+ let file_name_gen =
512
+ DefaultFileNameGenerator :: new ( "test" . to_string ( ) , None , DataFileFormat :: Parquet ) ;
513
+
514
+ let schema = Arc :: new (
515
+ Schema :: builder ( )
516
+ . with_schema_id ( 1 )
517
+ . with_fields ( vec ! [
518
+ NestedField :: required( 0 , "col0" , Type :: Primitive ( PrimitiveType :: Boolean ) )
519
+ . into( ) ,
520
+ NestedField :: required( 1 , "col1" , Type :: Primitive ( PrimitiveType :: Int ) ) . into( ) ,
521
+ NestedField :: required( 2 , "col2" , Type :: Primitive ( PrimitiveType :: Long ) ) . into( ) ,
522
+ NestedField :: required(
523
+ 3 ,
524
+ "col3" ,
525
+ Type :: Primitive ( PrimitiveType :: Decimal {
526
+ precision: 38 ,
527
+ scale: 5 ,
528
+ } ) ,
529
+ )
530
+ . into( ) ,
531
+ NestedField :: required( 4 , "col4" , Type :: Primitive ( PrimitiveType :: Date ) ) . into( ) ,
532
+ NestedField :: required( 5 , "col5" , Type :: Primitive ( PrimitiveType :: Time ) ) . into( ) ,
533
+ NestedField :: required( 6 , "col6" , Type :: Primitive ( PrimitiveType :: Timestamp ) )
534
+ . into( ) ,
535
+ NestedField :: required( 7 , "col7" , Type :: Primitive ( PrimitiveType :: Timestamptz ) )
536
+ . into( ) ,
537
+ NestedField :: required( 8 , "col8" , Type :: Primitive ( PrimitiveType :: TimestampNs ) )
538
+ . into( ) ,
539
+ NestedField :: required( 9 , "col9" , Type :: Primitive ( PrimitiveType :: TimestamptzNs ) )
540
+ . into( ) ,
541
+ NestedField :: required( 10 , "col10" , Type :: Primitive ( PrimitiveType :: String ) )
542
+ . into( ) ,
543
+ NestedField :: required( 11 , "col11" , Type :: Primitive ( PrimitiveType :: Uuid ) ) . into( ) ,
544
+ NestedField :: required( 12 , "col12" , Type :: Primitive ( PrimitiveType :: Fixed ( 10 ) ) )
545
+ . into( ) ,
546
+ NestedField :: required( 13 , "col13" , Type :: Primitive ( PrimitiveType :: Binary ) )
547
+ . into( ) ,
548
+ ] )
549
+ . build ( )
550
+ . unwrap ( ) ,
551
+ ) ;
552
+ let equality_ids = vec ! [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 ] ;
553
+ let config = EqualityDeleteWriterConfig :: new ( equality_ids, schema. clone ( ) , None ) . unwrap ( ) ;
554
+ let delete_arrow_schema = config. projected_arrow_schema_ref ( ) . clone ( ) ;
555
+ let delete_schema = arrow_schema_to_schema ( & delete_arrow_schema) . unwrap ( ) ;
556
+
557
+ let pb = ParquetWriterBuilder :: new (
558
+ WriterProperties :: builder ( ) . build ( ) ,
559
+ Arc :: new ( delete_schema) ,
560
+ file_io. clone ( ) ,
561
+ location_gen,
562
+ file_name_gen,
563
+ ) ;
564
+ let mut equality_delete_writer = EqualityDeleteFileWriterBuilder :: new ( pb)
565
+ . build ( config)
566
+ . await ?;
567
+
568
+ // prepare data
569
+ let col0 = Arc :: new ( BooleanArray :: from ( vec ! [
570
+ Some ( true ) ,
571
+ Some ( false ) ,
572
+ Some ( true ) ,
573
+ ] ) ) as ArrayRef ;
574
+ let col1 = Arc :: new ( Int32Array :: from ( vec ! [ Some ( 1 ) , Some ( 2 ) , Some ( 4 ) ] ) ) as ArrayRef ;
575
+ let col2 = Arc :: new ( Int64Array :: from ( vec ! [ Some ( 1 ) , Some ( 2 ) , Some ( 4 ) ] ) ) as ArrayRef ;
576
+ let col3 = Arc :: new (
577
+ arrow_array:: Decimal128Array :: from ( vec ! [ Some ( 1 ) , Some ( 2 ) , Some ( 4 ) ] )
578
+ . with_precision_and_scale ( 38 , 5 )
579
+ . unwrap ( ) ,
580
+ ) as ArrayRef ;
581
+ let col4 = Arc :: new ( arrow_array:: Date32Array :: from ( vec ! [
582
+ Some ( 0 ) ,
583
+ Some ( 1 ) ,
584
+ Some ( 3 ) ,
585
+ ] ) ) as ArrayRef ;
586
+ let col5 = Arc :: new ( arrow_array:: Time64MicrosecondArray :: from ( vec ! [
587
+ Some ( 0 ) ,
588
+ Some ( 1 ) ,
589
+ Some ( 3 ) ,
590
+ ] ) ) as ArrayRef ;
591
+ let col6 = Arc :: new ( arrow_array:: TimestampMicrosecondArray :: from ( vec ! [
592
+ Some ( 0 ) ,
593
+ Some ( 1 ) ,
594
+ Some ( 3 ) ,
595
+ ] ) ) as ArrayRef ;
596
+ let col7 = Arc :: new (
597
+ arrow_array:: TimestampMicrosecondArray :: from ( vec ! [ Some ( 0 ) , Some ( 1 ) , Some ( 3 ) ] )
598
+ . with_timezone_utc ( ) ,
599
+ ) as ArrayRef ;
600
+ let col8 = Arc :: new ( arrow_array:: TimestampNanosecondArray :: from ( vec ! [
601
+ Some ( 0 ) ,
602
+ Some ( 1 ) ,
603
+ Some ( 3 ) ,
604
+ ] ) ) as ArrayRef ;
605
+ let col9 = Arc :: new (
606
+ arrow_array:: TimestampNanosecondArray :: from ( vec ! [ Some ( 0 ) , Some ( 1 ) , Some ( 3 ) ] )
607
+ . with_timezone_utc ( ) ,
608
+ ) as ArrayRef ;
609
+ let col10 = Arc :: new ( arrow_array:: StringArray :: from ( vec ! [
610
+ Some ( "a" ) ,
611
+ Some ( "b" ) ,
612
+ Some ( "d" ) ,
613
+ ] ) ) as ArrayRef ;
614
+ let col11 = Arc :: new (
615
+ arrow_array:: FixedSizeBinaryArray :: try_from_sparse_iter_with_size (
616
+ vec ! [
617
+ Some ( Uuid :: from_u128( 0 ) . as_bytes( ) . to_vec( ) ) ,
618
+ Some ( Uuid :: from_u128( 1 ) . as_bytes( ) . to_vec( ) ) ,
619
+ Some ( Uuid :: from_u128( 3 ) . as_bytes( ) . to_vec( ) ) ,
620
+ ]
621
+ . into_iter ( ) ,
622
+ 16 ,
623
+ )
624
+ . unwrap ( ) ,
625
+ ) as ArrayRef ;
626
+ let col12 = Arc :: new (
627
+ arrow_array:: FixedSizeBinaryArray :: try_from_sparse_iter_with_size (
628
+ vec ! [
629
+ Some ( vec![ 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ] ) ,
630
+ Some ( vec![ 11 , 12 , 13 , 14 , 15 , 16 , 17 , 18 , 19 , 20 ] ) ,
631
+ Some ( vec![ 21 , 22 , 23 , 24 , 25 , 26 , 27 , 28 , 29 , 30 ] ) ,
632
+ ]
633
+ . into_iter ( ) ,
634
+ 10 ,
635
+ )
636
+ . unwrap ( ) ,
637
+ ) as ArrayRef ;
638
+ let col13 = Arc :: new ( arrow_array:: LargeBinaryArray :: from_opt_vec ( vec ! [
639
+ Some ( b"one" ) ,
640
+ Some ( b"" ) ,
641
+ Some ( b"zzzz" ) ,
642
+ ] ) ) as ArrayRef ;
643
+ let to_write = RecordBatch :: try_new ( delete_arrow_schema. clone ( ) , vec ! [
644
+ col0, col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13,
645
+ ] )
646
+ . unwrap ( ) ;
647
+ equality_delete_writer. write ( to_write. clone ( ) ) . await ?;
648
+ let res = equality_delete_writer. close ( ) . await ?;
649
+ assert_eq ! ( res. len( ) , 1 ) ;
650
+ check_parquet_data_file_with_equality_delete_write (
651
+ & file_io,
652
+ & res. into_iter ( ) . next ( ) . unwrap ( ) ,
653
+ & to_write,
654
+ )
655
+ . await ;
656
+
657
+ Ok ( ( ) )
658
+ }
503
659
}
0 commit comments