@@ -31,13 +31,12 @@ use parquet::file::properties::WriterProperties;
31
31
use parquet:: file:: statistics:: { from_thrift, Statistics } ;
32
32
use parquet:: format:: FileMetaData ;
33
33
34
- use super :: location_generator:: { FileNameGenerator , LocationGenerator } ;
35
34
use super :: track_writer:: TrackWriter ;
36
35
use super :: { FileWriter , FileWriterBuilder } ;
37
36
use crate :: arrow:: {
38
37
get_parquet_stat_max_as_datum, get_parquet_stat_min_as_datum, DEFAULT_MAP_FIELD_NAME ,
39
38
} ;
40
- use crate :: io:: { FileIO , FileWrite , OutputFile } ;
39
+ use crate :: io:: { FileWrite , OutputFile } ;
41
40
use crate :: spec:: {
42
41
visit_schema, DataFileBuilder , DataFileFormat , Datum , ListType , MapType , NestedFieldRef ,
43
42
PrimitiveType , Schema , SchemaRef , SchemaVisitor , StructType , Type ,
@@ -47,45 +46,25 @@ use crate::{Error, ErrorKind, Result};
47
46
48
47
/// ParquetWriterBuilder is used to build a [`ParquetWriter`]
49
48
#[ derive( Clone , Debug ) ]
50
- pub struct ParquetWriterBuilder < T : LocationGenerator , F : FileNameGenerator > {
49
+ pub struct ParquetWriterBuilder {
51
50
props : WriterProperties ,
52
51
schema : SchemaRef ,
53
-
54
- file_io : FileIO ,
55
- location_generator : T ,
56
- file_name_generator : F ,
57
52
}
58
53
59
- impl < T : LocationGenerator , F : FileNameGenerator > ParquetWriterBuilder < T , F > {
54
+ impl ParquetWriterBuilder {
60
55
/// Create a new `ParquetWriterBuilder`
61
56
/// To construct the write result, the schema should contain the `PARQUET_FIELD_ID_META_KEY` metadata for each field.
62
- pub fn new (
63
- props : WriterProperties ,
64
- schema : SchemaRef ,
65
- file_io : FileIO ,
66
- location_generator : T ,
67
- file_name_generator : F ,
68
- ) -> Self {
69
- Self {
70
- props,
71
- schema,
72
- file_io,
73
- location_generator,
74
- file_name_generator,
75
- }
57
+ pub fn new ( props : WriterProperties , schema : SchemaRef ) -> Self {
58
+ Self { props, schema }
76
59
}
77
60
}
78
61
79
- impl < T : LocationGenerator , F : FileNameGenerator > FileWriterBuilder for ParquetWriterBuilder < T , F > {
62
+ impl FileWriterBuilder for ParquetWriterBuilder {
80
63
type R = ParquetWriter ;
81
64
82
- async fn build ( self ) -> crate :: Result < Self :: R > {
65
+ async fn build ( self , out_file : OutputFile ) -> crate :: Result < Self :: R > {
83
66
let arrow_schema: ArrowSchemaRef = Arc :: new ( self . schema . as_ref ( ) . try_into ( ) ?) ;
84
67
let written_size = Arc :: new ( AtomicI64 :: new ( 0 ) ) ;
85
- let out_file = self . file_io . new_output (
86
- self . location_generator
87
- . generate_location ( & self . file_name_generator . generate_file_name ( ) ) ,
88
- ) ?;
89
68
let inner_writer = TrackWriter :: new ( out_file. writer ( ) . await ?, written_size. clone ( ) ) ;
90
69
let async_writer = AsyncFileWriter :: new ( inner_writer) ;
91
70
let writer =
@@ -668,14 +647,14 @@ mod tests {
668
647
let to_write_null = RecordBatch :: try_new ( schema. clone ( ) , vec ! [ null_col] ) . unwrap ( ) ;
669
648
670
649
// write data
650
+ let output_file = file_io
651
+ . new_output ( format ! ( "{}/{}" , loccation_gen. gen ( ) , file_name_gen. gen ( ) ) )
652
+ . unwrap ( ) ;
671
653
let mut pw = ParquetWriterBuilder :: new (
672
654
WriterProperties :: builder ( ) . build ( ) ,
673
655
Arc :: new ( to_write. schema ( ) . as_ref ( ) . try_into ( ) . unwrap ( ) ) ,
674
- file_io. clone ( ) ,
675
- location_gen,
676
- file_name_gen,
677
656
)
678
- . build ( )
657
+ . build ( output_file )
679
658
. await ?;
680
659
pw. write ( & to_write) . await ?;
681
660
pw. write ( & to_write_null) . await ?;
@@ -864,15 +843,13 @@ mod tests {
864
843
. unwrap ( ) ;
865
844
866
845
// write data
867
- let mut pw = ParquetWriterBuilder :: new (
868
- WriterProperties :: builder ( ) . build ( ) ,
869
- Arc :: new ( schema) ,
870
- file_io. clone ( ) ,
871
- location_gen,
872
- file_name_gen,
873
- )
874
- . build ( )
875
- . await ?;
846
+ let output_file = file_io
847
+ . new_output ( format ! ( "{}/{}" , loccation_gen. gen ( ) , file_name_gen. gen ( ) ) )
848
+ . unwrap ( ) ;
849
+ let mut pw =
850
+ ParquetWriterBuilder :: new ( WriterProperties :: builder ( ) . build ( ) , Arc :: new ( schema) )
851
+ . build ( output_file)
852
+ . await ?;
876
853
pw. write ( & to_write) . await ?;
877
854
let res = pw. close ( ) . await ?;
878
855
assert_eq ! ( res. len( ) , 1 ) ;
@@ -1054,15 +1031,13 @@ mod tests {
1054
1031
. unwrap ( ) ;
1055
1032
1056
1033
// write data
1057
- let mut pw = ParquetWriterBuilder :: new (
1058
- WriterProperties :: builder ( ) . build ( ) ,
1059
- Arc :: new ( schema) ,
1060
- file_io. clone ( ) ,
1061
- loccation_gen,
1062
- file_name_gen,
1063
- )
1064
- . build ( )
1065
- . await ?;
1034
+ let output_file = file_io
1035
+ . new_output ( format ! ( "{}/{}" , loccation_gen. gen ( ) , file_name_gen. gen ( ) ) )
1036
+ . unwrap ( ) ;
1037
+ let mut pw =
1038
+ ParquetWriterBuilder :: new ( WriterProperties :: builder ( ) . build ( ) , Arc :: new ( schema) )
1039
+ . build ( output_file)
1040
+ . await ?;
1066
1041
pw. write ( & to_write) . await ?;
1067
1042
let res = pw. close ( ) . await ?;
1068
1043
assert_eq ! ( res. len( ) , 1 ) ;
@@ -1198,15 +1173,12 @@ mod tests {
1198
1173
. unwrap ( ) ,
1199
1174
) ;
1200
1175
let arrow_schema: ArrowSchemaRef = Arc :: new ( schema_to_arrow_schema ( & schema) . unwrap ( ) ) ;
1201
- let mut pw = ParquetWriterBuilder :: new (
1202
- WriterProperties :: builder ( ) . build ( ) ,
1203
- schema. clone ( ) ,
1204
- file_io. clone ( ) ,
1205
- loccation_gen. clone ( ) ,
1206
- file_name_gen. clone ( ) ,
1207
- )
1208
- . build ( )
1209
- . await ?;
1176
+ let output_file = file_io
1177
+ . new_output ( format ! ( "{}/{}" , loccation_gen. gen ( ) , file_name_gen. gen ( ) ) )
1178
+ . unwrap ( ) ;
1179
+ let mut pw = ParquetWriterBuilder :: new ( WriterProperties :: builder ( ) . build ( ) , schema. clone ( ) )
1180
+ . build ( output_file)
1181
+ . await ?;
1210
1182
let col0 = Arc :: new (
1211
1183
Decimal128Array :: from ( vec ! [ Some ( 22000000000 ) , Some ( 11000000000 ) ] )
1212
1184
. with_data_type ( DataType :: Decimal128 ( 28 , 10 ) ) ,
@@ -1250,15 +1222,12 @@ mod tests {
1250
1222
. unwrap ( ) ,
1251
1223
) ;
1252
1224
let arrow_schema: ArrowSchemaRef = Arc :: new ( schema_to_arrow_schema ( & schema) . unwrap ( ) ) ;
1253
- let mut pw = ParquetWriterBuilder :: new (
1254
- WriterProperties :: builder ( ) . build ( ) ,
1255
- schema. clone ( ) ,
1256
- file_io. clone ( ) ,
1257
- loccation_gen. clone ( ) ,
1258
- file_name_gen. clone ( ) ,
1259
- )
1260
- . build ( )
1261
- . await ?;
1225
+ let output_file = file_io
1226
+ . new_output ( format ! ( "{}/{}" , loccation_gen. gen ( ) , file_name_gen. gen ( ) ) )
1227
+ . unwrap ( ) ;
1228
+ let mut pw = ParquetWriterBuilder :: new ( WriterProperties :: builder ( ) . build ( ) , schema. clone ( ) )
1229
+ . build ( output_file)
1230
+ . await ?;
1262
1231
let col0 = Arc :: new (
1263
1232
Decimal128Array :: from ( vec ! [ Some ( -22000000000 ) , Some ( -11000000000 ) ] )
1264
1233
. with_data_type ( DataType :: Decimal128 ( 28 , 10 ) ) ,
@@ -1305,15 +1274,12 @@ mod tests {
1305
1274
. unwrap ( ) ,
1306
1275
) ;
1307
1276
let arrow_schema: ArrowSchemaRef = Arc :: new ( schema_to_arrow_schema ( & schema) . unwrap ( ) ) ;
1308
- let mut pw = ParquetWriterBuilder :: new (
1309
- WriterProperties :: builder ( ) . build ( ) ,
1310
- schema,
1311
- file_io. clone ( ) ,
1312
- loccation_gen,
1313
- file_name_gen,
1314
- )
1315
- . build ( )
1316
- . await ?;
1277
+ let output_file = file_io
1278
+ . new_output ( format ! ( "{}/{}" , loccation_gen. gen ( ) , file_name_gen. gen ( ) ) )
1279
+ . unwrap ( ) ;
1280
+ let mut pw = ParquetWriterBuilder :: new ( WriterProperties :: builder ( ) . build ( ) , schema)
1281
+ . build ( output_file)
1282
+ . await ?;
1317
1283
let col0 = Arc :: new (
1318
1284
Decimal128Array :: from ( vec ! [
1319
1285
Some ( decimal_max. mantissa( ) ) ,
0 commit comments