16
16
// under the License.
17
17
18
18
//! Position delete file writer.
19
+ use std:: future:: Future ;
20
+ use std:: pin:: Pin ;
19
21
use std:: sync:: Arc ;
20
22
21
23
use arrow_array:: builder:: { PrimitiveBuilder , StringBuilder } ;
@@ -29,17 +31,21 @@ use crate::writer::file_writer::{FileWriter, FileWriterBuilder};
29
31
use crate :: writer:: { IcebergWriter , IcebergWriterBuilder } ;
30
32
use crate :: { Error , ErrorKind , Result } ;
31
33
34
+ const POS_DELETE_FIELD1_NAME : & str = "file_path" ;
35
+ const POS_DELETE_FIELD1_ID : i32 = 2147483546 ;
36
+ const POS_DELETE_FIELD2_NAME : & str = "pos" ;
37
+ const POS_DELETE_FIELD2_ID : i32 = 2147483545 ;
32
38
static POSITION_DELETE_SCHEMA : Lazy < Schema > = Lazy :: new ( || {
33
39
Schema :: builder ( )
34
40
. with_fields ( vec ! [
35
41
Arc :: new( NestedField :: required(
36
- 2147483546 ,
37
- "file_path" ,
42
+ POS_DELETE_FIELD1_ID ,
43
+ POS_DELETE_FIELD1_NAME ,
38
44
Type :: Primitive ( PrimitiveType :: String ) ,
39
45
) ) ,
40
46
Arc :: new( NestedField :: required(
41
- 2147483545 ,
42
- "pos" ,
47
+ POS_DELETE_FIELD2_ID ,
48
+ POS_DELETE_FIELD2_NAME ,
43
49
Type :: Primitive ( PrimitiveType :: Long ) ,
44
50
) ) ,
45
51
] )
@@ -49,17 +55,17 @@ static POSITION_DELETE_SCHEMA: Lazy<Schema> = Lazy::new(|| {
49
55
50
56
/// Position delete input.
51
57
#[ derive( Clone , PartialEq , Eq , Ord , PartialOrd , Debug ) ]
52
- pub struct PositionDeleteInput {
58
+ pub struct PositionDeleteInput < ' a > {
53
59
/// The path of the file.
54
- pub path : String ,
55
- /// The offset of the position delete.
56
- pub offsets : Vec < i64 > ,
60
+ pub path : & ' a str ,
61
+ /// The row number in data file
62
+ pub pos : i64 ,
57
63
}
58
64
59
- impl PositionDeleteInput {
65
+ impl < ' a > PositionDeleteInput < ' a > {
60
66
/// Create a new `PositionDeleteInput`.
61
- pub fn new ( path : String , offsets : Vec < i64 > ) -> Self {
62
- PositionDeleteInput { path, offsets }
67
+ pub fn new ( path : & ' a str , row : i64 ) -> Self {
68
+ Self { path, pos : row }
63
69
}
64
70
}
65
71
/// Builder for `MemoryPositionDeleteWriter`.
@@ -80,7 +86,7 @@ impl<B: FileWriterBuilder> PositionDeleteWriterBuilder<B> {
80
86
}
81
87
82
88
#[ async_trait:: async_trait]
83
- impl < B : FileWriterBuilder > IcebergWriterBuilder < Vec < PositionDeleteInput > >
89
+ impl < ' a , B : FileWriterBuilder > IcebergWriterBuilder < Vec < PositionDeleteInput < ' a > > >
84
90
for PositionDeleteWriterBuilder < B >
85
91
{
86
92
type R = PositionDeleteWriter < B > ;
@@ -99,16 +105,22 @@ pub struct PositionDeleteWriter<B: FileWriterBuilder> {
99
105
partition_value : Struct ,
100
106
}
101
107
102
- #[ async_trait:: async_trait]
103
- impl < B : FileWriterBuilder > IcebergWriter < Vec < PositionDeleteInput > > for PositionDeleteWriter < B > {
104
- async fn write ( & mut self , inputs : Vec < PositionDeleteInput > ) -> Result < ( ) > {
108
+ impl < ' a , B : FileWriterBuilder > IcebergWriter < Vec < PositionDeleteInput < ' a > > >
109
+ for PositionDeleteWriter < B >
110
+ {
111
+ fn write < ' life0 , ' async_trait > (
112
+ & ' life0 mut self ,
113
+ input : Vec < PositionDeleteInput < ' a > > ,
114
+ ) -> Pin < Box < dyn Future < Output = Result < ( ) > > + Send + ' async_trait > >
115
+ where
116
+ ' life0 : ' async_trait ,
117
+ Self : ' async_trait ,
118
+ {
105
119
let mut path_column_builder = StringBuilder :: new ( ) ;
106
120
let mut offset_column_builder = PrimitiveBuilder :: < Int64Type > :: new ( ) ;
107
- for pd_input in inputs. into_iter ( ) {
108
- for offset in pd_input. offsets {
109
- path_column_builder. append_value ( & pd_input. path ) ;
110
- offset_column_builder. append_value ( offset) ;
111
- }
121
+ for pd_input in input. into_iter ( ) {
122
+ path_column_builder. append_value ( pd_input. path ) ;
123
+ offset_column_builder. append_value ( pd_input. pos ) ;
112
124
}
113
125
let record_batch = RecordBatch :: try_new (
114
126
Arc :: new ( schema_to_arrow_schema ( & POSITION_DELETE_SCHEMA ) . unwrap ( ) ) ,
@@ -117,28 +129,38 @@ impl<B: FileWriterBuilder> IcebergWriter<Vec<PositionDeleteInput>> for PositionD
117
129
Arc :: new( offset_column_builder. finish( ) ) ,
118
130
] ,
119
131
)
120
- . map_err ( |e| Error :: new ( ErrorKind :: DataInvalid , e. to_string ( ) ) ) ? ;
132
+ . map_err ( |e| Error :: new ( ErrorKind :: DataInvalid , e. to_string ( ) ) ) ;
121
133
122
- if let Some ( inner_writer) = & mut self . inner_writer {
123
- inner_writer. write ( & record_batch) . await ?;
124
- } else {
125
- return Err ( Error :: new ( ErrorKind :: Unexpected , "write has been closed" ) ) ;
126
- }
127
- Ok ( ( ) )
134
+ Box :: pin ( async move {
135
+ if let Some ( inner_writer) = & mut self . inner_writer {
136
+ inner_writer. write ( & record_batch?) . await ?;
137
+ } else {
138
+ return Err ( Error :: new ( ErrorKind :: Unexpected , "write has been closed" ) ) ;
139
+ }
140
+ Ok ( ( ) )
141
+ } )
128
142
}
129
143
130
- async fn close ( & mut self ) -> Result < Vec < DataFile > > {
131
- let writer = self . inner_writer . take ( ) . unwrap ( ) ;
132
- Ok ( writer
133
- . close ( )
134
- . await ?
135
- . into_iter ( )
136
- . map ( |mut res| {
137
- res. content ( DataContentType :: PositionDeletes ) ;
138
- res. partition ( self . partition_value . clone ( ) ) ;
139
- res. build ( ) . expect ( "Guaranteed to be valid" )
140
- } )
141
- . collect ( ) )
144
+ fn close < ' life0 , ' async_trait > (
145
+ & ' life0 mut self ,
146
+ ) -> Pin < Box < dyn Future < Output = Result < Vec < DataFile > > > + Send + ' async_trait > >
147
+ where
148
+ ' life0 : ' async_trait ,
149
+ Self : ' async_trait ,
150
+ {
151
+ Box :: pin ( async move {
152
+ let writer = self . inner_writer . take ( ) . unwrap ( ) ;
153
+ Ok ( writer
154
+ . close ( )
155
+ . await ?
156
+ . into_iter ( )
157
+ . map ( |mut res| {
158
+ res. content ( DataContentType :: PositionDeletes ) ;
159
+ res. partition ( self . partition_value . clone ( ) ) ;
160
+ res. build ( ) . expect ( "Guaranteed to be valid" )
161
+ } )
162
+ . collect ( ) )
163
+ } )
142
164
}
143
165
}
144
166
@@ -184,28 +206,38 @@ mod test {
184
206
// Write some position delete inputs
185
207
let inputs: Vec < PositionDeleteInput > = vec ! [
186
208
PositionDeleteInput {
187
- path: "file2.parquet" . to_string( ) ,
188
- offsets: vec![ 2 , 1 , 3 ] ,
209
+ path: "file2.parquet" ,
210
+ pos: 2 ,
211
+ } ,
212
+ PositionDeleteInput {
213
+ path: "file2.parquet" ,
214
+ pos: 1 ,
215
+ } ,
216
+ PositionDeleteInput {
217
+ path: "file2.parquet" ,
218
+ pos: 3 ,
219
+ } ,
220
+ PositionDeleteInput {
221
+ path: "file3.parquet" ,
222
+ pos: 2 ,
223
+ } ,
224
+ PositionDeleteInput {
225
+ path: "file1.parquet" ,
226
+ pos: 5 ,
189
227
} ,
190
228
PositionDeleteInput {
191
- path: "file3 .parquet" . to_string ( ) ,
192
- offsets : vec! [ 2 ] ,
229
+ path: "file1 .parquet" ,
230
+ pos : 4 ,
193
231
} ,
194
232
PositionDeleteInput {
195
- path: "file1.parquet" . to_string ( ) ,
196
- offsets : vec! [ 5 , 4 , 1 ] ,
233
+ path: "file1.parquet" ,
234
+ pos : 1 ,
197
235
} ,
198
236
] ;
199
237
let expect_inputs = inputs
200
238
. clone ( )
201
239
. into_iter ( )
202
- . flat_map ( |input| {
203
- input
204
- . offsets
205
- . iter ( )
206
- . map ( |off| ( input. path . clone ( ) , * off) )
207
- . collect :: < Vec < _ > > ( )
208
- } )
240
+ . map ( |input| ( input. path . to_string ( ) , input. pos ) )
209
241
. collect_vec ( ) ;
210
242
position_delete_writer. write ( inputs. clone ( ) ) . await ?;
211
243
0 commit comments