2929 DATAFRAME_FRAMEWORK = 'pandas'
3030
3131
32- class FileProcessor ( object ) :
32+ class FileProcessor :
3333 @abstractmethod
3434 def format (self ):
3535 pass
@@ -139,11 +139,10 @@ def __init__(self, sep=',', encoding: str = 'utf-8'):
139139 def format (self ):
140140 return TextFormat (encoding = self ._encoding )
141141
142- def load (self , file ):
143- ...
142+ def load (self , file ): ...
143+
144+ def dump (self , obj , file ): ...
144145
145- def dump (self , obj , file ):
146- ...
147146
148147class PolarsCsvFileProcessor (CsvFileProcessor ):
149148 def load (self , file ):
@@ -191,19 +190,17 @@ def __init__(self, orient: str | None = None):
191190 def format (self ):
192191 return luigi .format .Nop
193192
194- def load (self , file ):
195- ...
193+ def load (self , file ): ...
196194
197- def dump (self , obj , file ):
198- ...
195+ def dump (self , obj , file ): ...
199196
200197
201198class PolarsJsonFileProcessor (JsonFileProcessor ):
202199 def load (self , file ):
203200 try :
204201 if self ._orient == 'records' :
205- return self .read_ndjson (file )
206- return self .read_json (file )
202+ return pl .read_ndjson (file )
203+ return pl .read_json (file )
207204 except pl .exceptions .NoDataError :
208205 return pl .DataFrame ()
209206
@@ -215,7 +212,7 @@ def dump(self, obj, file):
215212 obj = pl .from_dict (obj )
216213
217214 if self ._orient == 'records' :
218- obj_write_ndjson (file )
215+ obj . write_ndjson (file )
219216 else :
220217 obj .write_json (file )
221218
@@ -272,11 +269,10 @@ def __init__(self, engine='pyarrow', compression=None):
272269 def format (self ):
273270 return luigi .format .Nop
274271
275- def load (self , file ):
276- ...
272+ def load (self , file ): ...
273+
274+ def dump (self , obj , file ): ...
277275
278- def dump (self , obj , file ):
279- ...
280276
281277class PolarsParquetFileProcessor (ParquetFileProcessor ):
282278 def load (self , file ):
@@ -314,20 +310,17 @@ def __init__(self, store_index_in_feather: bool):
314310 def format (self ):
315311 return luigi .format .Nop
316312
317- def load (self , file ):
318- ...
313+ def load (self , file ): ...
319314
320- def dump (self , obj , file ):
321- ...
315+ def dump (self , obj , file ): ...
322316
323317
324318class PolarsFeatherFileProcessor (FeatherFileProcessor ):
325319 def load (self , file ):
326320 # Since polars' DataFrame doesn't have index, just load feather file
327321 if ObjectStorage .is_buffered_reader (file ):
328- loaded_df = pl .read_ipc (file .name )
329- else :
330- loaded_df = pl .read_ipc (BytesIO (file .read ()))
322+ return pl .read_ipc (file .name )
323+ return pl .read_ipc (BytesIO (file .read ()))
331324
332325 def dump (self , obj , file ):
333326 assert isinstance (obj , (pl .DataFrame )), f'requires pl.DataFrame, but { type (obj )} is passed.'
@@ -378,15 +371,16 @@ def dump(self, obj, file):
378371
379372
380373if DATAFRAME_FRAMEWORK == 'polars' :
381- CsvFileProcessor = PolarsCsvFileProcessor
382- JsonFileProcessor = PolarsJsonFileProcessor
383- ParquetFileProcessor = PolarsParquetFileProcessor
384- FeatherFileProcessor = PolarsFeatherFileProcessor
374+ CsvFileProcessor = PolarsCsvFileProcessor # type: ignore
375+ JsonFileProcessor = PolarsJsonFileProcessor # type: ignore
376+ ParquetFileProcessor = PolarsParquetFileProcessor # type: ignore
377+ FeatherFileProcessor = PolarsFeatherFileProcessor # type: ignore
385378else :
386- CsvFileProcessor = PandasCsvFileProcessor
387- JsonFileProcessor = PandasJsonFileProcessor
388- ParquetFileProcessor = PandasParquetFileProcessor
389- FeatherFileProcessor = PandasFeatherFileProcessor
379+ CsvFileProcessor = PandasCsvFileProcessor # type: ignore
380+ JsonFileProcessor = PandasJsonFileProcessor # type: ignore
381+ ParquetFileProcessor = PandasParquetFileProcessor # type: ignore
382+ FeatherFileProcessor = PandasFeatherFileProcessor # type: ignore
383+
390384
391385def make_file_processor (file_path : str , store_index_in_feather : bool ) -> FileProcessor :
392386 extension2processor = {
0 commit comments