Skip to content

Commit e069f4b

Browse files
committed
Capture compression and open binary if present.
1 parent 17ec4ac commit e069f4b

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

src/hats/io/file_io/file_io.py

+6-3
Original file line number | Diff line number | Diff line change
@@ -107,18 +107,21 @@ def load_csv_to_pandas(file_pointer: str | Path | UPath, **kwargs) -> pd.DataFra
107107

108108

109109
def load_csv_to_pandas_generator(
110-
file_pointer: str | Path | UPath, chunksize=10_000, **kwargs
110+
file_pointer: str | Path | UPath, *, chunksize=10_000, open_mode=None, compression=None, **kwargs
111111
) -> Generator[pd.DataFrame]:
112112
"""Load a csv file to a pandas dataframe
113113
Args:
114114
file_pointer: location of csv file to load
115-
file_system: fsspec or pyarrow filesystem, default None
115+
chunksize (int): number of rows to load per chunk
116+
compression (str): for compressed CSVs, the name of the compression format, passed through to the file open call, e.g. 'gzip', 'bz2'.
116117
**kwargs: arguments to pass to pandas `read_csv` loading method
117118
Returns:
118119
generator of pandas dataframes, one per chunk of up to `chunksize` rows loaded from the CSV
119120
"""
120121
file_pointer = get_upath(file_pointer)
121-
with file_pointer.open("r", **kwargs) as csv_file:
122+
if open_mode is None:
123+
open_mode = "r" if compression is None else "rb"
124+
with file_pointer.open(mode=open_mode, compression=compression, **kwargs) as csv_file:
122125
with pd.read_csv(csv_file, chunksize=chunksize, **kwargs) as reader:
123126
yield from reader
124127

0 commit comments

Comments
 (0)