@@ -30,35 +30,35 @@ def compress_with_gzip(data: str, encoding: str = "utf-8"):
30
30
return buf .getvalue ()
31
31
32
32
33
- def generate_csv (encoding : str ) -> bytes :
34
- """
35
- Generate CSV data with tab-separated values (\t ).
36
- """
33
+ def generate_csv (
34
+ encoding : str = "utf-8" , delimiter : str = "," , should_compress : bool = False
35
+ ) -> bytes :
37
36
data = [
38
- {"id" : 1 , "name" : "John" , "age" : 28 },
39
- {"id" : 2 , "name" : "Alice" , "age" : 34 },
40
- {"id" : 3 , "name" : "Bob" , "age" : 25 },
37
+ {"id" : "1" , "name" : "John" , "age" : "28" },
38
+ {"id" : "2" , "name" : "Alice" , "age" : "34" },
39
+ {"id" : "3" , "name" : "Bob" , "age" : "25" },
41
40
]
42
41
43
42
output = StringIO ()
44
- writer = csv .DictWriter (output , fieldnames = ["id" , "name" , "age" ], delimiter = " \t " )
43
+ writer = csv .DictWriter (output , fieldnames = ["id" , "name" , "age" ], delimiter = delimiter )
45
44
writer .writeheader ()
46
45
for row in data :
47
46
writer .writerow (row )
48
47
49
- # Ensure the pointer is at the beginning of the buffer before compressing
50
48
output .seek (0 )
49
+ csv_data = output .read ()
51
50
52
- # Compress the CSV data with Gzip
53
- compressed_data = compress_with_gzip (output .read (), encoding = encoding )
54
-
55
- return compressed_data
51
+ if should_compress :
52
+ return compress_with_gzip (csv_data , encoding = encoding )
53
+ return csv_data .encode (encoding )
56
54
57
55
58
56
@pytest .mark .parametrize ("encoding" , ["utf-8" , "utf" , "iso-8859-1" ])
59
57
def test_composite_raw_decoder_gzip_csv_parser (requests_mock , encoding : str ):
60
58
requests_mock .register_uri (
61
- "GET" , "https://airbyte.io/" , content = generate_csv (encoding = encoding )
59
+ "GET" ,
60
+ "https://airbyte.io/" ,
61
+ content = generate_csv (encoding = encoding , delimiter = "\t " , should_compress = True ),
62
62
)
63
63
response = requests .get ("https://airbyte.io/" , stream = True )
64
64
@@ -175,3 +175,26 @@ def test_composite_raw_decoder_raises_traced_exception_when_both_parsers_fail(re
175
175
with patch ("json.loads" , side_effect = Exception ("test" )):
176
176
with pytest .raises (AirbyteTracedException ):
177
177
list (composite_raw_decoder .decode (response ))
178
+
179
+
180
+ @pytest .mark .parametrize ("encoding" , ["utf-8" , "utf" , "iso-8859-1" ])
181
+ @pytest .mark .parametrize ("delimiter" , ["," , "\t " , ";" ])
182
+ def test_composite_raw_decoder_csv_parser_values (requests_mock , encoding : str , delimiter : str ):
183
+ requests_mock .register_uri (
184
+ "GET" ,
185
+ "https://airbyte.io/" ,
186
+ content = generate_csv (encoding = encoding , delimiter = delimiter , should_compress = False ),
187
+ )
188
+ response = requests .get ("https://airbyte.io/" , stream = True )
189
+
190
+ parser = CsvParser (encoding = encoding , delimiter = delimiter )
191
+ composite_raw_decoder = CompositeRawDecoder (parser = parser )
192
+
193
+ expected_data = [
194
+ {"id" : "1" , "name" : "John" , "age" : "28" },
195
+ {"id" : "2" , "name" : "Alice" , "age" : "34" },
196
+ {"id" : "3" , "name" : "Bob" , "age" : "25" },
197
+ ]
198
+
199
+ parsed_records = list (composite_raw_decoder .decode (response ))
200
+ assert parsed_records == expected_data
0 commit comments