Skip to content

Commit d1cfbb8

Browse files
fix: additional CsvParser test (#323)
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: [email protected] <[email protected]>
1 parent f396439 commit d1cfbb8

File tree

1 file changed

+37
-14
lines changed

1 file changed

+37
-14
lines changed

unit_tests/sources/declarative/decoders/test_composite_decoder.py

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,35 +30,35 @@ def compress_with_gzip(data: str, encoding: str = "utf-8"):
3030
return buf.getvalue()
3131

3232

33-
def generate_csv(encoding: str) -> bytes:
34-
"""
35-
Generate CSV data with tab-separated values (\t).
36-
"""
33+
def generate_csv(
34+
encoding: str = "utf-8", delimiter: str = ",", should_compress: bool = False
35+
) -> bytes:
3736
data = [
38-
{"id": 1, "name": "John", "age": 28},
39-
{"id": 2, "name": "Alice", "age": 34},
40-
{"id": 3, "name": "Bob", "age": 25},
37+
{"id": "1", "name": "John", "age": "28"},
38+
{"id": "2", "name": "Alice", "age": "34"},
39+
{"id": "3", "name": "Bob", "age": "25"},
4140
]
4241

4342
output = StringIO()
44-
writer = csv.DictWriter(output, fieldnames=["id", "name", "age"], delimiter="\t")
43+
writer = csv.DictWriter(output, fieldnames=["id", "name", "age"], delimiter=delimiter)
4544
writer.writeheader()
4645
for row in data:
4746
writer.writerow(row)
4847

49-
# Ensure the pointer is at the beginning of the buffer before compressing
5048
output.seek(0)
49+
csv_data = output.read()
5150

52-
# Compress the CSV data with Gzip
53-
compressed_data = compress_with_gzip(output.read(), encoding=encoding)
54-
55-
return compressed_data
51+
if should_compress:
52+
return compress_with_gzip(csv_data, encoding=encoding)
53+
return csv_data.encode(encoding)
5654

5755

5856
@pytest.mark.parametrize("encoding", ["utf-8", "utf", "iso-8859-1"])
5957
def test_composite_raw_decoder_gzip_csv_parser(requests_mock, encoding: str):
6058
requests_mock.register_uri(
61-
"GET", "https://airbyte.io/", content=generate_csv(encoding=encoding)
59+
"GET",
60+
"https://airbyte.io/",
61+
content=generate_csv(encoding=encoding, delimiter="\t", should_compress=True),
6262
)
6363
response = requests.get("https://airbyte.io/", stream=True)
6464

@@ -175,3 +175,26 @@ def test_composite_raw_decoder_raises_traced_exception_when_both_parsers_fail(re
175175
with patch("json.loads", side_effect=Exception("test")):
176176
with pytest.raises(AirbyteTracedException):
177177
list(composite_raw_decoder.decode(response))
178+
179+
180+
@pytest.mark.parametrize("encoding", ["utf-8", "utf", "iso-8859-1"])
181+
@pytest.mark.parametrize("delimiter", [",", "\t", ";"])
182+
def test_composite_raw_decoder_csv_parser_values(requests_mock, encoding: str, delimiter: str):
183+
requests_mock.register_uri(
184+
"GET",
185+
"https://airbyte.io/",
186+
content=generate_csv(encoding=encoding, delimiter=delimiter, should_compress=False),
187+
)
188+
response = requests.get("https://airbyte.io/", stream=True)
189+
190+
parser = CsvParser(encoding=encoding, delimiter=delimiter)
191+
composite_raw_decoder = CompositeRawDecoder(parser=parser)
192+
193+
expected_data = [
194+
{"id": "1", "name": "John", "age": "28"},
195+
{"id": "2", "name": "Alice", "age": "34"},
196+
{"id": "3", "name": "Bob", "age": "25"},
197+
]
198+
199+
parsed_records = list(composite_raw_decoder.decode(response))
200+
assert parsed_records == expected_data

0 commit comments

Comments
 (0)