Skip to content

Commit 9563c33

Browse files
authored
feat(low-code cdk): add transformation to dynamic schema loader (#176)
1 parent f222fcc commit 9563c33

File tree

6 files changed

+176
-9
lines changed

6 files changed

+176
-9
lines changed

airbyte_cdk/sources/declarative/declarative_component_schema.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,6 +1235,7 @@ definitions:
12351235
- "$ref": "#/definitions/RemoveFields"
12361236
- "$ref": "#/definitions/KeysToLower"
12371237
- "$ref": "#/definitions/KeysToSnakeCase"
1238+
- "$ref": "#/definitions/FlattenFields"
12381239
state_migrations:
12391240
title: State Migrations
12401241
description: Array of state migrations to be applied on the input state
@@ -1767,6 +1768,18 @@ definitions:
17671768
- "$ref": "#/definitions/AsyncRetriever"
17681769
- "$ref": "#/definitions/CustomRetriever"
17691770
- "$ref": "#/definitions/SimpleRetriever"
1771+
schema_transformations:
1772+
title: Schema Transformations
1773+
description: A list of transformations to be applied to the schema.
1774+
type: array
1775+
items:
1776+
anyOf:
1777+
- "$ref": "#/definitions/AddFields"
1778+
- "$ref": "#/definitions/CustomTransformation"
1779+
- "$ref": "#/definitions/RemoveFields"
1780+
- "$ref": "#/definitions/KeysToLower"
1781+
- "$ref": "#/definitions/KeysToSnakeCase"
1782+
- "$ref": "#/definitions/FlattenFields"
17701783
schema_type_identifier:
17711784
"$ref": "#/definitions/SchemaTypeIdentifier"
17721785
$parameters:

airbyte_cdk/sources/declarative/models/declarative_component_schema.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1671,6 +1671,7 @@ class Config:
16711671
RemoveFields,
16721672
KeysToLower,
16731673
KeysToSnakeCase,
1674+
FlattenFields,
16741675
]
16751676
]
16761677
] = Field(
@@ -1836,6 +1837,22 @@ class DynamicSchemaLoader(BaseModel):
18361837
description="Component used to coordinate how records are extracted across stream slices and request pages.",
18371838
title="Retriever",
18381839
)
1840+
schema_transformations: Optional[
1841+
List[
1842+
Union[
1843+
AddFields,
1844+
CustomTransformation,
1845+
RemoveFields,
1846+
KeysToLower,
1847+
KeysToSnakeCase,
1848+
FlattenFields,
1849+
]
1850+
]
1851+
] = Field(
1852+
None,
1853+
description="A list of transformations to be applied to the schema.",
1854+
title="Schema Transformations",
1855+
)
18391856
schema_type_identifier: SchemaTypeIdentifier
18401857
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
18411858

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,9 @@
239239
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
240240
KeysToLower as KeysToLowerModel,
241241
)
242+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
243+
KeysToSnakeCase as KeysToSnakeCaseModel,
244+
)
242245
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
243246
LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
244247
)
@@ -396,6 +399,9 @@
396399
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
397400
KeysToLowerTransformation,
398401
)
402+
from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
403+
KeysToSnakeCaseTransformation,
404+
)
399405
from airbyte_cdk.sources.message import (
400406
InMemoryMessageRepository,
401407
LogAppenderMessageRepositoryDecorator,
@@ -478,6 +484,7 @@ def _init_mappings(self) -> None:
478484
JsonlDecoderModel: self.create_jsonl_decoder,
479485
GzipJsonDecoderModel: self.create_gzipjson_decoder,
480486
KeysToLowerModel: self.create_keys_to_lower_transformation,
487+
KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
481488
FlattenFieldsModel: self.create_flatten_fields,
482489
IterableDecoderModel: self.create_iterable_decoder,
483490
XmlDecoderModel: self.create_xml_decoder,
@@ -594,6 +601,11 @@ def create_keys_to_lower_transformation(
594601
) -> KeysToLowerTransformation:
595602
return KeysToLowerTransformation()
596603

604+
def create_keys_to_snake_transformation(
605+
self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
606+
) -> KeysToSnakeCaseTransformation:
607+
return KeysToSnakeCaseTransformation()
608+
597609
def create_flatten_fields(
598610
self, model: FlattenFieldsModel, config: Config, **kwargs: Any
599611
) -> FlattenFields:
@@ -1650,6 +1662,13 @@ def create_dynamic_schema_loader(
16501662
model.retriever, stream_slicer
16511663
)
16521664

1665+
schema_transformations = []
1666+
if model.schema_transformations:
1667+
for transformation_model in model.schema_transformations:
1668+
schema_transformations.append(
1669+
self._create_component_from_model(model=transformation_model, config=config)
1670+
)
1671+
16531672
retriever = self._create_component_from_model(
16541673
model=model.retriever,
16551674
config=config,
@@ -1664,6 +1683,7 @@ def create_dynamic_schema_loader(
16641683
return DynamicSchemaLoader(
16651684
retriever=retriever,
16661685
config=config,
1686+
schema_transformations=schema_transformations,
16671687
schema_type_identifier=schema_type_identifier,
16681688
parameters=model.parameters or {},
16691689
)

airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55

66
from copy import deepcopy
7-
from dataclasses import InitVar, dataclass
7+
from dataclasses import InitVar, dataclass, field
88
from typing import Any, List, Mapping, MutableMapping, Optional, Union
99

1010
import dpath
@@ -13,8 +13,9 @@
1313
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
1414
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
1515
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
16+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
1617
from airbyte_cdk.sources.source import ExperimentalClassWarning
17-
from airbyte_cdk.sources.types import Config
18+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
1819

1920
AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
2021
"string": {"type": ["null", "string"]},
@@ -103,6 +104,7 @@ class DynamicSchemaLoader(SchemaLoader):
103104
config: Config
104105
parameters: InitVar[Mapping[str, Any]]
105106
schema_type_identifier: SchemaTypeIdentifier
107+
schema_transformations: List[RecordTransformation] = field(default_factory=lambda: [])
106108

107109
def get_json_schema(self) -> Mapping[str, Any]:
108110
"""
@@ -128,12 +130,27 @@ def get_json_schema(self) -> Mapping[str, Any]:
128130
)
129131
properties[key] = value
130132

133+
transformed_properties = self._transform(properties, {})
134+
131135
return {
132136
"$schema": "http://json-schema.org/draft-07/schema#",
133137
"type": "object",
134-
"properties": properties,
138+
"properties": transformed_properties,
135139
}
136140

141+
def _transform(
142+
self,
143+
properties: Mapping[str, Any],
144+
stream_state: StreamState,
145+
stream_slice: Optional[StreamSlice] = None,
146+
) -> Mapping[str, Any]:
147+
for transformation in self.schema_transformations:
148+
transformation.transform(
149+
properties, # type: ignore # properties has type Mapping[str, Any], but Dict[str, Any] expected
150+
config=self.config,
151+
)
152+
return properties
153+
137154
def _get_key(
138155
self,
139156
raw_schema: MutableMapping[str, Any],

unit_tests/sources/declarative/schema/test_dynamic_schema_loader.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,21 @@
6464
},
6565
"paginator": {"type": "NoPagination"},
6666
},
67+
"schema_transformations": [
68+
{
69+
"type": "AddFields",
70+
"fields": [
71+
{
72+
"type": "AddedFieldDefinition",
73+
"path": ["StaticField"],
74+
"value": "{{ {'type': ['null', 'string']} }}",
75+
}
76+
],
77+
},
78+
{
79+
"type": "KeysToSnakeCase",
80+
},
81+
],
6782
"schema_type_identifier": {
6883
"schema_pointer": ["fields"],
6984
"key_pointer": ["name"],
@@ -230,8 +245,9 @@ def test_dynamic_schema_loader_manifest_flow():
230245
"type": "object",
231246
"properties": {
232247
"id": {"type": ["null", "integer"]},
233-
"name": {"type": ["null", "string"]},
248+
"first_name": {"type": ["null", "string"]},
234249
"description": {"type": ["null", "string"]},
250+
"static_field": {"type": ["null", "string"]},
235251
},
236252
}
237253

@@ -245,8 +261,8 @@ def test_dynamic_schema_loader_manifest_flow():
245261
HttpResponse(
246262
body=json.dumps(
247263
[
248-
{"id": 1, "name": "member_1", "description": "First member"},
249-
{"id": 2, "name": "member_2", "description": "Second member"},
264+
{"id": 1, "first_name": "member_1", "description": "First member"},
265+
{"id": 2, "first_name": "member_2", "description": "Second member"},
250266
]
251267
)
252268
),
@@ -257,9 +273,9 @@ def test_dynamic_schema_loader_manifest_flow():
257273
body=json.dumps(
258274
{
259275
"fields": [
260-
{"name": "id", "type": "integer"},
261-
{"name": "name", "type": "string"},
262-
{"name": "description", "type": "singleLineText"},
276+
{"name": "Id", "type": "integer"},
277+
{"name": "FirstName", "type": "string"},
278+
{"name": "Description", "type": "singleLineText"},
263279
]
264280
}
265281
)

unit_tests/sources/declarative/test_manifest_declarative_source.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,6 +1367,90 @@ def _create_page(response_body):
13671367
],
13681368
[call({}, {})],
13691369
),
1370+
(
1371+
"test_read_manifest_with_flatten_fields",
1372+
{
1373+
"version": "0.34.2",
1374+
"type": "DeclarativeSource",
1375+
"check": {"type": "CheckStream", "stream_names": ["Rates"]},
1376+
"streams": [
1377+
{
1378+
"type": "DeclarativeStream",
1379+
"name": "Rates",
1380+
"primary_key": [],
1381+
"schema_loader": {
1382+
"type": "InlineSchemaLoader",
1383+
"schema": {
1384+
"$schema": "http://json-schema.org/schema#",
1385+
"properties": {
1386+
"ABC": {"type": "number"},
1387+
"AED": {"type": "number"},
1388+
},
1389+
"type": "object",
1390+
},
1391+
},
1392+
"transformations": [{"type": "FlattenFields"}],
1393+
"retriever": {
1394+
"type": "SimpleRetriever",
1395+
"requester": {
1396+
"type": "HttpRequester",
1397+
"url_base": "https://api.apilayer.com",
1398+
"path": "/exchangerates_data/latest",
1399+
"http_method": "GET",
1400+
"request_parameters": {},
1401+
"request_headers": {},
1402+
"request_body_json": {},
1403+
"authenticator": {
1404+
"type": "ApiKeyAuthenticator",
1405+
"header": "apikey",
1406+
"api_token": "{{ config['api_key'] }}",
1407+
},
1408+
},
1409+
"record_selector": {
1410+
"type": "RecordSelector",
1411+
"extractor": {"type": "DpathExtractor", "field_path": ["rates"]},
1412+
},
1413+
"paginator": {"type": "NoPagination"},
1414+
},
1415+
}
1416+
],
1417+
"spec": {
1418+
"connection_specification": {
1419+
"$schema": "http://json-schema.org/draft-07/schema#",
1420+
"type": "object",
1421+
"required": ["api_key"],
1422+
"properties": {
1423+
"api_key": {
1424+
"type": "string",
1425+
"title": "API Key",
1426+
"airbyte_secret": True,
1427+
}
1428+
},
1429+
"additionalProperties": True,
1430+
},
1431+
"documentation_url": "https://example.org",
1432+
"type": "Spec",
1433+
},
1434+
},
1435+
(
1436+
_create_page(
1437+
{
1438+
"rates": [
1439+
{"nested_fields": {"ABC": 0}, "id": 1},
1440+
{"nested_fields": {"AED": 1}, "id": 2},
1441+
],
1442+
"_metadata": {"next": "next"},
1443+
}
1444+
),
1445+
_create_page({"rates": [{"USD": 2}], "_metadata": {"next": "next"}}),
1446+
)
1447+
* 10,
1448+
[
1449+
{"ABC": 0, "id": 1},
1450+
{"AED": 1, "id": 2},
1451+
],
1452+
[call({}, {})],
1453+
),
13701454
(
13711455
"test_read_with_pagination_no_partitions",
13721456
{

0 commit comments

Comments
 (0)