Skip to content

Commit f118a88

Browse files
authored
Merge branch 'main' into lazebnyi/add-config-components-resolver
2 parents 4dd1a0a + 5801cd8 commit f118a88

File tree

8 files changed

+765
-7
lines changed

8 files changed

+765
-7
lines changed

airbyte_cdk/sources/declarative/declarative_component_schema.yaml

+87
Original file line numberDiff line numberDiff line change
@@ -1218,6 +1218,7 @@ definitions:
12181218
title: Schema Loader
12191219
description: Component used to retrieve the schema for the current stream.
12201220
anyOf:
1221+
- "$ref": "#/definitions/DynamicSchemaLoader"
12211222
- "$ref": "#/definitions/InlineSchemaLoader"
12221223
- "$ref": "#/definitions/JsonFileSchemaLoader"
12231224
- "$ref": "#/definitions/CustomSchemaLoader"
@@ -1684,6 +1685,92 @@ definitions:
16841685
$parameters:
16851686
type: object
16861687
additionalProperties: true
1688+
# Pairs a type found in the source schema with the type it should be mapped to.
TypesMap:
  title: Types Map
  description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
  type: object
  required: [target_type, current_type]
  properties:
    # Each side accepts either a single type name or a list of type names.
    target_type:
      anyOf:
        - type: string
        - type: array
          items:
            type: string
    current_type:
      anyOf:
        - type: string
        - type: array
          items:
            type: string
1708+
# Describes where, inside a retrieved record, the schema fields, their keys and their types live.
SchemaTypeIdentifier:
  title: Schema Type Identifier
  description: (This component is experimental. Use at your own risk.) Identifies schema details for dynamic schema extraction and processing.
  type: object
  # Only key_pointer is mandatory; schema_pointer defaults to [] and the remaining properties are optional.
  required: [key_pointer]
  properties:
    type:
      type: string
      enum:
        - SchemaTypeIdentifier
    schema_pointer:
      title: Schema Path
      description: List of nested fields defining the schema field path to extract. Defaults to [].
      type: array
      default: []
      items:
        - type: string
      interpolation_context: [config]
    key_pointer:
      title: Key Path
      description: List of potentially nested fields describing the full path of the field key to extract.
      type: array
      items:
        - type: string
      interpolation_context: [config]
    type_pointer:
      title: Type Path
      description: List of potentially nested fields describing the full path of the field type to extract.
      type: array
      items:
        - type: string
      interpolation_context: [config]
    # Optional list of TypesMap entries.
    types_mapping:
      type: array
      items:
        - "$ref": "#/definitions/TypesMap"
    $parameters:
      type: object
      additionalProperties: true
1750+
# Schema loader that derives the stream schema from records fetched by a retriever.
DynamicSchemaLoader:
  title: Dynamic Schema Loader
  description: (This component is experimental. Use at your own risk.) Loads a schema by extracting data from retrieved records.
  type: object
  required: [type, retriever, schema_type_identifier]
  properties:
    type:
      type: string
      enum:
        - DynamicSchemaLoader
    retriever:
      title: Retriever
      description: Component used to coordinate how records are extracted across stream slices and request pages.
      anyOf:
        - "$ref": "#/definitions/AsyncRetriever"
        - "$ref": "#/definitions/CustomRetriever"
        - "$ref": "#/definitions/SimpleRetriever"
    # Tells the loader how to read field keys and types out of the retrieved records.
    schema_type_identifier:
      "$ref": "#/definitions/SchemaTypeIdentifier"
    $parameters:
      type: object
      additionalProperties: true
16871774
InlineSchemaLoader:
16881775
title: Inline Schema Loader
16891776
description: Loads a schema that is defined directly in the manifest file.

airbyte_cdk/sources/declarative/models/declarative_component_schema.py

+49-6
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,32 @@ class HttpResponseFilter(BaseModel):
650650
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
651651

652652

653+
class TypesMap(BaseModel):
    # Manifest model pairing a current type with the target type it maps to.
    # Both sides accept a single type name or a list of type names.
    target_type: Union[str, List[str]]
    current_type: Union[str, List[str]]
656+
657+
658+
class SchemaTypeIdentifier(BaseModel):
    # Manifest model describing where schema fields, their keys and their types
    # are located. Only `key_pointer` is required.
    type: Optional[Literal["SchemaTypeIdentifier"]] = None
    schema_pointer: Optional[List[str]] = Field(
        default=[],
        title="Schema Path",
        description="List of nested fields defining the schema field path to extract. Defaults to [].",
    )
    key_pointer: List[str] = Field(
        default=...,
        title="Key Path",
        description="List of potentially nested fields describing the full path of the field key to extract.",
    )
    type_pointer: Optional[List[str]] = Field(
        default=None,
        title="Type Path",
        description="List of potentially nested fields describing the full path of the field type to extract.",
    )
    # Optional list of TypesMap entries.
    types_mapping: Optional[List[TypesMap]] = None
    # Populated from the manifest's `$parameters` key.
    parameters: Optional[Dict[str, Any]] = Field(default=None, alias="$parameters")
677+
678+
653679
class InlineSchemaLoader(BaseModel):
654680
type: Literal["InlineSchemaLoader"]
655681
schema_: Optional[Dict[str, Any]] = Field(
@@ -1629,12 +1655,17 @@ class Config:
16291655
primary_key: Optional[PrimaryKey] = Field(
16301656
"", description="The primary key of the stream.", title="Primary Key"
16311657
)
1632-
schema_loader: Optional[Union[InlineSchemaLoader, JsonFileSchemaLoader, CustomSchemaLoader]] = (
1633-
Field(
1634-
None,
1635-
description="Component used to retrieve the schema for the current stream.",
1636-
title="Schema Loader",
1637-
)
1658+
schema_loader: Optional[
1659+
Union[
1660+
DynamicSchemaLoader,
1661+
InlineSchemaLoader,
1662+
JsonFileSchemaLoader,
1663+
CustomSchemaLoader,
1664+
]
1665+
] = Field(
1666+
None,
1667+
description="Component used to retrieve the schema for the current stream.",
1668+
title="Schema Loader",
16381669
)
16391670
transformations: Optional[
16401671
List[Union[AddFields, CustomTransformation, RemoveFields, KeysToLower]]
@@ -1794,6 +1825,17 @@ class HttpRequester(BaseModel):
17941825
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
17951826

17961827

1828+
class DynamicSchemaLoader(BaseModel):
    # Manifest model for a schema loader that builds the schema from retrieved records.
    type: Literal["DynamicSchemaLoader"]
    retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
        default=...,
        title="Retriever",
        description="Component used to coordinate how records are extracted across stream slices and request pages.",
    )
    # Required: describes how keys and types are read from the retrieved records.
    schema_type_identifier: SchemaTypeIdentifier
    # Populated from the manifest's `$parameters` key.
    parameters: Optional[Dict[str, Any]] = Field(default=None, alias="$parameters")
1837+
1838+
17971839
class ParentStreamConfig(BaseModel):
17981840
type: Literal["ParentStreamConfig"]
17991841
parent_key: str = Field(
@@ -2001,5 +2043,6 @@ class DynamicDeclarativeStream(BaseModel):
20012043
SelectiveAuthenticator.update_forward_refs()
20022044
DeclarativeStream.update_forward_refs()
20032045
SessionTokenAuthenticator.update_forward_refs()
2046+
DynamicSchemaLoader.update_forward_refs()
20042047
SimpleRetriever.update_forward_refs()
20052048
AsyncRetriever.update_forward_refs()

airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py

+4
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@
6767
"AddFields.fields": "AddedFieldDefinition",
6868
# CustomPartitionRouter
6969
"CustomPartitionRouter.parent_stream_configs": "ParentStreamConfig",
70+
# DynamicSchemaLoader
71+
"DynamicSchemaLoader.retriever": "SimpleRetriever",
72+
# SchemaTypeIdentifier
73+
"SchemaTypeIdentifier.types_map": "TypesMap",
7074
}
7175

7276
# We retain a separate registry for custom components to automatically insert the type if it is missing. This is intended to

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

+72
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,9 @@
191191
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
192192
DpathExtractor as DpathExtractorModel,
193193
)
194+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
195+
DynamicSchemaLoader as DynamicSchemaLoaderModel,
196+
)
194197
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
195198
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
196199
)
@@ -281,6 +284,9 @@
281284
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
282285
ResponseToFileExtractor as ResponseToFileExtractorModel,
283286
)
287+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
288+
SchemaTypeIdentifier as SchemaTypeIdentifierModel,
289+
)
284290
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
285291
SelectiveAuthenticator as SelectiveAuthenticatorModel,
286292
)
@@ -297,6 +303,9 @@
297303
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
298304
SubstreamPartitionRouter as SubstreamPartitionRouterModel,
299305
)
306+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
307+
TypesMap as TypesMapModel,
308+
)
300309
from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
301310
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
302311
WaitTimeFromHeader as WaitTimeFromHeaderModel,
@@ -364,8 +373,11 @@
364373
)
365374
from airbyte_cdk.sources.declarative.schema import (
366375
DefaultSchemaLoader,
376+
DynamicSchemaLoader,
367377
InlineSchemaLoader,
368378
JsonFileSchemaLoader,
379+
SchemaTypeIdentifier,
380+
TypesMap,
369381
)
370382
from airbyte_cdk.sources.declarative.spec import Spec
371383
from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
@@ -463,6 +475,9 @@ def _init_mappings(self) -> None:
463475
IterableDecoderModel: self.create_iterable_decoder,
464476
XmlDecoderModel: self.create_xml_decoder,
465477
JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
478+
DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
479+
SchemaTypeIdentifierModel: self.create_schema_type_identifier,
480+
TypesMapModel: self.create_types_map,
466481
JwtAuthenticatorModel: self.create_jwt_authenticator,
467482
LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
468483
ListPartitionRouterModel: self.create_list_partition_router,
@@ -1584,6 +1599,63 @@ def create_inline_schema_loader(
15841599
) -> InlineSchemaLoader:
15851600
return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
15861601

1602+
@staticmethod
def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
    """Build a runtime ``TypesMap`` from its parsed manifest model.

    Args:
        model: Parsed manifest model holding the two sides of the mapping.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        A ``TypesMap`` carrying the model's ``target_type`` and ``current_type``.
    """
    type_pair = {
        "target_type": model.target_type,
        "current_type": model.current_type,
    }
    return TypesMap(**type_pair)
1605+
1606+
def create_schema_type_identifier(
    self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
) -> SchemaTypeIdentifier:
    """Build a runtime ``SchemaTypeIdentifier`` from its parsed manifest model.

    Each entry of ``model.types_mapping`` is converted through
    ``_create_component_from_model``, and the pointer lists are copied into
    new list objects.

    Args:
        model: Parsed manifest model to convert.
        config: Connector configuration, forwarded when building the nested
            type-mapping components.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        The component. ``schema_pointer`` and ``types_mapping`` fall back to
        empty lists, an unset or empty ``type_pointer`` becomes ``None``, and
        ``parameters`` falls back to an empty dict.
    """
    converted_mappings = [
        self._create_component_from_model(entry, config=config)
        for entry in model.types_mapping or []
    ]

    schema_path: List[Union[InterpolatedString, str]] = list(model.schema_pointer or [])
    key_path: List[Union[InterpolatedString, str]] = list(model.key_pointer)

    # An empty type pointer is treated the same as a missing one.
    type_path: Optional[List[Union[InterpolatedString, str]]] = None
    if model.type_pointer:
        type_path = list(model.type_pointer)

    return SchemaTypeIdentifier(
        schema_pointer=schema_path,
        key_pointer=key_path,
        type_pointer=type_path,
        types_mapping=converted_mappings,
        parameters=model.parameters or {},
    )
1632+
1633+
def create_dynamic_schema_loader(
    self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
) -> DynamicSchemaLoader:
    """Build a runtime ``DynamicSchemaLoader`` from its parsed manifest model.

    The retriever is created first, using a slicer derived from the retriever
    model's partition router and paginator. The schema type identifier is
    created afterwards.

    Args:
        model: Parsed manifest model to convert.
        config: Connector configuration, forwarded to every nested component.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        The loader wired with its retriever, config, schema type identifier
        and parameters (an empty dict when the model defines none).
    """
    retriever_model = model.retriever
    partition_slicer = self._build_stream_slicer_from_partition_router(retriever_model, config)
    resumable_slicer = self._build_resumable_cursor_from_paginator(
        retriever_model, partition_slicer
    )

    # The retriever is created with an empty name, no primary key and no transformations.
    built_retriever = self._create_component_from_model(
        model=retriever_model,
        config=config,
        name="",
        primary_key=None,
        stream_slicer=resumable_slicer,
        transformations=[],
    )
    built_identifier = self._create_component_from_model(
        model.schema_type_identifier, config=config, parameters=model.parameters or {}
    )

    return DynamicSchemaLoader(
        retriever=built_retriever,
        config=config,
        schema_type_identifier=built_identifier,
        parameters=model.parameters or {},
    )
1658+
15871659
@staticmethod
15881660
def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
15891661
return JsonDecoder(parameters={})

airbyte_cdk/sources/declarative/schema/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@
66
from airbyte_cdk.sources.declarative.schema.inline_schema_loader import InlineSchemaLoader
77
from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader
88
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
9+
from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import DynamicSchemaLoader, TypesMap, SchemaTypeIdentifier
910

10-
__all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader"]
11+
# Public names exported by the schema package.
__all__ = [
    "JsonFileSchemaLoader",
    "DefaultSchemaLoader",
    "SchemaLoader",
    "InlineSchemaLoader",
    "DynamicSchemaLoader",
    "TypesMap",
    "SchemaTypeIdentifier",
]

0 commit comments

Comments
 (0)