|
3 | 3 | #
|
4 | 4 |
|
5 | 5 | import logging
|
6 |
| -from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple, Union |
| 6 | +from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple, Union, Callable |
7 | 7 |
|
8 | 8 | from airbyte_cdk.models import (
|
9 | 9 | AirbyteCatalog,
|
|
27 | 27 | )
|
28 | 28 | from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
29 | 29 | DatetimeBasedCursor as DatetimeBasedCursorModel,
|
| 30 | + DeclarativeStream as DeclarativeStreamModel, |
30 | 31 | )
|
31 | 32 | from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
32 | 33 | ModelToComponentFactory,
|
| 34 | + ComponentDefinition, |
33 | 35 | )
|
34 | 36 | from airbyte_cdk.sources.declarative.requesters import HttpRequester
|
35 |
| -from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever |
| 37 | +from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever, Retriever |
| 38 | +from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import ( |
| 39 | + DeclarativePartitionFactory, |
| 40 | + StreamSlicerPartitionGenerator, |
| 41 | +) |
36 | 42 | from airbyte_cdk.sources.declarative.transformations.add_fields import AddFields
|
37 | 43 | from airbyte_cdk.sources.declarative.types import ConnectionDefinition
|
38 | 44 | from airbyte_cdk.sources.source import TState
|
| 45 | +from airbyte_cdk.sources.types import Config, StreamState |
39 | 46 | from airbyte_cdk.sources.streams import Stream
|
40 | 47 | from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
|
41 |
| -from airbyte_cdk.sources.streams.concurrent.adapters import CursorPartitionGenerator |
42 | 48 | from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
|
43 | 49 | AlwaysAvailableAvailabilityStrategy,
|
44 | 50 | )
|
@@ -213,31 +219,18 @@ def _group_streams(
|
213 | 219 | )
|
214 | 220 | )
|
215 | 221 |
|
216 |
| - # This is an optimization so that we don't invoke any cursor or state management flows within the |
217 |
| - # low-code framework because state management is handled through the ConcurrentCursor. |
218 |
| - if ( |
219 |
| - declarative_stream |
220 |
| - and declarative_stream.retriever |
221 |
| - and isinstance(declarative_stream.retriever, SimpleRetriever) |
222 |
| - ): |
223 |
| - # Also a temporary hack. In the legacy Stream implementation, as part of the read, set_initial_state() is |
224 |
| - # called to instantiate incoming state on the cursor. Although we no longer rely on the legacy low-code cursor |
225 |
| - # for concurrent checkpointing, low-code components like StopConditionPaginationStrategyDecorator and |
226 |
| - # ClientSideIncrementalRecordFilterDecorator still rely on a DatetimeBasedCursor that is properly initialized |
227 |
| - # with state. |
228 |
| - if declarative_stream.retriever.cursor: |
229 |
| - declarative_stream.retriever.cursor.set_initial_state( |
230 |
| - stream_state=stream_state |
231 |
| - ) |
232 |
| - declarative_stream.retriever.cursor = None |
233 |
| - |
234 |
| - partition_generator = CursorPartitionGenerator( |
235 |
| - stream=declarative_stream, |
236 |
| - message_repository=self.message_repository, # type: ignore # message_repository is always instantiated with a value by factory |
237 |
| - cursor=cursor, |
238 |
| - connector_state_converter=connector_state_converter, |
239 |
| - cursor_field=[cursor.cursor_field.cursor_field_key], |
240 |
| - slice_boundary_fields=cursor.slice_boundary_fields, |
| 222 | + partition_generator = StreamSlicerPartitionGenerator( |
| 223 | + DeclarativePartitionFactory( |
| 224 | + declarative_stream.name, |
| 225 | + declarative_stream.get_json_schema(), |
| 226 | + self._retriever_factory( |
| 227 | + name_to_stream_mapping[declarative_stream.name], |
| 228 | + config, |
| 229 | + stream_state, |
| 230 | + ), |
| 231 | + self.message_repository, |
| 232 | + ), |
| 233 | + cursor, |
241 | 234 | )
|
242 | 235 |
|
243 | 236 | concurrent_streams.append(
|
@@ -350,3 +343,34 @@ def _remove_concurrent_streams_from_catalog(
|
350 | 343 | if stream.stream.name not in concurrent_stream_names
|
351 | 344 | ]
|
352 | 345 | )
|
| 346 | + |
| 347 | + def _retriever_factory( |
| 348 | + self, stream_config: ComponentDefinition, source_config: Config, stream_state: StreamState |
| 349 | + ) -> Callable[[], Retriever]: |
| 350 | + def _factory_method() -> Retriever: |
| 351 | + declarative_stream: DeclarativeStream = self._constructor.create_component( |
| 352 | + DeclarativeStreamModel, |
| 353 | + stream_config, |
| 354 | + source_config, |
| 355 | + emit_connector_builder_messages=self._emit_connector_builder_messages, |
| 356 | + ) |
| 357 | + |
| 358 | + # This is an optimization so that we don't invoke any cursor or state management flows within the |
| 359 | + # low-code framework because state management is handled through the ConcurrentCursor. |
| 360 | + if ( |
| 361 | + declarative_stream |
| 362 | + and declarative_stream.retriever |
| 363 | + and isinstance(declarative_stream.retriever, SimpleRetriever) |
| 364 | + ): |
| 365 | + # Also a temporary hack. In the legacy Stream implementation, as part of the read, set_initial_state() is |
| 366 | + # called to instantiate incoming state on the cursor. Although we no longer rely on the legacy low-code cursor |
| 367 | + # for concurrent checkpointing, low-code components like StopConditionPaginationStrategyDecorator and |
| 368 | + # ClientSideIncrementalRecordFilterDecorator still rely on a DatetimeBasedCursor that is properly initialized |
| 369 | + # with state. |
| 370 | + if declarative_stream.retriever.cursor: |
| 371 | + declarative_stream.retriever.cursor.set_initial_state(stream_state=stream_state) |
| 372 | + declarative_stream.retriever.cursor = None |
| 373 | + |
| 374 | + return declarative_stream.retriever |
| 375 | + |
| 376 | + return _factory_method |
0 commit comments