File tree Expand file tree Collapse file tree 3 files changed +54
-3
lines changed
airbyte_cdk/sources/declarative
unit_tests/sources/declarative/incremental Expand file tree Collapse file tree 3 files changed +54
-3
lines changed Original file line number Diff line number Diff line change 1919from airbyte_cdk .sources .declarative .extractors .record_filter import (
2020 ClientSideIncrementalRecordFilterDecorator ,
2121)
22- from airbyte_cdk .sources .declarative .incremental import ConcurrentPerPartitionCursor
22+ from airbyte_cdk .sources .declarative .incremental import (
23+ ConcurrentPerPartitionCursor ,
24+ GlobalSubstreamCursor ,
25+ )
2326from airbyte_cdk .sources .declarative .incremental .datetime_based_cursor import DatetimeBasedCursor
2427from airbyte_cdk .sources .declarative .incremental .per_partition_with_global import (
2528 PerPartitionWithGlobalCursor ,
@@ -361,7 +364,8 @@ def _group_streams(
361364 == DatetimeBasedCursorModel .__name__
362365 and hasattr (declarative_stream .retriever , "stream_slicer" )
363366 and isinstance (
364- declarative_stream .retriever .stream_slicer , PerPartitionWithGlobalCursor
367+ declarative_stream .retriever .stream_slicer ,
368+ (GlobalSubstreamCursor , PerPartitionWithGlobalCursor ),
365369 )
366370 ):
367371 stream_state = self ._connector_state_manager .get_stream_state (
Original file line number Diff line number Diff line change @@ -1439,7 +1439,9 @@ def create_concurrent_cursor_from_perpartition_cursor(
14391439 stream_state = self .apply_stream_state_migrations (stream_state_migrations , stream_state )
14401440
14411441 # Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state; an explicit `global_substream_cursor` setting in the component definition forces it as well
1442- use_global_cursor = isinstance (partition_router , GroupingPartitionRouter )
1442+ use_global_cursor = isinstance (
1443+ partition_router , GroupingPartitionRouter
1444+ ) or component_definition .get ("global_substream_cursor" , False )
14431445
14441446 # Return the concurrent cursor and state converter
14451447 return ConcurrentPerPartitionCursor (
Original file line number Diff line number Diff line change @@ -3449,3 +3449,48 @@ def test_semaphore_cleanup():
34493449 assert '{"id":"2"}' not in cursor ._semaphore_per_partition
34503450 assert len (cursor ._partition_parent_state_map ) == 0 # All parent states should be popped
34513451 assert cursor ._parent_state == {"parent" : {"state" : "state2" }} # Last parent state
3452+
3453+
3454+ def test_given_global_state_when_read_then_state_is_not_per_partition () -> None :
3455+ manifest = deepcopy (SUBSTREAM_MANIFEST )
3456+ manifest ["definitions" ]["post_comments_stream" ]["incremental_sync" ][
3457+ "global_substream_cursor"
3458+ ] = True
3459+ manifest ["streams" ].remove ({"$ref" : "#/definitions/post_comment_votes_stream" })
3460+ record = {
3461+ "id" : 9 ,
3462+ "post_id" : 1 ,
3463+ "updated_at" : COMMENT_10_UPDATED_AT ,
3464+ }
3465+ mock_requests = [
3466+ (
3467+ f"https://api.example.com/community/posts?per_page=100&start_time={ START_DATE } " ,
3468+ {
3469+ "posts" : [
3470+ {"id" : 1 , "updated_at" : POST_1_UPDATED_AT },
3471+ ],
3472+ },
3473+ ),
3474+ # Fetch the first page of comments for post 1
3475+ (
3476+ "https://api.example.com/community/posts/1/comments?per_page=100" ,
3477+ {
3478+ "comments" : [record ],
3479+ },
3480+ ),
3481+ ]
3482+
3483+ run_mocked_test (
3484+ mock_requests ,
3485+ manifest ,
3486+ CONFIG ,
3487+ "post_comments" ,
3488+ {},
3489+ [record ],
3490+ {
3491+ "lookback_window" : 1 ,
3492+ "parent_state" : {"posts" : {"updated_at" : "2024-01-30T00:00:00Z" }},
3493+ "state" : {"updated_at" : "2024-01-25T00:00:00Z" },
3494+ "use_global_cursor" : True , # ensures that it is running the Concurrent CDK version as this is not populated in the declarative implementation
3495+ }, # this state does not have per-partition entries, which would be under `states`
3496+ )
You can’t perform that action at this time.
0 commit comments