Skip to content

Commit 10c1085

Browse files
feat(file-based): sync file acl permissions and identities (#260)
We will leverage the DefaultFileBasedStream and the stream reader to reuse most of the logic for scraping files, and let connectors implement the logic for the domain they handle. In the UI we will add a new transfer mode, "Replicate Permissions ACL". - Enhanced file transfer options now support permissions replication, enabling delivery of access permissions along with identity data. - Introduced an additional delivery method option to mirror source file permission restrictions and to include the identities stream.
1 parent d44aea8 commit 10c1085

14 files changed

+775
-51
lines changed

airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from airbyte_cdk import OneOfOptionConfig
1313
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
14+
from airbyte_cdk.sources.specs.transfer_modes import DeliverPermissions
1415
from airbyte_cdk.sources.utils import schema_helpers
1516

1617

@@ -65,7 +66,7 @@ class AbstractFileBasedSpec(BaseModel):
6566
order=10,
6667
)
6768

68-
delivery_method: Union[DeliverRecords, DeliverRawFiles] = Field(
69+
delivery_method: Union[DeliverRecords, DeliverRawFiles, DeliverPermissions] = Field(
6970
title="Delivery Method",
7071
discriminator="delivery_type",
7172
type="object",
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#
2+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3+
#
4+
5+
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import (
6+
AbstractFileBasedSpec,
7+
DeliverRawFiles,
8+
)
9+
from airbyte_cdk.sources.specs.transfer_modes import DeliverPermissions
10+
11+
DELIVERY_TYPE_KEY = "delivery_type"
12+
DELIVERY_TYPE_PERMISSION_TRANSFER_MODE_VALUE = "use_permissions_transfer"
13+
DELIVERY_TYPE_FILES_TRANSFER_MODE_VALUE = "use_file_transfer"
14+
PRESERVE_DIRECTORY_STRUCTURE_KEY = "preserve_directory_structure"
15+
INCLUDE_IDENTITIES_STREAM_KEY = "include_identities_stream"
16+
17+
18+
def use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
    """
    Determines whether the configuration uses file transfer mode.

    Args:
        parsed_config: The parsed configuration containing delivery method settings

    Returns:
        True if the delivery method exposes a `delivery_type` equal to the
        file transfer mode value, False otherwise
    """
    delivery_method = parsed_config.delivery_method
    # A delivery method without a `delivery_type` attribute can never match.
    if not hasattr(delivery_method, DELIVERY_TYPE_KEY):
        return False
    return delivery_method.delivery_type == DELIVERY_TYPE_FILES_TRANSFER_MODE_VALUE
24+
25+
26+
def preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
    """
    Determines whether to preserve directory structure during file transfer.

    When enabled, files maintain their subdirectory paths in the destination.
    When disabled, files are flattened to the root of the destination.

    Args:
        parsed_config: The parsed configuration containing delivery method settings

    Returns:
        The delivery method's `preserve_directory_structure` value when file
        transfer is in use and the delivery method is a `DeliverRawFiles`
        carrying that attribute; True (the default) in every other case
    """
    # The option only has meaning in file transfer mode.
    if not use_file_transfer(parsed_config):
        return True

    delivery_method = parsed_config.delivery_method
    if not hasattr(delivery_method, PRESERVE_DIRECTORY_STRUCTURE_KEY):
        return True
    if not isinstance(delivery_method, DeliverRawFiles):
        return True

    return delivery_method.preserve_directory_structure
46+
47+
48+
def use_permissions_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
    """
    Determines whether to use permissions transfer to sync ACLs and Identities.

    Args:
        parsed_config: The parsed configuration containing delivery method settings

    Returns:
        True if the delivery method exposes a `delivery_type` equal to the
        permissions transfer mode value, False otherwise
    """
    delivery_method = parsed_config.delivery_method
    # A delivery method without a `delivery_type` attribute can never match.
    if not hasattr(delivery_method, DELIVERY_TYPE_KEY):
        return False
    return delivery_method.delivery_type == DELIVERY_TYPE_PERMISSION_TRANSFER_MODE_VALUE
63+
64+
65+
def include_identities_stream(parsed_config: AbstractFileBasedSpec) -> bool:
    """
    Determines whether the Identities stream should be included.

    There are scenarios where the user may not have access to identities,
    but it is still valuable to get ACLs.

    Args:
        parsed_config: The parsed configuration containing delivery method settings

    Returns:
        The delivery method's `include_identities_stream` value when
        permissions transfer is in use and the delivery method is a
        `DeliverPermissions` carrying that attribute; False otherwise
    """
    # Identities are only relevant in permissions transfer mode.
    if not use_permissions_transfer(parsed_config):
        return False

    delivery_method = parsed_config.delivery_method
    if not hasattr(delivery_method, INCLUDE_IDENTITIES_STREAM_KEY):
        return False
    if not isinstance(delivery_method, DeliverPermissions):
        return False

    return delivery_method.include_identities_stream

airbyte_cdk/sources/file_based/file_based_source.py

Lines changed: 70 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@
3333
FileBasedStreamConfig,
3434
ValidationPolicy,
3535
)
36+
from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
37+
include_identities_stream,
38+
preserve_directory_structure,
39+
use_file_transfer,
40+
use_permissions_transfer,
41+
)
3642
from airbyte_cdk.sources.file_based.discovery_policy import (
3743
AbstractDiscoveryPolicy,
3844
DefaultDiscoveryPolicy,
@@ -49,7 +55,12 @@
4955
DEFAULT_SCHEMA_VALIDATION_POLICIES,
5056
AbstractSchemaValidationPolicy,
5157
)
52-
from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream, DefaultFileBasedStream
58+
from airbyte_cdk.sources.file_based.stream import (
59+
AbstractFileBasedStream,
60+
DefaultFileBasedStream,
61+
FileIdentitiesStream,
62+
PermissionsFileBasedStream,
63+
)
5364
from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamFacade
5465
from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
5566
AbstractConcurrentFileBasedCursor,
@@ -66,6 +77,7 @@
6677
DEFAULT_CONCURRENCY = 100
6778
MAX_CONCURRENCY = 100
6879
INITIAL_N_PARTITIONS = MAX_CONCURRENCY // 2
80+
IDENTITIES_STREAM = "identities"
6981

7082

7183
class FileBasedSource(ConcurrentSourceAdapter, ABC):
@@ -157,13 +169,20 @@ def check_connection(
157169
errors = []
158170
tracebacks = []
159171
for stream in streams:
172+
if isinstance(stream, FileIdentitiesStream):
173+
identity = next(iter(stream.load_identity_groups()))
174+
if not identity:
175+
errors.append(
176+
"Unable to get identities for current configuration, please check your credentials"
177+
)
178+
continue
160179
if not isinstance(stream, AbstractFileBasedStream):
161180
raise ValueError(f"Stream {stream} is not a file-based stream.")
162181
try:
163182
parsed_config = self._get_parsed_config(config)
164183
availability_method = (
165184
stream.availability_strategy.check_availability
166-
if self._use_file_transfer(parsed_config)
185+
if use_file_transfer(parsed_config) or use_permissions_transfer(parsed_config)
167186
else stream.availability_strategy.check_availability_and_parsability
168187
)
169188
(
@@ -239,7 +258,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]:
239258
message_repository=self.message_repository,
240259
)
241260
stream = FileBasedStreamFacade.create_from_stream(
242-
stream=self._make_default_stream(
261+
stream=self._make_file_based_stream(
243262
stream_config=stream_config,
244263
cursor=cursor,
245264
parsed_config=parsed_config,
@@ -270,7 +289,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]:
270289
CursorField(DefaultFileBasedStream.ab_last_mod_col),
271290
)
272291
stream = FileBasedStreamFacade.create_from_stream(
273-
stream=self._make_default_stream(
292+
stream=self._make_file_based_stream(
274293
stream_config=stream_config,
275294
cursor=cursor,
276295
parsed_config=parsed_config,
@@ -282,13 +301,17 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]:
282301
)
283302
else:
284303
cursor = self.cursor_cls(stream_config)
285-
stream = self._make_default_stream(
304+
stream = self._make_file_based_stream(
286305
stream_config=stream_config,
287306
cursor=cursor,
288307
parsed_config=parsed_config,
289308
)
290309

291310
streams.append(stream)
311+
312+
if include_identities_stream(parsed_config):
313+
identities_stream = self._make_identities_stream()
314+
streams.append(identities_stream)
292315
return streams
293316

294317
except ValidationError as exc:
@@ -310,8 +333,48 @@ def _make_default_stream(
310333
validation_policy=self._validate_and_get_validation_policy(stream_config),
311334
errors_collector=self.errors_collector,
312335
cursor=cursor,
313-
use_file_transfer=self._use_file_transfer(parsed_config),
314-
preserve_directory_structure=self._preserve_directory_structure(parsed_config),
336+
use_file_transfer=use_file_transfer(parsed_config),
337+
preserve_directory_structure=preserve_directory_structure(parsed_config),
338+
)
339+
340+
def _make_permissions_stream(
    self, stream_config: FileBasedStreamConfig, cursor: Optional[AbstractFileBasedCursor]
) -> AbstractFileBasedStream:
    """
    Builds a `PermissionsFileBasedStream` for the given stream configuration.

    The stream is wired with this source's stream reader, availability
    strategy, discovery policy, parsers and errors collector.

    Args:
        stream_config: Configuration of the stream to create
        cursor: Cursor handed to the stream, or None

    Returns:
        The newly created permissions stream
    """
    # Look up the catalog schema registered under this stream's name (if any).
    schema = self.stream_schemas.get(stream_config.name)
    policy = self._validate_and_get_validation_policy(stream_config)
    return PermissionsFileBasedStream(
        config=stream_config,
        catalog_schema=schema,
        stream_reader=self.stream_reader,
        availability_strategy=self.availability_strategy,
        discovery_policy=self.discovery_policy,
        parsers=self.parsers,
        validation_policy=policy,
        errors_collector=self.errors_collector,
        cursor=cursor,
    )
354+
355+
def _make_file_based_stream(
    self,
    stream_config: FileBasedStreamConfig,
    cursor: Optional[AbstractFileBasedCursor],
    parsed_config: AbstractFileBasedSpec,
) -> AbstractFileBasedStream:
    """
    Creates different streams depending on the type of the transfer mode selected.

    Args:
        stream_config: Configuration of the stream to create
        cursor: Cursor handed to the stream, or None
        parsed_config: The parsed configuration containing delivery method settings

    Returns:
        A permissions stream when permissions transfer is selected,
        otherwise the default file-based stream
    """
    if use_permissions_transfer(parsed_config):
        return self._make_permissions_stream(stream_config, cursor)

    # we should have a stream for File transfer mode to decouple from DefaultFileBasedStream
    return self._make_default_stream(stream_config, cursor, parsed_config)
369+
370+
def _make_identities_stream(self) -> Stream:
    """
    Builds the `FileIdentitiesStream` for this source.

    The stream is wired with this source's stream reader, discovery policy
    and errors collector.

    Returns:
        The newly created identities stream
    """
    # Look up the catalog schema registered under the identities stream name (if any).
    identities_schema = self.stream_schemas.get(FileIdentitiesStream.IDENTITIES_STREAM_NAME)
    return FileIdentitiesStream(
        catalog_schema=identities_schema,
        stream_reader=self.stream_reader,
        discovery_policy=self.discovery_policy,
        errors_collector=self.errors_collector,
    )
316379

317380
def _get_stream_from_catalog(
@@ -378,33 +441,3 @@ def _validate_input_schema(self, stream_config: FileBasedStreamConfig) -> None:
378441
"`input_schema` and `schemaless` options cannot both be set",
379442
model=FileBasedStreamConfig,
380443
)
381-
382-
@staticmethod
383-
def _use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
384-
use_file_transfer = (
385-
hasattr(parsed_config.delivery_method, "delivery_type")
386-
and parsed_config.delivery_method.delivery_type == "use_file_transfer"
387-
)
388-
return use_file_transfer
389-
390-
@staticmethod
391-
def _preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
392-
"""
393-
Determines whether to preserve directory structure during file transfer.
394-
395-
When enabled, files maintain their subdirectory paths in the destination.
396-
When disabled, files are flattened to the root of the destination.
397-
398-
Args:
399-
parsed_config: The parsed configuration containing delivery method settings
400-
401-
Returns:
402-
True if directory structure should be preserved (default), False otherwise
403-
"""
404-
if (
405-
FileBasedSource._use_file_transfer(parsed_config)
406-
and hasattr(parsed_config.delivery_method, "preserve_directory_structure")
407-
and parsed_config.delivery_method.preserve_directory_structure is not None
408-
):
409-
return parsed_config.delivery_method.preserve_directory_structure
410-
return True

0 commit comments

Comments
 (0)