Skip to content

Commit 3a9ab87

Browse files
authored
feat(source-declarative-manifest): add support for custom Python components from dynamic text input (#174)
1 parent e78b272 commit 3a9ab87

File tree

17 files changed

+1014
-16
lines changed

17 files changed

+1014
-16
lines changed

airbyte_cdk/cli/source_declarative_manifest/_run.py

+6
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,12 @@ def create_declarative_source(
171171
"Invalid config: `__injected_declarative_manifest` should be provided at the root "
172172
f"of the config but config only has keys: {list(config.keys() if config else [])}"
173173
)
174+
if not isinstance(config["__injected_declarative_manifest"], dict):
175+
raise ValueError(
176+
"Invalid config: `__injected_declarative_manifest` should be a dictionary, "
177+
f"but got type: {type(config['__injected_declarative_manifest'])}"
178+
)
179+
174180
return ConcurrentDeclarativeSource(
175181
config=config,
176182
catalog=catalog,

airbyte_cdk/connector_builder/connector_builder_handler.py

+1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def get_limits(config: Mapping[str, Any]) -> TestReadLimits:
5252
def create_source(config: Mapping[str, Any], limits: TestReadLimits) -> ManifestDeclarativeSource:
5353
manifest = config["__injected_declarative_manifest"]
5454
return ManifestDeclarativeSource(
55+
config=config,
5556
emit_connector_builder_messages=True,
5657
source_config=manifest,
5758
component_factory=ModelToComponentFactory(

airbyte_cdk/sources/declarative/concurrent_declarative_source.py

+1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ def __init__(
7777

7878
super().__init__(
7979
source_config=source_config,
80+
config=config,
8081
debug=debug,
8182
emit_connector_builder_messages=emit_connector_builder_messages,
8283
component_factory=component_factory,

airbyte_cdk/sources/declarative/manifest_declarative_source.py

+15-4
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import pkgutil
88
from copy import deepcopy
99
from importlib import metadata
10+
from types import ModuleType
1011
from typing import Any, Dict, Iterator, List, Mapping, Optional, Set
1112

1213
import yaml
@@ -32,6 +33,9 @@
3233
DeclarativeStream as DeclarativeStreamModel,
3334
)
3435
from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
36+
from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
37+
get_registered_components_module,
38+
)
3539
from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import (
3640
ManifestComponentTransformer,
3741
)
@@ -59,22 +63,29 @@ class ManifestDeclarativeSource(DeclarativeSource):
5963
def __init__(
6064
self,
6165
source_config: ConnectionDefinition,
66+
*,
67+
config: Mapping[str, Any] | None = None,
6268
debug: bool = False,
6369
emit_connector_builder_messages: bool = False,
6470
component_factory: Optional[ModelToComponentFactory] = None,
6571
):
6672
"""
67-
:param source_config(Mapping[str, Any]): The manifest of low-code components that describe the source connector
68-
:param debug(bool): True if debug mode is enabled
69-
:param component_factory(ModelToComponentFactory): optional factory if ModelToComponentFactory's default behaviour needs to be tweaked
73+
Args:
74+
config: The provided config dict.
75+
source_config: The manifest of low-code components that describe the source connector.
76+
debug: True if debug mode is enabled.
77+
emit_connector_builder_messages: True if messages should be emitted to the connector builder.
78+
component_factory: optional factory if ModelToComponentFactory's default behavior needs to be tweaked.
7079
"""
7180
self.logger = logging.getLogger(f"airbyte.{self.name}")
72-
7381
# For ease of use we don't require the type to be specified at the top level manifest, but it should be included during processing
7482
manifest = dict(source_config)
7583
if "type" not in manifest:
7684
manifest["type"] = "DeclarativeSource"
7785

86+
# If custom components are needed, locate and/or register them.
87+
self.components_module: ModuleType | None = get_registered_components_module(config=config)
88+
7889
resolved_source_config = ManifestReferenceResolver().preprocess_manifest(manifest)
7990
propagated_source_config = ManifestComponentTransformer().propagate_types_and_parameters(
8091
"", resolved_source_config, {}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
"""Contains functions to compile custom code from text."""
2+
3+
import hashlib
4+
import os
5+
import sys
6+
from collections.abc import Mapping
7+
from types import ModuleType
8+
from typing import Any, cast
9+
10+
from typing_extensions import Literal
11+
12+
# Checksum algorithms accepted when validating injected code. The keys of
# CHECKSUM_FUNCTIONS mirror the members of ChecksumType.
ChecksumType = Literal["md5", "sha256"]
CHECKSUM_FUNCTIONS = dict(md5=hashlib.md5, sha256=hashlib.sha256)

# Names under which a components module is registered in, and looked up from,
# `sys.modules`.
COMPONENTS_MODULE_NAME = "components"
SDM_COMPONENTS_MODULE_NAME = "source_declarative_manifest.components"

# Config keys carrying the injected manifest, the injected `components.py`
# text, and the checksums used to validate that text.
INJECTED_MANIFEST = "__injected_declarative_manifest"
INJECTED_COMPONENTS_PY = "__injected_components_py"
INJECTED_COMPONENTS_PY_CHECKSUMS = "__injected_components_py_checksums"

# Environment variable that must equal "true" (case-insensitive) for custom
# code execution to be permitted.
ENV_VAR_ALLOW_CUSTOM_CODE = "AIRBYTE_ALLOW_CUSTOM_CODE"
23+
24+
25+
class AirbyteCodeTamperedError(Exception):
    """Signal that a connector's components module failed checksum verification.

    Callers should treat this as fatal: a checksum mismatch can indicate that
    the code was tampered with.
    """
30+
31+
32+
class AirbyteCustomCodeNotPermittedError(Exception):
    """Signal that custom code was requested in an environment that does not allow it."""

    def __init__(self) -> None:
        # The message is fixed, which is why the constructor takes no arguments.
        message = (
            "Custom connector code is not permitted in this environment. "
            "If you need to run custom code, please ask your administrator to set the `AIRBYTE_ALLOW_CUSTOM_CODE` "
            "environment variable to 'true' in your Airbyte environment. "
            "If you see this message in Airbyte Cloud, your workspace does not allow executing "
            "custom connector code."
        )
        super().__init__(message)
43+
44+
45+
def _hash_text(input_text: str, hash_type: str = "md5") -> str:
    """Compute the hex digest of `input_text` with the selected algorithm.

    Args:
        input_text: The text to hash; it is encoded with `str.encode()` defaults.
        hash_type: A key of `CHECKSUM_FUNCTIONS` naming the algorithm to use.

    Returns:
        The hexadecimal digest string.

    Raises:
        ValueError: If `input_text` is empty.
        KeyError: If `hash_type` is not a key of `CHECKSUM_FUNCTIONS`.
    """
    if not input_text:
        raise ValueError("Input text cannot be empty.")

    # The hashlib constructors accept the initial data directly, which is
    # equivalent to creating an empty hash object and calling `update()` once.
    return CHECKSUM_FUNCTIONS[hash_type](input_text.encode()).hexdigest()
53+
54+
55+
def custom_code_execution_permitted() -> bool:
    """Report whether this environment allows custom code to be executed.

    The answer is `True` only when the `AIRBYTE_ALLOW_CUSTOM_CODE` environment
    variable is set to 'true' (compared case-insensitively). An unset variable
    or any other value yields `False`.
    """
    flag_value = os.getenv(ENV_VAR_ALLOW_CUSTOM_CODE, "")
    return flag_value.lower() == "true"
61+
62+
63+
def validate_python_code(
    code_text: str,
    checksums: dict[str, str] | None,
) -> None:
    """Validate the provided Python code text against the provided checksums.

    Every entry in `checksums` must match; validation stops at the first failure.
    Currently we fail if no checksums are provided, although this may change in the future.

    Args:
        code_text: The Python source text to validate.
        checksums: Mapping of checksum type (a key of `CHECKSUM_FUNCTIONS`) to the
            expected hex digest of `code_text`.

    Raises:
        ValueError: If `checksums` is `None` or empty, if a checksum type is not
            supported, or (from `_hash_text`) if `code_text` is empty.
        AirbyteCodeTamperedError: If a computed digest differs from the expected one.
    """
    if not checksums:
        raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")

    for checksum_type, checksum in checksums.items():
        if checksum_type not in CHECKSUM_FUNCTIONS:
            # Render the supported types as a plain list. Interpolating
            # `.keys()` directly would print as `dict_keys([...])`.
            raise ValueError(
                f"Unsupported checksum type: {checksum_type}. "
                f"Supported checksum types are: {list(CHECKSUM_FUNCTIONS)}"
            )

        if _hash_text(code_text, checksum_type) != checksum:
            raise AirbyteCodeTamperedError(f"{checksum_type} checksum does not match.")
82+
83+
84+
def get_registered_components_module(
    config: Mapping[str, Any] | None,
) -> ModuleType | None:
    """Resolve the components module to use for the provided config.

    If the config contains custom `components.py` text, that text is validated,
    executed and registered in `sys.modules`, so that manifest declarations
    referencing its classes can find them later. This path requires custom code
    execution to be permitted in the current environment.

    Otherwise, a module already registered in `sys.modules` is returned, trying
    `source_declarative_manifest.components` first and then `components`.

    Args:
        config: The connector config, or `None`.

    Returns:
        The components module, or `None` if no custom components are provided
        and neither module name is found in `sys.modules`.

    Raises:
        AirbyteCustomCodeNotPermittedError: If custom components text is provided
            but custom code execution is not permitted.
    """
    if config and INJECTED_COMPONENTS_PY in config:
        if not custom_code_execution_permitted():
            raise AirbyteCustomCodeNotPermittedError()

        # Validation, execution and registration of the injected text all
        # happen inside the registration helper.
        return register_components_module_from_string(
            components_py_text=config[INJECTED_COMPONENTS_PY],
            checksums=config.get(INJECTED_COMPONENTS_PY_CHECKSUMS, None),
        )

    # No injected code: fall back to a module that is already registered.
    for module_name in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
        if module_name in sys.modules:
            return cast(ModuleType, sys.modules[module_name])

    return None
118+
119+
120+
def register_components_module_from_string(
    components_py_text: str,
    checksums: dict[str, Any] | None,
) -> ModuleType:
    """Build a components module from Python source text and register it.

    The text is first checked with `validate_python_code`. If that raises,
    nothing is executed or registered. The text is then executed in the
    namespace of a fresh module named `components`, and that module is
    registered in `sys.modules` as both `source_declarative_manifest.components`
    and `components`, replacing any existing entries under those names.

    Args:
        components_py_text: The Python source code of the components module.
        checksums: Mapping of checksum type to expected digest for the source text.

    Returns:
        The newly created and registered module.
    """
    validate_python_code(code_text=components_py_text, checksums=checksums)

    module = ModuleType(name=COMPONENTS_MODULE_NAME)

    # SECURITY NOTE: this runs arbitrary Python supplied by the caller. The only
    # gate inside this function is the checksum validation above, so callers
    # are responsible for deciding whether execution is permitted at all.
    exec(components_py_text, module.__dict__)

    # Registration happens only after the code executed successfully, so a
    # failing module never ends up in `sys.modules`.
    for registered_name in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
        sys.modules[registered_name] = module

    return module

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

+31-7
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
from __future__ import annotations
66

77
import datetime
8-
import importlib
98
import inspect
109
import re
10+
import sys
1111
from functools import partial
1212
from typing import (
1313
Any,
@@ -363,6 +363,10 @@
363363
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
364364
ZipfileDecoder as ZipfileDecoderModel,
365365
)
366+
from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
367+
COMPONENTS_MODULE_NAME,
368+
SDM_COMPONENTS_MODULE_NAME,
369+
)
366370
from airbyte_cdk.sources.declarative.partition_routers import (
367371
CartesianProductStreamSlicer,
368372
ListPartitionRouter,
@@ -1102,7 +1106,6 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) ->
11021106
:param config: The custom defined connector config
11031107
:return: The declarative component built from the Pydantic model to be used at runtime
11041108
"""
1105-
11061109
custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
11071110
component_fields = get_type_hints(custom_component_class)
11081111
model_args = model.dict()
@@ -1156,14 +1159,35 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) ->
11561159
return custom_component_class(**kwargs)
11571160

11581161
@staticmethod
def _get_class_from_fully_qualified_class_name(
    full_qualified_class_name: str,
) -> Any:
    """Get a class from its fully qualified name.

    The module is looked up in `sys.modules` only; it is not imported here. If a
    custom components module is needed, we assume it is already registered - probably
    as `source_declarative_manifest.components` or `components`.

    Args:
        full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").

    Returns:
        Any: The class object.

    Raises:
        ValueError: If the class cannot be loaded, either because the module is not
            registered in `sys.modules` or because it has no attribute with that name.
    """
    # Everything before the last dot is the module path; the rest is the class name.
    module_name_full, _, class_name = full_qualified_class_name.rpartition(".")

    if module_name_full == COMPONENTS_MODULE_NAME:
        # Assume "components" on its own means "source_declarative_manifest.components"
        module_name_full = SDM_COMPONENTS_MODULE_NAME

    try:
        return getattr(sys.modules[module_name_full], class_name)
    except (AttributeError, KeyError, ModuleNotFoundError) as e:
        # A missing `sys.modules` entry raises KeyError (not ModuleNotFoundError),
        # so it must be caught here to honour the documented ValueError contract.
        raise ValueError(f"Could not load class {full_qualified_class_name}.") from e
11671191

11681192
@staticmethod
11691193
def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:

airbyte_cdk/test/utils/manifest_only_fixtures.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import importlib.util
55
from pathlib import Path
66
from types import ModuleType
7-
from typing import Optional
87

98
import pytest
109

@@ -30,7 +29,7 @@ def connector_dir(request: pytest.FixtureRequest) -> Path:
3029

3130

3231
@pytest.fixture(scope="session")
33-
def components_module(connector_dir: Path) -> Optional[ModuleType]:
32+
def components_module(connector_dir: Path) -> ModuleType | None:
3433
"""Load and return the components module from the connector directory.
3534
3635
This assumes the components module is located at <connector_dir>/components.py.

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ skip = ["__init__.py"] # TODO: Remove after this is fixed: https://github.com/a
120120
[tool.poe.tasks]
121121
# Installation
122122
install = { shell = "poetry install --all-extras" }
123+
lock = { shell = "poetry lock --no-update" }
123124

124125
# Build tasks
125126
assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."}

unit_tests/connector_builder/test_connector_builder_handler.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ def test_resolve_manifest(valid_resolve_manifest_config_file):
344344
config = copy.deepcopy(RESOLVE_MANIFEST_CONFIG)
345345
command = "resolve_manifest"
346346
config["__command"] = command
347-
source = ManifestDeclarativeSource(MANIFEST)
347+
source = ManifestDeclarativeSource(source_config=MANIFEST)
348348
limits = TestReadLimits()
349349
resolved_manifest = handle_connector_builder_request(
350350
source, command, config, create_configured_catalog("dummy_stream"), _A_STATE, limits
@@ -505,7 +505,7 @@ def resolved_manifest(self):
505505

506506
def test_read():
507507
config = TEST_READ_CONFIG
508-
source = ManifestDeclarativeSource(MANIFEST)
508+
source = ManifestDeclarativeSource(source_config=MANIFEST)
509509

510510
real_record = AirbyteRecordMessage(
511511
data={"id": "1234", "key": "value"}, emitted_at=1, stream=_stream_name
@@ -592,7 +592,7 @@ def test_config_update() -> None:
592592
"client_secret": "a client secret",
593593
"refresh_token": "a refresh token",
594594
}
595-
source = ManifestDeclarativeSource(manifest)
595+
source = ManifestDeclarativeSource(source_config=manifest)
596596

597597
refresh_request_response = {
598598
"access_token": "an updated access token",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
secrets*
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# The Guardian API Tests
2+
3+
For these tests to work, you'll need to create a `secrets.yaml` file in this directory that looks like this:
4+
5+
```yml
6+
api_key: ******
7+
```
8+
9+
The `.gitignore` file in this directory should ensure your file is not committed to git, but it's a good practice to double-check. 👀

0 commit comments

Comments
 (0)