Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(source-declarative-manifest): add support for custom Python components from dynamic text input #174

Merged
merged 43 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
9973955
skeleton: components module from dynamic text input
aaronsteers Dec 13, 2024
8309f79
refactor / clean up
aaronsteers Dec 16, 2024
399dd7b
add test resource for py_components unit test
aaronsteers Dec 19, 2024
9115757
add fixture for custom py components scenario
aaronsteers Dec 19, 2024
5dc664c
add test
aaronsteers Dec 19, 2024
5be084f
chore: add missing guard statement
aaronsteers Jan 13, 2025
7379eea
chore: remove stale comment
aaronsteers Jan 13, 2025
51cbcbd
checkpoint: passing tests with pokeapi
aaronsteers Jan 13, 2025
aaef285
chore: add `poe lock` task definition
aaronsteers Jan 13, 2025
e7c3eae
add 'source_the_guardian_api' test resources
aaronsteers Jan 13, 2025
2300f7a
checkpoint: working `check`
aaronsteers Jan 13, 2025
4efcd40
checkpoint: working discover
aaronsteers Jan 13, 2025
cb6a4ab
checkpoint: working sync
aaronsteers Jan 13, 2025
051c57b
improve module name parsing
aaronsteers Jan 13, 2025
e511a2b
remove unused files
aaronsteers Jan 13, 2025
a19b5c1
tidy up
aaronsteers Jan 13, 2025
c837745
skip if no creds
aaronsteers Jan 13, 2025
c54a73d
cosmetic: cleaner diff
aaronsteers Jan 15, 2025
3f66c46
don't fail when custom components.py is already grafted into filesystem
aaronsteers Jan 15, 2025
75332e8
clean up import code
aaronsteers Jan 15, 2025
67b84a0
clean up imports, implement safety mechanisms and blocked-by-default …
aaronsteers Jan 15, 2025
5805649
fix mypy issues
aaronsteers Jan 15, 2025
3251e5c
Update unit_tests/source_declarative_manifest/test_source_declarative…
aaronsteers Jan 15, 2025
877d721
more clean up
aaronsteers Jan 15, 2025
7531ed0
fix ruff format issue
aaronsteers Jan 15, 2025
5e7e826
add intentionally failing use case
aaronsteers Jan 15, 2025
c654ef5
validate input text
aaronsteers Jan 15, 2025
6badf7e
clean up module name parsing
aaronsteers Jan 15, 2025
b81ca33
refactor and clean up interfaces
aaronsteers Jan 16, 2025
ceab6fd
use monkeypatch for setting env vars
aaronsteers Jan 16, 2025
714360c
full code review and cleanup
aaronsteers Jan 16, 2025
c8de81a
apply suggestion
aaronsteers Jan 16, 2025
a084e7a
apply suggestion
aaronsteers Jan 16, 2025
0491b99
apply suggestion
aaronsteers Jan 16, 2025
7134340
apply suggestion
aaronsteers Jan 16, 2025
bff4dc4
fix lint issues
aaronsteers Jan 21, 2025
15cd254
clean up tests
aaronsteers Jan 21, 2025
3921341
Merge branch 'main' into aj/feat/accept-components-text-input
aaronsteers Jan 21, 2025
6c4e01f
autofix lint issue
aaronsteers Jan 21, 2025
6c81115
fix tests
aaronsteers Jan 21, 2025
1c35577
fix another test
aaronsteers Jan 21, 2025
e6b28b6
fix failing test
aaronsteers Jan 22, 2025
f29f616
mark full sync as slow test (~60s)
aaronsteers Jan 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions airbyte_cdk/cli/source_declarative_manifest/_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,12 @@ def create_declarative_source(
"Invalid config: `__injected_declarative_manifest` should be provided at the root "
f"of the config but config only has keys: {list(config.keys() if config else [])}"
)
if not isinstance(config["__injected_declarative_manifest"], dict):
aaronsteers marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(
"Invalid config: `__injected_declarative_manifest` should be a dictionary, "
f"but got type: {type(config['__injected_declarative_manifest'])}"
)

return ConcurrentDeclarativeSource(
config=config,
catalog=catalog,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
from __future__ import annotations

import datetime
import importlib
import inspect
import re
import sys
import types
from functools import partial
from typing import (
Any,
Expand Down Expand Up @@ -986,8 +987,10 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) ->
:param config: The custom defined connector config
:return: The declarative component built from the Pydantic model to be used at runtime
"""

custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
custom_component_class = self._get_class_from_fully_qualified_class_name(
full_qualified_class_name=model.class_name,
components_module=self._get_components_module_object(config=config),
)
component_fields = get_type_hints(custom_component_class)
model_args = model.dict()
model_args["config"] = config
Expand Down Expand Up @@ -1040,14 +1043,68 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) ->
return custom_component_class(**kwargs)

@staticmethod
def _get_class_from_fully_qualified_class_name(
    full_qualified_class_name: str,
    components_module: types.ModuleType,
) -> Any:
    """Look up a custom component class on an already-built components module.

    The dotted name is only used for validation and to extract the class name;
    the class itself is always read from ``components_module``.

    Args:
        full_qualified_class_name: Dotted path of the class. The module part must be
            exactly ``source_declarative_manifest.components``
            (e.g. ``source_declarative_manifest.components.MyClass``).
        components_module: The module object that holds the custom components.

    Returns:
        The attribute of ``components_module`` named after the last dotted segment.

    Raises:
        ValueError: If the name has no module part, if the module part is not the
            expected one, or if the class is not found on ``components_module``.
    """
    # rpartition never raises: a name without any dot yields an empty module part,
    # which is then rejected below with a ValueError instead of an IndexError.
    module_name_full, _, class_name = full_qualified_class_name.rpartition(".")
    module_name = module_name_full.rpartition(".")[2]

    if module_name != "components":
        raise ValueError(
            "Custom components must be defined in a module named "
            f"`components`. Found `{module_name}` instead."
        )
    if module_name_full != "source_declarative_manifest.components":
        raise ValueError(
            "Custom components must be defined in a module named "
            f"`source_declarative_manifest.components`. Found `{module_name_full}` instead."
        )

    try:
        return getattr(components_module, class_name)
    except (AttributeError, ModuleNotFoundError) as e:
        raise ValueError(f"Could not load class {full_qualified_class_name}.") from e

@staticmethod
def _get_components_module_object(
    config: Config,
) -> types.ModuleType:
    """Build the ``components`` module from Python source text carried in the config.

    The source text is read from the ``__injected_components_py`` config key and
    executed inside a freshly created module object named ``components``. The module
    is then stored in ``sys.modules`` under that name (overwriting any existing entry)
    and returned. There is no fallback: a config without the key is rejected.

    Args:
        config: The connector config; must contain ``__injected_components_py``.

    Returns:
        The module object populated by executing the injected source.

    Raises:
        ValueError: If ``__injected_components_py`` is not present in ``config``.
        Exception: Anything raised while executing the injected source propagates
            unchanged.
    """
    INJECTED_COMPONENTS_PY = "__injected_components_py"
    COMPONENTS_MODULE_NAME = "components"

    if INJECTED_COMPONENTS_PY not in config:
        raise ValueError(
            "Custom components must be defined in a module named `components`. Please provide a custom components module."
        )

    # Create a new module object and execute the provided Python code text within it.
    # SECURITY NOTE(review): this runs arbitrary code taken from the config with the
    # full privileges of the current process. Only call this for trusted configs, or
    # gate it behind an explicit opt-in.
    components_module = types.ModuleType(name=COMPONENTS_MODULE_NAME)
    exec(config[INJECTED_COMPONENTS_PY], components_module.__dict__)

    # Registered only after successful execution, so a failing script leaves
    # sys.modules untouched.
    sys.modules[COMPONENTS_MODULE_NAME] = components_module
    return components_module

@staticmethod
def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
Expand Down
21 changes: 19 additions & 2 deletions airbyte_cdk/test/utils/manifest_only_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@


import importlib.util
import types
from pathlib import Path
from types import ModuleType
from typing import Optional

import pytest

Expand All @@ -30,7 +30,7 @@ def connector_dir(request: pytest.FixtureRequest) -> Path:


@pytest.fixture(scope="session")
def components_module(connector_dir: Path) -> Optional[ModuleType]:
def components_module(connector_dir: Path) -> ModuleType | None:
"""Load and return the components module from the connector directory.

This assumes the components module is located at <connector_dir>/components.py.
Expand All @@ -51,6 +51,23 @@ def components_module(connector_dir: Path) -> Optional[ModuleType]:
return components_module


def components_module_from_string(components_py_text: str) -> ModuleType | None:
    """Build and return a ``components`` module from a string of Python source code.

    The text is executed inside a new, empty module object named ``components``.
    Nothing is read from disk, and the module is NOT registered in ``sys.modules``:
    callers must use the returned object directly rather than ``import components``.

    Args:
        components_py_text: Python source code defining the custom components.

    Returns:
        The module object whose namespace was populated by executing the text.
    """
    module_name = "components"

    # Create a new, empty module object to act as the namespace.
    components_module = types.ModuleType(name=module_name)

    # Execute the module text in the module's namespace. Any exception raised by
    # the provided code propagates to the caller.
    exec(components_py_text, components_module.__dict__)

    return components_module


@pytest.fixture(scope="session")
def manifest_path(connector_dir: Path) -> Path:
"""Return the path to the connector's manifest file."""
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ select = ["I"]
[tool.poe.tasks]
# Installation
install = { shell = "poetry install --all-extras" }
lock = { shell = "poetry lock --no-update" }
aaronsteers marked this conversation as resolved.
Show resolved Hide resolved

# Build tasks
assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."}
Expand Down
14 changes: 13 additions & 1 deletion unit_tests/source_declarative_manifest/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,25 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
#

import hashlib
import os
from typing import Literal

import pytest
import yaml


def get_fixture_path(file_name):
def hash_text(input_text: str, hash_type: Literal["md5", "sha256"] = "md5") -> str:
    """Return the hexadecimal digest of ``input_text`` for the chosen algorithm.

    The text is encoded with the default ``str.encode()`` encoding before hashing.
    An unsupported ``hash_type`` raises ``KeyError`` from the lookup table.
    """
    digest_factories = {
        "md5": hashlib.md5,
        "sha256": hashlib.sha256,
    }
    make_digest = digest_factories[hash_type]
    return make_digest(input_text.encode()).hexdigest()
aaronsteers marked this conversation as resolved.
Show resolved Hide resolved


def get_fixture_path(file_name) -> str:
    """Return ``file_name`` joined onto the directory that contains this module."""
    this_dir = os.path.dirname(__file__)
    return os.path.join(this_dir, file_name)


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
secrets*
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# The Guardian API Tests

For these tests to work, you'll need to create a `secrets.yaml` file in this directory that looks like this:

```yml
api_key: ******
```
The `.gitignore` file in this directory should ensure your file is not committed to git, but it's a good practice to double-check. 👀
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

from dataclasses import dataclass
from typing import Any, Mapping, Optional

import requests

from airbyte_cdk.sources.declarative.requesters.paginators.strategies.page_increment import (
PageIncrement,
)


@dataclass
class CustomPageIncrement(PageIncrement):
    """
    Page-increment strategy whose page counter starts at 1 and which stops paginating
    once the response's ``currentPage`` is no longer below its ``pages`` total.
    """

    def __post_init__(self, parameters: Mapping[str, Any]):
        # Let the parent initialise itself first, then force the counter to start at 1.
        super().__post_init__(parameters)
        self._page = 1

    def reset(self):
        # Restart pagination from the first page.
        self._page = 1

    def next_page_token(self, response: requests.Response, *args) -> Optional[Any]:
        # Paging info is read from the "response" object of the JSON body.
        payload = response.json().get("response")
        current_page = payload.get("currentPage")
        total_pages = payload.get("pages")

        # No further pages: signal the end of pagination.
        if not (current_page < total_pages):
            return None

        self._page += 1
        return self._page
Loading
Loading