diff --git a/airbyte_cdk/cli/source_declarative_manifest/_run.py b/airbyte_cdk/cli/source_declarative_manifest/_run.py index 232ac302f..3a00111f9 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py @@ -155,11 +155,25 @@ def handle_remote_manifest_command(args: list[str]) -> None: def create_declarative_source( args: list[str], ) -> ConcurrentDeclarativeSource: # type: ignore [type-arg] - """Creates the source with the injected config. - - This essentially does what other low-code sources do at build time, but at runtime, - with a user-provided manifest in the config. This better reflects what happens in the - connector builder. + """ + Create a declarative source with an injected manifest configuration. + + This function dynamically creates a ConcurrentDeclarativeSource at runtime using a user-provided manifest, similar to how low-code sources are built. It validates the configuration and prepares the source for execution. + + Parameters: + args (list[str]): Command-line arguments containing configuration, catalog, and state information. + + Returns: + ConcurrentDeclarativeSource: A configured declarative source ready for sync operations. + + Raises: + ValueError: If the configuration is invalid or missing required manifest information. + Exception: For any unexpected errors during source creation, with detailed error tracing. + + Notes: + - Requires a configuration with an '__injected_declarative_manifest' key + - The manifest must be a dictionary + - Provides structured error reporting for configuration issues """ try: config: Mapping[str, Any] | None @@ -171,6 +185,12 @@ def create_declarative_source( "Invalid config: `__injected_declarative_manifest` should be provided at the root " f"of the config but config only has keys: {list(config.keys() if config else [])}" ) + if not isinstance(config["__injected_declarative_manifest"], dict): + raise ValueError( + "Invalid config: `__injected_declarative_manifest` should be a dictionary, " + f"but got type: {type(config['__injected_declarative_manifest'])}" + ) + return ConcurrentDeclarativeSource( config=config, catalog=catalog, diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 215d6fff9..400b99dc8 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -5,9 +5,10 @@ from __future__ import annotations import datetime -import importlib import inspect import re +import sys +import types from functools import partial from typing import ( Any, @@ -980,14 +981,32 @@ def create_cursor_pagination( def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: """ - Generically creates a custom component based on the model type and a class_name reference to the custom Python class being - instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor - :param model: The Pydantic model of the custom component being created - :param config: The custom defined connector config - :return: The declarative component built from the Pydantic model to be used at runtime + Create a custom component from a Pydantic model with dynamic class instantiation. + + This method dynamically creates a custom component by loading a class from a specified module and instantiating it with appropriate arguments. It handles complex scenarios such as nested components, type inference, and argument passing. + + Parameters: + model (Any): A Pydantic model representing the custom component configuration. + config (Config): The connector configuration used for module and component resolution. + **kwargs (Any): Additional keyword arguments to override or supplement model arguments. + + Returns: + Any: An instantiated custom component with resolved nested components and configurations. + + Raises: + ValueError: If the component class cannot be loaded or instantiated. + TypeError: If arguments do not match the component's constructor signature. + + Notes: + - Supports nested component creation + - Performs type inference for component fields + - Handles both dictionary and list-based component configurations + - Prioritizes kwargs over model arguments in case of field collisions """ - - custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) + custom_component_class = self._get_class_from_fully_qualified_class_name( + full_qualified_class_name=model.class_name, + components_module=self._get_components_module_object(config=config), + ) component_fields = get_type_hints(custom_component_class) model_args = model.dict() model_args["config"] = config @@ -1040,17 +1059,110 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> return custom_component_class(**kwargs) @staticmethod - def _get_class_from_fully_qualified_class_name(full_qualified_class_name: str) -> Any: + def _get_components_module_object( + config: Config, + ) -> types.ModuleType: + """ + Get a components module object based on the provided configuration. + + This method dynamically creates a module for custom Python components defined in the configuration. It ensures that custom components are defined in a module named 'components' and allows runtime module creation and execution. + + Parameters: + config (Config): A configuration object containing the custom components definition. + + Returns: + types.ModuleType: A dynamically created module containing the custom components. + + Raises: + ValueError: If no custom components are provided or if the components are not defined in a module named 'components'. + + Notes: + - Uses the special key '__injected_components_py' to retrieve custom component code + - Creates a new module dynamically using types.ModuleType + - Executes the provided Python code within the new module's namespace + - Registers the module in sys.modules for future imports + """ + INJECTED_COMPONENTS_PY = "__injected_components_py" + COMPONENTS_MODULE_NAME = "components" + + components_module: types.ModuleType + if not INJECTED_COMPONENTS_PY in config: + raise ValueError( + "Custom components must be defined in a module named `components`. Please provide a custom components module." + ) + + # Create a new module object and execute the provided Python code text within it + components_module = types.ModuleType(name=COMPONENTS_MODULE_NAME) + python_text = config[INJECTED_COMPONENTS_PY] + exec(python_text, components_module.__dict__) + sys.modules[COMPONENTS_MODULE_NAME] = components_module + return components_module + + @staticmethod + def _get_class_from_fully_qualified_class_name( + full_qualified_class_name: str, + components_module: types.ModuleType, + ) -> Any: + """ + Retrieve a class from its fully qualified name within a predefined components module. + + Parameters: + full_qualified_class_name (str): The complete dot-separated path to the class (e.g., "source_declarative_manifest.components.ClassName"). + components_module (types.ModuleType): The pre-parsed module containing custom components. + + Returns: + Any: The requested class object. + + Raises: + ValueError: If the class cannot be loaded or does not meet module naming conventions. + - Raised when the module is not named "components" + - Raised when the full module path is not "source_declarative_manifest.components" + - Raised when the specific class cannot be found in the module + + Notes: + - Enforces strict naming conventions for custom component modules + - Provides detailed error messages for debugging component loading issues + """ split = full_qualified_class_name.split(".") - module = ".".join(split[:-1]) + module_name_full = ".".join(split[:-1]) + module_name = split[-2] class_name = split[-1] + + if module_name != "components": + raise ValueError( + "Custom components must be defined in a module named " + f"`components`. Found `{module_name}` instead." + ) + if module_name_full != "source_declarative_manifest.components": + raise ValueError( + "Custom components must be defined in a module named " + f"`source_declarative_manifest.components`. Found `{module_name_full}` instead." + ) + try: - return getattr(importlib.import_module(module), class_name) - except AttributeError: - raise ValueError(f"Could not load class {full_qualified_class_name}.") + return getattr(components_module, class_name) + except (AttributeError, ModuleNotFoundError) as e: + raise ValueError(f"Could not load class {full_qualified_class_name}.") from e @staticmethod def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: + """ + Derive the component type name from type hints by unwrapping nested generic types. + + This method extracts the underlying type from potentially nested generic type hints, + such as List[T], Optional[List[T]], etc., and returns the type name if it's a non-builtin type. + + Parameters: + field_type (Any): The type hint to analyze for component type extraction. + + Returns: + Optional[str]: The name of the underlying type if it's a non-builtin type, otherwise None. + + Examples: + - List[str] returns None + - List[CustomType] returns "CustomType" + - Optional[List[CustomType]] returns "CustomType" + """ interface = field_type while True: origin = get_origin(interface) diff --git a/airbyte_cdk/test/utils/manifest_only_fixtures.py b/airbyte_cdk/test/utils/manifest_only_fixtures.py index 47620e7c1..643ff2327 100644 --- a/airbyte_cdk/test/utils/manifest_only_fixtures.py +++ b/airbyte_cdk/test/utils/manifest_only_fixtures.py @@ -2,9 +2,9 @@ import importlib.util +import types from pathlib import Path from types import ModuleType -from typing import Optional import pytest @@ -30,10 +30,29 @@ def connector_dir(request: pytest.FixtureRequest) -> Path: @pytest.fixture(scope="session") -def components_module(connector_dir: Path) -> Optional[ModuleType]: - """Load and return the components module from the connector directory. - - This assumes the components module is located at /components.py. +def components_module(connector_dir: Path) -> ModuleType | None: + """ + Load and return the components module from the connector directory. + + This function attempts to load the 'components.py' module from the specified connector directory. It handles various potential failure scenarios during module loading. + + Parameters: + connector_dir (Path): The root directory of the connector containing the components module. + + Returns: + ModuleType | None: The loaded components module if successful, or None if: + - The components.py file does not exist + - The module specification cannot be created + - The module loader is unavailable + + Raises: + No explicit exceptions are raised; returns None on failure. + + Example: + components = components_module(Path('/path/to/connector')) + if components: + # Use the loaded module + some_component = components.SomeComponent() """ components_path = connector_dir / "components.py" if not components_path.exists(): @@ -51,9 +70,57 @@ def components_module(connector_dir: Path) -> Optional[ModuleType]: return components_module +def components_module_from_string(components_py_text: str) -> ModuleType | None: + """ + Load a Python module from a string containing module code. + + Parameters: + components_py_text (str): A string containing valid Python code representing a module. + + Returns: + ModuleType | None: A dynamically created module object containing the executed code, or None if execution fails. + + Raises: + Exception: Potential runtime errors during code execution. + + Example: + components_code = ''' + def sample_component(): + return "Hello, World!" + ''' + module = components_module_from_string(components_code) + result = module.sample_component() # Returns "Hello, World!" + """ + module_name = "components" + + # Create a new module object + components_module = types.ModuleType(name=module_name) + + # Execute the module text in the module's namespace + exec(components_py_text, components_module.__dict__) + + # Now you can import and use the module + return components_module + + @pytest.fixture(scope="session") def manifest_path(connector_dir: Path) -> Path: - """Return the path to the connector's manifest file.""" + """ + Return the path to the connector's manifest file. + + Parameters: + connector_dir (Path): The root directory of the connector. + + Returns: + Path: The absolute path to the manifest.yaml file. + + Raises: + FileNotFoundError: If the manifest.yaml file does not exist in the specified connector directory. + + Example: + manifest_file = manifest_path(Path('/path/to/connector')) + # Returns Path('/path/to/connector/manifest.yaml') + """ path = connector_dir / "manifest.yaml" if not path.exists(): raise FileNotFoundError(f"Manifest file not found at {path}") diff --git a/pyproject.toml b/pyproject.toml index fbc7ad7af..8e3bfb0fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -126,6 +126,7 @@ select = ["I"] [tool.poe.tasks] # Installation install = { shell = "poetry install --all-extras" } +lock = { shell = "poetry lock --no-update" } # Build tasks assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."} diff --git a/unit_tests/source_declarative_manifest/conftest.py b/unit_tests/source_declarative_manifest/conftest.py index 3d61e65e8..f2a37d763 100644 --- a/unit_tests/source_declarative_manifest/conftest.py +++ b/unit_tests/source_declarative_manifest/conftest.py @@ -2,13 +2,55 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. # +import hashlib import os +from typing import Literal import pytest import yaml -def get_fixture_path(file_name): +def hash_text(input_text: str, hash_type: Literal["md5", "sha256"] = "md5") -> str: + """ + Compute the hash of the input text using the specified hashing algorithm. + + Parameters: + input_text (str): The text to be hashed. + hash_type (Literal["md5", "sha256"], optional): The hashing algorithm to use. + Defaults to "md5". Supports "md5" and "sha256" algorithms. + + Returns: + str: The hexadecimal digest of the hashed input text. + + Examples: + >>> hash_text("hello world") + '5eb63bbbe01eeed093cb22bb8f5acdc3' + >>> hash_text("hello world", hash_type="sha256") + 'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9' + """ + hashers = { + "md5": hashlib.md5, + "sha256": hashlib.sha256, + } + hash_object = hashers[hash_type]() + hash_object.update(input_text.encode()) + return hash_object.hexdigest() + + +def get_fixture_path(file_name) -> str: + """ + Construct the full path to a fixture file relative to the current script's directory. + + Parameters: + file_name (str): The name of the fixture file to locate. + + Returns: + str: The absolute path to the specified fixture file. + + Example: + >>> get_fixture_path('config.json') + '/path/to/current/directory/config.json' + """ return os.path.join(os.path.dirname(__file__), file_name) diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/.gitignore b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/.gitignore new file mode 100644 index 000000000..c4ab49a30 --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/.gitignore @@ -0,0 +1 @@ +secrets* diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/README.md b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/README.md new file mode 100644 index 000000000..403a4baba --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/README.md @@ -0,0 +1,9 @@ +# The Guardian API Tests + +For these tests to work, you'll need to create a `secrets.yaml` file in this directory that looks like this: + +```yml +api_key: ****** +``` + +The `.gitignore` file in this directory should ensure your file is not committed to git, but it's a good practice to double-check. 👀 diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py new file mode 100644 index 000000000..5c8d76757 --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py @@ -0,0 +1,68 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from dataclasses import dataclass +from typing import Any, Mapping, Optional + +import requests + +from airbyte_cdk.sources.declarative.requesters.paginators.strategies.page_increment import ( + PageIncrement, +) + + +@dataclass +class CustomPageIncrement(PageIncrement): + """ + Starts page from 1 instead of the default value that is 0. Stops Pagination when currentPage is equal to totalPages. + """ + + def next_page_token(self, response: requests.Response, *args) -> Optional[Any]: + """ + Retrieve the next page token for pagination based on the current page and total pages. + + Extracts the current page and total pages from the API response. If more pages are available, + increments the page counter and returns the next page number. Otherwise, returns None to + indicate the end of pagination. + + Parameters: + response (requests.Response): The HTTP response from the API containing pagination details. + *args: Variable length argument list (unused in this implementation). + + Returns: + Optional[Any]: The next page number if more pages are available, or None if pagination is complete. + + Raises: + KeyError: If the expected keys are missing in the response JSON. + """ + res = response.json().get("response") + currPage = res.get("currentPage") + totalPages = res.get("pages") + if currPage < totalPages: + self._page += 1 + return self._page + else: + return None + + def __post_init__(self, parameters: Mapping[str, Any]): + """ + Initialize the page increment with a starting page number of 1. + + This method is called after the class initialization and sets the initial page + to 1 by invoking the parent class's __post_init__ method and then explicitly + setting the _page attribute. + + Parameters: + parameters (Mapping[str, Any]): Configuration parameters passed during initialization. + """ + super().__post_init__(parameters) + self._page = 1 + + def reset(self): + """ + Reset the page counter to the initial state. + + This method resets the internal page counter to 1, allowing pagination to start over from the beginning. It is useful when you want to restart the pagination process for a new request or after completing a previous pagination cycle. + """ + self._page = 1 diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml new file mode 100644 index 000000000..7b440631f --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml @@ -0,0 +1,376 @@ +version: "4.3.2" +definitions: + selector: + extractor: + field_path: + - response + - results + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + incremental_sync: + type: DatetimeBasedCursor + start_datetime: + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%d" + end_datetime: + datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}" + datetime_format: "%Y-%m-%d" + step: "P7D" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" + cursor_granularity: "PT1S" + cursor_field: "webPublicationDate" + start_time_option: + field_name: "from-date" + inject_into: "request_parameter" + end_time_option: + field_name: "to-date" + inject_into: "request_parameter" + retriever: + record_selector: + extractor: + field_path: + - response + - results + paginator: + type: DefaultPaginator + pagination_strategy: + type: CustomPaginationStrategy + class_name: "source_declarative_manifest.components.CustomPageIncrement" + page_size: 10 + page_token_option: + type: RequestOption + inject_into: "request_parameter" + field_name: "page" + page_size_option: + inject_into: "body_data" + field_name: "page_size" + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + base_stream: + incremental_sync: + type: DatetimeBasedCursor + start_datetime: + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%d" + end_datetime: + datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}" + datetime_format: "%Y-%m-%d" + step: "P7D" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" + cursor_granularity: "PT1S" + cursor_field: "webPublicationDate" + start_time_option: + field_name: "from-date" + inject_into: "request_parameter" + end_time_option: + field_name: "to-date" + inject_into: "request_parameter" + retriever: + record_selector: + extractor: + field_path: + - response + - results + paginator: + type: DefaultPaginator + pagination_strategy: + type: CustomPaginationStrategy + class_name: "source_declarative_manifest.components.CustomPageIncrement" + page_size: 10 + page_token_option: + type: RequestOption + inject_into: "request_parameter" + field_name: "page" + page_size_option: + inject_into: "body_data" + field_name: "page_size" + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + content_stream: + incremental_sync: + type: DatetimeBasedCursor + start_datetime: + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%d" + end_datetime: + datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}" + datetime_format: "%Y-%m-%d" + step: "P7D" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" + cursor_granularity: "PT1S" + cursor_field: "webPublicationDate" + start_time_option: + field_name: "from-date" + inject_into: "request_parameter" + end_time_option: + field_name: "to-date" + inject_into: "request_parameter" + retriever: + record_selector: + extractor: + field_path: + - response + - results + paginator: + type: "DefaultPaginator" + pagination_strategy: + type: CustomPaginationStrategy + class_name: "source_declarative_manifest.components.CustomPageIncrement" + page_size: 10 + page_token_option: + type: RequestOption + inject_into: "request_parameter" + field_name: "page" + page_size_option: + inject_into: "body_data" + field_name: "page_size" + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + schema_loader: + type: InlineSchemaLoader + schema: + $schema: http://json-schema.org/draft-04/schema# + type: object + properties: + id: + type: string + type: + type: string + sectionId: + type: string + sectionName: + type: string + webPublicationDate: + type: string + webTitle: + type: string + webUrl: + type: string + apiUrl: + type: string + isHosted: + type: boolean + pillarId: + type: string + pillarName: + type: string + required: + - id + - type + - sectionId + - sectionName + - webPublicationDate + - webTitle + - webUrl + - apiUrl + - isHosted + - pillarId + - pillarName +streams: + - incremental_sync: + type: DatetimeBasedCursor + start_datetime: + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%d" + type: MinMaxDatetime + end_datetime: + datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}" + datetime_format: "%Y-%m-%d" + type: MinMaxDatetime + step: "P7D" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" + cursor_granularity: "PT1S" + cursor_field: "webPublicationDate" + start_time_option: + field_name: "from-date" + inject_into: "request_parameter" + type: RequestOption + end_time_option: + field_name: "to-date" + inject_into: "request_parameter" + type: RequestOption + retriever: + record_selector: + extractor: + field_path: + - response + - results + type: DpathExtractor + type: RecordSelector + paginator: + type: "DefaultPaginator" + pagination_strategy: + class_name: source_declarative_manifest.components.CustomPageIncrement + page_size: 10 + type: CustomPaginationStrategy + page_token_option: + type: RequestOption + inject_into: "request_parameter" + field_name: "page" + page_size_option: + inject_into: "body_data" + field_name: "page_size" + type: RequestOption + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + type: HttpRequester + path: "/search" + type: SimpleRetriever + schema_loader: + type: InlineSchemaLoader + schema: + $schema: http://json-schema.org/draft-04/schema# + type: object + properties: + id: + type: string + type: + type: string + sectionId: + type: string + sectionName: + type: string + webPublicationDate: + type: string + webTitle: + type: string + webUrl: + type: string + apiUrl: + type: string + isHosted: + type: boolean + pillarId: + type: string + pillarName: + type: string + required: + - id + - type + - sectionId + - sectionName + - webPublicationDate + - webTitle + - webUrl + - apiUrl + - isHosted + - pillarId + - pillarName + type: DeclarativeStream + name: "content" + primary_key: "id" +check: + stream_names: + - "content" + type: CheckStream +type: DeclarativeSource +spec: + type: Spec + documentation_url: https://docs.airbyte.com/integrations/sources/the-guardian-api + connection_specification: + $schema: http://json-schema.org/draft-07/schema# + title: The Guardian Api Spec + type: object + required: + - api_key + - start_date + additionalProperties: true + properties: + api_key: + title: API Key + type: string + description: + Your API Key. See here. + The key is case sensitive. + airbyte_secret: true + start_date: + title: Start Date + type: string + description: + Use this to set the minimum date (YYYY-MM-DD) of the results. + Results older than the start_date will not be shown. + pattern: ^([1-9][0-9]{3})\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01])$ + examples: + - YYYY-MM-DD + query: + title: Query + type: string + description: + (Optional) The query (q) parameter filters the results to only + those that include that search term. The q parameter supports AND, OR and + NOT operators. + examples: + - environment AND NOT water + - environment AND political + - amusement park + - political + tag: + title: Tag + type: string + description: + (Optional) A tag is a piece of data that is used by The Guardian + to categorise content. Use this parameter to filter results by showing only + the ones matching the entered tag. See here + for a list of all tags, and here + for the tags endpoint documentation. + examples: + - environment/recycling + - environment/plasticbags + - environment/energyefficiency + section: + title: Section + type: string + description: + (Optional) Use this to filter the results by a particular section. + See here + for a list of all sections, and here + for the sections endpoint documentation. + examples: + - media + - technology + - housing-network + end_date: + title: End Date + type: string + description: + (Optional) Use this to set the maximum date (YYYY-MM-DD) of the + results. Results newer than the end_date will not be shown. Default is set + to the current date (today) for incremental syncs. + pattern: ^([1-9][0-9]{3})\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01])$ + examples: + - YYYY-MM-DD diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml new file mode 100644 index 000000000..e31112780 --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml @@ -0,0 +1,3 @@ +{ + "start_date": "2024-01-01", +} diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py new file mode 100644 index 000000000..607184409 --- /dev/null +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -0,0 +1,204 @@ +# +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. +# + +import datetime +import json +import logging +import os +import types +from collections.abc import Mapping +from pathlib import Path +from tempfile import NamedTemporaryFile +from typing import Any + +import pytest +import yaml +from airbyte_protocol_dataclasses.models.airbyte_protocol import AirbyteCatalog + +from airbyte_cdk.cli.source_declarative_manifest._run import ( + create_declarative_source, +) +from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream +from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource +from airbyte_cdk.test.utils.manifest_only_fixtures import components_module_from_string +from unit_tests.connector_builder.test_connector_builder_handler import configured_catalog +from unit_tests.source_declarative_manifest.conftest import hash_text + +SAMPLE_COMPONENTS_PY_TEXT = """ +def sample_function() -> str: + return "Hello, World!" + +class SimpleClass: + def sample_method(self) -> str: + return sample_function() +""" + + +def get_fixture_path(file_name) -> str: + """ + Construct the absolute path to a fixture file relative to the current script's directory. + + Parameters: + file_name (str): The name of the fixture file to locate + + Returns: + str: The full absolute path to the specified fixture file + """ + return os.path.join(os.path.dirname(__file__), file_name) + + +def test_components_module_from_string() -> None: + # Call the function to get the module + """ + Test the functionality of dynamically creating a Python module from a string containing code. + + This test verifies that the `components_module_from_string` function can successfully: + - Create a module from a string of Python code + - Define functions within the module + - Define classes within the module + - Allow instantiation and method calls on dynamically created classes + + Assertions: + - Checks that the returned object is a module + - Verifies the existence of a sample function + - Confirms the sample function returns the expected string + - Validates class definition and method invocation + """ + components_module: types.ModuleType = components_module_from_string(SAMPLE_COMPONENTS_PY_TEXT) + + # Check that the module is created and is of the correct type + assert isinstance(components_module, types.ModuleType) + + # Check that the function is correctly defined in the module + assert hasattr(components_module, "sample_function") + + # Check that simple functions are callable + assert components_module.sample_function() == "Hello, World!" + + # Check class definitions work as expected + assert isinstance(components_module.SimpleClass, type) + obj = components_module.SimpleClass() + assert isinstance(obj, components_module.SimpleClass) + assert obj.sample_method() == "Hello, World!" + + +def get_py_components_config_dict() -> dict[str, Any]: + """ + Construct a configuration dictionary for a declarative source with custom Python components. + + This function loads and combines configuration data from multiple YAML files and a Python components file + for a specific Airbyte connector. It prepares a comprehensive configuration dictionary that includes: + - The declarative manifest + - Custom Python components + - Checksums for the Python components + - Configuration and secrets from YAML files + + Parameters: + None + + Returns: + dict[str, Any]: A configuration dictionary containing: + - '__injected_declarative_manifest': The loaded manifest configuration + - '__injected_components_py': The raw Python components code + - '__injected_components_py_checksum': MD5 and SHA256 checksums of the components + - Additional configuration and secret key-value pairs from YAML files + + Raises: + AssertionError: If the manifest file cannot be loaded or is not a mapping + """ + connector_dir = Path(get_fixture_path("resources/source_the_guardian_api")) + manifest_yml_path: Path = connector_dir / "manifest.yaml" + custom_py_code_path: Path = connector_dir / "components.py" + config_yaml_path: Path = connector_dir / "valid_config.yaml" + secrets_yaml_path: Path = connector_dir / "secrets.yaml" + + manifest_dict = yaml.safe_load(manifest_yml_path.read_text()) + assert manifest_dict, "Failed to load the manifest file." + assert isinstance( + manifest_dict, Mapping + ), f"Manifest file is type {type(manifest_dict).__name__}, not a mapping: {manifest_dict}" + + custom_py_code = custom_py_code_path.read_text() + combined_config_dict = { + "__injected_declarative_manifest": manifest_dict, + "__injected_components_py": custom_py_code, + "__injected_components_py_checksum": { + "md5": hash_text(custom_py_code, "md5"), + "sha256": hash_text(custom_py_code, "sha256"), + }, + } + combined_config_dict.update(yaml.safe_load(config_yaml_path.read_text())) + combined_config_dict.update(yaml.safe_load(secrets_yaml_path.read_text())) + return combined_config_dict + + +@pytest.mark.skipif( + condition=not Path(get_fixture_path("resources/source_the_guardian_api/secrets.yaml")).exists(), + reason="Skipped due to missing 'secrets.yaml'.", +) +def test_given_injected_declarative_manifest_and_py_components() -> None: + """ + Test the integration of a declarative source with custom Python components. + + This test function validates the end-to-end functionality of a declarative source by: + 1. Retrieving a configuration dictionary with injected components + 2. Modifying the start date to limit test duration + 3. Creating a temporary configuration file + 4. Creating a declarative source + 5. Performing source check and discovery operations + 6. Reading messages from the source and validating them + + The test ensures that: + - The configuration dictionary is correctly structured + - A declarative source can be created from the configuration + - The source can perform check and discover operations + - The source can read messages without errors + + Args: + None + + Raises: + AssertionError: If any of the validation checks fail during the test process + """ + py_components_config_dict = get_py_components_config_dict() + # Truncate the start_date to speed up tests + py_components_config_dict["start_date"] = ( + datetime.datetime.now() - datetime.timedelta(days=2) + ).strftime("%Y-%m-%d") + assert isinstance(py_components_config_dict, dict) + assert "__injected_declarative_manifest" in py_components_config_dict + assert "__injected_components_py" in py_components_config_dict + + with NamedTemporaryFile(delete=False, suffix=".json") as temp_config_file: + json_str = json.dumps(py_components_config_dict) + Path(temp_config_file.name).write_text(json_str) + temp_config_file.flush() + source = create_declarative_source( + ["check", "--config", temp_config_file.name], + ) + assert isinstance(source, ManifestDeclarativeSource) + source.check(logger=logging.getLogger(), config=py_components_config_dict) + catalog: AirbyteCatalog = source.discover( + logger=logging.getLogger(), config=py_components_config_dict + ) + assert isinstance(catalog, AirbyteCatalog) + configured_catalog = ConfiguredAirbyteCatalog( + streams=[ + ConfiguredAirbyteStream( + stream=stream, + sync_mode="full_refresh", + destination_sync_mode="overwrite", + ) + for stream in catalog.streams + ] + ) + + msg_iterator = source.read( + logger=logging.getLogger(), + config=py_components_config_dict, + catalog=configured_catalog, + state=None, + ) + for msg in msg_iterator: + assert msg