diff --git a/airbyte_cdk/cli/source_declarative_manifest/_run.py b/airbyte_cdk/cli/source_declarative_manifest/_run.py index 5def00602..3a00111f9 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py @@ -155,11 +155,25 @@ def handle_remote_manifest_command(args: list[str]) -> None: def create_declarative_source( args: list[str], ) -> ConcurrentDeclarativeSource: # type: ignore [type-arg] - """Creates the source with the injected config. - - This essentially does what other low-code sources do at build time, but at runtime, - with a user-provided manifest in the config. This better reflects what happens in the - connector builder. + """ + Create a declarative source with an injected manifest configuration. + + This function dynamically creates a ConcurrentDeclarativeSource at runtime using a user-provided manifest, similar to how low-code sources are built. It validates the configuration and prepares the source for execution. + + Parameters: + args (list[str]): Command-line arguments containing configuration, catalog, and state information. + + Returns: + ConcurrentDeclarativeSource: A configured declarative source ready for sync operations. + + Raises: + ValueError: If the configuration is invalid or missing required manifest information. + Exception: For any unexpected errors during source creation, with detailed error tracing. 
+ + Notes: + - Requires a configuration with an '__injected_declarative_manifest' key + - The manifest must be a dictionary + - Provides structured error reporting for configuration issues """ try: config: Mapping[str, Any] | None diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index adb126b5c..400b99dc8 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -981,11 +981,27 @@ def create_cursor_pagination( def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: """ - Generically creates a custom component based on the model type and a class_name reference to the custom Python class being - instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor - :param model: The Pydantic model of the custom component being created - :param config: The custom defined connector config - :return: The declarative component built from the Pydantic model to be used at runtime + Create a custom component from a Pydantic model with dynamic class instantiation. + + This method dynamically creates a custom component by loading a class from a specified module and instantiating it with appropriate arguments. It handles complex scenarios such as nested components, type inference, and argument passing. + + Parameters: + model (Any): A Pydantic model representing the custom component configuration. + config (Config): The connector configuration used for module and component resolution. + **kwargs (Any): Additional keyword arguments to override or supplement model arguments. + + Returns: + Any: An instantiated custom component with resolved nested components and configurations. + + Raises: + ValueError: If the component class cannot be loaded or instantiated. 
+ TypeError: If arguments do not match the component's constructor signature. + + Notes: + - Supports nested component creation + - Performs type inference for component fields + - Handles both dictionary and list-based component configurations + - Prioritizes kwargs over model arguments in case of field collisions """ custom_component_class = self._get_class_from_fully_qualified_class_name( full_qualified_class_name=model.class_name, @@ -1046,10 +1062,25 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> def _get_components_module_object( config: Config, ) -> types.ModuleType: - """Get a components module object based on the provided config. - - If custom python components is provided, this will be loaded. Otherwise, we will - attempt to load from the `components` module already imported. + """ + Get a components module object based on the provided configuration. + + This method dynamically creates a module for custom Python components defined in the configuration. It ensures that custom components are defined in a module named 'components' and allows runtime module creation and execution. + + Parameters: + config (Config): A configuration object containing the custom components definition. + + Returns: + types.ModuleType: A dynamically created module containing the custom components. + + Raises: + ValueError: If no custom components are provided or if the components are not defined in a module named 'components'. 
+ + Notes: + - Uses the special key '__injected_components_py' to retrieve custom component code + - Creates a new module dynamically using types.ModuleType + - Executes the provided Python code within the new module's namespace + - Registers the module in sys.modules for future imports """ INJECTED_COMPONENTS_PY = "__injected_components_py" COMPONENTS_MODULE_NAME = "components" @@ -1073,17 +1104,24 @@ def _get_class_from_fully_qualified_class_name( components_module: types.ModuleType, ) -> Any: """ - Get a class from its fully qualified name, optionally using a pre-parsed module. - - Args: - full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName"). - components_module (Optional[ModuleType]): An optional pre-parsed module. - + Retrieve a class from its fully qualified name within a predefined components module. + + Parameters: + full_qualified_class_name (str): The complete dot-separated path to the class (e.g., "source_declarative_manifest.components.ClassName"). + components_module (types.ModuleType): The pre-parsed module containing custom components. + Returns: - Any: The class object. - + Any: The requested class object. + Raises: - ValueError: If the class cannot be loaded. + ValueError: If the class cannot be loaded or does not meet module naming conventions. 
+ - Raised when the module is not named "components" + - Raised when the full module path is not "source_declarative_manifest.components" + - Raised when the specific class cannot be found in the module + + Notes: + - Enforces strict naming conventions for custom component modules + - Provides detailed error messages for debugging component loading issues """ split = full_qualified_class_name.split(".") module_name_full = ".".join(split[:-1]) @@ -1108,6 +1146,23 @@ def _get_class_from_fully_qualified_class_name( @staticmethod def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: + """ + Derive the component type name from type hints by unwrapping nested generic types. + + This method extracts the underlying type from potentially nested generic type hints, + such as List[T], Optional[List[T]], etc., and returns the type name if it's a non-builtin type. + + Parameters: + field_type (Any): The type hint to analyze for component type extraction. + + Returns: + Optional[str]: The name of the underlying type if it's a non-builtin type, otherwise None. + + Examples: + - List[str] returns None + - List[CustomType] returns "CustomType" + - Optional[List[CustomType]] returns "CustomType" + """ interface = field_type while True: origin = get_origin(interface) diff --git a/airbyte_cdk/test/utils/manifest_only_fixtures.py b/airbyte_cdk/test/utils/manifest_only_fixtures.py index 43e90a2c4..643ff2327 100644 --- a/airbyte_cdk/test/utils/manifest_only_fixtures.py +++ b/airbyte_cdk/test/utils/manifest_only_fixtures.py @@ -31,9 +31,28 @@ def connector_dir(request: pytest.FixtureRequest) -> Path: @pytest.fixture(scope="session") def components_module(connector_dir: Path) -> ModuleType | None: - """Load and return the components module from the connector directory. - - This assumes the components module is located at /components.py. + """ + Load and return the components module from the connector directory. 
+ + This session-scoped pytest fixture attempts to load the 'components.py' module from the specified connector directory. It handles various potential failure scenarios during module loading. + + Parameters: + connector_dir (Path): The root directory of the connector containing the components module. + + Returns: + ModuleType | None: The loaded components module if successful, or None if: + - The components.py file does not exist + - The module specification cannot be created + - The module loader is unavailable + + Raises: + No explicit exceptions are raised; returns None on failure. + + Example (request the fixture as a test argument; pytest fixtures cannot be called directly): + def test_uses_components(components_module): + if components_module: + # Use the loaded module + some_component = components_module.SomeComponent() """ components_path = connector_dir / "components.py" if not components_path.exists(): @@ -52,9 +71,25 @@ def components_module(connector_dir: Path) -> ModuleType | None: def components_module_from_string(components_py_text: str) -> ModuleType | None: - """Load and return the components module from a provided string containing the python code. - - This assumes the components module is located at /components.py. + """ + Load a Python module from a string containing module code. + + Parameters: + components_py_text (str): A string containing valid Python code representing a module. + + Returns: + ModuleType | None: A dynamically created module object containing the executed code, or None if execution fails. + + Raises: + Exception: Potential runtime errors during code execution. + + Example: + components_code = ''' + def sample_component(): + return "Hello, World!" + ''' + module = components_module_from_string(components_code) + result = module.sample_component() # Returns "Hello, World!" 
""" module_name = "components" @@ -70,7 +105,22 @@ def components_module_from_string(components_py_text: str) -> ModuleType | None: @pytest.fixture(scope="session") def manifest_path(connector_dir: Path) -> Path: - """Return the path to the connector's manifest file.""" + """ + Return the path to the connector's manifest file. + + Parameters: + connector_dir (Path): The root directory of the connector. + + Returns: + Path: The path to the manifest.yaml file inside connector_dir. + + Raises: + FileNotFoundError: If the manifest.yaml file does not exist in the specified connector directory. + + Example (request the fixture as a test argument; pytest fixtures cannot be called directly): + def test_manifest_exists(manifest_path): + assert manifest_path.name == "manifest.yaml" + """ path = connector_dir / "manifest.yaml" if not path.exists(): raise FileNotFoundError(f"Manifest file not found at {path}") diff --git a/unit_tests/source_declarative_manifest/conftest.py b/unit_tests/source_declarative_manifest/conftest.py index d4c67a33e..f2a37d763 100644 --- a/unit_tests/source_declarative_manifest/conftest.py +++ b/unit_tests/source_declarative_manifest/conftest.py @@ -11,6 +11,23 @@ def hash_text(input_text: str, hash_type: Literal["md5", "sha256"] = "md5") -> str: + """ + Compute the hash of the input text using the specified hashing algorithm. + + Parameters: + input_text (str): The text to be hashed. + hash_type (Literal["md5", "sha256"], optional): The hashing algorithm to use. + Defaults to "md5". Supports "md5" and "sha256" algorithms. + + Returns: + str: The hexadecimal digest of the hashed input text. 
+ + Examples: + >>> hash_text("hello world") + '5eb63bbbe01eeed093cb22bb8f5acdc3' + >>> hash_text("hello world", hash_type="sha256") + 'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9' + """ hashers = { "md5": hashlib.md5, "sha256": hashlib.sha256, @@ -21,6 +38,19 @@ def hash_text(input_text: str, hash_type: Literal["md5", "sha256"] = "md5") -> s def get_fixture_path(file_name) -> str: + """ + Construct the full path to a fixture file relative to the current script's directory. + + Parameters: + file_name (str): The name of the fixture file to locate. + + Returns: + str: The absolute path to the specified fixture file. + + Example: + >>> get_fixture_path('config.json') + '/path/to/current/directory/config.json' + """ return os.path.join(os.path.dirname(__file__), file_name) diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py index db5b07971..5c8d76757 100644 --- a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py @@ -19,6 +19,23 @@ class CustomPageIncrement(PageIncrement): """ def next_page_token(self, response: requests.Response, *args) -> Optional[Any]: + """ + Retrieve the next page token for pagination based on the current page and total pages. + + Extracts the current page and total pages from the API response. If more pages are available, + increments the page counter and returns the next page number. Otherwise, returns None to + indicate the end of pagination. + + Parameters: + response (requests.Response): The HTTP response from the API containing pagination details. + *args: Variable length argument list (unused in this implementation). + + Returns: + Optional[Any]: The next page number if more pages are available, or None if pagination is complete. 
+ + Raises: + AttributeError: If the response JSON has no "response" entry; keys are read with .get(), so a missing key yields None rather than raising KeyError. + """ res = response.json().get("response") currPage = res.get("currentPage") totalPages = res.get("pages") @@ -29,8 +46,23 @@ def next_page_token(self, response: requests.Response, *args) -> Optional[Any]: return None def __post_init__(self, parameters: Mapping[str, Any]): + """ + Initialize the page increment with a starting page number of 1. + + This method is called after the class initialization and sets the initial page + to 1 by invoking the parent class's __post_init__ method and then explicitly + setting the _page attribute. + + Parameters: + parameters (Mapping[str, Any]): Configuration parameters passed during initialization. + """ super().__post_init__(parameters) self._page = 1 def reset(self): + """ + Reset the page counter to the initial state. + + This method resets the internal page counter to 1, allowing pagination to start over from the beginning. It is useful when you want to restart the pagination process for a new request or after completing a previous pagination cycle. + """ self._page = 1 diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 42880baca..607184409 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -36,11 +36,35 @@ def sample_method(self) -> str: def get_fixture_path(file_name) -> str: + """ + Construct the absolute path to a fixture file relative to the current script's directory. 
+ + Parameters: + file_name (str): The name of the fixture file to locate + + Returns: + str: The full absolute path to the specified fixture file + """ return os.path.join(os.path.dirname(__file__), file_name) def test_components_module_from_string() -> None: # Call the function to get the module + """ + Test the functionality of dynamically creating a Python module from a string containing code. + + This test verifies that the `components_module_from_string` function can successfully: + - Create a module from a string of Python code + - Define functions within the module + - Define classes within the module + - Allow instantiation and method calls on dynamically created classes + + Assertions: + - Checks that the returned object is a module + - Verifies the existence of a sample function + - Confirms the sample function returns the expected string + - Validates class definition and method invocation + """ components_module: types.ModuleType = components_module_from_string(SAMPLE_COMPONENTS_PY_TEXT) # Check that the module is created and is of the correct type @@ -60,6 +84,29 @@ def test_components_module_from_string() -> None: def get_py_components_config_dict() -> dict[str, Any]: + """ + Construct a configuration dictionary for a declarative source with custom Python components. + + This function loads and combines configuration data from multiple YAML files and a Python components file + for a specific Airbyte connector. 
It prepares a comprehensive configuration dictionary that includes: + - The declarative manifest + - Custom Python components + - Checksums for the Python components + - Configuration and secrets from YAML files + + Parameters: + None + + Returns: + dict[str, Any]: A configuration dictionary containing: + - '__injected_declarative_manifest': The loaded manifest configuration + - '__injected_components_py': The raw Python components code + - '__injected_components_py_checksum': MD5 and SHA256 checksums of the components + - Additional configuration and secret key-value pairs from YAML files + + Raises: + AssertionError: If the manifest file cannot be loaded or is not a mapping + """ connector_dir = Path(get_fixture_path("resources/source_the_guardian_api")) manifest_yml_path: Path = connector_dir / "manifest.yaml" custom_py_code_path: Path = connector_dir / "components.py" @@ -91,6 +138,29 @@ def get_py_components_config_dict() -> dict[str, Any]: reason="Skipped due to missing 'secrets.yaml'.", ) def test_given_injected_declarative_manifest_and_py_components() -> None: + """ + Test the integration of a declarative source with custom Python components. + + This test function validates the end-to-end functionality of a declarative source by: + 1. Retrieving a configuration dictionary with injected components + 2. Modifying the start date to limit test duration + 3. Creating a temporary configuration file + 4. Creating a declarative source + 5. Performing source check and discovery operations + 6. 
Reading messages from the source and validating them + + The test ensures that: + - The configuration dictionary is correctly structured + - A declarative source can be created from the configuration + - The source can perform check and discover operations + - The source can read messages without errors + + Args: + None + + Raises: + AssertionError: If any of the validation checks fail during the test process + """ py_components_config_dict = get_py_components_config_dict() # Truncate the start_date to speed up tests py_components_config_dict["start_date"] = (