diff --git a/airbyte_cdk/cli/airbyte_cdk/_connector.py b/airbyte_cdk/cli/airbyte_cdk/_connector.py index d09ed4540..1e83f06c6 100644 --- a/airbyte_cdk/cli/airbyte_cdk/_connector.py +++ b/airbyte_cdk/cli/airbyte_cdk/_connector.py @@ -44,11 +44,14 @@ import rich_click as click -# from airbyte_cdk.test.standard_tests import pytest_hooks -from airbyte_cdk.cli.airbyte_cdk._util import resolve_connector_name_and_directory -from airbyte_cdk.test.standard_tests.test_resources import find_connector_root_from_name from airbyte_cdk.test.standard_tests.util import create_connector_test_suite +# from airbyte_cdk.test.standard_tests import pytest_hooks +from airbyte_cdk.utils.connector_paths import ( + find_connector_root_from_name, + resolve_connector_name_and_directory, +) + click.rich_click.TEXT_MARKUP = "markdown" pytest: ModuleType | None @@ -98,15 +101,11 @@ def connector_cli_group() -> None: @connector_cli_group.command() -@click.option( - "--connector-name", +@click.argument( + "connector", + required=False, type=str, - help="Name of the connector to test. Ignored if --connector-directory is provided.", -) -@click.option( - "--connector-directory", - type=click.Path(exists=True, file_okay=False, path_type=Path), - help="Path to the connector directory.", + metavar="[CONNECTOR]", ) @click.option( "--collect-only", @@ -115,8 +114,7 @@ def connector_cli_group() -> None: help="Only collect tests, do not run them.", ) def test( - connector_name: str | None = None, - connector_directory: Path | None = None, + connector: str | Path | None = None, *, collect_only: bool = False, ) -> None: @@ -124,6 +122,9 @@ def test( This command runs the standard connector tests for a specific connector. + [CONNECTOR] can be a connector name (e.g. 'source-pokeapi'), a path to a connector directory, or omitted to use the current working directory. + If a string containing '/' is provided, it is treated as a path. Otherwise, it is treated as a connector name. 
+ If no connector name or directory is provided, we will look within the current working directory. If the current working directory is not a connector directory (e.g. starting with 'source-') and no connector name or path is provided, the process will fail. @@ -133,10 +134,7 @@ def test( "pytest is not installed. Please install pytest to run the connector tests." ) click.echo("Connector test command executed.") - connector_name, connector_directory = resolve_connector_name_and_directory( - connector_name=connector_name, - connector_directory=connector_directory, - ) + connector_name, connector_directory = resolve_connector_name_and_directory(connector) connector_test_suite = create_connector_test_suite( connector_name=connector_name if not connector_directory else None, diff --git a/airbyte_cdk/cli/airbyte_cdk/_image.py b/airbyte_cdk/cli/airbyte_cdk/_image.py index a94ef03a1..cd0ca5cc9 100644 --- a/airbyte_cdk/cli/airbyte_cdk/_image.py +++ b/airbyte_cdk/cli/airbyte_cdk/_image.py @@ -10,8 +10,8 @@ import rich_click as click -from airbyte_cdk.cli.airbyte_cdk._util import resolve_connector_name_and_directory from airbyte_cdk.models.connector_metadata import MetadataFile +from airbyte_cdk.utils.connector_paths import resolve_connector_name_and_directory from airbyte_cdk.utils.docker import ( ConnectorImageBuildError, build_connector_image, @@ -28,30 +28,30 @@ def image_cli_group() -> None: @image_cli_group.command() -@click.option( - "--connector-name", +@click.argument( + "connector", + required=False, type=str, - help="Name of the connector to test. 
Ignored if --connector-directory is provided.", -) -@click.option( - "--connector-directory", - type=click.Path(exists=True, file_okay=False, path_type=Path), - help="Path to the connector directory.", + metavar="[CONNECTOR]", ) @click.option("--tag", default="dev", help="Tag to apply to the built image (default: dev)") @click.option("--no-verify", is_flag=True, help="Skip verification of the built image") +@click.option( + "--dockerfile", + type=click.Path(exists=True, file_okay=True, path_type=Path), + help="Optional. Override the Dockerfile used for building the image.", +) def build( - connector_name: str | None = None, - connector_directory: Path | None = None, + connector: str | None = None, *, tag: str = "dev", no_verify: bool = False, + dockerfile: Path | None = None, ) -> None: """Build a connector Docker image. - This command builds a Docker image for a connector, using either - the connector's Dockerfile or a base image specified in the metadata. - The image is built for both AMD64 and ARM64 architectures. + [CONNECTOR] can be a connector name (e.g. 'source-pokeapi'), a path to a connector directory, or omitted to use the current working directory. + If a string containing '/' is provided, it is treated as a path. Otherwise, it is treated as a connector name. 
""" if not verify_docker_installation(): click.echo( @@ -59,10 +59,7 @@ def build( ) sys.exit(1) - connector_name, connector_directory = resolve_connector_name_and_directory( - connector_name=connector_name, - connector_directory=connector_directory, - ) + connector_name, connector_directory = resolve_connector_name_and_directory(connector) metadata_file_path: Path = connector_directory / "metadata.yaml" try: @@ -81,6 +78,7 @@ def build( metadata=metadata, tag=tag, no_verify=no_verify, + dockerfile_override=dockerfile or None, ) except ConnectorImageBuildError as e: click.echo( diff --git a/airbyte_cdk/cli/airbyte_cdk/_secrets.py b/airbyte_cdk/cli/airbyte_cdk/_secrets.py index baef51a34..7aa3996d4 100644 --- a/airbyte_cdk/cli/airbyte_cdk/_secrets.py +++ b/airbyte_cdk/cli/airbyte_cdk/_secrets.py @@ -43,7 +43,7 @@ from rich.console import Console from rich.table import Table -from airbyte_cdk.cli.airbyte_cdk._util import ( +from airbyte_cdk.utils.connector_paths import ( resolve_connector_name, resolve_connector_name_and_directory, ) @@ -73,15 +73,11 @@ def secrets_cli_group() -> None: @secrets_cli_group.command() -@click.option( - "--connector-name", +@click.argument( + "connector", + required=False, type=str, - help="Name of the connector to fetch secrets for. 
Ignored if --connector-directory is provided.", -) -@click.option( - "--connector-directory", - type=click.Path(exists=True, file_okay=False, path_type=Path), - help="Path to the connector directory.", + metavar="[CONNECTOR]", ) @click.option( "--gcp-project-id", @@ -97,8 +93,7 @@ def secrets_cli_group() -> None: default=False, ) def fetch( - connector_name: str | None = None, - connector_directory: Path | None = None, + connector: str | Path | None = None, gcp_project_id: str = AIRBYTE_INTERNAL_GCP_PROJECT, print_ci_secrets_masks: bool = False, ) -> None: @@ -107,6 +102,9 @@ def fetch( This command fetches secrets for a connector from Google Secret Manager and writes them to the connector's secrets directory. + [CONNECTOR] can be a connector name (e.g. 'source-pokeapi'), a path to a connector directory, or omitted to use the current working directory. + If a string containing '/' is provided, it is treated as a path. Otherwise, it is treated as a connector name. + If no connector name or directory is provided, we will look within the current working directory. If the current working directory is not a connector directory (e.g. starting with 'source-') and no connector name or path is provided, the process will fail. @@ -114,17 +112,14 @@ def fetch( The `--print-ci-secrets-masks` option will print the GitHub CI mask for the secrets. This is useful for masking secrets in CI logs. - WARNING: This action causes the secrets to be printed in clear text to `STDOUT`. For security - reasons, this function will only execute if the `CI` environment variable is set. Otherwise, - masks will not be printed. + WARNING: The `--print-ci-secrets-masks` option causes the secrets to be printed in clear text to + `STDOUT`. For security reasons, this argument will be ignored if the `CI` environment + variable is not set. 
""" click.echo("Fetching secrets...", err=True) client = _get_gsm_secrets_client() - connector_name, connector_directory = resolve_connector_name_and_directory( - connector_name=connector_name, - connector_directory=connector_directory, - ) + connector_name, connector_directory = resolve_connector_name_and_directory(connector) secrets_dir = _get_secrets_dir( connector_directory=connector_directory, connector_name=connector_name, @@ -289,21 +284,7 @@ def _get_secrets_dir( connector_name: str, ensure_exists: bool = True, ) -> Path: - try: - connector_name, connector_directory = resolve_connector_name_and_directory( - connector_name=connector_name, - connector_directory=connector_directory, - ) - except FileNotFoundError as e: - raise FileNotFoundError( - f"Could not find connector directory for '{connector_name}'. " - "Please provide the --connector-directory option with the path to the connector. " - "Note: This command requires either running from within a connector directory, " - "being in the airbyte monorepo, or explicitly providing the connector directory path." - ) from e - except ValueError as e: - raise ValueError(str(e)) - + _ = connector_name # Unused, but it may be used in the future for logging secrets_dir = connector_directory / "secrets" if ensure_exists: secrets_dir.mkdir(parents=True, exist_ok=True) diff --git a/airbyte_cdk/cli/airbyte_cdk/_util.py b/airbyte_cdk/cli/airbyte_cdk/_util.py deleted file mode 100644 index 2b638a122..000000000 --- a/airbyte_cdk/cli/airbyte_cdk/_util.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Common utilities for Airbyte CDK CLI.""" - -from pathlib import Path - -from airbyte_cdk.test.standard_tests.test_resources import find_connector_root_from_name - - -def resolve_connector_name_and_directory( - connector_name: str | None = None, - connector_directory: Path | None = None, -) -> tuple[str, Path]: - """Resolve the connector name and directory. 
- - This function will resolve the connector name and directory based on the provided - arguments. If no connector name or directory is provided, it will look within the - current working directory. If the current working directory is not a connector - directory (e.g. starting with 'source-') and no connector name or path is provided, - the process will fail. - """ - if not connector_directory: - if connector_name: - connector_directory = find_connector_root_from_name(connector_name) - else: - cwd = Path().resolve().absolute() - if cwd.name.startswith("source-") or cwd.name.startswith("destination-"): - connector_directory = cwd - else: - raise ValueError( - "Either connector_name or connector_directory must be provided if not " - "running from a connector directory." - ) - - if not connector_name: - connector_name = connector_directory.name - - if connector_directory: - connector_directory = connector_directory.resolve().absolute() - elif connector_name: - connector_directory = find_connector_root_from_name(connector_name) - else: - raise ValueError("Either connector_name or connector_directory must be provided.") - - return connector_name, connector_directory - - -def resolve_connector_name( - connector_directory: Path, -) -> str: - """Resolve the connector name. - - This function will resolve the connector name based on the provided connector directory. - If the current working directory is not a connector directory - (e.g. starting with 'source-'), the process will fail. - - Raises: - FileNotFoundError: If the connector directory does not exist or cannot be found. - """ - if not connector_directory: - raise FileNotFoundError( - "Connector directory does not exist or cannot be found. Please provide a valid " - "connector directory." 
- ) - connector_name = connector_directory.absolute().name - if not connector_name.startswith("source-") and not connector_name.startswith("destination-"): - raise ValueError( - f"Connector directory '{connector_name}' does not look like a valid connector directory. " - f"Full path: {connector_directory.absolute()}" - ) - return connector_name diff --git a/airbyte_cdk/test/standard_tests/connector_base.py b/airbyte_cdk/test/standard_tests/connector_base.py index 35bcdbe8f..78c406cc9 100644 --- a/airbyte_cdk/test/standard_tests/connector_base.py +++ b/airbyte_cdk/test/standard_tests/connector_base.py @@ -24,7 +24,7 @@ from airbyte_cdk.test.standard_tests.models import ( ConnectorTestScenario, ) -from airbyte_cdk.test.standard_tests.test_resources import ( +from airbyte_cdk.utils.connector_paths import ( ACCEPTANCE_TEST_CONFIG, find_connector_root, ) diff --git a/airbyte_cdk/test/standard_tests/declarative_sources.py b/airbyte_cdk/test/standard_tests/declarative_sources.py index e1954246f..ad9c89a28 100644 --- a/airbyte_cdk/test/standard_tests/declarative_sources.py +++ b/airbyte_cdk/test/standard_tests/declarative_sources.py @@ -12,7 +12,7 @@ from airbyte_cdk.test.standard_tests._job_runner import IConnector from airbyte_cdk.test.standard_tests.models import ConnectorTestScenario from airbyte_cdk.test.standard_tests.source_base import SourceTestSuiteBase -from airbyte_cdk.test.standard_tests.test_resources import MANIFEST_YAML +from airbyte_cdk.utils.connector_paths import MANIFEST_YAML def md5_checksum(file_path: Path) -> str: diff --git a/airbyte_cdk/test/standard_tests/test_resources.py b/airbyte_cdk/test/standard_tests/test_resources.py deleted file mode 100644 index a56c7c821..000000000 --- a/airbyte_cdk/test/standard_tests/test_resources.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) 2025 Airbyte, Inc., all rights reserved. 
-"""Resources for Airbyte CDK tests.""" - -from contextlib import suppress -from pathlib import Path - -ACCEPTANCE_TEST_CONFIG = "acceptance-test-config.yml" -MANIFEST_YAML = "manifest.yaml" -METADATA_YAML = "metadata.yaml" - - -def find_connector_root(from_paths: list[Path]) -> Path: - """Find the root directory of the connector.""" - for path in from_paths: - # If we reach here, we didn't find the manifest file in any parent directory - # Check if the manifest file exists in the current directory - for parent in [path, *path.parents]: - if (parent / METADATA_YAML).exists(): - return parent - if (parent / MANIFEST_YAML).exists(): - return parent - if (parent / ACCEPTANCE_TEST_CONFIG).exists(): - return parent - if parent.name == "airbyte_cdk": - break - - raise FileNotFoundError( - "Could not find connector root directory relative to the provided directories: " - f"'{str(from_paths)}'." - ) - - -def find_connector_root_from_name(connector_name: str) -> Path: - """Find the root directory of the connector from its name.""" - with suppress(FileNotFoundError): - return find_connector_root([Path(connector_name)]) - - # If the connector name is not found, check if we are in the airbyte monorepo - # and try to find the connector root from the current directory. - - cwd: Path = Path().absolute() - - if "airbyte" not in cwd.parts: - raise FileNotFoundError( - "Could not find connector root directory relative and we are not in the airbyte repo. " - f"Current directory: {cwd} " - ) - - # Find the connector root from the current directory - - airbyte_repo_root: Path - for parent in [cwd, *cwd.parents]: - if parent.name == "airbyte": - airbyte_repo_root = parent - break - else: - raise FileNotFoundError( - "Could not find connector root directory relative and we are not in the airbyte repo." 
- ) - - expected_connector_dir: Path = ( - airbyte_repo_root / "airbyte-integrations" / "connectors" / connector_name - ) - if not expected_connector_dir.exists(): - raise FileNotFoundError( - f"Could not find connector directory '{expected_connector_dir}' relative to the airbyte repo." - ) - - return expected_connector_dir diff --git a/airbyte_cdk/test/standard_tests/util.py b/airbyte_cdk/test/standard_tests/util.py index fb30dfc9b..58ae19d85 100644 --- a/airbyte_cdk/test/standard_tests/util.py +++ b/airbyte_cdk/test/standard_tests/util.py @@ -12,7 +12,7 @@ ) from airbyte_cdk.test.standard_tests.destination_base import DestinationTestSuiteBase from airbyte_cdk.test.standard_tests.source_base import SourceTestSuiteBase -from airbyte_cdk.test.standard_tests.test_resources import ( +from airbyte_cdk.utils.connector_paths import ( METADATA_YAML, find_connector_root_from_name, ) diff --git a/airbyte_cdk/utils/connector_paths.py b/airbyte_cdk/utils/connector_paths.py new file mode 100644 index 000000000..c05a195e3 --- /dev/null +++ b/airbyte_cdk/utils/connector_paths.py @@ -0,0 +1,223 @@ +# Copyright (c) 2025 Airbyte, Inc., all rights reserved. +"""Resources and utilities for locating Airbyte Connectors.""" + +from contextlib import suppress +from pathlib import Path + +ACCEPTANCE_TEST_CONFIG = "acceptance-test-config.yml" +MANIFEST_YAML = "manifest.yaml" +METADATA_YAML = "metadata.yaml" + + +def resolve_airbyte_repo_root( + from_dir: Path, +) -> Path: + """Resolve the Airbyte repository root directory. + + This function will resolve the Airbyte repository root directory based on the + current working directory. If the current working directory is not within the + Airbyte repository, it will look for the 'airbyte' or 'airbyte-enterprise' + directory in the parent directories. + + Sibling directories are also considered, so if the working directory is '~/repos/airbyte-cdk', + it will find the 'airbyte' directory in '~/repos/airbyte'. 
The 'airbyte' directory + will be preferred over 'airbyte-enterprise' if both are present as sibling directories and + neither is a parent directory. + + If we reach the root of the filesystem without finding the 'airbyte' directory, + a FileNotFoundError will be raised. + + Raises: + FileNotFoundError: If the Airbyte repository root directory cannot be found. + """ + + def _is_airbyte_repo_root(path: Path) -> bool: + """Check if the given path is the Airbyte repository root.""" + return all( + [ + (path.name == "airbyte" or path.name == "airbyte-enterprise"), + (path / "airbyte-integrations").is_dir(), + ] + ) + + def _find_in_adjacent_dirs(current_dir: Path) -> Path | None: + """Check if 'airbyte' or 'airbyte-enterprise' exists as a sibling, parent, or child.""" + # Check parents + parent_dir = current_dir.parent + if _is_airbyte_repo_root(parent_dir): + return parent_dir + + # Check siblings + if _is_airbyte_repo_root(parent_dir / "airbyte"): + return parent_dir / "airbyte" + if _is_airbyte_repo_root(parent_dir / "airbyte-enterprise"): + return parent_dir / "airbyte-enterprise" + + # Check children only if no "airbyte" or "airbyte-enterprise" in parent + if not any( + [ + "airbyte" in current_dir.parts, + "airbyte-enterprise" in current_dir.parts, + ] + ): + if _is_airbyte_repo_root(current_dir / "airbyte"): + return current_dir / "airbyte" + if _is_airbyte_repo_root(current_dir / "airbyte-enterprise"): + return current_dir / "airbyte-enterprise" + + return None + + current_dir = from_dir.resolve().absolute() + while current_dir != current_dir.parent: # abort when we reach file system root + found_dir = _find_in_adjacent_dirs(current_dir) + if found_dir: + return found_dir + + # Move up one directory + current_dir = current_dir.parent + + raise FileNotFoundError( + f"Could not find the Airbyte repository root directory. 
Current directory: {from_dir}" + ) + + +def resolve_connector_name_and_directory( + connector_ref: str | Path | None = None, + *, + connector_directory: Path | None = None, +) -> tuple[str, Path]: + """Resolve the connector name and directory. + + This function will resolve the connector name and directory based on the provided + reference. If no input ref is provided, it will look within the + current working directory. If the current working directory is not a connector + directory (e.g. starting with 'source-') and no connector name or path is provided, + the process will fail. + If ref is sent as a string containing "/" or "\\", it will be treated as a path to the + connector directory. + + raises: + ValueError: If the connector name or directory cannot be resolved. + FileNotFoundError: If the connector directory does not exist or cannot be found. + """ + connector_name: str | None = None + + # Resolve connector_ref to connector_name or connector_directory (if provided) + if connector_ref: + if isinstance(connector_ref, str): + if "/" in connector_ref or "\\" in connector_ref: + # If the connector name is a path, treat it as a directory + connector_directory = Path(connector_ref) + else: + # Otherwise, treat it as a connector name + connector_name = connector_ref + elif isinstance(connector_ref, Path): + connector_directory = connector_ref + else: + raise ValueError( + "connector_ref must be a string or Path. " + f"Received type '{type(connector_ref).__name__}': {connector_ref!r}", + ) + + if not connector_directory: + if connector_name: + connector_directory = find_connector_root_from_name(connector_name) + else: + cwd = Path().resolve().absolute() + if cwd.name.startswith("source-") or cwd.name.startswith("destination-"): + connector_directory = cwd + else: + raise ValueError( + "The 'connector' input must be provided if not " + "running from a connector directory. 
" + f"Could not infer connector directory from: {cwd}" + ) + + if not connector_name: + connector_name = connector_directory.name + + if connector_directory: + connector_directory = connector_directory.resolve().absolute() + elif connector_name: + connector_directory = find_connector_root_from_name(connector_name) + else: + raise ValueError( + f"Could not infer connector_name or connector_directory from input ref: {connector_ref}", + ) + + return connector_name, connector_directory + + +def resolve_connector_name( + connector_directory: Path, +) -> str: + """Resolve the connector name. + + This function will resolve the connector name based on the provided connector directory. + If the current working directory is not a connector directory + (e.g. starting with 'source-'), the process will fail. + + Raises: + FileNotFoundError: If the connector directory does not exist or cannot be found. + """ + if not connector_directory: + raise FileNotFoundError( + "Connector directory does not exist or cannot be found. Please provide a valid " + "connector directory." + ) + connector_name = connector_directory.absolute().name + if not connector_name.startswith("source-") and not connector_name.startswith("destination-"): + raise ValueError( + f"Connector directory '{connector_name}' does not look like a valid connector directory. 
" + f"Full path: {connector_directory.absolute()}" + ) + return connector_name + + +def find_connector_root(from_paths: list[Path]) -> Path: + """Find the root directory of the connector.""" + for path in from_paths: + # If we reach here, we didn't find the manifest file in any parent directory + # Check if the manifest file exists in the current directory + for parent in [path, *path.parents]: + if (parent / METADATA_YAML).exists(): + return parent + if (parent / MANIFEST_YAML).exists(): + return parent + if (parent / ACCEPTANCE_TEST_CONFIG).exists(): + return parent + if parent.name == "airbyte_cdk": + break + + raise FileNotFoundError( + "Could not find connector root directory relative to the provided directories: " + f"'{str(from_paths)}'." + ) + + +def find_connector_root_from_name(connector_name: str) -> Path: + """Find the root directory of the connector from its name.""" + with suppress(FileNotFoundError): + return find_connector_root([Path(connector_name)]) + + # If the connector name is not found, check if we are in the airbyte monorepo + # and try to find the connector root from the current directory. + + cwd: Path = Path().absolute() + + try: + airbyte_repo_root: Path = resolve_airbyte_repo_root(cwd) + except FileNotFoundError as ex: + raise FileNotFoundError( + "Could not find connector root directory relative and we are not in the airbyte repo." + ) from ex + + expected_connector_dir: Path = ( + airbyte_repo_root / "airbyte-integrations" / "connectors" / connector_name + ) + if not expected_connector_dir.exists(): + raise FileNotFoundError( + f"Could not find connector directory '{expected_connector_dir}' relative to the airbyte repo." 
+ ) + + return expected_connector_dir diff --git a/airbyte_cdk/utils/docker.py b/airbyte_cdk/utils/docker.py index 5516aa720..db6355eac 100644 --- a/airbyte_cdk/utils/docker.py +++ b/airbyte_cdk/utils/docker.py @@ -12,14 +12,10 @@ from pathlib import Path import click +import requests from airbyte_cdk.models.connector_metadata import ConnectorLanguage, MetadataFile -from airbyte_cdk.utils.docker_image_templates import ( - DOCKERIGNORE_TEMPLATE, - JAVA_CONNECTOR_DOCKERFILE_TEMPLATE, - MANIFEST_ONLY_DOCKERFILE_TEMPLATE, - PYTHON_CONNECTOR_DOCKERFILE_TEMPLATE, -) +from airbyte_cdk.utils.connector_paths import resolve_airbyte_repo_root @dataclass(kw_only=True) @@ -145,6 +141,7 @@ def build_connector_image( tag: str, primary_arch: ArchEnum = ArchEnum.ARM64, # Assume MacBook M series by default no_verify: bool = False, + dockerfile_override: Path | None = None, ) -> None: """Build a connector Docker image. @@ -167,10 +164,36 @@ def build_connector_image( ConnectorImageBuildError: If the image build or tag operation fails. """ connector_kebab_name = connector_name - connector_snake_name = connector_kebab_name.replace("-", "_") - dockerfile_path = connector_directory / "build" / "docker" / "Dockerfile" - dockerignore_path = connector_directory / "build" / "docker" / "Dockerfile.dockerignore" + if dockerfile_override: + dockerfile_path = dockerfile_override + else: + dockerfile_path = connector_directory / "build" / "docker" / "Dockerfile" + dockerignore_path = connector_directory / "build" / "docker" / "Dockerfile.dockerignore" + try: + dockerfile_text, dockerignore_text = get_dockerfile_templates( + metadata=metadata, + connector_directory=connector_directory, + ) + except FileNotFoundError: + # If the Dockerfile templates are not found in a local Airbyte repo checkout, + # download the templates from the Airbyte repo on GitHub. This is a fallback + # in case the Airbyte repo is not checked out locally. 
+ try: + dockerfile_text, dockerignore_text = _download_dockerfile_defs( + connector_language=metadata.data.language, + ) + except requests.HTTPError as e: + raise ConnectorImageBuildError( + build_args=[], + error_text=( + "Could not locate local dockerfile templates and " + f"failed to download Dockerfile templates from github: {e}" + ), + ) from e + + dockerfile_path.write_text(dockerfile_text) + dockerignore_path.write_text(dockerignore_text) extra_build_script: str = "" build_customization_path = connector_directory / "build_customization.py" @@ -185,14 +208,9 @@ def build_connector_image( ) base_image = metadata.data.connectorBuildOptions.baseImage - - dockerfile_path.write_text(get_dockerfile_template(metadata)) - dockerignore_path.write_text(DOCKERIGNORE_TEMPLATE) - build_args: dict[str, str | None] = { "BASE_IMAGE": base_image, - "CONNECTOR_SNAKE_NAME": connector_snake_name, - "CONNECTOR_KEBAB_NAME": connector_kebab_name, + "CONNECTOR_NAME": connector_kebab_name, "EXTRA_BUILD_SCRIPT": extra_build_script, } @@ -246,31 +264,100 @@ def build_connector_image( sys.exit(0) -def get_dockerfile_template( +def _download_dockerfile_defs( + connector_language: ConnectorLanguage, +) -> tuple[str, str]: + """Download the Dockerfile and .dockerignore templates for the specified connector language. + + We use the requests library to download from the master branch hosted on GitHub. + + Args: + connector_language: The language of the connector. + + Returns: + A tuple containing the Dockerfile and .dockerignore templates as strings. + + Raises: + ValueError: If the connector language is not supported. + requests.HTTPError: If the download fails. 
+ """ + print("Downloading Dockerfile and .dockerignore templates from GitHub...") + # Map ConnectorLanguage to template directory + language_to_template_suffix = { + ConnectorLanguage.PYTHON: "python-connector", + ConnectorLanguage.JAVA: "java-connector", + ConnectorLanguage.MANIFEST_ONLY: "manifest-only-connector", + } + + if connector_language not in language_to_template_suffix: + raise ValueError(f"Unsupported connector language: {connector_language}") + + template_suffix = language_to_template_suffix[connector_language] + base_url = f"https://github.com/airbytehq/airbyte/raw/master/docker-images/" + + dockerfile_url = f"{base_url}/Dockerfile.{template_suffix}" + dockerignore_url = f"{base_url}/Dockerfile.{template_suffix}.dockerignore" + + dockerfile_resp = requests.get(dockerfile_url) + dockerfile_resp.raise_for_status() + dockerfile_text = dockerfile_resp.text + + dockerignore_resp = requests.get(dockerignore_url) + dockerignore_resp.raise_for_status() + dockerignore_text = dockerignore_resp.text + + return dockerfile_text, dockerignore_text + + +def get_dockerfile_templates( metadata: MetadataFile, -) -> str: + connector_directory: Path, +) -> tuple[str, str]: """Get the Dockerfile template for the connector. Args: metadata: The metadata of the connector. connector_name: The name of the connector. + Raises: + ValueError: If the connector language is not supported. + FileNotFoundError: If the Dockerfile or .dockerignore is not found. + Returns: - The Dockerfile template as a string. + A tuple containing the Dockerfile and .dockerignore templates as strings. 
""" - if metadata.data.language == ConnectorLanguage.PYTHON: - return PYTHON_CONNECTOR_DOCKERFILE_TEMPLATE - - if metadata.data.language == ConnectorLanguage.MANIFEST_ONLY: - return MANIFEST_ONLY_DOCKERFILE_TEMPLATE - - if metadata.data.language == ConnectorLanguage.JAVA: - return JAVA_CONNECTOR_DOCKERFILE_TEMPLATE + if metadata.data.language not in [ + ConnectorLanguage.PYTHON, + ConnectorLanguage.MANIFEST_ONLY, + ConnectorLanguage.JAVA, + ]: + raise ValueError( + f"Unsupported connector language: {metadata.data.language}. " + "Please check the connector's metadata file." + ) - raise ValueError( - f"Unsupported connector language: {metadata.data.language}. " - "Please check the connector's metadata file." + airbyte_repo_root = resolve_airbyte_repo_root( + from_dir=connector_directory, + ) + # airbyte_repo_root successfully resolved + dockerfile_path = ( + airbyte_repo_root / "docker-images" / f"Dockerfile.{metadata.data.language.value}-connector" ) + dockerignore_path = ( + airbyte_repo_root + / "docker-images" + / f"Dockerfile.{metadata.data.language.value}-connector.dockerignore" + ) + if not dockerfile_path.exists(): + raise FileNotFoundError( + f"Dockerfile for {metadata.data.language.value} connector not found at {dockerfile_path}" + ) + if not dockerignore_path.exists(): + raise FileNotFoundError( + f".dockerignore for {metadata.data.language.value} connector not found at {dockerignore_path}" + ) + + return dockerfile_path.read_text(), dockerignore_path.read_text() def run_docker_command( diff --git a/airbyte_cdk/utils/docker_image_templates.py b/airbyte_cdk/utils/docker_image_templates.py deleted file mode 100644 index 761b9adeb..000000000 --- a/airbyte_cdk/utils/docker_image_templates.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright (c) 2025 Airbyte, Inc., all rights reserved. -"""A collection of Dockerfile templates for building Airbyte connectors. 
- -The templates are designed to be used with the Airbyte CDK and can be customized -for different connectors and architectures. - -These templates are used to generate connector images. -""" - -############################## -## GLOBAL DOCKERIGNORE FILE ## -############################## - -DOCKERIGNORE_TEMPLATE: str = "\n".join( - [ - "# This file is auto-generated. Do not edit.", - "*," # Ignore everything not explicitly allowed below - "build/", - "!build/distributions/*.tar", - ".venv/", - "secrets/", - "!setup.py", - "!pyproject.toml", - "!poetry.lock", - "!poetry.toml", - "!components.py", - "!requirements.txt", - "!README.md", - "!metadata.yaml", - "!build_customization.py", - "!source_*", - "!destination_*", - ] -) - -########################### -# PYTHON CONNECTOR IMAGE ## -########################### - -PYTHON_CONNECTOR_DOCKERFILE_TEMPLATE = r""" -# syntax=docker/dockerfile:1 -# check=skip=all -ARG BASE_IMAGE - -FROM ${BASE_IMAGE} AS builder -ARG BASE_IMAGE -ARG CONNECTOR_SNAKE_NAME -ARG CONNECTOR_KEBAB_NAME -ARG EXTRA_PREREQS_SCRIPT="" - -WORKDIR /airbyte/integration_code - -COPY . ./ - -# Conditionally copy and execute the extra build script if provided -RUN if [ -n "${EXTRA_PREREQS_SCRIPT}" ]; then \ - cp ${EXTRA_PREREQS_SCRIPT} ./extra_prereqs_script && \ - ./extra_prereqs_script; \ - fi - -# TODO: Pre-install uv on the base image to speed up the build. -# (uv is still faster even with the extra step.) -RUN pip install --no-cache-dir uv -RUN python -m uv pip install --no-cache-dir . - -FROM ${BASE_IMAGE} -ARG CONNECTOR_SNAKE_NAME -ARG CONNECTOR_KEBAB_NAME -ARG BASE_IMAGE - -WORKDIR /airbyte/integration_code - -COPY --from=builder /usr/local /usr/local -COPY --chmod=755 < bool: + """ + Check if the local repository structure is correct. + This function checks if the current working directory is 'airbyte-cdk'. 
+ """ + return all( + [ + CDK_REPO_ROOT.name == "airbyte-python-cdk", + (CDK_REPO_ROOT.parent / "airbyte").is_dir(), + (CDK_REPO_ROOT.parent / "airbyte-enterprise").is_dir(), + (CDK_REPO_ROOT.parent / "airbyte" / "airbyte-integrations").is_dir(), + (CDK_REPO_ROOT.parent / "airbyte-enterprise" / "airbyte-integrations").is_dir(), + ] + ) + + +@pytest.mark.parametrize( + "start_dir_rel, expect_success", + [ + (CDK_REPO_ROOT / ".." / "airbyte", True), + (CDK_REPO_ROOT / ".." / "airbyte" / "airbyte-ci", True), + (CDK_REPO_ROOT.parent, True), # Parent directory from CDK repo + (CDK_REPO_ROOT.parent.parent, False), # Grandparent directory from CDK repo + (CDK_REPO_ROOT / ".." / "airbyte-enterprise", True), + (CDK_REPO_ROOT / ".." / "airbyte-enterprise" / "airbyte-integrations", True), + (Path("/"), False), # Filesystem root + (Path("/unrelated"), False), + (Path("/unrelated/foo"), False), + ], +) +@pytest.mark.skipif( + not check_local_repo_structure(), + reason=( + "Test requires a specific local repository structure with " + "'airbyte' and 'airbyte-enterprise' checked out." + ), +) +def test_resolve_airbyte_repo_root_real_fs( + start_dir_rel: Path, + expect_success: bool, +): + """ + This test assumes that the developer's workstation has the following sibling directories checked out: + - airbyte + - airbyte-python-cdk + - airbyte-enterprise + in the same parent directory (e.g., ~/repos/). + + The test is skipped when this directory structure is not present. + """ + try: + repo_root = resolve_airbyte_repo_root(start_dir_rel) + if repo_root is None: + raise AssertionError( + f"Airbyte repo root should not be None, from: '{start_dir_rel!s}'." 
+ ) + if repo_root.name != "airbyte" and repo_root.name != "airbyte-enterprise": + raise AssertionError( + f"Airbyte repo root should be 'airbyte' or 'airbyte-enterprise', " + f"but found: '{repo_root.name!s}'" + ) + if not repo_root.is_dir(): + raise AssertionError( + f"Found Airbyte repo root, but it is not a directory: {repo_root!s}" + ) + if not expect_success: + raise AssertionError( + f"Airbyte repo root found from '{start_dir_rel!s}' when it was not expected." + f"Found: {repo_root!s}" + ) + except FileNotFoundError: + if expect_success: + raise AssertionError(f"Airbyte repo root not found from '{start_dir_rel!s}'.") diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 5823fb8cc..f930e80da 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -32,7 +32,7 @@ custom_code_execution_permitted, register_components_module_from_string, ) -from airbyte_cdk.test.standard_tests.test_resources import MANIFEST_YAML +from airbyte_cdk.utils.connector_paths import MANIFEST_YAML SAMPLE_COMPONENTS_PY_TEXT = """ def sample_function() -> str: