@image_cli_group.command()
@click.option(
    "--connector-name",
    type=str,
    help="Name of the connector to build. Ignored if --connector-directory is provided.",
)
@click.option(
    "--connector-directory",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    help="Path to the connector directory.",
)
@click.option("--tag", default="dev", help="Tag to apply to the built image (default: dev)")
@click.option("--no-verify", is_flag=True, help="Skip verification of the built image")
def build(
    connector_name: str | None = None,
    connector_directory: Path | None = None,
    *,
    tag: str = "dev",
    no_verify: bool = False,
) -> None:
    """Build a connector Docker image.

    This command builds a Docker image for a connector from a generated
    Dockerfile, using the base image specified in the connector's metadata.
    The image is built for both AMD64 and ARM64 architectures.
    """
    # NOTE: the docstring above is rendered as the command's help text, so
    # parameter documentation lives in the `click.option` help strings instead.
    if not verify_docker_installation():
        click.echo(
            "Docker is not installed or not running. Please install Docker and try again.", err=True
        )
        sys.exit(1)

    # Fill in whichever of name/directory the user did not provide.
    connector_name, connector_directory = resolve_connector_name_and_directory(
        connector_name=connector_name,
        connector_directory=connector_directory,
    )

    metadata_file_path: Path = connector_directory / "metadata.yaml"
    try:
        metadata = MetadataFile.from_file(metadata_file_path)
    except (FileNotFoundError, ValueError) as e:
        click.echo(
            f"Error loading metadata file '{metadata_file_path}': {e!s}",
            err=True,
        )
        sys.exit(1)

    click.echo(f"Building Image for Connector: {metadata.data.dockerRepository}:{tag}")
    try:
        build_connector_image(
            connector_directory=connector_directory,
            connector_name=connector_name,
            metadata=metadata,
            tag=tag,
            no_verify=no_verify,
        )
    except (ConnectorImageBuildError, ValueError) as e:
        # `build_connector_image` raises ValueError for missing build options and
        # for unsupported connector languages; report those as clean CLI errors
        # rather than letting a traceback escape.
        click.echo(
            f"Error building connector image: {e!s}",
            err=True,
        )
        sys.exit(1)
class ConnectorMetadata(BaseModel):
    """Connector metadata from metadata.yaml.

    Keys that are not declared below are still accepted, because the model is
    configured with `extra="allow"`.
    """

    model_config = {"extra": "allow"}

    dockerRepository: str = Field(..., description="Docker repository for the connector image")
    dockerImageTag: str = Field(..., description="Docker image tag for the connector")
    tags: list[str] = Field(
        default=[],
        description="List of tags for the connector",
    )
    connectorBuildOptions: ConnectorBuildOptions | None = Field(
        None, description="Options for building the connector"
    )

    @property
    def language(self) -> ConnectorLanguage:
        """Return the language named by the first recognized `language:<name>` tag.

        Tags without the `language:` prefix, and `language:` tags whose value is
        not one of the recognized names, are skipped. When no tag matches,
        `ConnectorLanguage.UNKNOWN` is returned.
        """
        recognized = {
            "python": ConnectorLanguage.PYTHON,
            "java": ConnectorLanguage.JAVA,
            "low-code": ConnectorLanguage.LOW_CODE,
            "manifest-only": ConnectorLanguage.MANIFEST_ONLY,
        }
        for entry in self.tags:
            prefix, separator, name = entry.partition(":")
            if prefix == "language" and separator and name in recognized:
                return recognized[name]

        return ConnectorLanguage.UNKNOWN
@classmethod
def from_file(
    cls,
    file_path: Path,
) -> MetadataFile:
    """Load and validate connector metadata from a YAML file.

    Args:
        file_path: Path to the `metadata.yaml` file to read.

    Returns:
        The validated metadata model.

    Raises:
        FileNotFoundError: If `file_path` does not exist.
        ValueError: If the YAML document is empty, is not a mapping, or has no
            top-level `data` key.
    """
    if not file_path.exists():
        raise FileNotFoundError(f"Metadata file not found: {file_path!s}")

    metadata_dict = yaml.safe_load(file_path.read_text())

    # `safe_load` may return None (empty file) or a scalar/list root. Require a
    # mapping so the membership test below is a key lookup, not a substring
    # match on a string or a TypeError on a number.
    if not isinstance(metadata_dict, dict) or "data" not in metadata_dict:
        raise ValueError(
            f"Invalid metadata format: missing 'data' field in YAML file '{file_path!s}'"
        )

    return cls.model_validate(metadata_dict)
def _tag_image(
    tag: str,
    new_tags: list[str] | str,
) -> None:
    """Apply one or more additional tags to an existing Docker image.

    Args:
        tag: Tag of the image to add tags to.
        new_tags: A single new tag, or a list of new tags, to apply to the image.

    Raises:
        ConnectorImageBuildError: If the docker tag command fails.
    """
    if not isinstance(new_tags, list):
        new_tags = [new_tags]

    for new_tag in new_tags:
        print(f"Tagging image '{tag}' as: {new_tag}")
        docker_args = [
            "docker",
            "tag",
            tag,
            new_tag,
        ]
        try:
            run_docker_command(
                docker_args,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            raise ConnectorImageBuildError(
                # Output is not captured here, so `e.stderr` is None; fall back to
                # the exception text so the error message is never "None".
                error_text=e.stderr or str(e),
                build_args=docker_args,
            ) from e


def build_connector_image(
    connector_name: str,
    connector_directory: Path,
    metadata: MetadataFile,
    tag: str,
    primary_arch: ArchEnum = ArchEnum.ARM64,  # Assume MacBook M series by default
    no_verify: bool = False,
) -> None:
    """Build a connector Docker image.

    Writes a generated `Dockerfile` and `Dockerfile.dockerignore` into
    `<connector_directory>/build/docker`, builds one image per architecture
    (AMD64 and ARM64), tags the primary-architecture image with the plain
    `<dockerRepository>:<tag>` name, and optionally verifies the result.

    This function does not return normally: it ends the process with
    `sys.exit(0)` on success and `sys.exit(1)` when verification fails.

    Args:
        connector_name: The name of the connector.
        connector_directory: The directory containing the connector code.
        metadata: The metadata of the connector.
        tag: The tag to apply to the built image.
        primary_arch: The primary architecture for the build (default: arm64). This
            architecture will be used for the same-named tag. Both AMD64 and ARM64
            images will be built, with the suffixes '-amd64' and '-arm64'.
        no_verify: If True, skip verification of the built image.

    Raises:
        ValueError: If the connector build options are not defined in metadata.yaml,
            or if no Dockerfile template exists for the connector's language.
        ConnectorImageBuildError: If the image build or tag operation fails.
        SystemExit: Always, once the build (and optional verification) has finished.
    """
    # Validate everything up front so a bad configuration leaves no files behind.
    if not metadata.data.connectorBuildOptions:
        raise ValueError(
            "Connector build options are not defined in metadata.yaml. "
            "Please check the connector's metadata file."
        )
    dockerfile_text = get_dockerfile_template(metadata)
    base_image = metadata.data.connectorBuildOptions.baseImage

    connector_kebab_name = connector_name
    connector_snake_name = connector_kebab_name.replace("-", "_")

    docker_build_dir = connector_directory / "build" / "docker"
    dockerfile_path = docker_build_dir / "Dockerfile"
    dockerignore_path = docker_build_dir / "Dockerfile.dockerignore"

    build_customization_path = connector_directory / "build_customization.py"
    extra_build_script = (
        str(build_customization_path) if build_customization_path.exists() else ""
    )

    docker_build_dir.mkdir(parents=True, exist_ok=True)
    dockerfile_path.write_text(dockerfile_text)
    dockerignore_path.write_text(DOCKERIGNORE_TEMPLATE)

    # NOTE(review): the Python Dockerfile template declares
    # `ARG EXTRA_PREREQS_SCRIPT`, not `EXTRA_BUILD_SCRIPT` - confirm which name
    # is intended before relying on this build argument.
    build_args: dict[str, str | None] = {
        "BASE_IMAGE": base_image,
        "CONNECTOR_SNAKE_NAME": connector_snake_name,
        "CONNECTOR_KEBAB_NAME": connector_kebab_name,
        "EXTRA_BUILD_SCRIPT": extra_build_script,
    }

    base_tag = f"{metadata.data.dockerRepository}:{tag}"
    built_tags: dict[str, str] = {}
    for arch in (ArchEnum.AMD64, ArchEnum.ARM64):
        docker_tag = f"{base_tag}-{arch.value}"
        docker_tag_parts = docker_tag.split("/")
        if len(docker_tag_parts) > 2:
            # Repositories with more than two path segments are built under
            # their final segment only.
            docker_tag = docker_tag_parts[-1]
        built_tags[arch.value] = _build_image(
            context_dir=connector_directory,
            dockerfile=dockerfile_path,
            metadata=metadata,
            tag=docker_tag,
            arch=arch,
            build_args=build_args,
        )

    # Re-tag the image that was actually built. Recomputing the name from
    # `base_tag` would point at a non-existent image whenever the tag was
    # shortened in the loop above.
    _tag_image(
        tag=built_tags[primary_arch.value],
        new_tags=[base_tag],
    )

    if no_verify:
        click.echo(f"Build completed successfully (without verification): {base_tag}")
        sys.exit(0)

    if verify_connector_image(base_tag):
        click.echo(f"Build completed successfully: {base_tag}")
        sys.exit(0)

    click.echo(f"Built image failed verification: {base_tag}", err=True)
    sys.exit(1)
def run_docker_command(
    cmd: list[str],
    *,
    check: bool = True,
    capture_output: bool = False,
) -> subprocess.CompletedProcess[str]:
    """Run a Docker command as a subprocess.

    The command runs with the current environment plus `DOCKER_BUILDKIT=1`, and
    its output is handled in text mode.

    Args:
        cmd: The command to run as a list of strings.
        check: If True, raises an exception if the command fails. If False, the caller is
            responsible for checking the return code.
        capture_output: If True, captures stdout and stderr and returns to the caller.
            If False, the output is printed to the console.

    Returns:
        The completed process, including the return code and any captured output.

    Raises:
        subprocess.CalledProcessError: If the command fails and check is True.
        FileNotFoundError: If the executable named by `cmd[0]` cannot be found.
    """
    print(f"Running command: {' '.join(cmd)}")

    return subprocess.run(
        cmd,
        text=True,
        check=check,
        # If capture_output=True, stderr and stdout are captured and returned to caller:
        capture_output=capture_output,
        env={**os.environ, "DOCKER_BUILDKIT": "1"},
    )


def verify_docker_installation() -> bool:
    """Return True if the `docker` executable can be run successfully.

    NOTE(review): only `docker --version` is executed. That appears to check the
    client binary rather than the daemon, so a True result may not prove that
    Docker is running - confirm whether a daemon check is wanted here.
    """
    try:
        run_docker_command(["docker", "--version"])
        return True
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False


# Marker returned by `_parse_json` for unparseable text. A dedicated object is
# needed because `None` is itself a valid JSON value (`null`).
_INVALID_JSON = object()


def _parse_json(text: str) -> object:
    """Parse `text` as JSON, returning `_INVALID_JSON` instead of raising."""
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        return _INVALID_JSON


def _spec_output_is_valid(output: str) -> bool:
    """Return True if `output` is one JSON document or has a SPEC message line."""
    if _parse_json(output) is not _INVALID_JSON:
        return True

    # Otherwise treat the output as newline-delimited JSON messages (for example
    # log messages followed by the spec) and look for the SPEC message itself.
    messages = (_parse_json(line) for line in output.splitlines() if line.strip())
    return any(
        isinstance(message, dict) and message.get("type") == "SPEC" for message in messages
    )


def verify_connector_image(
    image_name: str,
) -> bool:
    """Verify the built image by running the spec command.

    The image passes when `docker run --rm <image> spec` exits successfully and
    its stdout is either a single valid JSON document or newline-delimited JSON
    containing a message whose `type` is `"SPEC"`.

    Args:
        image_name: The full image name with tag.

    Returns:
        True if the spec command succeeds with acceptable output, False otherwise.
    """
    # Same logger object as the module-level one: `getLogger` returns a single
    # instance per name.
    log = logging.getLogger(__name__)
    log.info(f"Verifying image {image_name} with 'spec' command...")

    cmd = ["docker", "run", "--rm", image_name, "spec"]

    try:
        result = run_docker_command(
            cmd,
            check=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError as e:
        log.error(f"Image verification failed: {e.stderr}")
        return False
    except FileNotFoundError as e:
        # Matches `verify_docker_installation`: a missing `docker` executable is a
        # failed verification, not a crash.
        log.error(f"Image verification failed: {e}")
        return False

    if not result.stdout:
        log.error("No output from spec command.")
        return False

    if not _spec_output_is_valid(result.stdout):
        log.error("Invalid JSON output from spec command.")
        return False

    return True
Do not edit.", + # "*," + "build/", + ".venv/", + "secrets/", + "!setup.py", + "!pyproject.toml", + "!poetry.lock", + "!poetry.toml", + "!components.py", + "!requirements.txt", + "!README.md", + "!metadata.yaml", + "!build_customization.py", + "!source_*", + "!destination_*", + ] +) + +########################### +# PYTHON CONNECTOR IMAGE ## +########################### + +PYTHON_CONNECTOR_DOCKERFILE_TEMPLATE = """ +# syntax=docker/dockerfile:1 +# check=skip=all +ARG BASE_IMAGE + +FROM ${BASE_IMAGE} AS builder +ARG BASE_IMAGE +ARG CONNECTOR_SNAKE_NAME +ARG CONNECTOR_KEBAB_NAME +ARG EXTRA_PREREQS_SCRIPT="" + +WORKDIR /airbyte/integration_code + +COPY . ./ + +# Conditionally copy and execute the extra build script if provided +RUN if [ -n "${EXTRA_PREREQS_SCRIPT}" ]; then \ + cp ${EXTRA_PREREQS_SCRIPT} ./extra_prereqs_script && \ + ./extra_prereqs_script; \ + fi + +# TODO: Pre-install uv on the base image to speed up the build. +# (uv is still faster even with the extra step.) +RUN pip install --no-cache-dir uv +RUN python -m uv pip install --no-cache-dir . + +FROM ${BASE_IMAGE} +ARG CONNECTOR_SNAKE_NAME +ARG CONNECTOR_KEBAB_NAME +ARG BASE_IMAGE + +WORKDIR /airbyte/integration_code + +COPY --from=builder /usr/local /usr/local +COPY --chmod=755 <