From 8f3f2d2bc6aaae11571ad0fb40d166f3f33991a6 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 25 Jan 2025 03:59:04 +0000 Subject: [PATCH 01/11] feat: add pre-commit config for airbyte-python-cdk Co-Authored-By: Aaron Steers --- .pre-commit-config.yaml | 51 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..64a56539a --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,51 @@ +exclude: | + (?x)( + # Python/system files + ^.*/__init__\.py$| + ^.*?/\.venv/.*$| + ^.*?/node_modules/.*$| + ^.*?/\.ruff_cache/.*$| + + # Package management + ^.*?/poetry\.lock$| + ^.*?/package-lock\.json$| + ^.*?/pnpm-lock\.yaml$| + + # Build and test artifacts + ^.*?/build/.*$| + ^.*?/dist/.*$| + ^.*?/\.pytest_cache/.*$| + ^.*?/\.coverage$| + ^.*?/coverage\.xml$| + ^.*?/\.mypy_cache/.*$ + ) + +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.3 + hooks: + # Run the linter. + - id: ruff + args: + - --fix + - --select=I + + # Run the formatter. + - id: ruff-format + + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v3.0.3 + hooks: + - id: prettier + types_or: [json, yaml] + additional_dependencies: + - prettier@3.0.3 + + - repo: local + hooks: + - id: addlicense + name: Add license headers + entry: addlicense -c "Airbyte, Inc." -l apache -v -f LICENSE_SHORT + language: golang + additional_dependencies: [github.com/google/addlicense@v1.1.1] + files: \.py$ From 1f050696feb92e1dc105f0c769d06e9a5fb531ec Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 25 Jan 2025 04:15:38 +0000 Subject: [PATCH 02/11] chore: add LICENSE_SHORT file for license header checks Co-Authored-By: Aaron Steers --- LICENSE_SHORT | 1 + 1 file changed, 1 insertion(+) create mode 100644 LICENSE_SHORT diff --git a/LICENSE_SHORT b/LICENSE_SHORT new file mode 100644 index 000000000..ad0158e9b --- /dev/null +++ b/LICENSE_SHORT @@ -0,0 +1 @@ +Copyright (c) 2024 Airbyte, Inc., all rights reserved. From 203a6919556dfa99ab9631df7237bce6c9f1078b Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 25 Jan 2025 04:21:41 +0000 Subject: [PATCH 03/11] chore: update LICENSE_SHORT year to 2025 Co-Authored-By: Aaron Steers --- LICENSE_SHORT | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE_SHORT b/LICENSE_SHORT index ad0158e9b..01fbe9753 100644 --- a/LICENSE_SHORT +++ b/LICENSE_SHORT @@ -1 +1 @@ -Copyright (c) 2024 Airbyte, Inc., all rights reserved. +Copyright (c) 2025 Airbyte, Inc., all rights reserved. From 0952422baa2346a4a6f18c71a1d1c69c2c5c6f3c Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 25 Jan 2025 04:22:35 +0000 Subject: [PATCH 04/11] chore: simplify ruff config to use repo settings Co-Authored-By: Aaron Steers --- .pre-commit-config.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 64a56539a..1613c6e5d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,13 +24,11 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.8.3 hooks: - # Run the linter. + # Run the linter with repo-defined settings - id: ruff - args: - - --fix - - --select=I + args: [--fix] - # Run the formatter. + # Run the formatter with repo-defined settings - id: ruff-format - repo: https://github.com/pre-commit/mirrors-prettier From bbfd75f9b99060d805bb0dceea47ba647b012097 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 25 Jan 2025 04:31:01 +0000 Subject: [PATCH 05/11] feat: add TOML pre-commit hooks for syntax checking and sorting Co-Authored-By: Aaron Steers --- .pre-commit-config.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1613c6e5d..adb55001f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,6 +21,17 @@ exclude: | ) repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-toml + + - repo: https://github.com/pappasam/toml-sort + rev: v0.23.1 + hooks: + - id: toml-sort + args: [--all, --in-place] + - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.8.3 hooks: From dce36ed2d096e600529522b22a6a39a8e712a32b Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 25 Jan 2025 04:31:52 +0000 Subject: [PATCH 06/11] chore: sort pyproject.toml with toml-sort Co-Authored-By: Aaron Steers --- pyproject.toml | 272 +++++++++++++++++++++++-------------------------- 1 file changed, 126 insertions(+), 146 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0bba75f5a..2cfb66f8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,202 +1,182 @@ [build-system] -requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"] build-backend = "poetry_dynamic_versioning.backend" +requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"] + +[tool.airbyte_ci] +mount_docker_socket = true +optional_poetry_groups = ["dev"] +poe_tasks = ["check-ci"] +poetry_extras = ["file-based", "vector-db-based"] +python_versions = ["3.10", "3.11"] + +[tool.check-wheel-contents] +# Quality control for Python wheel generation. Docs here: +# - https://github.com/jwodder/check-wheel-contents +ignore = [ + "W002" # Duplicate files. (TODO: Fix the few duplicate files, mostly `__init__.py` files that have only copyright text.) +] + +[tool.isort] +skip = ["__init__.py"] # TODO: Remove after this is fixed: https://github.com/airbytehq/airbyte-python-cdk/issues/12 +[tool.poe.tasks] +_format-check-prettier = {cmd = "npx prettier . --check", help = "Check formatting with prettier."} +# Format check tasks +_format-check-ruff = {cmd = "ruff format --check .", help = "Check formatting with Ruff."} +_format-fix-prettier = {cmd = "npx prettier . --write", help = "Format with prettier."} +# Format fix tasks +_format-fix-ruff = {cmd = "ruff format .", help = "Format with Ruff."} +# Linting/Typing check tasks +_lint-ruff = {cmd = "poetry run ruff check .", help = "Lint with Ruff."} +# Build tasks +assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."} +build = {help = "Run all tasks to build the package.", sequence = ["assemble", "build-package"]} +build-package = {cmd = "poetry build", help = "Build the python package: source and wheels archives."} +check-all = {help = "Lint, format, and type-check modified files.", ignore_fail = "return_non_zero", sequence = ["check-lockfile", "format-check", "lint", "type-check"]} +check-ci = {help = "Build the package, lint and run unit tests. Does not include type-checking.", sequence = ["build", "check-lockfile", "lint", "unit-test-with-cov"]} +# TODO: find a version of the modified mypy check that works both locally and in CI. +check-local = {help = "Lint all code, type-check modified files, and run unit tests.", sequence = ["check-lockfile", "lint", "type-check", "unit-test-with-cov"]} +# Lockfile check task +check-lockfile = {cmd = "poetry check", help = "Check the poetry lock file."} +# API Docs with PDoc +docs-generate = {cmd = "python -m docs.generate run", env = {PDOC_ALLOW_EXEC = "1"}, help = "Generate API documentation with PDoc."} +docs-preview = {help = "Generate API documentation with PDoc and then open the docs in the default web browser.", shell = "poe docs-generate && open docs/generated/index.html"} +fix-all = {help = "Lint-fix and format-fix modified files, ignoring unsafe fixes.", ignore_fail = "return_non_zero", sequence = ["format-fix", "lint-fix"]} +fix-and-check = {help = "Lint-fix and format-fix, then re-check to see if any issues remain.", ignore_fail = "return_non_zero", sequence = ["check-all", "fix-all"]} +format-check = {help = "Check formatting for all file types.", ignore_fail = "return_non_zero", sequence = ["_format-check-prettier", "_format-check-ruff"]} +format-fix = {help = "Format all file types.", ignore_fail = "return_non_zero", sequence = ["_format-fix-prettier", "_format-fix-ruff"]} +# Installation +install = {shell = "poetry install --all-extras"} +lint = {help = "Lint all code. Includes type checking.", ignore_fail = "return_non_zero", sequence = ["_lint-ruff", "type-check"]} +# Linting/Typing fix tasks +lint-fix = {cmd = "poetry run ruff check --fix .", help = "Auto-fix any lint issues that Ruff can automatically resolve (excluding 'unsafe' fixes)."} +lint-fix-unsafe = {cmd = "poetry run ruff check --fix --unsafe-fixes .", help = "Lint-fix modified files, including 'unsafe' fixes. It is recommended to first commit any pending changes and then always manually review any unsafe changes applied."} +# Build and check +pre-push = {help = "Run all build and check tasks.", sequence = ["build", "check-local"]} +pytest = {cmd = "poetry run coverage run -m pytest --durations=10", help = "Run all pytest tests."} +pytest-fast = {cmd = "poetry run coverage run -m pytest --durations=5 --exitfirst -m 'not flaky and not slow and not requires_creds'", help = "Run pytest tests, failing fast and excluding slow tests."} +type-check = {cmd = "poetry run mypy airbyte_cdk", help = "Type check modified files with mypy."} +unit-test-with-cov = {cmd = "pytest -s unit_tests --cov=airbyte_cdk --cov-report=term --cov-config ./pyproject.toml", help = "Run unit tests and create a coverage report."} [tool.poetry] -name = "airbyte-cdk" -description = "A framework for writing Airbyte Connectors." authors = ["Airbyte "] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.10", + "Topic :: Scientific/Engineering", + "Topic :: Software Development :: Libraries :: Python Modules" +] +description = "A framework for writing Airbyte Connectors." +documentation = "https://docs.airbyte.io/" +homepage = "https://airbyte.com" +keywords = ["airbyte", "cdk", "connector-development-kit"] license = "MIT" +name = "airbyte-cdk" readme = "README.md" -homepage = "https://airbyte.com" repository = "https://github.com/airbytehq/airbyte-python-cdk" -documentation = "https://docs.airbyte.io/" -classifiers = [ - "Development Status :: 3 - Alpha", - "Intended Audience :: Developers", - "Topic :: Scientific/Engineering", - "Topic :: Software Development :: Libraries :: Python Modules", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3.10", -] -keywords = ["airbyte", "connector-development-kit", "cdk"] - # Python CDK uses dynamic versioning: https://github.com/mtkennerly/poetry-dynamic-versioning -version = "0.0.0" # Version will be calculated dynamically. - -[tool.poetry-dynamic-versioning] -enable = true +version = "0.0.0" # Version will be calculated dynamically. [tool.poetry.dependencies] -python = "^3.10,<3.13" +Jinja2 = "~3.1.2" +PyYAML = "^6.0.1" +Unidecode = "^1.3" airbyte-protocol-models-dataclasses = "^0.14" +# Extras depedencies +avro = {optional = true, version = "~1.11.2"} backoff = "*" cachetools = "*" +cohere = {optional = true, version = "4.21"} +cryptography = ">=42.0.5,<44.0.0" dpath = "^2.1.6" dunamai = "^1.22.0" +fastavro = {optional = true, version = "~1.8.0"} genson = "1.3.0" isodate = "~0.6.1" -Jinja2 = "~3.1.2" jsonref = "~0.2" -jsonschema = "~4.17.3" # 4.18 has some significant breaking changes: https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0 +jsonschema = "~4.17.3" # 4.18 has some significant breaking changes: https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0 +langchain = {optional = true, version = "0.1.16"} +langchain_core = {optional = true, version = "0.1.42"} +markdown = {optional = true, version = "*"} +nltk = {optional = true, version = "3.9.1"} +# This will ensure that even when you run poetry install or pip install, the compatible version of numpy will always be chosen. +# airbyte-ci will try to install latest version when --use-local-cdk is used, resulting in the conflict. +numpy = "<2" +openai = {extras = ["embeddings"], optional = true, version = "0.27.9"} +orjson = "^3.10.7" pandas = "2.2.2" +pdf2image = {optional = true, version = "1.16.3"} +"pdfminer.six" = {optional = true, version = "20221105"} pendulum = "<3.0.0" psutil = "6.1.0" +pyarrow = {optional = true, version = "~15.0.0"} pydantic = "^2.7" +pyjwt = "^2.8.0" pyrate-limiter = "~3.1.0" +pytesseract = {optional = true, version = "0.3.10"} +python = "^3.10,<3.13" +python-calamine = {optional = true, version = "0.2.3"} python-dateutil = "*" +python-snappy = {optional = true, version = "0.7.3"} python-ulid = "^3.0.0" -PyYAML = "^6.0.1" +pytz = "2024.2" rapidfuzz = "^3.10.1" requests = "*" requests_cache = "*" -wcmatch = "10.0" -# Extras depedencies -avro = { version = "~1.11.2", optional = true } -cohere = { version = "4.21", optional = true } -fastavro = { version = "~1.8.0", optional = true } -langchain = { version = "0.1.16", optional = true } -langchain_core = { version = "0.1.42", optional = true } -markdown = { version = "*", optional = true } -openai = { version = "0.27.9", extras = ["embeddings"], optional = true } -pdf2image = { version = "1.16.3", optional = true } -"pdfminer.six" = { version = "20221105", optional = true } -pyarrow = { version = "~15.0.0", optional = true } -pytesseract = { version = "0.3.10", optional = true } -python-calamine = { version = "0.2.3", optional = true } -python-snappy = { version = "0.7.3", optional = true } -tiktoken = { version = "0.8.0", optional = true } -nltk = { version = "3.9.1", optional = true } -# This will ensure that even when you run poetry install or pip install, the compatible version of numpy will always be chosen. -# airbyte-ci will try to install latest version when --use-local-cdk is used, resulting in the conflict. -numpy = "<2" -unstructured = { version = "0.10.27", extras = ["docx", "pptx"], optional = true } -"unstructured.pytesseract" = { version = ">=0.3.12", optional = true } -pyjwt = "^2.8.0" -cryptography = ">=42.0.5,<44.0.0" -pytz = "2024.2" -orjson = "^3.10.7" serpyco-rs = "^1.10.2" -sqlalchemy = {version = "^2.0,!=2.0.36", optional = true } +sqlalchemy = {optional = true, version = "^2.0,!=2.0.36"} +tiktoken = {optional = true, version = "0.8.0"} +unstructured = {extras = ["docx", "pptx"], optional = true, version = "0.10.27"} +"unstructured.pytesseract" = {optional = true, version = ">=0.3.12"} +wcmatch = "10.0" xmltodict = ">=0.13,<0.15" -Unidecode = "^1.3" + +[tool.poetry.extras] +file-based = ["avro", "fastavro", "markdown", "pdf2image", "pdfminer.six", "pyarrow", "pytesseract", "python-calamine", "python-snappy", "unstructured", "unstructured.pytesseract"] +sql = ["sqlalchemy"] +vector-db-based = ["cohere", "langchain", "openai", "tiktoken"] [tool.poetry.group.dev.dependencies] +asyncio = "3.4.3" freezegun = "*" mypy = "*" -asyncio = "3.4.3" -ruff = "^0.7.2" pdoc = "^15.0.0" poethepoet = "^0.24.2" +pympler = "*" pyproject-flake8 = "^6.1.0" pytest = "^7" -pytest-memray = "^1.6.0" -pympler = "*" pytest-cov = "*" pytest-httpserver = "*" +pytest-memray = "^1.6.0" pytest-mock = "*" requests-mock = "*" -# Stubs packages for mypy typing -types-requests = "^2.32.0.20241016" +ruff = "^0.7.2" +types-cachetools = "^5.5.0.20240820" types-python-dateutil = "^2.9.0.20241003" types-pyyaml = "^6.0.12.20240917" -types-cachetools = "^5.5.0.20240820" - -[tool.poetry.extras] -file-based = ["avro", "fastavro", "pyarrow", "unstructured", "pdf2image", "pdfminer.six", "unstructured.pytesseract", "pytesseract", "markdown", "python-calamine", "python-snappy"] -vector-db-based = ["langchain", "openai", "cohere", "tiktoken"] -sql = ["sqlalchemy"] +# Stubs packages for mypy typing +types-requests = "^2.32.0.20241016" [tool.poetry.scripts] - source-declarative-manifest = "airbyte_cdk.cli.source_declarative_manifest:run" -[tool.isort] -skip = ["__init__.py"] # TODO: Remove after this is fixed: https://github.com/airbytehq/airbyte-python-cdk/issues/12 +[tool.poetry-dynamic-versioning] +enable = true + +[tool.pytest.ini_options] +filterwarnings = [ + "ignore::airbyte_cdk.sources.source.ExperimentalClassWarning" +] +log_cli = true +log_cli_date_format = "%Y-%m-%d %H:%M:%S" +log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)" +log_cli_level = "INFO" [tool.ruff] -target-version = "py310" line-length = 100 +target-version = "py310" [tool.ruff.lint] select = ["I"] - -[tool.poe.tasks] -# Installation -install = { shell = "poetry install --all-extras" } - -# Build tasks -assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."} -build-package = {cmd = "poetry build", help = "Build the python package: source and wheels archives."} -build = {sequence = ["assemble", "build-package"], help = "Run all tasks to build the package."} - -# Format check tasks -_format-check-ruff = {cmd = "ruff format --check .", help = "Check formatting with Ruff."} -_format-check-prettier = {cmd = "npx prettier . --check", help = "Check formatting with prettier."} -format-check = {sequence = ["_format-check-ruff", "_format-check-prettier"], help = "Check formatting for all file types.", ignore_fail = "return_non_zero"} - -# Format fix tasks -_format-fix-ruff = {cmd = "ruff format .", help = "Format with Ruff."} -_format-fix-prettier = {cmd = "npx prettier . --write", help = "Format with prettier."} -format-fix = {sequence = ["_format-fix-ruff", "_format-fix-prettier"], help = "Format all file types.", ignore_fail = "return_non_zero"} - -# Linting/Typing check tasks -_lint-ruff = {cmd = "poetry run ruff check .", help = "Lint with Ruff."} -type-check = {cmd = "poetry run mypy airbyte_cdk", help = "Type check modified files with mypy."} -lint = {sequence = ["_lint-ruff", "type-check"], help = "Lint all code. Includes type checking.", ignore_fail = "return_non_zero"} - -# Lockfile check task -check-lockfile = {cmd = "poetry check", help = "Check the poetry lock file."} - -# Linting/Typing fix tasks -lint-fix = { cmd = "poetry run ruff check --fix .", help = "Auto-fix any lint issues that Ruff can automatically resolve (excluding 'unsafe' fixes)." } -lint-fix-unsafe = { cmd = "poetry run ruff check --fix --unsafe-fixes .", help = "Lint-fix modified files, including 'unsafe' fixes. It is recommended to first commit any pending changes and then always manually review any unsafe changes applied." } - -# Combined Check and Fix tasks - -check-all = {sequence = ["lint", "format-check", "type-check", "check-lockfile"], help = "Lint, format, and type-check modified files.", ignore_fail = "return_non_zero"} -fix-all = {sequence = ["format-fix", "lint-fix"], help = "Lint-fix and format-fix modified files, ignoring unsafe fixes.", ignore_fail = "return_non_zero"} -fix-and-check = {sequence = ["fix-all", "check-all"], help = "Lint-fix and format-fix, then re-check to see if any issues remain.", ignore_fail = "return_non_zero"} - -# PyTest tasks - -pytest = {cmd = "poetry run coverage run -m pytest --durations=10", help = "Run all pytest tests."} -pytest-fast = {cmd = "poetry run coverage run -m pytest --durations=5 --exitfirst -m 'not flaky and not slow and not requires_creds'", help = "Run pytest tests, failing fast and excluding slow tests."} -unit-test-with-cov = {cmd = "pytest -s unit_tests --cov=airbyte_cdk --cov-report=term --cov-config ./pyproject.toml", help = "Run unit tests and create a coverage report."} - -# Combined check tasks (other) - -# TODO: find a version of the modified mypy check that works both locally and in CI. -check-local = {sequence = ["lint", "type-check", "check-lockfile", "unit-test-with-cov"], help = "Lint all code, type-check modified files, and run unit tests."} -check-ci = {sequence = ["check-lockfile", "build", "lint", "unit-test-with-cov"], help = "Build the package, lint and run unit tests. Does not include type-checking."} - -# Build and check -pre-push = {sequence = ["build", "check-local"], help = "Run all build and check tasks."} - -# API Docs with PDoc -docs-generate = {env = {PDOC_ALLOW_EXEC = "1"}, cmd = "python -m docs.generate run", help="Generate API documentation with PDoc."} -docs-preview = {shell = "poe docs-generate && open docs/generated/index.html", help="Generate API documentation with PDoc and then open the docs in the default web browser."} - -[tool.check-wheel-contents] -# Quality control for Python wheel generation. Docs here: -# - https://github.com/jwodder/check-wheel-contents -ignore = [ - "W002" # Duplicate files. (TODO: Fix the few duplicate files, mostly `__init__.py` files that have only copyright text.) -] - -[tool.pytest.ini_options] -log_cli = true -log_cli_level = "INFO" -log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)" -log_cli_date_format = "%Y-%m-%d %H:%M:%S" -filterwarnings = [ - "ignore::airbyte_cdk.sources.source.ExperimentalClassWarning" -] - -[tool.airbyte_ci] -python_versions = ["3.10", "3.11"] -optional_poetry_groups = ["dev"] -poetry_extras = ["file-based", "vector-db-based"] -poe_tasks = ["check-ci"] -mount_docker_socket = true From 6232222ba12042c54b07acfb48b2645b1087bb63 Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Fri, 24 Jan 2025 21:37:13 -0800 Subject: [PATCH 07/11] Apply suggestions from code review --- .pre-commit-config.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index adb55001f..e9fb859b6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,12 +26,6 @@ repos: hooks: - id: check-toml - - repo: https://github.com/pappasam/toml-sort - rev: v0.23.1 - hooks: - - id: toml-sort - args: [--all, --in-place] - - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.8.3 hooks: From 6d58e1c5ceea21fbc2c2eacf66eea61b0c2b5bbb Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 25 Jan 2025 05:37:30 +0000 Subject: [PATCH 08/11] chore: remove toml-sort and revert pyproject.toml changes Co-Authored-By: Aaron Steers --- pyproject.toml | 272 ++++++++++++++++++++++++++----------------------- 1 file changed, 146 insertions(+), 126 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2cfb66f8d..0bba75f5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,182 +1,202 @@ [build-system] -build-backend = "poetry_dynamic_versioning.backend" requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"] +build-backend = "poetry_dynamic_versioning.backend" -[tool.airbyte_ci] -mount_docker_socket = true -optional_poetry_groups = ["dev"] -poe_tasks = ["check-ci"] -poetry_extras = ["file-based", "vector-db-based"] -python_versions = ["3.10", "3.11"] - -[tool.check-wheel-contents] -# Quality control for Python wheel generation. Docs here: -# - https://github.com/jwodder/check-wheel-contents -ignore = [ - "W002" # Duplicate files. (TODO: Fix the few duplicate files, mostly `__init__.py` files that have only copyright text.) -] - -[tool.isort] -skip = ["__init__.py"] # TODO: Remove after this is fixed: https://github.com/airbytehq/airbyte-python-cdk/issues/12 - -[tool.poe.tasks] -_format-check-prettier = {cmd = "npx prettier . --check", help = "Check formatting with prettier."} -# Format check tasks -_format-check-ruff = {cmd = "ruff format --check .", help = "Check formatting with Ruff."} -_format-fix-prettier = {cmd = "npx prettier . --write", help = "Format with prettier."} -# Format fix tasks -_format-fix-ruff = {cmd = "ruff format .", help = "Format with Ruff."} -# Linting/Typing check tasks -_lint-ruff = {cmd = "poetry run ruff check .", help = "Lint with Ruff."} -# Build tasks -assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."} -build = {help = "Run all tasks to build the package.", sequence = ["assemble", "build-package"]} -build-package = {cmd = "poetry build", help = "Build the python package: source and wheels archives."} -check-all = {help = "Lint, format, and type-check modified files.", ignore_fail = "return_non_zero", sequence = ["check-lockfile", "format-check", "lint", "type-check"]} -check-ci = {help = "Build the package, lint and run unit tests. Does not include type-checking.", sequence = ["build", "check-lockfile", "lint", "unit-test-with-cov"]} -# TODO: find a version of the modified mypy check that works both locally and in CI. -check-local = {help = "Lint all code, type-check modified files, and run unit tests.", sequence = ["check-lockfile", "lint", "type-check", "unit-test-with-cov"]} -# Lockfile check task -check-lockfile = {cmd = "poetry check", help = "Check the poetry lock file."} -# API Docs with PDoc -docs-generate = {cmd = "python -m docs.generate run", env = {PDOC_ALLOW_EXEC = "1"}, help = "Generate API documentation with PDoc."} -docs-preview = {help = "Generate API documentation with PDoc and then open the docs in the default web browser.", shell = "poe docs-generate && open docs/generated/index.html"} -fix-all = {help = "Lint-fix and format-fix modified files, ignoring unsafe fixes.", ignore_fail = "return_non_zero", sequence = ["format-fix", "lint-fix"]} -fix-and-check = {help = "Lint-fix and format-fix, then re-check to see if any issues remain.", ignore_fail = "return_non_zero", sequence = ["check-all", "fix-all"]} -format-check = {help = "Check formatting for all file types.", ignore_fail = "return_non_zero", sequence = ["_format-check-prettier", "_format-check-ruff"]} -format-fix = {help = "Format all file types.", ignore_fail = "return_non_zero", sequence = ["_format-fix-prettier", "_format-fix-ruff"]} -# Installation -install = {shell = "poetry install --all-extras"} -lint = {help = "Lint all code. Includes type checking.", ignore_fail = "return_non_zero", sequence = ["_lint-ruff", "type-check"]} -# Linting/Typing fix tasks -lint-fix = {cmd = "poetry run ruff check --fix .", help = "Auto-fix any lint issues that Ruff can automatically resolve (excluding 'unsafe' fixes)."} -lint-fix-unsafe = {cmd = "poetry run ruff check --fix --unsafe-fixes .", help = "Lint-fix modified files, including 'unsafe' fixes. It is recommended to first commit any pending changes and then always manually review any unsafe changes applied."} -# Build and check -pre-push = {help = "Run all build and check tasks.", sequence = ["build", "check-local"]} -pytest = {cmd = "poetry run coverage run -m pytest --durations=10", help = "Run all pytest tests."} -pytest-fast = {cmd = "poetry run coverage run -m pytest --durations=5 --exitfirst -m 'not flaky and not slow and not requires_creds'", help = "Run pytest tests, failing fast and excluding slow tests."} -type-check = {cmd = "poetry run mypy airbyte_cdk", help = "Type check modified files with mypy."} -unit-test-with-cov = {cmd = "pytest -s unit_tests --cov=airbyte_cdk --cov-report=term --cov-config ./pyproject.toml", help = "Run unit tests and create a coverage report."} [tool.poetry] -authors = ["Airbyte "] -classifiers = [ - "Development Status :: 3 - Alpha", - "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3.10", - "Topic :: Scientific/Engineering", - "Topic :: Software Development :: Libraries :: Python Modules" -] +name = "airbyte-cdk" description = "A framework for writing Airbyte Connectors." -documentation = "https://docs.airbyte.io/" -homepage = "https://airbyte.com" -keywords = ["airbyte", "cdk", "connector-development-kit"] +authors = ["Airbyte "] license = "MIT" -name = "airbyte-cdk" readme = "README.md" +homepage = "https://airbyte.com" repository = "https://github.com/airbytehq/airbyte-python-cdk" +documentation = "https://docs.airbyte.io/" +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Topic :: Scientific/Engineering", + "Topic :: Software Development :: Libraries :: Python Modules", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.10", +] +keywords = ["airbyte", "connector-development-kit", "cdk"] + # Python CDK uses dynamic versioning: https://github.com/mtkennerly/poetry-dynamic-versioning -version = "0.0.0" # Version will be calculated dynamically. +version = "0.0.0" # Version will be calculated dynamically. + +[tool.poetry-dynamic-versioning] +enable = true [tool.poetry.dependencies] -Jinja2 = "~3.1.2" -PyYAML = "^6.0.1" -Unidecode = "^1.3" +python = "^3.10,<3.13" airbyte-protocol-models-dataclasses = "^0.14" -# Extras depedencies -avro = {optional = true, version = "~1.11.2"} backoff = "*" cachetools = "*" -cohere = {optional = true, version = "4.21"} -cryptography = ">=42.0.5,<44.0.0" dpath = "^2.1.6" dunamai = "^1.22.0" -fastavro = {optional = true, version = "~1.8.0"} genson = "1.3.0" isodate = "~0.6.1" +Jinja2 = "~3.1.2" jsonref = "~0.2" -jsonschema = "~4.17.3" # 4.18 has some significant breaking changes: https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0 -langchain = {optional = true, version = "0.1.16"} -langchain_core = {optional = true, version = "0.1.42"} -markdown = {optional = true, version = "*"} -nltk = {optional = true, version = "3.9.1"} -# This will ensure that even when you run poetry install or pip install, the compatible version of numpy will always be chosen. -# airbyte-ci will try to install latest version when --use-local-cdk is used, resulting in the conflict. -numpy = "<2" -openai = {extras = ["embeddings"], optional = true, version = "0.27.9"} -orjson = "^3.10.7" +jsonschema = "~4.17.3" # 4.18 has some significant breaking changes: https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0 pandas = "2.2.2" -pdf2image = {optional = true, version = "1.16.3"} -"pdfminer.six" = {optional = true, version = "20221105"} pendulum = "<3.0.0" psutil = "6.1.0" -pyarrow = {optional = true, version = "~15.0.0"} pydantic = "^2.7" -pyjwt = "^2.8.0" pyrate-limiter = "~3.1.0" -pytesseract = {optional = true, version = "0.3.10"} -python = "^3.10,<3.13" -python-calamine = {optional = true, version = "0.2.3"} python-dateutil = "*" -python-snappy = {optional = true, version = "0.7.3"} python-ulid = "^3.0.0" -pytz = "2024.2" +PyYAML = "^6.0.1" rapidfuzz = "^3.10.1" requests = "*" requests_cache = "*" -serpyco-rs = "^1.10.2" -sqlalchemy = {optional = true, version = "^2.0,!=2.0.36"} -tiktoken = {optional = true, version = "0.8.0"} -unstructured = {extras = ["docx", "pptx"], optional = true, version = "0.10.27"} -"unstructured.pytesseract" = {optional = true, version = ">=0.3.12"} wcmatch = "10.0" +# Extras depedencies +avro = { version = "~1.11.2", optional = true } +cohere = { version = "4.21", optional = true } +fastavro = { version = "~1.8.0", optional = true } +langchain = { version = "0.1.16", optional = true } +langchain_core = { version = "0.1.42", optional = true } +markdown = { version = "*", optional = true } +openai = { version = "0.27.9", extras = ["embeddings"], optional = true } +pdf2image = { version = "1.16.3", optional = true } +"pdfminer.six" = { version = "20221105", optional = true } +pyarrow = { version = "~15.0.0", optional = true } +pytesseract = { version = "0.3.10", optional = true } +python-calamine = { version = "0.2.3", optional = true } +python-snappy = { version = "0.7.3", optional = true } +tiktoken = { version = "0.8.0", optional = true } +nltk = { version = "3.9.1", optional = true } +# This will ensure that even when you run poetry install or pip install, the compatible version of numpy will always be chosen. +# airbyte-ci will try to install latest version when --use-local-cdk is used, resulting in the conflict. +numpy = "<2" +unstructured = { version = "0.10.27", extras = ["docx", "pptx"], optional = true } +"unstructured.pytesseract" = { version = ">=0.3.12", optional = true } +pyjwt = "^2.8.0" +cryptography = ">=42.0.5,<44.0.0" +pytz = "2024.2" +orjson = "^3.10.7" +serpyco-rs = "^1.10.2" +sqlalchemy = {version = "^2.0,!=2.0.36", optional = true } xmltodict = ">=0.13,<0.15" - -[tool.poetry.extras] -file-based = ["avro", "fastavro", "markdown", "pdf2image", "pdfminer.six", "pyarrow", "pytesseract", "python-calamine", "python-snappy", "unstructured", "unstructured.pytesseract"] -sql = ["sqlalchemy"] -vector-db-based = ["cohere", "langchain", "openai", "tiktoken"] +Unidecode = "^1.3" [tool.poetry.group.dev.dependencies] -asyncio = "3.4.3" freezegun = "*" mypy = "*" +asyncio = "3.4.3" +ruff = "^0.7.2" pdoc = "^15.0.0" poethepoet = "^0.24.2" -pympler = "*" pyproject-flake8 = "^6.1.0" pytest = "^7" +pytest-memray = "^1.6.0" +pympler = "*" pytest-cov = "*" pytest-httpserver = "*" -pytest-memray = "^1.6.0" pytest-mock = "*" requests-mock = "*" -ruff = "^0.7.2" -types-cachetools = "^5.5.0.20240820" -types-python-dateutil = "^2.9.0.20241003" -types-pyyaml = "^6.0.12.20240917" # Stubs packages for mypy typing types-requests = "^2.32.0.20241016" +types-python-dateutil = "^2.9.0.20241003" +types-pyyaml = "^6.0.12.20240917" +types-cachetools = "^5.5.0.20240820" + +[tool.poetry.extras] +file-based = ["avro", "fastavro", "pyarrow", "unstructured", "pdf2image", "pdfminer.six", "unstructured.pytesseract", "pytesseract", "markdown", "python-calamine", "python-snappy"] +vector-db-based = ["langchain", "openai", "cohere", "tiktoken"] +sql = ["sqlalchemy"] [tool.poetry.scripts] -source-declarative-manifest = "airbyte_cdk.cli.source_declarative_manifest:run" -[tool.poetry-dynamic-versioning] -enable = true +source-declarative-manifest = "airbyte_cdk.cli.source_declarative_manifest:run" -[tool.pytest.ini_options] -filterwarnings = [ - "ignore::airbyte_cdk.sources.source.ExperimentalClassWarning" -] -log_cli = true -log_cli_date_format = "%Y-%m-%d %H:%M:%S" -log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)" -log_cli_level = "INFO" +[tool.isort] +skip = ["__init__.py"] # TODO: Remove after this is fixed: https://github.com/airbytehq/airbyte-python-cdk/issues/12 [tool.ruff] -line-length = 100 target-version = "py310" +line-length = 100 [tool.ruff.lint] select = ["I"] + +[tool.poe.tasks] +# Installation +install = { shell = "poetry install --all-extras" } + +# Build tasks +assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."} +build-package = {cmd = "poetry build", help = "Build the python package: source and wheels archives."} +build = {sequence = ["assemble", "build-package"], help = "Run all tasks to build the package."} + +# Format check tasks +_format-check-ruff = {cmd = "ruff format --check .", help = "Check formatting with Ruff."} +_format-check-prettier = {cmd = "npx prettier . --check", help = "Check formatting with prettier."} +format-check = {sequence = ["_format-check-ruff", "_format-check-prettier"], help = "Check formatting for all file types.", ignore_fail = "return_non_zero"} + +# Format fix tasks +_format-fix-ruff = {cmd = "ruff format .", help = "Format with Ruff."} +_format-fix-prettier = {cmd = "npx prettier . --write", help = "Format with prettier."} +format-fix = {sequence = ["_format-fix-ruff", "_format-fix-prettier"], help = "Format all file types.", ignore_fail = "return_non_zero"} + +# Linting/Typing check tasks +_lint-ruff = {cmd = "poetry run ruff check .", help = "Lint with Ruff."} +type-check = {cmd = "poetry run mypy airbyte_cdk", help = "Type check modified files with mypy."} +lint = {sequence = ["_lint-ruff", "type-check"], help = "Lint all code. Includes type checking.", ignore_fail = "return_non_zero"} + +# Lockfile check task +check-lockfile = {cmd = "poetry check", help = "Check the poetry lock file."} + +# Linting/Typing fix tasks +lint-fix = { cmd = "poetry run ruff check --fix .", help = "Auto-fix any lint issues that Ruff can automatically resolve (excluding 'unsafe' fixes)." } +lint-fix-unsafe = { cmd = "poetry run ruff check --fix --unsafe-fixes .", help = "Lint-fix modified files, including 'unsafe' fixes. It is recommended to first commit any pending changes and then always manually review any unsafe changes applied." } + +# Combined Check and Fix tasks + +check-all = {sequence = ["lint", "format-check", "type-check", "check-lockfile"], help = "Lint, format, and type-check modified files.", ignore_fail = "return_non_zero"} +fix-all = {sequence = ["format-fix", "lint-fix"], help = "Lint-fix and format-fix modified files, ignoring unsafe fixes.", ignore_fail = "return_non_zero"} +fix-and-check = {sequence = ["fix-all", "check-all"], help = "Lint-fix and format-fix, then re-check to see if any issues remain.", ignore_fail = "return_non_zero"} + +# PyTest tasks + +pytest = {cmd = "poetry run coverage run -m pytest --durations=10", help = "Run all pytest tests."} +pytest-fast = {cmd = "poetry run coverage run -m pytest --durations=5 --exitfirst -m 'not flaky and not slow and not requires_creds'", help = "Run pytest tests, failing fast and excluding slow tests."} +unit-test-with-cov = {cmd = "pytest -s unit_tests --cov=airbyte_cdk --cov-report=term --cov-config ./pyproject.toml", help = "Run unit tests and create a coverage report."} + +# Combined check tasks (other) + +# TODO: find a version of the modified mypy check that works both locally and in CI. +check-local = {sequence = ["lint", "type-check", "check-lockfile", "unit-test-with-cov"], help = "Lint all code, type-check modified files, and run unit tests."} +check-ci = {sequence = ["check-lockfile", "build", "lint", "unit-test-with-cov"], help = "Build the package, lint and run unit tests. Does not include type-checking."} + +# Build and check +pre-push = {sequence = ["build", "check-local"], help = "Run all build and check tasks."} + +# API Docs with PDoc +docs-generate = {env = {PDOC_ALLOW_EXEC = "1"}, cmd = "python -m docs.generate run", help="Generate API documentation with PDoc."} +docs-preview = {shell = "poe docs-generate && open docs/generated/index.html", help="Generate API documentation with PDoc and then open the docs in the default web browser."} + +[tool.check-wheel-contents] +# Quality control for Python wheel generation. Docs here: +# - https://github.com/jwodder/check-wheel-contents +ignore = [ + "W002" # Duplicate files. (TODO: Fix the few duplicate files, mostly `__init__.py` files that have only copyright text.) +] + +[tool.pytest.ini_options] +log_cli = true +log_cli_level = "INFO" +log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)" +log_cli_date_format = "%Y-%m-%d %H:%M:%S" +filterwarnings = [ + "ignore::airbyte_cdk.sources.source.ExperimentalClassWarning" +] + +[tool.airbyte_ci] +python_versions = ["3.10", "3.11"] +optional_poetry_groups = ["dev"] +poetry_extras = ["file-based", "vector-db-based"] +poe_tasks = ["check-ci"] +mount_docker_socket = true From 0385d42c9380a3d9a2d7215df379ca0129f2636c Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 25 Jan 2025 05:38:49 +0000 Subject: [PATCH 09/11] chore: revert pyproject.toml to original state from main Co-Authored-By: Aaron Steers --- pyproject.toml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0bba75f5a..ea48db48e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ requests = "*" requests_cache = "*" wcmatch = "10.0" # Extras depedencies -avro = { version = "~1.11.2", optional = true } +avro = { version = ">=1.11.2,<1.13.0", optional = true } cohere = { version = "4.21", optional = true } fastavro = { version = "~1.8.0", optional = true } langchain = { version = "0.1.16", optional = true } @@ -115,16 +115,12 @@ source-declarative-manifest = "airbyte_cdk.cli.source_declarative_manifest:run" [tool.isort] skip = ["__init__.py"] # TODO: Remove after this is fixed: https://github.com/airbytehq/airbyte-python-cdk/issues/12 -[tool.ruff] -target-version = "py310" -line-length = 100 - -[tool.ruff.lint] -select = ["I"] +# Ruff configuration moved to ruff.toml [tool.poe.tasks] # Installation install = { shell = "poetry install --all-extras" } +lock = { shell = "poetry lock --no-update" } # Build tasks assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."} From 91f6c6efa302eb96234a4101b074f6379d4b2474 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 25 Jan 2025 05:39:40 +0000 Subject: [PATCH 10/11] chore: restore pyproject.toml to match main branch exactly Co-Authored-By: Aaron Steers --- pyproject.toml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ea48db48e..0bba75f5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,7 @@ requests = "*" requests_cache = "*" wcmatch = "10.0" # Extras depedencies -avro = { version = ">=1.11.2,<1.13.0", optional = true } +avro = { version = "~1.11.2", optional = true } cohere = { version = "4.21", optional = true } fastavro = { version = "~1.8.0", optional = true } langchain = { version = "0.1.16", optional = true } @@ -115,12 +115,16 @@ source-declarative-manifest = "airbyte_cdk.cli.source_declarative_manifest:run" [tool.isort] skip = ["__init__.py"] # TODO: Remove after this is fixed: https://github.com/airbytehq/airbyte-python-cdk/issues/12 -# Ruff configuration moved to ruff.toml +[tool.ruff] +target-version = "py310" +line-length = 100 + +[tool.ruff.lint] +select = ["I"] [tool.poe.tasks] # Installation install = { shell = "poetry install --all-extras" } -lock = { shell = "poetry lock --no-update" } # Build tasks assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."} From 43201e26c0bcbefdbc4ea4a9be03fad44e786ff1 Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Fri, 24 Jan 2025 22:13:56 -0800 Subject: [PATCH 11/11] Update .pre-commit-config.yaml add some more checks --- .pre-commit-config.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e9fb859b6..75b7888b0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,6 +24,10 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files - id: check-toml - repo: https://github.com/astral-sh/ruff-pre-commit