diff --git a/poetry.lock b/poetry.lock
index 6c1979521..4fbb40b85 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
 
 [[package]]
 name = "adlfs"
@@ -6,6 +6,7 @@ version = "2023.10.0"
 description = "Access Azure Datalake Gen1 with fsspec and dask"
 optional = false
 python-versions = ">=3.8"
+groups = ["filesystem"]
 files = [
     {file = "adlfs-2023.10.0-py3-none-any.whl", hash = "sha256:dfdc8cc782bd78262435fb1bc2a8cfdbdd80342bb1b1ae9dfff968de912b0b09"},
     {file = "adlfs-2023.10.0.tar.gz", hash = "sha256:f5cf06c5b0074d17d43838d4c434791a98420d9e768b36a1a02c7b3930686543"},
@@ -28,6 +29,7 @@ version = "2.5.4"
 description = "Async client for aws services using botocore and aiohttp"
 optional = false
 python-versions = ">=3.7"
+groups = ["filesystem"]
 files = [
     {file = "aiobotocore-2.5.4-py3-none-any.whl", hash = "sha256:4b32218728ca3d0be83835b604603a0cd6c329066e884bb78149334267f92440"},
     {file = "aiobotocore-2.5.4.tar.gz", hash = "sha256:60341f19eda77e41e1ab11eef171b5a98b5dbdb90804f5334b6f90e560e31fae"},
@@ -49,6 +51,7 @@ version = "3.8.6"
 description = "Async http client/server framework (asyncio)"
 optional = false
 python-versions = ">=3.6"
+groups = ["main", "facebook_ads", "filesystem", "unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "aiohttp-3.8.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:41d55fc043954cddbbd82503d9cc3f4814a40bcef30b3569bc7b5e34130718c1"},
     {file = "aiohttp-3.8.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1d84166673694841d8953f0a8d0c90e1087739d24632fe86b1a08819168b4566"},
@@ -149,7 +152,7 @@ multidict = ">=4.5,<7.0"
 yarl = ">=1.0,<2.0"
 
 [package.extras]
-speedups = ["Brotli", "aiodns", "cchardet"]
+speedups = ["Brotli", "aiodns", "cchardet ; python_version < \"3.10\""]
 
 [[package]]
 name = "aioitertools"
@@ -157,6 +160,7 @@ version = "0.11.0"
 description = "itertools and builtins for AsyncIO and mixed iterables"
 optional = false
 python-versions = ">=3.6"
+groups = ["filesystem"]
 files = [
     {file = "aioitertools-0.11.0-py3-none-any.whl", hash = "sha256:04b95e3dab25b449def24d7df809411c10e62aab0cbe31a50ca4e68748c43394"},
     {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"},
@@ -171,6 +175,7 @@ version = "1.3.1"
 description = "aiosignal: a list of registered asynchronous callbacks"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "facebook_ads", "filesystem", "unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"},
     {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"},
@@ -185,6 +190,7 @@ version = "4.0.0"
 description = "High level compatibility layer for multiple asynchronous event loop implementations"
 optional = false
 python-versions = ">=3.8"
+groups = ["unstructured_data"]
 files = [
     {file = "anyio-4.0.0-py3-none-any.whl", hash = "sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f"},
     {file = "anyio-4.0.0.tar.gz", hash = "sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a"},
@@ -197,7 +203,7 @@ sniffio = ">=1.1"
 
 [package.extras]
 doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)"]
-test = ["anyio[trio]", "coverage[toml] (>=7)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"]
+test = ["anyio[trio]", "coverage[toml] (>=7)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17) ; python_version < \"3.12\" and platform_python_implementation == \"CPython\" and platform_system != \"Windows\""]
 trio = ["trio (>=0.22)"]
 
 [[package]]
@@ -206,6 +212,7 @@ version = "0.0.1"
 description = ""
 optional = false
 python-versions = "*"
+groups = ["unstructured_data"]
 files = [
     {file = "argilla-0.0.1-py3-none-any.whl", hash = "sha256:8bdc3c505bcfb47ba4b91f5658034eae53bf7d4f9317980397605c0c55817396"},
     {file = "argilla-0.0.1.tar.gz", hash = "sha256:5017854754e89f573b31af25b25b803f51cea9ca1fa0bcf00505dee1f45cf7c9"},
@@ -217,6 +224,7 @@ version = "3.2.2"
 description = "Asana API client"
 optional = false
 python-versions = "*"
+groups = ["asana_dlt"]
 files = [
     {file = "asana-3.2.2-py2.py3-none-any.whl", hash = "sha256:e8426ae5f5cda2c27d29874145acb589b91e673a84e3fbd45404679499d9604a"},
     {file = "asana-3.2.2.tar.gz", hash = "sha256:3a0c64ad5baaa8c52465fe400cedbc873b2127a77df135af518fd8da1af8d6b9"},
@@ -232,6 +240,7 @@ version = "0.3.3"
 description = "Some handy helper functions for Python's AST module."
 optional = false
 python-versions = ">=3.6"
+groups = ["dev"]
 files = [
     {file = "astatine-0.3.3-py3-none-any.whl", hash = "sha256:6d8c914f01fbea252cb8f31563f2e766a9ab03c02b9bcc37d18f7d9138828401"},
     {file = "astatine-0.3.3.tar.gz", hash = "sha256:0c58a7844b5890ff16da07dbfeb187341d8324cb4378940f89d795cbebebce08"},
@@ -247,6 +256,7 @@ version = "2.4.0"
 description = "Annotate AST trees with source code positions"
 optional = false
 python-versions = "*"
+groups = ["dev"]
 files = [
     {file = "asttokens-2.4.0-py2.py3-none-any.whl", hash = "sha256:cf8fc9e61a86461aa9fb161a14a0841a03c405fa829ac6b202670b3495d2ce69"},
     {file = "asttokens-2.4.0.tar.gz", hash = "sha256:2e0171b991b2c959acc6c49318049236844a5da1d65ba2672c4880c1c894834e"},
@@ -258,27 +268,13 @@ six = ">=1.12.0"
 [package.extras]
 test = ["astroid", "pytest"]
 
-[[package]]
-name = "astunparse"
-version = "1.6.3"
-description = "An AST unparser for Python"
-optional = false
-python-versions = "*"
-files = [
-    {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"},
-    {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"},
-]
-
-[package.dependencies]
-six = ">=1.6.1,<2.0"
-wheel = ">=0.23.0,<1.0"
-
 [[package]]
 name = "async-timeout"
 version = "4.0.3"
 description = "Timeout context manager for asyncio programs"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "facebook_ads", "filesystem", "unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"},
     {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
@@ -290,6 +286,7 @@ version = "23.1.0"
 description = "Classes Without Boilerplate"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "dev", "facebook_ads", "filesystem", "salesforce", "scrapy", "unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"},
     {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"},
@@ -300,7 +297,7 @@ cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
 dev = ["attrs[docs,tests]", "pre-commit"]
 docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
 tests = ["attrs[tests-no-zope]", "zope-interface"]
-tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
+tests-no-zope = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.1.1) ; platform_python_implementation == \"CPython\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version < \"3.11\"", "pytest-xdist[psutil]"]
 
 [[package]]
 name = "automat"
@@ -308,6 +305,7 @@ version = "22.10.0"
 description = "Self-service finite-state machines for the programmer on the go."
 optional = false
 python-versions = "*"
+groups = ["dev", "scrapy"]
 files = [
     {file = "Automat-22.10.0-py2.py3-none-any.whl", hash = "sha256:c3164f8742b9dc440f3682482d32aaff7bb53f71740dd018533f9de286b64180"},
     {file = "Automat-22.10.0.tar.gz", hash = "sha256:e56beb84edad19dcc11d30e8d9b895f75deeb5ef5e96b84a467066b3b84bb04e"},
@@ -326,6 +324,7 @@ version = "1.29.4"
 description = "Microsoft Azure Core Library for Python"
 optional = false
 python-versions = ">=3.7"
+groups = ["filesystem"]
 files = [
     {file = "azure-core-1.29.4.tar.gz", hash = "sha256:500b3aa9bf2e90c5ccc88bb105d056114ca0ce7d0ce73afb8bc4d714b2fc7568"},
     {file = "azure_core-1.29.4-py3-none-any.whl", hash = "sha256:b03261bcba22c0b9290faf9999cedd23e849ed2577feee90515694cea6bc74bf"},
@@ -345,6 +344,7 @@ version = "0.0.53"
 description = "Azure Data Lake Store Filesystem Client Library for Python"
 optional = false
 python-versions = "*"
+groups = ["filesystem"]
 files = [
     {file = "azure-datalake-store-0.0.53.tar.gz", hash = "sha256:05b6de62ee3f2a0a6e6941e6933b792b800c3e7f6ffce2fc324bc19875757393"},
     {file = "azure_datalake_store-0.0.53-py2.py3-none-any.whl", hash = "sha256:a30c902a6e360aa47d7f69f086b426729784e71c536f330b691647a51dc42b2b"},
@@ -361,6 +361,7 @@ version = "1.14.1"
 description = "Microsoft Azure Identity Library for Python"
 optional = false
 python-versions = ">=3.7"
+groups = ["filesystem"]
 files = [
     {file = "azure-identity-1.14.1.zip", hash = "sha256:48e2a9dbdc59b4f095f841d867d9a8cbe4c1cdbbad8251e055561afd47b4a9b8"},
     {file = "azure_identity-1.14.1-py3-none-any.whl", hash = "sha256:3a5bef8e9c3281e864e869739be8d67424bff616cddae96b546ca2a5168d863d"},
@@ -378,6 +379,7 @@ version = "12.18.3"
 description = "Microsoft Azure Blob Storage Client Library for Python"
 optional = false
 python-versions = ">=3.7"
+groups = ["filesystem"]
 files = [
     {file = "azure-storage-blob-12.18.3.tar.gz", hash = "sha256:d8ced0deee3367fa3d4f3d1a03cd9edadf4440c0a371f503d623fa6c807554ee"},
     {file = "azure_storage_blob-12.18.3-py3-none-any.whl", hash = "sha256:c278dde2ac41857a68d615c9f2b36d894ba877a7e84d62795603c7e79d0bb5e9"},
@@ -398,45 +400,19 @@ version = "2.2.1"
 description = "Function decoration for backoff and retry"
 optional = false
 python-versions = ">=3.7,<4.0"
+groups = ["unstructured_data"]
 files = [
     {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"},
     {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"},
 ]
 
-[[package]]
-name = "backports-zoneinfo"
-version = "0.2.1"
-description = "Backport of the standard library zoneinfo module"
-optional = false
-python-versions = ">=3.6"
-files = [
-    {file = "backports.zoneinfo-0.2.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc"},
-    {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722"},
-    {file = "backports.zoneinfo-0.2.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1c5742112073a563c81f786e77514969acb58649bcdf6cdf0b4ed31a348d4546"},
-    {file = "backports.zoneinfo-0.2.1-cp36-cp36m-win32.whl", hash = "sha256:e8236383a20872c0cdf5a62b554b27538db7fa1bbec52429d8d106effbaeca08"},
-    {file = "backports.zoneinfo-0.2.1-cp36-cp36m-win_amd64.whl", hash = "sha256:8439c030a11780786a2002261569bdf362264f605dfa4d65090b64b05c9f79a7"},
-    {file = "backports.zoneinfo-0.2.1-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:f04e857b59d9d1ccc39ce2da1021d196e47234873820cbeaad210724b1ee28ac"},
-    {file = "backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:17746bd546106fa389c51dbea67c8b7c8f0d14b5526a579ca6ccf5ed72c526cf"},
-    {file = "backports.zoneinfo-0.2.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5c144945a7752ca544b4b78c8c41544cdfaf9786f25fe5ffb10e838e19a27570"},
-    {file = "backports.zoneinfo-0.2.1-cp37-cp37m-win32.whl", hash = "sha256:e55b384612d93be96506932a786bbcde5a2db7a9e6a4bb4bffe8b733f5b9036b"},
-    {file = "backports.zoneinfo-0.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a76b38c52400b762e48131494ba26be363491ac4f9a04c1b7e92483d169f6582"},
-    {file = "backports.zoneinfo-0.2.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:8961c0f32cd0336fb8e8ead11a1f8cd99ec07145ec2931122faaac1c8f7fd987"},
-    {file = "backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:e81b76cace8eda1fca50e345242ba977f9be6ae3945af8d46326d776b4cf78d1"},
-    {file = "backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7b0a64cda4145548fed9efc10322770f929b944ce5cee6c0dfe0c87bf4c0c8c9"},
-    {file = "backports.zoneinfo-0.2.1-cp38-cp38-win32.whl", hash = "sha256:1b13e654a55cd45672cb54ed12148cd33628f672548f373963b0bff67b217328"},
-    {file = "backports.zoneinfo-0.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:4a0f800587060bf8880f954dbef70de6c11bbe59c673c3d818921f042f9954a6"},
-    {file = "backports.zoneinfo-0.2.1.tar.gz", hash = "sha256:fadbfe37f74051d024037f223b8e001611eac868b5c5b06144ef4d8b799862f2"},
-]
-
-[package.extras]
-tzdata = ["tzdata"]
-
 [[package]]
 name = "bandit"
 version = "1.7.5"
 description = "Security oriented static analyser for python code."
 optional = false
 python-versions = ">=3.7"
+groups = ["dev"]
 files = [
     {file = "bandit-1.7.5-py3-none-any.whl", hash = "sha256:75665181dc1e0096369112541a056c59d1c5f66f9bb74a8d686c3c362b83f549"},
     {file = "bandit-1.7.5.tar.gz", hash = "sha256:bdfc739baa03b880c2d15d0431b31c658ffc348e907fe197e54e0389dd59e11e"},
@@ -450,8 +426,8 @@ rich = "*"
 stevedore = ">=1.20.0"
 
 [package.extras]
-test = ["beautifulsoup4 (>=4.8.0)", "coverage (>=4.5.4)", "fixtures (>=3.0.0)", "flake8 (>=4.0.0)", "pylint (==1.9.4)", "stestr (>=2.5.0)", "testscenarios (>=0.5.0)", "testtools (>=2.3.0)", "tomli (>=1.1.0)"]
-toml = ["tomli (>=1.1.0)"]
+test = ["beautifulsoup4 (>=4.8.0)", "coverage (>=4.5.4)", "fixtures (>=3.0.0)", "flake8 (>=4.0.0)", "pylint (==1.9.4)", "stestr (>=2.5.0)", "testscenarios (>=0.5.0)", "testtools (>=2.3.0)", "tomli (>=1.1.0) ; python_version < \"3.11\""]
+toml = ["tomli (>=1.1.0) ; python_version < \"3.11\""]
 yaml = ["PyYAML"]
 
 [[package]]
@@ -460,6 +436,7 @@ version = "23.9.1"
 description = "The uncompromising code formatter."
 optional = false
 python-versions = ">=3.8"
+groups = ["dev"]
 files = [
     {file = "black-23.9.1-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:d6bc09188020c9ac2555a498949401ab35bb6bf76d4e0f8ee251694664df6301"},
     {file = "black-23.9.1-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:13ef033794029b85dfea8032c9d3b92b42b526f1ff4bf13b2182ce4e917f5100"},
@@ -506,6 +483,7 @@ version = "1.31.17"
 description = "Low-level, data-driven core of boto 3."
 optional = false
 python-versions = ">= 3.7"
+groups = ["filesystem"]
 files = [
     {file = "botocore-1.31.17-py3-none-any.whl", hash = "sha256:6ac34a1d34aa3750e78b77b8596617e2bab938964694d651939dba2cbde2c12b"},
     {file = "botocore-1.31.17.tar.gz", hash = "sha256:396459065dba4339eb4da4ec8b4e6599728eb89b7caaceea199e26f7d824a41c"},
@@ -525,6 +503,7 @@ version = "5.3.1"
 description = "Extensible memoizing collections and decorators"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets"]
 files = [
     {file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"},
     {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"},
@@ -536,6 +515,7 @@ version = "2023.7.22"
 description = "Python package for providing Mozilla's CA Bundle."
 optional = false
 python-versions = ">=3.6"
+groups = ["main", "airtable", "asana_dlt", "dev", "dltpure", "facebook_ads", "filesystem", "google_ads", "google_analytics", "google_sheets", "salesforce", "scrapy", "stripe_analytics", "unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"},
     {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"},
@@ -547,6 +527,7 @@ version = "1.16.0"
 description = "Foreign Function Interface for Python calling C code."
 optional = false
 python-versions = ">=3.8"
+groups = ["main", "filesystem", "salesforce", "scrapy", "unstructured_data"]
 files = [
     {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"},
     {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"},
@@ -601,6 +582,7 @@ files = [
     {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"},
     {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"},
 ]
+markers = {main = "platform_python_implementation == \"PyPy\""}
 
 [package.dependencies]
 pycparser = "*"
@@ -611,6 +593,7 @@ version = "5.2.0"
 description = "Universal encoding detector for Python 3"
 optional = false
 python-versions = ">=3.7"
+groups = ["unstructured_data"]
 files = [
     {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"},
     {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"},
@@ -622,6 +605,7 @@ version = "3.3.0"
 description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
 optional = false
 python-versions = ">=3.7.0"
+groups = ["main", "airtable", "asana_dlt", "dev", "dltpure", "facebook_ads", "filesystem", "google_ads", "google_analytics", "google_sheets", "salesforce", "scrapy", "stripe_analytics", "unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "charset-normalizer-3.3.0.tar.gz", hash = "sha256:63563193aec44bce707e0c5ca64ff69fa72ed7cf34ce6e11d5127555756fd2f6"},
     {file = "charset_normalizer-3.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:effe5406c9bd748a871dbcaf3ac69167c38d72db8c9baf3ff954c344f31c4cbe"},
@@ -721,6 +705,7 @@ version = "0.3.29"
 description = "Chroma."
 optional = false
 python-versions = ">=3.7"
+groups = ["unstructured_data"]
 files = [
     {file = "chromadb-0.3.29-py3-none-any.whl", hash = "sha256:d681a3e4f3284715dd146774be84cad3d2f8c529bd004ba249e1d3deb70ac68e"},
     {file = "chromadb-0.3.29.tar.gz", hash = "sha256:29d47835da494fc1b58da40abb1435689d4ba1c93df6c64664a5d91521cb80e9"},
@@ -730,7 +715,6 @@ files = [
 clickhouse-connect = ">=0.5.7"
 duckdb = ">=0.7.1"
 fastapi = "0.85.1"
-graphlib-backport = {version = ">=1.0.3", markers = "python_version < \"3.9\""}
 hnswlib = ">=0.7"
 numpy = ">=1.21.6"
 onnxruntime = ">=1.14.1"
@@ -751,6 +735,7 @@ version = "8.1.7"
 description = "Composable command line interface toolkit"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "dev", "dltpure", "unstructured_data"]
 files = [
     {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"},
     {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"},
@@ -765,6 +750,7 @@ version = "0.6.14"
 description = "ClickHouse Database Core Driver for Python, Pandas, and Superset"
 optional = false
 python-versions = "~=3.7"
+groups = ["unstructured_data"]
 files = [
     {file = "clickhouse-connect-0.6.14.tar.gz", hash = "sha256:0531bbd5b8bdee616bf1cca5ddcb0af86db12e2b48fd39257a8ecdf32200bd57"},
     {file = "clickhouse_connect-0.6.14-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:04affbd255fb8b1e4a882ddc1336c86530976d05578f47bb65e3a53471d291e4"},
@@ -853,10 +839,12 @@ version = "0.4.6"
 description = "Cross-platform colored terminal text."
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+groups = ["main", "dev", "dltpure", "pytest", "unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
     {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]
+markers = {main = "platform_system == \"Windows\"", dev = "platform_system == \"Windows\" or sys_platform == \"win32\"", dltpure = "platform_system == \"Windows\"", pytest = "sys_platform == \"win32\"", unstructured_data = "platform_system == \"Windows\" or sys_platform == \"win32\"", unstructured_data_lint = "platform_system == \"Windows\""}
 
 [[package]]
 name = "coloredlogs"
@@ -864,6 +852,7 @@ version = "15.0.1"
 description = "Colored terminal output for Python's logging module"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+groups = ["unstructured_data"]
 files = [
     {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"},
     {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"},
@@ -881,6 +870,7 @@ version = "2.3.0"
 description = "Confluent's Python client for Apache Kafka"
 optional = false
 python-versions = "*"
+groups = ["dev"]
 files = [
     {file = "confluent-kafka-2.3.0.tar.gz", hash = "sha256:4069e7b56e0baf9db18c053a605213f0ab2d8f23715dca7b3bd97108df446ced"},
     {file = "confluent_kafka-2.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5df845755cd3ebb9165ca00fd1d3a7d514c61e84d9fcbe7babb91193fe9b369c"},
@@ -919,10 +909,10 @@ files = [
 ]
 
 [package.extras]
-avro = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "requests"]
-dev = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "flake8", "pytest", "pytest (==4.6.4)", "pytest-timeout", "requests"]
-doc = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0)", "fastavro (>=1.0)", "requests", "sphinx", "sphinx-rtd-theme"]
-json = ["jsonschema", "pyrsistent", "pyrsistent (==0.16.1)", "requests"]
+avro = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "requests"]
+dev = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "flake8", "pytest (==4.6.4) ; python_version < \"3.0\"", "pytest ; python_version >= \"3.0\"", "pytest-timeout", "requests"]
+doc = ["avro (>=1.11.1,<2)", "fastavro (>=0.23.0,<1.0) ; python_version < \"3.0\"", "fastavro (>=1.0) ; python_version > \"3.0\"", "requests", "sphinx", "sphinx-rtd-theme"]
+json = ["jsonschema", "pyrsistent (==0.16.1) ; python_version < \"3.0\"", "pyrsistent ; python_version > \"3.0\"", "requests"]
 protobuf = ["protobuf", "requests"]
 schema-registry = ["requests"]
 
@@ -932,6 +922,7 @@ version = "0.3.2"
 description = ""
 optional = false
 python-versions = "*"
+groups = ["sql_database"]
 files = [
     {file = "connectorx-0.3.2-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:98274242c64a2831a8b1c86e0fa2c46a557dd8cbcf00c3adcf5a602455fb02d7"},
     {file = "connectorx-0.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e2b11ba49efd330a7348bef3ce09c98218eea21d92a12dd75cd8f0ade5c99ffc"},
@@ -957,17 +948,107 @@ version = "23.10.4"
 description = "Symbolic constants in Python"
 optional = false
 python-versions = ">=3.8"
+groups = ["dev", "scrapy"]
 files = [
     {file = "constantly-23.10.4-py3-none-any.whl", hash = "sha256:3fd9b4d1c3dc1ec9757f3c52aef7e53ad9323dbe39f51dfd4c43853b68dfa3f9"},
     {file = "constantly-23.10.4.tar.gz", hash = "sha256:aa92b70a33e2ac0bb33cd745eb61776594dc48764b06c35e0efd050b7f1c7cbd"},
 ]
 
+[[package]]
+name = "coverage"
+version = "7.6.1"
+description = "Code coverage measurement for Python"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "coverage-7.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b06079abebbc0e89e6163b8e8f0e16270124c154dc6e4a47b413dd538859af16"},
+    {file = "coverage-7.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cf4b19715bccd7ee27b6b120e7e9dd56037b9c0681dcc1adc9ba9db3d417fa36"},
+    {file = "coverage-7.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61c0abb4c85b095a784ef23fdd4aede7a2628478e7baba7c5e3deba61070a02"},
+    {file = "coverage-7.6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fd21f6ae3f08b41004dfb433fa895d858f3f5979e7762d052b12aef444e29afc"},
+    {file = "coverage-7.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f59d57baca39b32db42b83b2a7ba6f47ad9c394ec2076b084c3f029b7afca23"},
+    {file = "coverage-7.6.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a1ac0ae2b8bd743b88ed0502544847c3053d7171a3cff9228af618a068ed9c34"},
+    {file = "coverage-7.6.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e6a08c0be454c3b3beb105c0596ebdc2371fab6bb90c0c0297f4e58fd7e1012c"},
+    {file = "coverage-7.6.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f5796e664fe802da4f57a168c85359a8fbf3eab5e55cd4e4569fbacecc903959"},
+    {file = "coverage-7.6.1-cp310-cp310-win32.whl", hash = "sha256:7bb65125fcbef8d989fa1dd0e8a060999497629ca5b0efbca209588a73356232"},
+    {file = "coverage-7.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:3115a95daa9bdba70aea750db7b96b37259a81a709223c8448fa97727d546fe0"},
+    {file = "coverage-7.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7dea0889685db8550f839fa202744652e87c60015029ce3f60e006f8c4462c93"},
+    {file = "coverage-7.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed37bd3c3b063412f7620464a9ac1314d33100329f39799255fb8d3027da50d3"},
+    {file = "coverage-7.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d85f5e9a5f8b73e2350097c3756ef7e785f55bd71205defa0bfdaf96c31616ff"},
+    {file = "coverage-7.6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bc572be474cafb617672c43fe989d6e48d3c83af02ce8de73fff1c6bb3c198d"},
+    {file = "coverage-7.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c0420b573964c760df9e9e86d1a9a622d0d27f417e1a949a8a66dd7bcee7bc6"},
+    {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1f4aa8219db826ce6be7099d559f8ec311549bfc4046f7f9fe9b5cea5c581c56"},
+    {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:fc5a77d0c516700ebad189b587de289a20a78324bc54baee03dd486f0855d234"},
+    {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b48f312cca9621272ae49008c7f613337c53fadca647d6384cc129d2996d1133"},
+    {file = "coverage-7.6.1-cp311-cp311-win32.whl", hash = "sha256:1125ca0e5fd475cbbba3bb67ae20bd2c23a98fac4e32412883f9bcbaa81c314c"},
+    {file = "coverage-7.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:8ae539519c4c040c5ffd0632784e21b2f03fc1340752af711f33e5be83a9d6c6"},
+    {file = "coverage-7.6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:95cae0efeb032af8458fc27d191f85d1717b1d4e49f7cb226cf526ff28179778"},
+    {file = "coverage-7.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5621a9175cf9d0b0c84c2ef2b12e9f5f5071357c4d2ea6ca1cf01814f45d2391"},
+    {file = "coverage-7.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:260933720fdcd75340e7dbe9060655aff3af1f0c5d20f46b57f262ab6c86a5e8"},
+    {file = "coverage-7.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07e2ca0ad381b91350c0ed49d52699b625aab2b44b65e1b4e02fa9df0e92ad2d"},
+    {file = "coverage-7.6.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c44fee9975f04b33331cb8eb272827111efc8930cfd582e0320613263ca849ca"},
+    {file = "coverage-7.6.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:877abb17e6339d96bf08e7a622d05095e72b71f8afd8a9fefc82cf30ed944163"},
+    {file = "coverage-7.6.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3e0cadcf6733c09154b461f1ca72d5416635e5e4ec4e536192180d34ec160f8a"},
+    {file = "coverage-7.6.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c3c02d12f837d9683e5ab2f3d9844dc57655b92c74e286c262e0fc54213c216d"},
+    {file = "coverage-7.6.1-cp312-cp312-win32.whl", hash = "sha256:e05882b70b87a18d937ca6768ff33cc3f72847cbc4de4491c8e73880766718e5"},
+    {file = "coverage-7.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:b5d7b556859dd85f3a541db6a4e0167b86e7273e1cdc973e5b175166bb634fdb"},
+    {file = "coverage-7.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a4acd025ecc06185ba2b801f2de85546e0b8ac787cf9d3b06e7e2a69f925b106"},
+    {file = "coverage-7.6.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a6d3adcf24b624a7b778533480e32434a39ad8fa30c315208f6d3e5542aeb6e9"},
+    {file = "coverage-7.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0c212c49b6c10e6951362f7c6df3329f04c2b1c28499563d4035d964ab8e08c"},
+    {file = "coverage-7.6.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e81d7a3e58882450ec4186ca59a3f20a5d4440f25b1cff6f0902ad890e6748a"},
+    {file = "coverage-7.6.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78b260de9790fd81e69401c2dc8b17da47c8038176a79092a89cb2b7d945d060"},
+    {file = "coverage-7.6.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a78d169acd38300060b28d600344a803628c3fd585c912cacc9ea8790fe96862"},
+    {file = "coverage-7.6.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2c09f4ce52cb99dd7505cd0fc8e0e37c77b87f46bc9c1eb03fe3bc9991085388"},
+    {file = "coverage-7.6.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6878ef48d4227aace338d88c48738a4258213cd7b74fd9a3d4d7582bb1d8a155"},
+    {file = "coverage-7.6.1-cp313-cp313-win32.whl", hash = "sha256:44df346d5215a8c0e360307d46ffaabe0f5d3502c8a1cefd700b34baf31d411a"},
+    {file = "coverage-7.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:8284cf8c0dd272a247bc154eb6c95548722dce90d098c17a883ed36e67cdb129"},
+    {file = "coverage-7.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d3296782ca4eab572a1a4eca686d8bfb00226300dcefdf43faa25b5242ab8a3e"},
+    {file = "coverage-7.6.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:502753043567491d3ff6d08629270127e0c31d4184c4c8d98f92c26f65019962"},
+    {file = "coverage-7.6.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a89ecca80709d4076b95f89f308544ec8f7b4727e8a547913a35f16717856cb"},
+    {file = "coverage-7.6.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a318d68e92e80af8b00fa99609796fdbcdfef3629c77c6283566c6f02c6d6704"},
+    {file = "coverage-7.6.1-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13b0a73a0896988f053e4fbb7de6d93388e6dd292b0d87ee51d106f2c11b465b"},
+    {file = "coverage-7.6.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4421712dbfc5562150f7554f13dde997a2e932a6b5f352edcce948a815efee6f"},
+    {file = "coverage-7.6.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:166811d20dfea725e2e4baa71fffd6c968a958577848d2131f39b60043400223"},
+    {file = "coverage-7.6.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:225667980479a17db1048cb2bf8bfb39b8e5be8f164b8f6628b64f78a72cf9d3"},
+    {file = "coverage-7.6.1-cp313-cp313t-win32.whl", hash = "sha256:170d444ab405852903b7d04ea9ae9b98f98ab6d7e63e1115e82620807519797f"},
+    {file = "coverage-7.6.1-cp313-cp313t-win_amd64.whl", hash = "sha256:b9f222de8cded79c49bf184bdbc06630d4c58eec9459b939b4a690c82ed05657"},
+    {file = "coverage-7.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6db04803b6c7291985a761004e9060b2bca08da6d04f26a7f2294b8623a0c1a0"},
+    {file = "coverage-7.6.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f1adfc8ac319e1a348af294106bc6a8458a0f1633cc62a1446aebc30c5fa186a"},
+    {file = "coverage-7.6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a95324a9de9650a729239daea117df21f4b9868ce32e63f8b650ebe6cef5595b"},
+    {file = "coverage-7.6.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b43c03669dc4618ec25270b06ecd3ee4fa94c7f9b3c14bae6571ca00ef98b0d3"},
+    {file = "coverage-7.6.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8929543a7192c13d177b770008bc4e8119f2e1f881d563fc6b6305d2d0ebe9de"},
+    {file = "coverage-7.6.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:a09ece4a69cf399510c8ab25e0950d9cf2b42f7b3cb0374f95d2e2ff594478a6"},
+    {file = "coverage-7.6.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:9054a0754de38d9dbd01a46621636689124d666bad1936d76c0341f7d71bf569"},
+    {file = "coverage-7.6.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0dbde0f4aa9a16fa4d754356a8f2e36296ff4d83994b2c9d8398aa32f222f989"},
+    {file = "coverage-7.6.1-cp38-cp38-win32.whl", hash = "sha256:da511e6ad4f7323ee5702e6633085fb76c2f893aaf8ce4c51a0ba4fc07580ea7"},
+    {file = "coverage-7.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:3f1156e3e8f2872197af3840d8ad307a9dd18e615dc64d9ee41696f287c57ad8"},
+    {file = "coverage-7.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:abd5fd0db5f4dc9289408aaf34908072f805ff7792632250dcb36dc591d24255"},
+    {file = "coverage-7.6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:547f45fa1a93154bd82050a7f3cddbc1a7a4dd2a9bf5cb7d06f4ae29fe94eaf8"},
+    {file = "coverage-7.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:645786266c8f18a931b65bfcefdbf6952dd0dea98feee39bd188607a9d307ed2"},
+    {file = "coverage-7.6.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9e0b2df163b8ed01d515807af24f63de04bebcecbd6c3bfeff88385789fdf75a"},
+    {file = "coverage-7.6.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:609b06f178fe8e9f89ef676532760ec0b4deea15e9969bf754b37f7c40326dbc"},
+    {file = "coverage-7.6.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:702855feff378050ae4f741045e19a32d57d19f3e0676d589df0575008ea5004"},
+    {file = "coverage-7.6.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:2bdb062ea438f22d99cba0d7829c2ef0af1d768d1e4a4f528087224c90b132cb"},
+    {file = "coverage-7.6.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:9c56863d44bd1c4fe2abb8a4d6f5371d197f1ac0ebdee542f07f35895fc07f36"},
+    {file = "coverage-7.6.1-cp39-cp39-win32.whl", hash = "sha256:6e2cd258d7d927d09493c8df1ce9174ad01b381d4729a9d8d4e38670ca24774c"},
+    {file = "coverage-7.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:06a737c882bd26d0d6ee7269b20b12f14a8704807a01056c80bb881a4b2ce6ca"},
+    {file = "coverage-7.6.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:e9a6e0eb86070e8ccaedfbd9d38fec54864f3125ab95419970575b42af7541df"},
+    {file = "coverage-7.6.1.tar.gz", hash = "sha256:953510dfb7b12ab69d20135a0662397f077c59b1e6379a768e97c59d852ee51d"},
+]
+
+[package.dependencies]
+tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""}
+
+[package.extras]
+toml = ["tomli ; python_full_version <= \"3.11.0a6\""]
+
 [[package]]
 name = "cryptography"
 version = "41.0.4"
 description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
 optional = false
 python-versions = ">=3.7"
+groups = ["filesystem", "salesforce", "scrapy", "unstructured_data"]
 files = [
     {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839"},
     {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f"},
@@ -1013,6 +1094,7 @@ version = "1.2.0"
 description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0"
 optional = false
 python-versions = ">=3.7"
+groups = ["scrapy"]
 files = [
     {file = "cssselect-1.2.0-py2.py3-none-any.whl", hash = "sha256:da1885f0c10b60c03ed5eccbb6b68d6eff248d91976fcde348f395d54c9fd35e"},
     {file = "cssselect-1.2.0.tar.gz", hash = "sha256:666b19839cfaddb9ce9d36bfe4c969132c647b92fc9088c4e23f786b30f1b3dc"},
@@ -1024,6 +1106,7 @@ version = "2.2.1"
 description = "Library to convert python requests object to curl command."
 optional = false
 python-versions = "*"
+groups = ["facebook_ads"]
 files = [
     {file = "curlify-2.2.1.tar.gz", hash = "sha256:0d3f02e7235faf952de8ef45ef469845196d30632d5838bcd5aee217726ddd6d"},
 ]
@@ -1037,6 +1120,7 @@ version = "0.5.9"
 description = "Easily serialize dataclasses to and from JSON"
 optional = false
 python-versions = ">=3.6"
+groups = ["unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "dataclasses-json-0.5.9.tar.gz", hash = "sha256:e9ac87b73edc0141aafbce02b44e93553c3123ad574958f0fe52a534b6707e8e"},
     {file = "dataclasses_json-0.5.9-py3-none-any.whl", hash = "sha256:1280542631df1c375b7bc92e5b86d39e06c44760d7e3571a537b3b8acabf2f0c"},
@@ -1048,7 +1132,7 @@ marshmallow-enum = ">=1.5.1,<2.0.0"
 typing-inspect = ">=0.4.0"
 
 [package.extras]
-dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest (>=7.2.0)", "setuptools", "simplejson", "twine", "types-dataclasses", "wheel"]
+dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest (>=7.2.0)", "setuptools", "simplejson", "twine", "types-dataclasses ; python_version == \"3.6\"", "wheel"]
 
 [[package]]
 name = "db-dtypes"
@@ -1056,6 +1140,7 @@ version = "1.3.1"
 description = "Pandas Data Types for SQL systems (BigQuery, Spanner)"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "db_dtypes-1.3.1-py2.py3-none-any.whl", hash = "sha256:fbc9d1740d94aaf2b5ae24601cfc875a69b4635bb9d049e3c3036e9f10203af8"},
     {file = "db_dtypes-1.3.1.tar.gz", hash = "sha256:a058f05dab100891f3e76a7a3db9ad0f107f18dd3d1bdd13680749a2f07eae77"},
@@ -1073,6 +1158,7 @@ version = "5.1.1"
 description = "Decorators for Humans"
 optional = false
 python-versions = ">=3.5"
+groups = ["main", "filesystem"]
 files = [
     {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"},
     {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
@@ -1080,17 +1166,17 @@ files = [
 
 [[package]]
 name = "dlt"
-version = "1.3.0"
+version = "1.8.1"
 description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run."
 optional = false
-python-versions = "<3.13,>=3.8.1"
+python-versions = "<3.14,>=3.9"
+groups = ["main", "dltpure"]
 files = [
-    {file = "dlt-1.3.0-py3-none-any.whl", hash = "sha256:e2583ed0ad4a0d9941b8f9cb0e078f4443bcbeb0e1cf1cce586cf35107ccf266"},
-    {file = "dlt-1.3.0.tar.gz", hash = "sha256:57eecee99ace25b6d37027a78f59f8c735d1913cc81f1101e1b47bf96fc544b8"},
+    {file = "dlt-1.8.1-py3-none-any.whl", hash = "sha256:154699cc70e4263a294b576ca8d22bb7e153bfb872acabba08fcfecd9b9d285a"},
+    {file = "dlt-1.8.1.tar.gz", hash = "sha256:6ff9c56d7ea416cd01bce874348023042a441d6f83b35495d234efd709d9fd77"},
 ]
 
 [package.dependencies]
-astunparse = ">=1.6.3"
 click = ">=7.1"
 db-dtypes = {version = ">=1.2.0", optional = true, markers = "extra == \"gcp\" or extra == \"bigquery\""}
 duckdb = {version = ">=0.9", optional = true, markers = "extra == \"duckdb\" or extra == \"motherduck\""}
@@ -1099,7 +1185,6 @@ gcsfs = {version = ">=2022.4.0", optional = true, markers = "extra == \"gcp\" or
 gitpython = ">=3.1.29"
 giturlparse = ">=0.10.0"
 google-cloud-bigquery = {version = ">=2.26.0", optional = true, markers = "extra == \"gcp\" or extra == \"bigquery\""}
-graphlib-backport = {version = "*", markers = "python_version < \"3.9\""}
 grpcio = {version = ">=1.50.0", optional = true, markers = "extra == \"gcp\" or extra == \"bigquery\""}
 hexbytes = ">=0.2.2"
 humanize = ">=4.4.0"
@@ -1108,50 +1193,54 @@ makefun = ">=1.15.0"
 orjson = {version = ">=3.6.7,<3.9.11 || >3.9.11,<3.9.12 || >3.9.12,<3.9.13 || >3.9.13,<3.9.14 || >3.9.14,<3.10.1 || >3.10.1,<4", markers = "platform_python_implementation != \"PyPy\""}
 packaging = ">=21.1"
 pathvalidate = ">=2.5.2"
-pendulum = ">=2.1.2"
+pendulum = {version = ">=2.1.2", markers = "python_version < \"3.13\""}
 pluggy = ">=1.3.0"
-psycopg2-binary = {version = ">=2.9.1", optional = true, markers = "extra == \"postgres\" or extra == \"redshift\""}
-psycopg2cffi = {version = ">=2.9.0", optional = true, markers = "platform_python_implementation == \"PyPy\" and (extra == \"postgres\" or extra == \"redshift\")"}
-pyarrow = {version = ">=12.0.0", optional = true, markers = "extra == \"bigquery\" or extra == \"parquet\" or extra == \"motherduck\" or extra == \"athena\" or extra == \"synapse\" or extra == \"clickhouse\" or extra == \"dremio\" or extra == \"lancedb\" or extra == \"deltalake\""}
+psycopg2-binary = {version = ">=2.9.1", optional = true, markers = "extra == \"postgres\" or extra == \"redshift\" or extra == \"postgis\""}
+psycopg2cffi = {version = ">=2.9.0", optional = true, markers = "platform_python_implementation == \"PyPy\" and (extra == \"postgres\" or extra == \"redshift\" or extra == \"postgis\")"}
+pyarrow = {version = ">=12.0.0,<18", optional = true, markers = "python_version >= \"3.9\" and python_version < \"3.13\" and (extra == \"bigquery\" or extra == \"parquet\" or extra == \"motherduck\" or extra == \"athena\" or extra == \"synapse\" or extra == \"clickhouse\" or extra == \"dremio\" or extra == \"lancedb\" or extra == \"deltalake\" or extra == \"pyiceberg\")"}
 pytz = ">=2022.6"
+pywin32 = {version = ">=306", markers = "sys_platform == \"win32\""}
 PyYAML = ">=5.4.1"
 requests = ">=2.26.0"
 requirements-parser = ">=0.5.0"
-semver = ">=2.13.0"
+rich-argparse = ">=1.6.0,<2.0.0"
+semver = ">=3.0.0"
 setuptools = ">=65.6.0"
 simplejson = ">=3.17.5"
 tenacity = ">=8.0.2"
 tomlkit = ">=0.11.3"
-typing-extensions = ">=4.0.0"
+typing-extensions = ">=4.8.0"
 tzdata = ">=2022.1"
 win-precise-time = {version = ">=1.4.2", markers = "os_name == \"nt\""}
 
 [package.extras]
-athena = ["botocore (>=1.28)", "pyarrow (>=12.0.0)", "pyathena (>=2.9.6)", "s3fs (>=2022.4.0)"]
-az = ["adlfs (>=2022.4.0)"]
-bigquery = ["db-dtypes (>=1.2.0)", "gcsfs (>=2022.4.0)", "google-cloud-bigquery (>=2.26.0)", "grpcio (>=1.50.0)", "pyarrow (>=12.0.0)"]
-cli = ["cron-descriptor (>=1.2.32)", "pipdeptree (>=2.9.0,<2.10)"]
-clickhouse = ["adlfs (>=2022.4.0)", "clickhouse-connect (>=0.7.7)", "clickhouse-driver (>=0.2.7)", "gcsfs (>=2022.4.0)", "pyarrow (>=12.0.0)", "s3fs (>=2022.4.0)"]
-databricks = ["databricks-sql-connector (>=2.9.3)"]
-deltalake = ["deltalake (>=0.19.0)", "pyarrow (>=12.0.0)"]
-dremio = ["pyarrow (>=12.0.0)"]
+athena = ["botocore (>=1.28)", "pyarrow (>=12.0.0,<18) ; python_version >= \"3.9\" and python_version < \"3.13\"", "pyarrow (>=18.0.0) ; python_version >= \"3.13\"", "pyathena (>=2.9.6)", "s3fs (>=2022.4.0)"]
+az = ["adlfs (>=2024.7.0)"]
+bigquery = ["db-dtypes (>=1.2.0)", "gcsfs (>=2022.4.0)", "google-cloud-bigquery (>=2.26.0)", "grpcio (>=1.50.0)", "pyarrow (>=12.0.0,<18) ; python_version >= \"3.9\" and python_version < \"3.13\"", "pyarrow (>=18.0.0) ; python_version >= \"3.13\""]
+cli = ["cron-descriptor (>=1.2.32)", "pip (>=23.0.0)", "pipdeptree (>=2.9.0,<2.10)"]
+clickhouse = ["adlfs (>=2024.7.0)", "clickhouse-connect (>=0.7.7)", "clickhouse-driver (>=0.2.7)", "gcsfs (>=2022.4.0)", "pyarrow (>=12.0.0,<18) ; python_version >= \"3.9\" and python_version < \"3.13\"", "pyarrow (>=18.0.0) ; python_version >= \"3.13\"", "s3fs (>=2022.4.0)"]
+databricks = ["databricks-sdk (>=0.38.0)", "databricks-sql-connector (>=2.9.3,<4) ; python_version <= \"3.12\"", "databricks-sql-connector (>=3.6.0) ; python_version >= \"3.13\""]
+deltalake = ["deltalake (>=0.21.0)", "pyarrow (>=12.0.0,<18) ; python_version >= \"3.9\" and python_version < \"3.13\"", "pyarrow (>=18.0.0) ; python_version >= \"3.13\""]
+dremio = ["pyarrow (>=12.0.0,<18) ; python_version >= \"3.9\" and python_version < \"3.13\"", "pyarrow (>=18.0.0) ; python_version >= \"3.13\""]
 duckdb = ["duckdb (>=0.9)"]
 filesystem = ["botocore (>=1.28)", "s3fs (>=2022.4.0)", "sqlglot (>=20.0.0)"]
 gcp = ["db-dtypes (>=1.2.0)", "gcsfs (>=2022.4.0)", "google-cloud-bigquery (>=2.26.0)", "grpcio (>=1.50.0)"]
 gs = ["gcsfs (>=2022.4.0)"]
-lancedb = ["lancedb (>=0.8.2)", "pyarrow (>=12.0.0)", "tantivy (>=0.22.0)"]
-motherduck = ["duckdb (>=0.9)", "pyarrow (>=12.0.0)"]
+lancedb = ["lancedb (>=0.8.2) ; python_version < \"3.13\"", "pyarrow (>=12.0.0,<18) ; python_version >= \"3.9\" and python_version < \"3.13\"", "pyarrow (>=18.0.0) ; python_version >= \"3.13\"", "tantivy (>=0.22.0)"]
+motherduck = ["duckdb (>=0.9)", "pyarrow (>=12.0.0,<18) ; python_version >= \"3.9\" and python_version < \"3.13\"", "pyarrow (>=18.0.0) ; python_version >= \"3.13\""]
 mssql = ["pyodbc (>=4.0.39)"]
-parquet = ["pyarrow (>=12.0.0)"]
-postgres = ["psycopg2-binary (>=2.9.1)", "psycopg2cffi (>=2.9.0)"]
+parquet = ["pyarrow (>=12.0.0,<18) ; python_version >= \"3.9\" and python_version < \"3.13\"", "pyarrow (>=18.0.0) ; python_version >= \"3.13\""]
+postgis = ["psycopg2-binary (>=2.9.1)", "psycopg2cffi (>=2.9.0) ; platform_python_implementation == \"PyPy\""]
+postgres = ["psycopg2-binary (>=2.9.1)", "psycopg2cffi (>=2.9.0) ; platform_python_implementation == \"PyPy\""]
+pyiceberg = ["pyarrow (>=12.0.0,<18) ; python_version >= \"3.9\" and python_version < \"3.13\"", "pyarrow (>=18.0.0) ; python_version >= \"3.13\"", "pyiceberg (>=0.8.1)", "sqlalchemy (>=1.4)"]
 qdrant = ["qdrant-client[fastembed] (>=1.8)"]
-redshift = ["psycopg2-binary (>=2.9.1)", "psycopg2cffi (>=2.9.0)"]
+redshift = ["psycopg2-binary (>=2.9.1)", "psycopg2cffi (>=2.9.0) ; platform_python_implementation == \"PyPy\""]
 s3 = ["botocore (>=1.28)", "s3fs (>=2022.4.0)"]
 sftp = ["paramiko (>=3.3.0)"]
 snowflake = ["snowflake-connector-python (>=3.5.0)"]
 sql-database = ["sqlalchemy (>=1.4)"]
 sqlalchemy = ["alembic (>1.10.0)", "sqlalchemy (>=1.4)"]
-synapse = ["adlfs (>=2022.4.0)", "pyarrow (>=12.0.0)", "pyodbc (>=4.0.39)"]
+synapse = ["adlfs (>=2024.7.0)", "pyarrow (>=12.0.0,<18) ; python_version >= \"3.9\" and python_version < \"3.13\"", "pyarrow (>=18.0.0) ; python_version >= \"3.13\"", "pyodbc (>=4.0.39)"]
 weaviate = ["weaviate-client (>=3.22)"]
 
 [[package]]
@@ -1160,6 +1249,7 @@ version = "2.4.2"
 description = "DNS toolkit"
 optional = false
 python-versions = ">=3.8,<4.0"
+groups = ["mongodb"]
 files = [
     {file = "dnspython-2.4.2-py3-none-any.whl", hash = "sha256:57c6fbaaeaaf39c891292012060beb141791735dbb4004798328fc2c467402d8"},
     {file = "dnspython-2.4.2.tar.gz", hash = "sha256:8dcfae8c7460a2f84b4072e26f1c9f4101ca20c071649cb7c34e8b6a93d58984"},
@@ -1179,13 +1269,13 @@ version = "3.6.1"
 description = "Helpful functions for Python 🐍 🛠️"
 optional = false
 python-versions = ">=3.6"
+groups = ["dev"]
 files = [
     {file = "domdf_python_tools-3.6.1-py3-none-any.whl", hash = "sha256:e18158460850957f18e740eb94ede56f580ddb0cb162ab9d9834ed8bbb1b6431"},
     {file = "domdf_python_tools-3.6.1.tar.gz", hash = "sha256:acc04563d23bce4d437dd08af6b9bea788328c412772a044d8ca428a7ad861be"},
 ]
 
 [package.dependencies]
-importlib-metadata = {version = ">=3.6.0", markers = "python_version < \"3.9\""}
 natsort = ">=7.0.1"
 typing-extensions = ">=3.7.4.1"
 
@@ -1199,6 +1289,7 @@ version = "0.10.3"
 description = "DuckDB in-process database"
 optional = false
 python-versions = ">=3.7.0"
+groups = ["main", "unstructured_data"]
 files = [
     {file = "duckdb-0.10.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd25cc8d001c09a19340739ba59d33e12a81ab285b7a6bed37169655e1cefb31"},
     {file = "duckdb-0.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f9259c637b917ca0f4c63887e8d9b35ec248f5d987c886dfc4229d66a791009"},
@@ -1255,6 +1346,7 @@ version = "1.1.0"
 description = "An implementation of lxml.xmlfile for the standard library"
 optional = false
 python-versions = ">=3.6"
+groups = ["unstructured_data"]
 files = [
     {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"},
     {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"},
@@ -1266,6 +1358,8 @@ version = "1.1.3"
 description = "Backport of PEP 654 (exception groups)"
 optional = false
 python-versions = ">=3.7"
+groups = ["dev", "pytest", "unstructured_data"]
+markers = "python_version < \"3.11\""
 files = [
     {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash = "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"},
     {file = "exceptiongroup-1.1.3.tar.gz", hash = "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"},
@@ -1280,6 +1374,7 @@ version = "17.0.4"
 description = "Facebook Business SDK"
 optional = false
 python-versions = "*"
+groups = ["facebook_ads"]
 files = [
     {file = "facebook_business-17.0.4-py3-none-any.whl", hash = "sha256:c3a4afbe019c1fd2454eeeefb4e895ed3276d506115fbf9a993135f6af1c1a88"},
     {file = "facebook_business-17.0.4.tar.gz", hash = "sha256:52b516a237ab4cbf083053d3cc062995ff4732fca487b46543c4eab3bdbbf188"},
@@ -1298,6 +1393,7 @@ version = "0.85.1"
 description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
 optional = false
 python-versions = ">=3.7"
+groups = ["unstructured_data"]
 files = [
     {file = "fastapi-0.85.1-py3-none-any.whl", hash = "sha256:de3166b6b1163dc22da4dc4ebdc3192fcbac7700dd1870a1afa44de636a636b5"},
     {file = "fastapi-0.85.1.tar.gz", hash = "sha256:1facd097189682a4ff11cbd01334a992e51b56be663b2bd50c2c09523624f144"},
@@ -1319,6 +1415,7 @@ version = "3.12.4"
 description = "A platform independent file lock."
 optional = false
 python-versions = ">=3.8"
+groups = ["scrapy", "unstructured_data"]
 files = [
     {file = "filelock-3.12.4-py3-none-any.whl", hash = "sha256:08c21d87ded6e2b9da6728c3dff51baf1dcecf973b768ef35bcbc3447edb9ad4"},
     {file = "filelock-3.12.4.tar.gz", hash = "sha256:2e6f249f1f3654291606e046b09f1fd5eac39b360664c27f5aad072012f8bcbd"},
@@ -1327,7 +1424,7 @@ files = [
 [package.extras]
 docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)", "sphinx-autodoc-typehints (>=1.24)"]
 testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=2.1)"]
-typing = ["typing-extensions (>=4.7.1)"]
+typing = ["typing-extensions (>=4.7.1) ; python_version < \"3.11\""]
 
 [[package]]
 name = "filetype"
@@ -1335,6 +1432,7 @@ version = "1.2.0"
 description = "Infer file type and MIME type of any file/buffer. No external dependencies."
 optional = false
 python-versions = "*"
+groups = ["unstructured_data"]
 files = [
     {file = "filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25"},
     {file = "filetype-1.2.0.tar.gz", hash = "sha256:66b56cd6474bf41d8c54660347d37afcc3f7d1970648de365c102ef77548aadb"},
@@ -1346,6 +1444,7 @@ version = "6.1.0"
 description = "the modular source code checker: pep8 pyflakes and co"
 optional = false
 python-versions = ">=3.8.1"
+groups = ["dev"]
 files = [
     {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"},
     {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"},
@@ -1362,6 +1461,7 @@ version = "22.12.6"
 description = "A plugin for flake8 finding likely bugs and design problems in your program. Contains warnings that don't belong in pyflakes and pycodestyle."
 optional = false
 python-versions = ">=3.7"
+groups = ["dev"]
 files = [
     {file = "flake8-bugbear-22.12.6.tar.gz", hash = "sha256:4cdb2c06e229971104443ae293e75e64c6107798229202fbe4f4091427a30ac0"},
     {file = "flake8_bugbear-22.12.6-py3-none-any.whl", hash = "sha256:b69a510634f8a9c298dfda2b18a8036455e6b19ecac4fe582e4d7a0abfa50a30"},
@@ -1380,6 +1480,7 @@ version = "2.1.0"
 description = "Check for python builtins being used as variables or parameters."
 optional = false
 python-versions = ">=3.7"
+groups = ["dev"]
 files = [
     {file = "flake8-builtins-2.1.0.tar.gz", hash = "sha256:12ff1ee96dd4e1f3141141ee6c45a5c7d3b3c440d0949e9b8d345c42b39c51d4"},
     {file = "flake8_builtins-2.1.0-py3-none-any.whl", hash = "sha256:469e8f03d6d0edf4b1e62b6d5a97dce4598592c8a13ec8f0952e7a185eba50a1"},
@@ -1397,6 +1498,7 @@ version = "0.5.0.post1"
 description = "A Flake8 plugin to identify incorrect use of encodings."
 optional = false
 python-versions = ">=3.6"
+groups = ["dev"]
 files = [
     {file = "flake8_encodings-0.5.0.post1-py3-none-any.whl", hash = "sha256:d2fecca0e89ba09c86e5d61cf6bdb1b337f0d74746aac67bbcf0c517b4cb6cba"},
     {file = "flake8_encodings-0.5.0.post1.tar.gz", hash = "sha256:082c0163325c85b438a8106e876283b5ed3cbfc53e68d89130d70be8be4c9977"},
@@ -1418,6 +1520,7 @@ version = "0.2.1"
 description = "A helper library for Flake8 plugins."
 optional = false
 python-versions = ">=3.6"
+groups = ["dev"]
 files = [
     {file = "flake8_helper-0.2.1-py3-none-any.whl", hash = "sha256:9123cdf351ad32ee8a51b85036052302c478122d62fb512c0773e111b3d05241"},
     {file = "flake8_helper-0.2.1.tar.gz", hash = "sha256:479f86d1c52df8e49ff876ecd3873242699f93eeece7e6675cdca9c37c9b0a16"},
@@ -1432,6 +1535,7 @@ version = "4.10.0"
 description = "A flake8 plugin that helps you write tidier imports."
 optional = false
 python-versions = ">=3.8"
+groups = ["dev"]
 files = [
     {file = "flake8_tidy_imports-4.10.0-py3-none-any.whl", hash = "sha256:b0387fb2ea200441bd142309e716fb7b8f4b0937bdf5f8b7c0c118a5f5e2b8ed"},
     {file = "flake8_tidy_imports-4.10.0.tar.gz", hash = "sha256:bd6cf86465402d2b86903009b748d85a628e599e17b76e810c9857e3a2815173"},
@@ -1446,6 +1550,7 @@ version = "23.5.26"
 description = "The FlatBuffers serialization format for Python"
 optional = false
 python-versions = "*"
+groups = ["unstructured_data"]
 files = [
     {file = "flatbuffers-23.5.26-py2.py3-none-any.whl", hash = "sha256:c0ff356da363087b915fde4b8b45bdda73432fc17cddb3c8157472eab1422ad1"},
     {file = "flatbuffers-23.5.26.tar.gz", hash = "sha256:9ea1144cac05ce5d86e2859f431c6cd5e66cd9c78c558317c7955fb8d4c78d89"},
@@ -1457,6 +1562,7 @@ version = "1.4.0"
 description = "A list-like structure which implements collections.abc.MutableSequence"
 optional = false
 python-versions = ">=3.8"
+groups = ["main", "facebook_ads", "filesystem", "unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"},
     {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"},
@@ -1527,6 +1633,7 @@ version = "2024.3.1"
 description = "File-system specification"
 optional = false
 python-versions = ">=3.8"
+groups = ["main", "dltpure", "filesystem", "unstructured_data"]
 files = [
     {file = "fsspec-2024.3.1-py3-none-any.whl", hash = "sha256:918d18d41bf73f0e2b261824baeb1b124bcf771767e3a26425cd7dec3332f512"},
     {file = "fsspec-2024.3.1.tar.gz", hash = "sha256:f39780e282d7d117ffb42bb96992f8a90795e4d0fb0f661a70ca39fe9c43ded9"},
@@ -1562,6 +1669,7 @@ version = "2024.3.1"
 description = "Convenient Filesystem interface over GCS"
 optional = false
 python-versions = ">=3.8"
+groups = ["main", "filesystem"]
 files = [
     {file = "gcsfs-2024.3.1-py2.py3-none-any.whl", hash = "sha256:57ec693a25b74637f00e7a834b4f1dcd7a7511217f7f640072d6fb51a7794bac"},
     {file = "gcsfs-2024.3.1.tar.gz", hash = "sha256:d34bdb8a1a51e1b2552ae9e47d1933dec41162ba6b6cc8ea470aef693a8a6aa6"},
@@ -1586,6 +1694,7 @@ version = "4.0.10"
 description = "Git Object Database"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "dev", "dltpure"]
 files = [
     {file = "gitdb-4.0.10-py3-none-any.whl", hash = "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7"},
     {file = "gitdb-4.0.10.tar.gz", hash = "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a"},
@@ -1600,6 +1709,7 @@ version = "3.1.37"
 description = "GitPython is a Python library used to interact with Git repositories"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "dev", "dltpure"]
 files = [
     {file = "GitPython-3.1.37-py3-none-any.whl", hash = "sha256:5f4c4187de49616d710a77e98ddf17b4782060a1788df441846bddefbb89ab33"},
     {file = "GitPython-3.1.37.tar.gz", hash = "sha256:f9b9ddc0761c125d5780eab2d64be4873fc6817c2899cbcb34b02344bdc7bc54"},
@@ -1617,6 +1727,7 @@ version = "0.12.0"
 description = "A Git URL parsing module (supports parsing and rewriting)"
 optional = false
 python-versions = ">=3.8"
+groups = ["main", "dltpure"]
 files = [
     {file = "giturlparse-0.12.0-py2.py3-none-any.whl", hash = "sha256:412b74f2855f1da2fefa89fd8dde62df48476077a72fc19b62039554d27360eb"},
     {file = "giturlparse-0.12.0.tar.gz", hash = "sha256:c0fff7c21acc435491b1779566e038757a205c1ffdcb47e4f81ea52ad8c3859a"},
@@ -1628,6 +1739,7 @@ version = "21.3.0"
 description = "Client library for the Google Ads API"
 optional = false
 python-versions = ">=3.7"
+groups = ["google_ads"]
 files = [
     {file = "google-ads-21.3.0.tar.gz", hash = "sha256:bd4fcb6bd5e55bace413e889e82012d48578aa28f7b4d726c86e2d594c753c6c"},
     {file = "google_ads-21.3.0-py3-none-any.whl", hash = "sha256:961943fc737941a38f1a826681f7974448df7c60e6c8db2ac7168b26d66738a7"},
@@ -1653,6 +1765,7 @@ version = "0.16.3"
 description = "Google Analytics Data API client library"
 optional = false
 python-versions = ">=3.7"
+groups = ["google_analytics"]
 files = [
     {file = "google-analytics-data-0.16.3.tar.gz", hash = "sha256:f29431ec63ab462f7a9b42227521d148c877307c629e308c284025ad834aab52"},
     {file = "google_analytics_data-0.16.3-py2.py3-none-any.whl", hash = "sha256:bb73f36707a5a2966e87c9439c25cd8004d58305b0ef01c6f2f50128c08feb13"},
@@ -1661,8 +1774,8 @@ files = [
 [package.dependencies]
 google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]}
 proto-plus = [
-    {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""},
     {version = ">=1.22.2,<2.0.0dev", markers = "python_version >= \"3.11\""},
+    {version = ">=1.22.0,<2.0.0dev"},
 ]
 protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev"
 
@@ -1672,6 +1785,7 @@ version = "2.12.0"
 description = "Google API client core library"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets"]
 files = [
     {file = "google-api-core-2.12.0.tar.gz", hash = "sha256:c22e01b1e3c4dcd90998494879612c38d0a3411d1f7b679eb89e2abe3ce1f553"},
     {file = "google_api_core-2.12.0-py3-none-any.whl", hash = "sha256:ec6054f7d64ad13b41e43d96f735acbd763b0f3b695dabaa2d579673f6a6e160"},
@@ -1681,18 +1795,18 @@ files = [
 google-auth = ">=2.14.1,<3.0.dev0"
 googleapis-common-protos = ">=1.56.2,<2.0.dev0"
 grpcio = [
-    {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
     {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""},
+    {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
 ]
 grpcio-status = [
-    {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
     {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""},
+    {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "extra == \"grpc\""},
 ]
 protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0"
 requests = ">=2.18.0,<3.0.0.dev0"
 
 [package.extras]
-grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"]
+grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev) ; python_version >= \"3.11\"", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0) ; python_version >= \"3.11\""]
 grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"]
 grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"]
 
@@ -1702,6 +1816,7 @@ version = "2.129.0"
 description = "Google API Client Library for Python"
 optional = false
 python-versions = ">=3.7"
+groups = ["google_ads", "google_analytics", "google_sheets"]
 files = [
     {file = "google-api-python-client-2.129.0.tar.gz", hash = "sha256:984cc8cc8eb4923468b1926d2b8effc5b459a4dda3c845896eb87c153b28ef84"},
     {file = "google_api_python_client-2.129.0-py2.py3-none-any.whl", hash = "sha256:d50f7e2dfdbb7fc2732f6a0cba1c54d7bb676390679526c6bb628c901e43ec86"},
@@ -1720,6 +1835,7 @@ version = "2.23.3"
 description = "Google Authentication Library"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets"]
 files = [
     {file = "google-auth-2.23.3.tar.gz", hash = "sha256:6864247895eea5d13b9c57c9e03abb49cb94ce2dc7c58e91cba3248c7477c9e3"},
     {file = "google_auth-2.23.3-py2.py3-none-any.whl", hash = "sha256:a8f4608e65c244ead9e0538f181a96c6e11199ec114d41f1d7b1bffa96937bda"},
@@ -1743,6 +1859,7 @@ version = "0.2.0"
 description = "Google Authentication Library: httplib2 transport"
 optional = false
 python-versions = "*"
+groups = ["google_ads", "google_analytics", "google_sheets"]
 files = [
     {file = "google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05"},
     {file = "google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d"},
@@ -1758,6 +1875,7 @@ version = "1.1.0"
 description = "Google Authentication Library"
 optional = false
 python-versions = ">=3.6"
+groups = ["main", "filesystem", "google_ads", "google_analytics"]
 files = [
     {file = "google-auth-oauthlib-1.1.0.tar.gz", hash = "sha256:83ea8c3b0881e453790baff4448e8a6112ac8778d1de9da0b68010b843937afb"},
     {file = "google_auth_oauthlib-1.1.0-py2.py3-none-any.whl", hash = "sha256:089c6e587d36f4803ac7e0720c045c6a8b1fd1790088b8424975b90d0ee61c12"},
@@ -1776,6 +1894,7 @@ version = "3.25.0"
 description = "Google BigQuery API client library"
 optional = false
 python-versions = ">=3.7"
+groups = ["main"]
 files = [
     {file = "google-cloud-bigquery-3.25.0.tar.gz", hash = "sha256:5b2aff3205a854481117436836ae1403f11f2594e6810a98886afd57eda28509"},
     {file = "google_cloud_bigquery-3.25.0-py2.py3-none-any.whl", hash = "sha256:7f0c371bc74d2a7fb74dacbc00ac0f90c8c2bec2289b51dd6685a275873b1ce9"},
@@ -1791,14 +1910,14 @@ python-dateutil = ">=2.7.2,<3.0dev"
 requests = ">=2.21.0,<3.0.0dev"
 
 [package.extras]
-all = ["Shapely (>=1.8.4,<3.0.0dev)", "db-dtypes (>=0.3.0,<2.0.0dev)", "geopandas (>=0.9.0,<1.0dev)", "google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "importlib-metadata (>=1.0.0)", "ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)", "ipywidgets (>=7.7.0)", "opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)", "pandas (>=1.1.0)", "proto-plus (>=1.15.0,<2.0.0dev)", "protobuf (>=3.19.5,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev)", "pyarrow (>=3.0.0)", "tqdm (>=4.7.4,<5.0.0dev)"]
+all = ["Shapely (>=1.8.4,<3.0.0dev)", "db-dtypes (>=0.3.0,<2.0.0dev)", "geopandas (>=0.9.0,<1.0dev)", "google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev) ; python_version >= \"3.11\"", "importlib-metadata (>=1.0.0) ; python_version < \"3.8\"", "ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)", "ipywidgets (>=7.7.0)", "opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)", "pandas (>=1.1.0)", "proto-plus (>=1.15.0,<2.0.0dev)", "protobuf (>=3.19.5,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev)", "pyarrow (>=3.0.0)", "tqdm (>=4.7.4,<5.0.0dev)"]
 bigquery-v2 = ["proto-plus (>=1.15.0,<2.0.0dev)", "protobuf (>=3.19.5,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev)"]
-bqstorage = ["google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "pyarrow (>=3.0.0)"]
+bqstorage = ["google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev) ; python_version >= \"3.11\"", "pyarrow (>=3.0.0)"]
 geopandas = ["Shapely (>=1.8.4,<3.0.0dev)", "geopandas (>=0.9.0,<1.0dev)"]
 ipython = ["ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)"]
 ipywidgets = ["ipykernel (>=6.0.0)", "ipywidgets (>=7.7.0)"]
 opentelemetry = ["opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)"]
-pandas = ["db-dtypes (>=0.3.0,<2.0.0dev)", "importlib-metadata (>=1.0.0)", "pandas (>=1.1.0)", "pyarrow (>=3.0.0)"]
+pandas = ["db-dtypes (>=0.3.0,<2.0.0dev)", "importlib-metadata (>=1.0.0) ; python_version < \"3.8\"", "pandas (>=1.1.0)", "pyarrow (>=3.0.0)"]
 tqdm = ["tqdm (>=4.7.4,<5.0.0dev)"]
 
 [[package]]
@@ -1807,6 +1926,7 @@ version = "2.3.3"
 description = "Google Cloud API client core library"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "filesystem"]
 files = [
     {file = "google-cloud-core-2.3.3.tar.gz", hash = "sha256:37b80273c8d7eee1ae816b3a20ae43585ea50506cb0e60f3cf5be5f87f1373cb"},
     {file = "google_cloud_core-2.3.3-py2.py3-none-any.whl", hash = "sha256:fbd11cad3e98a7e5b0343dc07cb1039a5ffd7a5bb96e1f1e27cee4bda4a90863"},
@@ -1825,6 +1945,7 @@ version = "2.12.0"
 description = "Google Cloud Storage API client library"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "filesystem"]
 files = [
     {file = "google-cloud-storage-2.12.0.tar.gz", hash = "sha256:57c0bcda2f5e11f008a155d8636d8381d5abab46b58e0cae0e46dd5e595e6b46"},
     {file = "google_cloud_storage-2.12.0-py2.py3-none-any.whl", hash = "sha256:bc52563439d42981b6e21b071a76da2791672776eda3ba99d13a8061ebbd6e5e"},
@@ -1847,6 +1968,7 @@ version = "1.5.0"
 description = "A python wrapper of the C library 'Google CRC32C'"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "filesystem"]
 files = [
     {file = "google-crc32c-1.5.0.tar.gz", hash = "sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7"},
     {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13"},
@@ -1927,6 +2049,7 @@ version = "2.6.0"
 description = "Utilities for Google Media Downloads and Resumable Uploads"
 optional = false
python-versions = ">= 3.7" +groups = ["main", "filesystem"] files = [ {file = "google-resumable-media-2.6.0.tar.gz", hash = "sha256:972852f6c65f933e15a4a210c2b96930763b47197cdf4aa5f5bea435efb626e7"}, {file = "google_resumable_media-2.6.0-py2.py3-none-any.whl", hash = "sha256:fc03d344381970f79eebb632a3c18bb1828593a2dc5572b5f90115ef7d11e81b"}, @@ -1945,6 +2068,7 @@ version = "1.61.0" description = "Common protobufs used in Google APIs" optional = false python-versions = ">=3.7" +groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets"] files = [ {file = "googleapis-common-protos-1.61.0.tar.gz", hash = "sha256:8a64866a97f6304a7179873a465d6eee97b7a24ec6cfd78e0f575e96b821240b"}, {file = "googleapis_common_protos-1.61.0-py2.py3-none-any.whl", hash = "sha256:22f1915393bb3245343f6efe87f6fe868532efc12aa26b391b15132e1279f1c0"}, @@ -1956,23 +2080,13 @@ protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4 [package.extras] grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] -[[package]] -name = "graphlib-backport" -version = "1.0.3" -description = "Backport of the Python 3.9 graphlib module for Python 3.6+" -optional = false -python-versions = ">=3.6,<4.0" -files = [ - {file = "graphlib_backport-1.0.3-py3-none-any.whl", hash = "sha256:24246967b9e7e6a91550bc770e6169585d35aa32790258579a8a3899a8c18fde"}, - {file = "graphlib_backport-1.0.3.tar.gz", hash = "sha256:7bb8fc7757b8ae4e6d8000a26cd49e9232aaa9a3aa57edb478474b8424bfaae2"}, -] - [[package]] name = "greenlet" version = "2.0.2" description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" +groups = ["dev", "pg_legacy_replication", "sql_database", "unstructured_data", "unstructured_data_lint"] files = [ {file = "greenlet-2.0.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:bdfea8c661e80d3c1c99ad7c3ff74e6e87184895bbaca6ee8cc61209f8b9b85d"}, {file = "greenlet-2.0.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:9d14b83fab60d5e8abe587d51c75b252bcc21683f24699ada8fb275d7712f5a9"}, @@ -2041,7 +2155,7 @@ files = [ ] [package.extras] -docs = ["Sphinx", "docutils (<0.18)"] +docs = ["Sphinx", "docutils (<0.18) ; python_version < \"3\""] test = ["objgraph", "psutil"] [[package]] @@ -2050,6 +2164,7 @@ version = "1.59.0" description = "HTTP/2-based RPC framework" optional = false python-versions = ">=3.7" +groups = ["main", "google_ads", "google_analytics"] files = [ {file = "grpcio-1.59.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:225e5fa61c35eeaebb4e7491cd2d768cd8eb6ed00f2664fa83a58f29418b39fd"}, {file = "grpcio-1.59.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b95ec8ecc4f703f5caaa8d96e93e40c7f589bad299a2617bdb8becbcce525539"}, @@ -2116,6 +2231,7 @@ version = "1.59.0" description = "Status proto mapping for gRPC" optional = false python-versions = ">=3.6" +groups = ["main", "google_ads", "google_analytics"] files = [ {file = "grpcio-status-1.59.0.tar.gz", hash = "sha256:f93b9c33e0a26162ef8431bfcffcc3e1fb217ccd8d7b5b3061b6e9f813e698b5"}, {file = "grpcio_status-1.59.0-py3-none-any.whl", hash = "sha256:cb5a222b14a80ee050bff9676623822e953bff0c50d2d29180de723652fdf10d"}, @@ -2132,6 +2248,7 @@ version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", 
hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, @@ -2143,6 +2260,7 @@ version = "0.3.1" description = "hexbytes: Python `bytes` subclass that decodes hex, with a readable console output" optional = false python-versions = ">=3.7, <4" +groups = ["main", "dltpure"] files = [ {file = "hexbytes-0.3.1-py3-none-any.whl", hash = "sha256:383595ad75026cf00abd570f44b368c6cdac0c6becfae5c39ff88829877f8a59"}, {file = "hexbytes-0.3.1.tar.gz", hash = "sha256:a3fe35c6831ee8fafd048c4c086b986075fc14fd46258fa24ecb8d65745f9a9d"}, @@ -2160,6 +2278,7 @@ version = "0.7.0" description = "hnswlib" optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "hnswlib-0.7.0.tar.gz", hash = "sha256:bc459668e7e44bb7454b256b90c98c5af750653919d9a91698dafcf416cf64c4"}, ] @@ -2173,6 +2292,7 @@ version = "0.22.0" description = "A comprehensive HTTP client library." optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["google_ads", "google_analytics", "google_sheets"] files = [ {file = "httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc"}, {file = "httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81"}, @@ -2187,6 +2307,7 @@ version = "0.6.0" description = "A collection of framework independent HTTP protocol utils." optional = false python-versions = ">=3.5.0" +groups = ["unstructured_data"] files = [ {file = "httptools-0.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:818325afee467d483bfab1647a72054246d29f9053fd17cc4b86cda09cc60339"}, {file = "httptools-0.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72205730bf1be875003692ca54a4a7c35fac77b4746008966061d9d41a61b0f5"}, @@ -2234,6 +2355,7 @@ version = "0.17.3" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" +groups = ["unstructured_data"] files = [ {file = "huggingface_hub-0.17.3-py3-none-any.whl", hash = "sha256:545eb3665f6ac587add946e73984148f2ea5c7877eac2e845549730570c1933a"}, {file = "huggingface_hub-0.17.3.tar.gz", hash = "sha256:40439632b211311f788964602bf8b0d9d6b7a2314fba4e8d67b2ce3ecea0e3fd"}, @@ -2267,6 +2389,7 @@ version = "10.0" description = "Human friendly output for text interfaces using Python" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["unstructured_data"] files = [ {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, @@ -2281,6 +2404,7 @@ version = "4.8.0" description = "Python humanize utilities" optional = false python-versions = ">=3.8" +groups = ["main", "dltpure"] files = [ {file = "humanize-4.8.0-py3-none-any.whl", hash = "sha256:8bc9e2bb9315e61ec06bf690151ae35aeb65651ab091266941edf97c90836404"}, {file = "humanize-4.8.0.tar.gz", hash = "sha256:9783373bf1eec713a770ecaa7c2d7a7902c98398009dfa3d8a2df91eec9311e8"}, @@ -2295,6 +2419,7 @@ version = "21.0.0" description = "A featureful, immutable, and correct URL for Python." 
optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["dev", "scrapy"] files = [ {file = "hyperlink-21.0.0-py2.py3-none-any.whl", hash = "sha256:e6b14c37ecb73e89c77d78cdb4c2cc8f3fb59a885c5b3f819ff4ed80f25af1b4"}, {file = "hyperlink-21.0.0.tar.gz", hash = "sha256:427af957daa58bc909471c6c40f74c5450fa123dd093fc53efd2e91d2705a56b"}, @@ -2309,6 +2434,7 @@ version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.5" +groups = ["main", "airtable", "asana_dlt", "dev", "dltpure", "facebook_ads", "filesystem", "google_ads", "google_analytics", "google_sheets", "salesforce", "scrapy", "stripe_analytics", "unstructured_data", "unstructured_data_lint"] files = [ {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, @@ -2320,6 +2446,8 @@ version = "6.8.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" +groups = ["unstructured_data"] +markers = "python_version == \"3.9\"" files = [ {file = "importlib_metadata-6.8.0-py3-none-any.whl", hash = "sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb"}, {file = "importlib_metadata-6.8.0.tar.gz", hash = "sha256:dbace7892d8c0c4ac1ad096662232f831d4e64f4c4545bd53016a3e9d4654743"}, @@ -2331,25 +2459,7 @@ zipp = ">=0.5" [package.extras] docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] perf = ["ipython"] -testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] - -[[package]] -name = "importlib-resources" -version = "6.4.0" -description = "Read resources from Python packages" -optional = false -python-versions = ">=3.8" -files = [ - {file = "importlib_resources-6.4.0-py3-none-any.whl", hash = "sha256:50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c"}, - {file = "importlib_resources-6.4.0.tar.gz", hash = "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"}, -] - -[package.dependencies] -zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["jaraco.test (>=5.4)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"] +testing = ["flufl.flake8", "importlib-resources (>=1.3) ; python_version < \"3.9\"", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7) ; platform_python_implementation != \"PyPy\"", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1) ; platform_python_implementation != \"PyPy\"", "pytest-perf (>=0.9.2)", "pytest-ruff"] [[package]] name = "incremental" @@ -2357,6 +2467,7 @@ version = "22.10.0" description = "\"A small library that versions your Python projects.\"" optional = false python-versions = "*" +groups = ["dev", "scrapy"] files = [ {file = "incremental-22.10.0-py2.py3-none-any.whl", hash = "sha256:b864a1f30885ee72c5ac2835a761b8fe8aa9c28b9395cacf27286602688d3e51"}, {file = 
"incremental-22.10.0.tar.gz", hash = "sha256:912feeb5e0f7e0188e6f42241d2f450002e11bbc0937c65865045854c24c0bd0"}, @@ -2372,6 +2483,7 @@ version = "0.5.1" description = "A port of Ruby on Rails inflector to Python" optional = false python-versions = ">=3.5" +groups = ["airtable"] files = [ {file = "inflection-0.5.1-py2.py3-none-any.whl", hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2"}, {file = "inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417"}, @@ -2383,6 +2495,7 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" +groups = ["dev", "pytest"] files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, @@ -2394,6 +2507,7 @@ version = "0.6.1" description = "An ISO 8601 date/time/duration parser and formatter" optional = false python-versions = "*" +groups = ["filesystem", "salesforce"] files = [ {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, @@ -2408,6 +2522,7 @@ version = "0.8.0" description = "Common interface for data container classes" optional = false python-versions = ">=3.7" +groups = ["scrapy"] files = [ {file = "itemadapter-0.8.0-py3-none-any.whl", hash = "sha256:2ac1fbcc363b789a18639935ca322e50a65a0a7dfdd8d973c34e2c468e6c0f94"}, {file = "itemadapter-0.8.0.tar.gz", hash = "sha256:77758485fb0ac10730d4b131363e37d65cb8db2450bfec7a57c3f3271f4a48a9"}, @@ -2419,6 +2534,7 @@ version = "1.1.0" description = "Base library for scrapy's ItemLoader" optional = false python-versions = ">=3.7" +groups = ["scrapy"] files = [ {file = "itemloaders-1.1.0-py3-none-any.whl", hash = "sha256:c8c82fe0c11fc4cdd08ec04df0b3c43f3cb7190002edb517e02d55de8efc2aeb"}, {file = "itemloaders-1.1.0.tar.gz", hash = "sha256:21d81c61da6a08b48e5996288cdf3031c0f92e5d0075920a0242527523e14a48"}, @@ -2436,6 +2552,7 @@ version = "1.0.1" description = "JSON Matching Expressions" optional = false python-versions = ">=3.7" +groups = ["filesystem", "scrapy"] files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, @@ -2447,6 +2564,7 @@ version = "1.3.2" description = "Lightweight pipelining with Python functions" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"}, {file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"}, @@ -2458,6 +2576,7 @@ version = "1.6.0" description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming." 
optional = false python-versions = "*" +groups = ["main", "dltpure"] files = [ {file = "jsonpath-ng-1.6.0.tar.gz", hash = "sha256:5483f8e9d74c39c9abfab554c070ae783c1c8cbadf5df60d561bc705ac68a07e"}, {file = "jsonpath_ng-1.6.0-py3-none-any.whl", hash = "sha256:6fd04833412c4b3d9299edf369542f5e67095ca84efa17cbb7f06a34958adc9f"}, @@ -2472,6 +2591,7 @@ version = "0.0.219" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "langchain-0.0.219-py3-none-any.whl", hash = "sha256:1f08a00e622f1c75087d6013f34e82be3f8dd1859266eb583a0fd7bc045090cf"}, {file = "langchain-0.0.219.tar.gz", hash = "sha256:842f8212939e5ac4005906d2215574ffb3e34d2fe28f5bc0f46eb3b28fb29c5d"}, @@ -2492,17 +2612,17 @@ SQLAlchemy = ">=1.4,<3" tenacity = ">=8.1.0,<9.0.0" [package.extras] -all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.2.6,<0.3.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.3,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (==9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=3,<4)", "deeplake (>=3.6.2,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jina (>=3.14,<4.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.1.dev3,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "octoai-sdk (>=0.1.1,<0.2.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.1.2,<2.0.0)", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "spacy (>=3,<4)", "steamship (>=2.16.9,<3.0.0)", "tensorflow-text (>=2.11.0,<3.0.0)", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0)", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] +all = ["O365 (>=2.0.26,<3.0.0)", "aleph-alpha-client (>=2.15.0,<3.0.0)", "anthropic (>=0.2.6,<0.3.0)", "arxiv (>=1.4,<2.0)", "atlassian-python-api (>=3.36.0,<4.0.0)", "awadb (>=0.3.3,<0.4.0)", "azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "beautifulsoup4 (>=4,<5)", "clarifai (==9.1.0)", "clickhouse-connect (>=0.5.14,<0.6.0)", "cohere (>=3,<4)", 
"deeplake (>=3.6.2,<4.0.0)", "docarray[hnswlib] (>=0.32.0,<0.33.0)", "duckduckgo-search (>=3.8.3,<4.0.0)", "elasticsearch (>=8,<9)", "esprima (>=4.0.1,<5.0.0)", "faiss-cpu (>=1,<2)", "google-api-python-client (==2.70.0)", "google-auth (>=2.18.1,<3.0.0)", "google-search-results (>=2,<3)", "gptcache (>=0.1.7)", "html2text (>=2020.1.16,<2021.0.0)", "huggingface_hub (>=0,<1)", "jina (>=3.14,<4.0)", "jinja2 (>=3,<4)", "jq (>=1.4.1,<2.0.0)", "lancedb (>=0.1,<0.2)", "langkit (>=0.0.1.dev3,<0.1.0)", "lark (>=1.1.5,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "manifest-ml (>=0.0.1,<0.0.2)", "momento (>=1.5.0,<2.0.0)", "nebula3-python (>=3.4.0,<4.0.0)", "neo4j (>=5.8.1,<6.0.0)", "networkx (>=2.6.3,<3.0.0)", "nlpcloud (>=1,<2)", "nltk (>=3,<4)", "nomic (>=1.0.43,<2.0.0)", "octoai-sdk (>=0.1.1,<0.2.0)", "openai (>=0,<1)", "openlm (>=0.0.5,<0.0.6)", "opensearch-py (>=2.0.0,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pexpect (>=4.8.0,<5.0.0)", "pgvector (>=0.1.6,<0.2.0)", "pinecone-client (>=2,<3)", "pinecone-text (>=0.4.2,<0.5.0)", "psycopg2-binary (>=2.9.5,<3.0.0)", "pymongo (>=4.3.3,<5.0.0)", "pyowm (>=3.3.0,<4.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pytesseract (>=0.3.10,<0.4.0)", "pyvespa (>=0.33.0,<0.34.0)", "qdrant-client (>=1.1.2,<2.0.0) ; python_full_version >= \"3.8.1\" and python_version < \"3.12\"", "redis (>=4,<5)", "requests-toolbelt (>=1.0.0,<2.0.0)", "sentence-transformers (>=2,<3)", "singlestoredb (>=0.7.1,<0.8.0)", "spacy (>=3,<4)", "steamship (>=2.16.9,<3.0.0)", "tensorflow-text (>=2.11.0,<3.0.0) ; python_version >= \"3.10\" and python_version < \"3.12\"", "tigrisdb (>=1.0.0b6,<2.0.0)", "tiktoken (>=0.3.2,<0.4.0) ; python_version >= \"3.9\" and python_version < \"4.0\"", "torch (>=1,<3)", "transformers (>=4,<5)", "weaviate-client (>=3,<4)", "wikipedia (>=1,<2)", "wolframalpha (==5.0.0)"] azure = ["azure-ai-formrecognizer (>=3.2.1,<4.0.0)", "azure-ai-vision (>=0.11.1b1,<0.12.0)", "azure-cognitiveservices-speech (>=1.28.0,<2.0.0)", "azure-core (>=1.26.4,<2.0.0)", "azure-cosmos (>=4.4.0b1,<5.0.0)", "azure-identity (>=1.12.0,<2.0.0)", "azure-search-documents (==11.4.0a20230509004)", "openai (>=0,<1)"] clarifai = ["clarifai (==9.1.0)"] cohere = ["cohere (>=3,<4)"] docarray = ["docarray[hnswlib] (>=0.32.0,<0.33.0)"] embeddings = ["sentence-transformers (>=2,<3)"] -extended-testing = ["atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "openai (>=0,<1)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf (>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0)", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "zep-python (>=0.31)"] +extended-testing = ["atlassian-python-api (>=3.36.0,<4.0.0)", "beautifulsoup4 (>=4,<5)", "bibtexparser (>=1.4.0,<2.0.0)", "chardet (>=5.1.0,<6.0.0)", "esprima (>=4.0.1,<5.0.0)", "gql (>=3.4.1,<4.0.0)", "html2text (>=2020.1.16,<2021.0.0)", "jq (>=1.4.1,<2.0.0)", "lxml (>=4.9.2,<5.0.0)", "openai (>=0,<1)", "pandas (>=2.0.1,<3.0.0)", "pdfminer-six (>=20221105,<20221106)", "pgvector (>=0.1.6,<0.2.0)", "psychicapi (>=0.8.0,<0.9.0)", "py-trello (>=0.19.0,<0.20.0)", "pymupdf (>=1.22.3,<2.0.0)", "pypdf 
(>=3.4.0,<4.0.0)", "pypdfium2 (>=4.10.0,<5.0.0)", "pyspark (>=3.4.0,<4.0.0)", "requests-toolbelt (>=1.0.0,<2.0.0)", "scikit-learn (>=1.2.2,<2.0.0)", "streamlit (>=1.18.0,<2.0.0) ; python_full_version >= \"3.8.1\" and python_full_version != \"3.9.7\" and python_version < \"4.0\"", "telethon (>=1.28.5,<2.0.0)", "tqdm (>=4.48.0)", "zep-python (>=0.31)"] javascript = ["esprima (>=4.0.1,<5.0.0)"] llms = ["anthropic (>=0.2.6,<0.3.0)", "clarifai (==9.1.0)", "cohere (>=3,<4)", "huggingface_hub (>=0,<1)", "manifest-ml (>=0.0.1,<0.0.2)", "nlpcloud (>=1,<2)", "openai (>=0,<1)", "openllm (>=0.1.6)", "openlm (>=0.0.5,<0.0.6)", "torch (>=1,<3)", "transformers (>=4,<5)"] -openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.4.0)"] -qdrant = ["qdrant-client (>=1.1.2,<2.0.0)"] +openai = ["openai (>=0,<1)", "tiktoken (>=0.3.2,<0.4.0) ; python_version >= \"3.9\" and python_version < \"4.0\""] +qdrant = ["qdrant-client (>=1.1.2,<2.0.0) ; python_full_version >= \"3.8.1\" and python_version < \"3.12\""] text-helpers = ["chardet (>=5.1.0,<6.0.0)"] [[package]] @@ -2511,6 +2631,7 @@ version = "0.0.20" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = ">=3.8.1,<4.0" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "langchainplus_sdk-0.0.20-py3-none-any.whl", hash = "sha256:07a869d476755803aa04c4986ce78d00c2fe4ff584c0eaa57d7570c9664188db"}, {file = "langchainplus_sdk-0.0.20.tar.gz", hash = "sha256:3d300e2e3290f68cc9d842c059f9458deba60e776c9e790309688cad1bfbb219"}, @@ -2527,6 +2648,7 @@ version = "4.9.3" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" +groups = ["salesforce", "scrapy", "unstructured_data"] files = [ {file = "lxml-4.9.3-cp27-cp27m-macosx_11_0_x86_64.whl", hash = "sha256:b0a545b46b526d418eb91754565ba5b63b1c0b12f9bd2f808c852d9b4b2f9b5c"}, {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:075b731ddd9e7f68ad24c635374211376aa05a281673ede86cbe1d1b3455279d"}, @@ -2634,6 +2756,7 @@ version = "4.3.2" description = "LZ4 Bindings for Python" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "lz4-4.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1c4c100d99eed7c08d4e8852dd11e7d1ec47a3340f49e3a96f8dfbba17ffb300"}, {file = "lz4-4.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:edd8987d8415b5dad25e797043936d91535017237f72fa456601be1479386c92"}, @@ -2683,6 +2806,7 @@ version = "1.15.1" description = "Small library to dynamically create python functions." optional = false python-versions = "*" +groups = ["main", "dltpure"] files = [ {file = "makefun-1.15.1-py2.py3-none-any.whl", hash = "sha256:a63cfc7b47a539c76d97bd4fdb833c7d0461e759fd1225f580cb4be6200294d4"}, {file = "makefun-1.15.1.tar.gz", hash = "sha256:40b0f118b6ded0d8d78c78f1eb679b8b6b2462e3c1b3e05fb1b2da8cd46b48a5"}, @@ -2694,6 +2818,7 @@ version = "3.5" description = "Python implementation of John Gruber's Markdown." 
optional = false python-versions = ">=3.8" +groups = ["unstructured_data"] files = [ {file = "Markdown-3.5-py3-none-any.whl", hash = "sha256:4afb124395ce5fc34e6d9886dab977fd9ae987fc6e85689f08278cf0c69d4bf3"}, {file = "Markdown-3.5.tar.gz", hash = "sha256:a807eb2e4778d9156c8f07876c6e4d50b5494c5665c4834f67b06459dfd877b3"}, @@ -2712,6 +2837,7 @@ version = "3.0.0" description = "Python port of markdown-it. Markdown parsing, done right!" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "dltpure"] files = [ {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, @@ -2736,6 +2862,7 @@ version = "3.20.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." optional = false python-versions = ">=3.8" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "marshmallow-3.20.1-py3-none-any.whl", hash = "sha256:684939db93e80ad3561392f47be0230743131560a41c5110684c16e21ade0a5c"}, {file = "marshmallow-3.20.1.tar.gz", hash = "sha256:5d2371bbe42000f2b3fb5eaa065224df7d8f8597bc19a1bbfa5bfe7fba8da889"}, @@ -2756,6 +2883,7 @@ version = "1.5.1" description = "Enum field for Marshmallow" optional = false python-versions = "*" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "marshmallow-enum-1.5.1.tar.gz", hash = "sha256:38e697e11f45a8e64b4a1e664000897c659b60aa57bfa18d44e226a9920b6e58"}, {file = "marshmallow_enum-1.5.1-py2.py3-none-any.whl", hash = "sha256:57161ab3dbfde4f57adeb12090f39592e992b9c86d206d02f6bd03ebec60f072"}, @@ -2770,6 +2898,7 @@ version = "0.7.0" description = "McCabe checker, plugin for flake8" optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, @@ -2781,6 +2910,7 @@ version = "0.1.2" description = "Markdown URL utilities" optional = false python-versions = ">=3.7" +groups = ["main", "dev", "dltpure"] files = [ {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, @@ -2792,6 +2922,7 @@ version = "7.1.0" description = "Mimesis: Fake Data Generator." 
optional = false python-versions = ">=3.8,<4.0" +groups = ["dev"] files = [ {file = "mimesis-7.1.0-py3-none-any.whl", hash = "sha256:da65bea6d6d5d5d87d5c008e6b23ef5f96a49cce436d9f8708dabb5152da0290"}, {file = "mimesis-7.1.0.tar.gz", hash = "sha256:c83b55d35536d7e9b9700a596b7ccfb639a740e3e1fb5e08062e8ab2a67dcb37"}, @@ -2803,6 +2934,7 @@ version = "1.6" description = "An implementation of time.monotonic() for Python 2 & < 3.3" optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"}, {file = "monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7"}, @@ -2814,6 +2946,7 @@ version = "10.1.0" description = "More routines for operating on iterables, beyond itertools" optional = false python-versions = ">=3.8" +groups = ["salesforce"] files = [ {file = "more-itertools-10.1.0.tar.gz", hash = "sha256:626c369fa0eb37bac0291bce8259b332fd59ac792fa5497b59837309cd5b114a"}, {file = "more_itertools-10.1.0-py3-none-any.whl", hash = "sha256:64e0735fcfdc6f3464ea133afe8ea4483b1c5fe3a3d69852e6503b43a0b222e6"}, @@ -2825,6 +2958,7 @@ version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, @@ -2833,7 +2967,7 @@ files = [ [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4)"] +gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] tests = ["pytest (>=4.6)"] [[package]] @@ -2842,6 +2976,7 @@ version = "1.24.1" description = "The Microsoft Authentication Library (MSAL) for Python library" optional = false python-versions = ">=2.7" +groups = ["filesystem"] files = [ {file = "msal-1.24.1-py2.py3-none-any.whl", hash = "sha256:ce4320688f95c301ee74a4d0e9dbcfe029a63663a8cc61756f40d0d0d36574ad"}, {file = "msal-1.24.1.tar.gz", hash = "sha256:aa0972884b3c6fdec53d9a0bd15c12e5bd7b71ac1b66d746f54d128709f3f8f8"}, @@ -2853,7 +2988,7 @@ PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]} requests = ">=2.0.0,<3" [package.extras] -broker = ["pymsalruntime (>=0.13.2,<0.14)"] +broker = ["pymsalruntime (>=0.13.2,<0.14) ; python_version >= \"3.6\" and platform_system == \"Windows\""] [[package]] name = "msal-extensions" @@ -2861,6 +2996,7 @@ version = "1.0.0" description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." optional = false python-versions = "*" +groups = ["filesystem"] files = [ {file = "msal-extensions-1.0.0.tar.gz", hash = "sha256:c676aba56b0cce3783de1b5c5ecfe828db998167875126ca4b47dc6436451354"}, {file = "msal_extensions-1.0.0-py2.py3-none-any.whl", hash = "sha256:91e3db9620b822d0ed2b4d1850056a0f133cba04455e62f11612e40f5502f2ee"}, @@ -2879,6 +3015,7 @@ version = "1.2.0" description = "This module enables reading, parsing and converting Microsoft Outlook MSG E-Mail files." 
optional = false python-versions = ">=3.4" +groups = ["unstructured_data"] files = [ {file = "msg_parser-1.2.0-py2.py3-none-any.whl", hash = "sha256:d47a2f0b2a359cb189fad83cc991b63ea781ecc70d91410324273fbf93e95375"}, {file = "msg_parser-1.2.0.tar.gz", hash = "sha256:0de858d4fcebb6c8f6f028da83a17a20fe01cdce67c490779cf43b3b0162aa66"}, @@ -2896,6 +3033,7 @@ version = "6.0.4" description = "multidict implementation" optional = false python-versions = ">=3.7" +groups = ["main", "facebook_ads", "filesystem", "unstructured_data", "unstructured_data_lint"] files = [ {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, @@ -2979,6 +3117,7 @@ version = "1.10.0" description = "Optional static typing for Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "mypy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2"}, {file = "mypy-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99"}, @@ -3026,17 +3165,35 @@ version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." optional = false python-versions = ">=3.5" +groups = ["dev", "unstructured_data", "unstructured_data_lint"] files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "mypy-protobuf" +version = "3.6.0" +description = "Generate mypy stub files from protobuf specs" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mypy-protobuf-3.6.0.tar.gz", hash = "sha256:02f242eb3409f66889f2b1a3aa58356ec4d909cdd0f93115622e9e70366eca3c"}, + {file = "mypy_protobuf-3.6.0-py3-none-any.whl", hash = "sha256:56176e4d569070e7350ea620262478b49b7efceba4103d468448f1d21492fd6c"}, +] + +[package.dependencies] +protobuf = ">=4.25.3" +types-protobuf = ">=4.24" + [[package]] name = "natsort" version = "8.4.0" description = "Simple yet flexible natural sorting in Python." 
optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c"}, {file = "natsort-8.4.0.tar.gz", hash = "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581"}, @@ -3052,6 +3209,7 @@ version = "3.8.1" description = "Natural Language Toolkit" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, {file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"}, @@ -3077,6 +3235,7 @@ version = "2.8.6" description = "Fast numerical expression evaluator for NumPy" optional = false python-versions = ">=3.7" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "numexpr-2.8.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:80acbfefb68bd92e708e09f0a02b29e04d388b9ae72f9fcd57988aca172a7833"}, {file = "numexpr-2.8.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6e884687da8af5955dc9beb6a12d469675c90b8fb38b6c93668c989cfc2cd982"}, @@ -3119,6 +3278,7 @@ version = "1.24.4" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "mongodb", "stripe_analytics", "unstructured_data", "unstructured_data_lint"] files = [ {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, @@ -3156,6 +3316,7 @@ version = "3.2.2" description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" optional = false python-versions = ">=3.6" +groups = ["main", "asana_dlt", "filesystem", "google_ads", "google_analytics"] files = [ {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, @@ -3172,6 +3333,7 @@ version = "0.46" description = "Python package to parse, read and write Microsoft OLE2 files (Structured Storage or Compound Document, Microsoft Office)" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["unstructured_data"] files = [ {file = "olefile-0.46.zip", hash = "sha256:133b031eaf8fd2c9399b78b8bc5b8fcbe4c31e85295749bb17a87cba8f3c3964"}, ] @@ -3182,6 +3344,7 @@ version = "1.16.1" description = "ONNX Runtime is a runtime accelerator for Machine Learning models" optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "onnxruntime-1.16.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:28b2c7f444b4119950b69370801cd66067f403d19cbaf2a444735d7c269cce4a"}, {file = "onnxruntime-1.16.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c24e04f33e7899f6aebb03ed51e51d346c1f906b05c5569d58ac9a12d38a2f58"}, @@ -3223,6 +3386,7 @@ version = "0.27.10" description = "Python client library for the OpenAI API" optional = false python-versions = ">=3.7.1" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "openai-0.27.10-py3-none-any.whl", hash = "sha256:beabd1757e3286fa166dde3b70ebb5ad8081af046876b47c14c41e203ed22a14"}, {file = "openai-0.27.10.tar.gz", 
hash = "sha256:60e09edf7100080283688748c6803b7b3b52d5a55d21890f3815292a0552d83b"}, @@ -3245,6 +3409,7 @@ version = "1.2.4" description = "OpenAPI (v3) specification schema as pydantic class" optional = false python-versions = ">=3.6.1" +groups = ["unstructured_data", "unstructured_data_lint"] files = [ {file = "openapi-schema-pydantic-1.2.4.tar.gz", hash = "sha256:3e22cf58b74a69f752cc7e5f1537f6e44164282db2700cbbcd3bb99ddd065196"}, {file = "openapi_schema_pydantic-1.2.4-py3-none-any.whl", hash = "sha256:a932ecc5dcbb308950282088956e94dea069c9823c84e507d64f6b622222098c"}, @@ -3259,6 +3424,7 @@ version = "3.1.2" description = "A Python library to read/write Excel 2010 xlsx/xlsm files" optional = false python-versions = ">=3.6" +groups = ["unstructured_data"] files = [ {file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"}, {file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"}, @@ -3273,6 +3439,8 @@ version = "3.9.9" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" optional = false python-versions = ">=3.8" +groups = ["main", "dltpure"] +markers = "platform_python_implementation != \"PyPy\"" files = [ {file = "orjson-3.9.9-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:f28090060a31f4d11221f9ba48b2273b0d04b702f4dcaa197c38c64ce639cc51"}, {file = "orjson-3.9.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8038ba245d0c0a6337cfb6747ea0c51fe18b0cf1a4bc943d530fd66799fae33d"}, @@ -3332,6 +3500,7 @@ version = "7.4.0" description = "A decorator to automatically detect mismatch when overriding a method." optional = false python-versions = ">=3.6" +groups = ["unstructured_data"] files = [ {file = "overrides-7.4.0-py3-none-any.whl", hash = "sha256:3ad24583f86d6d7a49049695efe9933e67ba62f0c7625d53c59fa832ce4b8b7d"}, {file = "overrides-7.4.0.tar.gz", hash = "sha256:9502a3cca51f4fac40b5feca985b6703a5c1f6ad815588a7ca9e285b9dca6757"}, @@ -3343,6 +3512,7 @@ version = "23.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.7" +groups = ["main", "dev", "dltpure", "mongodb", "pytest", "scrapy", "unstructured_data", "unstructured_data_lint"] files = [ {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, @@ -3354,6 +3524,7 @@ version = "2.0.3" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.8" +groups = ["main", "mongodb", "stripe_analytics", "unstructured_data"] files = [ {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"}, {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"}, @@ -3384,9 +3555,9 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.20.3", markers = "python_version < \"3.10\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.20.3", markers = "python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= 
\"3.10\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3421,6 +3592,7 @@ version = "2.0.2.230605" description = "Type annotations for pandas" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pandas_stubs-2.0.2.230605-py3-none-any.whl", hash = "sha256:39106b602f3cb6dc5f728b84e1b32bde6ecf41ee34ee714c66228009609fbada"}, {file = "pandas_stubs-2.0.2.230605.tar.gz", hash = "sha256:624c7bb06d38145a44b61be459ccd19b038e0bf20364a025ecaab78fea65e858"}, @@ -3436,6 +3608,7 @@ version = "1.8.1" description = "Parsel is a library to extract data from HTML and XML using XPath and CSS selectors" optional = false python-versions = ">=3.7" +groups = ["scrapy"] files = [ {file = "parsel-1.8.1-py2.py3-none-any.whl", hash = "sha256:2708fc74daeeb4ce471e2c2e9089b650ec940c7a218053e57421e69b5b00f82c"}, {file = "parsel-1.8.1.tar.gz", hash = "sha256:aff28e68c9b3f1a901db2a4e3f158d8480a38724d7328ee751c1a4e1c1801e39"}, @@ -3454,6 +3627,7 @@ version = "0.11.2" description = "Utility library for gitignore style pattern matching of file paths." optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"}, {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, @@ -3465,6 +3639,7 @@ version = "3.2.0" description = "pathvalidate is a Python library to sanitize/validate a string such as filenames/file-paths/etc." optional = false python-versions = ">=3.7" +groups = ["main", "dltpure"] files = [ {file = "pathvalidate-3.2.0-py3-none-any.whl", hash = "sha256:cc593caa6299b22b37f228148257997e2fa850eea2daf7e4cc9205cef6908dee"}, {file = "pathvalidate-3.2.0.tar.gz", hash = "sha256:5e8378cf6712bff67fbe7a8307d99fa8c1a0cb28aa477056f8fc374f0dff24ad"}, @@ -3472,7 +3647,7 @@ files = [ [package.extras] docs = ["Sphinx (>=2.4)", "sphinx-rtd-theme (>=1.2.2)", "urllib3 (<2)"] -test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1)", "pytest-discord (>=0.1.4)", "pytest-md-report (>=0.4.1)"] +test = ["Faker (>=1.0.8)", "allpairspy (>=2)", "click (>=6.2)", "pytest (>=6.0.1)", "pytest-discord (>=0.1.4) ; python_version >= \"3.7\"", "pytest-md-report (>=0.4.1)"] [[package]] name = "pbr" @@ -3480,6 +3655,7 @@ version = "5.11.1" description = "Python Build Reasonableness" optional = false python-versions = ">=2.6" +groups = ["dev"] files = [ {file = "pbr-5.11.1-py2.py3-none-any.whl", hash = "sha256:567f09558bae2b3ab53cb3c1e2e33e726ff3338e7bae3db5dc954b3a44eef12b"}, {file = "pbr-5.11.1.tar.gz", hash = "sha256:aefc51675b0b533d56bb5fd1c8c6c0522fe31896679882e1c4c63d5e4a0fccb3"}, @@ -3491,6 +3667,7 @@ version = "1.16.3" description = "A wrapper around the pdftoppm and pdftocairo command line tools to convert PDF to a PIL Image list." 
optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "pdf2image-1.16.3-py3-none-any.whl", hash = "sha256:b6154164af3677211c22cbb38b2bd778b43aca02758e962fe1e231f6d3b0e380"}, {file = "pdf2image-1.16.3.tar.gz", hash = "sha256:74208810c2cef4d9e347769b8e62a52303982ddb4f2dfd744c7ab4b940ae287e"}, @@ -3505,6 +3682,7 @@ version = "20221105" description = "PDF parser and analyzer" optional = false python-versions = ">=3.6" +groups = ["unstructured_data"] files = [ {file = "pdfminer.six-20221105-py3-none-any.whl", hash = "sha256:1eaddd712d5b2732f8ac8486824533514f8ba12a0787b3d5fe1e686cd826532d"}, {file = "pdfminer.six-20221105.tar.gz", hash = "sha256:8448ab7b939d18b64820478ecac5394f482d7a79f5f7eaa7703c6c959c175e1d"}, @@ -3525,6 +3703,7 @@ version = "3.0.0" description = "Python datetimes made easy" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "dltpure", "salesforce"] files = [ {file = "pendulum-3.0.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2cf9e53ef11668e07f73190c805dbdf07a1939c3298b78d5a9203a86775d1bfd"}, {file = "pendulum-3.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fb551b9b5e6059377889d2d878d940fd0bbb80ae4810543db18e6f77b02c5ef6"}, @@ -3612,13 +3791,11 @@ files = [ ] [package.dependencies] -"backports.zoneinfo" = {version = ">=0.2.1", markers = "python_version < \"3.9\""} -importlib-resources = {version = ">=5.9.0", markers = "python_version < \"3.9\""} python-dateutil = ">=2.6" tzdata = ">=2020.1" [package.extras] -test = ["time-machine (>=2.6.0)"] +test = ["time-machine (>=2.6.0) ; implementation_name != \"pypy\""] [[package]] name = "pillow" @@ -3626,6 +3803,7 @@ version = "9.5.0" description = "Python Imaging Library (Fork)" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "Pillow-9.5.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:ace6ca218308447b9077c14ea4ef381ba0b67ee78d64046b3f19cf4e1139ad16"}, {file = "Pillow-9.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d3d403753c9d5adc04d4694d35cf0391f0f3d57c8e0030aac09d7678fa8030aa"}, @@ -3705,6 +3883,7 @@ version = "3.11.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
optional = false python-versions = ">=3.7" +groups = ["dev", "salesforce"] files = [ {file = "platformdirs-3.11.0-py3-none-any.whl", hash = "sha256:e9d171d00af68be50e9202731309c4e658fd8bc76f55c11c7dd760d023bda68e"}, {file = "platformdirs-3.11.0.tar.gz", hash = "sha256:cf8ee52a3afdb965072dcc652433e0c7e3e40cf5ea1477cd4b3b1d2eb75495b3"}, @@ -3720,6 +3899,7 @@ version = "1.3.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "dltpure", "pytest"] files = [ {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, @@ -3735,6 +3915,7 @@ version = "3.11" description = "Python Lex & Yacc" optional = false python-versions = "*" +groups = ["main", "dltpure"] files = [ {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, @@ -3746,6 +3927,7 @@ version = "2.8.2" description = "Wraps the portalocker recipe for easy usage" optional = false python-versions = ">=3.8" +groups = ["filesystem"] files = [ {file = "portalocker-2.8.2-py3-none-any.whl", hash = "sha256:cfb86acc09b9aa7c3b43594e19be1345b9d16af3feb08bf92f23d4dce513a28e"}, {file = "portalocker-2.8.2.tar.gz", hash = "sha256:2b035aa7828e46c58e9b31390ee1f169b98e1066ab10b9a6a861fe7e25ee4f33"}, @@ -3765,6 +3947,7 @@ version = "3.0.2" description = "Integrate PostHog into any python application." optional = false python-versions = "*" +groups = ["unstructured_data"] files = [ {file = "posthog-3.0.2-py2.py3-none-any.whl", hash = "sha256:a8c0af6f2401fbe50f90e68c4143d0824b54e872de036b1c2f23b5abb39d88ce"}, {file = "posthog-3.0.2.tar.gz", hash = "sha256:701fba6e446a4de687c6e861b587e7b7741955ad624bf34fe013c06a0fec6fb3"}, @@ -3788,6 +3971,7 @@ version = "0.3.0" description = "Pure-Python robots.txt parser with support for modern conventions" optional = false python-versions = ">=3.7" +groups = ["scrapy"] files = [ {file = "Protego-0.3.0-py2.py3-none-any.whl", hash = "sha256:db38f6a945839d8162a4034031a21490469566a2726afb51d668497c457fb0aa"}, {file = "Protego-0.3.0.tar.gz", hash = "sha256:04228bffde4c6bcba31cf6529ba2cfd6e1b70808fdc1d2cb4301be6b28d6c568"}, @@ -3799,6 +3983,7 @@ version = "1.22.3" description = "Beautiful, Pythonic protocol buffers." 
 optional = false
 python-versions = ">=3.6"
+groups = ["google_ads", "google_analytics"]
 files = [
     {file = "proto-plus-1.22.3.tar.gz", hash = "sha256:fdcd09713cbd42480740d2fe29c990f7fbd885a67efc328aa8be6ee3e9f76a6b"},
     {file = "proto_plus-1.22.3-py3-none-any.whl", hash = "sha256:a49cd903bc0b6ab41f76bf65510439d56ca76f868adf0274e738bfdd096894df"},
@@ -3812,24 +3997,23 @@ testing = ["google-api-core[grpc] (>=1.31.5)"]

 [[package]]
 name = "protobuf"
-version = "4.24.4"
+version = "4.25.5"
 description = ""
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
+groups = ["main", "dev", "filesystem", "google_ads", "google_analytics", "google_sheets", "pg_legacy_replication", "unstructured_data"]
 files = [
-    {file = "protobuf-4.24.4-cp310-abi3-win32.whl", hash = "sha256:ec9912d5cb6714a5710e28e592ee1093d68c5ebfeda61983b3f40331da0b1ebb"},
-    {file = "protobuf-4.24.4-cp310-abi3-win_amd64.whl", hash = "sha256:1badab72aa8a3a2b812eacfede5020472e16c6b2212d737cefd685884c191085"},
-    {file = "protobuf-4.24.4-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:8e61a27f362369c2f33248a0ff6896c20dcd47b5d48239cb9720134bef6082e4"},
-    {file = "protobuf-4.24.4-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:bffa46ad9612e6779d0e51ae586fde768339b791a50610d85eb162daeb23661e"},
-    {file = "protobuf-4.24.4-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:b493cb590960ff863743b9ff1452c413c2ee12b782f48beca77c8da3e2ffe9d9"},
-    {file = "protobuf-4.24.4-cp37-cp37m-win32.whl", hash = "sha256:dbbed8a56e56cee8d9d522ce844a1379a72a70f453bde6243e3c86c30c2a3d46"},
-    {file = "protobuf-4.24.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6b7d2e1c753715dcfe9d284a25a52d67818dd43c4932574307daf836f0071e37"},
-    {file = "protobuf-4.24.4-cp38-cp38-win32.whl", hash = "sha256:02212557a76cd99574775a81fefeba8738d0f668d6abd0c6b1d3adcc75503dbe"},
-    {file = "protobuf-4.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:2fa3886dfaae6b4c5ed2730d3bf47c7a38a72b3a1f0acb4d4caf68e6874b947b"},
-    {file = "protobuf-4.24.4-cp39-cp39-win32.whl", hash = "sha256:b77272f3e28bb416e2071186cb39efd4abbf696d682cbb5dc731308ad37fa6dd"},
-    {file = "protobuf-4.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:9fee5e8aa20ef1b84123bb9232b3f4a5114d9897ed89b4b8142d81924e05d79b"},
-    {file = "protobuf-4.24.4-py3-none-any.whl", hash = "sha256:80797ce7424f8c8d2f2547e2d42bfbb6c08230ce5832d6c099a37335c9c90a92"},
-    {file = "protobuf-4.24.4.tar.gz", hash = "sha256:5a70731910cd9104762161719c3d883c960151eea077134458503723b60e3667"},
+    {file = "protobuf-4.25.5-cp310-abi3-win32.whl", hash = "sha256:5e61fd921603f58d2f5acb2806a929b4675f8874ff5f330b7d6f7e2e784bbcd8"},
+    {file = "protobuf-4.25.5-cp310-abi3-win_amd64.whl", hash = "sha256:4be0571adcbe712b282a330c6e89eae24281344429ae95c6d85e79e84780f5ea"},
+    {file = "protobuf-4.25.5-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:b2fde3d805354df675ea4c7c6338c1aecd254dfc9925e88c6d31a2bcb97eb173"},
+    {file = "protobuf-4.25.5-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:919ad92d9b0310070f8356c24b855c98df2b8bd207ebc1c0c6fcc9ab1e007f3d"},
+    {file = "protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:fe14e16c22be926d3abfcb500e60cab068baf10b542b8c858fa27e098123e331"},
+    {file = "protobuf-4.25.5-cp38-cp38-win32.whl", hash = "sha256:98d8d8aa50de6a2747efd9cceba361c9034050ecce3e09136f90de37ddba66e1"},
+    {file = "protobuf-4.25.5-cp38-cp38-win_amd64.whl", hash = "sha256:b0234dd5a03049e4ddd94b93400b67803c823cfc405689688f59b34e0742381a"},
+    {file = "protobuf-4.25.5-cp39-cp39-win32.whl", hash = "sha256:abe32aad8561aa7cc94fc7ba4fdef646e576983edb94a73381b03c53728a626f"},
+    {file = "protobuf-4.25.5-cp39-cp39-win_amd64.whl", hash = "sha256:7a183f592dc80aa7c8da7ad9e55091c4ffc9497b3054452d629bb85fa27c2a45"},
+    {file = "protobuf-4.25.5-py3-none-any.whl", hash = "sha256:0aebecb809cae990f8129ada5ca273d9d670b76d9bfc9b1809f0a9c02b7dbf41"},
+    {file = "protobuf-4.25.5.tar.gz", hash = "sha256:7f8249476b4a9473645db7f8ab42b02fe1488cbe5fb72fddd445e0665afd8584"},
 ]

 [[package]]
@@ -3838,6 +4022,7 @@ version = "2.9.9"
 description = "psycopg2 - Python-PostgreSQL Database Adapter"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "pg_legacy_replication", "pg_replication"]
 files = [
     {file = "psycopg2-binary-2.9.9.tar.gz", hash = "sha256:7f01846810177d829c7692f1f5ada8096762d9172af1b1a28d4ab5b77c923c1c"},
     {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c2470da5418b76232f02a2fcd2229537bb2d5a7096674ce61859c3229f2eb202"},
@@ -3919,6 +4104,8 @@ version = "2.9.0"
 description = ".. image:: https://travis-ci.org/chtd/psycopg2cffi.svg?branch=master"
 optional = false
 python-versions = "*"
+groups = ["main"]
+markers = "platform_python_implementation == \"PyPy\""
 files = [
     {file = "psycopg2cffi-2.9.0.tar.gz", hash = "sha256:7e272edcd837de3a1d12b62185eb85c45a19feda9e62fa1b120c54f9e8d35c52"},
 ]
@@ -3933,6 +4120,7 @@ version = "3.3.0"
 description = "Apache Pulsar Python client library"
 optional = false
 python-versions = "*"
+groups = ["unstructured_data"]
 files = [
     {file = "pulsar_client-3.3.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:c31afd3e67a044ff93177df89e08febf214cc965e95ede097d9fe8755af00e01"},
     {file = "pulsar_client-3.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f66982284571674b215324cc26b5c2f7c56c7043113c47a7084cb70d67a8afb"},
@@ -3980,6 +4168,7 @@ version = "1.11.0"
 description = "library with cross-python path, ini-parsing, io, code, log facilities"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+groups = ["dev", "pytest"]
 files = [
     {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
     {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
@@ -3991,6 +4180,7 @@ version = "2.1.0.post1"
 description = "Python Client for the Airtable API"
 optional = false
 python-versions = "*"
+groups = ["airtable"]
 files = [
     {file = "pyairtable-2.1.0.post1-py2.py3-none-any.whl", hash = "sha256:a80eb85f7c020bf41679bb00ca57da11aeaa43769afbc73619276798a2ca182e"},
     {file = "pyairtable-2.1.0.post1.tar.gz", hash = "sha256:e588249e68cf338dcdca9908537ed16d5a22ae72345ec930022b230ba96e5f84"},
@@ -4009,6 +4199,7 @@ version = "16.0.0"
 description = "Python library for Apache Arrow"
 optional = false
 python-versions = ">=3.8"
+groups = ["main", "mongodb"]
 files = [
     {file = "pyarrow-16.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:22a1fdb1254e5095d629e29cd1ea98ed04b4bbfd8e42cc670a6b639ccc208b60"},
     {file = "pyarrow-16.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:574a00260a4ed9d118a14770edbd440b848fcae5a3024128be9d0274dbcaf858"},
@@ -4057,6 +4248,7 @@ version = "0.5.0"
 description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
+groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets", "scrapy"]
 files = [
     {file = "pyasn1-0.5.0-py2.py3-none-any.whl", hash = "sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57"},
     {file = "pyasn1-0.5.0.tar.gz", hash = "sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde"},
@@ -4068,6 +4260,7 @@ version = "0.3.0"
 description = "A collection of ASN.1-based protocols modules"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
+groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets", "scrapy"]
 files = [
     {file = "pyasn1_modules-0.3.0-py2.py3-none-any.whl", hash = "sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d"},
     {file = "pyasn1_modules-0.3.0.tar.gz", hash = "sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c"},
@@ -4082,6 +4275,7 @@ version = "2.11.1"
 description = "Python style guide checker"
 optional = false
 python-versions = ">=3.8"
+groups = ["dev"]
 files = [
     {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"},
     {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"},
@@ -4093,6 +4287,7 @@ version = "22.3.5"
 description = "ISO country, subdivision, language, currency and script definitions and their translations"
 optional = false
 python-versions = ">=3.6, <4"
+groups = ["facebook_ads"]
 files = [
     {file = "pycountry-22.3.5.tar.gz", hash = "sha256:b2163a246c585894d808f18783e19137cb70a0c18fb36748dc01fc6f109c1646"},
 ]
@@ -4106,10 +4301,12 @@ version = "2.21"
 description = "C parser in Python"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+groups = ["main", "filesystem", "salesforce", "scrapy", "unstructured_data"]
 files = [
     {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
     {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
 ]
+markers = {main = "platform_python_implementation == \"PyPy\""}

 [[package]]
 name = "pydantic"
@@ -4117,6 +4314,7 @@ version = "1.10.13"
 description = "Data validation and settings management using python type hints"
 optional = false
 python-versions = ">=3.7"
+groups = ["airtable", "unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "pydantic-1.10.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:efff03cc7a4f29d9009d1c96ceb1e7a70a65cfe86e89d34e4a5f2ab1e5693737"},
     {file = "pydantic-1.10.13-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3ecea2b9d80e5333303eeb77e180b90e95eea8f765d08c3d278cd56b00345d01"},
@@ -4169,6 +4367,8 @@ version = "2.0.7"
 description = "Multi-producer multi-consumer in-memory signal dispatch system"
 optional = false
 python-versions = "*"
+groups = ["scrapy"]
+markers = "platform_python_implementation == \"CPython\""
 files = [
     {file = "PyDispatcher-2.0.7-py3-none-any.whl", hash = "sha256:96543bea04115ffde08f851e1d45cacbfd1ee866ac42127d9b476dc5aefa7de0"},
     {file = "PyDispatcher-2.0.7.tar.gz", hash = "sha256:b777c6ad080dc1bad74a4c29d6a46914fa6701ac70f94b0d66fbcfde62f5be31"},
@@ -4183,6 +4383,7 @@ version = "3.1.0"
 description = "passive checker of Python programs"
 optional = false
 python-versions = ">=3.8"
+groups = ["dev"]
 files = [
     {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"},
     {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"},
@@ -4194,13 +4395,14 @@ version = "2.16.1"
 description = "Pygments is a syntax highlighting package written in Python."
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "dev", "dltpure"]
 files = [
     {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"},
     {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"},
 ]

 [package.extras]
-plugins = ["importlib-metadata"]
+plugins = ["importlib-metadata ; python_version < \"3.8\""]

 [[package]]
 name = "pyjwt"
@@ -4208,6 +4410,7 @@ version = "2.8.0"
 description = "JSON Web Token implementation in Python"
 optional = false
 python-versions = ">=3.7"
+groups = ["filesystem", "salesforce"]
 files = [
     {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"},
     {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"},
@@ -4228,6 +4431,7 @@ version = "4.5.0"
 description = "Python driver for MongoDB "
 optional = false
 python-versions = ">=3.7"
+groups = ["mongodb"]
 files = [
     {file = "pymongo-4.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2d4fa1b01fa7e5b7bb8d312e3542e211b320eb7a4e3d8dc884327039d93cb9e0"},
     {file = "pymongo-4.5.0-cp310-cp310-manylinux1_i686.whl", hash = "sha256:dfcd2b9f510411de615ccedd47462dae80e82fdc09fe9ab0f0f32f11cf57eeb5"},
@@ -4318,9 +4522,9 @@ dnspython = ">=1.16.0,<3.0.0"

 [package.extras]
 aws = ["pymongo-auth-aws (<2.0.0)"]
-encryption = ["certifi", "pymongo[aws]", "pymongocrypt (>=1.6.0,<2.0.0)"]
-gssapi = ["pykerberos", "winkerberos (>=0.5.0)"]
-ocsp = ["certifi", "cryptography (>=2.5)", "pyopenssl (>=17.2.0)", "requests (<3.0.0)", "service-identity (>=18.1.0)"]
+encryption = ["certifi ; os_name == \"nt\" or sys_platform == \"darwin\"", "pymongo[aws]", "pymongocrypt (>=1.6.0,<2.0.0)"]
+gssapi = ["pykerberos ; os_name != \"nt\"", "winkerberos (>=0.5.0) ; os_name == \"nt\""]
+ocsp = ["certifi ; os_name == \"nt\" or sys_platform == \"darwin\"", "cryptography (>=2.5)", "pyopenssl (>=17.2.0)", "requests (<3.0.0)", "service-identity (>=18.1.0)"]
 snappy = ["python-snappy"]
 zstd = ["zstandard"]

@@ -4330,6 +4534,7 @@ version = "1.4.0"
 description = "\"Tools for using NumPy, Pandas, Polars, and PyArrow with MongoDB\""
 optional = false
 python-versions = ">=3.8"
+groups = ["mongodb"]
 files = [
     {file = "pymongoarrow-1.4.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:57a438dad3808c10931ffadd6028c8107133d254229996f8260e7c61417d98fe"},
     {file = "pymongoarrow-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:de545ecfc591288c75b602b4baeb6cd9f7db9ff6890c69d46dcb63eebd5e379d"},
@@ -4373,6 +4578,7 @@ version = "1.1.0"
 description = "Pure Python MySQL Driver"
 optional = false
 python-versions = ">=3.7"
+groups = ["sql_database"]
 files = [
     {file = "PyMySQL-1.1.0-py3-none-any.whl", hash = "sha256:8969ec6d763c856f7073c4c64662882675702efcb114b4bcbb955aea3a069fa7"},
     {file = "PyMySQL-1.1.0.tar.gz", hash = "sha256:4f13a7df8bf36a51e81dd9f3605fede45a4878fe02f9236349fd82a3f0612f96"},
@@ -4388,6 +4594,7 @@ version = "23.2.0"
 description = "Python wrapper module around the OpenSSL library"
 optional = false
 python-versions = ">=3.6"
+groups = ["scrapy"]
 files = [
     {file = "pyOpenSSL-23.2.0-py3-none-any.whl", hash = "sha256:24f0dc5227396b3e831f4c7f602b950a5e9833d292c8e4a2e06b709292806ae2"},
     {file = "pyOpenSSL-23.2.0.tar.gz", hash = "sha256:276f931f55a452e7dea69c7173e984eb2a4407ce413c918aa34b55f82f9b8bac"},
@@ -4406,6 +4613,7 @@ version = "1.11"
 description = "Thin wrapper for pandoc."
 optional = false
 python-versions = ">=3.6"
+groups = ["unstructured_data"]
 files = [
     {file = "pypandoc-1.11-py3-none-any.whl", hash = "sha256:b260596934e9cfc6513056110a7c8600171d414f90558bf4407e68b209be8007"},
     {file = "pypandoc-1.11.tar.gz", hash = "sha256:7f6d68db0e57e0f6961bec2190897118c4d305fc2d31c22cd16037f22ee084a5"},
@@ -4417,6 +4625,7 @@ version = "3.1.1"
 description = "pyparsing module - Classes and methods to define and execute parsing grammars"
 optional = false
 python-versions = ">=3.6.8"
+groups = ["google_ads", "google_analytics", "google_sheets"]
 files = [
     {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"},
     {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"},
@@ -4431,6 +4640,7 @@ version = "3.0.1"
 description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files"
 optional = false
 python-versions = ">=3.6"
+groups = ["dev"]
 files = [
     {file = "PyPDF2-3.0.1.tar.gz", hash = "sha256:a74408f69ba6271f71b9352ef4ed03dc53a31aa404d29b5d31f53bfecfee1440"},
     {file = "pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928"},
@@ -4452,6 +4662,8 @@ version = "2.1.2"
 description = "Multi-producer-multi-consumer signal dispatching mechanism"
 optional = false
 python-versions = "*"
+groups = ["scrapy"]
+markers = "platform_python_implementation == \"PyPy\""
 files = [
     {file = "PyPyDispatcher-2.1.2.tar.gz", hash = "sha256:b6bec5dfcff9d2535bca2b23c80eae367b1ac250a645106948d315fcfa9130f2"},
 ]
@@ -4462,6 +4674,8 @@ version = "3.4.1"
 description = "A python implementation of GNU readline."
 optional = false
 python-versions = "*"
+groups = ["unstructured_data"]
+markers = "sys_platform == \"win32\""
 files = [
     {file = "pyreadline3-3.4.1-py3-none-any.whl", hash = "sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb"},
     {file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"},
@@ -4473,6 +4687,7 @@ version = "7.4.2"
 description = "pytest: simple powerful testing with Python"
 optional = false
 python-versions = ">=3.7"
+groups = ["dev", "pytest"]
 files = [
     {file = "pytest-7.4.2-py3-none-any.whl", hash = "sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002"},
     {file = "pytest-7.4.2.tar.gz", hash = "sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069"},
@@ -4489,12 +4704,32 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}

 [package.extras]
 testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]

+[[package]]
+name = "pytest-cov"
+version = "5.0.0"
+description = "Pytest plugin for measuring coverage."
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857"},
+    {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"},
+]
+
+[package.dependencies]
+coverage = {version = ">=5.2.1", extras = ["toml"]}
+pytest = ">=4.6"
+
+[package.extras]
+testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"]
+
 [[package]]
 name = "pytest-forked"
 version = "1.6.0"
 description = "run tests in isolated forked subprocesses"
 optional = false
 python-versions = ">=3.7"
+groups = ["dev", "pytest"]
 files = [
     {file = "pytest-forked-1.6.0.tar.gz", hash = "sha256:4dafd46a9a600f65d822b8f605133ecf5b3e1941ebb3588e943b4e3eb71a5a3f"},
     {file = "pytest_forked-1.6.0-py3-none-any.whl", hash = "sha256:810958f66a91afb1a1e2ae83089d8dc1cd2437ac96b12963042fbb9fb4d16af0"},
@@ -4510,6 +4745,7 @@ version = "3.12.0"
 description = "Thin-wrapper around the mock package for easier use with pytest"
 optional = false
 python-versions = ">=3.8"
+groups = ["dev", "pytest"]
 files = [
     {file = "pytest-mock-3.12.0.tar.gz", hash = "sha256:31a40f038c22cad32287bb43932054451ff5583ff094bca6f675df2f8bc1a6e9"},
     {file = "pytest_mock-3.12.0-py3-none-any.whl", hash = "sha256:0972719a7263072da3a21c7f4773069bcc7486027d7e8e1f81d98a47e701bc4f"},
@@ -4527,6 +4763,7 @@ version = "2.8.2"
 description = "Extensions to the standard Python datetime module"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+groups = ["main", "dev", "dltpure", "filesystem", "mongodb", "salesforce", "stripe_analytics", "unstructured_data"]
 files = [
     {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
     {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
@@ -4541,6 +4778,7 @@ version = "1.0.1"
 description = "Create, read, and update Microsoft Word .docx files."
 optional = false
 python-versions = ">=3.7"
+groups = ["unstructured_data"]
 files = [
     {file = "python-docx-1.0.1.tar.gz", hash = "sha256:255148e15a4414244ec75f50e92d19864e52a7416768c65491707a7414659524"},
     {file = "python_docx-1.0.1-py3-none-any.whl", hash = "sha256:851340c49b36f917a1838a44c602a5a0702c0c3507b9890969545732dc10d2d1"},
@@ -4556,6 +4794,7 @@ version = "1.0.0"
 description = "Read key-value pairs from a .env file and set them as environment variables"
 optional = false
 python-versions = ">=3.8"
+groups = ["unstructured_data"]
 files = [
     {file = "python-dotenv-1.0.0.tar.gz", hash = "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba"},
     {file = "python_dotenv-1.0.0-py3-none-any.whl", hash = "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a"},
@@ -4570,6 +4809,7 @@ version = "0.4.27"
 description = "File type identification using libmagic"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+groups = ["unstructured_data"]
 files = [
     {file = "python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b"},
     {file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"},
@@ -4581,6 +4821,7 @@ version = "0.6.22"
 description = "Generate and manipulate Open XML PowerPoint (.pptx) files"
 optional = false
 python-versions = "*"
+groups = ["unstructured_data"]
 files = [
     {file = "python-pptx-0.6.22.tar.gz", hash = "sha256:38f8ee92dde31d24b4562560e61b0357e5d97ecf75c4352ae6616d5a32978654"},
     {file = "python_pptx-0.6.22-py3-none-any.whl", hash = "sha256:3d097c29e08de2da1fc3c6752169087065efa4153216e77fc1b27dff1bcdcb46"},
@@ -4597,6 +4838,7 @@ version = "2023.3.post1"
 description = "World timezone definitions, modern and historical"
 optional = false
 python-versions = "*"
+groups = ["main", "dltpure", "mongodb", "salesforce", "stripe_analytics", "unstructured_data"]
 files = [
     {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"},
     {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"},
@@ -4608,6 +4850,7 @@ version = "306"
 description = "Python for Window Extensions"
 optional = false
 python-versions = "*"
+groups = ["main", "dltpure", "filesystem"]
 files = [
     {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"},
     {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"},
@@ -4624,6 +4867,7 @@ files = [
     {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"},
     {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"},
 ]
+markers = {main = "sys_platform == \"win32\"", dltpure = "sys_platform == \"win32\"", filesystem = "platform_system == \"Windows\""}

 [[package]]
 name = "pyyaml"
@@ -4631,6 +4875,7 @@ version = "6.0.1"
 description = "YAML parser and emitter for Python"
 optional = false
 python-versions = ">=3.6"
+groups = ["main", "dev", "dltpure", "google_ads", "unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"},
     {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"},
@@ -4691,6 +4936,7 @@ version = "1.6.2"
 description = "Collection of persistent (disk-based) and non-persistent (memory-based) queues"
 optional = false
 python-versions = ">=3.5"
+groups = ["scrapy"]
 files = [
     {file = "queuelib-1.6.2-py2.py3-none-any.whl", hash = "sha256:4b96d48f650a814c6fb2fd11b968f9c46178b683aad96d68f930fe13a8574d19"},
     {file = "queuelib-1.6.2.tar.gz", hash = "sha256:4b207267f2642a8699a1f806045c56eb7ad1a85a10c0e249884580d139c2fcd2"},
@@ -4702,6 +4948,7 @@ version = "2023.10.3"
 description = "Alternative regular expression module, to replace re."
 optional = false
 python-versions = ">=3.7"
+groups = ["unstructured_data"]
 files = [
     {file = "regex-2023.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c34d4f73ea738223a094d8e0ffd6d2c1a1b4c175da34d6b0de3d8d69bee6bcc"},
     {file = "regex-2023.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8f4e49fc3ce020f65411432183e6775f24e02dff617281094ba6ab079ef0915"},
@@ -4799,6 +5046,7 @@ version = "2.31.0"
 description = "Python HTTP for Humans."
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "airtable", "asana_dlt", "dev", "dltpure", "facebook_ads", "filesystem", "google_ads", "google_analytics", "google_sheets", "salesforce", "scrapy", "stripe_analytics", "unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
     {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
@@ -4820,6 +5068,7 @@ version = "1.5.1"
 description = "File transport adapter for Requests"
 optional = false
 python-versions = "*"
+groups = ["salesforce", "scrapy"]
 files = [
     {file = "requests-file-1.5.1.tar.gz", hash = "sha256:07d74208d3389d01c38ab89ef403af0cfec63957d53a0081d8eca738d0247d8e"},
     {file = "requests_file-1.5.1-py2.py3-none-any.whl", hash = "sha256:dfe5dae75c12481f68ba353183c53a65e6044c923e64c24b2209f6c7570ca953"},
@@ -4835,6 +5084,7 @@ version = "1.11.0"
 description = "Mock out responses from the requests package"
 optional = false
 python-versions = "*"
+groups = ["dev"]
 files = [
     {file = "requests-mock-1.11.0.tar.gz", hash = "sha256:ef10b572b489a5f28e09b708697208c4a3b2b89ef80a9f01584340ea357ec3c4"},
     {file = "requests_mock-1.11.0-py2.py3-none-any.whl", hash = "sha256:f7fae383f228633f6bececebdab236c478ace2284d6292c6e7e2867b9ab74d15"},
@@ -4846,7 +5096,7 @@ six = "*"

 [package.extras]
 fixture = ["fixtures"]
-test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "testtools"]
+test = ["fixtures", "mock ; python_version < \"3.3\"", "purl", "pytest", "requests-futures", "sphinx", "testtools"]

 [[package]]
 name = "requests-oauthlib"
@@ -4854,6 +5104,7 @@ version = "1.3.1"
 description = "OAuthlib authentication support for Requests."
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+groups = ["main", "asana_dlt", "filesystem", "google_ads", "google_analytics"]
 files = [
     {file = "requests-oauthlib-1.3.1.tar.gz", hash = "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"},
     {file = "requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5"},
@@ -4872,6 +5123,7 @@ version = "1.0.0"
 description = "A utility belt for advanced users of python-requests"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+groups = ["salesforce"]
 files = [
     {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"},
     {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"},
@@ -4886,6 +5138,7 @@ version = "0.5.0"
 description = "This is a small Python module for parsing Pip requirement files."
 optional = false
 python-versions = ">=3.6,<4.0"
+groups = ["main", "dltpure"]
 files = [
     {file = "requirements-parser-0.5.0.tar.gz", hash = "sha256:3336f3a3ae23e06d3f0f88595e4052396e3adf91688787f637e5d2ca1a904069"},
     {file = "requirements_parser-0.5.0-py3-none-any.whl", hash = "sha256:e7fcdcd04f2049e73a9fb150d8a0f9d51ce4108f5f7cbeac74c484e17b12bcd9"},
@@ -4900,6 +5153,7 @@ version = "13.6.0"
 description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
 optional = false
 python-versions = ">=3.7.0"
+groups = ["main", "dev", "dltpure"]
 files = [
     {file = "rich-13.6.0-py3-none-any.whl", hash = "sha256:2b38e2fe9ca72c9a00170a1a2d20c63c790d0e10ef1fe35eba76e1e7b1d7d245"},
     {file = "rich-13.6.0.tar.gz", hash = "sha256:5c14d22737e6d5084ef4771b62d5d4363165b403455a30a1c8ca39dc7b644bef"},
@@ -4908,17 +5162,32 @@ files = [

 [package.dependencies]
 markdown-it-py = ">=2.2.0"
 pygments = ">=2.13.0,<3.0.0"
-typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""}

 [package.extras]
 jupyter = ["ipywidgets (>=7.5.1,<9)"]

+[[package]]
+name = "rich-argparse"
+version = "1.7.1"
+description = "Rich help formatters for argparse and optparse"
+optional = false
+python-versions = ">=3.8"
+groups = ["main", "dltpure"]
+files = [
+    {file = "rich_argparse-1.7.1-py3-none-any.whl", hash = "sha256:a8650b42e4a4ff72127837632fba6b7da40784842f08d7395eb67a9cbd7b4bf9"},
+    {file = "rich_argparse-1.7.1.tar.gz", hash = "sha256:d7a493cde94043e41ea68fb43a74405fa178de981bf7b800f7a3bd02ac5c27be"},
+]
+
+[package.dependencies]
+rich = ">=11.0.0"
+
 [[package]]
 name = "rsa"
 version = "4.9"
 description = "Pure-Python RSA implementation"
 optional = false
 python-versions = ">=3.6,<4"
+groups = ["main", "filesystem", "google_ads", "google_analytics", "google_sheets"]
 files = [
     {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"},
     {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"},
@@ -4933,6 +5202,7 @@ version = "2024.3.1"
 description = "Convenient Filesystem interface over S3"
 optional = false
 python-versions = ">= 3.8"
+groups = ["filesystem"]
 files = [
     {file = "s3fs-2024.3.1-py3-none-any.whl", hash = "sha256:f4566a5446c473740d272ec08e0b4aae8db1aa05f662c42ff0aa2c89bb5060ea"},
     {file = "s3fs-2024.3.1.tar.gz", hash = "sha256:1b8bc8dbd65e7b60f5487378f6eeffe1de59aa72caa9efca6dad6ab877405487"},
@@ -4953,6 +5223,7 @@ version = "2.11.1"
description = "A high-level Web Crawling and Web Scraping framework" optional = false python-versions = ">=3.8" +groups = ["scrapy"] files = [ {file = "Scrapy-2.11.1-py2.py3-none-any.whl", hash = "sha256:f1edee0cd214512054c01a8d031a8d213dddb53492b02c9e66256e3efe90d175"}, {file = "Scrapy-2.11.1.tar.gz", hash = "sha256:733a039c7423e52b69bf2810b5332093d4e42a848460359c07b02ecff8f73ebe"}, @@ -4984,6 +5255,7 @@ version = "3.0.2" description = "Python helper for Semantic Versioning (https://semver.org)" optional = false python-versions = ">=3.7" +groups = ["main", "dltpure"] files = [ {file = "semver-3.0.2-py3-none-any.whl", hash = "sha256:b1ea4686fe70b981f85359eda33199d60c53964284e0cfb4977d243e37cf4bf4"}, {file = "semver-3.0.2.tar.gz", hash = "sha256:6253adb39c70f6e51afed2fa7152bcd414c411286088fb4b9effb133885ab4cc"}, @@ -4995,6 +5267,7 @@ version = "24.1.0" description = "Service identity verification for pyOpenSSL & cryptography." optional = false python-versions = ">=3.8" +groups = ["scrapy"] files = [ {file = "service_identity-24.1.0-py3-none-any.whl", hash = "sha256:a28caf8130c8a5c1c7a6f5293faaf239bbfb7751e4862436920ee6f2616f568a"}, {file = "service_identity-24.1.0.tar.gz", hash = "sha256:6829c9d62fb832c2e1c435629b0a8c476e1929881f28bee4d20bc24161009221"}, @@ -5019,6 +5292,7 @@ version = "68.2.2" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "dltpure", "facebook_ads", "google_ads", "scrapy"] files = [ {file = "setuptools-68.2.2-py3-none-any.whl", hash = "sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a"}, {file = "setuptools-68.2.2.tar.gz", hash = "sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87"}, @@ -5026,7 +5300,7 @@ files = [ [package.extras] docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7) ; platform_python_implementation != \"PyPy\"", "pytest-checkdocs (>=2.4)", "pytest-cov ; platform_python_implementation != \"PyPy\"", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1) ; platform_python_implementation != \"PyPy\"", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-ruff ; sys_platform != \"cygwin\"", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] [[package]] @@ -5035,6 +5309,7 @@ version = 
"1.12.5" description = "A basic Salesforce.com REST API client." optional = false python-versions = "*" +groups = ["salesforce"] files = [ {file = "simple-salesforce-1.12.5.tar.gz", hash = "sha256:ef65f72438e3b215619f6835d3d4356e147adf3a7ece6896d239127dd6aefcd1"}, {file = "simple_salesforce-1.12.5-py2.py3-none-any.whl", hash = "sha256:07029575385d04132babfd6e19c1c8068c859d616a45dab07bbf9875bdc5ab93"}, @@ -5054,6 +5329,7 @@ version = "3.19.2" description = "Simple, fast, extensible JSON encoder/decoder for Python" optional = false python-versions = ">=2.5, !=3.0.*, !=3.1.*, !=3.2.*" +groups = ["main", "dltpure"] files = [ {file = "simplejson-3.19.2-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:3471e95110dcaf901db16063b2e40fb394f8a9e99b3fe9ee3acc6f6ef72183a2"}, {file = "simplejson-3.19.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:3194cd0d2c959062b94094c0a9f8780ffd38417a5322450a0db0ca1a23e7fbd2"}, @@ -5161,6 +5437,7 @@ version = "1.16.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +groups = ["main", "dev", "dltpure", "facebook_ads", "filesystem", "mongodb", "salesforce", "scrapy", "stripe_analytics", "unstructured_data"] files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, @@ -5172,6 +5449,7 @@ version = "5.0.1" description = "A pure Python implementation of a sliding window memory map manager" optional = false python-versions = ">=3.7" +groups = ["main", "dev", "dltpure"] files = [ {file = "smmap-5.0.1-py3-none-any.whl", hash = "sha256:e6d8668fa5f93e706934a62d7b4db19c8d9eb8cf2adbb75ef1b675aa332b69da"}, {file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"}, @@ -5183,6 +5461,7 @@ version = "1.3.0" description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, @@ -5194,6 +5473,7 @@ version = "2.0.22" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" +groups = ["pg_legacy_replication", "sql_database", "unstructured_data", "unstructured_data_lint"] files = [ {file = "SQLAlchemy-2.0.22-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f146c61ae128ab43ea3a0955de1af7e1633942c2b2b4985ac51cc292daf33222"}, {file = "SQLAlchemy-2.0.22-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:875de9414393e778b655a3d97d60465eb3fae7c919e88b70cc10b40b9f56042d"}, @@ -5280,6 +5560,7 @@ version = "0.20.4" description = "The little ASGI library that shines." 
 optional = false
 python-versions = ">=3.7"
+groups = ["unstructured_data"]
 files = [
     {file = "starlette-0.20.4-py3-none-any.whl", hash = "sha256:c0414d5a56297d37f3db96a84034d61ce29889b9eaccf65eb98a0b39441fcaa3"},
     {file = "starlette-0.20.4.tar.gz", hash = "sha256:42fcf3122f998fefce3e2c5ad7e5edbf0f02cf685d646a83a08d404726af5084"},
@@ -5298,6 +5579,7 @@ version = "5.1.0"
 description = "Manage dynamic plugins for Python applications"
 optional = false
 python-versions = ">=3.8"
+groups = ["dev"]
 files = [
     {file = "stevedore-5.1.0-py3-none-any.whl", hash = "sha256:8cc040628f3cea5d7128f2e76cf486b2251a4e543c7b938f58d9a377f6694a2d"},
     {file = "stevedore-5.1.0.tar.gz", hash = "sha256:a54534acf9b89bc7ed264807013b505bf07f74dbe4bcfa37d32bd063870b087c"},
@@ -5312,6 +5594,7 @@ version = "5.5.0"
 description = "Python bindings for the Stripe API"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+groups = ["stripe_analytics"]
 files = [
     {file = "stripe-5.5.0-py2.py3-none-any.whl", hash = "sha256:b4947da66dbb3de8969004ba6398f9a019c6b1b3ffe6aa88d5b07ac560a52b28"},
     {file = "stripe-5.5.0.tar.gz", hash = "sha256:04a9732b37a46228ecf0e496163a3edd93596b0e6200029fbc48911638627e19"},
@@ -5326,6 +5609,7 @@ version = "1.12"
 description = "Computer algebra system (CAS) in Python"
 optional = false
 python-versions = ">=3.8"
+groups = ["unstructured_data"]
 files = [
     {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"},
     {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"},
@@ -5340,6 +5624,7 @@ version = "0.9.0"
 description = "Pretty-print tabular data"
 optional = false
 python-versions = ">=3.7"
+groups = ["unstructured_data"]
 files = [
     {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"},
     {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"},
@@ -5354,6 +5639,7 @@ version = "8.2.3"
 description = "Retry code until it succeeds"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "dltpure", "unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"},
     {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"},
@@ -5368,6 +5654,7 @@ version = "0.4.0"
 description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models"
 optional = false
 python-versions = ">=3.8"
+groups = ["unstructured_data"]
 files = [
     {file = "tiktoken-0.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:176cad7f053d2cc82ce7e2a7c883ccc6971840a4b5276740d0b732a2b2011f8a"},
     {file = "tiktoken-0.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:450d504892b3ac80207700266ee87c932df8efea54e05cefe8613edc963c1285"},
@@ -5413,6 +5700,7 @@ version = "5.1.1"
 description = "Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well."
 optional = false
 python-versions = ">=3.8"
+groups = ["scrapy"]
 files = [
     {file = "tldextract-5.1.1-py3-none-any.whl", hash = "sha256:b9c4510a8766d377033b6bace7e9f1f17a891383ced3c5d50c150f181e9e1cc2"},
     {file = "tldextract-5.1.1.tar.gz", hash = "sha256:9b6dbf803cb5636397f0203d48541c0da8ba53babaf0e8a6feda2d88746813d4"},
@@ -5433,6 +5721,7 @@ version = "0.14.1"
 description = ""
 optional = false
 python-versions = ">=3.7"
+groups = ["unstructured_data"]
 files = [
     {file = "tokenizers-0.14.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:04ec1134a18ede355a05641cdc7700f17280e01f69f2f315769f02f7e295cf1e"},
     {file = "tokenizers-0.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:638abedb39375f0ddce2de536fc9c976639b2d1b7202d715c2e7a25f0ebfd091"},
@@ -5548,6 +5837,8 @@ version = "2.0.1"
 description = "A lil' TOML parser"
 optional = false
 python-versions = ">=3.7"
+groups = ["dev", "pytest"]
+markers = "python_version < \"3.11\""
 files = [
     {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
     {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
@@ -5559,6 +5850,7 @@ version = "0.12.1"
 description = "Style preserving TOML library"
 optional = false
 python-versions = ">=3.7"
+groups = ["main", "dltpure"]
 files = [
     {file = "tomlkit-0.12.1-py3-none-any.whl", hash = "sha256:712cbd236609acc6a3e2e97253dfc52d4c2082982a88f61b640ecf0817eab899"},
     {file = "tomlkit-0.12.1.tar.gz", hash = "sha256:38e1ff8edb991273ec9f6181244a6a391ac30e9f5098e7535640ea6be97a7c86"},
@@ -5570,6 +5862,7 @@ version = "4.66.1"
 description = "Fast, Extensible Progress Meter"
 optional = false
 python-versions = ">=3.7"
+groups = ["unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"},
     {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"},
@@ -5590,6 +5883,7 @@ version = "22.10.0"
 description = "An asynchronous networking framework written in Python"
 optional = false
 python-versions = ">=3.7.1"
+groups = ["dev", "scrapy"]
 files = [
     {file = "Twisted-22.10.0-py3-none-any.whl", hash = "sha256:86c55f712cc5ab6f6d64e02503352464f0400f66d4f079096d744080afcccbd0"},
     {file = "Twisted-22.10.0.tar.gz", hash = "sha256:32acbd40a94f5f46e7b42c109bfae2b302250945561783a8b7a059048f2d4d31"},
@@ -5606,21 +5900,21 @@ typing-extensions = ">=3.6.5"
 "zope.interface" = ">=4.4.2"

 [package.extras]
-all-non-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3)", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "service-identity (>=18.1.0)"]
+all-non-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3) ; python_version < \"3.7\"", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226) ; platform_system == \"Windows\"", "service-identity (>=18.1.0)"]
 conch = ["appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "cryptography (>=2.6)", "pyasn1"]
 conch-nacl = ["PyNaCl", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "cryptography (>=2.6)", "pyasn1"]
-contextvars = ["contextvars (>=2.4,<3)"]
-dev = ["coverage (>=6b1,<7)", "pydoctor (>=22.9.0,<22.10.0)", "pyflakes (>=2.2,<3.0)", "python-subunit (>=1.4,<2.0)", "readthedocs-sphinx-ext (>=2.1,<3.0)", "sphinx (>=5.0,<6)", "sphinx-rtd-theme (>=1.0,<2.0)", "towncrier (>=22.8,<23.0)", "twistedchecker (>=0.7,<1.0)"]
+contextvars = ["contextvars (>=2.4,<3) ; python_version < \"3.7\""]
+dev = ["coverage (>=6b1,<7)", "pydoctor (>=22.9.0,<22.10.0)", "pyflakes (>=2.2,<3.0)", "python-subunit (>=1.4,<2.0) ; python_version < \"3.10\"", "readthedocs-sphinx-ext (>=2.1,<3.0)", "sphinx (>=5.0,<6)", "sphinx-rtd-theme (>=1.0,<2.0)", "towncrier (>=22.8,<23.0)", "twistedchecker (>=0.7,<1.0)"]
 dev-release = ["pydoctor (>=22.9.0,<22.10.0)", "readthedocs-sphinx-ext (>=2.1,<3.0)", "sphinx (>=5.0,<6)", "sphinx-rtd-theme (>=1.0,<2.0)", "towncrier (>=22.8,<23.0)"]
-gtk-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3)", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pygobject", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "service-identity (>=18.1.0)"]
+gtk-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3) ; python_version < \"3.7\"", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pygobject", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226) ; platform_system == \"Windows\"", "service-identity (>=18.1.0)"]
 http2 = ["h2 (>=3.0,<5.0)", "priority (>=1.1.0,<2.0)"]
-macos-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3)", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pyobjc-core", "pyobjc-framework-CFNetwork", "pyobjc-framework-Cocoa", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "service-identity (>=18.1.0)"]
-mypy = ["PyHamcrest (>=1.9.0)", "PyNaCl", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3)", "coverage (>=6b1,<7)", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "mypy (==0.930)", "mypy-zope (==0.3.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pydoctor (>=22.9.0,<22.10.0)", "pyflakes (>=2.2,<3.0)", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "python-subunit (>=1.4,<2.0)", "pywin32 (!=226)", "readthedocs-sphinx-ext (>=2.1,<3.0)", "service-identity (>=18.1.0)", "sphinx (>=5.0,<6)", "sphinx-rtd-theme (>=1.0,<2.0)", "towncrier (>=22.8,<23.0)", "twistedchecker (>=0.7,<1.0)", "types-pyOpenSSL", "types-setuptools"]
-osx-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3)", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pyobjc-core", "pyobjc-framework-CFNetwork", "pyobjc-framework-Cocoa", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "service-identity (>=18.1.0)"]
-serial = ["pyserial (>=3.0)", "pywin32 (!=226)"]
+macos-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3) ; python_version < \"3.7\"", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pyobjc-core", "pyobjc-framework-CFNetwork", "pyobjc-framework-Cocoa", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226) ; platform_system == \"Windows\"", "service-identity (>=18.1.0)"]
+mypy = ["PyHamcrest (>=1.9.0)", "PyNaCl", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3) ; python_version < \"3.7\"", "coverage (>=6b1,<7)", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "mypy (==0.930)", "mypy-zope (==0.3.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pydoctor (>=22.9.0,<22.10.0)", "pyflakes (>=2.2,<3.0)", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "python-subunit (>=1.4,<2.0) ; python_version < \"3.10\"", "pywin32 (!=226) ; platform_system == \"Windows\"", "readthedocs-sphinx-ext (>=2.1,<3.0)", "service-identity (>=18.1.0)", "sphinx (>=5.0,<6)", "sphinx-rtd-theme (>=1.0,<2.0)", "towncrier (>=22.8,<23.0)", "twistedchecker (>=0.7,<1.0)", "types-pyOpenSSL", "types-setuptools"]
+osx-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3) ; python_version < \"3.7\"", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pyobjc-core", "pyobjc-framework-CFNetwork", "pyobjc-framework-Cocoa", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226) ; platform_system == \"Windows\"", "service-identity (>=18.1.0)"]
+serial = ["pyserial (>=3.0)", "pywin32 (!=226) ; platform_system == \"Windows\""]
 test = ["PyHamcrest (>=1.9.0)", "cython-test-exception-raiser (>=1.0.2,<2)", "hypothesis (>=6.0,<7.0)"]
 tls = ["idna (>=2.4)", "pyopenssl (>=21.0.0)", "service-identity (>=18.1.0)"]
-windows-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3)", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "pywin32 (!=226)", "service-identity (>=18.1.0)"]
+windows-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0.0)", "contextvars (>=2.4,<3) ; python_version < \"3.7\"", "cryptography (>=2.6)", "cython-test-exception-raiser (>=1.0.2,<2)", "h2 (>=3.0,<5.0)", "hypothesis (>=6.0,<7.0)", "idna (>=2.4)", "priority (>=1.1.0,<2.0)", "pyasn1", "pyopenssl (>=21.0.0)", "pyserial (>=3.0)", "pywin32 (!=226)", "pywin32 (!=226) ; platform_system == \"Windows\"", "service-identity (>=18.1.0)"]

 [[package]]
 name = "twisted-iocpsupport"
@@ -5628,6 +5922,8 @@ version = "1.0.4"
 description = "An extension for use in the twisted I/O Completion Ports reactor."
 optional = false
 python-versions = "*"
+groups = ["dev", "scrapy"]
+markers = "platform_system == \"Windows\""
 files = [
     {file = "twisted-iocpsupport-1.0.4.tar.gz", hash = "sha256:858096c0d15e33f15ac157f455d8f86f2f2cdd223963e58c0f682a3af8362d89"},
     {file = "twisted_iocpsupport-1.0.4-cp310-cp310-win32.whl", hash = "sha256:afa2b630797f9ed2f27f3d9f55e3f72b4244911e45a8c82756f44babbf0b243e"},
@@ -5650,12 +5946,25 @@ files = [
     {file = "twisted_iocpsupport-1.0.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:300437af17396a945a58dcfffd77863303a8b6d9e65c6e81f1d2eed55b50d444"},
 ]

+[[package]]
+name = "types-protobuf"
+version = "5.29.1.20241207"
+description = "Typing stubs for protobuf"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "types_protobuf-5.29.1.20241207-py3-none-any.whl", hash = "sha256:92893c42083e9b718c678badc0af7a9a1307b92afe1599e5cba5f3d35b668b2f"},
+    {file = "types_protobuf-5.29.1.20241207.tar.gz", hash = "sha256:2ebcadb8ab3ef2e3e2f067e0882906d64ba0dc65fc5b0fd7a8b692315b4a0be9"},
+]
+
 [[package]]
 name = "types-psycopg2"
 version = "2.9.21.20240218"
 description = "Typing stubs for psycopg2"
 optional = false
 python-versions = ">=3.8"
+groups = ["dev"]
 files = [
     {file = "types-psycopg2-2.9.21.20240218.tar.gz", hash = "sha256:3084cd807038a62c80fb5be78b41d855b48a060316101ea59fd85c302efb57d4"},
     {file = "types_psycopg2-2.9.21.20240218-py3-none-any.whl", hash = "sha256:cac96264e063cbce28dee337a973d39e6df4ca671252343cb4f8e5ef6db5e67d"},
@@ -5667,6 +5976,7 @@ version = "2023.3.1.1"
 description = "Typing stubs for pytz"
 optional = false
 python-versions = "*"
+groups = ["dev"]
 files = [
     {file = "types-pytz-2023.3.1.1.tar.gz", hash = "sha256:cc23d0192cd49c8f6bba44ee0c81e4586a8f30204970fc0894d209a6b08dab9a"},
     {file = "types_pytz-2023.3.1.1-py3-none-any.whl", hash = "sha256:1999a123a3dc0e39a2ef6d19f3f8584211de9e6a77fe7a0259f04a524e90a5cf"},
@@ -5678,6 +5988,7 @@ version = "2.31.0.6"
 description = "Typing stubs for requests"
 optional = false
 python-versions = ">=3.7"
+groups = ["dev"]
 files = [
     {file = "types-requests-2.31.0.6.tar.gz", hash = "sha256:cd74ce3b53c461f1228a9b783929ac73a666658f223e28ed29753771477b3bd0"},
     {file = "types_requests-2.31.0.6-py3-none-any.whl", hash = "sha256:a2db9cb228a81da8348b49ad6db3f5519452dd20a9c1e1a868c83c5fe88fd1a9"},
@@ -5692,6 +6003,7 @@ version = "68.2.0.0"
 description = "Typing stubs for setuptools"
 optional = false
 python-versions = "*"
+groups = ["main", "dltpure"]
 files = [
     {file = "types-setuptools-68.2.0.0.tar.gz", hash = "sha256:a4216f1e2ef29d089877b3af3ab2acf489eb869ccaf905125c69d2dc3932fd85"},
     {file = "types_setuptools-68.2.0.0-py3-none-any.whl", hash = "sha256:77edcc843e53f8fc83bb1a840684841f3dc804ec94562623bfa2ea70d5a2ba1b"},
@@ -5703,6 +6015,7 @@ version = "3.5.2.14"
 description = "Typing stubs for stripe"
 optional = false
 python-versions = "*"
+groups = ["stripe_analytics"]
 files = [
     {file = "types-stripe-3.5.2.14.tar.gz", hash = "sha256:bcc020aa5ba9acd796b9f2ac21f044c8e377ce2c0f570057f0f64c4b4637bbe7"},
     {file = "types_stripe-3.5.2.14-py3-none-any.whl", hash = "sha256:f5f1249f72a35ada1db95523edc7e8f7b543dc8434b2ff23eaa9ec2e251c2e59"},
@@ -5714,6 +6027,7 @@ version = "1.26.25.14"
 description = "Typing stubs for urllib3"
 optional = false
 python-versions = "*"
+groups = ["dev"]
 files = [
     {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"},
     {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"},
@@ -5725,6 +6039,7 @@ version = "4.8.0"
 description = "Backported and Experimental Type Hints for Python 3.8+"
 optional = false
 python-versions = ">=3.8"
+groups = ["main", "airtable", "dev", "dltpure", "filesystem", "pg_legacy_replication", "scrapy", "sql_database", "unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"},
     {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"},
@@ -5736,6 +6051,7 @@ version = "0.9.0"
 description = "Runtime inspection utilities for typing module."
 optional = false
 python-versions = "*"
+groups = ["unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"},
     {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"},
@@ -5751,6 +6067,7 @@ version = "2023.3"
 description = "Provider of IANA time zone data"
 optional = false
 python-versions = ">=2"
+groups = ["main", "dev", "dltpure", "mongodb", "salesforce", "stripe_analytics", "unstructured_data"]
 files = [
     {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"},
     {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"},
@@ -5762,6 +6079,7 @@ version = "0.7.12"
 description = "A library that prepares raw documents for downstream ML tasks."
 optional = false
 python-versions = ">=3.7.0"
+groups = ["unstructured_data"]
 files = [
     {file = "unstructured-0.7.12-py3-none-any.whl", hash = "sha256:6dec4f23574e213f30bccb680a4fb84c95617092ce4abf5d8955cc71af402fef"},
     {file = "unstructured-0.7.12.tar.gz", hash = "sha256:3dcddea34f52e1070f38fd10063b3b0f64bc4cbe5b778d6b86b5d33262d625cd"},
@@ -5809,6 +6127,7 @@ version = "4.1.1"
 description = "Implementation of RFC 6570 URI Templates"
 optional = false
 python-versions = ">=3.6"
+groups = ["google_ads", "google_analytics", "google_sheets"]
 files = [
     {file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"},
     {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"},
@@ -5820,14 +6139,15 @@ version = "1.26.17"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
+groups = ["main", "airtable", "asana_dlt", "dev", "dltpure", "facebook_ads", "filesystem", "google_ads", "google_analytics", "google_sheets", "salesforce", "scrapy", "stripe_analytics", "unstructured_data", "unstructured_data_lint"]
 files = [
     {file = "urllib3-1.26.17-py2.py3-none-any.whl", hash = "sha256:94a757d178c9be92ef5539b8840d48dc9cf1b2709c9d6b588232a055c524458b"},
     {file = "urllib3-1.26.17.tar.gz", hash = "sha256:24d6a242c28d29af46c3fae832c36db3bbebcc533dd1bb549172cd739c82df21"},
 ]

 [package.extras]
-brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"]
-secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"]
+brotli = ["brotli (==1.0.9) ; os_name != \"nt\" and python_version < \"3\" and platform_python_implementation == \"CPython\"", "brotli (>=1.0.9) ; python_version >= \"3\" and platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; (os_name != \"nt\" or python_version >= \"3\") and platform_python_implementation != \"CPython\"", "brotlipy (>=0.6.0) ; os_name == \"nt\" and python_version < \"3\""]
+secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress ; python_version == \"2.7\"", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"]
 socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]

 [[package]]
@@ -5836,6 +6156,7 @@ version = "0.23.2"
 description = "The lightning-fast ASGI server."
 optional = false
 python-versions = ">=3.8"
+groups = ["unstructured_data"]
 files = [
     {file = "uvicorn-0.23.2-py3-none-any.whl", hash = "sha256:1f9be6558f01239d4fdf22ef8126c39cb1ad0addf76c40e760549d2c2f43ab53"},
     {file = "uvicorn-0.23.2.tar.gz", hash = "sha256:4d3cc12d7727ba72b64d12d3cc7743124074c0a69f7b201512fc50c3e3f1569a"},
@@ -5849,12 +6170,12 @@ httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standard\""}
 python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""}
 pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""}
 typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""}
-uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""}
+uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\" and extra == \"standard\""}
 watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""}
 websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""}

 [package.extras]
-standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"]
+standard = ["colorama (>=0.4) ; sys_platform == \"win32\"", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1) ; sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"", "watchfiles (>=0.13)", "websockets (>=10.4)"]

 [[package]]
 name = "uvloop"
@@ -5862,6 +6183,8 @@ version = "0.18.0"
 description = "Fast implementation of asyncio event loop on top of libuv"
 optional = false
python-versions = ">=3.7.0" +groups = ["unstructured_data"] +markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\"" files = [ {file = "uvloop-0.18.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1f354d669586fca96a9a688c585b6257706d216177ac457c92e15709acaece10"}, {file = "uvloop-0.18.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:280904236a5b333a273292b3bcdcbfe173690f69901365b973fa35be302d7781"}, @@ -5903,7 +6226,7 @@ files = [ [package.extras] docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] -test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] +test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0) ; python_version >= \"3.12\"", "aiohttp (>=3.8.1) ; python_version < \"3.12\"", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] [[package]] name = "w3lib" @@ -5911,6 +6234,7 @@ version = "2.1.2" description = "Library of web-related functions" optional = false python-versions = ">=3.7" +groups = ["scrapy"] files = [ {file = "w3lib-2.1.2-py3-none-any.whl", hash = "sha256:c4432926e739caa8e3f49f5de783f336df563d9490416aebd5d39fb896d264e7"}, {file = "w3lib-2.1.2.tar.gz", hash = "sha256:ed5b74e997eea2abe3c1321f916e344144ee8e9072a6f33463ee8e57f858a4b1"}, @@ -5922,6 +6246,7 @@ version = "0.21.0" description = "Simple, modern and high performance file watching and code reload in python." optional = false python-versions = ">=3.8" +groups = ["unstructured_data"] files = [ {file = "watchfiles-0.21.0-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:27b4035013f1ea49c6c0b42d983133b136637a527e48c132d368eb19bf1ac6aa"}, {file = "watchfiles-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c81818595eff6e92535ff32825f31c116f867f64ff8cdf6562cd1d6b2e1e8f3e"}, @@ -6009,6 +6334,7 @@ version = "11.0.3" description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3ccc8a0c387629aec40f2fc9fdcb4b9d5431954f934da3eaf16cdc94f67dbfac"}, {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d67ac60a307f760c6e65dad586f556dde58e683fab03323221a4e530ead6f74d"}, @@ -6082,26 +6408,14 @@ files = [ {file = "websockets-11.0.3.tar.gz", hash = "sha256:88fc51d9a26b10fc331be344f1781224a375b78488fc343620184e95a4b27016"}, ] -[[package]] -name = "wheel" -version = "0.41.2" -description = "A built-package format for Python" -optional = false -python-versions = ">=3.7" -files = [ - {file = "wheel-0.41.2-py3-none-any.whl", hash = "sha256:75909db2664838d015e3d9139004ee16711748a52c8f336b52882266540215d8"}, - {file = "wheel-0.41.2.tar.gz", hash = "sha256:0c5ac5ff2afb79ac23ab82bab027a0be7b5dbcf2e54dc50efe4bf507de1f7985"}, -] - -[package.extras] -test = ["pytest (>=6.0.0)", "setuptools (>=65)"] - [[package]] name = "win-precise-time" version = "1.4.2" description = "" optional = false python-versions = ">=3.7" +groups = ["main", "dltpure"] +markers = "os_name == \"nt\"" files = [ {file = "win-precise-time-1.4.2.tar.gz", hash = "sha256:89274785cbc5f2997e01675206da3203835a442c60fd97798415c6b3c179c0b9"}, {file = "win_precise_time-1.4.2-cp310-cp310-win32.whl", hash = 
"sha256:7fa13a2247c2ef41cd5e9b930f40716eacc7fc1f079ea72853bd5613fe087a1a"}, @@ -6124,6 +6438,7 @@ version = "1.15.0" description = "Module for decorators, wrappers and monkey patching." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +groups = ["filesystem"] files = [ {file = "wrapt-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1"}, {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29"}, @@ -6208,6 +6523,7 @@ version = "2.0.1" description = "Library for developers to extract data from Microsoft Excel (tm) .xls spreadsheet files" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +groups = ["unstructured_data"] files = [ {file = "xlrd-2.0.1-py2.py3-none-any.whl", hash = "sha256:6a33ee89877bd9abc1158129f6e94be74e2679636b8a205b43b85206c3f0bbdd"}, {file = "xlrd-2.0.1.tar.gz", hash = "sha256:f72f148f54442c6b056bf931dbc34f986fd0c3b0b6b5a58d013c9aef274d0c88"}, @@ -6224,6 +6540,7 @@ version = "3.1.7" description = "A Python module for creating Excel XLSX files." optional = false python-versions = ">=3.6" +groups = ["unstructured_data"] files = [ {file = "XlsxWriter-3.1.7-py3-none-any.whl", hash = "sha256:8c730c4beb468696c4160aa1d6d168fb4c1a20dd972b212cd8cc1e74ddeab1b6"}, {file = "XlsxWriter-3.1.7.tar.gz", hash = "sha256:353042efb0f8551ce72baa087e98228f3394fcb380e8b96313edf1eec8d50823"}, @@ -6235,6 +6552,7 @@ version = "1.9.2" description = "Yet another URL library" optional = false python-versions = ">=3.7" +groups = ["main", "facebook_ads", "filesystem", "unstructured_data", "unstructured_data_lint"] files = [ {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8c2ad583743d16ddbdf6bb14b5cd76bf43b0d0006e918809d5d4ddf7bde8dd82"}, {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:82aa6264b36c50acfb2424ad5ca537a2060ab6de158a5bd2a72a032cc75b9eb8"}, @@ -6322,6 +6640,7 @@ version = "4.2.1" description = "A Python SOAP client" optional = false python-versions = ">=3.7" +groups = ["salesforce"] files = [ {file = "zeep-4.2.1-py3-none-any.whl", hash = "sha256:6754feb4c34a4b6d65fbc359252bf6654dcce3937bf1d95aae4402a60a8f5939"}, {file = "zeep-4.2.1.tar.gz", hash = "sha256:72093acfdb1d8360ed400869b73fbf1882b95c4287f798084c42ee0c1ff0e425"}, @@ -6349,6 +6668,8 @@ version = "3.17.0" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false python-versions = ">=3.8" +groups = ["unstructured_data"] +markers = "python_version == \"3.9\"" files = [ {file = "zipp-3.17.0-py3-none-any.whl", hash = "sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31"}, {file = "zipp-3.17.0.tar.gz", hash = "sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0"}, @@ -6356,7 +6677,7 @@ files = [ [package.extras] docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7) ; platform_python_implementation != \"PyPy\"", "pytest-checkdocs 
(>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1) ; platform_python_implementation != \"PyPy\"", "pytest-ruff"] [[package]] name = "zope-interface" @@ -6364,6 +6685,7 @@ version = "6.2" description = "Interfaces for Python" optional = false python-versions = ">=3.7" +groups = ["dev", "scrapy"] files = [ {file = "zope.interface-6.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:506f5410b36e5ba494136d9fa04c548eaf1a0d9c442b0b0e7a0944db7620e0ab"}, {file = "zope.interface-6.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b386b8b9d2b6a5e1e4eadd4e62335571244cb9193b7328c2b6e38b64cfda4f0e"}, @@ -6417,6 +6739,7 @@ version = "0.21.0" description = "Zstandard bindings for Python" optional = false python-versions = ">=3.7" +groups = ["unstructured_data"] files = [ {file = "zstandard-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:649a67643257e3b2cff1c0a73130609679a5673bf389564bc6d4b164d822a7ce"}, {file = "zstandard-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:144a4fe4be2e747bf9c646deab212666e39048faa4372abb6a250dab0f347a29"}, @@ -6470,6 +6793,6 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ cffi = ["cffi (>=1.11)"] [metadata] -lock-version = "2.0" -python-versions = ">=3.8.1,<3.13" -content-hash = "e216234bd35e71ef0c8e5a498c2cc616df417c5b14658b00aed9d935ba5a782e" +lock-version = "2.1" +python-versions = ">=3.9,<3.13" +content-hash = "57a99164550c77f5d400a1f134f72ea85b4a07128b28876923bd9b3deee5a94b" diff --git a/pyproject.toml b/pyproject.toml index a1a431d54..7b66c7057 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,12 +11,11 @@ readme = "README.md" packages = [{include = "sources"}] [tool.poetry.dependencies] -python = ">=3.8.1,<3.13" -dlt = {version = "1.3.0", allow-prereleases = true, extras = ["redshift", "bigquery", "postgres", "duckdb"]} -graphlib-backport = {version = "*", python = "<3.9"} +python = ">=3.9,<3.13" +dlt = {version = "1.8.1", allow-prereleases = true, extras = ["redshift", "bigquery", "postgres", "duckdb"]} [tool.poetry.group.dltpure.dependencies] -dlt = {version = "1.3.0", allow-prereleases = true} +dlt = {version = "1.8.1", allow-prereleases = true} [tool.poetry.group.pytest.dependencies] pytest = "^7.2.0" @@ -45,6 +44,9 @@ pytest-mock = "^3.12.0" twisted = "22.10.0" pytest-forked = "^1.6.0" pendulum = "^3.0.0" +types-protobuf = "^5.27.0.20240907" +pytest-cov = "^5.0.0" +mypy-protobuf = "^3.6.0" [tool.poetry.group.sql_database.dependencies] sqlalchemy = ">=1.4" @@ -54,6 +56,11 @@ connectorx = ">=0.3.1" [tool.poetry.group.pg_replication.dependencies] psycopg2-binary = ">=2.9.9" +[tool.poetry.group.pg_legacy_replication.dependencies] +protobuf = ">=4.25" +psycopg2-binary = ">=2.9.9" +sqlalchemy = ">=1.4" + [tool.poetry.group.google_sheets.dependencies] google-api-python-client = "^2.78.0" @@ -116,4 +123,4 @@ requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.black] -include = '.*py$' +include = '.*py$' \ No newline at end of file diff --git a/sources/.dlt/example.secrets.toml b/sources/.dlt/example.secrets.toml index a0e8963e0..4a9590cfe 100644 --- a/sources/.dlt/example.secrets.toml +++ b/sources/.dlt/example.secrets.toml @@ -16,7 +16,11 @@ location = "US" ### Sources [sources] +# local postgres +helpers.credentials="postgresql://loader:loader@localhost:5432/dlt_data" +pg_legacy_replication.credentials="postgresql://loader:loader@localhost:5432/dlt_data" + ## chess pipeline # the section below defines secrets for 
"chess_dlt_config_example" source in chess/__init__.py [sources.chess] -secret_str="secret string" # a string secret +secret_str="secret string" # a string secret \ No newline at end of file diff --git a/sources/pg_legacy_replication/README.md b/sources/pg_legacy_replication/README.md new file mode 100644 index 000000000..f6c9de239 --- /dev/null +++ b/sources/pg_legacy_replication/README.md @@ -0,0 +1,130 @@ +# Postgres legacy replication +[Postgres](https://www.postgresql.org/) is one of the most popular relational database management systems. This verified source uses Postgres' replication functionality to efficiently process changes +in tables (a process often referred to as _Change Data Capture_ or CDC). It uses [logical decoding](https://www.postgresql.org/docs/current/logicaldecoding.html) and the optional `decoderbufs` +[output plugin](https://github.com/debezium/postgres-decoderbufs), which is a shared library which must be built or enabled. + +| Source | Description | +|---------------------|-------------------------------------------------| +| replication_source | Load published messages from a replication slot | + +## Install decoderbufs + +Instructions can be found [here](https://github.com/debezium/postgres-decoderbufs?tab=readme-ov-file#building) + +Below is an example installation in a docker image: +```Dockerfile +FROM postgres:14 + +# Install dependencies required to build decoderbufs +RUN apt-get update +RUN apt-get install -f -y \ + software-properties-common \ + build-essential \ + pkg-config \ + git + +RUN apt-get install -f -y \ + postgresql-server-dev-14 \ + libprotobuf-c-dev && \ + rm -rf /var/lib/apt/lists/* + +ARG decoderbufs_version=v1.7.0.Final +RUN git clone https://github.com/debezium/postgres-decoderbufs -b $decoderbufs_version --single-branch && \ + cd postgres-decoderbufs && \ + make && make install && \ + cd .. && \ + rm -rf postgres-decoderbufs +``` + +## Initialize the pipeline + +```bash +$ dlt init pg_legacy_replication duckdb +``` + +This uses `duckdb` as destination, but you can choose any of the supported [destinations](https://dlthub.com/docs/dlt-ecosystem/destinations/). + +## Set up user + +The Postgres user needs to have the `LOGIN` and `REPLICATION` attributes assigned: + +```sql +CREATE ROLE replication_user WITH LOGIN REPLICATION; +``` + +It also needs various read only privileges on the database (by first connecting to the database): + +```sql +\connect dlt_data +GRANT USAGE ON SCHEMA schema_name TO replication_user; +GRANT SELECT ON ALL TABLES IN SCHEMA public TO replication_user; +ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO replication_user; +``` + +## Add credentials +1. Open `.dlt/secrets.toml`. +2. Enter your Postgres credentials: + + ```toml + [sources.pg_legacy_replication] + credentials="postgresql://replication_user:<>@localhost:5432/dlt_data" + ``` +3. Enter credentials for your chosen destination as per the [docs](https://dlthub.com/docs/dlt-ecosystem/destinations/). + +## Run the pipeline + +1. Install the necessary dependencies by running the following command: + + ```bash + pip install -r requirements.txt + ``` + +1. Now the pipeline can be run by using the command: + + ```bash + python pg_legacy_replication_pipeline.py + ``` + +1. 
To make sure that everything is loaded as expected, use the command: + + ```bash + dlt pipeline pg_replication_pipeline show + ``` + +# Differences between `pg_legacy_replication` and `pg_replication` + +## Overview + +`pg_legacy_replication` is a fork of the verified `pg_replication` source. The primary goal of this fork is to provide logical replication capabilities for Postgres instances running versions +earlier than 10, when the `pgoutput` plugin was not yet available. This fork draws inspiration from the original `pg_replication` source and the `decoderbufs` library, +which is actively maintained by Debezium. + +## Key Differences from `pg_replication` + +### Replication User Ownership Requirements +One of the limitations of native Postgre replication is that the replication user must **own** the tables in order to add them to a **publication**. +Additionally, once a table is added to a publication, it cannot be removed, requiring the creation of a new replication slot, which results in the loss of any state tracking. + +### Limitations in `pg_replication` +The current pg_replication implementation has several limitations: +- It supports only a single initial snapshot of the data. +- It requires `CREATE` access to the source database in order to perform the initial snapshot. +- **Superuser** access is required to replicate entire Postgres schemas. + While the `pg_legacy_replication` source theoretically reads the entire WAL across all schemas, the current implementation using dlt transformers restricts this functionality. + In practice, this has not been a common use case. +- The implementation is opinionated in its approach to data transfer. Specifically, when updates or deletes are required, it defaults to a `merge` write disposition, + which replicates live data without tracking changes over time. + +### Features of `pg_legacy_replication` + +This fork of `pg_replication` addresses the aforementioned limitations and introduces the following improvements: +- Adheres to the dlt philosophy by treating the WAL as an upstream resources. This replication stream is then transformed into various DLT resources, with customizable options for write disposition, + file formats, type hints, etc., specified at the resource level rather than at the source level. +- Supports an initial snapshot of all tables using the transaction slot isolation level. Additionally, ad-hoc snapshots can be performed using the serializable deferred isolation level, + similar to `pg_dump`. +- Emphasizes the use of `pyarrow` and parquet formats for efficient data storage and transfer. A dedicated backend has been implemented to support these formats. +- Replication messages are decoded using Protocol Buffers (protobufs) in C, rather than relying on native Python byte buffer parsing. This ensures greater efficiency and performance. + +## Next steps +- Add support for the [wal2json](https://github.com/eulerto/wal2json) replication plugin. This is particularly important for environments such as **Amazon RDS**, which supports `wal2json`, +- as opposed to on-premise or Google Cloud SQL instances that support `decoderbufs`. 
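+
+## Example usage
+
+The snippet below is a minimal sketch of how the pieces above fit together. It is illustrative rather than normative: the slot, schema, table, and column names (`my_slot`, `public`, `my_table`, `id`) are placeholders, and the `merge` write disposition shown is just one of the per-resource options mentioned above.
+
+```python
+import dlt
+
+from pg_legacy_replication import (
+    cleanup_snapshot_resources,
+    init_replication,
+    replication_source,
+)
+
+pipeline = dlt.pipeline(
+    pipeline_name="pg_legacy_replication_pipeline",
+    destination="duckdb",
+    dataset_name="replicated",
+)
+
+# One-time setup: create the replication slot and, optionally, load an
+# initial snapshot of the tables using the slot's exported snapshot.
+snapshots = init_replication(
+    slot_name="my_slot",
+    schema="public",
+    table_names="my_table",
+    take_snapshots=True,
+)
+pipeline.run(snapshots)
+cleanup_snapshot_resources(snapshots)
+
+# Subsequent runs: consume new WAL messages from the slot and dispatch
+# them to one dlt resource per table.
+changes = replication_source(
+    slot_name="my_slot",
+    schema="public",
+    table_names="my_table",
+)
+# Hints are applied per resource; with `merge`, deletes can be propagated
+# through the `_pg_deleted_ts` column emitted by the source.
+changes.my_table.apply_hints(
+    write_disposition="merge",
+    primary_key="id",
+    columns={"_pg_deleted_ts": {"hard_delete": True}},
+)
+pipeline.run(changes)
+```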
\ No newline at end of file
diff --git a/sources/pg_legacy_replication/__init__.py b/sources/pg_legacy_replication/__init__.py
new file mode 100644
index 000000000..aaf1f73ed
--- /dev/null
+++ b/sources/pg_legacy_replication/__init__.py
@@ -0,0 +1,213 @@
+"""Replicates postgres tables in batch using logical decoding."""
+
+from collections import defaultdict
+from typing import Any, Callable, Iterable, Mapping, Optional, Sequence, Union
+
+import dlt
+from dlt.extract import DltResource
+from dlt.extract.items import TDataItem
+from dlt.sources.credentials import ConnectionStringCredentials
+from dlt.sources.sql_database import sql_table
+
+from .helpers import (
+    BackendHandler,
+    ItemGenerator,
+    ReplicationOptions,
+    SqlTableOptions,
+    advance_slot,
+    cleanup_snapshot_resources,
+    configure_engine,
+    create_replication_slot,
+    drop_replication_slot,
+    get_max_lsn,
+    get_rep_conn,
+)
+
+
+@dlt.source
+def replication_source(
+    slot_name: str,
+    schema: str,
+    table_names: Union[str, Sequence[str]],
+    credentials: ConnectionStringCredentials = dlt.secrets.value,
+    repl_options: Optional[Mapping[str, ReplicationOptions]] = None,
+    target_batch_size: int = 1000,
+    flush_slot: bool = True,
+) -> Iterable[DltResource]:
+    """
+    Defines a dlt source for replicating Postgres tables using logical replication.
+    This source reads from a replication slot and pipes the changes using transformers.
+
+    - Relies on a replication slot that publishes DML operations (i.e. `insert`, `update`, and `delete`).
+    - Maintains the LSN of the last consumed message in state to track progress.
+    - At the start of the run, advances the slot up to the last consumed message from the previous run (for pg > 10 only).
+    - Processes in batches to limit memory usage.
+
+    Args:
+        slot_name (str):
+            The name of the logical replication slot used to fetch WAL changes.
+        schema (str):
+            Name of the schema to replicate tables from.
+        table_names (Union[str, Sequence[str]]):
+            The name(s) of the tables to replicate. Can be a single table name or a list of table names.
+        credentials (ConnectionStringCredentials):
+            Database credentials for connecting to the Postgres instance.
+        repl_options (Optional[Mapping[str, ReplicationOptions]], optional):
+            A mapping of table names to `ReplicationOptions`, allowing for fine-grained control over
+            replication behavior for each table.
+
+            Each `ReplicationOptions` dictionary can include the following keys:
+            - `backend` (Optional[TableBackend]): Specifies the backend to use for table replication.
+            - `backend_kwargs` (Optional[Mapping[str, Any]]): Additional configuration options for the backend.
+            - `column_hints` (Optional[TTableSchemaColumns]): A dictionary of hints for column types or properties.
+            - `include_lsn` (Optional[bool]): Whether to include the LSN (Log Sequence Number)
+              in the replicated data. Defaults to `True`.
+            - `include_deleted_ts` (Optional[bool]): Whether to include a timestamp for deleted rows.
+              Defaults to `True`.
+            - `include_commit_ts` (Optional[bool]): Whether to include the commit timestamp of each change.
+            - `include_tx_id` (Optional[bool]): Whether to include the transaction ID of each change.
+            - `included_columns` (Optional[Set[str]]): A set of specific columns to include in the replication.
+              If not specified, all columns are included.
+        target_batch_size (int, optional):
+            The target size of each batch of replicated data items. Defaults to `1000`.
+        flush_slot (bool, optional):
+            If `True`, advances the replication slot to the last processed LSN
+            to prevent replaying already replicated changes. Defaults to `True`.
+
+    Yields:
+        Iterable[DltResource]:
+            A collection of `DltResource` objects, each corresponding to a table being replicated.
+
+    Notes:
+        - The `repl_options` parameter allows fine-tuning of replication behavior, such as column filtering
+          or write disposition configuration, per table.
+        - The replication process is incremental, ensuring only new changes are processed after the last commit LSN.
+    """
+    table_names = [table_names] if isinstance(table_names, str) else table_names or []
+    repl_options = defaultdict(lambda: ReplicationOptions(), repl_options or {})
+
+    @dlt.resource(name=lambda args: args["slot_name"], standalone=True)
+    def replication_resource(slot_name: str) -> Iterable[TDataItem]:
+        # start where we left off in the previous run
+        start_lsn = dlt.current.resource_state().get("last_commit_lsn", 0)
+        if flush_slot and start_lsn > 0:
+            advance_slot(start_lsn, slot_name, credentials)
+
+        # continue until the last message in the replication slot
+        upto_lsn = get_max_lsn(credentials, slot_name)
+        if upto_lsn is None:
+            return
+
+        table_qnames = {f"{schema}.{table_name}" for table_name in table_names}
+
+        # generate items in batches
+        while True:
+            gen = ItemGenerator(
+                credentials=credentials,
+                slot_name=slot_name,
+                table_qnames=table_qnames,
+                upto_lsn=upto_lsn,
+                start_lsn=start_lsn,
+                repl_options=repl_options,
+                target_batch_size=target_batch_size,
+            )
+            yield from gen
+            if gen.generated_all:
+                dlt.current.resource_state()["last_commit_lsn"] = gen.last_commit_lsn
+                break
+            start_lsn = gen.last_commit_lsn
+
+    wal_reader = replication_resource(slot_name)
+
+    for table in table_names:
+        yield dlt.transformer(
+            _create_table_dispatch(table, repl_options=repl_options[table]),
+            data_from=wal_reader,
+            name=table,
+        )
+
+
+def _create_table_dispatch(
+    table: str, repl_options: ReplicationOptions
+) -> Callable[[TDataItem], Any]:
+    """Creates a dispatch handler that processes data items based on a specified table and optional column hints."""
+    handler = BackendHandler(table, repl_options)
+    # FIXME: unclear why `__qualname__` must be set manually here
+    handler.__qualname__ = "BackendHandler.__call__"  # type: ignore[attr-defined]
+    return handler
+
+
+@dlt.source
+def init_replication(
+    slot_name: str,
+    schema: str,
+    table_names: Optional[Union[str, Sequence[str]]] = None,
+    credentials: ConnectionStringCredentials = dlt.secrets.value,
+    take_snapshots: bool = False,
+    table_options: Optional[Mapping[str, SqlTableOptions]] = None,
+    reset: bool = False,
+) -> Iterable[DltResource]:
+    """
+    Initializes a replication session for Postgres using logical replication.
+    Optionally, snapshots of specified tables can be taken during initialization.
+
+    Args:
+        slot_name (str):
+            The name of the logical replication slot to be used or created.
+        schema (str):
+            Name of the schema to replicate tables from.
+        table_names (Optional[Union[str, Sequence[str]]]):
+            The name(s) of the table(s) to replicate. Can be a single table name or a list of table names.
+            If not provided, no tables will be replicated unless `take_snapshots` is `True`.
+        credentials (ConnectionStringCredentials):
+            Database credentials for connecting to the Postgres instance.
+        take_snapshots (bool):
+            Whether to take initial snapshots of the specified tables.
+            Defaults to `False`.
+        table_options (Optional[Mapping[str, SqlTableOptions]]):
+            Additional options for configuring replication for specific tables.
+            These are exactly the same parameters as those of the `dlt.sources.sql_database.sql_table` function.
+            This argument is only used if `take_snapshots` is `True`.
+        reset (bool, optional):
+            If `True`, drops the existing replication slot before creating a new one.
+            Use with caution, as this will clear existing replication state.
+            Defaults to `False`.
+
+    Returns:
+        - None if `take_snapshots` is `False`
+        - a list of `DltResource` objects for the snapshot table(s) if `take_snapshots` is `True`.
+
+    Notes:
+        - If `reset` is `True`, the existing replication slot will be dropped before creating a new one.
+        - When `take_snapshots` is `True`, the function configures a snapshot isolation level for consistent table snapshots.
+    """
+    rep_conn = get_rep_conn(credentials)
+    with rep_conn.cursor() as rep_cur:
+        if reset:
+            drop_replication_slot(slot_name, rep_cur)
+        slot = create_replication_slot(slot_name, rep_cur)
+
+    # Close connection if no snapshots are needed
+    if not take_snapshots:
+        rep_conn.close()
+        return
+
+    assert table_names is not None
+
+    engine = configure_engine(
+        credentials, rep_conn, slot.get("snapshot_name") if slot else None
+    )
+
+    table_names = [table_names] if isinstance(table_names, str) else table_names or []
+
+    for table in table_names:
+        table_args = (table_options or {}).get(table, {}).copy()
+        yield sql_table(credentials=engine, table=table, schema=schema, **table_args)
+
+
+__all__ = [
+    "ReplicationOptions",
+    "cleanup_snapshot_resources",
+    "init_replication",
+    "replication_source",
+]
diff --git a/sources/pg_legacy_replication/exceptions.py b/sources/pg_legacy_replication/exceptions.py
new file mode 100644
index 000000000..99e3db420
--- /dev/null
+++ b/sources/pg_legacy_replication/exceptions.py
@@ -0,0 +1,6 @@
+# class SqlDatabaseSourceImportError(Exception):
+#     def __init__(self) -> None:
+#         super().__init__(
+#             "Could not import `sql_database` source. Run `dlt init sql_database `"
+#             " to download the source code."
+#         )
diff --git a/sources/pg_legacy_replication/helpers.py b/sources/pg_legacy_replication/helpers.py
new file mode 100644
index 000000000..8edd98502
--- /dev/null
+++ b/sources/pg_legacy_replication/helpers.py
@@ -0,0 +1,707 @@
+import hashlib
+from collections import defaultdict
+from contextlib import closing
+from dataclasses import dataclass, field
+from functools import partial
+from typing import (
+    Any,
+    Callable,
+    DefaultDict,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Mapping,
+    NamedTuple,
+    Optional,
+    Sequence,
+    Set,
+    TypedDict,
+)
+
+import dlt
+import psycopg2
+from dlt.common import logger
+from dlt.common.libs.sql_alchemy import Engine, MetaData, Table, sa
+from dlt.common.pendulum import pendulum
+from dlt.common.schema.typing import TColumnSchema, TTableSchema, TTableSchemaColumns
+from dlt.common.schema.utils import merge_column
+from dlt.common.typing import TDataItem
+from dlt.extract import DltSource
+from dlt.extract.items import DataItemWithMeta
+from dlt.sources.credentials import ConnectionStringCredentials
+from dlt.sources.sql_database import (
+    ReflectionLevel,
+    TableBackend,
+    TQueryAdapter,
+    TTypeAdapter,
+    arrow_helpers as arrow,
+    engine_from_credentials,
+)
+from dlt.sources.sql_database.schema_types import sqla_col_to_column_schema
+from psycopg2.extensions import connection as ConnectionExt, cursor
+from psycopg2.extras import (
+    LogicalReplicationConnection,
+    ReplicationCursor,
+    ReplicationMessage,
+    StopReplication,
+)
+
+from .pg_logicaldec_pb2 import DatumMessage, Op, RowMessage, TypeInfo
+from .schema_types import _epoch_micros_to_datetime, _to_dlt_column_schema, _to_dlt_val
+
+
+class ReplicationOptions(TypedDict, total=False):
+    backend: Optional[TableBackend]
+    backend_kwargs: Optional[Mapping[str, Any]]
+    column_hints: Optional[TTableSchemaColumns]
+    include_lsn: Optional[bool]  # Default is true
+    include_deleted_ts: Optional[bool]  # Default is true
+    include_commit_ts: Optional[bool]
+    include_tx_id: Optional[bool]
+    included_columns: Optional[Set[str]]
+
+
+class SqlTableOptions(TypedDict, total=False):
+    backend: TableBackend
+    backend_kwargs: Optional[Dict[str, Any]]
+    chunk_size: int
+    defer_table_reflect: Optional[bool]
+    detect_precision_hints: Optional[bool]
+    included_columns: Optional[List[str]]
+    metadata: Optional[MetaData]
+    query_adapter_callback: Optional[TQueryAdapter]
+    reflection_level: Optional[ReflectionLevel]
+    table_adapter_callback: Optional[Callable[[Table], None]]
+    type_adapter_callback: Optional[TTypeAdapter]
+
+
+def configure_engine(
+    credentials: ConnectionStringCredentials,
+    rep_conn: LogicalReplicationConnection,
+    snapshot_name: Optional[str],
+) -> Engine:
+    """
+    Configures the SQLAlchemy engine.
+    Also attaches the replication connection in order to prevent it from being garbage-collected and closed.
+
+    Args:
+        snapshot_name (str, optional): Used during the initial table snapshot so that
+            all transactions run against the same consistent snapshot.
+    """
+    engine: Engine = engine_from_credentials(credentials)
+    # `execution_options()` returns a new Engine; keep the returned instance so the options take effect
+    engine = engine.execution_options(stream_results=True, max_row_buffer=2 * 50000)
+    setattr(engine, "rep_conn", rep_conn)  # noqa
+
+    @sa.event.listens_for(engine, "begin")
+    def on_begin(conn: sa.Connection) -> None:
+        cur = conn.connection.cursor()
+        if snapshot_name is None:
+            # Use the same isolation level that pg_dump uses
+            cur.execute(
+                "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE, READ ONLY, DEFERRABLE"
+            )
+        else:
+            cur.execute("SET TRANSACTION ISOLATION LEVEL REPEATABLE READ")
+            cur.execute(f"SET TRANSACTION SNAPSHOT '{snapshot_name}'")
+
+    @sa.event.listens_for(engine, "engine_disposed")
+    def on_engine_disposed(e: Engine) -> None:
+        delattr(e, "rep_conn")
+
+    return engine
+
+
+def cleanup_snapshot_resources(snapshots: DltSource) -> None:
+    """FIXME: hack to release the underlying SQL engine once table snapshots have been extracted"""
+    resources = snapshots.resources
+    if resources:
+        engine: Engine = next(iter(resources.values()))._explicit_args["credentials"]
+        engine.dispose()
+
+
+def get_pg_version(cur: cursor) -> int:
+    """Returns the Postgres server version as an int."""
+    return cur.connection.server_version
+
+
+def create_replication_slot(  # type: ignore[return]
+    name: str, cur: ReplicationCursor, output_plugin: str = "decoderbufs"
+) -> Optional[Dict[str, str]]:
+    """Creates a replication slot if it doesn't exist yet."""
+    try:
+        cur.create_replication_slot(name, output_plugin=output_plugin)
+        logger.info("Successfully created replication slot '%s'", name)
+        result = cur.fetchone()
+        return {
+            "slot_name": result[0],
+            "consistent_point": result[1],
+            "snapshot_name": result[2],
+            "output_plugin": result[3],
+        }
+    except psycopg2.errors.DuplicateObject:  # the replication slot already exists
+        logger.info(
+            "Replication slot '%s' cannot be created because it already exists", name
+        )
+
+
+def drop_replication_slot(name: str, cur: ReplicationCursor) -> None:
+    """Drops a replication slot if it exists."""
+    try:
+        cur.drop_replication_slot(name)
+        logger.info("Successfully dropped replication slot '%s'", name)
+    except psycopg2.errors.UndefinedObject:  # the replication slot does not exist
+        logger.info(
+            "Replication slot '%s' cannot be dropped because it does not exist", name
+        )
+
+
+def get_max_lsn(
+    credentials: ConnectionStringCredentials, slot_name: str
+) -> Optional[int]:
+    """
+    Returns the maximum Log Sequence Number (LSN).
+
+    Returns None if the replication slot is empty.
+    Does not consume the slot, i.e. messages are not flushed.
+    """
+    with closing(_get_conn(credentials)) as conn:
+        with conn.cursor() as cur:
+            pg_version = get_pg_version(cur)
+            lsn_field = "lsn" if pg_version >= 100000 else "location"
+            # subtract '0/0' to convert pg_lsn type to int (https://stackoverflow.com/a/73738472)
+            cur.execute(
+                f"""
+                SELECT {lsn_field} - '0/0' AS max_lsn
+                FROM pg_logical_slot_peek_binary_changes(%s, NULL, NULL)
+                ORDER BY {lsn_field} DESC
+                LIMIT 1;
+                """,
+                (slot_name,),
+            )
+            row = cur.fetchone()
+            return row[0] if row else None  # type: ignore[no-any-return]
+
+
+def lsn_int_to_hex(lsn: int) -> str:
+    """Convert an integer LSN to the Postgres hexadecimal representation."""
+    # https://stackoverflow.com/questions/66797767/lsn-external-representation.
+    return f"{lsn >> 32 & 4294967295:X}/{lsn & 4294967295:08X}"
+
+
+def advance_slot(
+    upto_lsn: int,
+    slot_name: str,
+    credentials: ConnectionStringCredentials,
+) -> None:
+    """
+    Advances the position in the replication slot.
+
+    Flushes all messages up to (and including) the message with LSN = `upto_lsn`.
+    This function is used as an alternative to psycopg2's `send_feedback` method, because
+    the behavior of that method seems odd when used outside of `consume_stream`.
+    """
+    assert upto_lsn > 0
+    with closing(_get_conn(credentials)) as conn:
+        with conn.cursor() as cur:
+            # There is unfortunately no way in pg9.6 to manually advance the replication slot
+            if get_pg_version(cur) > 100000:
+                cur.execute(
+                    f"SELECT * FROM pg_replication_slot_advance('{slot_name}', '{lsn_int_to_hex(upto_lsn)}');"
+                )
+
+
+def _get_conn(
+    credentials: ConnectionStringCredentials,
+    connection_factory: Optional[Any] = None,
+) -> ConnectionExt:
+    """Returns a psycopg2 connection to interact with postgres."""
+    return psycopg2.connect(  # type: ignore[no-any-return]
+        database=credentials.database,
+        user=credentials.username,
+        password=credentials.password,
+        host=credentials.host,
+        port=credentials.port,
+        connection_factory=connection_factory,
+        **({} if credentials.query is None else credentials.query),
+    )
+
+
+def get_rep_conn(
+    credentials: ConnectionStringCredentials,
+) -> LogicalReplicationConnection:
+    """
+    Returns a psycopg2 LogicalReplicationConnection to interact with postgres replication functionality.
+
+    Raises an error if the user does not have the REPLICATION attribute assigned.
+    """
+    return _get_conn(credentials, LogicalReplicationConnection)  # type: ignore[return-value]
+
+
+class MessageConsumer:
+    """
+    Consumes messages from a ReplicationCursor sequentially.
+
+    Generates a data item for each `insert`, `update`, and `delete` message.
+    Processes in batches to limit memory usage.
+    Maintains message data needed by subsequent messages in internal state.
+    """
+
+    def __init__(
+        self,
+        credentials: ConnectionStringCredentials,
+        upto_lsn: int,
+        table_qnames: Set[str],
+        repl_options: DefaultDict[str, ReplicationOptions],
+        target_batch_size: int = 1000,
+    ) -> None:
+        self.credentials = credentials
+        self.upto_lsn = upto_lsn
+        self.table_qnames = table_qnames
+        self.target_batch_size = target_batch_size
+        self.repl_options = repl_options
+
+        self.consumed_all: bool = False
+        # maps table names to list of data items
+        self.data_items: Dict[str, List[TDataItem]] = defaultdict(list)
+        # maps table name to table schema
+        self.last_table_schema: Dict[str, TTableSchema] = {}
+        # maps table names to new_typeinfo hashes
+        self.last_table_hashes: Dict[str, int] = {}
+        self.last_commit_ts: pendulum.DateTime
+        self.last_commit_lsn: int
+
+    def __call__(self, msg: ReplicationMessage) -> None:
+        """Processes a message received from the stream."""
+        self.process_msg(msg)
+
+    def process_msg(self, msg: ReplicationMessage) -> None:
+        """Processes an encoded replication message.
+
+        Identifies the message type and decodes accordingly.
+        Message treatment is different for various message types.
+        Breaks out of the stream with a StopReplication exception when
+        - `upto_lsn` is reached
+        - `target_batch_size` is reached
+        - a table's schema has changed
+        """
+        row_msg = RowMessage()
+        try:
+            row_msg.ParseFromString(msg.payload)
+            lsn = msg.data_start
+            assert row_msg.op != Op.UNKNOWN, f"Unsupported operation: {row_msg}"
+            logger.debug(
+                "op: %s, current lsn: %s, max lsn: %s",
+                Op.Name(row_msg.op),
+                lsn,
+                self.upto_lsn,
+            )
+
+            if row_msg.op == Op.BEGIN:
+                # self.last_commit_ts = _epoch_micros_to_datetime(row_msg.commit_time)
+                pass
+            elif row_msg.op == Op.COMMIT:
+                self.process_commit(lsn=lsn)
+            else:  # INSERT, UPDATE or DELETE
+                self.process_change(row_msg, lsn=lsn)
+        except StopReplication:
+            raise
+        except Exception:
+            logger.error(
+                "A fatal error occurred while processing a message: %s", row_msg
+            )
+            raise
+
+    def process_commit(self, lsn: int) -> None:
+        """
+        Updates object state when a Commit message is observed.
+
+        Raises StopReplication when `upto_lsn` or `target_batch_size` is reached.
+        """
+        self.last_commit_lsn = lsn
+        if lsn >= self.upto_lsn:
+            self.consumed_all = True
+        n_items = sum(
+            [len(items) for items in self.data_items.values()]
+        )  # combine items for all tables
+        if self.consumed_all or n_items >= self.target_batch_size:
+            raise StopReplication
+
+    def process_change(self, msg: RowMessage, lsn: int) -> None:
+        """Processes a replication message of type Insert, Update, or Delete."""
+        if msg.table not in self.table_qnames:
+            return
+        table_name = msg.table.split(".")[1]
+        table_schema = self.get_table_schema(msg)
+        data_item = gen_data_item(
+            msg, table_schema["columns"], lsn, **self.repl_options[table_name]
+        )
+        self.data_items[table_name].append(data_item)
+
+    def get_table_schema(self, msg: RowMessage) -> TTableSchema:
+        """Given a row message, calculates or fetches a table schema."""
+        schema, table_name = msg.table.split(".")
+        last_schema = self.last_table_schema.get(table_name)
+
+        # Use the cached schema if the operation is a DELETE
+        if msg.op == Op.DELETE:
+            if last_schema is None:
+                # If absent, reflect it using SQLAlchemy
+                last_schema = self._fetch_table_schema_with_sqla(schema, table_name)
+                self.last_table_schema[table_name] = last_schema
+            return last_schema
+
+        # Return the cached schema if the hash matches
+        current_hash = hash_typeinfo(msg.new_typeinfo)
+        if current_hash == self.last_table_hashes.get(table_name):
+            return self.last_table_schema[table_name]
+
+        new_schema = infer_table_schema(msg, self.repl_options[table_name])
+        if last_schema is None:
+            # Cache the inferred schema and hash if it is not already cached
+            self.last_table_schema[table_name] = new_schema
+            self.last_table_hashes[table_name] = current_hash
+        else:
+            try:
+                retained_schema = compare_schemas(last_schema, new_schema)
+                self.last_table_schema[table_name] = retained_schema
+            except AssertionError as e:
+                logger.info(str(e))
+                raise StopReplication
+
+        return new_schema
+
+    def _fetch_table_schema_with_sqla(
+        self, schema: str, table_name: str
+    ) -> TTableSchema:
+        """Last-resort function to fetch the table schema from the database."""
+        engine = engine_from_credentials(self.credentials)
+        options = self.repl_options[table_name]
+        to_col_schema = partial(
+            sqla_col_to_column_schema,
+            reflection_level=options.get("reflection_level", "full"),
+        )
+        try:
+            metadata = MetaData(schema=schema)
+            table = Table(table_name, metadata, autoload_with=engine)
+            included_columns = options.get("included_columns")
+            columns = {
+                col["name"]: col
+                for c in table.columns
+                if (col := to_col_schema(c)) is not None
+                and (not included_columns or c.name in included_columns)
+            }
+
+            return TTableSchema(
+                name=table_name,
+                columns=add_replication_columns(columns, **options),
+            )
+        finally:
+            engine.dispose()
+
+
+def hash_typeinfo(new_typeinfo: Sequence[TypeInfo]) -> int:
+    """Generate a hash for the entire new_typeinfo list by hashing each TypeInfo message."""
+    typeinfo_tuple = tuple(
+        (info.modifier, info.value_optional) for info in new_typeinfo
+    )
+    hash_obj = hashlib.blake2b(repr(typeinfo_tuple).encode(), digest_size=8)
+    return int(hash_obj.hexdigest(), 16)
+
+
+class TableItems(NamedTuple):
+    schema: TTableSchema
+    items: List[TDataItem]
+
+
+@dataclass
+class ItemGenerator:
+    credentials: ConnectionStringCredentials
+    slot_name: str
+    table_qnames: Set[str]
+    upto_lsn: int
+    start_lsn: int
+    repl_options: DefaultDict[str, ReplicationOptions]
+    target_batch_size: int = 1000
+    keepalive_interval: Optional[int] = None
+    last_commit_lsn: Optional[int] = field(default=None, init=False)
+    generated_all: bool = False
+
+    def __iter__(self) -> Iterator[TableItems]:
+        """
+        Yields data items/schema from MessageConsumer.
+
+        Starts replication of messages from the replication slot.
+        Maintains the LSN of the last consumed commit message in object state.
+        Advances the slot only when all messages have been consumed.
+        """
+        with closing(get_rep_conn(self.credentials)) as rep_conn:
+            with rep_conn.cursor() as rep_cur:
+                try:
+                    consumer = MessageConsumer(
+                        credentials=self.credentials,
+                        upto_lsn=self.upto_lsn,
+                        table_qnames=self.table_qnames,
+                        repl_options=self.repl_options,
+                        target_batch_size=self.target_batch_size,
+                    )
+                    rep_cur.start_replication(self.slot_name, start_lsn=self.start_lsn)
+                    rep_cur.consume_stream(consumer, self.keepalive_interval)
+                except StopReplication:  # completed batch or reached `upto_lsn`
+                    yield from self.flush_batch(rep_cur, consumer)
+                finally:
+                    logger.debug(
+                        "Closing connection... last_commit_lsn: %s, generated_all: %s, feedback_ts: %s",
+                        self.last_commit_lsn,
+                        self.generated_all,
+                        rep_cur.feedback_timestamp,
+                    )
+
+    def flush_batch(
+        self, cur: ReplicationCursor, consumer: MessageConsumer
+    ) -> Iterator[TableItems]:
+        last_commit_lsn = consumer.last_commit_lsn
+        consumed_all = consumer.consumed_all
+        for table, data_items in consumer.data_items.items():
+            logger.info("Flushing %s events for table '%s'", len(data_items), table)
+            yield TableItems(consumer.last_table_schema[table], data_items)
+        if consumed_all:
+            cur.send_feedback(
+                write_lsn=last_commit_lsn,
+                flush_lsn=last_commit_lsn,
+                reply=True,
+                force=True,
+            )
+        else:
+            cur.send_feedback(write_lsn=last_commit_lsn, reply=True, force=True)
+        self.last_commit_lsn = last_commit_lsn
+        self.generated_all = consumed_all
+
+
+@dataclass
+class BackendHandler:
+    """
+    Consumes messages from ItemGenerator once a batch is ready for emitting.
+
+    It is mainly responsible for emitting schemas and dict data items, or for
+    transforming items into arrow tables.
+    """
+
+    table: str
+    repl_options: ReplicationOptions
+
+    def __call__(self, table_items: TableItems) -> Iterable[DataItemWithMeta]:
+        if table_items.schema["name"] != self.table:
+            return
+
+        # Apply column hints if provided
+        columns = table_items.schema["columns"]
+        if column_hints := self.repl_options.get("column_hints"):
+            for col_name, col_hint in column_hints.items():
+                if col_name in columns:
+                    columns[col_name] = merge_column(columns[col_name], col_hint)
+
+        # Process based on backend
+        data = table_items.items
+        backend = self.repl_options.get("backend", "sqlalchemy")
+        try:
+            if backend == "sqlalchemy":
+                yield from self.emit_schema_and_items(columns, data)
+            elif backend == "pyarrow":
+                yield from self.emit_arrow_table(columns, data)
+            else:
+                raise NotImplementedError(f"Unsupported backend: {backend}")
+        except Exception:
+            logger.error(
+                "A fatal error occurred while processing batch for '%s' (columns=%s, data=%s)",
+                self.table,
+                columns,
+                data,
+            )
+            raise
+
+    def emit_schema_and_items(
+        self, columns: TTableSchemaColumns, items: List[TDataItem]
+    ) -> Iterator[DataItemWithMeta]:
+        yield dlt.mark.with_hints(
+            [],
+            dlt.mark.make_hints(table_name=self.table, columns=columns),
+            create_table_variant=True,
+        )
+        yield dlt.mark.with_table_name(items, self.table)
+
+    def emit_arrow_table(
+        self, columns: TTableSchemaColumns, items: List[TDataItem]
+    ) -> Iterator[DataItemWithMeta]:
+        # Create rows for pyarrow using the ordered column keys
+        rows = [
+            tuple(item.get(column) for column in columns)
+            for item in items
+        ]
+        # `backend_kwargs` may be absent or explicitly None, so guard both cases
+        tz = (self.repl_options.get("backend_kwargs") or {}).get("tz", "UTC")
+        yield dlt.mark.with_table_name(
+            arrow.row_tuples_to_arrow(rows, columns=columns, tz=tz),
+            self.table,
+        )
+
+
+def infer_table_schema(msg: RowMessage, options: ReplicationOptions) -> TTableSchema:
+    """Infers the table schema from the replication message and optional hints."""
+    # Schema can only be inferred from INSERT/UPDATE messages, which carry new_tuple and new_typeinfo
+    assert msg.op != Op.DELETE
+    included_columns = options.get("included_columns")
+    columns = {
+        col_name: _to_dlt_column_schema(
+            col_name, datum=col, type_info=msg.new_typeinfo[i]
+        )
+        for i, col in enumerate(msg.new_tuple)
+        if (col_name := _actual_column_name(col))
+        and (not included_columns or col_name in included_columns)
+    }
+
+    return TTableSchema(
+        name=msg.table.split(".")[1],
+        columns=add_replication_columns(columns, **options),
+    )
+
+
+def add_replication_columns(
+    columns: TTableSchemaColumns,
+    *,
+    include_lsn: bool = True,
+    include_deleted_ts: bool = True,
+    include_commit_ts: bool = False,
+    include_tx_id: bool = False,
+    **_: Any,
+) -> TTableSchemaColumns:
+    if include_lsn:
+        columns["_pg_lsn"] = {
+            "data_type": "bigint",
+            "name": "_pg_lsn",
+            "nullable": True,
+        }
+    if include_deleted_ts:
+        columns["_pg_deleted_ts"] = {
+            "data_type": "timestamp",
+            "name": "_pg_deleted_ts",
+            "nullable": True,
+        }
+    if include_commit_ts:
+        columns["_pg_commit_ts"] = {
+            "data_type": "timestamp",
+            "name": "_pg_commit_ts",
+            "nullable": True,
+        }
+    if include_tx_id:
+        columns["_pg_tx_id"] = {
+            "data_type": "bigint",
+            "name": "_pg_tx_id",
+            "nullable": True,
+            "precision": 32,
+        }
+    return columns
+
+
+def gen_data_item(
+    msg: RowMessage,
+    column_schema: TTableSchemaColumns,
+    lsn: int,
+    *,
+    include_lsn: bool = True,
+    include_deleted_ts: bool = True,
+    include_commit_ts: bool = False,
+    include_tx_id: bool = False,
+    included_columns: Optional[Set[str]] = None,
+    **_: Any,
+) -> TDataItem:
+    """Generates a data item from a row message and corresponding metadata."""
+    data_item: TDataItem = {}
+    if include_lsn:
+        data_item["_pg_lsn"] = lsn
+    if include_commit_ts:
+        data_item["_pg_commit_ts"] = _epoch_micros_to_datetime(msg.commit_time)
+    if include_tx_id:
+        data_item["_pg_tx_id"] = msg.transaction_id
+
+    # Select the relevant row tuple based on the operation type
+    is_delete = msg.op == Op.DELETE
+    row = msg.old_tuple if is_delete else msg.new_tuple
+    if is_delete and include_deleted_ts:
+        data_item["_pg_deleted_ts"] = _epoch_micros_to_datetime(msg.commit_time)
+
+    for data in row:
+        col_name = _actual_column_name(data)
+        if not included_columns or col_name in included_columns:
+            data_item[col_name] = _to_dlt_val(
+                data, column_schema[col_name], for_delete=is_delete
+            )
+
+    return data_item
+
+
+def _actual_column_name(column: DatumMessage) -> str:
+    """
+    Certain column names arrive quoted because they are reserved keywords;
+    strip the quotes and let the destination decide how to normalize them.
+    """
+    col_name = column.column_name
+    if col_name.startswith('"') and col_name.endswith('"'):
+        col_name = col_name[1:-1]
+    return col_name
+
+
+ALLOWED_COL_SCHEMA_FIELDS: Set[str] = {
+    "name",
+    "data_type",
+    "nullable",
+    "precision",
+    "scale",
+    "timezone",
+}
+
+
+def compare_schemas(last: TTableSchema, new: TTableSchema) -> TTableSchema:
+    """
+    Compares the last schema with the new one and chooses the more precise
+    of the two if they are compatible, or raises an AssertionError on an
+    incompatible schema change.
+    """
+    assert last["name"] == new["name"], "Table names do not match"
+
+    table_schema = TTableSchema(name=last["name"], columns={})
+    last_cols, new_cols = last["columns"], new["columns"]
+    assert len(last_cols) == len(
+        new_cols
+    ), f"Columns mismatch last:{last_cols} new:{new_cols}"
+
+    for name, s1 in last_cols.items():
+        s2 = new_cols.get(name)
+        assert (
+            s2 and s1["data_type"] == s2["data_type"]
+        ), f"Incompatible schema for column '{name}'"
+
+        # Ensure the new schema has no fields outside the allowed fields
+        extra_fields = set(s2.keys()) - ALLOWED_COL_SCHEMA_FIELDS
+        assert not extra_fields, f"Unexpected fields {extra_fields} in column '{name}'"
+
+        # Select the more precise schema by comparing nullable, precision, and scale
+        col_schema = TColumnSchema(name=name, data_type=s1["data_type"])
+        if "nullable" in s1 or "nullable" in s2:
+            # Get nullable values (could be True, False, or None)
+            s1_null = s1.get("nullable")
+            s2_null = s2.get("nullable")
+            if s1_null is not None and s2_null is not None:
+                col_schema["nullable"] = s1_null or s2_null  # Default is True
+            else:
+                col_schema["nullable"] = s1_null if s1_null is not None else s2_null
+        if "precision" in s1 or "precision" in s2:
+            col_schema["precision"] = s1.get("precision", s2.get("precision"))
+        if "scale" in s1 or "scale" in s2:
+            col_schema["scale"] = s1.get("scale", s2.get("scale"))
+        if "timezone" in s1 or "timezone" in s2:
+            col_schema["timezone"] = s1.get("timezone", s2.get("timezone"))
+
+        # Update with the more detailed schema per column
+        table_schema["columns"][name] = col_schema
+
+    return table_schema
diff --git a/sources/pg_legacy_replication/pg_logicaldec.proto b/sources/pg_legacy_replication/pg_logicaldec.proto
new file mode 100644
index 000000000..43371f5a8
--- /dev/null
+++ b/sources/pg_legacy_replication/pg_logicaldec.proto
@@ -0,0 +1,50 @@
+package decoderbufs;
+
+option java_package="io.debezium.connector.postgresql.proto";
+option java_outer_classname = "PgProto";
+option optimize_for = SPEED;
+
+enum Op {
+  UNKNOWN = -1;
+  INSERT =
0; + UPDATE = 1; + DELETE = 2; + BEGIN = 3; + COMMIT = 4; +} + +message Point { + required double x = 1; + required double y = 2; +} + +message DatumMessage { + optional string column_name = 1; + optional int64 column_type = 2; + oneof datum { + int32 datum_int32 = 3; + int64 datum_int64 = 4; + float datum_float = 5; + double datum_double = 6; + bool datum_bool = 7; + string datum_string = 8; + bytes datum_bytes = 9; + Point datum_point = 10; + bool datum_missing = 11; + } +} + +message TypeInfo { + required string modifier = 1; + required bool value_optional = 2; +} + +message RowMessage { + optional uint32 transaction_id = 1; + optional uint64 commit_time = 2; + optional string table = 3; + optional Op op = 4; + repeated DatumMessage new_tuple = 5; + repeated DatumMessage old_tuple = 6; + repeated TypeInfo new_typeinfo = 7; +} diff --git a/sources/pg_legacy_replication/pg_logicaldec_pb2.py b/sources/pg_legacy_replication/pg_logicaldec_pb2.py new file mode 100644 index 000000000..08fa960a1 --- /dev/null +++ b/sources/pg_legacy_replication/pg_logicaldec_pb2.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: pg_logicaldec.proto +# Protobuf Python Version: 5.26.1 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x13pg_logicaldec.proto\x12\x0b\x64\x65\x63oderbufs"\x1d\n\x05Point\x12\t\n\x01x\x18\x01 \x02(\x01\x12\t\n\x01y\x18\x02 \x02(\x01"\xa7\x02\n\x0c\x44\x61tumMessage\x12\x13\n\x0b\x63olumn_name\x18\x01 \x01(\t\x12\x13\n\x0b\x63olumn_type\x18\x02 \x01(\x03\x12\x15\n\x0b\x64\x61tum_int32\x18\x03 \x01(\x05H\x00\x12\x15\n\x0b\x64\x61tum_int64\x18\x04 \x01(\x03H\x00\x12\x15\n\x0b\x64\x61tum_float\x18\x05 \x01(\x02H\x00\x12\x16\n\x0c\x64\x61tum_double\x18\x06 \x01(\x01H\x00\x12\x14\n\ndatum_bool\x18\x07 \x01(\x08H\x00\x12\x16\n\x0c\x64\x61tum_string\x18\x08 \x01(\tH\x00\x12\x15\n\x0b\x64\x61tum_bytes\x18\t \x01(\x0cH\x00\x12)\n\x0b\x64\x61tum_point\x18\n \x01(\x0b\x32\x12.decoderbufs.PointH\x00\x12\x17\n\rdatum_missing\x18\x0b \x01(\x08H\x00\x42\x07\n\x05\x64\x61tum"4\n\x08TypeInfo\x12\x10\n\x08modifier\x18\x01 \x02(\t\x12\x16\n\x0evalue_optional\x18\x02 \x02(\x08"\xee\x01\n\nRowMessage\x12\x16\n\x0etransaction_id\x18\x01 \x01(\r\x12\x13\n\x0b\x63ommit_time\x18\x02 \x01(\x04\x12\r\n\x05table\x18\x03 \x01(\t\x12\x1b\n\x02op\x18\x04 \x01(\x0e\x32\x0f.decoderbufs.Op\x12,\n\tnew_tuple\x18\x05 \x03(\x0b\x32\x19.decoderbufs.DatumMessage\x12,\n\told_tuple\x18\x06 \x03(\x0b\x32\x19.decoderbufs.DatumMessage\x12+\n\x0cnew_typeinfo\x18\x07 \x03(\x0b\x32\x15.decoderbufs.TypeInfo*U\n\x02Op\x12\x14\n\x07UNKNOWN\x10\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x12\n\n\x06INSERT\x10\x00\x12\n\n\x06UPDATE\x10\x01\x12\n\n\x06\x44\x45LETE\x10\x02\x12\t\n\x05\x42\x45GIN\x10\x03\x12\n\n\x06\x43OMMIT\x10\x04\x42\x33\n&io.debezium.connector.postgresql.protoB\x07PgProtoH\x01' +) + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "pg_logicaldec_pb2", _globals) +if not _descriptor._USE_C_DESCRIPTORS: + _globals["DESCRIPTOR"]._loaded_options = None + _globals[ + "DESCRIPTOR" + ]._serialized_options = 
( + b"\n&io.debezium.connector.postgresql.protoB\007PgProtoH\001" + ) + _globals["_OP"]._serialized_start = 660 + _globals["_OP"]._serialized_end = 745 + _globals["_POINT"]._serialized_start = 36 + _globals["_POINT"]._serialized_end = 65 + _globals["_DATUMMESSAGE"]._serialized_start = 68 + _globals["_DATUMMESSAGE"]._serialized_end = 363 + _globals["_TYPEINFO"]._serialized_start = 365 + _globals["_TYPEINFO"]._serialized_end = 417 + _globals["_ROWMESSAGE"]._serialized_start = 420 + _globals["_ROWMESSAGE"]._serialized_end = 658 +# @@protoc_insertion_point(module_scope) diff --git a/sources/pg_legacy_replication/pg_logicaldec_pb2.pyi b/sources/pg_legacy_replication/pg_logicaldec_pb2.pyi new file mode 100644 index 000000000..abd25bf22 --- /dev/null +++ b/sources/pg_legacy_replication/pg_logicaldec_pb2.pyi @@ -0,0 +1,166 @@ +""" +@generated by mypy-protobuf. Do not edit manually! +isort:skip_file +""" + +import builtins +import collections.abc +import google.protobuf.descriptor +import google.protobuf.internal.containers +import google.protobuf.internal.enum_type_wrapper +import google.protobuf.message +import sys +import typing + +if sys.version_info >= (3, 10): + import typing as typing_extensions +else: + import typing_extensions + +DESCRIPTOR: google.protobuf.descriptor.FileDescriptor + +class _Op: + ValueType = typing.NewType("ValueType", builtins.int) + V: typing_extensions.TypeAlias = ValueType + +class _OpEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[_Op.ValueType], builtins.type): + DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor + UNKNOWN: _Op.ValueType # -1 + INSERT: _Op.ValueType # 0 + UPDATE: _Op.ValueType # 1 + DELETE: _Op.ValueType # 2 + BEGIN: _Op.ValueType # 3 + COMMIT: _Op.ValueType # 4 + +class Op(_Op, metaclass=_OpEnumTypeWrapper): ... + +UNKNOWN: Op.ValueType # -1 +INSERT: Op.ValueType # 0 +UPDATE: Op.ValueType # 1 +DELETE: Op.ValueType # 2 +BEGIN: Op.ValueType # 3 +COMMIT: Op.ValueType # 4 +global___Op = Op + +@typing.final +class Point(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + X_FIELD_NUMBER: builtins.int + Y_FIELD_NUMBER: builtins.int + x: builtins.float + y: builtins.float + def __init__( + self, + *, + x: builtins.float | None = ..., + y: builtins.float | None = ..., + ) -> None: ... + def HasField(self, field_name: typing.Literal["x", b"x", "y", b"y"]) -> builtins.bool: ... + def ClearField(self, field_name: typing.Literal["x", b"x", "y", b"y"]) -> None: ... + +global___Point = Point + +@typing.final +class DatumMessage(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + COLUMN_NAME_FIELD_NUMBER: builtins.int + COLUMN_TYPE_FIELD_NUMBER: builtins.int + DATUM_INT32_FIELD_NUMBER: builtins.int + DATUM_INT64_FIELD_NUMBER: builtins.int + DATUM_FLOAT_FIELD_NUMBER: builtins.int + DATUM_DOUBLE_FIELD_NUMBER: builtins.int + DATUM_BOOL_FIELD_NUMBER: builtins.int + DATUM_STRING_FIELD_NUMBER: builtins.int + DATUM_BYTES_FIELD_NUMBER: builtins.int + DATUM_POINT_FIELD_NUMBER: builtins.int + DATUM_MISSING_FIELD_NUMBER: builtins.int + column_name: builtins.str + column_type: builtins.int + datum_int32: builtins.int + datum_int64: builtins.int + datum_float: builtins.float + datum_double: builtins.float + datum_bool: builtins.bool + datum_string: builtins.str + datum_bytes: builtins.bytes + datum_missing: builtins.bool + @property + def datum_point(self) -> global___Point: ... 
+ def __init__( + self, + *, + column_name: builtins.str | None = ..., + column_type: builtins.int | None = ..., + datum_int32: builtins.int | None = ..., + datum_int64: builtins.int | None = ..., + datum_float: builtins.float | None = ..., + datum_double: builtins.float | None = ..., + datum_bool: builtins.bool | None = ..., + datum_string: builtins.str | None = ..., + datum_bytes: builtins.bytes | None = ..., + datum_point: global___Point | None = ..., + datum_missing: builtins.bool | None = ..., + ) -> None: ... + def HasField(self, field_name: typing.Literal["column_name", b"column_name", "column_type", b"column_type", "datum", b"datum", "datum_bool", b"datum_bool", "datum_bytes", b"datum_bytes", "datum_double", b"datum_double", "datum_float", b"datum_float", "datum_int32", b"datum_int32", "datum_int64", b"datum_int64", "datum_missing", b"datum_missing", "datum_point", b"datum_point", "datum_string", b"datum_string"]) -> builtins.bool: ... + def ClearField(self, field_name: typing.Literal["column_name", b"column_name", "column_type", b"column_type", "datum", b"datum", "datum_bool", b"datum_bool", "datum_bytes", b"datum_bytes", "datum_double", b"datum_double", "datum_float", b"datum_float", "datum_int32", b"datum_int32", "datum_int64", b"datum_int64", "datum_missing", b"datum_missing", "datum_point", b"datum_point", "datum_string", b"datum_string"]) -> None: ... + def WhichOneof(self, oneof_group: typing.Literal["datum", b"datum"]) -> typing.Literal["datum_int32", "datum_int64", "datum_float", "datum_double", "datum_bool", "datum_string", "datum_bytes", "datum_point", "datum_missing"] | None: ... + +global___DatumMessage = DatumMessage + +@typing.final +class TypeInfo(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + MODIFIER_FIELD_NUMBER: builtins.int + VALUE_OPTIONAL_FIELD_NUMBER: builtins.int + modifier: builtins.str + value_optional: builtins.bool + def __init__( + self, + *, + modifier: builtins.str | None = ..., + value_optional: builtins.bool | None = ..., + ) -> None: ... + def HasField(self, field_name: typing.Literal["modifier", b"modifier", "value_optional", b"value_optional"]) -> builtins.bool: ... + def ClearField(self, field_name: typing.Literal["modifier", b"modifier", "value_optional", b"value_optional"]) -> None: ... + +global___TypeInfo = TypeInfo + +@typing.final +class RowMessage(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + TRANSACTION_ID_FIELD_NUMBER: builtins.int + COMMIT_TIME_FIELD_NUMBER: builtins.int + TABLE_FIELD_NUMBER: builtins.int + OP_FIELD_NUMBER: builtins.int + NEW_TUPLE_FIELD_NUMBER: builtins.int + OLD_TUPLE_FIELD_NUMBER: builtins.int + NEW_TYPEINFO_FIELD_NUMBER: builtins.int + transaction_id: builtins.int + commit_time: builtins.int + table: builtins.str + op: global___Op.ValueType + @property + def new_tuple(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___DatumMessage]: ... + @property + def old_tuple(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___DatumMessage]: ... + @property + def new_typeinfo(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___TypeInfo]: ... 
+ def __init__( + self, + *, + transaction_id: builtins.int | None = ..., + commit_time: builtins.int | None = ..., + table: builtins.str | None = ..., + op: global___Op.ValueType | None = ..., + new_tuple: collections.abc.Iterable[global___DatumMessage] | None = ..., + old_tuple: collections.abc.Iterable[global___DatumMessage] | None = ..., + new_typeinfo: collections.abc.Iterable[global___TypeInfo] | None = ..., + ) -> None: ... + def HasField(self, field_name: typing.Literal["commit_time", b"commit_time", "op", b"op", "table", b"table", "transaction_id", b"transaction_id"]) -> builtins.bool: ... + def ClearField(self, field_name: typing.Literal["commit_time", b"commit_time", "new_tuple", b"new_tuple", "new_typeinfo", b"new_typeinfo", "old_tuple", b"old_tuple", "op", b"op", "table", b"table", "transaction_id", b"transaction_id"]) -> None: ... + +global___RowMessage = RowMessage diff --git a/sources/pg_legacy_replication/requirements.txt b/sources/pg_legacy_replication/requirements.txt new file mode 100644 index 000000000..85f40b3e5 --- /dev/null +++ b/sources/pg_legacy_replication/requirements.txt @@ -0,0 +1,4 @@ +dlt>=1.3.0 +psycopg2-binary>=2.9.9 +protobuf>=5 +sqlalchemy>=1.4 \ No newline at end of file diff --git a/sources/pg_legacy_replication/schema_types.py b/sources/pg_legacy_replication/schema_types.py new file mode 100644 index 000000000..5caeefa89 --- /dev/null +++ b/sources/pg_legacy_replication/schema_types.py @@ -0,0 +1,217 @@ +import json +import re +from functools import lru_cache +from typing import Any, Callable, Dict, List, Optional, Tuple + +import pendulum +from dlt.common import Decimal, logger +from dlt.common.data_types.type_helpers import coerce_value +from dlt.common.data_types.typing import TDataType +from dlt.common.schema.typing import TColumnSchema, TColumnType +from dlt.destinations.impl.postgres.factory import PostgresTypeMapper + +from .pg_logicaldec_pb2 import DatumMessage, TypeInfo + +_DUMMY_VALS: Dict[TDataType, Any] = { + "bigint": 0, + "binary": b" ", + "bool": True, + "json": [0], + "date": pendulum.Date(1970, 1, 1), + "decimal": Decimal(0), + "double": 0.0, + "text": "", + "time": pendulum.Time(), + "timestamp": pendulum.from_timestamp(0), + "wei": 0, +} +"""Dummy values used to replace NULLs in NOT NULL columns in key-only delete records.""" + +_PG_TYPES: Dict[int, str] = { + 16: "boolean", + 17: "bytea", + 20: "bigint", + 21: "smallint", + 23: "integer", + 25: "text", + 114: "json", + 700: "real", + 701: "double precision", + 1043: "character varying", + 1082: "date", + 1083: "time without time zone", + 1114: "timestamp without time zone", + 1184: "timestamp with time zone", + 1700: "numeric", + 3802: "jsonb", +} +"""Maps postgres type OID to type string.""" + +_MISSING_TYPES: Dict[str, TDataType] = { + "json": "json", + "real": "double", + "text": "text", + "timestamp without time zone": "timestamp", +} +# FIXME Missing types for old postgres versions + +_DATUM_RAW_TYPES: Dict[str, TDataType] = { + "datum_int32": "bigint", + "datum_int64": "bigint", + "datum_float": "double", + "datum_double": "double", + "datum_bool": "bool", + "datum_string": "text", + "datum_bytes": "binary", +} +"""Maps decoderbuf's datum msg type to dlt type.""" + +_FIXED_PRECISION_TYPES: Dict[int, Tuple[int, Optional[int]]] = { + 21: (32, None), # smallint + 23: (64, None), # integer + 20: (64, None), # bigint + 700: (64, None), # real +} +"""Dict for fixed precision types""" + +_VARYING_PRECISION_PATTERNS: Dict[int, str] = { + 1043: r"character varying\((\d+)\)", + 1700: 
r"numeric\((\d+),(\d+)\)", + 1184: r"timestamp\((\d+)\) with time zone", + 1083: r"time\((\d+)\) without time zone", +} +"""Regex patterns for precision/scale types""" + + +def _get_precision_and_scale( + type_id: int, modifier: str +) -> Tuple[Optional[int], Optional[int]]: + """Get precision from postgres type attributes and modifiers.""" + if type_id in _FIXED_PRECISION_TYPES: + return _FIXED_PRECISION_TYPES[type_id] + + # If pattern is missing, return defaults + if (pattern := _VARYING_PRECISION_PATTERNS.get(type_id)) is None: + return None, None + + if match := re.search(pattern, modifier): + groups = match.groups() + precision = int(groups[0]) + scale = int(groups[1]) if len(groups) > 1 else None + return precision, scale + + return None, None + + +@lru_cache(maxsize=None) +def _type_mapper() -> PostgresTypeMapper: + from dlt.destinations import postgres + + return PostgresTypeMapper(postgres().capabilities()) + + +def _to_dlt_column_type(type_id: int, modifier: str) -> TColumnType: + """ + Converts postgres type OID to dlt column type. + + Type OIDs not in _PG_TYPES mapping default to "text" type. + """ + pg_type = _PG_TYPES.get(type_id) + if pg_type in _MISSING_TYPES: + return {"data_type": _MISSING_TYPES[pg_type]} + if modifier.endswith("[]"): + return {"data_type": "json"} + if pg_type is None: + logger.warning( + "No type found for type_id '%s' and modifier '%s'", type_id, modifier + ) + pg_type = "character varying" + + precision, scale = _get_precision_and_scale(type_id, modifier) + return _type_mapper().from_destination_type(pg_type, precision, scale) + + +def _to_dlt_column_schema( + col_name: str, datum: DatumMessage, type_info: TypeInfo +) -> TColumnSchema: + """Converts decoderbuf's datum value/typeinfo to dlt column schema.""" + return { + "name": col_name, + "nullable": type_info.value_optional, + **_to_dlt_column_type(datum.column_type, type_info.modifier), + } + + +def _epoch_micros_to_datetime(microseconds_since_1970: int) -> pendulum.DateTime: + return pendulum.from_timestamp(microseconds_since_1970 / 1_000_000) + + +def _microseconds_to_time(microseconds: int) -> pendulum.Time: + return pendulum.Time().add(microseconds=microseconds) + + +def _epoch_days_to_date(epoch_days: int) -> pendulum.Date: + return pendulum.Date(1970, 1, 1).add(days=epoch_days) + + +data_type_handlers: Dict[TDataType, Callable[[Any], Any]] = { + "date": _epoch_days_to_date, + "time": _microseconds_to_time, + "timestamp": _epoch_micros_to_datetime, +} +"""Dispatch table for type conversions""" + + +def _to_dlt_val( + val: DatumMessage, col_schema: TColumnSchema, *, for_delete: bool = False +) -> Any: + """Converts decoderbuf's datum value into dlt-compatible data value.""" + data_type = col_schema["data_type"] + assert data_type is not None + datum = _get_datum_attr(val) + if datum is None: + nullable = col_schema.get("nullable", False) + if for_delete and not nullable: + return _DUMMY_VALS[data_type] + return None + + raw_value = getattr(val, datum) + if data_type in data_type_handlers: + return data_type_handlers[data_type](raw_value) + + raw_type = _DATUM_RAW_TYPES[datum] + if raw_type == "binary" and _is_scalar_pg_array(data_type, raw_value): + return _pg_array_to_json_array(raw_value) + + return coerce_value(data_type, raw_type, raw_value) + + +def _is_scalar_pg_array(data_type: TDataType, raw_value: bytes) -> bool: + return ( + len(raw_value) > 1 + and data_type == "json" + and raw_value[0] == ord("{") + and raw_value[-1] == ord("}") + ) + + +def _pg_array_to_json_array(raw_value: bytes) 
-> List[Any]:
+    """Decode a Postgres array literal (e.g. b"{1,2,3}") into a list of scalars."""
+    without_braces = raw_value[1:-1].decode()
+
+    def safe_load(x: str) -> Any:
+        try:
+            return json.loads(x)
+        except json.JSONDecodeError:
+            return x
+
+    return [safe_load(x) for x in without_braces.split(",")]
+
+
+def _get_datum_attr(val: DatumMessage) -> Optional[str]:
+    """Returns the name of the oneof datum field that is set, or None for NULLs."""
+    datum = val.WhichOneof("datum")
+    if datum is None or datum == "datum_missing":
+        return None
+    return datum
diff --git a/sources/pg_legacy_replication_pipeline.py b/sources/pg_legacy_replication_pipeline.py
new file mode 100644
index 000000000..be38414bc
--- /dev/null
+++ b/sources/pg_legacy_replication_pipeline.py
@@ -0,0 +1,239 @@
+import dlt
+from dlt.common.destination import Destination
+from dlt.destinations.impl.postgres.configuration import PostgresCredentials
+
+from pg_legacy_replication import init_replication, replication_source
+
+PG_CREDS = dlt.secrets.get("sources.pg_replication.credentials", PostgresCredentials)
+
+
+def replicate_single_table() -> None:
+    """Sets up replication for a single Postgres table and loads changes into a destination.
+
+    Demonstrates basic usage of the `init_replication` helper and the `replication_source` source.
+    Uses `src_pl` to create and change the replicated Postgres table; this is only
+    for demonstration purposes. You won't need it in production, where another
+    process will be feeding your Postgres instance.
+    """
+    # create source and destination pipelines
+    src_pl = get_postgres_pipeline()
+    dest_pl = dlt.pipeline(
+        pipeline_name="pg_replication_pipeline",
+        destination="duckdb",
+        dataset_name="replicate_single_table",
+        dev_mode=True,
+    )
+
+    # create table "my_source_table" in source to demonstrate replication
+    create_source_table(
+        src_pl, "CREATE TABLE {table_name} (id integer PRIMARY KEY, val bool);"
+    )
+
+    # initialize replication for the source table; this creates a replication slot
+    slot_name = "example_slot"
+    init_replication(  # requires the Postgres user to have the REPLICATION attribute assigned
+        slot_name=slot_name,
+        schema=src_pl.dataset_name,
+        table_names="my_source_table",
+        reset=True,
+    )
+
+    # create a resource that generates items for each change in the source table
+    changes = replication_source(
+        slot_name=slot_name,
+        schema=src_pl.dataset_name,
+        table_names="my_source_table",
+    )
+    changes.my_source_table.apply_hints(
+        write_disposition="merge",
+        primary_key="id",
+        columns={
+            "_pg_deleted_ts": {"hard_delete": True},
+            "_pg_lsn": {"dedup_sort": "desc"},
+        },
+    )
+
+    # insert two records in source table and propagate changes to destination
+    change_source_table(
+        src_pl, "INSERT INTO {table_name} VALUES (1, true), (2, false);"
+    )
+    dest_pl.run(changes)
+    show_destination_table(dest_pl)
+
+    # update record in source table and propagate change to destination
+    change_source_table(src_pl, "UPDATE {table_name} SET val = true WHERE id = 2;")
+    dest_pl.run(changes)
+    show_destination_table(dest_pl)
+
+    # delete record from source table and propagate change to destination
+    change_source_table(src_pl, "DELETE FROM {table_name} WHERE id = 2;")
+    dest_pl.run(changes)
+    show_destination_table(dest_pl)
+
+
+def replicate_with_initial_load() -> None:
+    """Sets up replication with initial load.
+
+    Demonstrates usage of the `take_snapshots` argument and the snapshot resource
+    returned by the `init_replication` helper.
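+
+    The snapshot resource captures the rows present in the source table before
+    replication was initialized, so the subsequent replication run only has to
+    deliver changes.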
+ """ + # create source and destination pipelines + src_pl = get_postgres_pipeline() + dest_pl = dlt.pipeline( + pipeline_name="pg_replication_pipeline", + destination="duckdb", + dataset_name="replicate_with_initial_load", + dev_mode=True, + ) + + # create table "my_source_table" in source to demonstrate replication + create_source_table( + src_pl, "CREATE TABLE {table_name} (id integer PRIMARY KEY, val bool);" + ) + + # insert records before initializing replication + change_source_table( + src_pl, "INSERT INTO {table_name} VALUES (1, true), (2, false);" + ) + + # initialize replication for the source table + slot_name = "example_slot" + snapshot = init_replication( # requires the Postgres user to have the REPLICATION attribute assigned + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names="my_source_table", + take_snapshots=True, # let function return resource(s) for initial load + reset=True, + ) + + # perform initial load to capture all records present in source table prior to replication initialization + dest_pl.run(snapshot) + show_destination_table(dest_pl) + + # insert record in source table and propagate change to destination + change_source_table(src_pl, "INSERT INTO {table_name} VALUES (3, true);") + changes = replication_source( + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names="my_source_table", + ) + dest_pl.run(changes) + show_destination_table(dest_pl) + + +def replicate_with_column_selection() -> None: + """Sets up replication with column selection. + + Demonstrates usage of `include_columns` argument. + """ + # create source and destination pipelines + src_pl = get_postgres_pipeline() + dest_pl = dlt.pipeline( + pipeline_name="pg_replication_pipeline", + destination="duckdb", + dataset_name="replicate_with_column_selection", + dev_mode=True, + ) + + # create two source tables to demonstrate schema replication + create_source_table( + src_pl, + "CREATE TABLE {table_name} (c1 integer PRIMARY KEY, c2 bool, c3 varchar);", + "tbl_x", + ) + create_source_table( + src_pl, + "CREATE TABLE {table_name} (c1 integer PRIMARY KEY, c2 bool, c3 varchar);", + "tbl_y", + ) + + # initialize schema replication by omitting the `table_names` argument + slot_name = "example_slot" + init_replication( # requires the Postgres user to have the REPLICATION attribute assigned + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names=("tbl_x", "tbl_y"), + reset=True, + ) + + # create a resource that generates items for each change in the schema's tables + changes = replication_source( + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names=("tbl_x", "tbl_y"), + repl_options={ + "tbl_x": {"included_columns": {"c1", "c2"}} + }, # columns not specified here are excluded from generated data items + ) + + # insert records in source tables and propagate changes to destination + change_source_table( + src_pl, "INSERT INTO {table_name} VALUES (1, true, 'foo');", "tbl_x" + ) + change_source_table( + src_pl, "INSERT INTO {table_name} VALUES (1, false, 'bar');", "tbl_y" + ) + dest_pl.run(changes) + + # show columns in schema for both tables + # column c3 is not in the schema for tbl_x because we did not include it + # tbl_y does have column c3 because we didn't specify include columns for this table and by default all columns are included + print("tbl_x", ":", list(dest_pl.default_schema.get_table_columns("tbl_x").keys())) + print("tbl_y", ":", list(dest_pl.default_schema.get_table_columns("tbl_y").keys())) + + +# define some helper methods to make examples 
+
+
+def get_postgres_pipeline() -> dlt.Pipeline:
+    """Returns a pipeline loading into the `postgres` destination.
+
+    Uses a workaround to pin the destination to `postgres`, so it does not get
+    replaced during `dlt init`.
+    """
+    # this trick prevents the `dlt init` command from rewriting the "destination" argument of the `pipeline` call
+    p_call = dlt.pipeline
+    pipe = p_call(
+        pipeline_name="source_pipeline",
+        destination=Destination.from_reference("postgres", credentials=PG_CREDS),
+        dataset_name="source_dataset",
+        dev_mode=True,
+    )
+    return pipe
+
+
+def create_source_table(
+    src_pl: dlt.Pipeline, sql: str, table_name: str = "my_source_table"
+) -> None:
+    with src_pl.sql_client() as c:
+        try:
+            c.create_dataset()
+        except dlt.destinations.exceptions.DatabaseTerminalException:
+            pass  # the dataset may already exist
+        qual_name = c.make_qualified_table_name(table_name)
+        c.execute_sql(sql.format(table_name=qual_name))
+
+
+def change_source_table(
+    src_pl: dlt.Pipeline, sql: str, table_name: str = "my_source_table"
+) -> None:
+    with src_pl.sql_client() as c:
+        qual_name = c.make_qualified_table_name(table_name)
+        c.execute_sql(sql.format(table_name=qual_name))
+
+
+def show_destination_table(
+    dest_pl: dlt.Pipeline,
+    table_name: str = "my_source_table",
+    column_names: str = "id, val",
+) -> None:
+    with dest_pl.sql_client() as c:
+        dest_qual_name = c.make_qualified_table_name(table_name)
+        with c.execute_query(f"SELECT {column_names} FROM {dest_qual_name}") as curr:
+            print(table_name, ":\n", curr.df())
+
+
+if __name__ == "__main__":
+    replicate_single_table()
+    # replicate_with_initial_load()
+    # replicate_with_column_selection()
diff --git a/tests/pg_legacy_replication/__init__.py b/tests/pg_legacy_replication/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/pg_legacy_replication/cases.py b/tests/pg_legacy_replication/cases.py
new file mode 100644
index 000000000..cc217930f
--- /dev/null
+++ b/tests/pg_legacy_replication/cases.py
@@ -0,0 +1,1021 @@
+from base64 import b64encode
+from enum import IntEnum
+from typing import List, Tuple
+
+import pendulum
+from dlt.common import Decimal
+from dlt.common.schema import TColumnSchema, TTableSchema, TTableSchemaColumns
+from dlt.common.typing import TDataItem
+
+TABLE_ROW_ALL_DATA_TYPES = {
+    "col1": 989127831,
+    "col2": 898912.821982,
+    "col3": True,
+    "col4": "2022-05-23T13:26:45.176451+00:00",
+    "col5": "string data \n \r \x8e 🦆",
+    "col6": Decimal("2323.34"),
+    "col7": b"binary data \n \r \x8e",
+    # "col8": 2**56 + 92093890840,  # TODO: uncomment and make it work
+    "col9": {
+        "json": [1, 2, 3, "a"],
+        "link": (
+            "?commen\ntU\nrn=urn%3Ali%3Acomment%3A%28acti\012 \6"
+            " \\vity%3A69'08444473\n\n551163392%2C6n \r \x8e9085"
+        ),
+    },
+    "col10": "2023-02-27",
+    "col11": "13:26:45.176451",
+    "col1_null": None,
+    "col2_null": None,
+    "col3_null": None,
+    "col4_null": None,
+    "col5_null": None,
+    "col6_null": None,
+    "col7_null": None,
+    # "col8_null": None,
+    "col9_null": None,
+    "col10_null": None,
+    "col11_null": None,
+    "col1_precision": 22324,
+    "col4_precision": "2022-05-23T13:26:46.167231+00:00",
+    "col5_precision": "string data 2 \n \r \x8e 🦆",
+    "col6_precision": Decimal("2323.34"),
+    # "col7_precision": b"binary data 2 \n \r \x8e",  # FIXME no longer supported by pyarrow
+    "col11_precision": "13:26:45.176451",
+}
+TABLE_UPDATE: List[TColumnSchema] = [
+    {"name": "col1", "data_type": "bigint", "nullable": False},
+    {"name": "col2", "data_type": "double", "nullable": False},
+    {"name": "col3",
"data_type": "bool", "nullable": False}, + {"name": "col4", "data_type": "timestamp", "nullable": False}, + {"name": "col5", "data_type": "text", "nullable": False}, + {"name": "col6", "data_type": "decimal", "nullable": False}, + {"name": "col7", "data_type": "binary", "nullable": False}, + # {"name": "col8", "data_type": "wei", "nullable": False}, + {"name": "col9", "data_type": "json", "nullable": False, "variant": True}, + {"name": "col10", "data_type": "date", "nullable": False}, + {"name": "col11", "data_type": "time", "nullable": False}, + {"name": "col1_null", "data_type": "bigint", "nullable": True}, + {"name": "col2_null", "data_type": "double", "nullable": True}, + {"name": "col3_null", "data_type": "bool", "nullable": True}, + {"name": "col4_null", "data_type": "timestamp", "nullable": True}, + {"name": "col5_null", "data_type": "text", "nullable": True}, + {"name": "col6_null", "data_type": "decimal", "nullable": True}, + {"name": "col7_null", "data_type": "binary", "nullable": True}, + # {"name": "col8_null", "data_type": "wei", "nullable": True}, + {"name": "col9_null", "data_type": "json", "nullable": True, "variant": True}, + {"name": "col10_null", "data_type": "date", "nullable": True}, + {"name": "col11_null", "data_type": "time", "nullable": True}, + { + "name": "col1_precision", + "data_type": "bigint", + "precision": 16, + "nullable": False, + }, + { + "name": "col4_precision", + "data_type": "timestamp", + "precision": 6, + "nullable": False, + }, + {"name": "col5_precision", "data_type": "text", "precision": 25, "nullable": False}, + { + "name": "col6_precision", + "data_type": "decimal", + "precision": 6, + "scale": 2, + "nullable": False, + }, + # { + # "name": "col7_precision", + # "data_type": "binary", + # "precision": 19, + # "nullable": False, + # }, # FIXME See comment above + {"name": "col11_precision", "data_type": "time", "precision": 6, "nullable": False}, +] + +TABLE_UPDATE_COLUMNS_SCHEMA: TTableSchemaColumns = {t["name"]: t for t in TABLE_UPDATE} + +ROW_MESSAGES: List[dict] = [ + { + "transactionId": 969, + "commitTime": "1728662646949062", + "table": "src_pl_dataset_202410110404048747_staging.tbl_y", + "op": "INSERT", + "newTuple": [ + { + "columnName": "id_y", + "columnType": 20, + "datumInt64": 2, + }, + { + "columnName": "val_y", + "columnType": 16, + "datumBool": False, + }, + { + "columnName": '"primary"', + "columnType": 16, + "datumBool": True, + }, + { + "columnName": "_dlt_load_id", + "columnType": 1043, + "datumString": "1728662646.2657657", + }, + { + "columnName": "_dlt_id", + "columnType": 1043, + "datumString": "gGjifTMTAUs5ag", + }, + ], + "newTypeinfo": [ + { + "modifier": "bigint", + "valueOptional": False, + }, + { + "modifier": "boolean", + "valueOptional": True, + }, + { + "modifier": "boolean", + "valueOptional": True, + }, + { + "modifier": "character varying", + "valueOptional": False, + }, + { + "modifier": "character varying", + "valueOptional": False, + }, + ], + "oldTuple": [], + }, + { + "transactionId": 2018, + "commitTime": "1729503423666542", + "table": "src_pl_dataset_202410210936594956.items", + "op": "INSERT", + "newTuple": [ + { + "columnName": "col4", + "columnType": 1184, + "datumInt64": 1653312405176451, + }, + { + "columnName": "col9", + "columnType": 3802, + "datumString": ( + '{"link": "?commen\\ntU\\nrn=urn%3Ali%3Acomment%3A%28acti\\n \\u0006 \\\\vity%3A69\'08444473\\n\\n551163392' + '%2C6n \\r \x8e9085", "json": [1, 2, 3, "a"]}' + ), + }, + { + "columnName": "col10", + "columnType": 1082, + "datumInt32": 
19415, + }, + { + "columnName": "col11", + "columnType": 1083, + "datumInt64": 48405176451, + }, + {"columnName": "col12", "columnType": 1114}, + {"columnName": "col13", "columnType": 700}, + {"columnName": "col14", "columnType": 1043, "datum_missing": True}, + ], + "newTypeinfo": [ + {"modifier": "timestamp with time zone", "valueOptional": False}, + {"modifier": "jsonb", "valueOptional": False}, + {"modifier": "date", "valueOptional": False}, + {"modifier": "time without time zone", "valueOptional": False}, + {"modifier": "timestamp without time zone", "valueOptional": True}, + {"modifier": "real", "valueOptional": True}, + {"modifier": "character varying", "valueOptional": True}, + ], + }, + { + "transactionId": 932, + "commitTime": "1729299383354856", + "table": "src_pl_dataset_202410191256122080.tbl_x", + "op": "DELETE", + "oldTuple": [ + { + "columnName": "id_x", + "columnType": 20, + "datumInt64": 1, + }, + { + "columnName": "val_x", + "columnType": 1043, + }, + { + "columnName": "col_bool", + "columnType": 16, + }, + { + "columnName": "col_bytea", + "columnType": 17, + }, + { + "columnName": "col_int4", + "columnType": 21, + }, + { + "columnName": "col_int", + "columnType": 23, + }, + { + "columnName": "col_real", + "columnType": 700, + }, + { + "columnName": "col_double", + "columnType": 701, + }, + { + "columnName": "col_date", + "columnType": 1082, + }, + { + "columnName": "col_time", + "columnType": 1083, + }, + { + "columnName": "col_ts", + "columnType": 1114, + }, + { + "columnName": "col_tstz", + "columnType": 1184, + }, + { + "columnName": "col_num", + "columnType": 1700, + }, + { + "columnName": "col_json", + "columnType": 3802, + }, + ], + }, + { + "transactionId": 754, + "commitTime": "1736873892023448", + "table": "src_pl_dataset_202501140458116348.data_types", + "op": "INSERT", + "newTuple": [ + { + "columnName": "bit_col", + "columnType": 1560, + "datumString": "1", + }, + { + "columnName": "box_col", + "columnType": 603, + "datumBytes": b64encode(b"(1,1),(0,0)").decode(), + }, + { + "columnName": "uuid_col", + "columnType": 2950, + "datumString": "6e1f5de1-1093-4bfe-98e4-62ac56b2db54", + }, + { + "columnName": "text_a", + "columnType": 1009, + "datumBytes": b64encode( + b'{"Network administration",GNS3,BGP}' + ).decode(), + }, + { + "columnName": "json_col", + "columnType": 114, + "datum_string": '{"a":[null,1]}', + }, + ], + "newTypeinfo": [ + { + "modifier": "bit(1)", + "valueOptional": True, + }, + { + "modifier": "box", + "valueOptional": True, + }, + { + "modifier": "uuid", + "valueOptional": True, + }, + { + "modifier": "text[]", + "valueOptional": True, + }, + { + "modifier": "json", + "valueOptional": True, + }, + ], + "oldTuple": [], + }, +] + +DATA_ITEMS: List[TDataItem] = [ + { + "id_y": 2, + "val_y": False, + "primary": True, + "_dlt_id": "gGjifTMTAUs5ag", + "_dlt_load_id": "1728662646.2657657", + "_pg_lsn": 1, + "_pg_commit_ts": pendulum.parse("2024-10-11T16:04:06.949062+00:00"), + "_pg_tx_id": 969, + }, + { + "col4": pendulum.parse("2022-05-23T13:26:45.176451+00:00"), + "col9": { + "json": [1, 2, 3, "a"], + "link": ( + "?commen\ntU\nrn=urn%3Ali%3Acomment%3A%28acti\012 \6" + " \\vity%3A69'08444473\n\n551163392%2C6n \r \x8e9085" + ), + }, + "col10": pendulum.parse("2023-02-27", strict=False).date(), + "col11": pendulum.parse("13:26:45.176451", strict=False).time(), + "col12": None, + "col13": None, + "col14": None, + "_pg_lsn": 1, + "_pg_commit_ts": pendulum.parse("2024-10-21T09:37:03.666542+00:00"), + "_pg_tx_id": 2018, + }, + { + "id_x": 1, + "val_x": 
"", + "col_bool": True, + "col_bytea": b" ", + "col_int4": 0, + "col_int": 0, + "col_real": 0.0, + "col_double": 0.0, + "col_time": pendulum.parse("00:00:00", strict=False).time(), + "col_date": pendulum.parse("1970-01-01", strict=False).date(), + "col_ts": pendulum.parse("1970-01-01T00:00:00+00:00"), + "col_tstz": pendulum.parse("1970-01-01T00:00:00+00:00"), + "col_num": Decimal(0), + "col_json": [0], + "_pg_lsn": 1, + "_pg_deleted_ts": pendulum.parse("2024-10-19T00:56:23.354856+00:00"), + "_pg_commit_ts": pendulum.parse("2024-10-19T00:56:23.354856+00:00"), + "_pg_tx_id": 932, + }, + { + "bit_col": "1", + "box_col": "KDEsMSksKDAsMCk=", + "uuid_col": "6e1f5de1-1093-4bfe-98e4-62ac56b2db54", + "text_a": ["Network administration", "GNS3", "BGP"], + "json_col": {"a": [None, 1]}, + "_pg_lsn": 1, + "_pg_commit_ts": pendulum.parse("2025-01-14T16:58:12.023448+00:00"), + "_pg_tx_id": 754, + }, +] + +TABLE_SCHEMAS: List[TTableSchema] = [ + { + "name": "tbl_y", + "columns": { + "id_y": { + "data_type": "bigint", + "name": "id_y", + "nullable": False, + "precision": 64, + }, + "val_y": {"data_type": "bool", "name": "val_y", "nullable": True}, + "primary": {"data_type": "bool", "name": "primary", "nullable": True}, + "_dlt_id": {"data_type": "text", "name": "_dlt_id", "nullable": False}, + "_dlt_load_id": { + "data_type": "text", + "name": "_dlt_load_id", + "nullable": False, + }, + "_pg_lsn": {"data_type": "bigint", "name": "_pg_lsn", "nullable": True}, + "_pg_deleted_ts": { + "data_type": "timestamp", + "name": "_pg_deleted_ts", + "nullable": True, + }, + "_pg_commit_ts": { + "data_type": "timestamp", + "name": "_pg_commit_ts", + "nullable": True, + }, + "_pg_tx_id": { + "data_type": "bigint", + "name": "_pg_tx_id", + "nullable": True, + "precision": 32, + }, + }, + }, + { + "name": "items", + "columns": { + "col4": {"data_type": "timestamp", "name": "col4", "nullable": False}, + "col9": {"data_type": "json", "name": "col9", "nullable": False}, + "col10": {"data_type": "date", "name": "col10", "nullable": False}, + "col11": {"data_type": "time", "name": "col11", "nullable": False}, + "col12": {"data_type": "timestamp", "name": "col12", "nullable": True}, + "col13": {"data_type": "double", "name": "col13", "nullable": True}, + "col14": {"data_type": "text", "name": "col14", "nullable": True}, + "_pg_lsn": {"data_type": "bigint", "name": "_pg_lsn", "nullable": True}, + "_pg_deleted_ts": { + "data_type": "timestamp", + "name": "_pg_deleted_ts", + "nullable": True, + }, + "_pg_commit_ts": { + "data_type": "timestamp", + "name": "_pg_commit_ts", + "nullable": True, + }, + "_pg_tx_id": { + "data_type": "bigint", + "name": "_pg_tx_id", + "nullable": True, + "precision": 32, + }, + }, + }, + { + "name": "tbl_x", + "columns": { + "id_x": {"data_type": "bigint", "name": "id_x", "precision": 64}, + "val_x": {"data_type": "text", "name": "val_x"}, + "col_bool": {"data_type": "bool", "name": "col_bool"}, + "col_bytea": {"data_type": "binary", "name": "col_bytea"}, + "col_int4": {"data_type": "bigint", "name": "col_int4", "precision": 16}, + "col_int": {"data_type": "bigint", "name": "col_int", "precision": 32}, + "col_real": {"data_type": "double", "name": "col_real"}, + "col_double": {"data_type": "double", "name": "col_double"}, + "col_date": {"data_type": "date", "name": "col_date"}, + "col_time": {"data_type": "time", "name": "col_time"}, + "col_ts": {"data_type": "timestamp", "name": "col_ts"}, + "col_tstz": {"data_type": "timestamp", "name": "col_tstz"}, + "col_num": {"data_type": "decimal", "name": 
"col_num"}, + "col_json": {"data_type": "json", "name": "col_json"}, + "_pg_lsn": {"data_type": "bigint", "name": "_pg_lsn", "nullable": True}, + "_pg_deleted_ts": { + "data_type": "timestamp", + "name": "_pg_deleted_ts", + "nullable": True, + }, + "_pg_commit_ts": { + "data_type": "timestamp", + "name": "_pg_commit_ts", + "nullable": True, + }, + "_pg_tx_id": { + "data_type": "bigint", + "name": "_pg_tx_id", + "nullable": True, + "precision": 32, + }, + }, + }, + { + "name": "data_types", + "columns": { + "bit_col": {"data_type": "text", "name": "bit_col", "nullable": True}, + "box_col": {"data_type": "text", "name": "box_col", "nullable": True}, + "uuid_col": {"data_type": "text", "name": "uuid_col", "nullable": True}, + "text_a": {"data_type": "json", "name": "text_a", "nullable": True}, + "json_col": {"data_type": "json", "name": "json_col", "nullable": True}, + "_pg_lsn": {"data_type": "bigint", "name": "_pg_lsn", "nullable": True}, + "_pg_deleted_ts": { + "data_type": "timestamp", + "name": "_pg_deleted_ts", + "nullable": True, + }, + "_pg_commit_ts": { + "data_type": "timestamp", + "name": "_pg_commit_ts", + "nullable": True, + }, + "_pg_tx_id": { + "data_type": "bigint", + "name": "_pg_tx_id", + "nullable": True, + "precision": 32, + }, + }, + }, +] + + +class SchemaChoice(IntEnum): + first = 0 + second = 1 + error = -1 + + +SIMILAR_SCHEMAS: List[Tuple[TTableSchema, TTableSchema, SchemaChoice]] = [ + ( + { + "name": "items", + "columns": { + "col1": { + "name": "col1", + "data_type": "bigint", + "precision": 64, + "nullable": False, + }, + "col2": {"name": "col2", "data_type": "double", "nullable": False}, + "col3": {"name": "col3", "data_type": "bool", "nullable": False}, + "col4": {"name": "col4", "data_type": "timestamp", "nullable": False}, + "col5": {"name": "col5", "data_type": "text", "nullable": False}, + "col6": { + "name": "col6", + "data_type": "decimal", + "precision": 38, + "scale": 9, + "nullable": False, + }, + "col7": {"name": "col7", "data_type": "binary", "nullable": False}, + "col9": {"name": "col9", "data_type": "json", "nullable": False}, + "col10": {"name": "col10", "data_type": "date", "nullable": False}, + "col11": {"name": "col11", "data_type": "time", "nullable": False}, + "col1_null": { + "name": "col1_null", + "data_type": "bigint", + "precision": 64, + "nullable": True, + }, + "col2_null": { + "name": "col2_null", + "data_type": "double", + "nullable": True, + }, + "col3_null": { + "name": "col3_null", + "data_type": "bool", + "nullable": True, + }, + "col4_null": { + "name": "col4_null", + "data_type": "timestamp", + "nullable": True, + }, + "col5_null": { + "name": "col5_null", + "data_type": "text", + "nullable": True, + }, + "col6_null": { + "name": "col6_null", + "data_type": "decimal", + "precision": 38, + "scale": 9, + "nullable": True, + }, + "col7_null": { + "name": "col7_null", + "data_type": "binary", + "nullable": True, + }, + "col9_null": { + "name": "col9_null", + "data_type": "json", + "nullable": True, + }, + "col10_null": { + "name": "col10_null", + "data_type": "date", + "nullable": True, + }, + "col11_null": { + "name": "col11_null", + "data_type": "time", + "nullable": True, + }, + "col1_precision": { + "name": "col1_precision", + "data_type": "bigint", + "precision": 16, + "nullable": False, + }, + "col4_precision": { + "name": "col4_precision", + "data_type": "timestamp", + "precision": 3, + "nullable": False, + }, + "col5_precision": { + "name": "col5_precision", + "data_type": "text", + "precision": 25, + "nullable": False, + }, 
+ "col6_precision": { + "name": "col6_precision", + "data_type": "decimal", + "precision": 6, + "scale": 2, + "nullable": False, + }, + "col7_precision": { + "name": "col7_precision", + "data_type": "binary", + "nullable": False, + }, + "col11_precision": { + "name": "col11_precision", + "data_type": "time", + "precision": 3, + "nullable": False, + }, + "_dlt_load_id": { + "name": "_dlt_load_id", + "data_type": "text", + "nullable": False, + }, + "_dlt_id": {"name": "_dlt_id", "data_type": "text", "nullable": False}, + "_pg_lsn": {"data_type": "bigint", "name": "_pg_lsn", "nullable": True}, + "_pg_deleted_ts": { + "data_type": "timestamp", + "name": "_pg_deleted_ts", + "nullable": True, + }, + }, + }, + { + "name": "items", + "columns": { + "col1": { + "name": "col1", + "data_type": "bigint", + "precision": 64, + "nullable": False, + }, + "col2": {"name": "col2", "data_type": "double"}, + "col3": {"name": "col3", "data_type": "bool"}, + "col4": {"name": "col4", "data_type": "timestamp"}, + "col5": {"name": "col5", "data_type": "text"}, + "col6": {"name": "col6", "data_type": "decimal"}, + "col7": {"name": "col7", "data_type": "binary"}, + "col9": {"name": "col9", "data_type": "json"}, + "col10": {"name": "col10", "data_type": "date"}, + "col11": {"name": "col11", "data_type": "time"}, + "col1_null": { + "name": "col1_null", + "data_type": "bigint", + "precision": 64, + }, + "col2_null": {"name": "col2_null", "data_type": "double"}, + "col3_null": {"name": "col3_null", "data_type": "bool"}, + "col4_null": {"name": "col4_null", "data_type": "timestamp"}, + "col5_null": {"name": "col5_null", "data_type": "text"}, + "col6_null": {"name": "col6_null", "data_type": "decimal"}, + "col7_null": {"name": "col7_null", "data_type": "binary"}, + "col9_null": {"name": "col9_null", "data_type": "json"}, + "col10_null": {"name": "col10_null", "data_type": "date"}, + "col11_null": {"name": "col11_null", "data_type": "time"}, + "col1_precision": { + "name": "col1_precision", + "data_type": "bigint", + "precision": 16, + }, + "col4_precision": {"name": "col4_precision", "data_type": "timestamp"}, + "col5_precision": {"name": "col5_precision", "data_type": "text"}, + "col6_precision": {"name": "col6_precision", "data_type": "decimal"}, + "col7_precision": {"name": "col7_precision", "data_type": "binary"}, + "col11_precision": {"name": "col11_precision", "data_type": "time"}, + "_dlt_load_id": {"name": "_dlt_load_id", "data_type": "text"}, + "_dlt_id": {"name": "_dlt_id", "data_type": "text"}, + "_pg_lsn": {"data_type": "bigint", "name": "_pg_lsn", "nullable": True}, + "_pg_deleted_ts": { + "data_type": "timestamp", + "name": "_pg_deleted_ts", + "nullable": True, + }, + }, + }, + SchemaChoice.first, + ), + ( + { + "name": "items", + "columns": { + "_dlt_id": {"data_type": "text", "name": "_dlt_id", "nullable": False}, + "_dlt_load_id": { + "data_type": "text", + "name": "_dlt_load_id", + "nullable": False, + }, + "c1": { + "data_type": "bigint", + "name": "c1", + "nullable": True, + "precision": 64, + }, + "c2": { + "data_type": "bigint", + "name": "c2", + "nullable": True, + "precision": 64, + }, + "c3": { + "data_type": "bigint", + "name": "c3", + "nullable": True, + "precision": 64, + }, + "_pg_deleted_ts": { + "data_type": "timestamp", + "name": "_pg_deleted_ts", + "nullable": True, + }, + "_pg_lsn": {"data_type": "bigint", "name": "_pg_lsn", "nullable": True}, + }, + }, + { + "name": "items", + "columns": { + "_dlt_id": {"data_type": "text", "name": "_dlt_id", "nullable": False}, + "_dlt_load_id": { + 
"data_type": "text", + "name": "_dlt_load_id", + "nullable": False, + }, + "c1": { + "data_type": "bigint", + "name": "c1", + "nullable": True, + "precision": 64, + }, + "c2": { + "data_type": "bigint", + "name": "c2", + "nullable": True, + "precision": 64, + }, + "c3": { + "data_type": "bigint", + "name": "c3", + "nullable": True, + "precision": 64, + }, + # Added c4 column + "c4": { + "data_type": "bigint", + "name": "c4", + "nullable": True, + "precision": 64, + }, + "_pg_deleted_ts": { + "data_type": "timestamp", + "name": "_pg_deleted_ts", + "nullable": True, + }, + "_pg_lsn": {"data_type": "bigint", "name": "_pg_lsn", "nullable": True}, + }, + }, + SchemaChoice.error, + ), + ( + { + "name": "scale_teams", + "columns": { + "id": { + "name": "id", + "nullable": False, + "data_type": "bigint", + "precision": 32, + }, + "user_id": { + "name": "user_id", + "nullable": False, + "data_type": "bigint", + "precision": 32, + }, + "begin_at": { + "name": "begin_at", + "nullable": False, + "data_type": "timestamp", + "precision": 6, + }, + "created_at": { + "name": "created_at", + "nullable": False, + "data_type": "timestamp", + "precision": 6, + }, + "updated_at": { + "name": "updated_at", + "nullable": False, + "data_type": "timestamp", + "precision": 6, + }, + "scale_id": { + "name": "scale_id", + "nullable": False, + "data_type": "bigint", + "precision": 32, + }, + "team_id": { + "name": "team_id", + "nullable": False, + "data_type": "bigint", + "precision": 32, + }, + "comment": {"name": "comment", "nullable": True, "data_type": "text"}, + "old_feedback": { + "name": "old_feedback", + "nullable": True, + "data_type": "text", + }, + "feedback_rating": { + "name": "feedback_rating", + "nullable": True, + "data_type": "bigint", + "precision": 32, + }, + "final_mark": { + "name": "final_mark", + "nullable": True, + "data_type": "bigint", + "precision": 32, + }, + "truant_id": { + "name": "truant_id", + "nullable": True, + "data_type": "bigint", + "precision": 32, + }, + "flag_id": { + "name": "flag_id", + "nullable": False, + "data_type": "bigint", + "precision": 32, + }, + "token": {"name": "token", "nullable": True, "data_type": "text"}, + "ip": {"name": "ip", "nullable": True, "data_type": "text"}, + "internship_id": { + "name": "internship_id", + "nullable": True, + "data_type": "bigint", + "precision": 32, + }, + "filled_at": { + "name": "filled_at", + "nullable": True, + "data_type": "timestamp", + "precision": 6, + }, + "_pg_lsn": {"name": "_pg_lsn", "nullable": True, "data_type": "bigint"}, + "_pg_deleted_ts": { + "name": "_pg_deleted_ts", + "nullable": True, + "data_type": "timestamp", + "precision": 6, + }, + "_pg_commit_ts": { + "name": "_pg_commit_ts", + "nullable": True, + "data_type": "timestamp", + "precision": 6, + }, + "_pg_tx_id": { + "name": "_pg_tx_id", + "nullable": True, + "data_type": "bigint", + "precision": 32, + }, + "_dlt_load_id": { + "name": "_dlt_load_id", + "data_type": "text", + "nullable": False, + }, + }, + }, + { + "name": "scale_teams", + "columns": { + "id": { + "name": "id", + "nullable": False, + "data_type": "bigint", + "precision": 32, + }, + "user_id": { + "name": "user_id", + "nullable": True, + "data_type": "bigint", + "precision": 32, + }, + "begin_at": { + "name": "begin_at", + "nullable": True, + "data_type": "timestamp", + "precision": 6, + }, + "created_at": { + "name": "created_at", + "nullable": False, + "data_type": "timestamp", + "precision": 6, + }, + "updated_at": { + "name": "updated_at", + "nullable": False, + "data_type": "timestamp", + 
"precision": 6, + }, + "scale_id": { + "name": "scale_id", + "nullable": True, + "data_type": "bigint", + "precision": 32, + }, + "team_id": { + "name": "team_id", + "nullable": True, + "data_type": "bigint", + "precision": 32, + }, + "comment": {"name": "comment", "nullable": True, "data_type": "text"}, + "old_feedback": { + "name": "old_feedback", + "nullable": True, + "data_type": "text", + }, + "feedback_rating": { + "name": "feedback_rating", + "nullable": True, + "data_type": "bigint", + "precision": 32, + }, + "final_mark": { + "name": "final_mark", + "nullable": True, + "data_type": "bigint", + "precision": 32, + }, + "truant_id": { + "name": "truant_id", + "nullable": True, + "data_type": "bigint", + "precision": 32, + }, + "flag_id": { + "name": "flag_id", + "nullable": True, + "data_type": "bigint", + "precision": 32, + }, + "token": {"name": "token", "nullable": True, "data_type": "text"}, + "ip": {"name": "ip", "nullable": True, "data_type": "text"}, + "internship_id": { + "name": "internship_id", + "nullable": True, + "data_type": "bigint", + "precision": 32, + }, + "filled_at": { + "name": "filled_at", + "nullable": True, + "data_type": "timestamp", + "timezone": True, + "precision": 6, + }, + "_pg_lsn": {"name": "_pg_lsn", "nullable": True, "data_type": "bigint"}, + "_pg_deleted_ts": { + "name": "_pg_deleted_ts", + "nullable": True, + "data_type": "timestamp", + "precision": 6, + }, + "_pg_commit_ts": { + "name": "_pg_commit_ts", + "nullable": True, + "data_type": "timestamp", + "precision": 6, + }, + "_pg_tx_id": { + "name": "_pg_tx_id", + "nullable": True, + "data_type": "bigint", + "precision": 32, + }, + "_dlt_load_id": { + "name": "_dlt_load_id", + "data_type": "text", + "nullable": False, + }, + }, + }, + SchemaChoice.second, + ), +] diff --git a/tests/pg_legacy_replication/conftest.py b/tests/pg_legacy_replication/conftest.py new file mode 100644 index 000000000..dcd1a0f16 --- /dev/null +++ b/tests/pg_legacy_replication/conftest.py @@ -0,0 +1,43 @@ +import faulthandler +import pytest + +from typing import Iterator, Tuple + +import dlt +from dlt.common.utils import uniq_id + + +def pytest_configure(): + faulthandler.enable() + + +@pytest.fixture() +def src_config() -> Iterator[Tuple[dlt.Pipeline, str]]: + # random slot to enable parallel runs + slot = "test_slot_" + uniq_id(4) + # setup + src_pl = dlt.pipeline( + pipeline_name="src_pl", + destination=dlt.destinations.postgres( + credentials=dlt.secrets.get("sources.pg_replication.credentials") + ), + dev_mode=True, + ) + yield src_pl, slot + # teardown + with src_pl.sql_client() as c: + # drop tables + try: + c.drop_dataset() + except Exception as e: + print(e) + with c.with_staging_dataset(): + try: + c.drop_dataset() + except Exception as e: + print(e) + # drop replication slot + try: + c.execute_sql(f"SELECT pg_drop_replication_slot('{slot}');") + except Exception as e: + print(e) diff --git a/tests/pg_legacy_replication/test_helpers.py b/tests/pg_legacy_replication/test_helpers.py new file mode 100644 index 000000000..bd698d3f3 --- /dev/null +++ b/tests/pg_legacy_replication/test_helpers.py @@ -0,0 +1,65 @@ +import pytest +from dlt.common.schema.typing import TTableSchema +from dlt.common.typing import TDataItem +from google.protobuf.json_format import ParseDict as parse_dict + +from sources.pg_legacy_replication import ReplicationOptions +from sources.pg_legacy_replication.helpers import ( + compare_schemas, + gen_data_item, + infer_table_schema, +) +from sources.pg_legacy_replication.pg_logicaldec_pb2 import Op, 
RowMessage +from .cases import ( + DATA_ITEMS, + ROW_MESSAGES, + SIMILAR_SCHEMAS, + TABLE_SCHEMAS, + SchemaChoice, +) + + +@pytest.mark.parametrize("data, expected_schema", zip(ROW_MESSAGES, TABLE_SCHEMAS)) +def test_infer_table_schema( + data, + expected_schema: TTableSchema, +): + row_msg = RowMessage() + parse_dict(data, row_msg) + options = ReplicationOptions(include_commit_ts=True, include_tx_id=True) + if row_msg.op == Op.DELETE: + with pytest.raises(AssertionError): + infer_table_schema(row_msg, options) + else: + assert infer_table_schema(row_msg, options) == expected_schema + + +@pytest.mark.parametrize( + "data, data_item, schema", zip(ROW_MESSAGES, DATA_ITEMS, TABLE_SCHEMAS) +) +def test_gen_data_item(data, data_item: TDataItem, schema: TTableSchema): + row_msg = RowMessage() + parse_dict(data, row_msg) + assert ( + gen_data_item( + row_msg, + schema["columns"], + lsn=1, + include_commit_ts=True, + include_tx_id=True, + ) + == data_item + ) + + +@pytest.mark.parametrize("s1, s2, choice", SIMILAR_SCHEMAS) +def test_compare_schemas(s1: TTableSchema, s2: TTableSchema, choice: SchemaChoice): + if choice == SchemaChoice.error: + with pytest.raises(AssertionError): + compare_schemas(s1, s2) + with pytest.raises(AssertionError): + compare_schemas(s2, s1) + else: + expected_schema = (s1, s2)[choice] + assert compare_schemas(s1, s2) == expected_schema + assert compare_schemas(s2, s1) == expected_schema diff --git a/tests/pg_legacy_replication/test_pg_replication.py b/tests/pg_legacy_replication/test_pg_replication.py new file mode 100644 index 000000000..643cf2f1f --- /dev/null +++ b/tests/pg_legacy_replication/test_pg_replication.py @@ -0,0 +1,811 @@ +from copy import deepcopy +from typing import Dict, Tuple + +import dlt +import pytest +from dlt.common.schema.typing import TTableSchemaColumns +from dlt.destinations.job_client_impl import SqlJobClientBase + +from sources.pg_legacy_replication import ( + init_replication, + cleanup_snapshot_resources, + replication_source, +) +from sources.pg_legacy_replication.helpers import TableBackend +from tests.utils import ( + ALL_DESTINATIONS, + assert_load_info, + load_table_counts, +) +from .cases import TABLE_ROW_ALL_DATA_TYPES, TABLE_UPDATE_COLUMNS_SCHEMA +from .utils import add_pk, assert_loaded_data + +merge_hints: TTableSchemaColumns = { + "_pg_deleted_ts": {"hard_delete": True}, + "_pg_lsn": {"dedup_sort": "desc"}, +} + + +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +@pytest.mark.parametrize("backend", ["sqlalchemy", "pyarrow"]) +def test_core_functionality( + src_config: Tuple[dlt.Pipeline, str], destination_name: str, backend: TableBackend +) -> None: + @dlt.resource(write_disposition="merge", primary_key="id_x") + def tbl_x(data): + yield data + + @dlt.resource(write_disposition="merge", primary_key="id_y") + def tbl_y(data): + yield data + + src_pl, slot_name = src_config + + src_pl.run( + [ + tbl_x({"id_x": 1, "val_x": "foo"}), + tbl_y({"id_y": 1, "val_y": True}), + ] + ) + add_pk(src_pl.sql_client, "tbl_x", "id_x") + add_pk(src_pl.sql_client, "tbl_y", "id_y") + + snapshots = init_replication( + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names=("tbl_x", "tbl_y"), + take_snapshots=True, + table_options={ + "tbl_x": {"backend": backend}, + "tbl_y": {"backend": backend}, + }, + ) + + changes = replication_source( + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names=("tbl_x", "tbl_y"), + repl_options={ + "tbl_x": {"backend": backend}, + "tbl_y": {"backend": backend}, + }, + ) + 
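+    # `merge_hints` (defined at module level) marks `_pg_deleted_ts` as the
+    # hard-delete column and sorts duplicates by `_pg_lsn` descending, so the
+    # most recent change per primary key wins when dlt merges the staged rows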
changes.tbl_x.apply_hints( + write_disposition="merge", primary_key="id_x", columns=merge_hints + ) + changes.tbl_y.apply_hints( + write_disposition="merge", primary_key="id_y", columns=merge_hints + ) + + src_pl.run( + [ + tbl_x([{"id_x": 2, "val_x": "bar"}, {"id_x": 3, "val_x": "baz"}]), + tbl_y({"id_y": 2, "val_y": False}), + ] + ) + + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, dev_mode=True + ) + + # initial load + info = dest_pl.run(snapshots) + cleanup_snapshot_resources(snapshots) + assert_load_info(info) + assert load_table_counts(dest_pl, "tbl_x", "tbl_y") == {"tbl_x": 1, "tbl_y": 1} + exp_tbl_x = [{"id_x": 1, "val_x": "foo"}] + exp_tbl_y = [{"id_y": 1, "val_y": True}] + assert_loaded_data(dest_pl, "tbl_x", ["id_x", "val_x"], exp_tbl_x, "id_x") + assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y") + + # process changes + info = dest_pl.run(changes) + assert_load_info(info, expected_load_packages=2) + assert load_table_counts(dest_pl, "tbl_x", "tbl_y") == {"tbl_x": 3, "tbl_y": 2} + exp_tbl_x = [ + {"id_x": 1, "val_x": "foo"}, + {"id_x": 2, "val_x": "bar"}, + {"id_x": 3, "val_x": "baz"}, + ] + exp_tbl_y = [{"id_y": 1, "val_y": True}, {"id_y": 2, "val_y": False}] + assert_loaded_data(dest_pl, "tbl_x", ["id_x", "val_x"], exp_tbl_x, "id_x") + assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y") + + # change single table + src_pl.run(tbl_y({"id_y": 3, "val_y": True})) + + # process changes + info = dest_pl.run(changes) + assert_load_info(info, expected_load_packages=2) + assert load_table_counts(dest_pl, "tbl_x", "tbl_y") == {"tbl_x": 3, "tbl_y": 3} + exp_tbl_y = [ + {"id_y": 1, "val_y": True}, + {"id_y": 2, "val_y": False}, + {"id_y": 3, "val_y": True}, + ] + assert_loaded_data(dest_pl, "tbl_x", ["id_x", "val_x"], exp_tbl_x, "id_x") + assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y") + + # update tables + with src_pl.sql_client() as c: + qual_name = src_pl.sql_client().make_qualified_table_name("tbl_x") + c.execute_sql(f"UPDATE {qual_name} SET val_x = 'foo_updated' WHERE id_x = 1;") + qual_name = src_pl.sql_client().make_qualified_table_name("tbl_y") + c.execute_sql(f"UPDATE {qual_name} SET val_y = false WHERE id_y = 1;") + + # process changes + info = dest_pl.run(changes) + assert_load_info(info, expected_load_packages=2) + assert load_table_counts(dest_pl, "tbl_x", "tbl_y") == {"tbl_x": 3, "tbl_y": 3} + exp_tbl_x = [ + {"id_x": 1, "val_x": "foo_updated"}, + {"id_x": 2, "val_x": "bar"}, + {"id_x": 3, "val_x": "baz"}, + ] + exp_tbl_y = [ + {"id_y": 1, "val_y": False}, + {"id_y": 2, "val_y": False}, + {"id_y": 3, "val_y": True}, + ] + assert_loaded_data(dest_pl, "tbl_x", ["id_x", "val_x"], exp_tbl_x, "id_x") + assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y") + + # delete from table + with src_pl.sql_client() as c: + qual_name = src_pl.sql_client().make_qualified_table_name("tbl_x") + c.execute_sql(f"DELETE FROM {qual_name} WHERE id_x = 1;") + + # process changes + info = dest_pl.run(changes) + assert_load_info(info, expected_load_packages=2) + assert load_table_counts(dest_pl, "tbl_x", "tbl_y") == {"tbl_x": 2, "tbl_y": 3} + exp_tbl_x = [{"id_x": 2, "val_x": "bar"}, {"id_x": 3, "val_x": "baz"}] + exp_tbl_y = [ + {"id_y": 1, "val_y": False}, + {"id_y": 2, "val_y": False}, + {"id_y": 3, "val_y": True}, + ] + assert_loaded_data(dest_pl, "tbl_x", ["id_x", "val_x"], exp_tbl_x, "id_x") + assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y") 
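+    # deletes propagate because decoderbufs DELETE messages carry the replica
+    # identity (here the primary key) in old_tuple, which the merge above turns
+    # into a hard delete via `_pg_deleted_ts`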
+
+
+@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
+@pytest.mark.parametrize("backend", ["sqlalchemy", "pyarrow"])
+def test_without_init_load(
+    src_config: Tuple[dlt.Pipeline, str], destination_name: str, backend: TableBackend
+) -> None:
+    @dlt.resource(write_disposition="merge", primary_key="id_x")
+    def tbl_x(data):
+        yield data
+
+    @dlt.resource(write_disposition="merge", primary_key="id_y")
+    def tbl_y(data):
+        yield data
+
+    src_pl, slot_name = src_config
+
+    # create postgres table
+    # since we're skipping initial load, these records should not be in the replicated table
+    src_pl.run(
+        [
+            tbl_x({"id_x": 1, "val_x": "foo"}),
+            tbl_y({"id_y": 1, "val_y": True}),
+        ]
+    )
+    add_pk(src_pl.sql_client, "tbl_x", "id_x")
+    add_pk(src_pl.sql_client, "tbl_y", "id_y")
+
+    # initialize replication and create resource for changes
+    init_replication(
+        slot_name=slot_name,
+        schema=src_pl.dataset_name,
+        table_names=("tbl_x", "tbl_y"),
+    )
+
+    changes = replication_source(
+        slot_name=slot_name,
+        schema=src_pl.dataset_name,
+        table_names=("tbl_x", "tbl_y"),
+        repl_options={
+            "tbl_x": {"backend": backend},
+            "tbl_y": {"backend": backend},
+        },
+    )
+    changes.tbl_x.apply_hints(
+        write_disposition="merge", primary_key="id_x", columns=merge_hints
+    )
+    changes.tbl_y.apply_hints(
+        write_disposition="merge", primary_key="id_y", columns=merge_hints
+    )
+
+    # change postgres table after replication has been initialized
+    # these records should be in the replicated table
+    src_pl.run(
+        [
+            tbl_x([{"id_x": 2, "val_x": "bar"}, {"id_x": 3, "val_x": "baz"}]),
+            tbl_y({"id_y": 2, "val_y": False}),
+        ]
+    )
+
+    # load changes to destination and assert expectations
+    dest_pl = dlt.pipeline(
+        pipeline_name="dest_pl", destination=destination_name, dev_mode=True
+    )
+    info = dest_pl.run(changes)
+    assert_load_info(info)
+    assert load_table_counts(dest_pl, "tbl_x", "tbl_y") == {"tbl_x": 2, "tbl_y": 1}
+    exp_tbl_x = [{"id_x": 2, "val_x": "bar"}, {"id_x": 3, "val_x": "baz"}]
+    exp_tbl_y = [{"id_y": 2, "val_y": False}]
+    assert_loaded_data(dest_pl, "tbl_x", ["id_x", "val_x"], exp_tbl_x, "id_x")
+    assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y")
+
+    # delete from table
+    with src_pl.sql_client() as c:
+        qual_name = src_pl.sql_client().make_qualified_table_name("tbl_x")
+        c.execute_sql(f"DELETE FROM {qual_name} WHERE id_x = 2;")
+
+    # process change and assert expectations
+    info = dest_pl.run(changes)
+    assert_load_info(info)
+    assert load_table_counts(dest_pl, "tbl_x", "tbl_y") == {"tbl_x": 1, "tbl_y": 1}
+    exp_tbl_x = [{"id_x": 3, "val_x": "baz"}]
+    exp_tbl_y = [{"id_y": 2, "val_y": False}]
+    assert_loaded_data(dest_pl, "tbl_x", ["id_x", "val_x"], exp_tbl_x, "id_x")
+    assert_loaded_data(dest_pl, "tbl_y", ["id_y", "val_y"], exp_tbl_y, "id_y")
+
+
+@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
+@pytest.mark.parametrize("give_hints", [True, False])
+@pytest.mark.parametrize("init_load", [True, False])
+@pytest.mark.parametrize("backend", ["sqlalchemy", "pyarrow"])
+def test_mapped_data_types(
+    src_config: Tuple[dlt.Pipeline, str],
+    destination_name: str,
+    give_hints: bool,
+    init_load: bool,
+    backend: TableBackend,
+) -> None:
+    """Assert common data types (the ones mapped in PostgresTypeMapper) are properly handled."""
+
+    data = deepcopy(TABLE_ROW_ALL_DATA_TYPES)
+    column_schema = deepcopy(TABLE_UPDATE_COLUMNS_SCHEMA)
+
+    # FIXME Need to figure out why, when creating a snapshot, the schema gets loaded in another job
+    expected_load_packages = 1
+    if init_load:
+        expected_load_packages = 2
+
+    # resource to load data into postgres source table
+    @dlt.resource(primary_key="col1", write_disposition="merge", columns=column_schema)
+    def items(data):
+        yield data
+
+    src_pl, slot_name = src_config
+
+    # create postgres table with single record containing all data types
+    src_pl.run(items(data))
+    add_pk(src_pl.sql_client, "items", "col1")
+
+    # initialize replication and create resources
+    snapshot = init_replication(
+        slot_name=slot_name,
+        schema=src_pl.dataset_name,
+        table_names="items",
+        take_snapshots=init_load,
+        table_options={"items": {"backend": backend}},
+    )
+    if init_load and give_hints:
+        snapshot.items.apply_hints(columns=column_schema)
+
+    repl_options = {"items": {"backend": backend}}
+    if give_hints:
+        repl_options["items"]["column_hints"] = column_schema
+    changes = replication_source(
+        slot_name=slot_name,
+        schema=src_pl.dataset_name,
+        table_names="items",
+        repl_options=repl_options,
+    )
+    changes.items.apply_hints(
+        write_disposition="merge", primary_key="col1", columns=merge_hints
+    )
+    if give_hints:
+        changes.items.apply_hints(columns=column_schema)
+
+    # initial load
+    dest_pl = dlt.pipeline(
+        pipeline_name="dest_pl", destination=destination_name, dev_mode=True
+    )
+    if init_load:
+        info = dest_pl.run(snapshot)
+        cleanup_snapshot_resources(snapshot)
+        assert_load_info(info)
+        assert load_table_counts(dest_pl, "items")["items"] == 1
+
+    # insert two records in postgres table
+    r1 = deepcopy(data)
+    r2 = deepcopy(data)
+    r1["col1"] = 1
+    r2["col1"] = 2
+    src_pl.run(items([r1, r2]))
+
+    info = dest_pl.run(changes)
+    assert_load_info(info, expected_load_packages=expected_load_packages)
+    assert load_table_counts(dest_pl, "items")["items"] == (3 if init_load else 2)
+
+    if give_hints:
+        # compare observed with expected column types
+        observed = dest_pl.default_schema.get_table("items")["columns"]
+        for name, expected in column_schema.items():
+            assert observed[name]["data_type"] == expected["data_type"]
+            # postgres bytea does not have precision
+            if (
+                expected.get("precision") is not None
+                and expected["data_type"] != "binary"
+            ):
+                assert observed[name]["precision"] == expected["precision"]
+
+    # update two records in postgres table
+    # this does two deletes and two inserts because dlt implements "merge" as "delete-and-insert"
+    # as such, postgres will create four replication messages: two of type Delete and two of type Insert
+    r1["col2"] = 1.5
+    r2["col3"] = False
+    src_pl.run(items([r1, r2]))
+
+    # process changes and assert expectations
+    info = dest_pl.run(changes)
+    assert_load_info(info, expected_load_packages=expected_load_packages)
+    assert load_table_counts(dest_pl, "items")["items"] == (3 if init_load else 2)
+    exp = [
+        {"col1": 1, "col2": 1.5, "col3": True},
+        {"col1": 2, "col2": 898912.821982, "col3": False},
+        {
+            "col1": 989127831,
+            "col2": 898912.821982,
+            "col3": True,
+        },  # only present with init load
+    ]
+    if not init_load:
+        del exp[-1]
+    assert_loaded_data(dest_pl, "items", ["col1", "col2", "col3"], exp, "col1")
+
+    # now do an actual update, so postgres will create a replication message of type Update
+    with src_pl.sql_client() as c:
+        qual_name = src_pl.sql_client().make_qualified_table_name("items")
+        c.execute_sql(f"UPDATE {qual_name} SET col2 = 2.5 WHERE col1 = 2;")
+
+    # process change and assert expectation
+    info = dest_pl.run(changes)
+    assert_load_info(info, expected_load_packages=expected_load_packages)
+    assert load_table_counts(dest_pl, "items")["items"] == (3 if init_load else 2)
+    exp = [{"col1": 2, "col2": 2.5, "col3": False}]
+    assert_loaded_data(
+        dest_pl, "items", ["col1", "col2", "col3"], exp, "col1", "col1 = 2"
+    )
+
+
+@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
+@pytest.mark.parametrize("backend", ["sqlalchemy", "pyarrow"])
+def test_unmapped_data_types(
+    src_config: Tuple[dlt.Pipeline, str], destination_name: str, backend: TableBackend
+) -> None:
+    """Assert postgres data types that aren't explicitly mapped default to "text" type."""
+    src_pl, slot_name = src_config
+
+    # create postgres table with some unmapped types
+    with src_pl.sql_client() as c:
+        c.create_dataset()
+        c.execute_sql(
+            "CREATE TABLE data_types (bit_col bit(1), box_col box, uuid_col uuid);"
+        )
+
+    # initialize replication and create resource
+    init_replication(
+        slot_name=slot_name,
+        schema=src_pl.dataset_name,
+        table_names="data_types",
+    )
+    changes = replication_source(
+        slot_name=slot_name,
+        schema=src_pl.dataset_name,
+        table_names="data_types",
+        repl_options={"data_types": {"backend": backend}},
+    )
+
+    # insert record in source table to create replication item
+    with src_pl.sql_client() as c:
+        c.execute_sql(
+            "INSERT INTO data_types VALUES (B'1', box '((1,1), (0,0))', gen_random_uuid());"
+        )
+
+    # run destination pipeline and assert resulting data types
+    dest_pl = dlt.pipeline(
+        pipeline_name="dest_pl", destination=destination_name, dev_mode=True
+    )
+    dest_pl.extract(changes)
+    dest_pl.normalize()
+    columns = dest_pl.default_schema.get_table_columns("data_types")
+    assert columns["bit_col"]["data_type"] == "text"
+    assert columns["box_col"]["data_type"] == "text"
+    assert columns["uuid_col"]["data_type"] == "text"
+
+
+@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
+@pytest.mark.parametrize("init_load", [True, False])
+@pytest.mark.parametrize("backend", ["sqlalchemy", "pyarrow"])
+def test_included_columns(
+    src_config: Tuple[dlt.Pipeline, str],
+    destination_name: str,
+    init_load: bool,
+    backend: TableBackend,
+) -> None:
+    def get_cols(pipeline: dlt.Pipeline, table_name: str) -> set:
+        with pipeline.destination_client(pipeline.default_schema_name) as client:
+            assert isinstance(client, SqlJobClientBase)
+            return {
+                k
+                for k in client.get_storage_table(table_name)[1].keys()
+                if not k.startswith("_dlt_")
+            }
+
+    @dlt.resource
+    def tbl_x(data):
+        yield data
+
+    @dlt.resource
+    def tbl_y(data):
+        yield data
+
+    @dlt.resource
+    def tbl_z(data):
+        yield data
+
+    src_pl, slot_name = src_config
+
+    # create three postgres tables
+    src_pl.run(
+        [
+            tbl_x({"id_x": 1, "val_x": "foo", "another_col_x": 1}),
+            tbl_y({"id_y": 1, "val_y": "foo", "another_col_y": 1}),
+            tbl_z({"id_z": 1, "val_z": "foo", "another_col_z": 1}),
+        ]
+    )
+
+    # initialize replication and create resources
+    table_options = {
+        "tbl_x": {"backend": backend, "included_columns": {"id_x", "val_x"}},
+        "tbl_y": {"backend": backend, "included_columns": {"id_y", "val_y"}},
+        "tbl_z": {"backend": backend},
+        # tbl_z has no `included_columns` option, hence all of its columns are included
+    }
+    snapshots = init_replication(
+        slot_name=slot_name,
+        schema=src_pl.dataset_name,
+        table_names=("tbl_x", "tbl_y", "tbl_z"),
+        take_snapshots=init_load,
+        table_options=table_options,
+    )
+    changes = replication_source(
+        slot_name=slot_name,
+        schema=src_pl.dataset_name,
+        table_names=("tbl_x", "tbl_y", "tbl_z"),
+        repl_options=table_options,
+    )
+
+    # update three postgres tables
+    src_pl.run(
+        [
+            tbl_x({"id_x": 2, "val_x": "foo", "another_col_x": 1}),
+            tbl_y({"id_y": 2, "val_y":
"foo", "another_col_y": 1}), + tbl_z({"id_z": 2, "val_z": "foo", "another_col_z": 1}), + ] + ) + + # load to destination and assert column expectations + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, dev_mode=True + ) + if init_load: + dest_pl.run(snapshots) + cleanup_snapshot_resources(snapshots) + assert get_cols(dest_pl, "tbl_x") == {"id_x", "val_x"} + assert get_cols(dest_pl, "tbl_y") == {"id_y", "val_y"} + assert get_cols(dest_pl, "tbl_z") == {"id_z", "val_z", "another_col_z"} + + dest_pl.run(changes) + assert get_cols(dest_pl, "tbl_x") == {"id_x", "val_x", "_pg_lsn", "_pg_deleted_ts"} + assert get_cols(dest_pl, "tbl_y") == {"id_y", "val_y", "_pg_lsn", "_pg_deleted_ts"} + assert get_cols(dest_pl, "tbl_z") == { + "id_z", + "val_z", + "another_col_z", + "_pg_lsn", + "_pg_deleted_ts", + } + + +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +@pytest.mark.parametrize("init_load", [True, False]) +@pytest.mark.parametrize("backend", ["sqlalchemy", "pyarrow"]) +def test_column_hints( + src_config: Tuple[dlt.Pipeline, str], + destination_name: str, + init_load: bool, + backend: TableBackend, +) -> None: + @dlt.resource + def tbl_x(data): + yield data + + @dlt.resource + def tbl_y(data): + yield data + + @dlt.resource + def tbl_z(data): + yield data + + src_pl, slot_name = src_config + + # create three postgres tables + src_pl.run( + [ + tbl_x({"id_x": 1, "val_x": "foo", "another_col_x": 1}), + tbl_y({"id_y": 1, "val_y": "foo", "another_col_y": 1}), + tbl_z({"id_z": 1, "val_z": "foo", "another_col_z": 1}), + ] + ) + + # initialize replication and create resources + snapshots = init_replication( + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names=("tbl_x", "tbl_y", "tbl_z"), + take_snapshots=init_load, + table_options={ + "tbl_x": {"backend": backend}, + "tbl_y": {"backend": backend}, + "tbl_z": {"backend": backend}, + }, + ) + if init_load: + snapshots.tbl_x.apply_hints(columns={"another_col_x": {"data_type": "double"}}) + snapshots.tbl_y.apply_hints(columns={"another_col_y": {"precision": 32}}) + + changes = replication_source( + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names=("tbl_x", "tbl_y", "tbl_z"), + repl_options={ + "tbl_x": { + "backend": backend, + "column_hints": {"another_col_x": {"data_type": "double"}}, + }, + "tbl_y": { + "backend": backend, + "column_hints": {"another_col_y": {"precision": 32}}, + }, + "tbl_z": {"backend": backend}, + }, + ) + + # update three postgres tables + src_pl.run( + [ + tbl_x({"id_x": 2, "val_x": "foo", "another_col_x": 1}), + tbl_y({"id_y": 2, "val_y": "foo", "another_col_y": 1}), + tbl_z({"id_z": 2, "val_z": "foo", "another_col_z": 1}), + ] + ) + + # load to destination and assert column expectations + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, dev_mode=True + ) + if init_load: + dest_pl.run(snapshots) + cleanup_snapshot_resources(snapshots) + assert ( + dest_pl.default_schema.get_table_columns("tbl_x")["another_col_x"][ + "data_type" + ] + == "double" + ) + assert ( + dest_pl.default_schema.get_table_columns("tbl_y")["another_col_y"][ + "precision" + ] + == 32 + ) + assert ( + dest_pl.default_schema.get_table_columns("tbl_z")["another_col_z"][ + "data_type" + ] + == "bigint" + ) + dest_pl.run(changes) + assert ( + dest_pl.default_schema.get_table_columns("tbl_x")["another_col_x"]["data_type"] + == "double" + ) + assert ( + dest_pl.default_schema.get_table_columns("tbl_y")["another_col_y"]["precision"] + == 32 + ) + assert ( + 
+ + +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +@pytest.mark.parametrize("init_load", [True, False]) +@pytest.mark.parametrize("backend", ["sqlalchemy", "pyarrow"]) +def test_column_hints( + src_config: Tuple[dlt.Pipeline, str], + destination_name: str, + init_load: bool, + backend: TableBackend, +) -> None: + @dlt.resource + def tbl_x(data): + yield data + + @dlt.resource + def tbl_y(data): + yield data + + @dlt.resource + def tbl_z(data): + yield data + + src_pl, slot_name = src_config + + # create three postgres tables + src_pl.run( + [ + tbl_x({"id_x": 1, "val_x": "foo", "another_col_x": 1}), + tbl_y({"id_y": 1, "val_y": "foo", "another_col_y": 1}), + tbl_z({"id_z": 1, "val_z": "foo", "another_col_z": 1}), + ] + ) + + # initialize replication and create resources + snapshots = init_replication( + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names=("tbl_x", "tbl_y", "tbl_z"), + take_snapshots=init_load, + table_options={ + "tbl_x": {"backend": backend}, + "tbl_y": {"backend": backend}, + "tbl_z": {"backend": backend}, + }, + ) + if init_load: + snapshots.tbl_x.apply_hints(columns={"another_col_x": {"data_type": "double"}}) + snapshots.tbl_y.apply_hints(columns={"another_col_y": {"precision": 32}}) + + changes = replication_source( + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names=("tbl_x", "tbl_y", "tbl_z"), + repl_options={ + "tbl_x": { + "backend": backend, + "column_hints": {"another_col_x": {"data_type": "double"}}, + }, + "tbl_y": { + "backend": backend, + "column_hints": {"another_col_y": {"precision": 32}}, + }, + "tbl_z": {"backend": backend}, + }, + ) + + # update three postgres tables + src_pl.run( + [ + tbl_x({"id_x": 2, "val_x": "foo", "another_col_x": 1}), + tbl_y({"id_y": 2, "val_y": "foo", "another_col_y": 1}), + tbl_z({"id_z": 2, "val_z": "foo", "another_col_z": 1}), + ] + ) + + # load to destination and assert column expectations + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, dev_mode=True + ) + if init_load: + dest_pl.run(snapshots) + cleanup_snapshot_resources(snapshots) + assert ( + dest_pl.default_schema.get_table_columns("tbl_x")["another_col_x"][ + "data_type" + ] + == "double" + ) + assert ( + dest_pl.default_schema.get_table_columns("tbl_y")["another_col_y"][ + "precision" + ] + == 32 + ) + assert ( + dest_pl.default_schema.get_table_columns("tbl_z")["another_col_z"][ + "data_type" + ] + == "bigint" + ) + dest_pl.run(changes) + assert ( + dest_pl.default_schema.get_table_columns("tbl_x")["another_col_x"]["data_type"] + == "double" + ) + assert ( + dest_pl.default_schema.get_table_columns("tbl_y")["another_col_y"]["precision"] + == 32 + ) + assert ( + dest_pl.default_schema.get_table_columns("tbl_z")["another_col_z"]["data_type"] + == "bigint" + ) + + # the assertions below should pass, but currently fail because of a bug that + # causes column hints to be added to other tables when dispatching to multiple tables + assert "another_col_x" not in dest_pl.default_schema.get_table_columns("tbl_y") + assert "another_col_x" not in dest_pl.default_schema.get_table_columns("tbl_z") + assert "another_col_y" not in dest_pl.default_schema.get_table_columns( + "tbl_x", include_incomplete=True + ) + assert "another_col_y" not in dest_pl.default_schema.get_table_columns( + "tbl_z", include_incomplete=True + ) + + +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +@pytest.mark.parametrize("backend", ["sqlalchemy", "pyarrow"]) +def test_table_schema_change( + src_config: Tuple[dlt.Pipeline, str], destination_name: str, backend: TableBackend +) -> None: + src_pl, slot_name = src_config + + # create postgres table + src_pl.run([{"c1": 1, "c2": 1}], table_name="items") + + # initialize replication + init_replication( + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names="items", + ) + + # create resource and pipeline + changes = replication_source( + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names="items", + repl_options={"items": {"backend": backend}}, + ) + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, dev_mode=True + ) + + # add a column in one commit, this will create one Relation message + src_pl.run([{"c1": 2, "c2": 1}, {"c1": 3, "c2": 1, "c3": 1}], table_name="items") + info = dest_pl.run(changes) + assert_load_info(info) + assert load_table_counts(dest_pl, "items") == {"items": 2} + exp = [{"c1": 2, "c2": 1, "c3": None}, {"c1": 3, "c2": 1, "c3": 1}] + assert_loaded_data(dest_pl, "items", ["c1", "c2", "c3"], exp, "c1") + + # add a column in two commits, this will create two Relation messages + src_pl.run([{"c1": 4, "c2": 1, "c3": 1}], table_name="items") + src_pl.run([{"c1": 5, "c2": 1, "c3": 1, "c4": 1}], table_name="items") + info = dest_pl.run(changes) + assert_load_info(info) + assert load_table_counts(dest_pl, "items") == {"items": 4} + exp = [ + {"c1": 4, "c2": 1, "c3": 1, "c4": None}, + {"c1": 5, "c2": 1, "c3": 1, "c4": 1}, + ] + assert_loaded_data( + dest_pl, "items", ["c1", "c2", "c3", "c4"], exp, "c1", "c1 IN (4, 5)" + )
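+ + +# Editorial sketch (hedged): the bare-bones replication pattern this module +# exercises, condensed into one helper. Assumes an existing slot name and a +# populated "items" table; the "duckdb" destination is an illustrative choice +# not used by this suite. +def _example_replication_flow(src_pl: dlt.Pipeline, slot_name: str) -> None: + init_replication( + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names="items", + ) + changes = replication_source( + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names="items", + target_batch_size=50, # the batch-size knob test_batching exercises below + ) + dest_pl = dlt.pipeline(pipeline_name="example_dest", destination="duckdb") + dest_pl.run(changes) # each run picks up the WAL changes accrued since the last run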
extract_info.asdict()["job_metrics"][0]["items_count"] == 100 + + # insert 100 records into source table in 5 transactions + batch = [{**r, **{"id": key}} for r in [data] for key in range(101, 121)] + src_pl.run(batch, table_name="items") + batch = [{**r, **{"id": key}} for r in [data] for key in range(121, 141)] + src_pl.run(batch, table_name="items") + batch = [{**r, **{"id": key}} for r in [data] for key in range(141, 161)] + src_pl.run(batch, table_name="items") + batch = [{**r, **{"id": key}} for r in [data] for key in range(161, 181)] + src_pl.run(batch, table_name="items") + batch = [{**r, **{"id": key}} for r in [data] for key in range(181, 201)] + src_pl.run(batch, table_name="items") + extract_info = dest_pl.extract(changes) + assert extract_info.asdict()["job_metrics"][0]["items_count"] == 100 + + +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +@pytest.mark.parametrize("backend", ["sqlalchemy", "pyarrow"]) +def test_delete_schema_bug( + src_config: Tuple[dlt.Pipeline, str], destination_name: str, backend: TableBackend +) -> None: + src_pl, slot_name = src_config + + # create postgres table with 100 records + data = [{"id": key, "val": True} for key in range(1, 101)] + src_pl.run(data, table_name="items") + + add_pk(src_pl.sql_client, "items", "id") + + snapshots = init_replication( + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names=("items",), + take_snapshots=True, + table_options={"items": {"backend": backend}}, + ) + + dest_pl = dlt.pipeline( + pipeline_name="dest_pl", destination=destination_name, dev_mode=True + ) + + # initial load + info = dest_pl.run(snapshots) + cleanup_snapshot_resources(snapshots) + assert_load_info(info) + assert load_table_counts(dest_pl, "items") == {"items": 100} + assert_loaded_data(dest_pl, "items", ["id", "val"], data, "id") + + changes = replication_source( + slot_name=slot_name, + schema=src_pl.dataset_name, + table_names=("items",), + target_batch_size=10, + repl_options={"items": {"backend": backend}}, + ) + changes.items.apply_hints( + write_disposition="merge", primary_key="id", columns=merge_hints + ) + + # process changes + info = dest_pl.run(changes) + assert_load_info(info, expected_load_packages=1) + assert load_table_counts(dest_pl, "items") == {"items": 100} + assert_loaded_data(dest_pl, "items", ["id", "val"], data, "id") + + # delete the first 50 rows and update the next 50 rows + with src_pl.sql_client() as c: + qual_name = src_pl.sql_client().make_qualified_table_name("items") + to_delete = ",".join([str(x) for x in range(1, 51)]) + c.execute_sql(f"DELETE FROM {qual_name} WHERE id IN ({to_delete});") + to_update = ",".join([str(x) for x in range(51, 101)]) + c.execute_sql(f"UPDATE {qual_name} SET val = false WHERE id IN ({to_update});") + + # process changes + info = dest_pl.run(changes) + assert_load_info(info, expected_load_packages=2) + assert load_table_counts(dest_pl, "items") == {"items": 50} diff --git a/tests/pg_legacy_replication/utils.py b/tests/pg_legacy_replication/utils.py new file mode 100644 index 000000000..5deb16af0 --- /dev/null +++ b/tests/pg_legacy_replication/utils.py @@ -0,0 +1,52 @@ +from typing import Sequence, List, Dict, Any, Optional + +import dlt +from dlt import Pipeline +from dlt.common.data_writers.escape import escape_postgres_identifier +from dlt.common.configuration.specs import ConnectionStringCredentials + +from tests.utils import select_data + + +def add_pk(sql_client, table_name: str, column_name: str) -> None: + """Adds primary key to postgres table. 
+ + +def assert_loaded_data( + pipeline: Pipeline, + table_name: str, + column_names: Sequence[str], + expectation: List[Dict[str, Any]], + sort_column_name: str, + where_clause: Optional[str] = None, +) -> None: + """Asserts loaded data meets expectation.""" + qual_name = pipeline.sql_client().make_qualified_table_name(table_name) + escape_id = pipeline.destination_client().capabilities.escape_identifier + column_str = ", ".join(map(escape_id, column_names)) + qry = f"SELECT {column_str} FROM {qual_name}" + if where_clause is not None: + qry += " WHERE " + where_clause + observation = [ + {column_name: row[idx] for idx, column_name in enumerate(column_names)} + for row in select_data(pipeline, qry) + ] + assert sorted(observation, key=lambda d: d[sort_column_name]) == expectation + + +def is_super_user(sql_client) -> bool: + """Returns True if Postgres user is superuser, False otherwise.""" + username = dlt.secrets.get( + "sources.pg_replication.credentials", ConnectionStringCredentials + ).username + with sql_client() as c: + return c.execute_sql( # type: ignore[no-any-return] + f"SELECT rolsuper FROM pg_roles WHERE rolname = '{username}';" + )[0][0] diff --git a/tests/postgres/docker-compose.yml b/tests/postgres/docker-compose.yml index aa0a2c5d7..3b901a5ca 100644 --- a/tests/postgres/docker-compose.yml +++ b/tests/postgres/docker-compose.yml @@ -1,4 +1,3 @@ -version: "3.7" services: db: env_file: postgres.env @@ -6,9 +5,14 @@ services: context: postgres dockerfile: Dockerfile container_name: dlt_postgres_db + command: + - postgres + - -c + - config_file=/etc/postgresql/postgresql.conf restart: unless-stopped volumes: - db_home:/var/lib/postgresql/data + - ./postgresql.conf:/etc/postgresql/postgresql.conf:ro ports: - 5432:5432 diff --git a/tests/postgres/postgres/Dockerfile b/tests/postgres/postgres/Dockerfile index 1dfd569b5..e7f9aa73c 100644 --- a/tests/postgres/postgres/Dockerfile +++ b/tests/postgres/postgres/Dockerfile @@ -1,2 +1,23 @@ FROM postgres:14 + +# Install dependencies required to build decoderbufs +RUN apt-get update +RUN apt-get install -f -y \ + software-properties-common \ + build-essential \ + pkg-config \ + git + +RUN apt-get install -f -y \ + postgresql-server-dev-14 \ + libprotobuf-c-dev && \ + rm -rf /var/lib/apt/lists/* + +ARG decoderbufs_version=v1.7.0.Final +RUN git clone https://github.com/debezium/postgres-decoderbufs -b $decoderbufs_version --single-branch && \ + cd postgres-decoderbufs && \ + make && make install && \ + cd .. && \ + rm -rf postgres-decoderbufs + COPY 01_init.sql /docker-entrypoint-initdb.d/ \ No newline at end of file diff --git a/tests/postgres/postgresql.conf b/tests/postgres/postgresql.conf new file mode 100644 index 000000000..93a3dab5a --- /dev/null +++ b/tests/postgres/postgresql.conf @@ -0,0 +1,798 @@ +# ----------------------------- +# PostgreSQL configuration file +# ----------------------------- +# +# This file consists of lines of the form: +# +# name = value +# +# (The "=" is optional.) Whitespace may be used. Comments are introduced with +# "#" anywhere on a line. The complete list of parameter names and allowed +# values can be found in the PostgreSQL documentation. 
+# +# The commented-out settings shown in this file represent the default values. +# Re-commenting a setting is NOT sufficient to revert it to the default value; +# you need to reload the server. +# +# This file is read on server startup and when the server receives a SIGHUP +# signal. If you edit the file on a running system, you have to SIGHUP the +# server for the changes to take effect, run "pg_ctl reload", or execute +# "SELECT pg_reload_conf()". Some parameters, which are marked below, +# require a server shutdown and restart to take effect. +# +# Any parameter can also be given as a command-line option to the server, e.g., +# "postgres -c log_connections=on". Some parameters can be changed at run time +# with the "SET" SQL command. +# +# Memory units: B = bytes Time units: us = microseconds +# kB = kilobytes ms = milliseconds +# MB = megabytes s = seconds +# GB = gigabytes min = minutes +# TB = terabytes h = hours +# d = days + + +#------------------------------------------------------------------------------ +# FILE LOCATIONS +#------------------------------------------------------------------------------ + +# The default values of these variables are driven from the -D command-line +# option or PGDATA environment variable, represented here as ConfigDir. + +#data_directory = 'ConfigDir' # use data in another directory + # (change requires restart) +#hba_file = 'ConfigDir/pg_hba.conf' # host-based authentication file + # (change requires restart) +#ident_file = 'ConfigDir/pg_ident.conf' # ident configuration file + # (change requires restart) + +# If external_pid_file is not explicitly set, no extra PID file is written. +#external_pid_file = '' # write an extra PID file + # (change requires restart) + + +#------------------------------------------------------------------------------ +# CONNECTIONS AND AUTHENTICATION +#------------------------------------------------------------------------------ + +# - Connection Settings - + +listen_addresses = '*' + # comma-separated list of addresses; + # defaults to 'localhost'; use '*' for all + # (change requires restart) +#port = 5432 # (change requires restart) +#max_connections = 100 # (change requires restart) +#superuser_reserved_connections = 3 # (change requires restart) +#unix_socket_directories = '/tmp' # comma-separated list of directories + # (change requires restart) +#unix_socket_group = '' # (change requires restart) +#unix_socket_permissions = 0777 # begin with 0 to use octal notation + # (change requires restart) +#bonjour = off # advertise server via Bonjour + # (change requires restart) +#bonjour_name = '' # defaults to the computer name + # (change requires restart) + +# - TCP settings - +# see "man tcp" for details + +#tcp_keepalives_idle = 0 # TCP_KEEPIDLE, in seconds; + # 0 selects the system default +#tcp_keepalives_interval = 0 # TCP_KEEPINTVL, in seconds; + # 0 selects the system default +#tcp_keepalives_count = 0 # TCP_KEEPCNT; + # 0 selects the system default +#tcp_user_timeout = 0 # TCP_USER_TIMEOUT, in milliseconds; + # 0 selects the system default + +#client_connection_check_interval = 0 # time between checks for client + # disconnection while running queries; + # 0 for never + +# - Authentication - + +#authentication_timeout = 1min # 1s-600s +#password_encryption = scram-sha-256 # scram-sha-256 or md5 +#db_user_namespace = off + +# GSSAPI using Kerberos +#krb_server_keyfile = 'FILE:${sysconfdir}/krb5.keytab' +#krb_caseins_users = off + +# - SSL - + +#ssl = off +#ssl_ca_file = '' +#ssl_cert_file = 'server.crt' 
+#ssl_crl_file = '' +#ssl_crl_dir = '' +#ssl_key_file = 'server.key' +#ssl_ciphers = 'HIGH:MEDIUM:+3DES:!aNULL' # allowed SSL ciphers +#ssl_prefer_server_ciphers = on +#ssl_ecdh_curve = 'prime256v1' +#ssl_min_protocol_version = 'TLSv1.2' +#ssl_max_protocol_version = '' +#ssl_dh_params_file = '' +#ssl_passphrase_command = '' +#ssl_passphrase_command_supports_reload = off + + +#------------------------------------------------------------------------------ +# RESOURCE USAGE (except WAL) +#------------------------------------------------------------------------------ + +# - Memory - + +#shared_buffers = 32MB # min 128kB + # (change requires restart) +#huge_pages = try # on, off, or try + # (change requires restart) +#huge_page_size = 0 # zero for system default + # (change requires restart) +#temp_buffers = 8MB # min 800kB +#max_prepared_transactions = 0 # zero disables the feature + # (change requires restart) +# Caution: it is not advisable to set max_prepared_transactions nonzero unless +# you actively intend to use prepared transactions. +#work_mem = 4MB # min 64kB +#hash_mem_multiplier = 1.0 # 1-1000.0 multiplier on hash table work_mem +#maintenance_work_mem = 64MB # min 1MB +#autovacuum_work_mem = -1 # min 1MB, or -1 to use maintenance_work_mem +#logical_decoding_work_mem = 64MB # min 64kB +#max_stack_depth = 2MB # min 100kB +#shared_memory_type = mmap # the default is the first option + # supported by the operating system: + # mmap + # sysv + # windows + # (change requires restart) +#dynamic_shared_memory_type = posix # the default is the first option + # supported by the operating system: + # posix + # sysv + # windows + # mmap + # (change requires restart) +#min_dynamic_shared_memory = 0MB # (change requires restart) + +# - Disk - + +#temp_file_limit = -1 # limits per-process temp file space + # in kilobytes, or -1 for no limit + +# - Kernel Resources - + +#max_files_per_process = 1000 # min 64 + # (change requires restart) + +# - Cost-Based Vacuum Delay - + +#vacuum_cost_delay = 0 # 0-100 milliseconds (0 disables) +#vacuum_cost_page_hit = 1 # 0-10000 credits +#vacuum_cost_page_miss = 2 # 0-10000 credits +#vacuum_cost_page_dirty = 20 # 0-10000 credits +#vacuum_cost_limit = 200 # 1-10000 credits + +# - Background Writer - + +#bgwriter_delay = 200ms # 10-10000ms between rounds +#bgwriter_lru_maxpages = 100 # max buffers written/round, 0 disables +#bgwriter_lru_multiplier = 2.0 # 0-10.0 multiplier on buffers scanned/round +#bgwriter_flush_after = 0 # measured in pages, 0 disables + +# - Asynchronous Behavior - + +#backend_flush_after = 0 # measured in pages, 0 disables +#effective_io_concurrency = 1 # 1-1000; 0 disables prefetching +#maintenance_io_concurrency = 10 # 1-1000; 0 disables prefetching +#max_worker_processes = 8 # (change requires restart) +#max_parallel_workers_per_gather = 2 # limited by max_parallel_workers +#max_parallel_maintenance_workers = 2 # limited by max_parallel_workers +#max_parallel_workers = 8 # number of max_worker_processes that + # can be used in parallel operations +#parallel_leader_participation = on +#old_snapshot_threshold = -1 # 1min-60d; -1 disables; 0 is immediate + # (change requires restart) + + +#------------------------------------------------------------------------------ +# WRITE-AHEAD LOG +#------------------------------------------------------------------------------ + +# - Settings - + +wal_level = logical # minimal, replica, or logical + # (change requires restart) +#fsync = on # flush data to disk for crash safety + # (turning this off can 
cause + # unrecoverable data corruption) +#synchronous_commit = on # synchronization level; + # off, local, remote_write, remote_apply, or on +#wal_sync_method = fsync # the default is the first option + # supported by the operating system: + # open_datasync + # fdatasync (default on Linux and FreeBSD) + # fsync + # fsync_writethrough + # open_sync +#full_page_writes = on # recover from partial page writes +#wal_log_hints = off # also do full page writes of non-critical updates + # (change requires restart) +#wal_compression = off # enable compression of full-page writes +#wal_init_zero = on # zero-fill new WAL files +#wal_recycle = on # recycle WAL files +#wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers + # (change requires restart) +#wal_writer_delay = 200ms # 1-10000 milliseconds +#wal_writer_flush_after = 1MB # measured in pages, 0 disables +#wal_skip_threshold = 2MB + +#commit_delay = 0 # range 0-100000, in microseconds +#commit_siblings = 5 # range 1-1000 + +# - Checkpoints - + +#checkpoint_timeout = 5min # range 30s-1d +#checkpoint_completion_target = 0.9 # checkpoint target duration, 0.0 - 1.0 +#checkpoint_flush_after = 0 # measured in pages, 0 disables +#checkpoint_warning = 30s # 0 disables +#max_wal_size = 1GB +#min_wal_size = 80MB + +# - Archiving - + +#archive_mode = off # enables archiving; off, on, or always + # (change requires restart) +#archive_command = '' # command to use to archive a logfile segment + # placeholders: %p = path of file to archive + # %f = file name only + # e.g. 'test ! -f /mnt/server/archivedir/%f && cp %p /mnt/server/archivedir/%f' +#archive_timeout = 0 # force a logfile segment switch after this + # number of seconds; 0 disables + +# - Archive Recovery - + +# These are only used in recovery mode. + +#restore_command = '' # command to use to restore an archived logfile segment + # placeholders: %p = path of file to restore + # %f = file name only + # e.g. 'cp /mnt/server/archivedir/%f %p' +#archive_cleanup_command = '' # command to execute at every restartpoint +#recovery_end_command = '' # command to execute at completion of recovery + +# - Recovery Target - + +# Set these only when performing a targeted recovery. + +#recovery_target = '' # 'immediate' to end recovery as soon as a + # consistent state is reached + # (change requires restart) +#recovery_target_name = '' # the named restore point to which recovery will proceed + # (change requires restart) +#recovery_target_time = '' # the time stamp up to which recovery will proceed + # (change requires restart) +#recovery_target_xid = '' # the transaction ID up to which recovery will proceed + # (change requires restart) +#recovery_target_lsn = '' # the WAL LSN up to which recovery will proceed + # (change requires restart) +#recovery_target_inclusive = on # Specifies whether to stop: + # just after the specified recovery target (on) + # just before the recovery target (off) + # (change requires restart) +#recovery_target_timeline = 'latest' # 'current', 'latest', or timeline ID + # (change requires restart) +#recovery_target_action = 'pause' # 'pause', 'promote', 'shutdown' + # (change requires restart) + + +#------------------------------------------------------------------------------ +# REPLICATION +#------------------------------------------------------------------------------ + +# - Sending Servers - + +# Set these on the primary and on any standby that will send replication data. 
+ +#max_wal_senders = 10 # max number of walsender processes + # (change requires restart) +#max_replication_slots = 10 # max number of replication slots + # (change requires restart) +#wal_keep_size = 0 # in megabytes; 0 disables +#max_slot_wal_keep_size = -1 # in megabytes; -1 disables +#wal_sender_timeout = 60s # in milliseconds; 0 disables +#track_commit_timestamp = off # collect timestamp of transaction commit + # (change requires restart) + +# - Primary Server - + +# These settings are ignored on a standby server. + +#synchronous_standby_names = '' # standby servers that provide sync rep + # method to choose sync standbys, number of sync standbys, + # and comma-separated list of application_name + # from standby(s); '*' = all +#vacuum_defer_cleanup_age = 0 # number of xacts by which cleanup is delayed + +# - Standby Servers - + +# These settings are ignored on a primary server. + +#primary_conninfo = '' # connection string to sending server +#primary_slot_name = '' # replication slot on sending server +#promote_trigger_file = '' # file name whose presence ends recovery +#hot_standby = on # "off" disallows queries during recovery + # (change requires restart) +#max_standby_archive_delay = 30s # max delay before canceling queries + # when reading WAL from archive; + # -1 allows indefinite delay +#max_standby_streaming_delay = 30s # max delay before canceling queries + # when reading streaming WAL; + # -1 allows indefinite delay +#wal_receiver_create_temp_slot = off # create temp slot if primary_slot_name + # is not set +#wal_receiver_status_interval = 10s # send replies at least this often + # 0 disables +#hot_standby_feedback = off # send info from standby to prevent + # query conflicts +#wal_receiver_timeout = 60s # time that receiver waits for + # communication from primary + # in milliseconds; 0 disables +#wal_retrieve_retry_interval = 5s # time to wait before retrying to + # retrieve WAL after a failed attempt +#recovery_min_apply_delay = 0 # minimum delay for applying changes during recovery + +# - Subscribers - + +# These settings are ignored on a publisher. 
+ +#max_logical_replication_workers = 4 # taken from max_worker_processes + # (change requires restart) +#max_sync_workers_per_subscription = 2 # taken from max_logical_replication_workers + + +#------------------------------------------------------------------------------ +# QUERY TUNING +#------------------------------------------------------------------------------ + +# - Planner Method Configuration - + +#enable_async_append = on +#enable_bitmapscan = on +#enable_gathermerge = on +#enable_hashagg = on +#enable_hashjoin = on +#enable_incremental_sort = on +#enable_indexscan = on +#enable_indexonlyscan = on +#enable_material = on +#enable_memoize = on +#enable_mergejoin = on +#enable_nestloop = on +#enable_parallel_append = on +#enable_parallel_hash = on +#enable_partition_pruning = on +#enable_partitionwise_join = off +#enable_partitionwise_aggregate = off +#enable_seqscan = on +#enable_sort = on +#enable_tidscan = on + +# - Planner Cost Constants - + +#seq_page_cost = 1.0 # measured on an arbitrary scale +#random_page_cost = 4.0 # same scale as above +#cpu_tuple_cost = 0.01 # same scale as above +#cpu_index_tuple_cost = 0.005 # same scale as above +#cpu_operator_cost = 0.0025 # same scale as above +#parallel_setup_cost = 1000.0 # same scale as above +#parallel_tuple_cost = 0.1 # same scale as above +#min_parallel_table_scan_size = 8MB +#min_parallel_index_scan_size = 512kB +#effective_cache_size = 4GB + +#jit_above_cost = 100000 # perform JIT compilation if available + # and query more expensive than this; + # -1 disables +#jit_inline_above_cost = 500000 # inline small functions if query is + # more expensive than this; -1 disables +#jit_optimize_above_cost = 500000 # use expensive JIT optimizations if + # query is more expensive than this; + # -1 disables + +# - Genetic Query Optimizer - + +#geqo = on +#geqo_threshold = 12 +#geqo_effort = 5 # range 1-10 +#geqo_pool_size = 0 # selects default based on effort +#geqo_generations = 0 # selects default based on effort +#geqo_selection_bias = 2.0 # range 1.5-2.0 +#geqo_seed = 0.0 # range 0.0-1.0 + +# - Other Planner Options - + +#default_statistics_target = 100 # range 1-10000 +#constraint_exclusion = partition # on, off, or partition +#cursor_tuple_fraction = 0.1 # range 0.0-1.0 +#from_collapse_limit = 8 +#jit = on # allow JIT compilation +#join_collapse_limit = 8 # 1 disables collapsing of explicit + # JOIN clauses +#plan_cache_mode = auto # auto, force_generic_plan or + # force_custom_plan + + +#------------------------------------------------------------------------------ +# REPORTING AND LOGGING +#------------------------------------------------------------------------------ + +# - Where to Log - + +#log_destination = 'stderr' # Valid values are combinations of + # stderr, csvlog, syslog, and eventlog, + # depending on platform. csvlog + # requires logging_collector to be on. + +# This is used when logging to stderr: +#logging_collector = off # Enable capturing of stderr and csvlog + # into log files. Required to be on for + # csvlogs. + # (change requires restart) + +# These are only used if logging_collector is on: +#log_directory = 'log' # directory where log files are written, + # can be absolute or relative to PGDATA +#log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log' # log file name pattern, + # can include strftime() escapes +#log_file_mode = 0600 # creation mode for log files, + # begin with 0 to use octal notation +#log_rotation_age = 1d # Automatic rotation of logfiles will + # happen after that time. 0 disables. 
+#log_rotation_size = 10MB # Automatic rotation of logfiles will + # happen after that much log output. + # 0 disables. +#log_truncate_on_rotation = off # If on, an existing log file with the + # same name as the new log file will be + # truncated rather than appended to. + # But such truncation only occurs on + # time-driven rotation, not on restarts + # or size-driven rotation. Default is + # off, meaning append to existing files + # in all cases. + +# These are relevant when logging to syslog: +#syslog_facility = 'LOCAL0' +#syslog_ident = 'postgres' +#syslog_sequence_numbers = on +#syslog_split_messages = on + +# This is only relevant when logging to eventlog (Windows): +# (change requires restart) +#event_source = 'PostgreSQL' + +# - When to Log - + +#log_min_messages = warning # values in order of decreasing detail: + # debug5 + # debug4 + # debug3 + # debug2 + # debug1 + # info + # notice + # warning + # error + # log + # fatal + # panic + +#log_min_error_statement = error # values in order of decreasing detail: + # debug5 + # debug4 + # debug3 + # debug2 + # debug1 + # info + # notice + # warning + # error + # log + # fatal + # panic (effectively off) + +#log_min_duration_statement = -1 # -1 is disabled, 0 logs all statements + # and their durations, > 0 logs only + # statements running at least this number + # of milliseconds + +#log_min_duration_sample = -1 # -1 is disabled, 0 logs a sample of statements + # and their durations, > 0 logs only a sample of + # statements running at least this number + # of milliseconds; + # sample fraction is determined by log_statement_sample_rate + +#log_statement_sample_rate = 1.0 # fraction of logged statements exceeding + # log_min_duration_sample to be logged; + # 1.0 logs all such statements, 0.0 never logs + + +#log_transaction_sample_rate = 0.0 # fraction of transactions whose statements + # are logged regardless of their duration; 1.0 logs all + # statements from all transactions, 0.0 never logs + +# - What to Log - + +#debug_print_parse = off +#debug_print_rewritten = off +#debug_print_plan = off +#debug_pretty_print = on +#log_autovacuum_min_duration = -1 # log autovacuum activity; + # -1 disables, 0 logs all actions and + # their durations, > 0 logs only + # actions running at least this number + # of milliseconds. +#log_checkpoints = off +log_connections = on +log_disconnections = on +#log_duration = off +#log_error_verbosity = default # terse, default, or verbose messages +#log_hostname = off +#log_line_prefix = '%m [%p] ' # special values: + # %a = application name + # %u = user name + # %d = database name + # %r = remote host and port + # %h = remote host + # %b = backend type + # %p = process ID + # %P = process ID of parallel group leader + # %t = timestamp without milliseconds + # %m = timestamp with milliseconds + # %n = timestamp with milliseconds (as a Unix epoch) + # %Q = query ID (0 if none or not computed) + # %i = command tag + # %e = SQL state + # %c = session ID + # %l = session line number + # %s = session start timestamp + # %v = virtual transaction ID + # %x = transaction ID (0 if none) + # %q = stop here in non-session + # processes + # %% = '%' + # e.g. 
'<%u%%%d> ' +#log_lock_waits = off # log lock waits >= deadlock_timeout +#log_recovery_conflict_waits = off # log standby recovery conflict waits + # >= deadlock_timeout +#log_parameter_max_length = -1 # when logging statements, limit logged + # bind-parameter values to N bytes; + # -1 means print in full, 0 disables +#log_parameter_max_length_on_error = 0 # when logging an error, limit logged + # bind-parameter values to N bytes; + # -1 means print in full, 0 disables +log_statement = 'all' # none, ddl, mod, all +#log_replication_commands = off +#log_temp_files = -1 # log temporary files equal or larger + # than the specified size in kilobytes; + # -1 disables, 0 logs all temp files +#log_timezone = 'GMT' + + +#------------------------------------------------------------------------------ +# PROCESS TITLE +#------------------------------------------------------------------------------ + +#cluster_name = '' # added to process titles if nonempty + # (change requires restart) +#update_process_title = on + + +#------------------------------------------------------------------------------ +# STATISTICS +#------------------------------------------------------------------------------ + +# - Query and Index Statistics Collector - + +#track_activities = on +#track_activity_query_size = 1024 # (change requires restart) +#track_counts = on +#track_io_timing = off +#track_wal_io_timing = off +#track_functions = none # none, pl, all +#stats_temp_directory = 'pg_stat_tmp' + + +# - Monitoring - + +#compute_query_id = auto +#log_statement_stats = off +#log_parser_stats = off +#log_planner_stats = off +#log_executor_stats = off + + +#------------------------------------------------------------------------------ +# AUTOVACUUM +#------------------------------------------------------------------------------ + +#autovacuum = on # Enable autovacuum subprocess? 'on' + # requires track_counts to also be on. 
+#autovacuum_max_workers = 3 # max number of autovacuum subprocesses + # (change requires restart) +#autovacuum_naptime = 1min # time between autovacuum runs +#autovacuum_vacuum_threshold = 50 # min number of row updates before + # vacuum +#autovacuum_vacuum_insert_threshold = 1000 # min number of row inserts + # before vacuum; -1 disables insert + # vacuums +#autovacuum_analyze_threshold = 50 # min number of row updates before + # analyze +#autovacuum_vacuum_scale_factor = 0.2 # fraction of table size before vacuum +#autovacuum_vacuum_insert_scale_factor = 0.2 # fraction of inserts over table + # size before insert vacuum +#autovacuum_analyze_scale_factor = 0.1 # fraction of table size before analyze +#autovacuum_freeze_max_age = 200000000 # maximum XID age before forced vacuum + # (change requires restart) +#autovacuum_multixact_freeze_max_age = 400000000 # maximum multixact age + # before forced vacuum + # (change requires restart) +#autovacuum_vacuum_cost_delay = 2ms # default vacuum cost delay for + # autovacuum, in milliseconds; + # -1 means use vacuum_cost_delay +#autovacuum_vacuum_cost_limit = -1 # default vacuum cost limit for + # autovacuum, -1 means use + # vacuum_cost_limit + + +#------------------------------------------------------------------------------ +# CLIENT CONNECTION DEFAULTS +#------------------------------------------------------------------------------ + +# - Statement Behavior - + +#client_min_messages = notice # values in order of decreasing detail: + # debug5 + # debug4 + # debug3 + # debug2 + # debug1 + # log + # notice + # warning + # error +#search_path = '"$user", public' # schema names +#row_security = on +#default_table_access_method = 'heap' +#default_tablespace = '' # a tablespace name, '' uses the default +#default_toast_compression = 'pglz' # 'pglz' or 'lz4' +#temp_tablespaces = '' # a list of tablespace names, '' uses + # only default tablespace +#check_function_bodies = on +#default_transaction_isolation = 'read committed' +#default_transaction_read_only = off +#default_transaction_deferrable = off +#session_replication_role = 'origin' +#statement_timeout = 0 # in milliseconds, 0 is disabled +#lock_timeout = 0 # in milliseconds, 0 is disabled +#idle_in_transaction_session_timeout = 0 # in milliseconds, 0 is disabled +#idle_session_timeout = 0 # in milliseconds, 0 is disabled +#vacuum_freeze_table_age = 150000000 +#vacuum_freeze_min_age = 50000000 +#vacuum_failsafe_age = 1600000000 +#vacuum_multixact_freeze_table_age = 150000000 +#vacuum_multixact_freeze_min_age = 5000000 +#vacuum_multixact_failsafe_age = 1600000000 +#bytea_output = 'hex' # hex, escape +#xmlbinary = 'base64' +#xmloption = 'content' +#gin_pending_list_limit = 4MB + +# - Locale and Formatting - + +#datestyle = 'iso, mdy' +#intervalstyle = 'postgres' +#timezone = 'GMT' +#timezone_abbreviations = 'Default' # Select the set of available time zone + # abbreviations. Currently, there are + # Default + # Australia (historical usage) + # India + # You can create your own file in + # share/timezonesets/. +#extra_float_digits = 1 # min -15, max 3; any value >0 actually + # selects precise output mode +#client_encoding = sql_ascii # actually, defaults to database + # encoding + +# These settings are initialized by initdb, but they can be changed. 
+#lc_messages = 'C' # locale for system error message + # strings +#lc_monetary = 'C' # locale for monetary formatting +#lc_numeric = 'C' # locale for number formatting +#lc_time = 'C' # locale for time formatting + +# default configuration for text search +#default_text_search_config = 'pg_catalog.simple' + +# - Shared Library Preloading - + +#local_preload_libraries = '' +#session_preload_libraries = '' +shared_preload_libraries = 'decoderbufs' # (change requires restart) +#jit_provider = 'llvmjit' # JIT library to use + +# - Other Defaults - + +#dynamic_library_path = '$libdir' +#extension_destdir = '' # prepend path when loading extensions + # and shared objects (added by Debian) +#gin_fuzzy_search_limit = 0 + + +#------------------------------------------------------------------------------ +# LOCK MANAGEMENT +#------------------------------------------------------------------------------ + +#deadlock_timeout = 1s +#max_locks_per_transaction = 64 # min 10 + # (change requires restart) +#max_pred_locks_per_transaction = 64 # min 10 + # (change requires restart) +#max_pred_locks_per_relation = -2 # negative values mean + # (max_pred_locks_per_transaction + # / -max_pred_locks_per_relation) - 1 +#max_pred_locks_per_page = 2 # min 0 + + +#------------------------------------------------------------------------------ +# VERSION AND PLATFORM COMPATIBILITY +#------------------------------------------------------------------------------ + +# - Previous PostgreSQL Versions - + +#array_nulls = on +#backslash_quote = safe_encoding # on, off, or safe_encoding +#escape_string_warning = on +#lo_compat_privileges = off +#quote_all_identifiers = off +#standard_conforming_strings = on +#synchronize_seqscans = on + +# - Other Platforms and Clients - + +#transform_null_equals = off + + +#------------------------------------------------------------------------------ +# ERROR HANDLING +#------------------------------------------------------------------------------ + +#exit_on_error = off # terminate session on any error? +#restart_after_crash = on # reinitialize after backend crash? +#data_sync_retry = off # retry or panic on failure to fsync + # data? + # (change requires restart) +#recovery_init_sync_method = fsync # fsync, syncfs (Linux 5.8+) + + +#------------------------------------------------------------------------------ +# CONFIG FILE INCLUDES +#------------------------------------------------------------------------------ + +# These options allow settings to be loaded from files other than the +# default postgresql.conf. Note that these are directives, not variable +# assignments, so they can usefully be given more than once. + +#include_dir = '...' # include files ending in '.conf' from + # a directory, e.g., 'conf.d' +#include_if_exists = '...' # include file only if it exists +#include = '...' 
# include file + + +#------------------------------------------------------------------------------ +# CUSTOMIZED OPTIONS +#------------------------------------------------------------------------------ + +# Add settings for extensions here \ No newline at end of file diff --git a/tests/test_dlt_init.py b/tests/test_dlt_init.py index 3beb86357..9bbf901fa 100644 --- a/tests/test_dlt_init.py +++ b/tests/test_dlt_init.py @@ -1,13 +1,12 @@ import pytest import os -import sys from typing import Any, Iterator, List from dlt.common.configuration.providers import SecretsTomlProvider from dlt.common.storages.file_storage import FileStorage from dlt.common.utils import set_working_dir -from dlt.extract.source import SourceReference +from dlt.extract import SourceReference from dlt.cli import init_command, echo from dlt.cli.init_command import SOURCES_MODULE_NAME, utils as cli_utils, files_ops