From dff62c99ad45883f4262958600adcbb2cad32b74 Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Tue, 26 Nov 2024 00:13:41 +0000 Subject: [PATCH 1/7] feat(rest_api): custom client for specific resources --- dlt/sources/rest_api/__init__.py | 12 +++++- dlt/sources/rest_api/typing.py | 1 + .../verified-sources/rest_api/basic.md | 26 +++++++++++++ .../rest_api/configurations/source_configs.py | 12 ++++++ .../integration/test_response_actions.py | 38 +++++++++++++++++++ 5 files changed, 88 insertions(+), 1 deletion(-) diff --git a/dlt/sources/rest_api/__init__.py b/dlt/sources/rest_api/__init__.py index ed55f71e10..3ca609e0bc 100644 --- a/dlt/sources/rest_api/__init__.py +++ b/dlt/sources/rest_api/__init__.py @@ -266,7 +266,7 @@ def create_resources( client = RESTClient( base_url=client_config["base_url"], headers=client_config.get("headers"), - auth=create_auth(client_config.get("auth")), + auth=create_auth(endpoint_config.get("auth", client_config.get("auth"))), paginator=create_paginator(client_config.get("paginator")), session=client_config.get("session"), ) @@ -410,6 +410,16 @@ def _validate_config(config: RESTAPIConfig) -> None: auth = client_config.get("auth") if auth: auth = _mask_secrets(auth) + resources = c.get("resources", []) + for resource in resources: + if isinstance(resource, str) or isinstance(resource, DltResource): + continue + if "endpoint" in resource: + endpoint = resource.get("endpoint") + if not isinstance(endpoint, str): + auth = endpoint.get("auth") + if auth: + auth = _mask_secrets(auth) validate_dict(RESTAPIConfig, c, path=".") diff --git a/dlt/sources/rest_api/typing.py b/dlt/sources/rest_api/typing.py index d4cea892a3..ccef828b1a 100644 --- a/dlt/sources/rest_api/typing.py +++ b/dlt/sources/rest_api/typing.py @@ -263,6 +263,7 @@ class Endpoint(TypedDict, total=False): data_selector: Optional[jsonpath.TJsonPath] response_actions: Optional[List[ResponseAction]] incremental: Optional[IncrementalConfig] + auth: Optional[AuthConfig] class ProcessingSteps(TypedDict): diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md b/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md index c600d64dbe..9b0384ea2f 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md @@ -308,6 +308,32 @@ A resource configuration is used to define a [dlt resource](../../../general-usa - `include_from_parent`: A list of fields from the parent resource to be included in the resource output. See the [resource relationships](#include-fields-from-the-parent-resource) section for more details. - `processing_steps`: A list of [processing steps](#processing-steps-filter-and-transform-data) to filter and transform the data. - `selected`: A flag to indicate if the resource is selected for loading. This could be useful when you want to load data only from child resources and not from the parent resource. +- `auth`: An optional `AuthConfig` instance. If passed, is used over the one defined in the [dlt resource](../../../general-usage/resource.md) definition. Example: +```py +from dlt.sources.helpers.rest_client.auth import HttpBasicAuth + +config = { + "client": { + "auth": { + "type": "bearer", + "token": dlt.secrets["your_api_token"], + } + }, + "resources": [ + "resource-using-bearer-auth", + { + "name": "my-resource-with-special-auth", + "endpoint": { + # ... + "auth": HttpBasicAuth("user", dlt.secrets["your_basic_auth_password"]) + }, + # ... + } + ] + # ... +} +``` +This would use `Bearer` auth as defined in the `client` for `resource-using-bearer-auth` and `Http Basic` auth for `my-resource-with-special-auth`. You can also pass additional resource parameters that will be used to configure the dlt resource. See [dlt resource API reference](../../../api_reference/extract/decorators#resource) for more details. diff --git a/tests/sources/rest_api/configurations/source_configs.py b/tests/sources/rest_api/configurations/source_configs.py index 705a42637c..f0cfcdbf85 100644 --- a/tests/sources/rest_api/configurations/source_configs.py +++ b/tests/sources/rest_api/configurations/source_configs.py @@ -395,6 +395,18 @@ def repositories(): repositories(), ], }, + { + "client": {"base_url": "https://github.com/api/v2"}, + "resources": [ + { + "name": "issues", + "endpoint": { + "path": "dlt-hub/{repository}/issues/", + "auth": HttpBasicAuth("", "BASIC_AUTH_TOKEN"), + }, + } + ], + }, ] diff --git a/tests/sources/rest_api/integration/test_response_actions.py b/tests/sources/rest_api/integration/test_response_actions.py index 1ec8058a86..0d21231de6 100644 --- a/tests/sources/rest_api/integration/test_response_actions.py +++ b/tests/sources/rest_api/integration/test_response_actions.py @@ -1,3 +1,4 @@ +import base64 import pytest from dlt.common import json from dlt.sources.helpers.requests import Response @@ -316,3 +317,40 @@ def add_field(response: Response, *args, **kwargs) -> Response: mock_response_hook_2.assert_called_once() assert all(record["custom_field"] == "foobar" for record in data) + + +def test_auth_overwrites_for_specific_endpoints(mock_api_server, mocker): + def custom_hook(response: Response, *args, **kwargs) -> Response: + assert response.request.headers["Authorization"] == f"Basic {base64.b64encode(b'U:P').decode('ascii')}" + return response + + mock_response_hook = mocker.Mock(side_effect=custom_hook) + mock_source = rest_api_source( + { + "client": { + "base_url": "https://api.example.com", + "auth": { + "type": "bearer", + "token": "T", + }, + }, + "resources": [ + { + "name": "posts", + "endpoint": { + "auth": { + "type": "http_basic", + "username": "U", + "password": "P", + }, + "response_actions": [ + mock_response_hook, + ], + }, + }, + ], + } + ) + + list(mock_source.with_resources("posts").add_limit(1)) + mock_response_hook.assert_called_once() From 56d4e44ae5c7e06ee1ff3306b9a574b3130e4144 Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 27 Nov 2024 03:04:13 +1100 Subject: [PATCH 2/7] Update docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md Co-authored-by: Anton Burnashev --- .../docs/dlt-ecosystem/verified-sources/rest_api/basic.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md b/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md index 9b0384ea2f..48e7d6b765 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md @@ -308,7 +308,7 @@ A resource configuration is used to define a [dlt resource](../../../general-usa - `include_from_parent`: A list of fields from the parent resource to be included in the resource output. See the [resource relationships](#include-fields-from-the-parent-resource) section for more details. - `processing_steps`: A list of [processing steps](#processing-steps-filter-and-transform-data) to filter and transform the data. - `selected`: A flag to indicate if the resource is selected for loading. This could be useful when you want to load data only from child resources and not from the parent resource. -- `auth`: An optional `AuthConfig` instance. If passed, is used over the one defined in the [dlt resource](../../../general-usage/resource.md) definition. Example: +- `auth`: An optional `AuthConfig` instance. If passed, is used over the one defined in the [client](#client) definition. Example: ```py from dlt.sources.helpers.rest_client.auth import HttpBasicAuth From d065d0f9b1d9dbc8b3e491f6d4bc9c47c552be1d Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 27 Nov 2024 03:04:21 +1100 Subject: [PATCH 3/7] Update dlt/sources/rest_api/__init__.py Co-authored-by: Anton Burnashev --- dlt/sources/rest_api/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlt/sources/rest_api/__init__.py b/dlt/sources/rest_api/__init__.py index 3ca609e0bc..03046bc2c4 100644 --- a/dlt/sources/rest_api/__init__.py +++ b/dlt/sources/rest_api/__init__.py @@ -412,7 +412,7 @@ def _validate_config(config: RESTAPIConfig) -> None: auth = _mask_secrets(auth) resources = c.get("resources", []) for resource in resources: - if isinstance(resource, str) or isinstance(resource, DltResource): + if isinstance(resource, (str, DltResource)): continue if "endpoint" in resource: endpoint = resource.get("endpoint") From c3f18344b1fbb96477588fcef33d0f37e1f081f0 Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 27 Nov 2024 03:04:50 +1100 Subject: [PATCH 4/7] Update dlt/sources/rest_api/__init__.py Co-authored-by: Anton Burnashev --- dlt/sources/rest_api/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dlt/sources/rest_api/__init__.py b/dlt/sources/rest_api/__init__.py index 03046bc2c4..a03e45f5ed 100644 --- a/dlt/sources/rest_api/__init__.py +++ b/dlt/sources/rest_api/__init__.py @@ -414,8 +414,7 @@ def _validate_config(config: RESTAPIConfig) -> None: for resource in resources: if isinstance(resource, (str, DltResource)): continue - if "endpoint" in resource: - endpoint = resource.get("endpoint") + if endpoint := resource.get("endpoint"): if not isinstance(endpoint, str): auth = endpoint.get("auth") if auth: From aa0a9d871dd7d0a3e690c70fb42586dbaffdd2bc Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Wed, 27 Nov 2024 08:28:14 +1100 Subject: [PATCH 5/7] style: fix lint --- tests/sources/rest_api/integration/test_response_actions.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/sources/rest_api/integration/test_response_actions.py b/tests/sources/rest_api/integration/test_response_actions.py index 0d21231de6..e4fcc32f8f 100644 --- a/tests/sources/rest_api/integration/test_response_actions.py +++ b/tests/sources/rest_api/integration/test_response_actions.py @@ -321,7 +321,10 @@ def add_field(response: Response, *args, **kwargs) -> Response: def test_auth_overwrites_for_specific_endpoints(mock_api_server, mocker): def custom_hook(response: Response, *args, **kwargs) -> Response: - assert response.request.headers["Authorization"] == f"Basic {base64.b64encode(b'U:P').decode('ascii')}" + assert ( + response.request.headers["Authorization"] + == f"Basic {base64.b64encode(b'U:P').decode('ascii')}" + ) return response mock_response_hook = mocker.Mock(side_effect=custom_hook) From e2f6c8cfa42543173f1403f12bf67c00b83dab01 Mon Sep 17 00:00:00 2001 From: Joscha Feth Date: Tue, 26 Nov 2024 22:47:00 +0000 Subject: [PATCH 6/7] style: remove superfluous assignment --- dlt/sources/rest_api/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dlt/sources/rest_api/__init__.py b/dlt/sources/rest_api/__init__.py index a03e45f5ed..966d9e8b6c 100644 --- a/dlt/sources/rest_api/__init__.py +++ b/dlt/sources/rest_api/__init__.py @@ -409,7 +409,7 @@ def _validate_config(config: RESTAPIConfig) -> None: if client_config: auth = client_config.get("auth") if auth: - auth = _mask_secrets(auth) + _mask_secrets(auth) resources = c.get("resources", []) for resource in resources: if isinstance(resource, (str, DltResource)): @@ -418,7 +418,7 @@ def _validate_config(config: RESTAPIConfig) -> None: if not isinstance(endpoint, str): auth = endpoint.get("auth") if auth: - auth = _mask_secrets(auth) + _mask_secrets(auth) validate_dict(RESTAPIConfig, c, path=".") From b80734ee9b80986bd859a7d3189a244f8eb89791 Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Wed, 27 Nov 2024 10:28:45 +0100 Subject: [PATCH 7/7] Fix failing test --- tests/sources/rest_api/configurations/source_configs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sources/rest_api/configurations/source_configs.py b/tests/sources/rest_api/configurations/source_configs.py index f0cfcdbf85..ff58fee0fb 100644 --- a/tests/sources/rest_api/configurations/source_configs.py +++ b/tests/sources/rest_api/configurations/source_configs.py @@ -401,7 +401,7 @@ def repositories(): { "name": "issues", "endpoint": { - "path": "dlt-hub/{repository}/issues/", + "path": "user/repos", "auth": HttpBasicAuth("", "BASIC_AUTH_TOKEN"), }, }