-
Notifications
You must be signed in to change notification settings - Fork 22
feat: Support Actor schema storages with Alias mechanism #797
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
c4adb74
19113e7
b12e27e
fd0716c
3b36459
72c2f35
b7604cb
ec6e071
aa3b9ea
aa5624f
17288ca
a7e645f
a996dca
f738e58
2657528
bc7d13d
b7a390b
e2df853
d15fadb
a7306a8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import dataclasses | ||
| import json | ||
| from datetime import datetime, timedelta | ||
| from decimal import Decimal | ||
|
|
@@ -34,6 +35,29 @@ def _transform_to_list(value: Any) -> list[str] | None: | |
| return value if isinstance(value, list) else str(value).split(',') | ||
|
|
||
|
|
||
@dataclasses.dataclass
class ActorStorages:
    """Container grouping an Actor's storage IDs by storage type.

    Each attribute is a mapping from a storage alias to the ID of the storage
    registered under that alias.
    """

    # Alias -> ID for key-value stores.
    key_value_stores: dict[str, str]
    # Alias -> ID for datasets.
    datasets: dict[str, str]
    # Alias -> ID for request queues.
    request_queues: dict[str, str]
Pijukatel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
| def _load_storage_keys(data: None | str | dict | ActorStorages) -> ActorStorages | None: | ||
| """Load storage keys from environment.""" | ||
| if data is None: | ||
| return None | ||
| if isinstance(data, ActorStorages): | ||
| return data | ||
| storage_mapping = data if isinstance(data, dict) else json.loads(data) | ||
| return ActorStorages( | ||
| key_value_stores=storage_mapping.get('keyValueStores', {}), | ||
| datasets=storage_mapping.get('datasets', {}), | ||
| request_queues=storage_mapping.get('requestQueues', {}), | ||
| ) | ||
|
|
||
|
|
||
| @docs_group('Configuration') | ||
| class Configuration(CrawleeConfiguration): | ||
| """A class for specifying the configuration of an Actor. | ||
|
|
@@ -446,6 +470,15 @@ class Configuration(CrawleeConfiguration): | |
| BeforeValidator(lambda data: json.loads(data) if isinstance(data, str) else data or None), | ||
| ] = None | ||
|
|
||
| actor_storages: Annotated[ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure about the naming here - we usually strip the |
||
| ActorStorages | None, | ||
| Field( | ||
| alias='actor_storages_json', | ||
| description='Storage IDs for the actor', | ||
Pijukatel marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| ), | ||
| BeforeValidator(_load_storage_keys), | ||
| ] = None | ||
Pijukatel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| @model_validator(mode='after') | ||
| def disable_browser_sandbox_on_platform(self) -> Self: | ||
| """Disable the browser sandbox mode when running on the Apify platform. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -262,3 +262,41 @@ async def _get_default_kvs_client(configuration: Configuration) -> KeyValueStore | |
| raise ValueError("'Configuration.default_key_value_store_id' must be set.") | ||
|
|
||
| return apify_client_async.key_value_store(key_value_store_id=configuration.default_key_value_store_id) | ||
|
|
||
@classmethod
async def register_aliases(cls, configuration: Configuration) -> None:
    """Load alias mapping from configuration to the default kvs.

    Does nothing when `configuration.actor_storages` is `None`. Otherwise every
    alias found in the key-value store, dataset and request queue mappings is
    turned into a storage key (the `default` alias is registered as
    `__default__`) and merged into the record stored under
    `cls._ALIAS_MAPPING_KEY` in the default key-value store. The merged mapping
    is also applied to `cls._alias_map`.

    Args:
        configuration: Configuration providing `actor_storages` and the default storage IDs.
    """
    if configuration.actor_storages is None:
        return

    configuration_mapping = {}

    if configuration.default_dataset_id != configuration.actor_storages.datasets.get('default'):
        logger.warning(
            f'Conflicting default dataset ids: {configuration.default_dataset_id=},'
            f" {configuration.actor_storages.datasets.get('default')=}"
        )

    for mapping, storage_type in (
        (configuration.actor_storages.key_value_stores, 'KeyValueStore'),
        (configuration.actor_storages.datasets, 'Dataset'),
        (configuration.actor_storages.request_queues, 'RequestQueue'),
    ):
        for storage_alias, storage_id in mapping.items():
            configuration_mapping[
                cls(  # noqa: SLF001  # Private access is fine inside the class's own classmethod.
                    storage_type=storage_type,
                    alias='__default__' if storage_alias == 'default' else storage_alias,
                    configuration=configuration,
                )._storage_key
            ] = storage_id

    # Bulk update the mapping in the default KVS with the configuration mapping.
    client = await cls._get_default_kvs_client(configuration=configuration)
    record = await client.get_record(cls._ALIAS_MAPPING_KEY)
    # Fall back to an empty mapping when the record is missing or its value is
    # null; `.get('value', {})` alone would return `None` for a null value.
    existing_mapping = (record or {}).get('value') or {}

    # Update the existing mapping with the configuration mapping.
    existing_mapping.update(configuration_mapping)
    # Store the updated mapping back in the KVS and in memory.
    await client.set_record(cls._ALIAS_MAPPING_KEY, existing_mapping)
    cls._alias_map.update(existing_mapping)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| { | ||
vdusek marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| "actorSpecification": 1, | ||
| "version": "0.0", | ||
| "storages": { | ||
| "datasets": { | ||
| "default": { | ||
| "actorSpecification": 1, | ||
| "fields": { | ||
| "properties": { | ||
| "id": { "type": "string" } | ||
| } | ||
| } | ||
| }, | ||
| "custom": { | ||
| "actorSpecification": 1, | ||
| "fields": { | ||
| "properties": { | ||
| "id": { "type": "string" } | ||
| } | ||
| } | ||
Pijukatel marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| from apify import Actor | ||
vdusek marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
async def main() -> None:
    """Check that the `custom` dataset alias opens the dataset listed in the configuration."""
    async with Actor:
        storages = Actor.configuration.actor_storages
        assert storages
        custom_dataset = await Actor.open_dataset(alias='custom')
        assert custom_dataset.id == storages.datasets['custom']
Pijukatel marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from pathlib import Path | ||
| from typing import TYPE_CHECKING | ||
|
|
||
| if TYPE_CHECKING: | ||
| from ..conftest import MakeActorFunction, RunActorFunction | ||
|
|
||
# Directory holding the Actor source files; it sits next to this module.
_ACTOR_SOURCE_DIR = Path(__file__).parent / 'actor_source'


def read_actor_source(filename: str) -> str:
    """Return the text content of a file from the Actor source directory.

    Args:
        filename: Path of the file, joined onto `_ACTOR_SOURCE_DIR`.

    Returns:
        The file content decoded as UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the locale's default encoding.
    return (_ACTOR_SOURCE_DIR / filename).read_text(encoding='utf-8')
|
|
||
|
|
||
async def test_configuration_storages(make_actor: MakeActorFunction, run_actor: RunActorFunction) -> None:
    """Build the `schema_storages` Actor from the bundled sources and expect its run to succeed."""
    actor_files = {
        'src/main.py': read_actor_source('main.py'),
        '.actor/actor.json': read_actor_source('actor.json'),
    }
    schema_actor = await make_actor(label='schema_storages', source_files=actor_files)
    outcome = await run_actor(schema_actor)

    assert outcome.status == 'SUCCEEDED'
Uh oh!
There was an error while loading. Please reload this page.