Skip to content

Commit 8067e43

Browse files
authored
Validate the names of data catalogs. (#612)
1 parent ca537c8 commit 8067e43

File tree

3 files changed

+30
-1
lines changed

3 files changed

+30
-1
lines changed

docs/source/changes.md

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and
5151
in pytask-parallel.
5252
- {pull}`611` removes the initial task execution status from
5353
`pytask_execute_task_log_start`.
54+
- {pull}`612` adds validation for data catalog names.
5455

5556
## 0.4.7 - 2024-03-19
5657

src/_pytask/data_catalog.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import hashlib
1010
import inspect
1111
import pickle
12+
import re
1213
from pathlib import Path
1314
from typing import Any
1415

@@ -61,13 +62,26 @@ class DataCatalog:
6162

6263
default_node: type[PNode] = PickleNode
6364
entries: dict[str, PNode | PProvisionalNode] = field(factory=dict)
64-
name: str = "default"
65+
name: str = field(default="default")
6566
path: Path | None = None
6667
_session_config: dict[str, Any] = field(
6768
factory=lambda *x: {"check_casing_of_paths": True} # noqa: ARG005
6869
)
6970
_instance_path: Path = field(factory=_get_parent_path_of_data_catalog_module)
7071

72+
@name.validator
def _check(self, attribute: str, value: str) -> None:  # noqa: ARG002
    """Validate the ``name`` given to the data catalog.

    Parameters
    ----------
    attribute
        The attribute being validated. Unused; present because the validator
        hook is called with it.
    value
        The candidate name.

    Raises
    ------
    TypeError
        If ``value`` is not a string.
    ValueError
        If ``value`` is empty or contains any character other than ASCII
        letters, digits, hyphens, and underscores.

    """
    # NOTE(review): presumably a marker that lets rich hide this frame from
    # rendered tracebacks - confirm against the traceback handling code.
    _rich_traceback_omit = True
    if not isinstance(value, str):
        msg = "The name of a data catalog must be a string."
        raise TypeError(msg)
    # ``re.fullmatch`` is required here: ``re.match`` only anchors at the start
    # of the string, so a name such as "a?" would slip through. The hyphen is
    # placed last in the character class so it is unambiguously a literal.
    if not re.fullmatch(r"[a-zA-Z0-9_-]+", value):
        msg = (
            "The name of a data catalog must be a string containing only letters, "
            "numbers, hyphens, and underscores."
        )
        raise ValueError(msg)
84+
7185
def __attrs_post_init__(self) -> None:
7286
root_path, _ = find_project_root_and_config((self._instance_path,))
7387
self._session_config["paths"] = (root_path,)

tests/test_data_catalog.py

+14
Original file line numberDiff line numberDiff line change
@@ -234,3 +234,17 @@ def task_add_content(
234234
result = runner.invoke(cli, [tmp_path.as_posix()])
235235
assert result.exit_code == ExitCode.OK
236236
assert tmp_path.joinpath("output.txt").read_text() == "Hello, World!"
237+
238+
239+
@pytest.mark.end_to_end()
def test_data_catalog_has_invalid_name(runner, tmp_path):
    """Collection fails with an explanatory message for an invalid catalog name."""
    # Task module whose import creates a catalog named with a forbidden character.
    module_text = textwrap.dedent(
        """
        from pytask import DataCatalog

        data_catalog = DataCatalog(name="?1")
        """
    )
    task_module = tmp_path / "task_example.py"
    task_module.write_text(module_text)

    outcome = runner.invoke(cli, [tmp_path.as_posix()])

    assert outcome.exit_code == ExitCode.COLLECTION_FAILED
    assert "The name of a data catalog" in outcome.stdout

0 commit comments

Comments
 (0)