Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/store paths #2272

Open
wants to merge 32 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
736ef8a
add path attribute to stores; migrate localstore to the new protocol
d-v-b Sep 27, 2024
8189cc7
add path kwarg to memory test fixture
d-v-b Sep 28, 2024
9696222
Merge branch 'v3' of https://github.com/zarr-developers/zarr-python i…
d-v-b Sep 28, 2024
fa13860
add path to stores
d-v-b Sep 30, 2024
c5586ce
Merge branch 'v3' of https://github.com/zarr-developers/zarr-python i…
d-v-b Sep 30, 2024
b4b9c28
Merge branch 'main' of github.com:zarr-developers/zarr-python into fe…
d-v-b Oct 17, 2024
c6c8843
add _path attribute to localstore that caches a pathlib.Path version …
d-v-b Oct 17, 2024
b5dd50c
remove some old roots
d-v-b Oct 17, 2024
d7fbbdc
make store test class get and set async
d-v-b Oct 17, 2024
0b1c454
remove an old root
d-v-b Oct 17, 2024
d378242
resolve get before _getting in zipstore tests
d-v-b Oct 17, 2024
669e258
Merge branch 'main' into feat/store-paths
d-v-b Oct 17, 2024
9d0d04b
add path kwarg to gpu memorystore test fixture
d-v-b Oct 17, 2024
471740b
Merge branch 'main' of github.com:zarr-developers/zarr-python into fe…
d-v-b Oct 18, 2024
9a83b86
zipstore store_kwargs propagates path
d-v-b Oct 20, 2024
84c545f
storetests store_kwargs defaults to not implemented
d-v-b Oct 20, 2024
bd912ef
memory store clear is scoped to the path
d-v-b Oct 20, 2024
0c7ab00
use relative path for zipfile listing
d-v-b Oct 20, 2024
d04934d
use path in with_mode for memorystore
d-v-b Oct 20, 2024
9e65afb
call resolve_path at the top of store routines for memorystore
d-v-b Oct 20, 2024
a5b186b
use path for gpumemorystore
d-v-b Oct 20, 2024
aab4138
use relative key in test_list_dir
d-v-b Oct 20, 2024
346d291
fix warning test
d-v-b Oct 20, 2024
e68a341
parameterize over path in memorystore tests
d-v-b Oct 20, 2024
eb93c24
Merge branch 'main' of github.com:zarr-developers/zarr-python into fe…
d-v-b Oct 20, 2024
59ac9ed
lint docstring
d-v-b Oct 20, 2024
fc4a780
put path in memorystore and gpu memorystore reprs
d-v-b Oct 20, 2024
a92cd4a
update localstore repr
d-v-b Oct 20, 2024
1bd1714
update zipstore repr
d-v-b Oct 20, 2024
808e9ff
change default path for remotestore to ''
d-v-b Oct 21, 2024
3d1d043
Merge branch 'main' of github.com:zarr-developers/zarr-python into fe…
d-v-b Oct 28, 2024
b5ef33c
Merge branch 'feat/store-paths' of github.com:d-v-b/zarr-python into …
d-v-b Oct 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 44 additions & 4 deletions src/zarr/abc/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,26 @@ class Store(ABC):

_mode: AccessMode
_is_open: bool

def __init__(self, *args: Any, mode: AccessModeLiteral = "r", **kwargs: Any) -> None:
self._is_open = False
self._mode = AccessMode.from_literal(mode)
path: str

# TODO: Make store immutable
# def __setattr__(self, *args: Any, **kwargs: Any) -> None:
# msg = (
# 'Stores are immutable. To modify a Store object, create a new one with the desired'
# 'attributes')
# raise NotImplementedError(msg)

def __init__(self, path: str = "", mode: AccessModeLiteral = "r") -> None:
object.__setattr__(self, "_is_open", False)
object.__setattr__(self, "_mode", AccessMode.from_literal(mode))
object.__setattr__(self, "path", validate_path(path))

def resolve_key(self, key: str) -> str:
key = parse_path(key)
if self.path == "":
return key
else:
return f"{self.path}/{key}"

@classmethod
async def open(cls, *args: Any, **kwargs: Any) -> Self:
Expand Down Expand Up @@ -386,6 +402,13 @@ async def _get_many(
for req in requests:
yield (req[0], await self.get(*req))

def with_path(self, path: str) -> Self:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def with_path(self, path: str) -> Self:
@abstractmethod
def with_path(self, path: str) -> Self:

Or do you think think we can do this in a generic way that applies to all stores?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we can make it generic unfortunately, because each store class might have its own extra attributes. maybe we could use __getstate__ to get around this?

"""
Return a copy of this store with a new path attribute
"""
# TODO: implement me
raise NotImplementedError


@runtime_checkable
class ByteGetter(Protocol):
Expand Down Expand Up @@ -423,3 +446,20 @@ async def set_or_delete(byte_setter: ByteSetter, value: Buffer | None) -> None:
await byte_setter.delete()
else:
await byte_setter.set(value)


def validate_path(path: str) -> str:
"""
Ensure that the input string is a valid relative path in the abstract zarr object storage scheme.
"""
if path.endswith("/"):
raise ValueError(f"Invalid path: {path} ends with '/'.")
if "//" in path:
raise ValueError(f"Invalid path: {path} contains '//'.")
if "\\" in path:
raise ValueError(f"Invalid path: {path} contains '\"'.")
return path


def parse_path(path: str) -> str:
return path.rstrip("/").lstrip("/")
4 changes: 2 additions & 2 deletions src/zarr/storage/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ async def make_store_path(
result = StorePath(await MemoryStore.open(mode=mode or "w"), path=path_normalized)
elif isinstance(store_like, Path):
result = StorePath(
await LocalStore.open(root=store_like, mode=mode or "r"), path=path_normalized
await LocalStore.open(path=store_like, mode=mode or "r"), path=path_normalized
)
elif isinstance(store_like, str):
storage_options = storage_options or {}
Expand All @@ -244,7 +244,7 @@ async def make_store_path(
)
else:
result = StorePath(
await LocalStore.open(root=Path(store_like), mode=mode or "r"), path=path_normalized
await LocalStore.open(path=Path(store_like), mode=mode or "r"), path=path_normalized
)
elif isinstance(store_like, dict):
# We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings.
Expand Down
96 changes: 52 additions & 44 deletions src/zarr/storage/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,38 @@
import os
import shutil
from pathlib import Path
from typing import TYPE_CHECKING, Self
from typing import TYPE_CHECKING

from zarr.abc.store import ByteRangeRequest, Store
from zarr.abc.store import Store
from zarr.core.buffer import Buffer
from zarr.core.common import concurrent_map

if TYPE_CHECKING:
from collections.abc import AsyncGenerator, Iterable
from typing import Self

from zarr.abc.store import ByteRangeRequest
from zarr.core.buffer import BufferPrototype
from zarr.core.common import AccessModeLiteral


def _get(
path: Path, prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None
) -> Buffer:
def _get(path: Path, prototype: BufferPrototype, byte_range: ByteRangeRequest | None) -> Buffer:
"""
Fetch a contiguous region of bytes from a file.

Parameters
----------

path : Path
The file to read bytes from.
prototype : BufferPrototype
The buffer prototype to use when reading the bytes.
byte_range : tuple[int | None, int | None] | None = None
The range of bytes to read. If `byte_range` is `None`, then the entire file will be read.
If `byte_range` is a tuple, the first value specifies the index of the first byte to read,
and the second value specifies the total number of bytes to read. If the total value is
`None`, then the entire file after the first byte will be read.
"""
if byte_range is not None:
if byte_range[0] is None:
start = 0
Expand Down Expand Up @@ -72,7 +88,7 @@ class LocalStore(Store):

Parameters
----------
root : str or Path
path : str or Path
Directory to use as root of store.
mode : str
Mode in which to open the store. Either 'r', 'r+', 'a', 'w', 'w-'.
Expand All @@ -83,41 +99,34 @@ class LocalStore(Store):
supports_deletes
supports_partial_writes
supports_listing
root
path
"""

supports_writes: bool = True
supports_deletes: bool = True
supports_partial_writes: bool = True
supports_listing: bool = True
_path: Path

root: Path

def __init__(self, root: Path | str, *, mode: AccessModeLiteral = "r") -> None:
super().__init__(mode=mode)
if isinstance(root, str):
root = Path(root)
if not isinstance(root, Path):
raise TypeError(
f'"root" must be a string or Path instance. Got an object with type {type(root)} instead.'
)
self.root = root
def __init__(self, path: Path | str, *, mode: AccessModeLiteral = "r") -> None:
super().__init__(mode=mode, path=str(path))
self._path = Path(self.path)

async def _open(self) -> None:
if not self.mode.readonly:
self.root.mkdir(parents=True, exist_ok=True)
self._path.mkdir(parents=True, exist_ok=True)
return await super()._open()

async def clear(self) -> None:
# docstring inherited
self._check_writable()
shutil.rmtree(self.root)
self.root.mkdir()
shutil.rmtree(self.path)
os.mkdir(self.path)

async def empty(self) -> bool:
# docstring inherited
try:
with os.scandir(self.root) as it:
with os.scandir(self.path) as it:
for entry in it:
if entry.is_file():
# stop once a file is found
Expand All @@ -129,16 +138,16 @@ async def empty(self) -> bool:

def with_mode(self, mode: AccessModeLiteral) -> Self:
# docstring inherited
return type(self)(root=self.root, mode=mode)
return type(self)(path=self.path, mode=mode)

def __str__(self) -> str:
return f"file://{self.root}"
return f"file:///{self.path}"

def __repr__(self) -> str:
return f"LocalStore({str(self)!r})"

def __eq__(self, other: object) -> bool:
return isinstance(other, type(self)) and self.root == other.root
return isinstance(other, type(self)) and self.path == other.path

async def get(
self,
Expand All @@ -149,8 +158,8 @@ async def get(
# docstring inherited
if not self._is_open:
await self._open()
assert isinstance(key, str)
path = self.root / key

path = self._path / key

try:
return await asyncio.to_thread(_get, path, prototype, byte_range)
Expand All @@ -166,7 +175,7 @@ async def get_partial_values(
args = []
for key, byte_range in key_ranges:
assert isinstance(key, str)
path = self.root / key
path = self._path / key
args.append((_get, path, prototype, byte_range))
return await concurrent_map(args, asyncio.to_thread, limit=None) # TODO: fix limit

Expand All @@ -188,7 +197,7 @@ async def _set(self, key: str, value: Buffer, exclusive: bool = False) -> None:
assert isinstance(key, str)
if not isinstance(value, Buffer):
raise TypeError("LocalStore.set(): `value` must a Buffer instance")
path = self.root / key
path = self._path / key
await asyncio.to_thread(_put, path, value, start=None, exclusive=exclusive)

async def set_partial_values(
Expand All @@ -199,48 +208,47 @@ async def set_partial_values(
args = []
for key, start, value in key_start_values:
assert isinstance(key, str)
path = self.root / key
path = os.path.join(self.path, key)
args.append((_put, path, value, start))
await concurrent_map(args, asyncio.to_thread, limit=None) # TODO: fix limit

async def delete(self, key: str) -> None:
# docstring inherited
self._check_writable()
path = self.root / key
path = self._path / key
if path.is_dir(): # TODO: support deleting directories? shutil.rmtree?
shutil.rmtree(path)
else:
await asyncio.to_thread(path.unlink, True) # Q: we may want to raise if path is missing

async def exists(self, key: str) -> bool:
# docstring inherited
path = self.root / key
path = self._path / key
return await asyncio.to_thread(path.is_file)

async def list(self) -> AsyncGenerator[str, None]:
# docstring inherited
to_strip = self.root.as_posix() + "/"
for p in list(self.root.rglob("*")):
# TODO: just invoke list_prefix with the prefix "/"
to_strip = self.path + "/"
for p in self._path.rglob("*"):
if p.is_file():
yield p.as_posix().replace(to_strip, "")
yield p.relative_to(to_strip).as_posix()

async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
# docstring inherited
to_strip = (
(self.root / prefix).as_posix() + "/"
) # TODO: fixme in https://github.com/zarr-developers/zarr-python/issues/2438
for p in (self.root / prefix).rglob("*"):
to_strip = os.path.join(self.path, prefix)
for p in (self._path / prefix).rglob("*"):
if p.is_file():
yield p.as_posix().replace(to_strip, "")
yield p.relative_to(to_strip).as_posix()

async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
# docstring inherited
base = self.root / prefix
to_strip = str(base) + "/"
base = os.path.join(self.path, prefix)
to_strip = base + "/"

try:
key_iter = base.iterdir()
key_iter = Path(base).iterdir()
for key in key_iter:
yield key.as_posix().replace(to_strip, "")
yield key.relative_to(to_strip).as_posix()
except (FileNotFoundError, NotADirectoryError):
pass
1 change: 1 addition & 0 deletions src/zarr/storage/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

if TYPE_CHECKING:
from collections.abc import AsyncGenerator, Generator, Iterable
from typing import Self

from zarr.core.buffer import Buffer, BufferPrototype
from zarr.core.common import AccessModeLiteral
Expand Down
Loading