Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better S3 Support for MagViews #1278

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions webknossos/webknossos/dataset/_array.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import re
from abc import ABC, abstractmethod
from collections.abc import Iterable, Iterator
Expand Down Expand Up @@ -431,8 +432,14 @@ def _make_kvstore(path: Path) -> str | dict[str, str | list[str]]:
"path": parsed_url.path.lstrip("/"),
"bucket": parsed_url.netloc,
}
if endpoint_url := path.storage_options.get("client_kwargs", {}).get(
"endpoint_url", None
if (
(
endpoint_url := path.storage_options.get("client_kwargs", {}).get(
"endpoint_url", None
)
)
or (endpoint_url := path.storage_options.get("endpoint_url", None))
or (endpoint_url := os.environ.get("S3_ENDPOINT_URL", None))
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check whether this is required, would be better to always infer endpoint_url from UPath.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should rely on the UPath

):
kvstore_spec["endpoint"] = endpoint_url
if "key" in path.storage_options and "secret" in path.storage_options:
Expand Down Expand Up @@ -780,6 +787,7 @@ def create(cls, path: Path, array_info: ArrayInfo) -> "Zarr3Array":
],
},
"create": True,
"open": True,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm?

}
).result()
return cls(path, _array)
Expand Down
43 changes: 12 additions & 31 deletions webknossos/webknossos/dataset/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,40 +226,13 @@ def __init__(self, dataset: "Dataset", properties: LayerProperties) -> None:

@property
def path(self) -> Path:
"""Gets the filesystem path to this layer's data.

The path is determined from the first mag's path parent directory if mags have paths,
otherwise uses the dataset path combined with layer name. Remote paths are handled
specially.
"""Gets the filesystem path to this layer's data. This is defined as a subdirectory of the dataset directory named like the layer.
Therefore, this directory does not contain the actual data of any linked or remote layers or mags.

Returns:
Path: Filesystem path to this layer's data directory

Raises:
AssertionError: If mags in layer point to different layers
"""

# Assume that all mags belong to the same layer. If they have a path use them as this layers path.
# This is necessary for remote layer / mag support.
maybe_mag_path_str = (
self._properties.mags[0].path if len(self._properties.mags) > 0 else None
)
maybe_layer_path = (
strip_trailing_slash(UPath(maybe_mag_path_str)).parent
if maybe_mag_path_str
else None
)
for mag in self._properties.mags:
is_same_layer = (
mag.path is None and maybe_layer_path is None
) or strip_trailing_slash(UPath(mag.path)).parent == maybe_layer_path
assert is_same_layer, "All mags of a layer must point to the same layer."
is_remote = maybe_layer_path and is_remote_path(maybe_layer_path)
return (
maybe_layer_path
if maybe_layer_path and is_remote
else self.dataset.path / self.name
)
return self.dataset.path / self.name
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you checked the usages of Layer.path in wklibs and vx?


@property
def resolved_path(self) -> Path:
Expand Down Expand Up @@ -691,7 +664,15 @@ def add_existing_remote_mag_view(
f"Cannot add mag {mag} as it already exists for layer {self}"
)
self._setup_mag(mag, mag_path)
self._properties.mags.append(mag_view._properties)
# since the remote mag view might belong to another dataset, it's property's path might be None, therefore, we get the path from the mag_view itself instead of it's properties
self._properties.mags.append(
MagViewProperties(
mag=mag_view.mag,
path=str(mag_view.path),
cube_length=mag_view._properties.cube_length,
axis_order=mag_view._properties.axis_order,
)
)
self.dataset._export_as_json()

return mag_view
Expand Down
Loading