_sample_data.py
from __future__ import annotations

import hashlib
import logging
import re
import shutil
import urllib.error
import urllib.parse
from pathlib import Path
from typing import Optional, Union

import requests
import wget
from napari.types import LayerDataTuple
from ome_zarr.io import parse_url
from ome_zarr.reader import Reader

from napari_ome_zarr_navigator import _TEST_DATA_DIR

# Quiet the ome_zarr reader while loading sample data.
logging.getLogger("ome_zarr").setLevel(logging.WARNING)


def load_ome_zarr_from_zenodo(doi: str, zarr_url: Union[str, Path]):
    """Download (if necessary) and open an OME-Zarr image from a Zenodo record.

    The record is cached under ``_TEST_DATA_DIR`` so repeated calls do not
    re-download the archive.
    """
    doi_path = Path(_TEST_DATA_DIR).joinpath(doi.replace("/", "_"))
    zarr_path = doi_path.joinpath(zarr_url)
    if not doi_path.is_dir():
        download_from_zenodo(doi, directory=doi_path)
        shutil.unpack_archive(zarr_path.with_suffix(".zarr.zip"), doi_path)
    reader = Reader(parse_url(zarr_path))
    zarr_group = list(reader())[0]
    return zarr_group


def download_from_zenodo(
    doi: str,
    overwrite: bool = False,
    directory: Union[str, Path] = Path(),
    access_token: Optional[str] = None,
):
    """Download all files of a Zenodo record and verify their checksums.

    Args:
        doi: Zenodo DOI, e.g. "10.5281/zenodo.11262587".
        overwrite: Reuse an existing download directory if True.
        directory: Target directory for the downloaded files.
        access_token: Zenodo access token for restricted records (optional).
    """
    record_id = re.match(r".*zenodo\.(\w+)", doi).group(1)
    url = "https://zenodo.org/api/records/" + record_id
    js = requests.get(url).json()
    doi = js["metadata"]["doi"]
    print("Title: " + js["metadata"]["title"])
    print("Publication date: " + js["metadata"]["publication_date"])
    print("DOI: " + js["metadata"]["doi"])
    print(
        "Total file size: {:.1f} MB".format(
            sum(f["size"] / 10**6 for f in js["files"])
        )
    )
    doi_path = Path(directory)
    try:
        doi_path.mkdir(exist_ok=overwrite, parents=True)
    except FileExistsError:
        print(f"{doi_path} exists. Don't overwrite.")
        return
    for file in js["files"]:
        file_path = Path(doi_path).joinpath(file["key"])
        algorithm, checksum = file["checksum"].split(":")
        try:
            link = urllib.parse.unquote(file["links"]["self"])
            if access_token is not None:
                link = f"{link}?access_token={access_token}"
            wget.download(link, str(directory))
            check_passed, returned_checksum = verify_checksum(
                file_path, algorithm, checksum
            )
            if check_passed:
                print(f"\nChecksum is correct. ({checksum})")
            else:
                print(
                    f"\nChecksum is incorrect! ({checksum}, got: {returned_checksum})"
                )
        except urllib.error.HTTPError as e:
            print(f"\nDownload of {file['key']} failed: {e}")


def verify_checksum(filename: Union[str, Path], algorithm, original_checksum):
    """Compare a local file's checksum against the expected value.

    Returns a (matches, computed_checksum) tuple.
    """
    h = hashlib.new(algorithm)
    with open(filename, "rb") as f:
        h.update(f.read())
    returned_checksum = h.hexdigest()
    return returned_checksum == original_checksum, returned_checksum


def hiPSC_zarr() -> list[LayerDataTuple]:
    """Sample data entry for the hiPSC cardiomyocyte differentiation MIP plate."""
    doi = "10.5281_zenodo.11262587"
    zarr_url = "20200812-CardiomyocyteDifferentiation14-Cycle1_mip.zarr"
    return load_zarr(doi, zarr_url)


def leukemia_zarr() -> list[LayerDataTuple]:
    """Sample data entry for the leukemia Operetta plate."""
    doi = "10.5281_zenodo.8322727"
    zarr_url = "operetta_plate.zarr"
    return load_zarr(doi, zarr_url)


def load_zarr(doi: str, zarr_url: Union[str, Path]) -> list[LayerDataTuple]:
    """Build a napari LayerDataTuple from a Zenodo-hosted OME-Zarr image."""
    ome_zarr = load_ome_zarr_from_zenodo(doi, zarr_url)
    if ome_zarr:
        return [
            (
                ome_zarr.data,
                {
                    "name": ome_zarr.metadata["name"],
                    "channel_axis": 0,
                    "contrast_limits": ome_zarr.metadata["contrast_limits"],
                    "colormap": ome_zarr.metadata["colormap"],
                    "metadata": {"sample_path": ome_zarr.zarr.path},
                    # Keep only the spatial (z, y, x) scale factors.
                    "scale": ome_zarr.metadata["coordinateTransformations"][0][
                        0
                    ]["scale"][-3:],
                },
                "image",
            )
        ]
    else:
        return [(None,)]
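

# --- Illustrative usage sketch --------------------------------------------
# This block is an assumption for demonstration only; it is not part of the
# plugin's napari sample-data wiring. It shows how the sample loaders above
# could be exercised directly in a napari viewer.
if __name__ == "__main__":
    import napari

    viewer = napari.Viewer()
    for layer in hiPSC_zarr():
        if layer[0] is not None:
            data, add_kwargs, _layer_type = layer
            # add_kwargs already carries name, channel_axis, contrast limits,
            # colormap, scale, and metadata from load_zarr().
            viewer.add_image(data, **add_kwargs)
    napari.run()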