Skip to content

Commit 5c468e9

Browse files

Committed: Use cloudnet-api-client to download raw files

1 parent: e7f6749 · commit: 5c468e9

File tree

5 files changed

+41
-56
lines changed

5 files changed

+41
-56
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ classifiers = [
2020
dependencies = [
2121
"ceilopyter",
2222
"cftime",
23+
"cloudnet-api-client",
2324
"cloudnetpy[extras]>=1.72.0",
2425
"cloudnetpy_qc>=1.24.1",
2526
"doppy>=0.3.5",

scripts/cloudnet.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from typing import TypeVar
1717
from uuid import UUID
1818

19+
from cloudnet_api_client import APIClient
1920
from processing import utils
2021
from processing.dvas import Dvas
2122
from processing.fetch import fetch
@@ -189,7 +190,8 @@ def process_main(args):
189190
storage_api = StorageApi(config, session)
190191
pid_utils = PidUtils(config, session)
191192
dvas = Dvas(config, md_api)
192-
processor = Processor(md_api, storage_api, pid_utils, dvas)
193+
client = APIClient(f"{config.dataportal_url}/api/", session)
194+
processor = Processor(md_api, storage_api, pid_utils, dvas, client)
193195

194196
if args.cmd == "fetch":
195197
_print_fetch_header(args)

scripts/worker.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from tempfile import TemporaryDirectory
1010
from threading import Event
1111

12+
from cloudnet_api_client import APIClient
1213
from processing import utils
1314
from processing.config import Config
1415
from processing.dvas import Dvas
@@ -68,8 +69,9 @@ def __init__(self, config: Config):
6869
self.storage_api = StorageApi(self.config, self.session)
6970
self.pid_utils = PidUtils(self.config, self.session)
7071
self.dvas = Dvas(self.config, self.md_api)
72+
self.client = APIClient(f"{config.dataportal_url}/api/", self.session)
7173
self.processor = Processor(
72-
self.md_api, self.storage_api, self.pid_utils, self.dvas
74+
self.md_api, self.storage_api, self.pid_utils, self.dvas, self.client
7375
)
7476
self.logger = MemoryLogger()
7577
self.n_processed_tasks = 0

src/processing/fetch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def get_raw_instrument_metadata(self) -> list:
6363
return [m for m in metadata if not m["filename"].lower().endswith(".lv0")]
6464

6565
def get_raw_model_metadata(self) -> list:
66-
url = f"{self.api_url}raw-model-files"
66+
url = f"{self.api_url}/raw-model-files"
6767
payload = {
6868
**self.payload,
6969
"status": ["uploaded", "processed"],

src/processing/processor.py

Lines changed: 33 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
import datetime
22
import logging
3-
import re
43
import uuid
54
from dataclasses import dataclass
65
from pathlib import Path
76
from typing import Literal
87

8+
from cloudnet_api_client import APIClient
99
from cloudnetpy.exceptions import PlottingError
1010
from cloudnetpy.model_evaluation.plotting.plotting import generate_L3_day_plots
1111
from cloudnetpy.plotting import Dimensions, PlotParameters, generate_figure
@@ -87,11 +87,13 @@ def __init__(
8787
storage_api: StorageApi,
8888
pid_utils: PidUtils,
8989
dvas: Dvas,
90+
client: APIClient,
9091
):
9192
self.md_api = md_api
9293
self.storage_api = storage_api
9394
self.pid_utils = pid_utils
9495
self.dvas = dvas
96+
self.client = client
9597

9698
def get_site(self, site_id: str) -> Site:
9799
site = self.md_api.get(f"api/sites/{site_id}")
@@ -214,6 +216,7 @@ def download_instrument(
214216
else:
215217
start_date, end_date = date
216218
start_date_ext, end_date_ext = start_date, end_date
219+
217220
if time_offset is not None:
218221
if largest_only:
219222
raise ValueError("Cannot use both time_offset and largest_only")
@@ -223,54 +226,49 @@ def download_instrument(
223226
start_date_ext -= datetime.timedelta(days=1)
224227
elif time_offset > TIMEDELTA_ZERO:
225228
end_date_ext += datetime.timedelta(days=1)
226-
payload = self._get_payload(
227-
site=site_id,
228-
date=(start_date_ext, end_date_ext),
229-
instrument=instrument_id,
229+
230+
metadata = self.client.raw_metadata(
231+
site_id,
232+
date_from=start_date_ext,
233+
date_to=end_date_ext,
234+
instrument_id=instrument_id,
230235
instrument_pid=instrument_pid,
231-
skip_created=True,
232236
filename_prefix=filename_prefix,
233237
filename_suffix=filename_suffix,
238+
status=["uploaded", "processed"],
234239
)
235-
upload_metadata = self.md_api.get("api/raw-files", payload)
236-
if include_pattern is not None:
237-
upload_metadata = _include_records_with_pattern_in_filename(
238-
upload_metadata, include_pattern
240+
if include_pattern:
241+
metadata = self.client.filter(metadata, include_pattern=include_pattern)
242+
if exclude_pattern:
243+
metadata = self.client.filter(metadata, exclude_pattern=exclude_pattern)
244+
if include_tag_subset:
245+
metadata = self.client.filter(
246+
metadata, include_tag_subset=include_tag_subset
239247
)
240-
if exclude_pattern is not None:
241-
upload_metadata = _exclude_records_with_pattern_in_filename(
242-
upload_metadata, exclude_pattern
248+
if exclude_tag_subset:
249+
metadata = self.client.filter(
250+
metadata, exclude_tag_subset=exclude_tag_subset
243251
)
244-
if include_tag_subset is not None:
245-
upload_metadata = [
246-
record
247-
for record in upload_metadata
248-
if include_tag_subset.issubset(set(record["tags"]))
249-
]
250-
if exclude_tag_subset is not None:
251-
upload_metadata = [
252-
record
253-
for record in upload_metadata
254-
if not exclude_tag_subset.issubset(set(record["tags"]))
255-
]
256-
if not upload_metadata:
252+
253+
if not metadata:
257254
if allow_empty:
258255
return [], []
259256
else:
260257
raise utils.RawDataMissingError
258+
261259
if largest_only:
262-
upload_metadata = [max(upload_metadata, key=lambda item: int(item["size"]))]
263-
full_paths, uuids = self.storage_api.download_raw_data(
264-
upload_metadata, directory
265-
)
260+
metadata = [max(metadata, key=lambda item: int(item.size))]
261+
262+
full_paths = self.client.download(metadata, directory, progress=False)
263+
uuids = [f.uuid for f in metadata]
264+
266265
if time_offset is not None:
267266
uuids = [
268-
meta["uuid"]
269-
for meta in upload_metadata
270-
if start_date
271-
<= datetime.date.fromisoformat(meta["measurementDate"])
272-
<= end_date
267+
meta.uuid
268+
for meta in metadata
269+
if start_date <= meta.measurement_date <= end_date
273270
]
271+
274272
if largest_only:
275273
return full_paths[0], uuids
276274
return full_paths, uuids
@@ -569,24 +567,6 @@ def _dimensions2dict(dimensions: Dimensions) -> dict:
569567
}
570568

571569

572-
def _include_records_with_pattern_in_filename(metadata: list, pattern: str) -> list:
573-
"""Includes only records with certain pattern."""
574-
return [
575-
row
576-
for row in metadata
577-
if re.search(pattern, row["filename"], flags=re.IGNORECASE)
578-
]
579-
580-
581-
def _exclude_records_with_pattern_in_filename(metadata: list, pattern: str) -> list:
582-
"""Excludes records with certain pattern."""
583-
return [
584-
row
585-
for row in metadata
586-
if not re.search(pattern, row["filename"], flags=re.IGNORECASE)
587-
]
588-
589-
590570
def _full_product_to_l3_product(full_product: str):
591571
"""Returns l3 product name."""
592572
return full_product.split("-")[1]

0 commit comments

Comments (0)