Skip to content

Commit

Permalink
added more guide on assets and made the collection id and title the s…
Browse files Browse the repository at this point in the history
…ame for the sake of findability
  • Loading branch information
mo-dkrz committed Feb 5, 2025
1 parent c438488 commit 51e2c9d
Show file tree
Hide file tree
Showing 5 changed files with 227 additions and 11 deletions.
1 change: 1 addition & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[submodule "dev-env/config"]
path = dev-env/config
url = https://github.com/FREVA-CLINT/freva-service-config.git
branch = opensearch-stac
2 changes: 1 addition & 1 deletion dev-env/config
34 changes: 32 additions & 2 deletions dev-env/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,17 +85,47 @@ services:
stac-api:
networks:
- freva-rest
image: ghcr.io/mo-dkrz/stac-fastapi-os:v2.2.9
image: ghcr.io/stac-utils/stac-fastapi-os:latest
environment:
- RUN_LOCAL_OS=1
- STAC_FASTAPI_TITLE=Freva STAC Service
- STAC_FASTAPI_DESCRIPTION=`Freva STAC Service` provides a SpatioTemporal Asset Catalog (STAC) API implementation for geospatial data cataloging and discovery.
- STAC_FASTAPI_ROUTE_DEPENDENCIES=[{"routes":[{"path":"/collections/{collection_id}/items/{item_id}","method":["PUT","DELETE"]},{"path":"/collections/{collection_id}/items","method":["POST"]},{"path":"/collections","method":["POST"]},{"path":"/collections/{collection_id}","method":["PUT","DELETE"]},{"path":"/collections/{collection_id}/bulk_items","method":["POST"]},{"path":"/aggregations","method":["POST"]},{"path":"/collections/{collection_id}/aggregations","method":["POST"]},{"path":"/aggregate","method":["POST"]},{"path":"/aggregate","method":["POST"]},{"path":"/collections/{collection_id}/aggregate","method":["POST"]}],"dependencies":[{"method":"stac_fastapi.core.basic_auth.BasicAuth","kwargs":{"credentials":[{"username":"stac","password":"secret"}]}}]}]
- STAC_USERNAME=stac
- STAC_PASSWORD=secret
- APP_HOST=0.0.0.0
- APP_PORT=8080
- ES_PORT=9202
- ES_HOST=opensearch
- ES_USE_SSL=false
- ES_VERIFY_CERTS=false
- RELOAD=true
- WEB_CONCURRENCY=10
ports:
- "8083:8080"
depends_on:
opensearch:
condition: service_healthy

opensearch:
networks:
- freva-rest
container_name: os-container
image: opensearchproject/opensearch:latest
hostname: opensearch
environment:
- discovery.type=single-node
- plugins.security.disabled=true
- OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m
volumes:
- ./config/opensearch/opensearch.yml:/usr/share/opensearch/config/opensearch.yml
ports:
- "9202:9202"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9202/_cluster/health?wait_for_status=red"]
interval: 2s
timeout: 10s
retries: 5
start_period: 40s

stac-browser:
networks:
Expand Down
198 changes: 191 additions & 7 deletions freva-rest/src/freva_rest/databrowser_api/core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""The core functionality to interact with the apache solr search system."""

import ast
import asyncio
import io
import json
Expand Down Expand Up @@ -1553,9 +1554,98 @@ async def _create_stac_collection(self, collection_id: str) -> pystac.Collection
```
"""
)

stac_static_desc = dedent(
f"""
# STAC Static Catalog Setup
```bash
pip install pystac
```
# Load the STAC Catalog
```python
import pystac
import tarfile
import tempfile
import os
temp_dir = tempfile.mkdtemp(dir='/tmp')
with tarfile.open('stac-catalog-{collection_id}-{self.uniq_key}.tar.gz',
mode='r:gz') as tar:
tar.extractall(path=temp_dir)
cat = pystac.Catalog.from_file(os.path.join(temp_dir, 'catalog.json'))
```
💡: This has been designed to work with the data locally. So you
can copy the catalog link from here and download and load the catalog
locally via the provided script.
"""
)

params_dict = (
ast.literal_eval(str(self.assets_prereqs.get("only_params")))
if self.assets_prereqs.get("only_params", "")
else {}
)
python_params = " ".join(
f"{k}={v},"
for k, v in params_dict.items()
if k not in ("translate", "stac_dynamic")
)
cli_params = " ".join(
f"{k}={v}" for k, v in params_dict.items()
if k not in ("translate", "stac_dynamic")
)

zarr_desc = dedent(
f"""
# Accessing Zarr Data
1. Install freva-client
```bash
pip install freva-client
```
2. Get the auth token and access the zarr data (Python) - recommended
```python
from freva_client import authenticate, databrowser
import xarray as xr
token_info = authenticate(username=<your_username>,\\
host='{self.config.proxy}')
db = databrowser({python_params} stream_zarr=True,\\
host='{self.config.proxy}')
xarray_dataset = xr.open_mfdataset(list(db))
```
3. Get the auth token and access the zarr data (CLI)
```bash
token=$(freva-client auth -u <username> --host {self.config.proxy}\\
|jq -r .access_token)
freva-client databrowser {cli_params} --stream-zarr\\
--host {self.config.proxy}
```
4. Access the zarr data directly (API - language agnostic)
```bash
curl -X GET {self.assets_prereqs.get('base_url')}api/ \\
freva-nextgen/databrowser/load/\\
{self.translator.flavour}?{self.assets_prereqs.get('only_params')} \\
-H "Authorization: Bearer YOUR_ACCESS_TOKEN"
```
💡: Read more about the
[freva-client](https://freva-clint.github.io/freva-nextgen/)
"""
)

local_access_desc = dedent(
f"""
# Accessing data locally where the data is stored
```python
import xarray as xr
ds = xr.open_mfdataset(list('{str(self.assets_prereqs.get("full_endpoint"))
.replace("stac-catalogue", "data-search")}'))
```
💡: Please make sure to have the required xarray packages installed.
"""
)
collection = pystac.Collection(
id=collection_id,
title=f"Dataset {collection_id[:13]}",
title=collection_id,
description=usage_desc.strip(),
extent=pystac.Extent(
# We need to define an initial temporal and spatial extent,
Expand Down Expand Up @@ -1587,14 +1677,28 @@ async def _create_stac_collection(self, collection_id: str) -> pystac.Collection
roles=["metadata"],
media_type="application/json",
),
"stac-static-catalogue": pystac.Asset(
href=str(self.assets_prereqs.get("full_endpoint")).replace(
"stac_dynamic=true", "stac_dynamic=false"
),
title="STAC Static Catalogue",
description=stac_static_desc,
roles=["metadata"],
),
"local-access": pystac.Asset(
href=str(self.assets_prereqs.get("full_endpoint")),
title="Access data locally",
description=local_access_desc,
roles=["metadata"],
),
"zarr-access": pystac.Asset(
href=(
f"{self.assets_prereqs.get('base_url')}api/freva-nextgen/"
f"databrowser/load/{self.translator.flavour}?"
f"{self.assets_prereqs.get('only_params')}"
),
title="Download Zarr Dataset",
description="Direct access to data in Zarr format.",
title="Stream Zarr Dataset",
description=zarr_desc,
roles=["data"],
media_type="application/vnd+zarr",
extra_fields={
Expand Down Expand Up @@ -1757,6 +1861,70 @@ async def _create_stac_item(self, result: Dict[str, Any]) -> pystac.Item:
"""
)
id = result.get(self.uniq_key, "")
params_dict = (
ast.literal_eval(str(self.assets_prereqs.get("only_params")))
if self.assets_prereqs.get("only_params", "")
else {}
)
python_params = " ".join(
f"{k}={v},"
for k, v in params_dict.items()
if k not in ("translate", "stac_dynamic")
) + f" {self.uniq_key}={id}"

cli_params = " ".join(
f"{k}={v}" for k, v in params_dict.items()
if k not in ("translate", "stac_dynamic")
) + f" {self.uniq_key}={id}"

zarr_desc = dedent(
f"""
# Accessing Zarr Data
1. Install freva-client
```bash
pip install freva-client
```
2. Get the auth token and access the zarr data (Python) - recommended
```python
from freva_client import authenticate, databrowser
import xarray as xr
token_info = authenticate(username=<your_username>, \\
host='{self.config.proxy}')
db = databrowser({python_params} {self.uniq_key}={id}, \\
stream_zarr=True, host='{self.config.proxy}')
xarray_dataset = xr.open_mfdataset(list(db))
```
3. Get the auth token and access the zarr data (CLI)
```bash
token=$(freva-client auth -u <username> --host {self.config.proxy}\\
|jq -r .access_token)
freva-client databrowser {cli_params} {self.uniq_key}={id} \\
--stream-zarr --host {self.config.proxy}
```
4. Access the zarr data directly (API - language agnostic)
```bash
curl -X GET {self.assets_prereqs.get('base_url')}api/ \\
freva-nextgen/databrowser/load/\\
{self.translator.flavour}?{self.assets_prereqs.get('only_params')}\\
&{self.uniq_key}={id} \\
-H "Authorization: Bearer YOUR_ACCESS_TOKEN"
```
💡: Read more about the
[freva-client](https://freva-clint.github.io/freva-nextgen/)
"""
) # noqa: E501

local_access_desc = dedent(
f"""
# Accessing data locally where the data is stored
```python
import xarray as xr
ds = xr.open_mfdataset('{id}')
```
💡: Please make sure to have the required xarray packages installed.
"""
)

normalized_id = (
id.replace("https://", "")
.replace("http://", "")
Expand Down Expand Up @@ -1822,8 +1990,9 @@ async def _create_stac_item(self, result: Dict[str, Any]) -> pystac.Item:
href=(
f"{self.assets_prereqs.get('base_url')}databrowser/?"
f"{self.assets_prereqs.get('only_params')}"
f"&{self.uniq_key}={id}"
),
title="Freva Web DaaBrowser",
title="Freva Web DataBrowser",
description=(
"Access the Freva web interface for data exploration and analysis"
),
Expand All @@ -1835,7 +2004,7 @@ async def _create_stac_item(self, result: Dict[str, Any]) -> pystac.Item:
str(self.assets_prereqs.get("full_endpoint")).replace(
"stac-catalogue", "intake-catalogue"
)
+ f"?{self.uniq_key}={id}"
+ f"&{self.uniq_key}={id}"
),
title="Intake Catalogue",
description=intake_desc,
Expand All @@ -1849,7 +2018,7 @@ async def _create_stac_item(self, result: Dict[str, Any]) -> pystac.Item:
f"{self.assets_prereqs.get('only_params')}&{self.uniq_key}={id}"
),
title="Stream Zarr Data",
description="Download the data in Zarr format",
description=zarr_desc,
roles=["data"],
media_type="application/vnd+zarr",
extra_fields={
Expand All @@ -1862,6 +2031,18 @@ async def _create_stac_item(self, result: Dict[str, Any]) -> pystac.Item:
},
},
),
"local-access": pystac.Asset(
href=(
f"{self.assets_prereqs.get('base_url')}api/freva-nextgen/"
f"databrowser/data-search/{self.translator.flavour}/"
f"{self.uniq_key}?"
f"{self.assets_prereqs.get('only_params')}"
f"&{self.uniq_key}={id}"
),
title="Access data locally",
description=local_access_desc,
media_type="application/netcdf"
),
}

for key, asset in assets.items():
Expand Down Expand Up @@ -1948,14 +2129,17 @@ async def init_stac_collection(
Chunks of the tar.gz archive when stac_dynamic is False
"""
logger.info("Creating STAC Catalogue for %s", collection_id)
filtered_params = dict(request.query_params)
filtered_params.pop('translate', None)
filtered_params.pop('stac_dynamic', None)
try:
self.assets_prereqs = {
"base_url": str(self.config.proxy) + "/",
"full_endpoint": (
f"{self.config.proxy}/"
f"{str(request.url).split(str(request.base_url))[-1]}"
),
"only_params": str(request.query_params)
"only_params": str(filtered_params) if filtered_params != {} else "",
}

if stac_dynamic:
Expand Down
3 changes: 2 additions & 1 deletion freva-rest/src/freva_rest/databrowser_api/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,8 @@ async def stac_catalogue(
raise HTTPException(status_code=404, detail="No results found.")
if total_count > max_results and max_results > 0:
raise HTTPException(status_code=413, detail="Result stream too big.")
collection_id = f"{flavour}-{str(uuid.uuid4())}"

collection_id = f"Dataset-{(f'{flavour}-{str(uuid.uuid4())}')[:18]}"
if stac_dynamic:
await stac_instance.stacapi_availability()

Expand Down

0 comments on commit 51e2c9d

Please sign in to comment.