Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,3 @@ r2d2_credentials.yaml
GEOS_mksi/
jedi_bundle/
output/


*.md
173 changes: 173 additions & 0 deletions docs/configuring_aws_server.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
# Quick guide to test R2D2 server with Swell on Discover

[Test outside of Swell](#to-test-outside-of-swell)

## 1. To test within Swell:

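Make sure `~/.swell/r2d2_credentials.yaml` exists and contains your R2D2 settings (`user`, `api_key`, `r2d2_host`, `r2d2_compiler`, `r2d2_server_host`, `r2d2_server_port`) as well as the AWS credentials needed for S3 access.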

#### a. Set ~/.swell/r2d2_credentials.yaml

```yaml
# R2D2 API credentials
user: <your_username>
api_key: <your_key>
r2d2_host: discover
r2d2_compiler: intel
r2d2_server_host: "<enter_ip_address>"
r2d2_server_port: "8080"

# For S3 access
aws_access_key_id : <access_key_id>
aws_secret_access_key : <secret_access_key>
aws_session_token : "<session_token>"

```

#### b. Quick test

Run `IngestObs` directly without launching a full workflow:

```bash
# Create the experiment
swell create ingest_obs_marine

# Edit the generated experiment.yaml:
# - dry_run: false
# - obs_to_ingest: ['adt_cryosat2n']

# Run the task
swell task IngestObs /path/to/suite/swell-ingest_obs/swell-ingest_obs-suite/experiment.yaml \
-d 2021-07-02T06:00:00Z \
-m geos_marine
```

## 2. Verify the observation was stored

```bash
python3 -c "
import r2d2
results = r2d2.search(
item='observation',
observation_type='adt_cryosat2n'
)
print(f'Found {len(results)} records')
for r in results:
    print(r)
"
```

## 3. Test fetch

```bash
python3 -c "
import r2d2
r2d2.fetch(
item='observation',
provider='odas',
observation_type='adt_cryosat2n',
file_extension='nc',
window_start='20210702T090000Z',
window_length='PT6H',
target_file='./test_fetch.nc'
)
print('Fetch OK')
"
```

## 4. Full workflow test

```bash
swell create ingest_obs_marine
# Edit experiment.yaml: dry_run: false
# Run the suite
swell launch /path/to/suite/swell-ingest_obs/swell-ingest_obs-suite
```

This runs `IngestObs` for every cycle time across the date range.

# To test outside of Swell:

#### a. Set up your environment

Copy the required scripts into your working directory (or create them there):
- [load_r2d2.sh](../src/swell/utilities/scripts/load_r2d2.sh)
- [prod_setup_env.sh](../src/swell/utilities/scripts/prod_setup_env.sh)

Then source them to load R2D2 and set environment variables:

```bash
source load_r2d2.sh
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this line in load_r2d2.sh?

# load spack-stack modules
mod_swell

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It initializes spack-stack on Discover so that the subsequent `module load r2d2-client/...` command works. Is this alias (`mod_swell`) defined for you? I assume it isn't for most people, so I'll need to change it.

source prod_setup_env.sh
```

#### b. Configure AWS credentials

Set up AWS credentials for S3 access.

```bash
mkdir -p ~/.aws

cat >> ~/.aws/credentials << 'EOF'
[aws-us-east-1]
aws_access_key_id = ACCESS_KEY
aws_secret_access_key = SECRET_KEY
EOF

cat >> ~/.aws/config << 'EOF'
[profile aws-us-east-1]
region = us-east-1
EOF
```

#### c. Test R2D2 store/fetch from Discover

```bash
python3 << 'EOF'
import r2d2

# Test metadata (API only)
print("Data hubs:")
for h in r2d2.search(item='data_hub'):
    print(f" {h.get('name')} ({h.get('platform')})")

print("Data stores:")
for s in r2d2.search(item='data_store'):
    print(f" {s.get('name')}")

print("Compute hosts:")
for c in r2d2.search(item='compute_host'):
    print(f" {c.get('name')}")

# Test store (API + S3)
import tempfile, os
test_file = os.path.join(tempfile.gettempdir(), 'r2d2_test.txt')
with open(test_file, 'w') as f:
    f.write("test from Discover\n")

r2d2.store(
item='observation',
provider='test',
observation_type='test_obs',
file_extension='txt',
window_start='20240101T120000Z',
window_length='PT6H',
source_file=test_file
)
print("Store OK")

# Test fetch (API + S3)
fetch_file = os.path.join(tempfile.gettempdir(), 'r2d2_fetched.txt')
r2d2.fetch(
item='observation',
provider='test',
observation_type='test_obs',
file_extension='txt',
window_start='20240101T120000Z',
window_length='PT6H',
target_file=fetch_file
)
print(f"Fetch OK: {open(fetch_file).read().strip()}")
EOF
```

3 changes: 2 additions & 1 deletion src/swell/tasks/ingest_obs.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ def process_obs_config(
provider = get_provider_for_observation(
obs_name, self.ioda_names_list, self.logger)

retrieval_method = config.get('retrieval_method') # cp or s3
# where the files are located
retrieval_method = config.get('retrieval_method')

# Determine source pattern based on method
source_pattern = config.get(
Expand Down
33 changes: 23 additions & 10 deletions src/swell/utilities/r2d2.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def _get_platform_r2d2_config(logger: Logger, platform: str = None) -> tuple:
return None, None

# Platform-specific R2D2 configurations
# Note: ~/.swell/r2d2_credentials.yaml overrides these values if specified
platform_configs = {
'nccs_discover_sles15': {
'host': 'discover-gmao',
Expand All @@ -75,10 +76,6 @@ def _get_platform_r2d2_config(logger: Logger, platform: str = None) -> tuple:
'host': 'discover-gmao',
'compiler': 'intel'
},
'aws': {
'host': 'aws-gmao',
'compiler': 'intel' # or 'gnu' depending on AWS setup
},
'generic': {
'host': None,
'compiler': None
Expand Down Expand Up @@ -138,21 +135,37 @@ def load_r2d2_credentials(
if 'api_key' in credentials and 'R2D2_API_KEY' not in os.environ:
os.environ['R2D2_API_KEY'] = credentials['api_key']

if 'r2d2_server_host' in credentials and 'R2D2_HOST' not in os.environ:
os.environ['R2D2_SERVER_HOST'] = credentials['r2d2_server_host']

if 'r2d2_server_port' in credentials and 'R2D2_SERVER_PORT' not in os.environ:
os.environ['R2D2_SERVER_PORT'] = str(credentials['r2d2_server_port'])

if 'aws_access_key_id' in credentials and 'AWS_ACCESS_KEY_ID' not in os.environ:
os.environ['AWS_ACCESS_KEY_ID'] = credentials['aws_access_key_id']

if 'aws_secret_access_key' in credentials and 'AWS_SECRET_ACCESS_KEY' not in os.environ:
os.environ['AWS_SECRET_ACCESS_KEY'] = credentials['aws_secret_access_key']

if 'aws_session_token' in credentials and 'AWS_SESSION_TOKEN' not in os.environ:
os.environ['AWS_SESSION_TOKEN'] = credentials['aws_session_token']

# Set host and compiler (YAML config takes precedence over platform detection)
if 'host' in credentials and 'R2D2_HOST' not in os.environ:
os.environ['R2D2_HOST'] = credentials['host']
logger.info(f"Using platform host '{r2d2_host}' (overriding YAML '{credentials['host']}')")
if 'r2d2_host' in credentials and 'R2D2_HOST' not in os.environ:
os.environ['R2D2_HOST'] = credentials['r2d2_host']
logger.info(f"Using platform host '{r2d2_host}' \
(overriding YAML '{credentials['r2d2_host']}')")
logger.warning("Using host from YAML file")

elif r2d2_host and 'R2D2_HOST' not in os.environ:
os.environ['R2D2_HOST'] = r2d2_host
logger.info(f"Set R2D2_HOST={r2d2_host} from platform configuration")

# Set compiler
if 'compiler' in credentials and 'R2D2_COMPILER' not in os.environ:
os.environ['R2D2_COMPILER'] = credentials['compiler']
if 'r2d2_compiler' in credentials and 'R2D2_COMPILER' not in os.environ:
os.environ['R2D2_COMPILER'] = credentials['r2d2_compiler']
logger.info(f"Using platform compiler '{r2d2_compiler}' \
(overriding YAML '{credentials['compiler']}')")
(overriding YAML '{credentials['r2d2_compiler']}')")
logger.warning("Using compiler from YAML file")

elif r2d2_compiler and 'R2D2_COMPILER' not in os.environ:
Expand Down
7 changes: 4 additions & 3 deletions src/swell/utilities/scripts/prod_setup_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@ unset R2D2_SERVER_PORT

export R2D2_USER=username
export R2D2_API_KEY=api_key
export R2D2_HOST=discover-gmao
export R2D2_COMPILER=intel
export R2D2_HOST="discover"
export R2D2_COMPILER="intel"
export R2D2_SERVER_HOST="http://13.217.72.149"
export R2D2_SERVER_PORT="8080"

source venv_client/bin/activate

echo “ R2D2 Production environment:”
echo “ R2D2_API_KEY: [set]”
echo “ R2D2_SERVER_HOST: $R2D2_SERVER_HOST '(should be empty)'”
echo “ R2D2_SERVER_PORT: $R2D2_SERVER_PORT '(should be empty)'”
echo “ - Client should default to https://r2d2-api.jcsda.org”
echo “ R2D2_HOST: $R2D2_HOST”
echo “ R2D2_COMPILER: $R2D2_COMPILER”
Loading