diff --git a/docs/configuring_aws_server.md b/docs/configuring_aws_server.md new file mode 100644 index 000000000..ec786a890 --- /dev/null +++ b/docs/configuring_aws_server.md @@ -0,0 +1,175 @@ +# Quick guide to test R2D2 server with Swell on Discover + +[Test outside of Swell](#2-to-test-outside-of-swell) + +## 1. To test within Swell: + +Make sure `~/.swell/r2d2_credentials.yaml` exists and contains your R2D2 settings (`user`, `api_key`, `r2d2_host`, `r2d2_compiler`, and the R2D2 server host/port) together with your AWS credentials for S3 access. + +#### a. Set ~/.swell/r2d2_credentials.yaml + +```yaml +# R2D2 API credentials +user: +api_key: +r2d2_host: discover +r2d2_compiler: intel +r2d2_server_host: "" +r2d2_server_port: "8080" + +# For S3 access +aws_access_key_id : +aws_secret_access_key : +aws_session_token : "" + +``` + +#### b. Quick test + +Run `IngestObs` directly without launching a full workflow: + +```bash +# Create the experiment +swell create ingest_obs_marine + +# Edit the generated experiment.yaml: +# - dry_run: false +# - obs_to_ingest: ['adt_cryosat2n'] + +# Run the task +swell task IngestObs /path/to/suite/swell-ingest_obs/swell-ingest_obs-suite/experiment.yaml \ + -d 2021-07-02T06:00:00Z \ + -m geos_marine +``` + +### Verify it is stored + +```bash +python3 -c " +import r2d2 +results = r2d2.search( + item='observation', + observation_type='adt_cryosat2n', + window_start='20210702T060000Z', + window_length='PT6H' +) +print(f'Found {len(results)} records') +for r in results: + print(r) +" +``` + +### Test fetch + +```bash +python3 -c " +import r2d2 +r2d2.fetch( + item='observation', + provider='odas', + observation_type='adt_cryosat2n', + file_extension='nc', + window_start='20210702T090000Z', + window_length='PT6H', + target_file='./test_fetch.nc' +) +print('Fetch OK') +" +``` + +### Full workflow test + +```bash +swell create ingest_obs_marine +# Edit experiment.yaml: dry_run: false +# Run the suite +swell launch /path/to/suite/swell-ingest_obs/swell-ingest_obs-suite +``` + +This runs `IngestObs` for every cycle time across the date range.
+ +## 2. To test outside of Swell: + +#### a. Set up your environment + +Copy or create the required scripts into your working directory: +- [load_r2d2.sh](../src/swell/utilities/scripts/load_r2d2.sh) +- [prod_setup_env.sh](../src/swell/utilities/scripts/prod_setup_env.sh) + +Then source them to load R2D2 and set environment variables: + +```bash +source load_r2d2.sh +source prod_setup_env.sh +``` + +#### b. Configure AWS credentials: + +Set up AWS credentials for S3 access. + +```bash +mkdir -p ~/.aws + +cat >> ~/.aws/credentials << 'EOF' +[aws-us-east-1] +aws_access_key_id = ACCESS_KEY +aws_secret_access_key = SECRET_KEY +EOF + +cat >> ~/.aws/config << 'EOF' +[profile aws-us-east-1] +region = us-east-1 +EOF +``` + +#### c. Test R2D2 store/fetch from Discover + +```bash +python3 << 'EOF' +import r2d2 + +# Test metadata (API only) +print("Data hubs:") +for h in r2d2.search(item='data_hub'): + print(f" {h.get('name')} ({h.get('platform')})") + +print("Data stores:") +for s in r2d2.search(item='data_store'): + print(f" {s.get('name')}") + +print("Compute hosts:") +for c in r2d2.search(item='compute_host'): + print(f" {c.get('name')}") + +# Test store (API + S3) +import tempfile, os +test_file = os.path.join(tempfile.gettempdir(), 'r2d2_test.txt') +with open(test_file, 'w') as f: + f.write("test from Discover\n") + +r2d2.store( + item='observation', + provider='test', + observation_type='test_obs', + file_extension='txt', + window_start='20240101T120000Z', + window_length='PT6H', + source_file=test_file +) +print("Store OK") + +# Test fetch (API + S3) +fetch_file = os.path.join(tempfile.gettempdir(), 'r2d2_fetched.txt') +r2d2.fetch( + item='observation', + provider='test', + observation_type='test_obs', + file_extension='txt', + window_start='20240101T120000Z', + window_length='PT6H', + target_file=fetch_file +) +print(f"Fetch OK: {open(fetch_file).read().strip()}") +EOF +``` + diff --git a/docs/examples/r2d2/ingest_obs.md b/docs/examples/r2d2/ingest_obs.md index 
e682f137a..e165aeae5 100644 --- a/docs/examples/r2d2/ingest_obs.md +++ b/docs/examples/r2d2/ingest_obs.md @@ -14,8 +14,8 @@ Add your observation name to the list and set the date range: ```python qd.obs_to_ingest(['adt_cryosat2n', 'my_obs']) # Add yours here -qd.start_cycle_point("2021-07-02T06:00:00Z") # When to start -qd.final_cycle_point("2021-07-03T06:00:00Z") # When to stop +qd.start_cycle_point("2023-07-02T06:00:00Z") # When to start +qd.final_cycle_point("2023-07-03T06:00:00Z") # When to stop ``` ### Step 2: Make sure your observation is registered diff --git a/src/swell/tasks/ingest_obs.py b/src/swell/tasks/ingest_obs.py index 2f5cad8f3..708d6e6bb 100644 --- a/src/swell/tasks/ingest_obs.py +++ b/src/swell/tasks/ingest_obs.py @@ -153,7 +153,8 @@ def process_obs_config( provider = get_provider_for_observation( obs_name, self.ioda_names_list, self.logger) - retrieval_method = config.get('retrieval_method') # cp or s3 + # where the files are located + retrieval_method = config.get('retrieval_method') # Determine source pattern based on method source_pattern = config.get( diff --git a/src/swell/utilities/r2d2.py b/src/swell/utilities/r2d2.py index ae5df50a0..b40449648 100644 --- a/src/swell/utilities/r2d2.py +++ b/src/swell/utilities/r2d2.py @@ -106,6 +106,7 @@ def _get_platform_r2d2_config(logger: Logger, platform: str = None) -> tuple: return None, None # Platform-specific R2D2 configurations + # Note: ~/.swell/r2d2_credentials.yaml overrides these values if specified platform_configs = { 'nccs_discover_sles15': { 'host': 'discover-gmao', @@ -115,10 +116,6 @@ def _get_platform_r2d2_config(logger: Logger, platform: str = None) -> tuple: 'host': 'discover-gmao', 'compiler': 'intel' }, - 'aws': { - 'host': 'aws-gmao', - 'compiler': 'intel' # or 'gnu' depending on AWS setup - }, 'generic': { 'host': None, 'compiler': None @@ -180,10 +177,26 @@ def load_r2d2_credentials( if 'api_key' in credentials and 'R2D2_API_KEY' not in os.environ: os.environ['R2D2_API_KEY'] = 
credentials['api_key'] + if 'r2d2_server_host' in credentials and 'R2D2_SERVER_HOST' not in os.environ: + os.environ['R2D2_SERVER_HOST'] = credentials['r2d2_server_host'] + + if 'r2d2_server_port' in credentials and 'R2D2_SERVER_PORT' not in os.environ: + os.environ['R2D2_SERVER_PORT'] = str(credentials['r2d2_server_port']) + + if 'aws_access_key_id' in credentials and 'AWS_ACCESS_KEY_ID' not in os.environ: + os.environ['AWS_ACCESS_KEY_ID'] = credentials['aws_access_key_id'] + + if 'aws_secret_access_key' in credentials and 'AWS_SECRET_ACCESS_KEY' not in os.environ: + os.environ['AWS_SECRET_ACCESS_KEY'] = credentials['aws_secret_access_key'] + + if 'aws_session_token' in credentials and 'AWS_SESSION_TOKEN' not in os.environ: + os.environ['AWS_SESSION_TOKEN'] = credentials['aws_session_token'] + # Set host and compiler (YAML config takes precedence over platform detection) - if 'host' in credentials and 'R2D2_HOST' not in os.environ: - os.environ['R2D2_HOST'] = credentials['host'] - logger.info(f"Using platform host '{r2d2_host}' (overriding YAML '{credentials['host']}')") + if 'r2d2_host' in credentials and 'R2D2_HOST' not in os.environ: + os.environ['R2D2_HOST'] = credentials['r2d2_host'] + logger.info(f"Using platform host '{r2d2_host}' \ + (overriding YAML '{credentials['r2d2_host']}')") logger.warning("Using host from YAML file") elif r2d2_host and 'R2D2_HOST' not in os.environ: @@ -191,10 +204,10 @@ def load_r2d2_credentials( logger.info(f"Set R2D2_HOST={r2d2_host} from platform configuration") # Set compiler - if 'compiler' in credentials and 'R2D2_COMPILER' not in os.environ: - os.environ['R2D2_COMPILER'] = credentials['compiler'] + if 'r2d2_compiler' in credentials and 'R2D2_COMPILER' not in os.environ: + os.environ['R2D2_COMPILER'] = credentials['r2d2_compiler'] logger.info(f"Using platform compiler '{r2d2_compiler}' \ - (overriding YAML '{credentials['compiler']}')") + (overriding YAML '{credentials['r2d2_compiler']}')") logger.warning("Using compiler 
from YAML file") elif r2d2_compiler and 'R2D2_COMPILER' not in os.environ: diff --git a/src/swell/utilities/scripts/prod_setup_env.sh b/src/swell/utilities/scripts/prod_setup_env.sh index 48531d2b8..605460ee2 100644 --- a/src/swell/utilities/scripts/prod_setup_env.sh +++ b/src/swell/utilities/scripts/prod_setup_env.sh @@ -3,15 +3,14 @@ unset R2D2_SERVER_PORT export R2D2_USER=username export R2D2_API_KEY=api_key -export R2D2_HOST=discover-gmao -export R2D2_COMPILER=intel - -source venv_client/bin/activate +export R2D2_HOST="discover" +export R2D2_COMPILER="intel" +export R2D2_SERVER_HOST="http://13.217.72.149" +export R2D2_SERVER_PORT="8080" echo “ R2D2 Production environment:” echo “ R2D2_API_KEY: [set]” -echo “ R2D2_SERVER_HOST: $R2D2_SERVER_HOST '(should be empty)'” -echo “ R2D2_SERVER_PORT: $R2D2_SERVER_PORT '(should be empty)'” -echo “ - Client should default to https://r2d2-api.jcsda.org” +echo “ R2D2_SERVER_HOST: $R2D2_SERVER_HOST ” +echo “ R2D2_SERVER_PORT: $R2D2_SERVER_PORT ” echo “ R2D2_HOST: $R2D2_HOST” echo “ R2D2_COMPILER: $R2D2_COMPILER” \ No newline at end of file