Merged
12 changes: 12 additions & 0 deletions .env
@@ -0,0 +1,12 @@
# Redis
REDIS_HOST=redis
REDIS_PORT=6379
REDIS_DB=0

# NFGDA Service
MAX_CONCURRENT_JOBS=2 # Max number of jobs to run at once
MAX_NO_DATA_POLLS=10 # Polls radar S3 bucket this many times before giving up
FILE_EXPIRATION_TIME=1440 # 24 hours (minutes)

# Backend
MAX_JOB_DURATION=180 # Maximum total duration of job timebox (minutes, default is 3 hours)
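As a sketch of how the Python services might read these settings (key names come from this `.env`; `load_settings` is a hypothetical helper, not part of the codebase — the defaults mirror the documented values):

```python
import os

def load_settings() -> dict:
    """Read the documented environment variables, falling back to the
    defaults listed in .env when a variable is unset."""
    return {
        "redis_host": os.getenv("REDIS_HOST", "redis"),
        "redis_port": int(os.getenv("REDIS_PORT", "6379")),
        "redis_db": int(os.getenv("REDIS_DB", "0")),
        "max_concurrent_jobs": int(os.getenv("MAX_CONCURRENT_JOBS", "2")),
        "max_no_data_polls": int(os.getenv("MAX_NO_DATA_POLLS", "10")),
        "file_expiration_minutes": int(os.getenv("FILE_EXPIRATION_TIME", "1440")),
        "max_job_duration_minutes": int(os.getenv("MAX_JOB_DURATION", "180")),
    }
```

Reading every variable through one helper keeps the `int(...)` conversions in a single place instead of scattered across the services.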
48 changes: 48 additions & 0 deletions .github/workflows/integration-tests.yml
@@ -0,0 +1,48 @@
name: Integration Tests

on:
pull_request:

jobs:
integration-tests:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Install test dependencies
run: pip install pytest requests

- name: Start Docker Compose stack
run: docker compose up -d --build

- name: Wait for backend to be ready
run: |
echo "Waiting for backend..."
for i in $(seq 1 30); do
if curl -sf http://localhost:8001/apis/stations > /dev/null; then
echo "Backend is up"
exit 0
fi
echo " attempt $i/30..."
sleep 5
done
echo "Backend did not become ready in time"
docker compose logs backend
exit 1

- name: Run integration tests (non-slow)
run: pytest test_endpoints.py -v -m "not slow"

- name: Dump logs on failure
if: failure()
run: docker compose logs

- name: Tear down stack
if: always()
run: docker compose down
8 changes: 4 additions & 4 deletions .github/workflows/lint.yml
@@ -1,16 +1,16 @@
name: Lint

on: [push, pull_request]
on: [pull_request]

jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: actions/setup-python@v5
with:
python-version: '3.x'
python-version: "3.x"
- name: Install dependencies
run: pip install ruff
- name: Run linter
run: ruff .
run: ruff check .
50 changes: 33 additions & 17 deletions README.md
@@ -2,43 +2,59 @@

This is a prototype web interface to interact with the gust front detection algorithm found [here](https://github.com/firelab/NFGDA).

# How To Run

First time running project?
1. Navigate to project directory containing `docker-compose.yml'
2. Run `docker compose up -d --build'

1. Navigate to project directory containing `docker-compose.yml`
2. Run `docker compose up -d --build`
3. Navigate to http://localhost:5173
4. Play with it

- To re-launch app, run `docker compose up -d`
- To restart docker containers, run `docker compose restart`


# Frontend

# Backend (/backend and /nfgda_service)

# Backend
Backend directory structure:

- app.py contains the API endpoints
- /apis contains the API endpoint definitions
- API call logic defined in src/
- src/ contains the backend logic (Not responsible for API endpoints that orchestrate or handle HTTP requests - Contains the business logic of the application only)
- src/ contains the backend logic (Not responsible for API endpoints that orchestrate or handle HTTP requests. Contains the business logic of the application only)

NFGDA Service directory structure:

- /nfgda_service contains the NFGDA service logic
- responsible for all NFGDA execution, output processing, and file management

# Todo (before MSU handoff)
There is also a Redis instance at port 6379 where all the job status and asset information is stored.
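For reference, a stored job can be inspected with a small redis-py sketch — the `job:<job_id>` key pattern and field names are taken from `run_request.py` in this PR, while `read_job` itself is a hypothetical helper, not project code:

```python
def job_key(job_id: str) -> str:
    # Key pattern used when a job is enqueued (see run_request.py)
    return f"job:{job_id}"

def read_job(job_id: str, host: str = "redis", port: int = 6379, db: int = 0) -> dict:
    import redis  # redis-py; imported lazily so job_key stays dependency-free
    client = redis.Redis(host=host, port=port, db=db, decode_responses=True)
    # Fields written at enqueue time: stationId, startUtc, endUtc,
    # status, asset_expiry_timestamp
    return client.hgetall(job_key(job_id))
```

The same lookup works from a shell with `redis-cli HGETALL "job:<job_id>"` inside the redis container.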

- Guard against short jobs that run forever for some reason
- Figure out zoom level / blank frame issue on frontend
- Switching to a new station view pauses slide deck playthrough
- Convert geotiff output to cloud-optimized-geotiffs
- Remove "expired" job files and produced resources after 24 hours (set to env variable)
- Figure out what is a "reasonable" time to run a historical job and set a hard limit (Natalie said a few hours, we'll set it to 3 to start with in env)
- Code cleanup / add comments where necessary
# Todo before MSU handoff

- [ ] Figure out zoom level / blank frame issue on frontend
- [ ] Switching to a new station view pauses slide deck playthrough
- [ ] Can we pretty up the landing page? Put a title on it somewhere before the research celebration?
- [ ] Set opacity slider on frontend
- [ ] Enhance resolution of output on frontend
- [ ] Add a "clear" button to the map that clears all job assets from the map
- [ ] Deliver frame time-stamps to the frontend
- [ ] Switch to cloud-optimized geotiffs
- [ ] Move hard-coded values into environment variables instead of scattering them everywhere
- [ ] Discuss pixel-width of gust fronts written to output file next team meeting
- [ ] Diff the NFGDA code used in nfgda_service with the original NFGDA code, see if there are any useful features we're missing out on or bugs we introduced
- [ ] Backend code cleanup / add comments where necessary

# "Nice to have" features

- There should probably be a warning for jobs that produce very few assets (2 frames or fewer). Maybe if not enough assets are produced, the job request could automatically re-run with a larger time window?
- Average time to job completion estimator (small addition: new counter in redis, average out)
- Serve tiles instead of individual GeoTIFFs (big refactor)
- Serve tiles instead of individual GeoTIFFs (big refactor, honestly might not be worth it as cloud-optimized GeoTIFFs are kinda the future anyway)
- Hash job IDs to make them unguessable, so resources can't be directly accessed via URL (little development effort, likely med/large refactor effort)


# Todo after MSU handoff (future devs, read this pls)

- Check that automatic asset deletion occurs within the timeframe specified (should be 24 hours)
- Familiarize with the .env file and environment variables, and what they do
24 changes: 20 additions & 4 deletions backend/apis/retrieve_frames.py
@@ -1,7 +1,23 @@
"""
Frame Data API: returns the list of rendered frames for a completed job.
Frame Data API: returns a single rendered GeoTIFF frame for a completed job.
"""

def get_frames(job_id: str):

pass
import os
from flask import send_file, abort


def get_frame(job_id: str, index: int):
"""Return a single GeoTIFF frame file for the given job and frame index."""
job_dir = "/processed_data/" + job_id
if not os.path.exists(job_dir):
abort(404, description="Job not found")

frame_path = job_dir + f"/frame_{index}.tif"
if not os.path.exists(frame_path):
abort(404, description="Frame not found")

return send_file(
frame_path,
mimetype="image/tiff",
as_attachment=False
)
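A hedged client-side sketch of fetching one frame through this endpoint — the route shape comes from `backend/app.py`, the `localhost:8001` base URL is the one the integration-test workflow polls, and `fetch_frame` is a hypothetical helper, not part of the codebase:

```python
BASE_URL = "http://localhost:8001"

def frame_url(job_id: str, index: int, base: str = BASE_URL) -> str:
    # Route shape: /apis/jobs/<job_id>/frames/<int:index>
    return f"{base}/apis/jobs/{job_id}/frames/{index}"

def fetch_frame(job_id: str, index: int, out_path: str) -> None:
    import requests  # third-party: pip install requests
    resp = requests.get(frame_url(job_id, index), timeout=30)
    resp.raise_for_status()  # the endpoint 404s if the job or frame is missing
    with open(out_path, "wb") as f:
        f.write(resp.content)  # GeoTIFF bytes (image/tiff)
```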
46 changes: 28 additions & 18 deletions backend/apis/run_request.py
@@ -1,4 +1,5 @@

import os
import uuid
from datetime import datetime, timedelta, timezone
from flask import jsonify
@@ -15,14 +16,14 @@ def send_job_to_redis_queue(redis_client, request_fields: dict):
Response shape:
{
"job_id": "<jobId>",
"status": 200
"status": 202
OR
"error": "<error message>",
"status": 400
}
"""

# Validate stationId
# validate stationId
station_id = request_fields.get("stationId")
if not station_id:
return jsonify({"error": "Missing stationId request field"}), 400
@@ -32,7 +33,8 @@ def send_job_to_redis_queue(redis_client, request_fields: dict):
StationService(redis_client).get_station(station_id)
except ValueError:
return jsonify({"error": f"Invalid station ID: {station_id}"}), 400
# Validate and/or set default timebox parameters

# validate and/or set default timebox parameters
validation_error = validate_time_parameters(request_fields)
if validation_error:
return validation_error, 400
@@ -46,11 +48,14 @@ def send_job_to_redis_queue(redis_client, request_fields: dict):

# add job to redis
job_key = f"job:{job_id}"
expiry_minutes = int(os.getenv("FILE_EXPIRATION_TIME", "1440"))
expiry_timestamp = (datetime.now(timezone.utc) + timedelta(minutes=expiry_minutes)).strftime("%Y-%m-%dT%H:%M:%SZ")
redis_client.hset(job_key, mapping={
"stationId": request_fields["stationId"],
"startUtc": request_fields["startUtc"],
"endUtc": request_fields["endUtc"],
"status": "PENDING"
"status": "PENDING",
"asset_expiry_timestamp": expiry_timestamp
})

# push job id to job queue
@@ -63,13 +68,14 @@ def send_job_to_redis_queue(redis_client, request_fields: dict):
def validate_time_parameters(request_fields: dict):
"""Validate the time parameters recieved via the request."""

# Default timebox when not provided: look back 15 minutes from now
# so the algorithm captures 2-3 recent NEXRAD scans for detection + forecast
# (2 scan minimum needed for forcasting)
# Default timebox when not provided: look back over the last ~25 minutes, ending
# 10 minutes ago. The 10-minute buffer ensures the algorithm's end time is always
# fully in the past — if endUtc is too close to "now" the algorithm enters live
# polling mode and runs indefinitely.
now = datetime.now(timezone.utc)
if not request_fields.get("startUtc") and not request_fields.get("endUtc"):
request_fields["startUtc"] = (now - timedelta(minutes=15)).strftime("%Y-%m-%dT%H:%M:%SZ")
request_fields["endUtc"] = now.strftime("%Y-%m-%dT%H:%M:%SZ")
request_fields["startUtc"] = (now - timedelta(minutes=35)).strftime("%Y-%m-%dT%H:%M:%SZ")
request_fields["endUtc"] = (now - timedelta(minutes=10)).strftime("%Y-%m-%dT%H:%M:%SZ")
elif not request_fields.get("startUtc") or not request_fields.get("endUtc"):
return jsonify({"error": "Must provide both startUtc and endUtc, or neither"})

@@ -88,15 +94,19 @@ def validate_time_parameters(request_fields: dict):
if end_utc <= start_utc:
return jsonify({"error": "endUtc must be after startUtc"})

# Duration must be between 5 minutes and 6 hours
# duration must be between 15 minutes and MAX_JOB_DURATION (default is 180 minutes / 3 hours)
max_duration = timedelta(minutes=int(os.getenv("MAX_JOB_DURATION", "180")))
max_hours = max_duration.total_seconds() / 3600
duration = end_utc - start_utc
if duration < timedelta(minutes=5):
return jsonify({"error": "Timebox duration must be at least 5 minutes"})
if duration > timedelta(hours=6):
return jsonify({"error": "Timebox duration must not exceed 6 hours"})

# endUtc must not be in the future
if end_utc > now:
return jsonify({"error": "endUtc must not be later than the current time"})
if duration < timedelta(minutes=15):
return jsonify({"error": "Timebox duration must be at least 15 minutes"})
if duration > max_duration:
return jsonify({"error": f"Timebox duration must not exceed {max_hours:.0f} hours"})

# endUtc must be at least 5 minutes in the past — the algorithm enters a live
# polling loop if endUtc is too close to the current time, causing jobs to run
# indefinitely instead of processing a closed historical window.
if end_utc > now - timedelta(minutes=5):
return jsonify({"error": "endUtc must be at least 5 minutes in the past"})

return None
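The default-timebox rule above can be checked with a small worked example — `default_timebox` mirrors the default branch of `validate_time_parameters` (a ~25-minute window ending 10 minutes in the past), but is a standalone illustration rather than the function itself:

```python
from datetime import datetime, timedelta, timezone

def default_timebox(now: datetime) -> tuple[str, str]:
    """Default window: start 35 minutes ago, end 10 minutes ago, so endUtc
    is always safely in the past and the algorithm never enters live polling."""
    fmt = "%Y-%m-%dT%H:%M:%SZ"
    start = (now - timedelta(minutes=35)).strftime(fmt)
    end = (now - timedelta(minutes=10)).strftime(fmt)
    return start, end

# For a request arriving at 12:00:00 UTC on 2024-06-01:
start, end = default_timebox(datetime(2024, 6, 1, 12, 0, tzinfo=timezone.utc))
# start == "2024-06-01T11:25:00Z", end == "2024-06-01T11:50:00Z"
```

The resulting 25-minute window also clears the 15-minute minimum duration check, so a defaulted request can never fail its own validation.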
30 changes: 8 additions & 22 deletions backend/app.py
@@ -1,18 +1,17 @@
import os
import redis
from flask import Flask, jsonify, request, send_file, abort
from flask import Flask, jsonify, request
from apis.stations import list_stations_api
from apis.run_request import send_job_to_redis_queue
from apis.status import get_job_status
from apis.retrieve_frames import get_frames
from apis.retrieve_frames import get_frame

app = Flask(__name__)

# Connect to the Redis container
redis_client = redis.Redis(host='redis', port=6379, db=0, decode_responses=True)

# Station List API
@app.route("/APIs/stations", methods=["GET"])
@app.route("/apis/stations", methods=["GET"])
def stations_endpoint():
"""
Returns:
@@ -27,7 +26,7 @@ def stations_endpoint():


# Algorithm Runner API
@app.route("/APIs/run", methods=["POST"])
@app.route("/apis/run", methods=["POST"])
def run_endpoint():
"""Takes station and time frame args, kicks off an NFGDA processing job, and returns the new job ID and status code."""
if not request.json:
@@ -37,27 +36,14 @@ def run_endpoint():


# Frame Data API
@app.route("/APIs/jobs/<job_id>/frames/<int:index>", methods=["GET"])
def get_frame(job_id, index):
@app.route("/apis/jobs/<job_id>/frames/<int:index>", methods=["GET"])
def get_frame_endpoint(job_id, index):
"""Takes job ID and frame index, returns a single GeoTIFF file."""

job_dir = "/processed_data/" + job_id
if not os.path.exists(job_dir):
abort(404, description="Job not found")

frame_path = job_dir + f"/frame_{index}.tif"
if not os.path.exists(frame_path):
abort(404, description="Frame not found")

return send_file(
frame_path,
mimetype="image/tiff",
as_attachment=False
)
return get_frame(job_id, index)


# Job Status API
@app.route("/APIs/status", methods=["GET"])
@app.route("/apis/status", methods=["GET"])
def status_endpoint():
"""Takes job ID, returns status."""
job_id = request.args.get("job_id")
20 changes: 11 additions & 9 deletions docker-compose.yml
@@ -2,7 +2,7 @@ services:
redis:
image: redis:7-alpine
ports:
- "6379:6379"
- "${REDIS_PORT}:6379"

nfgda-service:
build:
@@ -14,11 +14,12 @@ services:
- redis
- backend
environment:
- REDIS_HOST=redis
- REDIS_PORT=6379
- REDIS_DB=0
- MAX_CONCURRENT_JOBS=2
- MAX_NO_DATA_POLLS=10
- REDIS_HOST=${REDIS_HOST}
- REDIS_PORT=${REDIS_PORT}
- REDIS_DB=${REDIS_DB}
- MAX_CONCURRENT_JOBS=${MAX_CONCURRENT_JOBS}
- MAX_NO_DATA_POLLS=${MAX_NO_DATA_POLLS}
- FILE_EXPIRATION_TIME=${FILE_EXPIRATION_TIME}
volumes:
- ./nfgda_output:/nfgda_output
- ./processed_data:/processed_data
@@ -44,9 +45,10 @@ services:
depends_on:
- redis
environment:
- REDIS_HOST=redis
- REDIS_PORT=6379
- REDIS_DB=0
- REDIS_HOST=${REDIS_HOST}
- REDIS_PORT=${REDIS_PORT}
- REDIS_DB=${REDIS_DB}
- MAX_JOB_DURATION=${MAX_JOB_DURATION}
volumes:
- ./nfgda_output:/nfgda_output
- ./processed_data:/processed_data