Skip to content

Commit ca33d8d

Browse files
committed
Add new script from open ECMWF retrieval and python requirements
1 parent bf0ef18 commit ca33d8d

File tree

2 files changed

+185
-0
lines changed

2 files changed

+185
-0
lines changed
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
from datetime import datetime, timedelta
2+
import os
3+
import re
4+
import glob
5+
import shutil
6+
import mysql.connector
7+
import pandas as pd
8+
import yaml
9+
from cdo import Cdo
10+
from ecmwf.opendata import Client
11+
from common import USERNAME, PASSWORD
12+
13+
# ---------------------------------------------------------------------
# CDO handle and run configuration
# ---------------------------------------------------------------------
cdo = Cdo()

CONFIG_PATH = '/home/wwcs/wwcs/WWCS/config.yaml'
with open(CONFIG_PATH, 'r') as cfg_fh:
    config = yaml.safe_load(cfg_fh)

# Window lengths (days): training period plus forecast horizon.
train_period = config['train_period']
forecast_days = config['forecast_days']
total_days = train_period + forecast_days

# Geographic bounding box used to crop the global fields.
minlat, maxlat = config['minlat'], config['maxlat']
minlon, maxlon = config['minlon'], config['maxlon']
28+
29+
# ---------------------------------------------------------------------
# Cleanup old files (>60 days)
# ---------------------------------------------------------------------
# Output files carry an ISO date (YYYY-MM-DD) in their name; anything
# dated more than 60 days ago is deleted to keep the directory bounded.
directory_path = "/srv/shiny-server/dashboard/ifsdata"
date_pattern = r'(\d{4})-(\d{2})-(\d{2})'
two_months_ago = datetime.now() - timedelta(days=60)

for filename in os.listdir(directory_path):
    match = re.search(date_pattern, filename)
    if match:
        year, month, day = map(int, match.groups())
        try:
            if datetime(year, month, day) < two_months_ago:
                os.remove(os.path.join(directory_path, filename))
                # FIX: message previously had no placeholder and printed a
                # literal "(unknown)" — report the actual file removed.
                print(f"Deleted old file: {filename}")
        except ValueError:
            # e.g. "2024-13-40" matches the regex but is not a valid date
            print(f"Invalid date in filename: {filename}")
46+
47+
# ---------------------------------------------------------------------
# Paths, download client, date range and station metadata
# ---------------------------------------------------------------------
outdir = "/srv/shiny-server/dashboard/ifsdata"
tmpdir = os.path.join(outdir, "tmp")
os.makedirs(tmpdir, exist_ok=True)

# ECMWF open-data client; lead times 0..240 h in 3-hourly steps.
client = Client(source="ecmwf")
steps = list(range(0, 243, 3))

# One entry per day (oldest first) covering training + forecast window.
_start = datetime.today() - timedelta(days=total_days)
datelist = [day.strftime("%Y-%m-%d")
            for day in pd.date_range(_start, datetime.today())]

# Station coordinates; siteIDs containing "-S" are excluded by the query.
cnx = mysql.connector.connect(user=USERNAME, password=PASSWORD,
                              host='127.0.0.1', database='SitesHumans')
cursor = cnx.cursor(dictionary=True)
cursor.execute("SELECT siteID, latitude, longitude FROM Sites WHERE siteID NOT LIKE '%-S%'")
stations = cursor.fetchall()
68+
69+
# ---------------------------------------------------------------------
# Main loop: for each day, download the IFS ensemble 2 m temperature,
# crop it to the configured area, compute ensemble mean/std, and extract
# nearest-neighbour time series at every station location.
# ---------------------------------------------------------------------
for dat in datelist:
    # Check which per-station output files are still missing for this day.
    missing_files = []
    for s in stations:
        station_file = os.path.join(outdir, f"ifs_{s['siteID'].replace(' ', '')}_{dat}.nc")
        if not os.path.isfile(station_file):
            missing_files.append(s)

    if not missing_files:
        print(f"Skipping {dat}: all station files exist.")
        continue

    # Area file (cropped ensemble mean + std); built once per day, reused
    # for every station extraction below.
    tj_file = os.path.join(outdir, f"tj_area_{dat}.nc")
    if not os.path.isfile(tj_file):
        print(f"\n=== Downloading ECMWF open data for {dat} ===")

        # Download control forecast (00 UTC run), one GRIB per lead time.
        # NOTE(review): this issues one request per step (81 requests),
        # plus 50 x 81 below for the members — confirm the open-data API
        # does not allow a single multi-step retrieval here.
        for step in steps:
            target_cf = os.path.join(tmpdir, f"cf_step_{step}.grb")
            if not os.path.isfile(target_cf):
                client.retrieve(
                    date=dat,
                    time=0,
                    step=step,
                    stream="enfo",
                    levtype="sfc",
                    param="2t",
                    type="cf",
                    target=target_cf
                )

        # Download ensemble members (pf = perturbed forecasts)
        members = range(1, 51)  # ECMWF open data: 50 members
        for m in members:
            for step in steps:
                target_pf = os.path.join(tmpdir, f"pf{m:02d}_step_{step}.grb")
                if not os.path.isfile(target_pf):
                    client.retrieve(
                        date=dat,
                        time=0,
                        step=step,
                        stream="enfo",
                        levtype="sfc",
                        param="2t",
                        type="pf",
                        number=m,
                        target=target_pf
                    )

        # -----------------------------------------------------------------
        # Convert GRIBs to NetCDF and subset to area
        # -----------------------------------------------------------------
        print("Converting GRIB to NetCDF and cropping to area...")
        all_grbs = glob.glob(os.path.join(tmpdir, "*.grb"))
        nc_files = []  # NOTE(review): collected but never read afterwards

        for grb in all_grbs:
            nc_path = grb.replace(".grb", ".nc")
            nc_crop = grb.replace(".grb", "_tj.nc")
            if not os.path.isfile(nc_crop):
                # '-t ecmwf -f nc': apply ECMWF parameter table, write NetCDF.
                cdo.copy(input=grb, output=nc_path, options='-t ecmwf -f nc')
                cdo.sellonlatbox(f"{minlon},{maxlon},{minlat},{maxlat}",
                                 input=nc_path, output=nc_crop)
            nc_files.append(nc_crop)

        # -----------------------------------------------------------------
        # Compute ensemble mean and standard deviation with CDO
        # -----------------------------------------------------------------
        print("Computing ensemble mean and standard deviation...")
        ensmean_file = os.path.join(tmpdir, "output_em.nc")
        ensstd_file = os.path.join(tmpdir, "output_es.nc")

        # Build lists of cropped files by type (control + perturbed members).
        pf_files = sorted(glob.glob(os.path.join(tmpdir, "pf*_tj.nc")))
        cf_files = sorted(glob.glob(os.path.join(tmpdir, "cf*_tj.nc")))

        # Combine CF and PF files (control + members)
        ens_files = cf_files + pf_files

        # Merge time dimension first
        merged_ens = os.path.join(tmpdir, "merged_ens.nc")
        cdo.mergetime(input=" ".join(ens_files), output=merged_ens)

        # Compute mean and std over ensemble members.
        # NOTE(review): ensmean/ensstd normally operate across MULTIPLE input
        # files; applying them to the single mergetime result may not reduce
        # over members as intended — verify against a known-good day.
        cdo.ensmean(input=merged_ens, output=ensmean_file)
        cdo.ensstd(input=merged_ens, output=ensstd_file)

        # Rename variables for the dashboard's expected names.
        # NOTE(review): the leading backslash presumably escapes the
        # digit-initial variable name "2t" for CDO — confirm.
        renamed_em = os.path.join(tmpdir, "output_em_rn.nc")
        renamed_es = os.path.join(tmpdir, "output_es_rn.nc")
        cdo.chname(r"\2t,IFS_T_mea", input=ensmean_file, output=renamed_em)
        cdo.chname(r"\2t,IFS_T_std", input=ensstd_file, output=renamed_es)

        # Merge mean & std into single area file
        cdo.merge(input=f"{renamed_em} {renamed_es}", output=tj_file)
        print(f"Created {tj_file}")

        # Wipe and recreate the scratch directory for the next day.
        shutil.rmtree(tmpdir)
        os.mkdir(tmpdir)

    # ---------------------------------------------------------------------
    # Interpolate to stations using CDO (nearest neighbour); runs even when
    # tj_file already existed, since some station files may still be missing.
    # ---------------------------------------------------------------------
    print(f"Interpolating {dat} to station points ...")
    for s in missing_files:
        fout = os.path.join(outdir, f"ifs_{s['siteID'].replace(' ', '')}_{dat}.nc")
        # CDO point syntax "lon=<x>_lat=<y>" selects a single grid point.
        arg = f"lon={s['longitude']}_lat={s['latitude']}"
        cdo.remapnn(arg, input=tj_file, output=fout)
        print(f" -> {fout}")

print("\nAll done!")
cnx.close()
184+

WWCS/requirements_python.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ pandas
1010
mysql-connector-python
1111
pyyaml
1212
python-dotenv
13+
ecmwf-opendata

0 commit comments

Comments
 (0)