|
| 1 | +from datetime import datetime, timedelta |
| 2 | +import os |
| 3 | +import re |
| 4 | +import glob |
| 5 | +import shutil |
| 6 | +import mysql.connector |
| 7 | +import pandas as pd |
| 8 | +import yaml |
| 9 | +from cdo import Cdo |
| 10 | +from ecmwf.opendata import Client |
| 11 | +from common import USERNAME, PASSWORD |
| 12 | + |
cdo = Cdo()

# ---------------------------------------------------------------------
# Load configuration
# ---------------------------------------------------------------------
# Site-wide settings (training window, forecast horizon, bounding box)
# live in a single YAML file maintained by the WWCS deployment.
CONFIG_PATH = '/home/wwcs/wwcs/WWCS/config.yaml'
with open(CONFIG_PATH, 'r') as cfg_file:
    config = yaml.safe_load(cfg_file)

train_period = config['train_period']    # days of history used for training
forecast_days = config['forecast_days']  # days of forecast to keep
maxlat = config['maxlat']
minlat = config['minlat']
maxlon = config['maxlon']
minlon = config['minlon']

# Total span of dates we need local forecast files for.
total_days = train_period + forecast_days
| 28 | + |
| 29 | +# --------------------------------------------------------------------- |
# ---------------------------------------------------------------------
# Cleanup old files (>60 days)
# ---------------------------------------------------------------------
# Forecast files carry an embedded ISO date (YYYY-MM-DD) in their name;
# anything older than roughly two months is no longer used by the
# dashboard and is deleted.
directory_path = "/srv/shiny-server/dashboard/ifsdata"
date_pattern = r'(\d{4})-(\d{2})-(\d{2})'
two_months_ago = datetime.now() - timedelta(days=60)

for filename in os.listdir(directory_path):
    match = re.search(date_pattern, filename)
    if not match:
        continue
    year, month, day = map(int, match.groups())
    try:
        file_date = datetime(year, month, day)
    except ValueError:
        # Digits matched the pattern but are not a real calendar date
        # (e.g. "2024-02-31").
        print(f"Invalid date in filename: {filename}")
        continue
    if file_date < two_months_ago:
        os.remove(os.path.join(directory_path, filename))
        # Bug fix: these messages previously had no placeholder and
        # printed a literal instead of the actual filename.
        print(f"Deleted old file: {filename}")
| 46 | + |
# ---------------------------------------------------------------------
# Setup variables
# ---------------------------------------------------------------------
outdir = "/srv/shiny-server/dashboard/ifsdata"
tmpdir = os.path.join(outdir, "tmp")
os.makedirs(tmpdir, exist_ok=True)

# ECMWF open-data client; forecast lead times every 3 h out to 240 h.
client = Client(source="ecmwf")
steps = list(range(0, 243, 3))  # 0–240 hours

# One entry per calendar day covering the full training + forecast span,
# ending today.
start_date = datetime.today() - timedelta(days=total_days)
datelist = [d.strftime("%Y-%m-%d")
            for d in pd.date_range(start_date, datetime.today())]
| 59 | + |
# ---------------------------------------------------------------------
# Get station coordinates
# ---------------------------------------------------------------------
# Pull every station's position from the site database; the NOT LIKE
# filter excludes secondary ("-S") site IDs.
cnx = mysql.connector.connect(
    user=USERNAME,
    password=PASSWORD,
    host='127.0.0.1',
    database='SitesHumans',
)
cursor = cnx.cursor(dictionary=True)
query = "SELECT siteID, latitude, longitude FROM Sites WHERE siteID NOT LIKE '%-S%'"
cursor.execute(query)
stations = cursor.fetchall()
| 68 | + |
# ---------------------------------------------------------------------
# Main loop
# ---------------------------------------------------------------------
# For each day in the window: skip if every station file already exists;
# otherwise build (or reuse) the cropped ensemble area file for that day
# and interpolate it to each missing station point.
for dat in datelist:
    # Check if station files exist
    missing_files = []
    for s in stations:
        # Station filenames embed the siteID with spaces stripped.
        station_file = os.path.join(outdir, f"ifs_{s['siteID'].replace(' ', '')}_{dat}.nc")
        if not os.path.isfile(station_file):
            missing_files.append(s)

    if not missing_files:
        print(f"Skipping {dat}: all station files exist.")
        continue

    # Area file holding ensemble mean/std for this date; built once and
    # reused for every station interpolation below.
    tj_file = os.path.join(outdir, f"tj_area_{dat}.nc")
    if not os.path.isfile(tj_file):
        print(f"\n=== Downloading ECMWF open data for {dat} ===")

        # Download control forecast (one GRIB per lead time).
        # Only the 00 UTC run is used (time=0).
        for step in steps:
            target_cf = os.path.join(tmpdir, f"cf_step_{step}.grb")
            if not os.path.isfile(target_cf):
                client.retrieve(
                    date=dat,
                    time=0,
                    step=step,
                    stream="enfo",
                    levtype="sfc",
                    param="2t",
                    type="cf",
                    target=target_cf
                )

        # Download ensemble members (pf = perturbed forecasts)
        members = range(1, 51)  # ECMWF open data: 50 members
        for m in members:
            for step in steps:
                target_pf = os.path.join(tmpdir, f"pf{m:02d}_step_{step}.grb")
                if not os.path.isfile(target_pf):
                    client.retrieve(
                        date=dat,
                        time=0,
                        step=step,
                        stream="enfo",
                        levtype="sfc",
                        param="2t",
                        type="pf",
                        number=m,
                        target=target_pf
                    )

        # -----------------------------------------------------------------
        # Convert GRIBs to NetCDF and subset to area
        # -----------------------------------------------------------------
        print("Converting GRIB to NetCDF and cropping to area...")
        all_grbs = glob.glob(os.path.join(tmpdir, "*.grb"))
        nc_files = []

        for grb in all_grbs:
            nc_path = grb.replace(".grb", ".nc")
            nc_crop = grb.replace(".grb", "_tj.nc")
            if not os.path.isfile(nc_crop):
                # '-t ecmwf -f nc': use ECMWF parameter table and emit NetCDF.
                cdo.copy(input=grb, output=nc_path, options='-t ecmwf -f nc')
                # Crop to the configured lon/lat bounding box.
                cdo.sellonlatbox(f"{minlon},{maxlon},{minlat},{maxlat}",
                                 input=nc_path, output=nc_crop)
            nc_files.append(nc_crop)

        # -----------------------------------------------------------------
        # Compute ensemble mean and standard deviation with CDO
        # -----------------------------------------------------------------
        print("Computing ensemble mean and standard deviation...")
        ensmean_file = os.path.join(tmpdir, "output_em.nc")
        ensstd_file = os.path.join(tmpdir, "output_es.nc")

        # Build lists of cropped files by type
        pf_files = sorted(glob.glob(os.path.join(tmpdir, "pf*_tj.nc")))
        cf_files = sorted(glob.glob(os.path.join(tmpdir, "cf*_tj.nc")))

        # Combine CF and PF files (control + members)
        ens_files = cf_files + pf_files

        # Merge time dimension first
        # NOTE(review): mergetime concatenates all 51 member files before
        # ensmean/ensstd — presumably the per-step file layout makes this
        # produce per-member records; confirm against the CDO pipeline docs.
        merged_ens = os.path.join(tmpdir, "merged_ens.nc")
        cdo.mergetime(input=" ".join(ens_files), output=merged_ens)

        # Compute mean and std over ensemble members
        cdo.ensmean(input=merged_ens, output=ensmean_file)
        cdo.ensstd(input=merged_ens, output=ensstd_file)

        # Rename variables
        # NOTE(review): the leading backslash escapes the variable name
        # "2t" for CDO's chname parser — verify this matches the variable
        # name produced by the '-t ecmwf' conversion above.
        renamed_em = os.path.join(tmpdir, "output_em_rn.nc")
        renamed_es = os.path.join(tmpdir, "output_es_rn.nc")
        cdo.chname(r"\2t,IFS_T_mea", input=ensmean_file, output=renamed_em)
        cdo.chname(r"\2t,IFS_T_std", input=ensstd_file, output=renamed_es)

        # Merge mean & std into single area file
        cdo.merge(input=f"{renamed_em} {renamed_es}", output=tj_file)
        print(f"Created {tj_file}")

        # Wipe the scratch directory so the next date starts clean.
        shutil.rmtree(tmpdir)
        os.mkdir(tmpdir)

    # ---------------------------------------------------------------------
    # Interpolate to stations using CDO (nearest neighbour)
    # ---------------------------------------------------------------------
    print(f"Interpolating {dat} to station points ...")
    for s in missing_files:
        fout = os.path.join(outdir, f"ifs_{s['siteID'].replace(' ', '')}_{dat}.nc")
        # remapnn point-target syntax: "lon=<x>_lat=<y>".
        arg = f"lon={s['longitude']}_lat={s['latitude']}"
        cdo.remapnn(arg, input=tj_file, output=fout)
        print(f" -> {fout}")
| 181 | + |
print("\nAll done!")
# Release the MySQL connection opened at module scope.
cnx.close()
| 184 | + |
0 commit comments