diff --git a/graphics/AnalyzeStats.py b/graphics/AnalyzeStats.py index 96915f1..ffc2f79 100755 --- a/graphics/AnalyzeStats.py +++ b/graphics/AnalyzeStats.py @@ -12,6 +12,7 @@ import logsetup import multiprocessing as mp import os +import pandas as pd from analysis.Analyses import Analyses import analysis.StatisticsDatabase as sdb @@ -66,12 +67,8 @@ def main(): # where each pair is colon delimited, e.g. exp1:long_name_exp1,exp2:long_name_exp2 exps = args.experiments.split(',') - firstCycle = None - lastCycle = None - if args.firstCycle: - firstCycle= dt.datetime.strptime(args.firstCycle, '%Y%m%dT%H') - if args.lastCycle: - lastCycle= dt.datetime.strptime(args.lastCycle, '%Y%m%dT%H') + firstCycle = pd.to_datetime(args.firstCycle) + lastCycle = pd.to_datetime(args.lastCycle) anconf.adjust_experiments(args.controlExperiment, exps, args.verifySpace, args.verifyType, firstCycle, lastCycle) diff --git a/graphics/analysis/AnalysisBase.py b/graphics/analysis/AnalysisBase.py index a3acd69..f4d6022 100644 --- a/graphics/analysis/AnalysisBase.py +++ b/graphics/analysis/AnalysisBase.py @@ -10,7 +10,6 @@ import logging import numpy as np from pathlib import Path -import plot_utils as pu import os import var_utils as vu import modelsp_utils as mu @@ -239,22 +238,26 @@ def initLimits(dmin0=np.NaN, dmax0=np.NaN, d=None): dmax = np.nanmax(d) return dmin, dmax + def oneHundredCenteredLimiter(self, dmin0=np.NaN, dmax0=np.NaN, d=None): - dmin, dmax = self.initLimits(dmin0, dmax0, d) + dmin, dmax = self.initLimits(dmin0, dmax0, d) - # update dmax, conditional on dmin - dmax = np.nanmax([dmax, 100.0/(dmin/100.0)]) + # Clamp dmin to its "Safety Envelope" (66.7% to 98.0%) + # This prevents division by zero and keeps the plot from being too squashed or too wide. + dmin = np.nanmax([dmin, 66.7]) + dmin = np.nanmin([dmin, 98.0]) - # set absolute min/max in case there are large outliers - #if np.isfinite(dmin): - dmin = np.nanmax([dmin, 66.7]) - dmin = np.nanmin([dmin, 98.0]) + # Clamp dmax to its "Safety Envelope" (102.0% to 150.0%) + dmax = np.nanmax([dmax, 102.0]) + dmax = np.nanmin([dmax, 150.0]) - #if np.isfinite(dmax): - dmax = np.nanmin([dmax, 150.0]) - dmax = np.nanmax([dmax, 102.0]) + # Symmetry Check + # Since dmin is at least 66.7, 100/(66.7/100) is ~150. + # This will never exceed our 150.0 ceiling. + dmax = np.nanmax([dmax, 100.0 / (dmin / 100.0)]) + + return dmin, dmax - return dmin, dmax def zeroCenteredPercentDifference(self, experiment, @@ -292,23 +295,24 @@ def zeroCenteredLabeler(self): #return '100 x (EXP-'+self.cntrlExpName+') / \n'+self.cntrlExpName+'' #return '100 x (EXP-CONTROL)/\nCONTROL' + def zeroCenteredLimiter(self, dmin0=np.NaN, dmax0=np.NaN, d=None): - dmin, dmax = self.initLimits(dmin0, dmax0, d) + dmin, dmax = self.initLimits(dmin0, dmax0, d) - # update dmax, conditional on dmin - #dmax = np.nanmax([dmax, 100.0/((100.0+dmin)/100.0) - 100.0]) - dmax = np.nanmax([dmax, 1.0/dmin]) + # Ensures we always see at least -2.0 to +2.0 + dmin = np.nanmin([dmin, -2.0]) + dmax = np.nanmax([dmax, 2.0]) - # set absolute min/max in case there are large outliers - #if np.isfinite(dmin): - dmin = np.nanmax([dmin, -33.3]) - dmin = np.nanmin([dmin, -2.0]) + # Prevents one bad observation from squishing the whole plot + dmin = np.nanmax([dmin, -33.3]) + dmax = np.nanmin([dmax, 50.0]) - #if np.isfinite(dmax): - dmax = np.nanmin([dmax, 50.0]) - dmax = np.nanmax([dmax, 2.0]) + # Symmetry (Optional/Legacy logic) + # With dmin clamped to -2.0, this maxes at 0.5. + # It won't override the 2.0 floor, but it's safe to keep. + dmax = np.nanmax([dmax, 1.0 / np.abs(dmin)]) - return dmin, dmax + return dmin, dmax def binMethodFile(self, binMethod, before = True): @@ -357,7 +361,7 @@ def statPlotAttributes(self, diagnosticGroup, statName, truncateDiagnostics = ommDiagnostics+mmoDiagnostics diagnosticGroup_ = diagnosticGroup #for diag in truncateDiagnostics: - # if pu.prepends(diag, diagnosticGroup_) or pu.postpends(diag, diagnosticGroup_): + # if diagnosticGroup_.startswith(diag) or diagnosticGroup_.endswith(diag): # diagnosticGroup_ = diag allDiagnosticNames_ = deepcopy(allDiagnosticNames) @@ -395,10 +399,10 @@ def statPlotAttributes(self, diagnosticGroup, statName, centralValue = None #for diag in truncateDiagnostics: - # if pu.prepends(diag, cntrlDiagnosticName) or pu.postpends(diag, cntrlDiagnosticName): + # if cntrlDiagnosticName.startswith(diag) or cntrlDiagnosticName.endswith(diag): # cntrlDiagnosticName = diag # for idiag, adiag in enumerate(allDiagnosticNames_): - # if pu.prepends(diag, adiag) or pu.postpends(diag, adiag): + # if adiag.startswith(diag) or adiag.endswith(diag): # allDiagnosticNames_[idiag] = diag oneCenteredRatioDiagPrefixes = ['CRx', 'CRy', 'InnovationRatio', 'SRx', 'SRy', 'OENI'] @@ -406,7 +410,7 @@ def statPlotAttributes(self, diagnosticGroup, statName, for diag in allDiagnosticNames_: oneCentered = False for diagPrefix in oneCenteredRatioDiagPrefixes: - oneCentered = oneCentered or pu.prepends(diagPrefix, diag) + oneCentered = oneCentered or diag.startswith(diagPrefix) allOneCenteredDiagnostics = allOneCenteredDiagnostics and oneCentered if allOneCenteredDiagnostics: centralValue = 1. @@ -419,7 +423,7 @@ def statPlotAttributes(self, diagnosticGroup, statName, # for diag in allDiagnosticNames_: # logScaled = False # for diagPrefix in logScaledDiagPrefixes: - # logScaled = logScaled or pu.prepends(diagPrefix, diag) + # logScaled = logScaled or diag.startswith(diagPrefix) # allLogScaledDiagnostics = allLogScaledDiagnostics and logScaled # if allLogScaledDiagnostics: # logScale = True and not isDifferencePlot diff --git a/graphics/analysis/BinValAxisStatsComposite.py b/graphics/analysis/BinValAxisStatsComposite.py index 705cad4..b4beb0f 100644 --- a/graphics/analysis/BinValAxisStatsComposite.py +++ b/graphics/analysis/BinValAxisStatsComposite.py @@ -132,9 +132,9 @@ def analyze_(self, workers = None): varMapLoc.append((varName, varLabel)) nsubplots = nVarsLoc - nxplots = np.int(np.ceil(np.sqrt(nsubplots))) + nxplots = int(np.ceil(np.sqrt(nsubplots))) while nsubplots%nxplots > 0 and nsubplots%nxplots / nxplots <= 0.5: nxplots += 1 - nyplots = np.int(np.ceil(np.true_divide(nsubplots, nxplots))) + nyplots = int(np.ceil(np.true_divide(nsubplots, nxplots))) ptLoc = {} diff --git a/graphics/analysis/StatisticsDatabase.py b/graphics/analysis/StatisticsDatabase.py index 47b2485..00e705e 100644 --- a/graphics/analysis/StatisticsDatabase.py +++ b/graphics/analysis/StatisticsDatabase.py @@ -26,7 +26,7 @@ def __init__(self, nrows): for attribName in su.fileStatAttributes: self.values[attribName] = np.empty(nrows, np.chararray) for statName in su.allFileStats: - self.values[statName] = np.empty(nrows, np.float) + self.values[statName] = np.empty(nrows, float) @classmethod def read(cls, statsFile, expName, fcTDelta, cyDTime): @@ -56,27 +56,24 @@ def concatasync(cls, asyncresults): return new def insert(self, other, srow): - assert srow >= 0, ("Error: can only insert MultipleBinnedStatistics rows >= 0, not ", srow) + assert srow >= 0, f"Error: can only insert MultipleBinnedStatistics rows >= 0, not {srow}" erow = srow + other.nrows - 1 - assert erow < self.nrows, ("Error: can only insert MultipleBinnedStatistics rows < ", self.nrows, ", not ", erow) + assert erow < self.nrows, f"Error: can only insert MultipleBinnedStatistics rows < {self.nrows}, not {erow}" + for key, val in other.values.items(): - if isinstance(val, Iterable): - assert key in self.values, key+" not in MultipleBinnedStatistics" - self.values[key][srow:erow+1] = val[:] + assert key in self.values, f"{key} not in MultipleBinnedStatistics" + self.values[key][srow:erow+1] = val def destroy(self): del self.values -def dfIndexLevels(df, index): - mi = df.index.names - return pu.uniqueMembers( - df.index.get_level_values( - mi.index(index) ).tolist() ) +def dfIndexLevels(df, index_name): + return df.index.get_level_values(index_name).unique().tolist() def dfVarVals(df, loc, var): - return pu.uniqueMembers(df.loc[loc, var].tolist()) + return df.loc[loc, var].unique().tolist() class StatsDB: @@ -401,95 +398,80 @@ def __str__(self): ): return self.df.to_string() - @classmethod - def fromLoc(cls, other, locDict, var=None): - return cls(other.locdf(other.locTuple(locDict), var)) - @classmethod def fromAggStats(cls, other, aggovers): return cls(other.aggStats(aggovers)) def append(self, otherDF = None): - if otherDF is None: return - - #Add otherDF (DataFrame object) to self.df - # adds new column names as needed - # adds meaningless NaN entries in columns that do not overlap between two DF's - # TODO: reduce memory footprint of NaN's via modifications to external data flows - appendDF = otherDF.copy(True) - - selfColumns = list(self.df.columns) - appendColumns = list(appendDF.columns) - - selfNRows = len(self.df.index) - for column in appendColumns: - if column not in selfColumns: - self.df.insert(len(list(self.df.columns)), column, [np.NaN]*selfNRows) - - appendNRows = len(appendDF.index) - for column in selfColumns: - if column not in appendColumns: - appendDF.insert(len(list(appendDF.columns)), column, [np.NaN]*appendNRows) - - self.df = self.df.append(appendDF, sort=True) - - def locTuple(self, locDict={}): - Loc = () - for index in list(locDict.keys()): - assert index in self.indexNames,( - "\n\nERROR: index name not in the multiindex, index = "+index - +", indexNames = ", self.indexNames) + if otherDF is None or otherDF.empty: + return + self.df = pd.concat([self.df, otherDF], sort=True) + + @classmethod + def fromLoc(cls, other, locDict, var=None): + return cls(other.loc(locDict, var)) - for index in self.indexNames: - indL = list(Loc) - if index not in locDict: - indL.append(slice(None)) - elif locDict[index] is None: - indL.append(slice(None)) - elif (isinstance(locDict[index], Iterable) and - not isinstance(locDict[index], str)): - indL.append(list(locDict[index])) + def loc(self, locDict, var=None): + # 1. Start with a boolean mask where everything is True + mask = np.ones(len(self.df), dtype=bool) + + # 2. Filter down level by level natively + for level_name, val in locDict.items(): + if val is None: + continue + + # Get the actual data for this index level + level_vals = self.df.index.get_level_values(level_name) + + # If the filter is a list of items, use native .isin() + if isinstance(val, (list, tuple, set, np.ndarray)): + mask = mask & level_vals.isin(val) + + # If it is a single value, use standard equality else: - indL.append(list([locDict[index]])) - Loc = tuple(indL) - return Loc + level_mask = (level_vals == val) - def locdf(self, Loc, var=None): - if var is None: - return self.df.loc[Loc, :] - else: - return self.df.loc[Loc, var] + # If no match is found, and we searched for a string (like '-0.25'), + # check if Pandas stored it as a float in the index. + if not level_mask.any() and isinstance(val, str): + try: + level_mask = (level_vals == float(val)) + except ValueError: + pass # It was a real string, not a number + + mask = mask & level_mask + + # 3. Apply the mask + filtered_df = self.df.loc[mask] + + # 4. Return specific column(s) if requested + if var is not None: + return filtered_df[var] + + return filtered_df def levels(self, index, locDict={}): - newDF = self.locdf(self.locTuple(locDict)) + newDF = self.loc(locDict) return dfIndexLevels(newDF, index) - def loc(self, locDict, var=None): - return self.locdf(self.locTuple(locDict), var) - def loc1(self, locDict, var=None): - s = self.loc(locDict, var).to_numpy() - if isinstance(s, Iterable): - if len(s) == 1: - return s[0] - else: + res = self.loc(locDict, var) + # if result is empty or has multiple values, return NaN + if len(res) != 1: return np.NaN - #assert len(s) == 1, "DFWrapper::loc0, locDict/var must return single location only" - #return s[0] - else: - return s + return res.item() def var(self, var): - return self.loc({}, var=var) + return self.df[var] def uniquevals(self, var, locDict={}): - return pu.uniqueMembers(self.loc(locDict, var).tolist()) + return self.loc(locDict, var).dropna().unique().tolist() def min(self, locDict, var=None): - return self.loc(locDict, var).dropna().min() + return self.loc(locDict, var).min() def max(self, locDict, var): - return self.loc(locDict, var).dropna().max() + return self.loc(locDict, var).max() def aggStats(self, aggovers): groupby = deepcopy(self.indexNames) @@ -502,42 +484,28 @@ def aggStats(self, aggovers): def TDelta_dir(tdelta, fmt): - subs = {} - fmts = {} - i = '{:d}' - i02 = '{:02d}' - - # "%D %HH:%MM:%SS" - subs["D"] = tdelta.days - fmts["D"] = i - - subs["HH"], hrem = divmod(tdelta.seconds, 3600) - fmts["HH"] = i02 - - subs["MM"], subs["SS"] = divmod(hrem, 60) - fmts["MM"] = i02 - fmts["SS"] = i02 - - ts = int(tdelta.total_seconds()) - - # "%h" - subs["h"], hrem = divmod(ts, 3600) - fmts["h"] = i - - # "%MIN:%SEC" - subs["MIN"], subs["SEC"] = divmod(ts, 60) - fmts["MIN"] = i - fmts["SEC"] = i02 - - subs["m"] = subs["MIN"] - fmts["m"] = fmts["MIN"] - - # "%s" - subs["s"] = ts - fmts["s"] = i - - out = fmt - for key in subs.keys(): - out = out.replace("%"+key, fmts[key].format(subs[key])) - - return out + """Formats a timedelta into a directory string using a replacement map.""" + # Pre-calculate all necessary values + total_seconds = int(tdelta.total_seconds()) + h_rem, rem = divmod(tdelta.seconds, 3600) + m_rem, s_rem = divmod(rem, 60) + + # Define the mapping (Key: Formatted Value) + subs = { + "%D": str(tdelta.days), + "%HH": f"{h_rem:02d}", + "%MM": f"{m_rem:02d}", + "%SS": f"{s_rem:02d}", + "%h": str(total_seconds // 3600), + "%MIN": str(total_seconds // 60), + "%SEC": f"{s_rem:02d}", + "%m": str(total_seconds // 60), + "%s": str(total_seconds) + } + + # Direct replacement loop + for key, val in subs.items(): + if key in fmt: + fmt = fmt.replace(key, val) + + return fmt diff --git a/graphics/analysis/category/CategoryBinMethodBase.py b/graphics/analysis/category/CategoryBinMethodBase.py index f5a5bd9..df7fae0 100644 --- a/graphics/analysis/category/CategoryBinMethodBase.py +++ b/graphics/analysis/category/CategoryBinMethodBase.py @@ -54,9 +54,9 @@ def subplotArrangement(self, binValsMap): nsubplots = nxplots * nyplots else: nsubplots = self.nVars - nxplots = np.int(np.ceil(np.sqrt(nsubplots))) + nxplots = int(np.ceil(np.sqrt(nsubplots))) while nsubplots%nxplots > 0 and nsubplots%nxplots / nxplots <= 0.5: nxplots += 1 - nyplots = np.int(np.ceil(np.true_divide(nsubplots, nxplots))) + nyplots = int(np.ceil(np.true_divide(nsubplots, nxplots))) return nxplots, nyplots, nsubplots diff --git a/graphics/analysis/category/FCScoreCard.py b/graphics/analysis/category/FCScoreCard.py index 584073b..f05bf85 100644 --- a/graphics/analysis/category/FCScoreCard.py +++ b/graphics/analysis/category/FCScoreCard.py @@ -356,9 +356,9 @@ def customSortIndex(ll, sorter): nsubplots = nxplots * nyplots else: nsubplots = nSubplotsPerExp - nxplots = np.int(np.ceil(np.sqrt(nsubplots))) + nxplots = int(np.ceil(np.sqrt(nsubplots))) while nsubplots%nxplots > 0 and nsubplots%nxplots / nxplots <= 0.5: nxplots += 1 - nyplots = np.int(np.ceil(np.true_divide(nsubplots, nxplots))) + nyplots = int(np.ceil(np.true_divide(nsubplots, nxplots))) xVals = self.fcTDeltas diff --git a/graphics/analysis/multidim/BinValAxes2D.py b/graphics/analysis/multidim/BinValAxes2D.py index 677b2e4..fdef1fd 100644 --- a/graphics/analysis/multidim/BinValAxes2D.py +++ b/graphics/analysis/multidim/BinValAxes2D.py @@ -167,14 +167,14 @@ def innerloops(self, # determine the coordinates of the structued X/Y grid points xUnique = np.array(pu.uniqueMembers(xCoords)) - xVals = np.asarray(xUnique, dtype=np.float) + xVals = np.asarray(xUnique, dtype=float) xSort = np.argsort(xVals) xVals = xVals[xSort] nXVals = len(xVals) xValsStr = list(xUnique[xSort]) yUnique = np.array(pu.uniqueMembers(yCoords)) - yVals = np.asarray(yUnique, dtype=np.float) + yVals = np.asarray(yUnique, dtype=float) ySort = np.argsort(yVals) yVals = yVals[ySort] nYVals = len(yVals) @@ -368,11 +368,15 @@ def innerloops(self, t = float(ciVals[statName][trait]) # automatically generate relative difference plots for positive-semi-definite statistics if useRelativeDifference: - # divide by cntrlLoc aggregated statName - t /= normalizingStat - if self.relativeErrorType == 'one hundred centered': - t += 1.0 - t *= 100.0 + if normalizingStat != 0 and np.isfinite(normalizingStat): + # divide by cntrlLoc aggregated statName + t /= normalizingStat + if self.relativeErrorType == 'one hundred centered': + t += 1.0 + t *= 100.0 + else: + t = np.NaN + planeVals[trait][iy, ix] = t # automatically generate relative difference plots for positive-semi-definite statistics diff --git a/graphics/analysis/multidim/BinValAxisProfile.py b/graphics/analysis/multidim/BinValAxisProfile.py index c385dc6..125945e 100644 --- a/graphics/analysis/multidim/BinValAxisProfile.py +++ b/graphics/analysis/multidim/BinValAxisProfile.py @@ -49,9 +49,9 @@ def innerloops(self, nsubplots = nxplots * nyplots else: nsubplots = np.nanmax([nVarsLoc, 1]) - nxplots = np.nanmax([np.int(np.ceil(np.sqrt(nsubplots))), 1]) + nxplots = np.nanmax([int(np.ceil(np.sqrt(nsubplots))), 1]) while nsubplots%nxplots > 0 and nsubplots%nxplots / nxplots <= 0.5: nxplots += 1 - nyplots = np.int(np.ceil(np.true_divide(nsubplots, nxplots))) + nyplots = int(np.ceil(np.true_divide(nsubplots, nxplots))) ptLoc = {} axisLimitsLoc = {} @@ -225,9 +225,9 @@ def innerloops(self, nsubplots = nxplots * nyplots else: nsubplots = np.nanmax([nVarsLoc, 1]) - nxplots = np.nanmax([np.int(np.ceil(np.sqrt(nsubplots))), 1]) + nxplots = np.nanmax([int(np.ceil(np.sqrt(nsubplots))), 1]) while nsubplots%nxplots > 0 and nsubplots%nxplots / nxplots <= 0.5: nxplots += 1 - nyplots = np.int(np.ceil(np.true_divide(nsubplots, nxplots))) + nyplots = int(np.ceil(np.true_divide(nsubplots, nxplots))) # Only bootstrap over the union of cyDTimes available # from both experiments at each fcTDelta diff --git a/graphics/analyze_config.py b/graphics/analyze_config.py index afcaf8e..e6b6493 100644 --- a/graphics/analyze_config.py +++ b/graphics/analyze_config.py @@ -96,6 +96,10 @@ def adjust_experiments(control, expArray, verifySpace, verifyType, firstCycle, l if verifyType: VerificationType = verifyType verificationChanged = True + if firstCycle and lastCycle and firstCycle > lastCycle: + raise ValueError(f"firstCycle ({firstCycle}) > lastCycle ({lastCycle})") + if firstCycle and lastCycle is None: + raise ValueError(f"if firstCycle is set, lastCycle must be set (-l option)") if firstCycle: dbConf['firstCycleDTime'] = firstCycle if lastCycle: @@ -305,8 +309,7 @@ def adjust_experiments(control, expArray, verifySpace, verifyType, firstCycle, l ## expDirectory is the top-level directory that contains data # from all experiments. The environment variable, EXP_DIR, can be used # to specify its value externally if desired. -user = os.environ['USER'] -dbConf['expDirectory'] = os.getenv('EXP_DIR','/glade/derecho/scratch/'+user+'/pandac') +dbConf['expDirectory'] = os.getenv('EXP_DIR', os.path.join(os.getenv('SCRATCH'), 'pandac')) ## hasFCLenDir whether directory structure includes forecast length # overridden to True within StatisticsDatabase.StatsDB when fcTDeltaLast > fcTDeltaFirst diff --git a/graphics/basic_plot_functions.py b/graphics/basic_plot_functions.py index c73c60b..6f55c86 100644 --- a/graphics/basic_plot_functions.py +++ b/graphics/basic_plot_functions.py @@ -1393,7 +1393,7 @@ def plotTimeSeries(fig, if nLines <= maxLegendEntries: if legend_inside: #INSIDE AXES - nlcol = np.int(np.ceil(np.sqrt(nLines))) + nlcol = int(np.ceil(np.sqrt(nLines))) lh = ax.legend(loc='best',fontsize=legendLabelFontSize1,frameon=True, framealpha=0.4,ncol=nlcol) lh.get_frame().set_linewidth(0.0) diff --git a/graphics/standalone/plot_diag.py b/graphics/standalone/plot_diag.py index 0f238a9..b12f73a 100644 --- a/graphics/standalone/plot_diag.py +++ b/graphics/standalone/plot_diag.py @@ -1,5 +1,6 @@ import argparse import glob +import logging import math import os import re @@ -86,7 +87,7 @@ def readdata(args: argparse.Namespace) -> None: makeDistributionPlots = True plot_allinOneDistri = True # plot profileObsTypes (includes all levels) and sfcObsTypes. plot_eachLevelDistri = False # plot every level separately for profileObsTypes. - plot_predictorDistri = False # Default is False; should turn on it with makeDistributionPlots = True + plot_predictorDistri = args.predictors # NOUTER: number of outer iterations # can be set as env variable 'NOUTER' @@ -204,7 +205,7 @@ def readdata(args: argparse.Namespace) -> None: # collect all file names that fit file name format obsoutfiles = list(diagdir.glob(f'{diagprefix}*{diagsuffix}')) if not obsoutfiles: - print(f"No obsout files matching '{diagprefix}*{diagsuffix}'") + logging.warning(f"No obsout files matching '{diagprefix}*{diagsuffix}'") # Group files by experiment-obstype combination # (e.g., 3dvar_aircraft), where each group @@ -229,14 +230,14 @@ def readdata(args: argparse.Namespace) -> None: # Loop over experiment-obstype groups for expt_obs, files in exob_groups.items(): if "_" not in expt_obs: - print(f"no obstype in {expt_obs}. skip.") + logging.warning(f"no obstype in {expt_obs}. skip.") continue # obstype is everything after first underscore obstype = expt_obs.split("_", 1)[1] # If obstype string is not found in one of the analyzedObsTypes... if not any(obstype in analyzedObsType for analyzedObsType in analyzedObsTypes): - print(f'{obstype} not in one of analyzedObsTypes. skip.') + logging.warning(f'{obstype} not in one of analyzedObsTypes. skip.') continue print(f"Processing experiment_obstype {expt_obs}") @@ -389,6 +390,11 @@ def get_nlocs(f): for varGrp in readVars: if varGrp is None: continue var, grp = vu.splitObsVarGrp(varGrp) + + if grp not in ncDB.groups: + logging.warning(f'group {grp} not in {file}') + continue + # recordNumber is not available in ctest data files # In that case, assume record and station counts are identical if (varGrp == record and @@ -427,7 +433,7 @@ def get_nlocs(f): if station is not None and stationID == station: station = None pass else: - print('Incorrect unicode format for '+varGrp+' in '+file) + logging.error('Incorrect unicode format for '+varGrp+' in '+file) raise UnicodeDecodeError(p) ss = ee @@ -487,7 +493,7 @@ def get_nlocs(f): db[var][~passan] = np.NaN if not np.isfinite(db[omb]).any(): - print('all values are NaN or inf: ', varName, obstype) + logging.warning('all values are NaN or inf: ', varName, obstype) continue # diagnose relative omb/oma for GNSSRO, which varies @@ -706,11 +712,17 @@ def get_nlocs(f): basic_plot_functions.plotDistri(db[latitude], db[longitude], db[ana][:,ich], obstype, shortname, units, expt_obs, 0, "ana", **kwargs) if plot_predictorDistri: - print('plotting predictors distribution for : '+obstype+ 'ch'+str(channel)) - for ipred, pred in enumerate(predVars): - basic_plot_functions.plotDistri(db[latitude], db[longitude], db[pred][:,ich], - obstype, predGroup[ipred], "ch"+str(channel), - expt_obs, 0, "ch"+str(channel), **kwargs) + print(f'plot {len(predVars)} predictors for {obstype} ch{channel}') + kwargs_copy = kwargs.copy() + # predictors don't have same range of values as obsType. Let matplotlib pick vmin, vmax. + del(kwargs_copy['vmin']) + del(kwargs_copy['vmax']) + for ipred, pred in enumerate(predVars): + basic_plot_functions.plotDistri( + db[latitude], db[longitude], db[pred][:,ich], + obstype, predGroup[ipred], "ch"+str(channel), + expt_obs, 0, "ch"+str(channel), **kwargs_copy + ) goodrange = np.nanpercentile(np.abs(np.concatenate((db[omb][:,ich], db[oma][:,ich]))), 98) kwargs.update({"cmap": plt.colormaps["RdBu_r"]}) kwargs.update({"vmin": -goodrange}) @@ -924,7 +936,7 @@ def scatter_verification(ifig, varName, varUnits, ivar, nvars, xlab = 'h(xb) - y' ylab = 'h(xa) - y' else: - print('WARNING: scatter_verification has no definitions for nfigtypes == ', nfigtypes) + logging.warning('scatter_verification has no definitions for nfigtypes == ', nfigtypes) continue # Uncomment these 2 lines to put x/y labels only on peripheral subplts @@ -996,18 +1008,18 @@ def scatter_one2ones(XVAL, YVALS, LEG, show_stats, XLAB, YLAB, VAR_NAME, UNITS, ha='left', va='top', transform=ax.transAxes) if len(XVAL) == 0: - print('WARNING in scatter_one2ones: len(XVAL)==0; skipping this dataset') + logging.warning('in scatter_one2ones: len(XVAL)==0; skipping this dataset') return 1 NVALS = np.asarray([]) for i, YVAL in enumerate(YVALS): if len(XVAL) != len(YVAL): - print('ERROR: Incorrect usage of scatter_one2ones, YVALS must be list of arrays.') + logging.error('Incorrect usage of scatter_one2ones, YVALS must be list of arrays.') os._exit() not_nan = np.isfinite(XVAL) & np.isfinite(YVAL) NVALS = np.append(NVALS, np.sum(not_nan)) if np.all(NVALS == 0): - print('WARNING in scatter_one2ones: all(XVAL/YVAL) are non-finite; skipping this dataset') + logging.warning('in scatter_one2ones: all(XVAL/YVAL) are non-finite; skipping this dataset') return 1 colors = [ @@ -1111,6 +1123,7 @@ def main(): help="Path to optional YAML config file for plot styling", ) parser.add_argument("--diagdir", help="Path to data", default="../..") + parser.add_argument("--predictors", action='store_true', help="plot predictors") args = parser.parse_args() readdata(args)