ACCORD-NWP · ewhelan · Oct 22, 2025
diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt
@@ -14,4 +14,5 @@ install_scripts (varbcdiagnose.sh plotvarbccoeff.py)
 install_scripts (rundiacov.sh diag_diacov.sh plotdiacov.py)
 install_scripts(clddet_diagnosis.sh clddet_plotter.py)
 install_scripts (datool_dfs.py)
+install_scripts (datool_obstool.py)
 #install_scripts (diacov.sh festat.sh)
diff --git a/scripts/datool_obstool.py b/scripts/datool_obstool.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+
+import sys
+import argparse
+import shlex
+import numpy as np
+import matplotlib.pyplot as plt
+
+import pyodc as odc
+
+
+
+#python script.py --file1 dataDTG1.odb  --file2 dataDTG2.odb gpsdel
+#datool_obstool --file1=gpsdel_2021091200 --file2=gpsdel_2021091203 --plot gpsdel
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Read the input file and the type of obs you want to plot.",
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+    parser.add_argument('--file1', help='ODB-2/ASCII file from first CCMA')
+    parser.add_argument('--file2', help='ODB-2/ASCII file from second CCMA')
+#convlist="ship_z airep_t airep_u airep_v radar_RH radar_DOW gpsdel ascat"
+#satlist="amsua mhs iasi seviri mwhs2 atms"
+    parser.add_argument('obstype', choices=['ship_z','airep_t','airep_u','airep_v','radar_RH','radar_DOW','gpsdel','ascat'],
+                        help="Obs:\n"
+                             "  ship_z )\n"
+                             "  airep_t )\n"
+                             "  airep_u )\n"
+                             "  airep_v )\n"
+                             "  radar_RH )\n"
+                             "  radar_DOW )\n"
+                             "  gpsdel )\n"
+                             "  ascat"
+    )
+    parser.add_argument('--plot-style', help='Matplotlib style for plotting', default='default')
+    parser.add_argument('--list-plot-styles', action='store_true', help='List available matplotlib plot styles and exit')
+
+    # Check if no arguments at all
+    if len(sys.argv) == 1:
+        print(f"datool_obstool: Error: No arguments provided.")
+        parser.print_usage()
+        sys.exit(1)
+
+    return parser.parse_args()
+
+    print(parser.parse_args)
+    print("Observation to plot", obs)
+    print("Period",file1, file2)
+
+def is_odb_file(path):
+    try:
+        with open(path, 'rb') as f:
+            magic = f.read(5)
+            # Check for uint16 0xFFFF followed by ASCII "ODA"
+            return magic[:2] == b'\xFF\xFF' and magic[2:] == b'ODA'
+    except Exception:
+        pass
+
+    try:
+        odc.Reader(path).columns
+        return True
+    except Exception:
+        return False
+
+def is_ascii_file(path):
+    try:
+        with open(path, 'r', encoding='utf-8') as f:
+            # Read first non-empty, non-comment line
+            for line in f:
+                if line.strip() and not line.strip().startswith("#"):
+                    parts = line.strip().split()
+                    return len(parts) >= 12  # your ASCII reader expects 12 fields
+    except Exception:
+        return False
+    return False
+
+def read_odb_file(path):
+    import pandas as pd
+
+    # Read the full table using odc.Reader
+    try:
+        df = odc.read_odb(path,single=True)
+    except Exception as e:
+        raise RuntimeError(f"Failed to read ODB file {path} using odc: {e}")
+
+    # Filter the data like odbsql
+    FG2BIG_MASK = 1 << 8
+#    FG2BIG_MASK = 1 << 20
+    filtered_df = df[
+        (df['varno@body'] != 91) &
+        (df['an_depar@body'].notnull()) &
+        ((df['datum_event1@body'] & FG2BIG_MASK) == 0)
+    ]
+    df = filtered_df
+    # Expected column names
+    expected_columns = [
+        'obstype@hdr', 'codetype@hdr', 'vertco_reference_1@body', 'sensor@hdr', 'statid@hdr',
+        'varno@body', 'lat@hdr', 'lon@hdr', 'obsvalue@body', 'final_obs_error@errstat',
+        'fg_depar@body', 'an_depar@body'
+    ]
+
+    # Verify all required columns exist
+    missing = [col for col in expected_columns if col not in df.columns]
+    if missing:
+        raise ValueError(f"Missing required columns in ODB file {path}: {', '.join(missing)}")
+
+    # Return list of records as dictionaries
+    return df[expected_columns].to_dict(orient='records')
+
+    print(obstyp)
+    print(varno)
+
+
+def read_next(file):
+    line = file.readline()
+    if not line:
+        return None
+
+    parts = shlex.split(line)
+    if len(parts) < 12:
+        raise ValueError(f"Bad INPUT: Malformed line with {len(parts)} fields: {line}. Expecting 12 columns in ASCII input.")
+
+    try:
+        return {
+            'obstype@hdr': int(parts[0]),
+            'codetype@hdr': int(parts[1]),
+            'vertco_reference_1@body': float(parts[2]),
+            'sensor@hdr': int(parts[3]),
+            'statid@hdr': parts[4].strip(),
+            'varno@body': int(parts[5]),
+            'lat@hdr': float(parts[6]),
+            'lon@hdr': float(parts[7]),
+            'obsvalue@body': float(parts[8]),
+            'final_obs_error@errstat': float(parts[9]),
+            'fg_depar@body': float(parts[10]),
+            'an_depar@body': float(parts[11])
+        }
+    except ValueError as e:
+        print(f"Error parsing line: {line}")
+        raise e
+
+def plot_results(mode="percent"):
+    dfsobsstr = (
+        'SYNOP-Z','SYNOP-T2','SYNOP-R2','SYNOP-U10','GNSS-ZTD',
+        'TEMP-U','TEMP-T','TEMP-Z','TEMP-Q','AIREP-T',
+        'AIREP-U','SATOB-U','BUOY-Z','BUOY-U','PILOT-Z',
+        'PILOT-U','AMSUA-TB','MHS-TB','ATMS-TB','MWHS2-TB',
+        'IASI-TB','CRIS-TB','SEVIRI-TB','SCATT-U','RADAR-Z',
+        'RADAR-U','TEMP_CLS','GPS-RO','SGNSS'
+    )
+#This part to prepare the ODB2 general file and select the VAR and write a ascii file with the six columns: satid","lat","lon","an_depar","fg_depar","obsvalue" 
+
+'''
+    values = []
+    labels = []
+
+    total_dfs = np.sum(valdfs)
+
+    for i in range(numvar):
+        dfs = valdfs[i]
+        nobs = numdfs[i]
+        if dfs == 0:
+            continue  # skip zeros
+
+        if mode == "raw":
+            val = dfs
+        elif mode == "perobs":
+            val = dfs / nobs if nobs > 0 else 0
+        elif mode == "percent":
+            val = (dfs / total_dfs) * 100 if total_dfs > 0 else 0
+        else:
+            print(f"Unknown plot mode: {mode}")
+            return
+
+        values.append(val)
+        labels.append(dfsobsstr[i])
+
+    plt.figure(figsize=(12, 8))
+    plt.barh(labels, values)
+    #convlist="ship_z airep_t airep_u airep_v radar_RH radar_DOW gpsdel ascat"
+    #plt.xlabel({ "ship_z": "ship_z", "airep_t": "airep_t", "airep_u": "airep_u","airep_v": "airep_v","radar_RH": "radar_RH","radar_DOW": "radar_DOW","gpsdel": "gpsdel", "ascat": "ascat" }[obs])
+    plt.title({ "ship_z": "ship_z", "airep_t": "airep_t", "airep_u": "airep_u","airep_v": "airep_v","radar_RH": "radar_RH","radar_DOW": "radar_DOW","gpsdel": "gpsdel", "ascat": "ascat" }[obs])
+
+    #plt.xlabel({ "raw": "Total DFS []", "perobs": "DFS per Observation []", "percent": "Total DFS [%]" }[mode])
+    #plt.title({ "raw": "Absolute DFS by Observation Type", "perobs": "Relative DFS (per observation) by Observation Type", "percent": "DFS Percentage Contribution by Observation Type" }[mode])
+    plt.gca().invert_yaxis()
+    plt.grid(True, axis='x', linestyle='--', alpha=0.6)
+
+    ax = plt.gca()
+    ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
+
+    plt.tight_layout()
+    filename = f"dfs_plot_{mode}.png"
+    plt.savefig(filename)
+    print(f"datool_dfs: Bar chart saved to {filename}")
+'''
+
+def main():
+
+    CDTG1='2021091200' #this would come from the name of file1
+    CDTG2='2021091203' #this would come from the name of file1
+    DIRIN='/home/mdy/test_tools/da_tools'
+    DIROUT='/home/mdy/test_tools/da_tools'
+    VAR='gpsdel' #this would come from the command line
+    exe1='preparing_conv.R'
+    exe2='monitoring_conv'
+    EXP='expname'
+
+    #this would be firts part 
+    # R CMD BATCH  "--args ${CDTG1} ${DIRIN} ${DIROUT} ${VAR}" ${exe1} > ${DIROUT}/prepare_${VAR}.out
+    # R CMD BATCH  "--args ${CDTG2} ${DIRIN} ${DIROUT} ${VAR}" ${exe1} > ${DIROUT}/prepare_${VAR}.out
+
+   #this would be the second part
+    #R CMD BATCH  "--args ${EXP} ${DIROUT} ${SDTG} ${EDTG} ${CYCLE} ${VAR}" ${exe2} > ${DIROUT}/monitor_${VAR}.out
+
+    #    global valdfs, numdfs, cntdfs, cntnot
+    args = parse_args()
+
+    # List plot styles available
+    if args.list_plot_styles:
+        print("datool_dfs: Available matplotlib plot styles:")
+        for style in plt.style.available:
+            print(f"  {style}")
+        sys.exit(0)
+
+    # Apply the selected plot style
+    try:
+        plt.style.use(args.plot_style)
+    except OSError:
+        print(f"Error: '{args.plot_style}' is not a valid matplotlib style.")
+        print("Use --list-plot-styles to see available options.")
+        sys.exit(1)
+
+    # Validate required files unless just listing styles
+    if not args.file1 or not args.file2:
+        parser.error("the following arguments are required: file1, file2 (unless using --list-plot-styles)")
+
+    file1 = args.file1
+    file2 = args.file2
+
+    type1_odb = is_odb_file(file1)
+    type2_odb = is_odb_file(file2)
+    type1_ascii = is_ascii_file(file1)
+    type2_ascii = is_ascii_file(file2)
+
+    if type1_odb and type2_odb:
+        print("datool_obstool: Detected ODB input files.")
+        records1 = read_odb_file(file1)
+        records2 = read_odb_file(file2)
+    elif type1_ascii and type2_ascii:
+        print("datool_obstool: Detected ASCII input files.")
+        f1 = open(file1)
+        f2 = open(file2)
+        f1.readline()
+        f2.readline()
+        records1 = []
+        records2 = []
+        while True:
+            rec1 = read_next(f1)
+            rec2 = read_next(f2)
+            if rec1 is None or rec2 is None:
+                break
+            records1.append(rec1)
+            records2.append(rec2)
+        f1.close()
+        f2.close()
+    else:
+        print("Error: Input files must both be ODB or ASCII. Mismatch or unknown format.")
+        sys.exit(1)
+
+    if len(records1) != len(records2):
+        print("Error: Input files have different number of records.")
+        sys.exit(1)
+
+    print(f"datool_obstool: Options OK. Let's process data ...")
+
+    if args.write_obstool:
+        with open("obstool.dat", "w") as out:
+            for i in range(numvar):
+                out.write(f"{i+1:6d}{numdfs[i]:7d}{valdfs[i]:10.3f}\n")
+        print("datool_obstool: Raw Obstooldata written to obstool.dat")
+
+    if args.plot:
+        plot_results(args.plot)
+
+if __name__ == "__main__":
+    main()
+
+
diff --git a/tests/tools/test_datool_obstool_help.sh b/tests/tools/test_datool_obstool_help.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+set -uex
+
+# A unique working directory
+wd=$(pwd)
+test_wd=$(pwd)/test_datool_obstool_help
+
+mkdir -p ${test_wd}
+cd ${test_wd}
+
+datool_obstool.py -h > /dev/null && echo "datool_obstool help/usage works"
+
+
+# Clean up
+cd ${wd}
+rm -rf ${test_wd}