diff --git a/app/split-netcdf-python/README.md b/app/split-netcdf-python/README.md deleted file mode 100644 index e0939ae2..00000000 --- a/app/split-netcdf-python/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# split-netcdf - -Split NetCDF files by variable - -Some sample input parameters for rose-app.conf -inputDir=/work/c2b/canopy/split-netcdf -outputDir=$TMPDIR/tmp -date=19210101T0000Z -component=river_month_inst - -rose app-run diff --git a/app/split-netcdf-python/bin/split-netcdf b/app/split-netcdf-python/bin/split-netcdf deleted file mode 100755 index 7723a017..00000000 --- a/app/split-netcdf-python/bin/split-netcdf +++ /dev/null @@ -1,94 +0,0 @@ -#!/bin/python - -# Split NetCDF files by variable -# -# Can be tiled or not. Component is optional, defaults to all. -# -# Input format: date.component(.tileX).nc -# Output format: date.component.var(.tileX).nc - -import os -import glob -import subprocess -import cdo -import sys -from pathlib import Path - -#Set variables -inputDir = os.environ['inputDir'] -outputDir = os.environ['outputDir'] -date = os.environ['date'] -component = os.environ['component'] -use_subdirs = os.environ['use_subdirs'] - -print("Arguments:") -print(" input dir: "+inputDir) -print(" output dir: "+outputDir) -print(" date: "+date) -print(" component: "+component) -print(" use subdirs: "+use_subdirs) -print("Utilities:") -type(cdo) - -#Verify input directory exists and is a directory -if inputDir == "": - print("Error: Input directory "+ inputDir + " does not exists or isnt a directory") - sys.exit(1) - -#Verify output directory exists and is a directory -if outputDir == "": - print("Error: Output directory" + outputDir + " does not exist or isn't a directory") - sys.exit(1) - -#Find files to split -#extend globbing used to find both tiled and non-tiled files -curr_dir = os.getcwd() -os.chdir("curr_dir/inputDir") - -#If in sub-dir mode, process the sub-directories instead of the main one -if use_subdirs: - for subdir in os.listdir(): - 
recent_dirs=[] - recent_dirs.append(subdir) #pushd -# files-glob.glob( - files=glob.glob('*'+'.'+component+'?'+'(.tile?)'+'.nc') - #files=$(echo *.$component?(.tile?).nc) - - # Exit if no input files found - if len(files) == 0: - print("No input files found, skipping the subdir "+subdir) - os.chdir(recent_dirs[-2]) #popd - continue - - # Create output subdir if needed - os.mkdir(outputDir/subdir) - - # Split the files by variable - # Note: cdo may miss some weird land variables related to metadata/cell_measures - for file in files: - newfile = subprocess.call(["sed 's/nc$//' {file}"],shell=True) -# subprocess.call("cdo --history splitname $file $outputDir/$subdir/$(echo $file | sed 's/nc$//')", shell=True) - cdo=Cdo() - cdo.splitname(input=file, - output='outputDir/subdir/newfile') - #cdo --history splitname file outputDir/subdir/newfile - - os.chdir(recent_dirs[-2]) #popd -else: - files=glob.glob('*'+'.'+component+'?'+'(.tile?)'+'.nc') - # Exit if not input files are found - if len(files) == 0: - print("ERROR: No input files found") - sys.exit(1) - - # Split the files by variable - for file in files: - #newfile=file | sed 's/nc$//' - newfile = subprocess.call(["sed 's/nc$//' {file}"],shell=True) - cdo=Cdo() - cdo.splitname(input=file, - output='outputDir/newfile') - #cdo --history splitname file outputDir/newfile - -print("Natural end of the NetCDF splitting") -sys.exit(0) #check this diff --git a/app/split-netcdf-python/meta/rose-meta.conf b/app/split-netcdf-python/meta/rose-meta.conf deleted file mode 100644 index 316e80e5..00000000 --- a/app/split-netcdf-python/meta/rose-meta.conf +++ /dev/null @@ -1,18 +0,0 @@ -[env] - -[env=component] -compulsory=false -type=real - -[env=date] -compulsory=false - -[env=inputDir] -compulsory=false -length=10 -title=Directory containing the input files to be split - -[env=outputDir] -compulsory=false -title=Directory to write the split-out files -type=boolean diff --git a/app/split-netcdf-python/rose-app.conf 
b/app/split-netcdf-python/rose-app.conf deleted file mode 100644 index b90b0528..00000000 --- a/app/split-netcdf-python/rose-app.conf +++ /dev/null @@ -1,4 +0,0 @@ -[command] -default=split-netcdf - -[env] diff --git a/app/split-netcdf/bin/split-netcdf b/app/split-netcdf/bin/split-netcdf deleted file mode 100755 index bfcecc35..00000000 --- a/app/split-netcdf/bin/split-netcdf +++ /dev/null @@ -1,182 +0,0 @@ -#!/bin/bash -set -euo pipefail -set -x - -# -# Split NetCDF files by variable -# -# Can be tiled or not. Component is optional, defaults to all. -# -# Input format: date.component(.tileX).nc -# Output format: date.component.var(.tileX).nc -# - -echo Arguments: -echo " input dir: $inputDir" -echo " output dir: $outputDir" -echo " date: $date" -echo " component: $component" -echo " use subdirs: ${use_subdirs:=}" -echo Utilities: -type cdo - -# Verify input directory exists and is a directory -if [[ ! -d $inputDir ]]; then - echo "Error: Input directory '${inputDir}' does not exist or isn't a directory" - exit 1 -fi - -# Verify output directory exists and is a directory -if [[ ! -d $outputDir ]]; then - echo "Error: Output directory '${outputDir}' does not exist or isn't a directory" - exit 1 -fi - -# Setup PYTHONPATH and io lists for the data lineage tool -if [ ! 
-z "${EPMT_DATA_LINEAGE+x}" ] && [ "$EPMT_DATA_LINEAGE" = "1" ]; then - export PYTHONPATH=$CYLC_SUITE_DEF_PATH:$PYTHONPATH - export input_file_list= - export output_file_list= - echo "Set PYTHONPATH and created i/o lists" -fi - -# Find the files to split -# extended globbing used to find both tiled and non-tiled files -cd $inputDir -shopt -s extglob - -# If in sub-dir mode, process the sub-directories instead of the main one -if [[ $use_subdirs ]]; then - for subdir in $(ls); do - pushd $subdir - files=$(echo *.$component?(.tile?).nc) - - # Exit if no input files are found - if [[ $files =~ \* ]]; then - echo No input files found, skipping the subdir "$subdir" - popd - continue - fi - - # Create the output subdir if needed - mkdir -p $outputDir/$subdir - - # Split the files by variable - # Note: cdo may miss some weird land variables related to metadata/cell_measures - for file in $files; do - cdo --history splitname $file $outputDir/$subdir/$(echo $file | sed 's/nc$//') - done - - if [ ! 
-z "${EPMT_DATA_LINEAGE+x}" ] && [ "$EPMT_DATA_LINEAGE" = "1" ]; then - - epmt annotate EPMT_DATA_LINEAGE_IN_PATH="$inputDir/$subdir/" - echo "[DATA LINEAGE] Annotated $inputDir/$subdir/ to EPMT_DATA_LINEAGE_IN_PATH" - - epmt annotate EPMT_DATA_LINEAGE_OUT_PATH="$outputDir/$subdir/" - echo "[DATA LINEAGE] Annotated $outputDir/$subdir to EPMT_DATA_LINEAGE_OUT_PATH" - - start_time=$(date +%s) - - for file in $files; do - hash_val=$(/home/Cole.Harvey/.conda/envs/bloom-filter-env/bin/python \ - -m data_lineage.bloomfilter.HashGen $inputDir/$subdir/$file) - export input_file_list="${input_file_list}$file $hash_val," - echo "[DATA LINEAGE] Added $file to input list with hash_val: $hash_val" - done - - end_time=$(date +%s) - duration=$((end_time - start_time)) - echo "Time spent hashing and creating the file list: $duration seconds" - - cd $outputDir/$subdir - start_time=$(date +%s) - - for output_file in $(ls); do - hash_val=$(/home/Cole.Harvey/.conda/envs/bloom-filter-env/bin/python \ - -m data_lineage.bloomfilter.HashGen $(pwd)/$output_file) - export output_file_list="${output_file_list}$output_file $hash_val," - echo "[DATA LINEAGE] Added $output_file to output list with hash_val: $hash_val" - done - - end_time=$(date +%s) - duration=$((end_time - start_time)) - echo "Time spent hashing and creating the file list: $duration seconds" - - cd ../.. - fi - popd - done -else - files=$(echo *.$component?(.tile?).nc) - - # Exit if no input files are found - if [[ $files =~ \* ]]; then - echo ERROR: No input files found - exit 1 - fi - - # Split the files by variable - for file in $files; do - cdo --history splitname $file $outputDir/$(echo $file | sed 's/nc$//') - done - - if [ ! 
-z "${EPMT_DATA_LINEAGE+x}" ] && [ "$EPMT_DATA_LINEAGE" = "1" ]; then - epmt annotate EPMT_DATA_LINEAGE_IN_PATH="$inputDir/" - echo "[DATA LINEAGE] Annotated $inputDir/ to EPMT_DATA_LINEAGE_IN_PATH" - - epmt annotate EPMT_DATA_LINEAGE_OUT_PATH="$outputDir/" - echo "[DATA LINEAGE] Annotated $outputDir/ to EPMT_DATA_LINEAGE_OUT_PATH" - - start_time=$(date +%s) - - for file in $files; do - hash_val=$(/home/Cole.Harvey/.conda/envs/bloom-filter-env/bin/python \ - -m data_lineage.bloomfilter.HashGen $inputDir/$file) - export input_file_list="${input_file_list}$file $hash_val," - echo "[DATA LINEAGE] Added $file to input list with hash_val: $hash_val" - done - - end_time=$(date +%s) - duration=$((end_time - start_time)) - echo "Time spent hashing and creating the file list: $duration seconds" - - cd $outputDir/ - - start_time=$(date +%s) - - for output_file in $(ls); do - hash_val=$(/home/Cole.Harvey/.conda/envs/bloom-filter-env/bin/python \ - -m data_lineage.bloomfilter.HashGen $(pwd)/$output_file) - export output_file_list="${output_file_list}$output_file $hash_val," - echo "[DATA LINEAGE] Added $output_file to output list with hash_val: $hash_val" - done - - end_time=$(date +%s) - duration=$((end_time - start_time)) - echo "Time spent hashing and creating the file list: $duration seconds" - - cd .. - fi - -fi - -if [ ! 
-z "${EPMT_DATA_LINEAGE+x}" ] && [ "$EPMT_DATA_LINEAGE" = "1" ]; then - - # Annotate to EPMT - if [ -n "$input_file_list" ]; then - compressed_bytes=$(/home/Cole.Harvey/.conda/envs/bloom-filter-env/bin/python \ - -m data_lineage.bloomfilter.StringCompression "${input_file_list}") - epmt -v annotate EPMT_DATA_LINEAGE_IN="${compressed_bytes%*,}" - echo "[DATA LINEAGE] Annotated input files to EPMT_LINEAGE_IN" - fi - - if [ -n "$output_file_list" ]; then - compressed_bytes=$(/home/Cole.Harvey/.conda/envs/bloom-filter-env/bin/python \ - -m data_lineage.bloomfilter.StringCompression "${output_file_list}") - epmt -v annotate EPMT_DATA_LINEAGE_OUT="${compressed_bytes%*,}" - echo "[DATA LINEAGE] Annotated output files to EPMT_LINEAGE_OUT" - fi -fi - -echo Natural end of the NetCDF splitting -exit 0 diff --git a/app/split-netcdf/meta/rose-meta.conf b/app/split-netcdf/meta/rose-meta.conf deleted file mode 100644 index 316e80e5..00000000 --- a/app/split-netcdf/meta/rose-meta.conf +++ /dev/null @@ -1,18 +0,0 @@ -[env] - -[env=component] -compulsory=false -type=real - -[env=date] -compulsory=false - -[env=inputDir] -compulsory=false -length=10 -title=Directory containing the input files to be split - -[env=outputDir] -compulsory=false -title=Directory to write the split-out files -type=boolean diff --git a/app/split-netcdf/rose-app.conf b/app/split-netcdf/rose-app.conf deleted file mode 100644 index b90b0528..00000000 --- a/app/split-netcdf/rose-app.conf +++ /dev/null @@ -1,4 +0,0 @@ -[command] -default=split-netcdf - -[env] diff --git a/flow.cylc b/flow.cylc index 1bd0d003..b6bbb8d4 100644 --- a/flow.cylc +++ b/flow.cylc @@ -620,28 +620,33 @@ rename-split-to-pp-regrid => remap-pp-components-static => combin {% endif %} [[SPLIT-NETCDF]] - pre-script = mkdir -p $outputDir - script = rose task-run --verbose --app-key split-netcdf + [[[environment]]] + {# $date was used as an input arg in the earlier version of #} + {# split-netcdf without getting referenced in the script itself. 
#} + {# I think it's a reference to an older script that got removed in #} + {# the first round of cylc rewrites; keeping it here in case #} + {# we turn out to need it during the user testing. #} date = $CYLC_TASK_CYCLE_POINT {% if DO_NATIVE %} [[SPLIT-NETCDF-NATIVE]] inherit = SPLIT-NETCDF + script = fre pp split-netcdf-wrapper -i $inputDir -o $outputDir -s $history_file -c $component -y $CYLC_WORKFLOW_RUN_DIR/{{ YAML }} [[[environment]]] inputDir = $CYLC_WORKFLOW_SHARE_DIR/cycle/$CYLC_TASK_CYCLE_POINT/history/native outputDir = $CYLC_WORKFLOW_SHARE_DIR/cycle/$CYLC_TASK_CYCLE_POINT/split/native - component = $CYLC_TASK_PARAM_native + history_file = $CYLC_TASK_PARAM_native {% endif %} {% if DO_REGRID %} [[SPLIT-NETCDF-REGRID]] inherit = SPLIT-NETCDF + script = fre pp split-netcdf-wrapper -i $inputDir -o $outputDir -s $history_file -c $component -y $CYLC_WORKFLOW_RUN_DIR/{{ YAML }} --use-subdirs [[[environment]]] inputDir = $CYLC_WORKFLOW_SHARE_DIR/cycle/$CYLC_TASK_CYCLE_POINT/history/regrid-xy outputDir = $CYLC_WORKFLOW_SHARE_DIR/cycle/$CYLC_TASK_CYCLE_POINT/split/regrid-xy - use_subdirs = 1 - component = $CYLC_TASK_PARAM_regrid + history_file = $CYLC_TASK_PARAM_regrid {% endif %} {% if DO_NATIVE %} @@ -652,7 +657,7 @@ rename-split-to-pp-regrid => remap-pp-components-static => combin [[split-netcdf-native]] inherit = SPLIT-NETCDF-NATIVE, [[[environment]]] - component = $CYLC_TASK_PARAM_native_static + history_file = $CYLC_TASK_PARAM_native_static {% endif %} {% endif %} @@ -664,7 +669,7 @@ rename-split-to-pp-regrid => remap-pp-components-static => combin [[split-netcdf-regrid]] inherit = SPLIT-NETCDF-REGRID, [[[environment]]] - component = $CYLC_TASK_PARAM_regrid_static + history_file = $CYLC_TASK_PARAM_regrid_static {% endif %} {% endif %} diff --git a/site/gaea.cylc b/site/gaea.cylc index 3a939126..1af504ec 100644 --- a/site/gaea.cylc +++ b/site/gaea.cylc @@ -68,7 +68,7 @@ {% endif %} [[SPLIT-NETCDF]] - pre-script = module load cdo nco && mkdir -p $outputDir + 
pre-script = module load fre/{{ FRE_VERSION }} && mkdir -p $outputDir [[RENAME-SPLIT-TO-PP]] pre-script = module load cray-hdf5 cray-netcdf cdo fre/{{ FRE_VERSION }} && mkdir -p $outputDir diff --git a/site/ppan.cylc b/site/ppan.cylc index b6439796..26b2727b 100644 --- a/site/ppan.cylc +++ b/site/ppan.cylc @@ -69,7 +69,8 @@ {% endif %} [[SPLIT-NETCDF]] - pre-script = module load cdo nco && mkdir -p $outputDir + pre-script = module load fre/{{ FRE_VERSION }} && mkdir -p $outputDir + {# pre-script = set +u; module load miniforge; conda activate fre-cli; set -u ; mkdir -p $outputDir #} [[RENAME-SPLIT-TO-PP]] pre-script = module load netcdf-c cdo fre/{{ FRE_VERSION }} && mkdir -p $outputDir diff --git a/site/ppan_test.cylc b/site/ppan_test.cylc index 3ed31bf7..558c0361 100644 --- a/site/ppan_test.cylc +++ b/site/ppan_test.cylc @@ -109,7 +109,7 @@ {% endif %} [[SPLIT-NETCDF]] - pre-script = module load cdo nco && mkdir -p $outputDir + pre-script = module load fre/{{ FRE_VERSION }} && mkdir -p $outputDir [[RENAME-SPLIT-TO-PP]] pre-script = module load netcdf-c cdo fre/{{ FRE_VERSION }} && mkdir -p $outputDir