-
Notifications
You must be signed in to change notification settings - Fork 22
Fold fre pp rename-split into fre pp split-netcdf --rename
#783
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Copilot
wants to merge
13
commits into
main
Choose a base branch
from
copilot/noaa-gfdl-717-fold-rename-split
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 4 commits
Commits
Show all changes
13 commits
Select commit
Hold shift + click to select a range
487425b
Initial plan
Copilot d8dae1d
Merge branch 'rename-split' into copilot/noaa-gfdl-717-fold-rename-split
Copilot 77cb692
Fold rename-split into split-netcdf --rename flag with CLI and unit t…
Copilot 0b895cd
Address code review: fix typo and add error handling for rename rollback
Copilot e7b8c47
Address review: move rename logic into split_file_xarray, use fixture…
Copilot 0a41b94
Move rename_split_script import to top of split_netcdf_script.py
Copilot 1d80fe5
Convert split_file_xarray to 4-space indentation
Copilot e3a7474
Improve documentation for output directory structure
ilaflott 1c81d7c
Clarify ncgen_setup fixture purpose with comment
ilaflott 4f655d2
Write directly to final renamed path (no write + copy + delete)
Copilot 6b2b43e
Merge rename tests into test_split_netcdf.py, delete separate file
Copilot 73a872d
Add try/finally for decoded_dataset cleanup, improve error messages
Copilot a38af69
Merge branch 'main' into copilot/noaa-gfdl-717-fold-rename-split
ilaflott File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,234 @@ | ||
| ''' | ||
| Tests split-netcdf with --rename flag. | ||
| Tests the combined split + rename functionality that reorganizes | ||
| split netcdf files into a nested directory structure with frequency and duration. | ||
|
|
||
| Uses the existing split-netcdf test data (atmos_daily, ocean_static) to verify | ||
| the --rename flag behavior via both CLI (CliRunner) and direct import. | ||
| ''' | ||
|
|
||
| import pytest | ||
| import re | ||
| import subprocess | ||
| import os | ||
| from os import path as osp | ||
| import pathlib | ||
| from pathlib import Path | ||
| from fre import fre | ||
| from fre.pp import split_netcdf_script | ||
| from fre.pp import rename_split_script | ||
|
|
||
| import click | ||
| from click.testing import CliRunner | ||
| runner = CliRunner() | ||
|
|
||
# Root directory of the split-netcdf test inputs (cdl sources; nc files are generated here).
test_dir = osp.realpath("fre/tests/test_files/ascii_files/split_netcdf")

# One entry per test case: the case subdirectory, the netcdf file name and the cdl source name.
cases = {
    "ts": {
        "dir": "atmos_daily.tile3",
        "nc": "00010101.atmos_daily.tile3.nc",
        "cdl": "00010101.atmos_daily.tile3.cdl",
    },
    "static": {
        "dir": "ocean_static",
        "nc": "00010101.ocean_static.nc",
        "cdl": "00010101.ocean_static.cdl",
    },
}

# Absolute case directories, timeseries first, then static.
casedirs = [osp.join(test_dir, case["dir"]) for case in (cases["ts"], cases["static"])]

# Every output directory created by these tests starts with this prefix.
rename_outdir_prefix = "new_rename_"
|
|
||
|
|
||
def test_split_rename_setup():
    '''Generates the netcdf files needed for split+rename testing.

    For every entry in ``cases`` an ``ncgen3 -k netCDF-4`` command is built that
    writes the case's nc file from its cdl file, both inside the case directory.
    All commands are built first and then run one by one; the test asserts that
    every run returned exit code 0.
    '''
    commands = [
        ["ncgen3", "-k", "netCDF-4", "-o",
         osp.join(osp.join(test_dir, case["dir"]), case["nc"]),
         osp.join(osp.join(test_dir, case["dir"]), case["cdl"])]
        for case in cases.values()
    ]
    # check=True makes subprocess.run raise on a non-zero exit status
    return_codes = [
        subprocess.run(command, check=True, capture_output=True).returncode
        for command in commands
    ]
    assert all(code == 0 for code in return_codes)
|
|
||
|
|
||
@pytest.mark.parametrize(
    "workdir,infile,outfiledir,varlist",
    [
        pytest.param(casedirs[0], cases["ts"]["nc"],
                     rename_outdir_prefix + "ts_all", "all",
                     id="rename_ts_all"),
        pytest.param(casedirs[1], cases["static"]["nc"],
                     rename_outdir_prefix + "static_all", "all",
                     id="rename_static_all"),
    ],
)
def test_split_rename_cli_run(workdir, infile, outfiledir, varlist):
    '''Runs ``fre pp split-netcdf --rename`` through the click CliRunner.

    The input file and the output directory are both resolved relative to
    workdir. The command output is printed, and any exception captured by the
    runner is printed with its traceback, before asserting that the exit
    code is 0.
    '''
    input_path = osp.join(workdir, infile)
    output_path = osp.join(workdir, outfiledir)
    result = runner.invoke(
        fre.fre,
        args=["pp", "split-netcdf",
              "--file", input_path,
              "--outputdir", output_path,
              "--variables", varlist,
              "--rename"],
    )
    print(result.output)
    caught = result.exception
    if caught:
        # CliRunner captures exceptions; print the traceback so a failure is debuggable
        import traceback
        traceback.print_exception(type(caught), caught, caught.__traceback__)
    assert result.exit_code == 0
|
|
||
|
|
||
@pytest.mark.parametrize(
    "workdir,outfiledir,expected_component",
    [
        pytest.param(casedirs[0], rename_outdir_prefix + "ts_all", "atmos_daily",
                     id="rename_ts_structure"),
        pytest.param(casedirs[1], rename_outdir_prefix + "static_all", "ocean_static",
                     id="rename_static_structure"),
    ],
)
def test_split_rename_cli_structure(workdir, outfiledir, expected_component):
    '''Checks the nested directory layout inside a split+rename output directory.

    This test creates nothing itself; it inspects workdir/outfiledir, which uses
    the same output directory names as test_split_rename_cli_run.

    Asserts that:
     - outfiledir/expected_component is a directory
     - no .nc files sit directly at the root of outfiledir
     - at least one .nc file exists somewhere beneath outfiledir

    Intended layout (only the points above are actually checked here):
     - Timeseries: outputdir/component/freq/duration/component.date1-date2.var.tile.nc
     - Static: outputdir/component/P0Y/P0Y/component.var.nc
    '''
    outpath = Path(osp.join(workdir, outfiledir))

    # The component directory must exist
    component_dir = outpath / expected_component
    assert component_dir.is_dir(), f"Expected component directory {component_dir} not found"

    # Nothing may be left flat at the top level
    root_nc_files = list(outpath.glob("*.nc"))
    assert not root_nc_files, f"Flat .nc files remain at root: {root_nc_files}"

    # The output has to live somewhere in the nested tree
    nested_nc_files = list(outpath.rglob("*.nc"))
    assert nested_nc_files, "No .nc files found in nested structure"
|
|
||
|
|
||
@pytest.mark.parametrize(
    "workdir,outfiledir",
    [
        pytest.param(casedirs[0], rename_outdir_prefix + "ts_all",
                     id="rename_ts_freq"),
        pytest.param(casedirs[1], rename_outdir_prefix + "static_all",
                     id="rename_static_freq"),
    ],
)
def test_split_rename_cli_freq_dirs(workdir, outfiledir):
    '''Checks that every split+rename output file sits inside nested subdirectories.

    Intended layout is component/freq/duration/file.nc (e.g. P1D/P6M for the
    atmos_daily timeseries, P0Y/P0Y for ocean_static). Only the nesting depth is
    asserted here, not the directory names themselves.
    '''
    outpath = Path(osp.join(workdir, outfiledir))

    # There has to be something to inspect
    nc_files = list(outpath.rglob("*.nc"))
    assert nc_files

    # Relative to outfiledir each file needs at least 4 path parts:
    # 3 directory levels (component/freq/duration) plus the file name
    for nc_file in nc_files:
        parts = nc_file.relative_to(outpath).parts
        assert len(parts) >= 4, \
            f"File {nc_file} is not deep enough: {parts}"
|
|
||
|
|
||
def test_split_rename_import_run():
    '''Exercises split+rename through direct function calls instead of the CLI.

    Splits the timeseries case with split_file_xarray, then for each split file
    calls rename_file, joins the result onto the output directory, hands both
    paths to link_or_copy and removes the flat original. Finally asserts that no
    .nc files remain at the root, that nested .nc files exist and that the
    atmos_daily component directory was created.
    '''
    workdir = casedirs[0]
    infile = osp.join(workdir, cases["ts"]["nc"])
    outfiledir = osp.join(workdir, rename_outdir_prefix + "import_ts")

    # Step 1: split into flat files named <input stem>.<something>.nc
    split_netcdf_script.split_file_xarray(infile, outfiledir, "all")

    outpath = Path(outfiledir)
    split_files = list(outpath.glob(f"{Path(infile).stem}.*.nc"))
    assert split_files, "No split files were created"

    # Step 2: place each split file at its renamed location and drop the flat copy
    for split_file in split_files:
        new_full_path = outpath / rename_split_script.rename_file(split_file)
        rename_split_script.link_or_copy(str(split_file), str(new_full_path))
        split_file.unlink()

    # Nothing flat may be left behind
    root_nc_files = list(outpath.glob("*.nc"))
    assert not root_nc_files, f"Flat .nc files remain at root: {root_nc_files}"

    # The renamed files must exist in the nested tree
    nested_nc_files = list(outpath.rglob("*.nc"))
    assert nested_nc_files, "No .nc files found in nested structure"

    # The component directory for this case must exist
    component_dir = outpath / "atmos_daily"
    assert component_dir.is_dir(), f"Component directory {component_dir} not found"
|
|
||
|
|
||
def test_split_rename_without_flag():
    '''Checks that split-netcdf without --rename still writes flat output.

    Runs the CLI on the timeseries case with no --rename flag and asserts that
    the command succeeds, that .nc files exist directly in the output directory
    and that the output directory contains no subdirectories (backward
    compatibility).
    '''
    workdir = casedirs[0]
    infile = osp.join(workdir, cases["ts"]["nc"])
    outfiledir = osp.join(workdir, rename_outdir_prefix + "no_rename")

    result = runner.invoke(
        fre.fre,
        args=["pp", "split-netcdf",
              "--file", infile,
              "--outputdir", outfiledir,
              "--variables", "all"],
    )
    assert result.exit_code == 0

    outpath = Path(outfiledir)

    # Flat files are expected directly in the output directory
    root_nc_files = list(outpath.glob("*.nc"))
    assert root_nc_files, "No flat .nc files at root without --rename"

    # No nesting may appear when --rename is absent
    subdirs = [entry for entry in outpath.iterdir() if entry.is_dir()]
    assert not subdirs, f"Subdirs created without --rename: {subdirs}"
|
|
||
|
|
||
def test_split_rename_cleanup():
    '''Removes the files and directories created by the split+rename tests.

    Walks test_dir collecting every file ending in ".nc" and every directory
    whose name starts with rename_outdir_prefix. The files are deleted first,
    then the collected directories and everything beneath them are removed
    with rmdir. Asserts that none of the collected files or directories remain.
    '''
    nc_paths = []
    prefixed_dirs = []
    for root, dirnames, filenames in os.walk(test_dir):
        nc_paths.extend(osp.join(root, fname)
                        for fname in filenames if fname.endswith(".nc"))
        prefixed_dirs.extend(osp.join(root, dname)
                             for dname in dirnames
                             if dname.startswith(rename_outdir_prefix))

    for nc_path in nc_paths:
        Path(nc_path).unlink()

    # Gather every directory under the prefixed dirs; reverse-sorted order puts
    # children ahead of their parents, so rmdir only ever sees emptied dirs
    doomed_dirs = [root
                   for top in prefixed_dirs
                   for root, _dirnames, _filenames in os.walk(top, topdown=False)]
    for doomed in sorted(doomed_dirs, reverse=True):
        if osp.isdir(doomed):
            Path(doomed).rmdir()

    files_gone = [not osp.isfile(nc_path) for nc_path in nc_paths]
    dirs_gone = [not osp.isdir(top) for top in prefixed_dirs]
    assert all(files_gone + dirs_gone)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -213,3 +213,15 @@ def test_cli_fre_pp_split_netcdf_opt_dne(): | |
| ''' fre pp split-netcdf optionDNE ''' | ||
| result = runner.invoke(fre.fre, args=["pp", "split-netcdf", "optionDNE"]) | ||
| assert result.exit_code == 2 | ||
|
|
||
def test_cli_fre_pp_split_netcdf_rename_help():
    ''' fre pp split-netcdf --help exits cleanly and lists the --rename option '''
    help_args = ["pp", "split-netcdf", "--help"]
    help_result = runner.invoke(fre.fre, args=help_args)
    assert help_result.exit_code == 0
    assert "--rename" in help_result.output
|
|
||
def test_cli_fre_pp_split_netcdf_diag_manifest_help():
    ''' fre pp split-netcdf --help exits cleanly and lists the --diag-manifest option '''
    help_args = ["pp", "split-netcdf", "--help"]
    help_result = runner.invoke(fre.fre, args=help_args)
    assert help_result.exit_code == 0
    assert "--diag-manifest" in help_result.output
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This and the new test above it do not test anything other than the existence of the click options! |
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"During splitting", not "after splitting", to make it clear it's a single operation, not two.