-
Notifications
You must be signed in to change notification settings - Fork 31
tests: add unit tests for functions without direct test coverage #94
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,76 @@ | ||
| """ | ||
| Unit tests for ops.chunking module. | ||
| """ | ||
| import numpy as np | ||
| import pytest | ||
| import xarray as xr | ||
|
|
||
| from mllam_data_prep.ops.chunking import check_chunk_size, chunk_dataset | ||
|
|
||
|
|
||
@pytest.fixture
def small_dataset():
    """Build a small 10x10, two-variable dataset for the chunking tests."""
    side = 10
    axes = {"x": range(side), "y": range(side)}
    data_vars = {
        name: (["x", "y"], np.random.random((side, side)))
        for name in ("var1", "var2")
    }
    return xr.Dataset(data_vars, coords=axes)
|
|
||
|
|
||
@pytest.fixture
def large_dataset():
    """Build a single-variable 5000x5000 float64 dataset (~200 MB).

    NOTE(review): the original comment's math ("need > 1GB / 8 = 134217728
    elements") does not match this size — whether this is large enough to
    trigger the chunk-size warning depends on check_chunk_size's threshold;
    confirm against the implementation. The allocation is also heavy for CI.
    """
    side = 5000
    values = np.random.random((side, side))
    return xr.Dataset(
        {"large_var": (["x", "y"], values)},
        coords={"x": range(side), "y": range(side)},
    )
|
|
||
|
|
||
def test_check_chunk_size_small_chunks(small_dataset, caplog):
    """check_chunk_size with small chunks should emit no log records."""
    check_chunk_size(small_dataset, {"x": 5, "y": 5})
    # An empty caplog means nothing was logged at warning level or above.
    assert not caplog.records
|
|
||
|
|
||
def test_check_chunk_size_large_chunks(large_dataset, caplog):
    """check_chunk_size with large chunks should log a warning.

    NOTE(review): chunks of 1000x1000 float64 are ~8 MB per chunk — whether
    that exceeds check_chunk_size's threshold must be confirmed against the
    implementation.
    """
    check_chunk_size(large_dataset, {"x": 1000, "y": 1000})
    assert caplog.records  # at least one record was emitted
    first_record = caplog.records[0]
    assert "exceeds" in first_record.message.lower()
|
||
|
|
||
|
|
||
def test_check_chunk_size_missing_dimension(small_dataset):
    """A chunk key absent from the dataset ("z") must be skipped, not raise."""
    chunk_spec = {"x": 5, "z": 10}  # "z" is not a dimension of small_dataset
    check_chunk_size(small_dataset, chunk_spec)
|
|
||
|
|
||
def test_chunk_dataset_success(small_dataset):
    """chunk_dataset returns an xr.Dataset with chunking applied."""
    result = chunk_dataset(small_dataset, {"x": 5, "y": 5})
    assert isinstance(result, xr.Dataset)
    # A non-None .chunks on the variable shows dask chunking took effect.
    assert result["var1"].chunks is not None
|
|
||
|
|
||
def test_chunk_dataset_invalid_chunks(small_dataset):
    """A negative chunk size must surface as an 'Error chunking dataset' error."""
    bad_chunks = {"x": -1}  # negative sizes are not a valid chunk spec
    with pytest.raises(Exception, match="Error chunking dataset"):
        chunk_dataset(small_dataset, bad_chunks)
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,47 @@ | ||||||
| """ | ||||||
| Unit tests for ops.loading module. | ||||||
| """ | ||||||
| import tempfile | ||||||
| from pathlib import Path | ||||||
|
|
||||||
|
||||||
| import tempfile | |
| from pathlib import Path |
Copilot
AI
Feb 20, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sample_dataset.to_netcdf(...) requires an optional NetCDF engine (typically scipy, netCDF4, or h5netcdf). The project dependencies don’t appear to include any of these, so this test may fail in CI depending on the environment. Consider using pytest.importorskip(...) for the chosen engine and specifying it explicitly (e.g., engine="scipy"), or adding an explicit test dependency to ensure NetCDF support is available.
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -0,0 +1,84 @@ | ||||
| """ | ||||
| Unit tests for helper functions in ops.selection module. | ||||
| """ | ||||
| import datetime | ||||
|
||||
| import datetime |
Copilot
AI
Feb 20, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test locks in an IndexError for the single-point coordinate case, which is an implementation accident (coming from indexing all_steps[0] on an empty diff array). It would be more maintainable to update check_step to raise a descriptive ValueError when ds[coord] has fewer than 2 points, and assert that error type/message here instead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| `large_dataset` allocates a 5000x5000 float64 array (~200MB) during test collection/execution, which is likely to slow down or OOM CI. You can trigger the chunk-size warning without a huge dataset (the implementation only uses `chunks` and dtype), so this fixture should be removed or made tiny.