From fe111f57bc47b19401182058479e4a6b188c9857 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Thu, 14 Nov 2024 21:23:10 +0100 Subject: [PATCH 1/7] add codespell pre-commit hook --- .pre-commit-config.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2bdb1ecaa69..74f4e8963e8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -46,3 +46,9 @@ repos: rev: ebf0b5e44d67f8beaa1cd13a0d0393ea04c6058d hooks: - id: validate-cff + - repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell + additional_dependencies: + - tomli From 3374320a5a8d7079e6a76f8e0eb446a1b13c03a8 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Thu, 14 Nov 2024 21:52:25 +0100 Subject: [PATCH 2/7] apply codespell --- properties/test_pandas_roundtrip.py | 8 ++-- pyproject.toml | 59 ++++++++++++++++------------- xarray/core/accessor_str.py | 40 +++++++++---------- xarray/tests/test_accessor_str.py | 2 +- xarray/tests/test_backends.py | 8 ++-- xarray/tests/test_cftime_offsets.py | 2 +- xarray/tests/test_cftimeindex.py | 4 +- xarray/tests/test_coarsen.py | 6 +-- xarray/tests/test_dataset.py | 4 +- xarray/tests/test_variable.py | 2 +- 10 files changed, 70 insertions(+), 65 deletions(-) diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py index 8fc32e75cbd..91cb9d7cc52 100644 --- a/properties/test_pandas_roundtrip.py +++ b/properties/test_pandas_roundtrip.py @@ -95,12 +95,12 @@ def test_roundtrip_dataset(dataset) -> None: @given(numeric_series, st.text()) -def test_roundtrip_pandas_series(ser, ix_name) -> None: +def test_roundtrip_pandas_series(ser, ix_name) -> None: # codespell:ignore ser # Need to name the index, otherwise Xarray calls it 'dim_0'. - ser.index.name = ix_name - arr = xr.DataArray(ser) + ser.index.name = ix_name # codespell:ignore ser + arr = xr.DataArray(ser) # codespell:ignore ser roundtripped = arr.to_pandas() - pd.testing.assert_series_equal(ser, roundtripped) + pd.testing.assert_series_equal(ser, roundtripped) # codespell:ignore ser xr.testing.assert_identical(arr, roundtripped.to_xarray()) diff --git a/pyproject.toml b/pyproject.toml index 55c8d92bfdb..2c6f9ebefb2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,8 +50,8 @@ dev = [ "sphinx_autosummary_accessors", "xarray[complete]", ] -io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"] etc = ["sparse"] +io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"] parallel = ["dask[complete]"] viz = ["cartopy", "matplotlib", "nc-time-axis", "seaborn"] @@ -234,40 +234,39 @@ extend-exclude = [ ] [tool.ruff.lint] +extend-safe-fixes = [ + "TID252", # absolute imports +] extend-select = [ - "F", # Pyflakes - "E", # pycodestyle errors - "W", # pycodestyle warnings - "I", # isort - "UP", # pyupgrade - "B", # flake8-bugbear - "C4", # flake8-comprehensions - "PIE", # flake8-pie - "TID", # flake8-tidy-imports (absolute imports) - "PGH", # pygrep-hooks + "F", # Pyflakes + "E", # pycodestyle errors + "W", # pycodestyle warnings + "I", # isort + "UP", # pyupgrade + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "PIE", # flake8-pie + "TID", # flake8-tidy-imports (absolute imports) + "PGH", # pygrep-hooks "PERF", # Perflint "RUF", ] -extend-safe-fixes = [ - "TID252", # absolute imports -] ignore = [ - "E402", # module level import not at top of file - "E501", # line too long - let the formatter worry about that - "E731", # do not assign a lambda expression, use a def - "UP007", # use X | Y for type annotations - "UP027", # deprecated - "C40", # unnecessary generator, comprehension, or literal - "PIE790", # unnecessary pass statement + "E402", # module level import not at top of file + "E501", # line too long - let the formatter worry about that + "E731", # do not assign a lambda expression, use a def + "UP007", # use X | Y for type annotations + "UP027", # deprecated + "C40", # unnecessary generator, comprehension, or literal + "PIE790", # unnecessary pass statement "PERF203", # try-except within a loop incurs performance overhead - "RUF001", # string contains ambiguous unicode character - "RUF002", # docstring contains ambiguous acute accent unicode character - "RUF003", # comment contains ambiguous no-break space unicode character - "RUF005", # consider upacking operator instead of concatenation - "RUF012", # mutable class attributes + "RUF001", # string contains ambiguous unicode character + "RUF002", # docstring contains ambiguous acute accent unicode character + "RUF003", # comment contains ambiguous no-break space unicode character + "RUF005", # consider unpacking operator instead of concatenation + "RUF012", # mutable class attributes ] - [tool.ruff.lint.per-file-ignores] # don't enforce absolute imports "asv_bench/**" = ["TID252"] @@ -349,3 +348,9 @@ test = "pytest" ignore = [ "PP308", # This option creates a large amount of log lines. ] + +[tool.codespell] +enable-color = true +ignore-words-list = "nd,coo,COO,nin,Marge,Commun" +quiet-level = 3 +skip = "whats-new.rst" diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index e44ef75a88b..2528e567c4c 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -2560,7 +2560,7 @@ def split( Returns ------- - splitted : same type as values or object array + split : same type as values or object array Examples -------- @@ -2576,8 +2576,8 @@ def split( Split once and put the results in a new dimension - >>> values.str.split(dim="splitted", maxsplit=1) - Size: 864B + >>> values.str.split(dim="split", maxsplit=1) + Size: 864B array([[['abc', 'def'], ['spam', 'eggs\tswallow'], ['red_blue', '']], @@ -2585,12 +2585,12 @@ def split( [['test0', 'test1\ntest2\n\ntest3'], ['', ''], ['abra', 'ka\nda\tbra']]], dtype='>> values.str.split(dim="splitted") - Size: 768B + >>> values.str.split(dim="split") + Size: 768B array([[['abc', 'def', '', ''], ['spam', 'eggs', 'swallow', ''], ['red_blue', '', '', '']], @@ -2598,7 +2598,7 @@ def split( [['test0', 'test1', 'test2', 'test3'], ['', '', '', ''], ['abra', 'ka', 'da', 'bra']]], dtype='>> values.str.split(dim="splitted", sep=" ") - Size: 2kB + >>> values.str.split(dim="split", sep=" ") + Size: 2kB array([[['abc', 'def', ''], ['spam\t\teggs\tswallow', '', ''], ['red_blue', '', '']], @@ -2631,7 +2631,7 @@ def split( [['test0\ntest1\ntest2\n\ntest3', '', ''], ['', '', ''], ['abra', '', 'ka\nda\tbra']]], dtype='>> values.str.rsplit(dim="splitted", maxsplit=1) - Size: 816B + >>> values.str.rsplit(dim="split", maxsplit=1) + Size: 816B array([[['abc', 'def'], ['spam\t\teggs', 'swallow'], ['', 'red_blue']], @@ -2703,12 +2703,12 @@ def rsplit( [['test0\ntest1\ntest2', 'test3'], ['', ''], ['abra ka\nda', 'bra']]], dtype='>> values.str.rsplit(dim="splitted") - Size: 768B + >>> values.str.rsplit(dim="split") + Size: 768B array([[['', '', 'abc', 'def'], ['', 'spam', 'eggs', 'swallow'], ['', '', '', 'red_blue']], @@ -2716,7 +2716,7 @@ def rsplit( [['test0', 'test1', 'test2', 'test3'], ['', '', '', ''], ['abra', 'ka', 'da', 'bra']]], dtype='>> values.str.rsplit(dim="splitted", sep=" ") - Size: 2kB + >>> values.str.rsplit(dim="split", sep=" ") + Size: 2kB array([[['', 'abc', 'def'], ['', '', 'spam\t\teggs\tswallow'], ['', '', 'red_blue']], @@ -2749,7 +2749,7 @@ def rsplit( [['', '', 'test0\ntest1\ntest2\n\ntest3'], ['', '', ''], ['abra', '', 'ka\nda\tbra']]], dtype=' None: assert_equal(result, expected) # case sensitive without regex - result = values.str.contains("fO", regex=False, case=True) + result = values.str.contains("fO", regex=False, case=True) # codespell:ignore expected = xr.DataArray([False, False, True, False]) assert result.dtype == expected.dtype assert_equal(result, expected) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 7be6eb5ed0d..764b2eae7ed 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1250,12 +1250,12 @@ def test_roundtrip_endian(self) -> None: pass def test_invalid_dataarray_names_raise(self) -> None: - te = (TypeError, "string or None") - ve = (ValueError, "string must be length 1 or") + terr = (TypeError, "string or None") + verr = (ValueError, "string must be length 1 or") data = np.random.random((2, 2)) da = xr.DataArray(data) for name, (error, msg) in zip( - [0, (4, 5), True, ""], [te, te, te, ve], strict=True + [0, (4, 5), True, ""], [terr, terr, terr, verr], strict=True ): ds = Dataset({name: da}) with pytest.raises(error) as excinfo: @@ -3314,7 +3314,7 @@ def test_append(self) -> None: with self.create_zarr_target() as store: if has_zarr_v3: - # TOOD: verify these + # TODO: verify these expected = { "set": 17, "get": 12, diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index f6f97108c1d..0ce6ad3cd73 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1548,7 +1548,7 @@ def test_cftime_or_date_range_invalid_inclusive_value(function: Callable) -> Non if function == cftime_range and not has_cftime: pytest.skip("requires cftime") - with pytest.raises(ValueError, match="nclusive"): + with pytest.raises(ValueError, match="inclusive"): function("2000", periods=3, inclusive="foo") diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 116487e2bcf..9c92c0e19f1 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -1102,8 +1102,8 @@ def test_cftimeindex_repr_formatting_width(periods, display_width): len_intro_str = len("CFTimeIndex(") with xr.set_options(display_width=display_width): repr_str = index.__repr__() - splitted = repr_str.split("\n") - for i, s in enumerate(splitted): + split = repr_str.split("\n") + for i, s in enumerate(split): # check that lines not longer than OPTIONS['display_width'] assert len(s) <= display_width, f"{len(s)} {s} {display_width}" if i > 0: diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py index ab04a7b3cde..5280bdf0019 100644 --- a/xarray/tests/test_coarsen.py +++ b/xarray/tests/test_coarsen.py @@ -262,7 +262,7 @@ class TestCoarsenConstruct: def test_coarsen_construct(self, dask: bool) -> None: ds = Dataset( { - "vart": ("time", np.arange(48), {"a": "b"}), + "vart": ("time", np.arange(48), {"a": "b"}), # codespell:ignore vart "varx": ("x", np.arange(10), {"a": "b"}), "vartx": (("x", "time"), np.arange(480).reshape(10, 48), {"a": "b"}), "vary": ("y", np.arange(12)), @@ -275,9 +275,9 @@ def test_coarsen_construct(self, dask: bool) -> None: ds = ds.chunk({"x": 4, "time": 10}) expected = xr.Dataset(attrs={"foo": "bar"}) - expected["vart"] = ( + expected["vart"] = ( # codespell:ignore vart ("year", "month"), - duck_array_ops.reshape(ds.vart.data, (-1, 12)), + duck_array_ops.reshape(ds.vart.data, (-1, 12)), # codespell:ignore vart {"a": "b"}, ) expected["varx"] = ( diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index be82655515d..2bec5a5718a 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4930,8 +4930,8 @@ def test_from_dataframe_categorical_index_string_categories(self) -> None: categories=pd.Index(["foo", "bar", "baz"], dtype="string"), ) ) - ser = pd.Series(1, index=cat) - ds = ser.to_xarray() + series = pd.Series(1, index=cat) + ds = series.to_xarray() assert ds.coords.dtypes["index"] == np.dtype("O") @requires_sparse diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 9c6f50037d3..11a7ec34b18 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -871,7 +871,7 @@ def test_getitem_error(self): v = Variable(["x", "y", "z"], np.arange(60).reshape(3, 4, 5)) ind = Variable(["x"], [0, 1]) - with pytest.raises(IndexError, match=r"Dimensions of indexers mis"): + with pytest.raises(IndexError, match=r"Dimensions of indexers mismatch"): v[:, ind] @pytest.mark.parametrize( From 85c91cb835dc558698c06f0de203822ac2027736 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Thu, 14 Nov 2024 22:04:00 +0100 Subject: [PATCH 3/7] fix capitalization --- xarray/tests/test_cftime_offsets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 0ce6ad3cd73..49ca138f6aa 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1548,7 +1548,7 @@ def test_cftime_or_date_range_invalid_inclusive_value(function: Callable) -> Non if function == cftime_range and not has_cftime: pytest.skip("requires cftime") - with pytest.raises(ValueError, match="inclusive"): + with pytest.raises(ValueError, match="Inclusive"): function("2000", periods=3, inclusive="foo") From d21cf39eb3c2625039555c1b8f352a4865aead46 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Thu, 14 Nov 2024 22:08:58 +0100 Subject: [PATCH 4/7] try fixing codespell in CI --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2c6f9ebefb2..54e9df97c01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -353,4 +353,4 @@ ignore = [ enable-color = true ignore-words-list = "nd,coo,COO,nin,Marge,Commun" quiet-level = 3 -skip = "whats-new.rst" +skip = "./doc/whats-new.rst" From 40024748a9dc0adc0a93ce397faadbf49d4246a3 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Thu, 14 Nov 2024 22:18:03 +0100 Subject: [PATCH 5/7] fix match for good, maybe --- xarray/tests/test_cftime_offsets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 49ca138f6aa..504437ce572 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1548,7 +1548,7 @@ def test_cftime_or_date_range_invalid_inclusive_value(function: Callable) -> Non if function == cftime_range and not has_cftime: pytest.skip("requires cftime") - with pytest.raises(ValueError, match="Inclusive"): + with pytest.raises(ValueError, match="nclusi"): function("2000", periods=3, inclusive="foo") From e58628790ced342351b7be93d592b223879cf801 Mon Sep 17 00:00:00 2001 From: Michael Niklas Date: Fri, 15 Nov 2024 21:23:30 +0100 Subject: [PATCH 6/7] use ignore-words-list instead of skipping whats-new --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 54e9df97c01..8f946c614f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -351,6 +351,5 @@ ignore = [ [tool.codespell] enable-color = true -ignore-words-list = "nd,coo,COO,nin,Marge,Commun" +ignore-words-list = "nd,coo,COO,nin,Marge,Commun,Claus,Soler,Tung,Celles,slowy" quiet-level = 3 -skip = "./doc/whats-new.rst" From e2ce0cee6966e6192d8d455567586ab1b5a4584e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 15:38:46 +0000 Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .pre-commit-config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cfd7566c5fd..b928c39353c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -62,10 +62,10 @@ repos: - repo: https://github.com/citation-file-format/cff-converter-python rev: ebf0b5e44d67f8beaa1cd13a0d0393ea04c6058d hooks: - - id: validate-cff + - id: validate-cff - repo: https://github.com/codespell-project/codespell rev: v2.3.0 hooks: - - id: codespell - additional_dependencies: - - tomli \ No newline at end of file + - id: codespell + additional_dependencies: + - tomli