From fe111f57bc47b19401182058479e4a6b188c9857 Mon Sep 17 00:00:00 2001
From: Michael Niklas <mick.niklas@gmail.com>
Date: Thu, 14 Nov 2024 21:23:10 +0100
Subject: [PATCH 1/7] add codespell pre-commit hook

---
 .pre-commit-config.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2bdb1ecaa69..74f4e8963e8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -46,3 +46,9 @@ repos:
     rev: ebf0b5e44d67f8beaa1cd13a0d0393ea04c6058d
     hooks:
     - id: validate-cff
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.3.0
+    hooks:
+    - id: codespell
+      additional_dependencies:
+        - tomli

From 3374320a5a8d7079e6a76f8e0eb446a1b13c03a8 Mon Sep 17 00:00:00 2001
From: Michael Niklas <mick.niklas@gmail.com>
Date: Thu, 14 Nov 2024 21:52:25 +0100
Subject: [PATCH 2/7] apply codespell fixes and add codespell config to pyproject.toml

---
 properties/test_pandas_roundtrip.py |  8 ++--
 pyproject.toml                      | 59 ++++++++++++++++-------------
 xarray/core/accessor_str.py         | 40 +++++++++----------
 xarray/tests/test_accessor_str.py   |  2 +-
 xarray/tests/test_backends.py       |  8 ++--
 xarray/tests/test_cftime_offsets.py |  2 +-
 xarray/tests/test_cftimeindex.py    |  4 +-
 xarray/tests/test_coarsen.py        |  6 +--
 xarray/tests/test_dataset.py        |  4 +-
 xarray/tests/test_variable.py       |  2 +-
 10 files changed, 70 insertions(+), 65 deletions(-)

diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py
index 8fc32e75cbd..91cb9d7cc52 100644
--- a/properties/test_pandas_roundtrip.py
+++ b/properties/test_pandas_roundtrip.py
@@ -95,12 +95,12 @@ def test_roundtrip_dataset(dataset) -> None:
 
 
 @given(numeric_series, st.text())
-def test_roundtrip_pandas_series(ser, ix_name) -> None:
+def test_roundtrip_pandas_series(ser, ix_name) -> None:  # codespell:ignore ser
     # Need to name the index, otherwise Xarray calls it 'dim_0'.
-    ser.index.name = ix_name
-    arr = xr.DataArray(ser)
+    ser.index.name = ix_name  # codespell:ignore ser
+    arr = xr.DataArray(ser)  # codespell:ignore ser
     roundtripped = arr.to_pandas()
-    pd.testing.assert_series_equal(ser, roundtripped)
+    pd.testing.assert_series_equal(ser, roundtripped)  # codespell:ignore ser
     xr.testing.assert_identical(arr, roundtripped.to_xarray())
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 55c8d92bfdb..2c6f9ebefb2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,8 +50,8 @@ dev = [
   "sphinx_autosummary_accessors",
   "xarray[complete]",
 ]
-io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"]
 etc = ["sparse"]
+io = ["netCDF4", "h5netcdf", "scipy", 'pydap; python_version<"3.10"', "zarr", "fsspec", "cftime", "pooch"]
 parallel = ["dask[complete]"]
 viz = ["cartopy", "matplotlib", "nc-time-axis", "seaborn"]
 
@@ -234,40 +234,39 @@ extend-exclude = [
 ]
 
 [tool.ruff.lint]
+extend-safe-fixes = [
+  "TID252", # absolute imports
+]
 extend-select = [
-  "F",    # Pyflakes
-  "E",    # pycodestyle errors
-  "W",    # pycodestyle warnings
-  "I",    # isort
-  "UP",   # pyupgrade
-  "B",    # flake8-bugbear
-  "C4",   # flake8-comprehensions
-  "PIE",  # flake8-pie
-  "TID",  # flake8-tidy-imports (absolute imports)
-  "PGH",  # pygrep-hooks
+  "F", # Pyflakes
+  "E", # pycodestyle errors
+  "W", # pycodestyle warnings
+  "I", # isort
+  "UP", # pyupgrade
+  "B", # flake8-bugbear
+  "C4", # flake8-comprehensions
+  "PIE", # flake8-pie
+  "TID", # flake8-tidy-imports (absolute imports)
+  "PGH", # pygrep-hooks
   "PERF", # Perflint
   "RUF",
 ]
-extend-safe-fixes = [
-  "TID252",  # absolute imports
-]
 ignore = [
-  "E402",    # module level import not at top of file
-  "E501",    # line too long - let the formatter worry about that
-  "E731",    # do not assign a lambda expression, use a def
-  "UP007",   # use X | Y for type annotations
-  "UP027",   # deprecated
-  "C40",     # unnecessary generator, comprehension, or literal
-  "PIE790",  # unnecessary pass statement
+  "E402", # module level import not at top of file
+  "E501", # line too long - let the formatter worry about that
+  "E731", # do not assign a lambda expression, use a def
+  "UP007", # use X | Y for type annotations
+  "UP027", # deprecated
+  "C40", # unnecessary generator, comprehension, or literal
+  "PIE790", # unnecessary pass statement
   "PERF203", # try-except within a loop incurs performance overhead
-  "RUF001",  # string contains ambiguous unicode character
-  "RUF002",  # docstring contains ambiguous acute accent unicode character
-  "RUF003",  # comment contains ambiguous no-break space unicode character
-  "RUF005",  # consider upacking operator instead of concatenation
-  "RUF012",  # mutable class attributes
+  "RUF001", # string contains ambiguous unicode character
+  "RUF002", # docstring contains ambiguous acute accent unicode character
+  "RUF003", # comment contains ambiguous no-break space unicode character
+  "RUF005", # consider unpacking operator instead of concatenation
+  "RUF012", # mutable class attributes
 ]
 
-
 [tool.ruff.lint.per-file-ignores]
 # don't enforce absolute imports
 "asv_bench/**" = ["TID252"]
@@ -349,3 +348,9 @@ test = "pytest"
 ignore = [
   "PP308", # This option creates a large amount of log lines.
 ]
+
+[tool.codespell]
+enable-color = true
+ignore-words-list = "nd,coo,COO,nin,Marge,Commun"
+quiet-level = 3
+skip = "whats-new.rst"
diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py
index e44ef75a88b..2528e567c4c 100644
--- a/xarray/core/accessor_str.py
+++ b/xarray/core/accessor_str.py
@@ -2560,7 +2560,7 @@ def split(
 
         Returns
         -------
-        splitted : same type as values or object array
+        split : same type as values or object array
 
         Examples
         --------
@@ -2576,8 +2576,8 @@ def split(
 
         Split once and put the results in a new dimension
 
-        >>> values.str.split(dim="splitted", maxsplit=1)
-        <xarray.DataArray (X: 2, Y: 3, splitted: 2)> Size: 864B
+        >>> values.str.split(dim="split", maxsplit=1)
+        <xarray.DataArray (X: 2, Y: 3, split: 2)> Size: 864B
         array([[['abc', 'def'],
                 ['spam', 'eggs\tswallow'],
                 ['red_blue', '']],
@@ -2585,12 +2585,12 @@ def split(
                [['test0', 'test1\ntest2\n\ntest3'],
                 ['', ''],
                 ['abra', 'ka\nda\tbra']]], dtype='<U18')
-        Dimensions without coordinates: X, Y, splitted
+        Dimensions without coordinates: X, Y, split
 
         Split as many times as needed and put the results in a new dimension
 
-        >>> values.str.split(dim="splitted")
-        <xarray.DataArray (X: 2, Y: 3, splitted: 4)> Size: 768B
+        >>> values.str.split(dim="split")
+        <xarray.DataArray (X: 2, Y: 3, split: 4)> Size: 768B
         array([[['abc', 'def', '', ''],
                 ['spam', 'eggs', 'swallow', ''],
                 ['red_blue', '', '', '']],
@@ -2598,7 +2598,7 @@ def split(
                [['test0', 'test1', 'test2', 'test3'],
                 ['', '', '', ''],
                 ['abra', 'ka', 'da', 'bra']]], dtype='<U8')
-        Dimensions without coordinates: X, Y, splitted
+        Dimensions without coordinates: X, Y, split
 
         Split once and put the results in lists
 
@@ -2622,8 +2622,8 @@ def split(
 
         Split only on spaces
 
-        >>> values.str.split(dim="splitted", sep=" ")
-        <xarray.DataArray (X: 2, Y: 3, splitted: 3)> Size: 2kB
+        >>> values.str.split(dim="split", sep=" ")
+        <xarray.DataArray (X: 2, Y: 3, split: 3)> Size: 2kB
         array([[['abc', 'def', ''],
                 ['spam\t\teggs\tswallow', '', ''],
                 ['red_blue', '', '']],
@@ -2631,7 +2631,7 @@ def split(
                [['test0\ntest1\ntest2\n\ntest3', '', ''],
                 ['', '', ''],
                 ['abra', '', 'ka\nda\tbra']]], dtype='<U24')
-        Dimensions without coordinates: X, Y, splitted
+        Dimensions without coordinates: X, Y, split
 
         See Also
         --------
@@ -2678,7 +2678,7 @@ def rsplit(
 
         Returns
         -------
-        rsplitted : same type as values or object array
+        rsplit : same type as values or object array
 
         Examples
         --------
@@ -2694,8 +2694,8 @@ def rsplit(
 
         Split once and put the results in a new dimension
 
-        >>> values.str.rsplit(dim="splitted", maxsplit=1)
-        <xarray.DataArray (X: 2, Y: 3, splitted: 2)> Size: 816B
+        >>> values.str.rsplit(dim="split", maxsplit=1)
+        <xarray.DataArray (X: 2, Y: 3, split: 2)> Size: 816B
         array([[['abc', 'def'],
                 ['spam\t\teggs', 'swallow'],
                 ['', 'red_blue']],
@@ -2703,12 +2703,12 @@ def rsplit(
                [['test0\ntest1\ntest2', 'test3'],
                 ['', ''],
                 ['abra  ka\nda', 'bra']]], dtype='<U17')
-        Dimensions without coordinates: X, Y, splitted
+        Dimensions without coordinates: X, Y, split
 
         Split as many times as needed and put the results in a new dimension
 
-        >>> values.str.rsplit(dim="splitted")
-        <xarray.DataArray (X: 2, Y: 3, splitted: 4)> Size: 768B
+        >>> values.str.rsplit(dim="split")
+        <xarray.DataArray (X: 2, Y: 3, split: 4)> Size: 768B
         array([[['', '', 'abc', 'def'],
                 ['', 'spam', 'eggs', 'swallow'],
                 ['', '', '', 'red_blue']],
@@ -2716,7 +2716,7 @@ def rsplit(
                [['test0', 'test1', 'test2', 'test3'],
                 ['', '', '', ''],
                 ['abra', 'ka', 'da', 'bra']]], dtype='<U8')
-        Dimensions without coordinates: X, Y, splitted
+        Dimensions without coordinates: X, Y, split
 
         Split once and put the results in lists
 
@@ -2740,8 +2740,8 @@ def rsplit(
 
         Split only on spaces
 
-        >>> values.str.rsplit(dim="splitted", sep=" ")
-        <xarray.DataArray (X: 2, Y: 3, splitted: 3)> Size: 2kB
+        >>> values.str.rsplit(dim="split", sep=" ")
+        <xarray.DataArray (X: 2, Y: 3, split: 3)> Size: 2kB
         array([[['', 'abc', 'def'],
                 ['', '', 'spam\t\teggs\tswallow'],
                 ['', '', 'red_blue']],
@@ -2749,7 +2749,7 @@ def rsplit(
                [['', '', 'test0\ntest1\ntest2\n\ntest3'],
                 ['', '', ''],
                 ['abra', '', 'ka\nda\tbra']]], dtype='<U24')
-        Dimensions without coordinates: X, Y, splitted
+        Dimensions without coordinates: X, Y, split
 
         See Also
         --------
diff --git a/xarray/tests/test_accessor_str.py b/xarray/tests/test_accessor_str.py
index d7838ff0667..f5ff1124c93 100644
--- a/xarray/tests/test_accessor_str.py
+++ b/xarray/tests/test_accessor_str.py
@@ -130,7 +130,7 @@ def test_contains(dtype) -> None:
     assert_equal(result, expected)
 
     # case sensitive without regex
-    result = values.str.contains("fO", regex=False, case=True)
+    result = values.str.contains("fO", regex=False, case=True)  # codespell:ignore
     expected = xr.DataArray([False, False, True, False])
     assert result.dtype == expected.dtype
     assert_equal(result, expected)
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 7be6eb5ed0d..764b2eae7ed 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1250,12 +1250,12 @@ def test_roundtrip_endian(self) -> None:
                     pass
 
     def test_invalid_dataarray_names_raise(self) -> None:
-        te = (TypeError, "string or None")
-        ve = (ValueError, "string must be length 1 or")
+        terr = (TypeError, "string or None")
+        verr = (ValueError, "string must be length 1 or")
         data = np.random.random((2, 2))
         da = xr.DataArray(data)
         for name, (error, msg) in zip(
-            [0, (4, 5), True, ""], [te, te, te, ve], strict=True
+            [0, (4, 5), True, ""], [terr, terr, terr, verr], strict=True
         ):
             ds = Dataset({name: da})
             with pytest.raises(error) as excinfo:
@@ -3314,7 +3314,7 @@ def test_append(self) -> None:
 
         with self.create_zarr_target() as store:
             if has_zarr_v3:
-                # TOOD: verify these
+                # TODO: verify these
                 expected = {
                     "set": 17,
                     "get": 12,
diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py
index f6f97108c1d..0ce6ad3cd73 100644
--- a/xarray/tests/test_cftime_offsets.py
+++ b/xarray/tests/test_cftime_offsets.py
@@ -1548,7 +1548,7 @@ def test_cftime_or_date_range_invalid_inclusive_value(function: Callable) -> Non
     if function == cftime_range and not has_cftime:
         pytest.skip("requires cftime")
 
-    with pytest.raises(ValueError, match="nclusive"):
+    with pytest.raises(ValueError, match="inclusive"):
         function("2000", periods=3, inclusive="foo")
 
 
diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py
index 116487e2bcf..9c92c0e19f1 100644
--- a/xarray/tests/test_cftimeindex.py
+++ b/xarray/tests/test_cftimeindex.py
@@ -1102,8 +1102,8 @@ def test_cftimeindex_repr_formatting_width(periods, display_width):
     len_intro_str = len("CFTimeIndex(")
     with xr.set_options(display_width=display_width):
         repr_str = index.__repr__()
-        splitted = repr_str.split("\n")
-        for i, s in enumerate(splitted):
+        split = repr_str.split("\n")
+        for i, s in enumerate(split):
             # check that lines not longer than OPTIONS['display_width']
             assert len(s) <= display_width, f"{len(s)} {s} {display_width}"
             if i > 0:
diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py
index ab04a7b3cde..5280bdf0019 100644
--- a/xarray/tests/test_coarsen.py
+++ b/xarray/tests/test_coarsen.py
@@ -262,7 +262,7 @@ class TestCoarsenConstruct:
     def test_coarsen_construct(self, dask: bool) -> None:
         ds = Dataset(
             {
-                "vart": ("time", np.arange(48), {"a": "b"}),
+                "vart": ("time", np.arange(48), {"a": "b"}),  # codespell:ignore vart
                 "varx": ("x", np.arange(10), {"a": "b"}),
                 "vartx": (("x", "time"), np.arange(480).reshape(10, 48), {"a": "b"}),
                 "vary": ("y", np.arange(12)),
@@ -275,9 +275,9 @@ def test_coarsen_construct(self, dask: bool) -> None:
             ds = ds.chunk({"x": 4, "time": 10})
 
         expected = xr.Dataset(attrs={"foo": "bar"})
-        expected["vart"] = (
+        expected["vart"] = (  # codespell:ignore vart
             ("year", "month"),
-            duck_array_ops.reshape(ds.vart.data, (-1, 12)),
+            duck_array_ops.reshape(ds.vart.data, (-1, 12)),  # codespell:ignore vart
             {"a": "b"},
         )
         expected["varx"] = (
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index be82655515d..2bec5a5718a 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -4930,8 +4930,8 @@ def test_from_dataframe_categorical_index_string_categories(self) -> None:
                 categories=pd.Index(["foo", "bar", "baz"], dtype="string"),
             )
         )
-        ser = pd.Series(1, index=cat)
-        ds = ser.to_xarray()
+        series = pd.Series(1, index=cat)
+        ds = series.to_xarray()
         assert ds.coords.dtypes["index"] == np.dtype("O")
 
     @requires_sparse
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 9c6f50037d3..11a7ec34b18 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -871,7 +871,7 @@ def test_getitem_error(self):
 
         v = Variable(["x", "y", "z"], np.arange(60).reshape(3, 4, 5))
         ind = Variable(["x"], [0, 1])
-        with pytest.raises(IndexError, match=r"Dimensions of indexers mis"):
+        with pytest.raises(IndexError, match=r"Dimensions of indexers mismatch"):
             v[:, ind]
 
     @pytest.mark.parametrize(

From 85c91cb835dc558698c06f0de203822ac2027736 Mon Sep 17 00:00:00 2001
From: Michael Niklas <mick.niklas@gmail.com>
Date: Thu, 14 Nov 2024 22:04:00 +0100
Subject: [PATCH 3/7] fix capitalization

---
 xarray/tests/test_cftime_offsets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py
index 0ce6ad3cd73..49ca138f6aa 100644
--- a/xarray/tests/test_cftime_offsets.py
+++ b/xarray/tests/test_cftime_offsets.py
@@ -1548,7 +1548,7 @@ def test_cftime_or_date_range_invalid_inclusive_value(function: Callable) -> Non
     if function == cftime_range and not has_cftime:
         pytest.skip("requires cftime")
 
-    with pytest.raises(ValueError, match="inclusive"):
+    with pytest.raises(ValueError, match="Inclusive"):
         function("2000", periods=3, inclusive="foo")
 
 

From d21cf39eb3c2625039555c1b8f352a4865aead46 Mon Sep 17 00:00:00 2001
From: Michael Niklas <mick.niklas@gmail.com>
Date: Thu, 14 Nov 2024 22:08:58 +0100
Subject: [PATCH 4/7] try fixing codespell in CI by skipping ./doc/whats-new.rst

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2c6f9ebefb2..54e9df97c01 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -353,4 +353,4 @@ ignore = [
 enable-color = true
 ignore-words-list = "nd,coo,COO,nin,Marge,Commun"
 quiet-level = 3
-skip = "whats-new.rst"
+skip = "./doc/whats-new.rst"

From 40024748a9dc0adc0a93ce397faadbf49d4246a3 Mon Sep 17 00:00:00 2001
From: Michael Niklas <mick.niklas@gmail.com>
Date: Thu, 14 Nov 2024 22:18:03 +0100
Subject: [PATCH 5/7] match "nclusi" so either capitalization of "inclusive" passes

---
 xarray/tests/test_cftime_offsets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py
index 49ca138f6aa..504437ce572 100644
--- a/xarray/tests/test_cftime_offsets.py
+++ b/xarray/tests/test_cftime_offsets.py
@@ -1548,7 +1548,7 @@ def test_cftime_or_date_range_invalid_inclusive_value(function: Callable) -> Non
     if function == cftime_range and not has_cftime:
         pytest.skip("requires cftime")
 
-    with pytest.raises(ValueError, match="Inclusive"):
+    with pytest.raises(ValueError, match="nclusi"):
         function("2000", periods=3, inclusive="foo")
 
 

From e58628790ced342351b7be93d592b223879cf801 Mon Sep 17 00:00:00 2001
From: Michael Niklas <mick.niklas@gmail.com>
Date: Fri, 15 Nov 2024 21:23:30 +0100
Subject: [PATCH 6/7] use ignore-words-list instead of skipping whats-new

---
 pyproject.toml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 54e9df97c01..8f946c614f1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -351,6 +351,5 @@ ignore = [
 
 [tool.codespell]
 enable-color = true
-ignore-words-list = "nd,coo,COO,nin,Marge,Commun"
+ignore-words-list = "nd,coo,COO,nin,Marge,Commun,Claus,Soler,Tung,Celles,slowy"
 quiet-level = 3
-skip = "./doc/whats-new.rst"

From e2ce0cee6966e6192d8d455567586ab1b5a4584e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Nov 2024 15:38:46 +0000
Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .pre-commit-config.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index cfd7566c5fd..b928c39353c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -62,10 +62,10 @@ repos:
   - repo: https://github.com/citation-file-format/cff-converter-python
     rev: ebf0b5e44d67f8beaa1cd13a0d0393ea04c6058d
     hooks:
-    - id: validate-cff
+      - id: validate-cff
   - repo: https://github.com/codespell-project/codespell
     rev: v2.3.0
     hooks:
-    - id: codespell
-      additional_dependencies:
-        - tomli
\ No newline at end of file
+      - id: codespell
+        additional_dependencies:
+          - tomli