Commit 81922a7
[ci] [python-package] update pre-commit hooks to latest versions (#6817)
jameslamb authored Feb 7, 2025
1 parent 2db0b25 commit 81922a7
Showing 13 changed files with 34 additions and 36 deletions.
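Most of the Python changes below appear to be mechanical output of the updated ruff formatter, which joins implicitly concatenated string literals into a single literal. A minimal sketch (not part of the diff; the variable names are illustrative) of why such a rewrite is behavior-preserving, since Python concatenates adjacent string literals at compile time:

```python
# Adjacent string literals are merged by the parser, so the single f-string the
# formatter produces is identical to the old implicitly concatenated pair.
name = "label"
old_style = f"Wrong type for {name}.\n" "It should be list, numpy 1-D array or pandas Series"
new_style = f"Wrong type for {name}.\nIt should be list, numpy 1-D array or pandas Series"
assert old_style == new_style
```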
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -24,7 +24,7 @@ repos:
args: ["--strict"]
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
-rev: v0.8.3
+rev: v0.9.5
hooks:
# Run the linter.
- id: ruff
@@ -39,7 +39,7 @@ repos:
hooks:
- id: shellcheck
- repo: https://github.com/crate-ci/typos
-rev: v1.28.3
+rev: v1.29.5
hooks:
- id: typos
args: ["--force-exclude"]
2 changes: 1 addition & 1 deletion examples/parallel_learning/train.conf
@@ -11,7 +11,7 @@ boosting_type = gbdt
# alias: application, app
objective = binary

-# eval metrics, support multi metric, delimite by ',' , support following metrics
+# eval metrics, support multi metric, delimited by ',' , support following metrics
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
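The comment above lists metrics that can be given as a comma-delimited list in the config file. As a point of reference, a minimal sketch (not part of this commit; the random data and parameter values are made up) of the equivalent multi-metric setup through the Python API:

```python
import numpy as np
import lightgbm as lgb

# Illustrative stand-in data for the binary-objective example config above.
X = np.random.rand(100, 5)
y = np.random.randint(0, 2, size=100)
train_data = lgb.Dataset(X, label=y)

# Several eval metrics at once, equivalent to a comma-delimited "metric" entry in train.conf.
params = {"objective": "binary", "metric": ["binary_logloss", "auc"], "verbosity": -1}
booster = lgb.train(params, train_data, num_boost_round=5)
```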
2 changes: 1 addition & 1 deletion examples/regression/train.conf
@@ -11,7 +11,7 @@ boosting_type = gbdt
# alias: application, app
objective = regression

-# eval metrics, support multi metric, delimite by ',' , support following metrics
+# eval metrics, support multi metric, delimited by ',' , support following metrics
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
2 changes: 1 addition & 1 deletion examples/xendcg/train.conf
@@ -11,7 +11,7 @@ boosting_type = gbdt
# alias: application, app
objective = rank_xendcg

-# eval metrics, support multi metric, delimite by ',' , support following metrics
+# eval metrics, support multi metric, delimited by ',' , support following metrics
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
13 changes: 6 additions & 7 deletions python-package/lightgbm/basic.py
@@ -381,7 +381,7 @@ def _list_to_1d_numpy(
return np.asarray(data, dtype=dtype) # SparseArray should be supported as well
else:
raise TypeError(
f"Wrong type({type(data).__name__}) for {name}.\n" "It should be list, numpy 1-D array or pandas Series"
f"Wrong type({type(data).__name__}) for {name}.\nIt should be list, numpy 1-D array or pandas Series"
)


@@ -803,8 +803,7 @@ def _check_for_bad_pandas_dtypes(pandas_dtypes_series: pd_Series) -> None:
]
if bad_pandas_dtypes:
raise ValueError(
-'pandas dtypes must be int, float or bool.\n'
-f'Fields with bad pandas dtypes: {", ".join(bad_pandas_dtypes)}'
+f"pandas dtypes must be int, float or bool.\nFields with bad pandas dtypes: {', '.join(bad_pandas_dtypes)}"
)


@@ -3298,7 +3297,7 @@ def get_data(self) -> Optional[_LGBM_TrainDataType]:
self.data = np.array(list(self._yield_row_from_seqlist(self.data, self.used_indices)))
else:
_log_warning(
f"Cannot subset {type(self.data).__name__} type of raw data.\n" "Returning original raw data"
f"Cannot subset {type(self.data).__name__} type of raw data.\nReturning original raw data"
)
self._need_slice = False
if self.data is None:
@@ -3718,7 +3717,7 @@ def __init__(
self.model_from_string(model_str)
else:
raise TypeError(
"Need at least one training dataset or model file or model string " "to create Booster instance"
"Need at least one training dataset or model file or model string to create Booster instance"
)
self.params = params

@@ -4052,7 +4051,7 @@ def add_valid(self, data: Dataset, name: str) -> "Booster":
if not isinstance(data, Dataset):
raise TypeError(f"Validation data should be Dataset instance, met {type(data).__name__}")
if data._predictor is not self.__init_predictor:
raise LightGBMError("Add validation data failed, " "you should use same predictor for these data")
raise LightGBMError("Add validation data failed, you should use same predictor for these data")
_safe_call(
_LIB.LGBM_BoosterAddValidData(
self._handle,
@@ -4138,7 +4137,7 @@ def update(
if not isinstance(train_set, Dataset):
raise TypeError(f"Training data should be Dataset instance, met {type(train_set).__name__}")
if train_set._predictor is not self.__init_predictor:
raise LightGBMError("Replace training data failed, " "you should use same predictor for these data")
raise LightGBMError("Replace training data failed, you should use same predictor for these data")
self.train_set = train_set
_safe_call(
_LIB.LGBM_BoosterResetTrainingData(
2 changes: 1 addition & 1 deletion python-package/lightgbm/callback.py
@@ -393,7 +393,7 @@ def _final_iteration_check(self, *, env: CallbackEnv, metric_name: str, i: int)
if self.verbose:
best_score_str = "\t".join([_format_eval_result(x, show_stdv=True) for x in self.best_score_list[i]])
_log_info(
"Did not meet early stopping. " f"Best iteration is:\n[{self.best_iter[i] + 1}]\t{best_score_str}"
f"Did not meet early stopping. Best iteration is:\n[{self.best_iter[i] + 1}]\t{best_score_str}"
)
if self.first_metric_only:
_log_info(f"Evaluated only: {metric_name}")
14 changes: 7 additions & 7 deletions python-package/lightgbm/dask.py
@@ -1166,7 +1166,7 @@ def __init__(
_before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore
__init__.__doc__ = f"""
{_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
-{' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
+{" ":4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{_kwargs}{_after_kwargs}
"""

@@ -1221,7 +1221,7 @@ def fit( # type: ignore[override]
_base_doc = _base_doc[: _base_doc.find("eval_group :")] + _base_doc[_base_doc.find("eval_metric :") :]

# DaskLGBMClassifier support for callbacks and init_model is not tested
-fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
+fit.__doc__ = f"""{_base_doc[: _base_doc.find("callbacks :")]}**kwargs
Other parameters passed through to ``LGBMClassifier.fit()``.
Returns
@@ -1369,7 +1369,7 @@ def __init__(
_before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore
__init__.__doc__ = f"""
{_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
-{' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
+{" ":4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{_kwargs}{_after_kwargs}
"""

@@ -1424,7 +1424,7 @@ def fit( # type: ignore[override]
_base_doc = _base_doc[: _base_doc.find("eval_group :")] + _base_doc[_base_doc.find("eval_metric :") :]

# DaskLGBMRegressor support for callbacks and init_model is not tested
-fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
+fit.__doc__ = f"""{_base_doc[: _base_doc.find("callbacks :")]}**kwargs
Other parameters passed through to ``LGBMRegressor.fit()``.
Returns
@@ -1536,7 +1536,7 @@ def __init__(
_before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore
__init__.__doc__ = f"""
{_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
-{' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
+{" ":4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{_kwargs}{_after_kwargs}
"""

@@ -1596,11 +1596,11 @@ def fit( # type: ignore[override]
_base_doc[: _base_doc.find("feature_name :")]
+ "eval_at : list or tuple of int, optional (default=(1, 2, 3, 4, 5))\n"
+ f"{' ':8}The evaluation positions of the specified metric.\n"
+ f"{' ':4}{_base_doc[_base_doc.find('feature_name :'):]}"
+ f"{' ':4}{_base_doc[_base_doc.find('feature_name :') :]}"
)

# DaskLGBMRanker support for callbacks and init_model is not tested
-fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
+fit.__doc__ = f"""{_base_doc[: _base_doc.find("callbacks :")]}**kwargs
Other parameters passed through to ``LGBMRanker.fit()``.
Returns
2 changes: 1 addition & 1 deletion python-package/lightgbm/plotting.py
@@ -247,7 +247,7 @@ def plot_split_value_histogram(

hist, split_bins = booster.get_split_value_histogram(feature=feature, bins=bins, xgboost_style=False)
if np.count_nonzero(hist) == 0:
raise ValueError("Cannot plot split value histogram, " f"because feature {feature} was not used in splitting")
raise ValueError(f"Cannot plot split value histogram, because feature {feature} was not used in splitting")
width = width_coef * (split_bins[1] - split_bins[0])
centred = (split_bins[:-1] + split_bins[1:]) / 2

2 changes: 1 addition & 1 deletion src/treelearner/parallel_tree_learner.h
@@ -47,7 +47,7 @@ class FeatureParallelTreeLearner: public TREELEARNER_T {
/*!
* \brief Data parallel learning algorithm.
* Workers use local data to construct histograms locally, then sync up global histograms.
-* It is recommonded used when #data is large or #feature is small
+* It is recommended used when #data is large or #feature is small
*/
template <typename TREELEARNER_T>
class DataParallelTreeLearner: public TREELEARNER_T {
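The comment above describes the data-parallel learning algorithm. For orientation, a minimal sketch (not from this diff; values are illustrative) of how that learner is selected from the Python package:

```python
# "tree_learner": "data" selects DataParallelTreeLearner: each worker builds
# histograms on its local data partition, then the histograms are merged globally.
# A real distributed run would also set num_machines and a machine list.
params = {
    "objective": "binary",
    "tree_learner": "data",
}
```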
6 changes: 3 additions & 3 deletions tests/python_package_test/test_arrow.py
@@ -20,9 +20,9 @@
else:
import pyarrow as pa # type: ignore

-assert (
-    lgb.compat.PYARROW_INSTALLED is True
-), "'pyarrow' and its dependencies must be installed to run the arrow tests"
+assert lgb.compat.PYARROW_INSTALLED is True, (
+    "'pyarrow' and its dependencies must be installed to run the arrow tests"
+)

# ----------------------------------------------------------------------------------------------- #
# UTILITIES #
3 changes: 1 addition & 2 deletions tests/python_package_test/test_engine.py
@@ -2168,8 +2168,7 @@ def has_interaction(treef):
trainset = generate_trainset_for_monotone_constraints_tests(test_with_categorical_variable)
for test_with_interaction_constraints in [True, False]:
error_msg = (
"Model not correctly constrained "
f"(test_with_interaction_constraints={test_with_interaction_constraints})"
f"Model not correctly constrained (test_with_interaction_constraints={test_with_interaction_constraints})"
)
for monotone_constraints_method in ["basic", "intermediate", "advanced"]:
params = {
6 changes: 3 additions & 3 deletions tests/python_package_test/test_sklearn.py
@@ -1425,9 +1425,9 @@ def test_getting_feature_names_in_np_input(estimator_class):
def test_getting_feature_names_in_pd_input(estimator_class):
X, y = load_digits(n_class=2, return_X_y=True, as_frame=True)
col_names = X.columns.to_list()
-assert isinstance(col_names, list) and all(
-    isinstance(c, str) for c in col_names
-), "input data must have feature names for this test to cover the expected functionality"
+assert isinstance(col_names, list) and all(isinstance(c, str) for c in col_names), (
+    "input data must have feature names for this test to cover the expected functionality"
+)
params = {"n_estimators": 2, "num_leaves": 7}
if estimator_class is lgb.LGBMModel:
model = estimator_class(**{**params, "objective": "binary"})
12 changes: 6 additions & 6 deletions tests/python_package_test/utils.py
@@ -251,12 +251,12 @@ def assert_subtree_valid(root):
right_child = root["right_child"]
(l_w, l_c) = assert_subtree_valid(left_child)
(r_w, r_c) = assert_subtree_valid(right_child)
-assert (
-    abs(root["internal_weight"] - (l_w + r_w)) <= 1e-3
-), "root node's internal weight should be approximately the sum of its child nodes' internal weights"
-assert (
-    root["internal_count"] == l_c + r_c
-), "root node's internal count should be exactly the sum of its child nodes' internal counts"
+assert abs(root["internal_weight"] - (l_w + r_w)) <= 1e-3, (
+    "root node's internal weight should be approximately the sum of its child nodes' internal weights"
+)
+assert root["internal_count"] == l_c + r_c, (
+    "root node's internal count should be exactly the sum of its child nodes' internal counts"
+)
return (root["internal_weight"], root["internal_count"])


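For context, a commit like this is typically produced by letting pre-commit update and re-run the hooks. A hedged sketch (the exact commands the author used are not recorded on this page; pre-commit is assumed to be installed and run from the repository root):

```python
# Hypothetical reproduction script, not taken from the PR.
import subprocess

# Bump hook revs in .pre-commit-config.yaml to the latest tagged releases.
subprocess.run(["pre-commit", "autoupdate"], check=True)

# Apply the updated hooks (ruff, typos, ...) to the whole repository; hooks
# exit non-zero when they modify files, so the return code is not checked.
subprocess.run(["pre-commit", "run", "--all-files"], check=False)
```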