Commit 81922a7
[ci] [python-package] update pre-commit hooks to latest versions (#6817)
jameslamb authored Feb 7, 2025
1 parent 2db0b25 commit 81922a7
Showing 13 changed files with 34 additions and 36 deletions.
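Most of the Python changes below appear to be mechanical output of the updated ruff formatter, which joins implicitly concatenated string literals into a single literal. A minimal sketch (not part of the diff; the variable names are illustrative) of why such a rewrite is behavior-preserving, since Python concatenates adjacent string literals at compile time:

```python
# Adjacent string literals are merged by the parser, so the single f-string the
# formatter produces is identical to the old implicitly concatenated pair.
name = "label"
old_style = f"Wrong type for {name}.\n" "It should be list, numpy 1-D array or pandas Series"
new_style = f"Wrong type for {name}.\nIt should be list, numpy 1-D array or pandas Series"
assert old_style == new_style
```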
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -24,7 +24,7 @@ repos:
args: ["--strict"]
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
-rev: v0.8.3
+rev: v0.9.5
hooks:
# Run the linter.
- id: ruff
@@ -39,7 +39,7 @@ repos:
hooks:
- id: shellcheck
- repo: https://github.com/crate-ci/typos
-rev: v1.28.3
+rev: v1.29.5
hooks:
- id: typos
args: ["--force-exclude"]
2 changes: 1 addition & 1 deletion examples/parallel_learning/train.conf
@@ -11,7 +11,7 @@ boosting_type = gbdt
# alias: application, app
objective = binary

-# eval metrics, support multi metric, delimite by ',' , support following metrics
+# eval metrics, support multi metric, delimited by ',' , support following metrics
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
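The comment above lists metrics that can be given as a comma-delimited list in the config file. As a point of reference, a minimal sketch (not part of this commit; the random data and parameter values are made up) of the equivalent multi-metric setup through the Python API:

```python
import numpy as np
import lightgbm as lgb

# Illustrative stand-in data for the binary-objective example config above.
X = np.random.rand(100, 5)
y = np.random.randint(0, 2, size=100)
train_data = lgb.Dataset(X, label=y)

# Several eval metrics at once, equivalent to a comma-delimited "metric" entry in train.conf.
params = {"objective": "binary", "metric": ["binary_logloss", "auc"], "verbosity": -1}
booster = lgb.train(params, train_data, num_boost_round=5)
```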
2 changes: 1 addition & 1 deletion examples/regression/train.conf
@@ -11,7 +11,7 @@ boosting_type = gbdt
# alias: application, app
objective = regression

-# eval metrics, support multi metric, delimite by ',' , support following metrics
+# eval metrics, support multi metric, delimited by ',' , support following metrics
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
2 changes: 1 addition & 1 deletion examples/xendcg/train.conf
@@ -11,7 +11,7 @@ boosting_type = gbdt
# alias: application, app
objective = rank_xendcg

-# eval metrics, support multi metric, delimite by ',' , support following metrics
+# eval metrics, support multi metric, delimited by ',' , support following metrics
# l1
# l2 , default metric for regression
# ndcg , default metric for lambdarank
13 changes: 6 additions & 7 deletions python-package/lightgbm/basic.py
@@ -381,7 +381,7 @@ def _list_to_1d_numpy(
return np.asarray(data, dtype=dtype) # SparseArray should be supported as well
else:
raise TypeError(
f"Wrong type({type(data).__name__}) for {name}.\n" "It should be list, numpy 1-D array or pandas Series"
f"Wrong type({type(data).__name__}) for {name}.\nIt should be list, numpy 1-D array or pandas Series"
)


@@ -803,8 +803,7 @@ def _check_for_bad_pandas_dtypes(pandas_dtypes_series: pd_Series) -> None:
]
if bad_pandas_dtypes:
raise ValueError(
-'pandas dtypes must be int, float or bool.\n'
-f'Fields with bad pandas dtypes: {", ".join(bad_pandas_dtypes)}'
+f"pandas dtypes must be int, float or bool.\nFields with bad pandas dtypes: {', '.join(bad_pandas_dtypes)}"
)


@@ -3298,7 +3297,7 @@ def get_data(self) -> Optional[_LGBM_TrainDataType]:
self.data = np.array(list(self._yield_row_from_seqlist(self.data, self.used_indices)))
else:
_log_warning(
f"Cannot subset {type(self.data).__name__} type of raw data.\n" "Returning original raw data"
f"Cannot subset {type(self.data).__name__} type of raw data.\nReturning original raw data"
)
self._need_slice = False
if self.data is None:
@@ -3718,7 +3717,7 @@ def __init__(
self.model_from_string(model_str)
else:
raise TypeError(
"Need at least one training dataset or model file or model string " "to create Booster instance"
"Need at least one training dataset or model file or model string to create Booster instance"
)
self.params = params

@@ -4052,7 +4051,7 @@ def add_valid(self, data: Dataset, name: str) -> "Booster":
if not isinstance(data, Dataset):
raise TypeError(f"Validation data should be Dataset instance, met {type(data).__name__}")
if data._predictor is not self.__init_predictor:
raise LightGBMError("Add validation data failed, " "you should use same predictor for these data")
raise LightGBMError("Add validation data failed, you should use same predictor for these data")
_safe_call(
_LIB.LGBM_BoosterAddValidData(
self._handle,
@@ -4138,7 +4137,7 @@ def update(
if not isinstance(train_set, Dataset):
raise TypeError(f"Training data should be Dataset instance, met {type(train_set).__name__}")
if train_set._predictor is not self.__init_predictor:
raise LightGBMError("Replace training data failed, " "you should use same predictor for these data")
raise LightGBMError("Replace training data failed, you should use same predictor for these data")
self.train_set = train_set
_safe_call(
_LIB.LGBM_BoosterResetTrainingData(
2 changes: 1 addition & 1 deletion python-package/lightgbm/callback.py
@@ -393,7 +393,7 @@ def _final_iteration_check(self, *, env: CallbackEnv, metric_name: str, i: int)
if self.verbose:
best_score_str = "\t".join([_format_eval_result(x, show_stdv=True) for x in self.best_score_list[i]])
_log_info(
"Did not meet early stopping. " f"Best iteration is:\n[{self.best_iter[i] + 1}]\t{best_score_str}"
f"Did not meet early stopping. Best iteration is:\n[{self.best_iter[i] + 1}]\t{best_score_str}"
)
if self.first_metric_only:
_log_info(f"Evaluated only: {metric_name}")
14 changes: 7 additions & 7 deletions python-package/lightgbm/dask.py
@@ -1166,7 +1166,7 @@ def __init__(
_before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore
__init__.__doc__ = f"""
{_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
-{' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
+{" ":4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{_kwargs}{_after_kwargs}
"""

@@ -1221,7 +1221,7 @@ def fit( # type: ignore[override]
_base_doc = _base_doc[: _base_doc.find("eval_group :")] + _base_doc[_base_doc.find("eval_metric :") :]

# DaskLGBMClassifier support for callbacks and init_model is not tested
-fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
+fit.__doc__ = f"""{_base_doc[: _base_doc.find("callbacks :")]}**kwargs
Other parameters passed through to ``LGBMClassifier.fit()``.
Returns
@@ -1369,7 +1369,7 @@ def __init__(
_before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore
__init__.__doc__ = f"""
{_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
-{' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
+{" ":4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{_kwargs}{_after_kwargs}
"""

@@ -1424,7 +1424,7 @@ def fit( # type: ignore[override]
_base_doc = _base_doc[: _base_doc.find("eval_group :")] + _base_doc[_base_doc.find("eval_metric :") :]

# DaskLGBMRegressor support for callbacks and init_model is not tested
-fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
+fit.__doc__ = f"""{_base_doc[: _base_doc.find("callbacks :")]}**kwargs
Other parameters passed through to ``LGBMRegressor.fit()``.
Returns
@@ -1536,7 +1536,7 @@ def __init__(
_before_kwargs, _kwargs, _after_kwargs = _base_doc.partition("**kwargs") # type: ignore
__init__.__doc__ = f"""
{_before_kwargs}client : dask.distributed.Client or None, optional (default=None)
-{' ':4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
+{" ":4}Dask client. If ``None``, ``distributed.default_client()`` will be used at runtime. The Dask client used by this class will not be saved if the model object is pickled.
{_kwargs}{_after_kwargs}
"""

@@ -1596,11 +1596,11 @@ def fit( # type: ignore[override]
_base_doc[: _base_doc.find("feature_name :")]
+ "eval_at : list or tuple of int, optional (default=(1, 2, 3, 4, 5))\n"
+ f"{' ':8}The evaluation positions of the specified metric.\n"
+ f"{' ':4}{_base_doc[_base_doc.find('feature_name :'):]}"
+ f"{' ':4}{_base_doc[_base_doc.find('feature_name :') :]}"
)

# DaskLGBMRanker support for callbacks and init_model is not tested
-fit.__doc__ = f"""{_base_doc[:_base_doc.find('callbacks :')]}**kwargs
+fit.__doc__ = f"""{_base_doc[: _base_doc.find("callbacks :")]}**kwargs
Other parameters passed through to ``LGBMRanker.fit()``.
Returns
2 changes: 1 addition & 1 deletion python-package/lightgbm/plotting.py
@@ -247,7 +247,7 @@ def plot_split_value_histogram(

hist, split_bins = booster.get_split_value_histogram(feature=feature, bins=bins, xgboost_style=False)
if np.count_nonzero(hist) == 0:
raise ValueError("Cannot plot split value histogram, " f"because feature {feature} was not used in splitting")
raise ValueError(f"Cannot plot split value histogram, because feature {feature} was not used in splitting")
width = width_coef * (split_bins[1] - split_bins[0])
centred = (split_bins[:-1] + split_bins[1:]) / 2

2 changes: 1 addition & 1 deletion src/treelearner/parallel_tree_learner.h
@@ -47,7 +47,7 @@ class FeatureParallelTreeLearner: public TREELEARNER_T {
/*!
* \brief Data parallel learning algorithm.
* Workers use local data to construct histograms locally, then sync up global histograms.
-* It is recommonded used when #data is large or #feature is small
+* It is recommended used when #data is large or #feature is small
*/
template <typename TREELEARNER_T>
class DataParallelTreeLearner: public TREELEARNER_T {
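The comment above describes the data-parallel learning algorithm. For orientation, a minimal sketch (not from this diff; values are illustrative) of how that learner is selected from the Python package:

```python
# "tree_learner": "data" selects DataParallelTreeLearner: each worker builds
# histograms on its local data partition, then the histograms are merged globally.
# A real distributed run would also set num_machines and a machine list.
params = {
    "objective": "binary",
    "tree_learner": "data",
}
```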
6 changes: 3 additions & 3 deletions tests/python_package_test/test_arrow.py
@@ -20,9 +20,9 @@
else:
import pyarrow as pa # type: ignore

-assert (
-    lgb.compat.PYARROW_INSTALLED is True
-), "'pyarrow' and its dependencies must be installed to run the arrow tests"
+assert lgb.compat.PYARROW_INSTALLED is True, (
+    "'pyarrow' and its dependencies must be installed to run the arrow tests"
+)

# ----------------------------------------------------------------------------------------------- #
# UTILITIES #
3 changes: 1 addition & 2 deletions tests/python_package_test/test_engine.py
@@ -2168,8 +2168,7 @@ def has_interaction(treef):
trainset = generate_trainset_for_monotone_constraints_tests(test_with_categorical_variable)
for test_with_interaction_constraints in [True, False]:
error_msg = (
"Model not correctly constrained "
f"(test_with_interaction_constraints={test_with_interaction_constraints})"
f"Model not correctly constrained (test_with_interaction_constraints={test_with_interaction_constraints})"
)
for monotone_constraints_method in ["basic", "intermediate", "advanced"]:
params = {
6 changes: 3 additions & 3 deletions tests/python_package_test/test_sklearn.py
@@ -1425,9 +1425,9 @@ def test_getting_feature_names_in_np_input(estimator_class):
def test_getting_feature_names_in_pd_input(estimator_class):
X, y = load_digits(n_class=2, return_X_y=True, as_frame=True)
col_names = X.columns.to_list()
-assert isinstance(col_names, list) and all(
-    isinstance(c, str) for c in col_names
-), "input data must have feature names for this test to cover the expected functionality"
+assert isinstance(col_names, list) and all(isinstance(c, str) for c in col_names), (
+    "input data must have feature names for this test to cover the expected functionality"
+)
params = {"n_estimators": 2, "num_leaves": 7}
if estimator_class is lgb.LGBMModel:
model = estimator_class(**{**params, "objective": "binary"})
12 changes: 6 additions & 6 deletions tests/python_package_test/utils.py
@@ -251,12 +251,12 @@ def assert_subtree_valid(root):
right_child = root["right_child"]
(l_w, l_c) = assert_subtree_valid(left_child)
(r_w, r_c) = assert_subtree_valid(right_child)
-assert (
-    abs(root["internal_weight"] - (l_w + r_w)) <= 1e-3
-), "root node's internal weight should be approximately the sum of its child nodes' internal weights"
-assert (
-    root["internal_count"] == l_c + r_c
-), "root node's internal count should be exactly the sum of its child nodes' internal counts"
+assert abs(root["internal_weight"] - (l_w + r_w)) <= 1e-3, (
+    "root node's internal weight should be approximately the sum of its child nodes' internal weights"
+)
+assert root["internal_count"] == l_c + r_c, (
+    "root node's internal count should be exactly the sum of its child nodes' internal counts"
+)
return (root["internal_weight"], root["internal_count"])


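For context, a commit like this is typically produced by letting pre-commit update and re-run the hooks. A hedged sketch (the exact commands the author used are not recorded on this page; pre-commit is assumed to be installed and run from the repository root):

```python
# Hypothetical reproduction script, not taken from the PR.
import subprocess

# Bump hook revs in .pre-commit-config.yaml to the latest tagged releases.
subprocess.run(["pre-commit", "autoupdate"], check=True)

# Apply the updated hooks (ruff, typos, ...) to the whole repository; hooks
# exit non-zero when they modify files, so the return code is not checked.
subprocess.run(["pre-commit", "run", "--all-files"], check=False)
```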