Commit a0e80a7

Added Gradient Boosting Classifier (TheAlgorithms#10944)
* Added Gradient Boosting Classifier
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* Update gradient_boosting_classifier.py
* Update gradient_boosting_classifier.py
* Update gradient_boosting_classifier.py
* Update gradient_boosting_classifier.py
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 5df16f1 commit a0e80a7

File tree

1 file changed: +118 -0 lines changed (gradient_boosting_classifier.py)

@@ -0,0 +1,118 @@
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor


class GradientBoostingClassifier:
    def __init__(self, n_estimators: int = 100, learning_rate: float = 0.1) -> None:
        """
        Initialize a GradientBoostingClassifier.

        Parameters:
        - n_estimators (int): The number of weak learners to train.
        - learning_rate (float): The learning rate for updating the model.

        Attributes:
        - n_estimators (int): The number of weak learners.
        - learning_rate (float): The learning rate.
        - models (list): A list to store the trained weak learners.
        """
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.models: list[tuple[DecisionTreeRegressor, float]] = []

    def fit(self, features: np.ndarray, target: np.ndarray) -> None:
        """
        Fit the GradientBoostingClassifier to the training data.

        Parameters:
        - features (np.ndarray): The training features.
        - target (np.ndarray): The target values.

        Returns:
        None

        >>> import numpy as np
        >>> from sklearn.datasets import load_iris
        >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
        >>> iris = load_iris()
        >>> X, y = iris.data, iris.target
        >>> clf.fit(X, y)
        >>> # Check if the model is trained
        >>> len(clf.models) == 100
        True
        """
        for _ in range(self.n_estimators):
            # Calculate the pseudo-residuals
            residuals = -self.gradient(target, self.predict(features))
            # Fit a weak learner (e.g., decision tree) to the residuals
            model = DecisionTreeRegressor(max_depth=1)
            model.fit(features, residuals)
            # Update the model by adding the weak learner with a learning rate
            self.models.append((model, self.learning_rate))
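        # Note: self.predict applies np.sign, so each boosting iteration
        # evaluates the gradient at the thresholded prediction rather than
        # at the raw accumulated score.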

    def predict(self, features: np.ndarray) -> np.ndarray:
        """
        Make predictions on input data.

        Parameters:
        - features (np.ndarray): The input data for making predictions.

        Returns:
        - np.ndarray: An array of binary predictions (-1 or 1).

        >>> import numpy as np
        >>> from sklearn.datasets import load_iris
        >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
        >>> iris = load_iris()
        >>> X, y = iris.data, iris.target
        >>> clf.fit(X, y)
        >>> y_pred = clf.predict(X)
        >>> # Check if the predictions have the correct shape
        >>> y_pred.shape == y.shape
        True
        """
        # Initialize predictions with zeros
        predictions = np.zeros(features.shape[0])
        for model, learning_rate in self.models:
            predictions += learning_rate * model.predict(features)
        return np.sign(predictions)  # Convert to binary predictions (-1 or 1)

    def gradient(self, target: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        """
        Calculate the gradient of the logistic loss with respect to the
        predictions; fit() negates this value to obtain the pseudo-residuals.

        Parameters:
        - target (np.ndarray): The target values.
        - y_pred (np.ndarray): The predicted values.

        Returns:
        - np.ndarray: An array of gradient values.

        >>> import numpy as np
        >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
        >>> target = np.array([0, 1, 0, 1])
        >>> y_pred = np.array([0.2, 0.8, 0.3, 0.7])
        >>> residuals = clf.gradient(target, y_pred)
        >>> # Check if residuals have the correct shape
        >>> residuals.shape == target.shape
        True
        """
        return -target / (1 + np.exp(target * y_pred))
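    # For reference, assuming the usual {-1, +1} label encoding: with a raw
    # score F(x), the logistic loss is L(y, F) = log(1 + exp(-y * F)), and its
    # derivative with respect to F is dL/dF = -y / (1 + exp(y * F)), which is
    # what gradient() returns. fit() negates this value, so the weak learners
    # are fitted to the pseudo-residuals y / (1 + exp(y * F)).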


if __name__ == "__main__":
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2f}")
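Note that predict() emits sign-based labels in {-1, 1}, while the __main__ demo above feeds it the three-class Iris labels {0, 1, 2}, so the printed accuracy is hard to interpret. A minimal usage sketch that matches the {-1, +1} convention, assuming the file is importable as gradient_boosting_classifier.py and recoding Iris into a binary "setosa vs. rest" task:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from gradient_boosting_classifier import GradientBoostingClassifier

iris = load_iris()
features = iris.data
# Recode the labels to the {-1, +1} convention expected by predict():
# class 0 ("setosa") becomes +1, the other two classes become -1.
target = np.where(iris.target == 0, 1, -1)

x_train, x_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print(f"Binary accuracy: {accuracy_score(y_test, y_pred):.2f}")

Setosa is linearly separable from the other two species, so this variant should report an accuracy close to 1.00.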

0 commit comments
