Commit f5c64ab

Create adaboost.py
1 parent 1dc2932 commit f5c64ab

File tree

1 file changed: +118 additions, -0 deletions

Day-13-Adaboost/adaboost.py

Lines changed: 118 additions & 0 deletions
"""
Adaboost Algorithm Blog post:
https://www.mygreatlearning.com/blog/adaboost-algorithm/
"""
import torch
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


class Stump:
    """Each stump is a weak classifier; a weighted combination of stumps forms the boosting mechanism."""

    def __init__(self):
        """
        * polarity classifies a sample as either 1 or -1
        * feature_index identifies the feature used to separate the classes
        * feature values are compared against the threshold value
        * alpha indicates the classifier's accuracy and its weight in the ensemble
        """
        self.polarity = 1
        self.feature_index = None
        self.threshold = None
        self.alpha = None

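# Each saved stump classifies a sample x as
#   h(x) = -1 if polarity * x[feature_index] < polarity * threshold else +1
# Adaboost below combines these weak decisions with alpha weights.

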
class Adaboost:
    def __init__(self, num_classifiers):
        """
        :param num_classifiers: number of weak classifiers (stumps) to fit
        """
        self.num_classifiers = num_classifiers

    def fit(self, X, y):
        """
        :param X: input tensor
        :param y: output tensor with labels in {-1, +1}
        :return: builds a list of weak classifiers with the properties described in the Stump class.

        * Initialize the weights to 1/N, where N is the number of samples
        * Iterate over the weak classifiers
        * Track the minimum error obtained by using a feature-value threshold to predict the sample labels
        * Iterate over each feature and its unique values to find the best threshold
        * Label samples with a value below the threshold as -1
        * Error = sum of the weights of the misclassified samples
        * If the error is over 50%, flip the polarity so that samples classified as -1 are
          classified as 1, and vice versa, e.g. error = 0.8 => (1 - error) = 0.2
        * If this threshold gives the smallest error so far, save the configuration
        * Compute alpha, which is used to update the sample weights;
          alpha is also a measure of this classifier's proficiency
        * Set all predictions to 1 initially
        * Where the sample values are below the threshold, label them as -1
        * Update the weights and normalize them to sum to one
        * Save each weak classifier
        """
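        # Note: the weight update below assumes labels y in {-1, +1}
        # (the targets are remapped accordingly in the __main__ block).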
        n_samples, n_features = X.shape[0], X.shape[1]
        weight = torch.zeros(n_samples).fill_(1 / n_samples)
        self.clfs = []
        for _ in range(self.num_classifiers):
            clf = Stump()
            minimum_error = float('inf')
            for feature_i in range(n_features):
                feature_values = X[:, feature_i].unsqueeze(1)
                unique_values = feature_values.unique()
                for threshold in unique_values:
                    p = 1
                    prediction = torch.ones(y.shape)
                    prediction[X[:, feature_i] < threshold] = -1
                    error = torch.sum(weight[y != prediction])
                    if error > 0.5:
                        error = 1 - error
                        p = -1

                    if error < minimum_error:
                        clf.polarity = p
                        clf.threshold = threshold
                        clf.feature_index = feature_i
                        minimum_error = error

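            # Weak-learner weight (Freund & Schapire): alpha = 0.5 * ln((1 - err) / err);
            # the 1e-10 term guards against division by zero when err == 0.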
            clf.alpha = 0.5 * torch.log((1.0 - minimum_error) / (minimum_error + 1e-10))
            predictions = torch.ones(y.shape)
            negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold)
            predictions[negative_idx] = -1

            weight *= torch.exp(-clf.alpha * y * predictions)
            weight /= torch.sum(weight)

            self.clfs.append(clf)

    def predict(self, X):
        """
        Applies the same thresholding process as in fit.
        :param X: input tensor
        :return: predicted estimate of the ground-truth labels in {-1, +1}.
        """
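        # Final decision: H(x) = sign(sum_t alpha_t * h_t(x)), an alpha-weighted vote
        # over the saved stumps.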
        n_samples = X.shape[0]
        y_pred = torch.zeros((n_samples, 1))
        for clf in self.clfs:
            predictions = torch.ones(y_pred.shape)
            negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold)
            predictions[negative_idx] = -1
            y_pred += clf.alpha * predictions

        y_pred = torch.sign(y_pred).flatten()
        return y_pred


if __name__ == '__main__':
    breast_cancer = load_breast_cancer()
    torch.manual_seed(0)
    X = torch.tensor(breast_cancer.data, dtype=torch.float)
    # Remap the {0, 1} targets to {-1, +1}, which is what fit/predict expect
    y = torch.tensor(breast_cancer.target, dtype=torch.float) * 2 - 1
    n_classes = len(torch.unique(y))
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    clf = Adaboost(num_classifiers=20)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
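
As a rough sanity check (a hedged sketch, not part of the committed file), the hand-rolled booster can be compared against scikit-learn's AdaBoostClassifier on the same split; this assumes the variables from the __main__ block above (x_train, y_train, x_test, y_test) are in scope.

from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

sk_clf = AdaBoostClassifier(n_estimators=20)                    # 20 weak learners, mirroring num_classifiers above
sk_clf.fit(x_train.numpy(), (y_train.numpy() > 0).astype(int))  # sklearn expects class labels, here {0, 1}
sk_pred = sk_clf.predict(x_test.numpy()) * 2 - 1                # map predictions back to {-1, +1}
print("sklearn AdaBoost accuracy:", accuracy_score(y_test, sk_pred))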
