Skip to content

Commit 209942d

Browse files
committed Dec 10, 2022
added k-means clustering
1 parent fd1da60 commit 209942d

File tree

1 file changed

+70
-0
lines changed

1 file changed

+70
-0
lines changed
 

‎K-Means Clustering/KMeans.py

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import numpy as np
2+
3+
4+
class K_Means:
    """K-Means clustering (Lloyd's algorithm) with Euclidean distance.

    The first ``k`` samples of the training data seed the centroids. Each
    iteration assigns every sample to its nearest centroid and then moves
    each centroid to the mean of the samples assigned to it, until no
    centroid moves by more than the tolerance or ``max_itr`` is reached.

    After ``fit`` the instance exposes:
        - ``centroids``: dict mapping cluster label -> centroid coordinates.
        - ``classifications``: dict mapping cluster label -> list of the
          training samples assigned to that cluster in the last iteration.
    """

    def __init__(self, k: int = 2, tolerence=0.001, max_itr=300):
        """Class for K-Means Clustering

        Args:
            - k (int, optional): total cluster numbers. Defaults to 2.
            - tolerence (float, optional): tolerance on centroid movement,
              expressed as a summed percentage change. Defaults to 0.001.
            - max_itr (int, optional): maximum number of iterations for which
              clustering is done. Defaults to 300.
        """
        self.k = k
        self.tol = tolerence
        self.max_itr = max_itr

    def _nearest_centroid(self, point) -> int:
        """Return the label of the centroid closest to ``point``.

        Ties are resolved in favour of the lowest label.
        """
        distances = [
            np.linalg.norm(point - self.centroids[label])
            for label in self.centroids
        ]
        return distances.index(min(distances))

    def fit(self, X, y=None) -> list:
        """Fit method for training the model.

        Args:
            - X (arrayLike, list, npArray): data for training. Must contain
              at least ``k`` samples; the first ``k`` seed the centroids.
            - y (arrayLike, list, npArray, optional): For consistency of API.
              Even if inserted, it is ignored. Defaults to None.

        Returns:
            list: every per-centroid movement (summed absolute percentage
            change) that exceeded the tolerance, in the order observed.
        """
        # Plain Python lists do not support element-wise subtraction, so
        # normalise the input to a float array once up front.
        X = np.asarray(X, dtype=float)

        self.centroids = {label: X[label] for label in range(self.k)}
        self.__moves_history = []

        # Floor for the relative-change denominator so that a centroid
        # component equal to zero cannot produce inf/nan.
        eps = np.finfo(float).eps

        for _ in range(self.max_itr):
            self.classifications = {label: [] for label in range(self.k)}

            for featureset in X:
                label = self._nearest_centroid(featureset)
                self.classifications[label].append(featureset)

            prev_centroids = dict(self.centroids)
            for label, members in self.classifications.items():
                # An empty cluster has no mean; keep its previous centroid
                # rather than letting it collapse to nan.
                if members:
                    self.centroids[label] = np.average(members, axis=0)

            optimized = True
            for label in self.centroids:
                original_centroid = prev_centroids[label]
                current_centroid = self.centroids[label]
                # Absolute values: movement in the negative direction is
                # still movement, and opposite-signed components must not
                # cancel each other out.
                denominator = np.maximum(np.abs(original_centroid), eps)
                move = np.sum(
                    np.abs(current_centroid - original_centroid)
                    / denominator * 100.0
                )
                if move > self.tol:
                    self.__moves_history.append(move)
                    optimized = False

            if optimized:
                break

        return self.__moves_history

    def predict(self, X):
        """Predict method for predicting values.

        Args:
            - X (arrayLike, list, npArray): input data, one sample per row.

        Returns:
            list: the label of the nearest centroid for each sample in X.
        """
        X = np.asarray(X, dtype=float)
        return [self._nearest_centroid(point) for point in X]

0 commit comments

Comments
 (0)
Please sign in to comment.