|
| 1 | +import numpy as np |
| 2 | + |
| 3 | + |
class K_Means:
    """K-Means clustering using Euclidean distance.

    Centroids are seeded with the first ``k`` training samples and then
    refined by alternating two steps: assign every sample to its nearest
    centroid, then move every centroid to the mean of its assigned samples.
    Iteration stops once no centroid moves farther than the tolerance, or
    after ``max_itr`` iterations.

    Attributes set by ``fit``:
        - centroids (dict): cluster index -> centroid coordinates.
        - classifications (dict): cluster index -> list of the training
          samples assigned to that cluster during the last iteration.
    """

    def __init__(self, k: int = 2, tolerence=0.001, max_itr=300):
        """Class for K-Means Clustering

        Args:
            - k (int, optional): total cluster numbers. Defaults to 2.
            - tolerence (float, optional): largest centroid movement
              (Euclidean distance) that still counts as converged.
              Defaults to 0.001.
            - max_itr (int, optional): maximum iterations till which
              clustering is done. Defaults to 300.
        """
        self.k = k
        self.tol = tolerence
        self.max_itr = max_itr

    def _closest_centroid(self, point) -> int:
        """Return the index of the centroid nearest to ``point``.

        Ties are resolved in favour of the lowest cluster index.
        """
        distances = [
            np.linalg.norm(point - centroid)
            for centroid in self.centroids.values()
        ]
        return distances.index(min(distances))

    def fit(self, X, y=None) -> list:
        """Fit method for training the model.

        Args:
            - X (arrayLike, list, npArray): data for training, one sample
              per row.
            - y (arrayLike, list, npArray, optional): For consistency of API.
              Even if inserted, it is ignored. Defaults to None.

        Returns:
            list: every centroid movement (Euclidean distance between a
            centroid's previous and updated position) that exceeded the
            tolerance, in the order it was observed.

        Raises:
            ValueError: if ``X`` holds fewer than ``k`` samples.
        """
        # Convert once so plain Python lists support the array arithmetic below.
        X = np.asarray(X, dtype=float)
        if X.ndim == 0 or X.shape[0] < self.k:
            raise ValueError(
                f"X must contain at least k={self.k} samples to seed the centroids"
            )

        # Seed with copies so the centroids never alias the caller's data.
        self.centroids = {label: X[label].copy() for label in range(self.k)}
        self.classifications = {label: [] for label in range(self.k)}
        self.__moves_history = []

        for _ in range(self.max_itr):
            # Assignment step: group every sample under its nearest centroid.
            self.classifications = {label: [] for label in range(self.k)}
            for sample in X:
                self.classifications[self._closest_centroid(sample)].append(sample)

            # Update step: move each centroid to the mean of its members.
            prev_centroids = dict(self.centroids)
            for label, members in self.classifications.items():
                # An empty cluster has no mean; keep its previous centroid
                # rather than letting NaN leak into later distance checks.
                if members:
                    self.centroids[label] = np.average(members, axis=0)

            optimized = True
            for label, centroid in self.centroids.items():
                # Distance moved is non-negative and needs no division, so
                # shifts toward smaller values or through zero coordinates
                # are measured correctly.
                move = float(np.linalg.norm(centroid - prev_centroids[label]))
                if move > self.tol:
                    self.__moves_history.append(move)
                    optimized = False

            if optimized:
                break

        return self.__moves_history

    def predict(self, X):
        """Predict method for predicting values.

        Args:
            - X (arrayLike, list, npArray): input data, one sample per row.

        Returns:
            list: index of the nearest centroid for every sample in ``X``.

        Raises:
            AttributeError: if called before ``fit``.
        """
        if not getattr(self, "centroids", None):
            raise AttributeError(
                "K_Means instance is not fitted yet; call fit() before predict()"
            )
        X = np.asarray(X, dtype=float)
        return [self._closest_centroid(point) for point in X]
0 commit comments