-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathclustering.py
42 lines (31 loc) · 1009 Bytes
/
clustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from time import time
import numpy as np
from matplotlib import pyplot as plt
from sklearn.cluster import AgglomerativeClustering
import pandas as pd
# Experiment: run agglomerative clustering with three linkage criteria on
# several random 2-D integer point sets, record how many points land in each
# cluster, and draw one area chart per linkage showing those cluster sizes.

N_TRIALS = 5          # number of independent random data sets
N_SAMPLES = 5000      # points per data set
COORD_LIMIT = 100000  # coordinates are drawn from [0, COORD_LIMIT)
N_CLUSTERS = 4        # clusters requested from every model

ward_mat = []
average_mat = []
complete_mat = []

# Explicit linkage -> result-list mapping. This replaces looking the list up
# with eval(linkage + "_mat"), and its insertion order fixes the order in
# which the linkages are fitted (ward, average, complete).
size_tables = {
    'ward': ward_mat,
    'average': average_mat,
    'complete': complete_mat,
}

for _trial in range(N_TRIALS):
    # Every linkage is fitted on the same random points within a trial.
    array = np.random.randint(COORD_LIMIT, size=(N_SAMPLES, 2))
    for linkage, size_table in size_tables.items():
        clustering = AgglomerativeClustering(linkage=linkage,
                                             n_clusters=N_CLUSTERS)
        t0 = time()
        clustering.fit(array)
        print("%s : %.2fs" % (linkage, time() - t0))

        # Points per cluster label; minlength keeps the row N_CLUSTERS wide
        # even if some label were to receive no points.
        counts = np.bincount(clustering.labels_, minlength=N_CLUSTERS)
        # Sort ascending so columns mean "smallest ... largest cluster"
        # rather than following the arbitrary label numbering.
        size_table.append(sorted(counts.tolist()))

# Order the trials (rows) lexicographically by their sorted size lists.
ward_mat.sort()
complete_mat.sort()
average_mat.sort()

# Column labels "1".."N_CLUSTERS": rank of the cluster by size, smallest first.
rank_columns = [str(rank) for rank in range(1, N_CLUSTERS + 1)]
df_ward = pd.DataFrame(ward_mat, columns=rank_columns)
df_avg = pd.DataFrame(average_mat, columns=rank_columns)
df_comp = pd.DataFrame(complete_mat, columns=rank_columns)

# One stacked-area figure per linkage, created in the original order.
df_comp.plot(kind='area')
df_avg.plot(kind='area')
df_ward.plot(kind='area')
plt.show()