# ml41.py -- mean-shift clustering demo.
import random

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import style
# make_blobs is exported directly from sklearn.datasets; the old
# sklearn.datasets.samples_generator module was removed in scikit-learn 0.24,
# so importing from it fails on current releases.
from sklearn.datasets import make_blobs

style.use('ggplot')

# Demo data: 50 two-dimensional samples drawn around 3 blob centres.
# The labels in `y` are not used by the clustering below.
X, y = make_blobs(n_samples=50, centers=3, n_features=2)

# Fixed, hand-written alternative data set (swap it in for reproducible runs):
# X = np.array([[1,2],
# [1.5,1.8],
# [5,8],
# [8,8],
# [1,0.6],
# [9,11],
# [8,2],
# [10,2],
# [9,3],])
# plt.scatter(X[:,0],X[:,1], s=150)
# plt.show()

# Plot colours indexed by cluster id; the 5-colour palette is repeated so that
# up to 50 cluster ids can be looked up without an IndexError.
colors = 10 * ['g', 'r', 'c', 'b', 'k']
class Mean_Shift:
    """Mean-shift clustering with distance-banded sample weights.

    Every sample starts out as a centroid. On each iteration a centroid moves
    to the weighted mean of all samples, where a sample's weight depends on
    how many ``radius``-wide bands away it lies: band 0 gets weight
    ``(radius_norm_step - 1) ** 2``, the weight shrinks with every further
    band, and samples ``radius_norm_step - 1`` or more bands away get weight 0.
    Centroids that end up within ``radius`` of each other are merged, and the
    process repeats until the centroids stop changing.

    Parameters:
        radius: Width of one distance band. If ``None``, ``fit`` derives it as
            ``norm(mean(data)) / radius_norm_step`` and stores it on the
            instance.
        radius_norm_step: Number of distance bands (and of distinct weights).

    Attributes set by ``fit``:
        centroids: dict mapping cluster index -> centroid (numpy array).
        classifications: dict mapping cluster index -> list of the samples
            assigned to that cluster.
    """

    def __init__(self, radius=None, radius_norm_step=100):
        self.radius = radius
        self.radius_norm_step = radius_norm_step

    def _shift(self, centroid, data, squared_weights):
        """Return the band-weighted mean of ``data`` as seen from ``centroid``."""
        distances = np.linalg.norm(data - centroid, axis=1)
        # Clamp before the integer cast so that very distant samples land in
        # the last band instead of overflowing the conversion.
        last_band = self.radius_norm_step - 1
        bands = np.minimum(distances / self.radius, last_band).astype(int)
        sample_weights = squared_weights[bands]
        if not sample_weights.any():
            # No sample carries weight from here; leave the centroid in place
            # rather than producing a NaN mean.
            return np.asarray(centroid, dtype=float)
        # A weighted average is equivalent to averaging a list in which each
        # sample is repeated ``weight`` times, without building that list.
        return np.average(data, axis=0, weights=sample_weights)

    def _merge_close(self, candidates):
        """Drop candidates lying within ``radius`` of one that is kept.

        ``candidates`` is a sorted list of unique coordinate tuples. Each
        surviving candidate removes at most one neighbour per call; further
        merges happen on later iterations of ``fit``.
        """
        removed = set()
        for keeper in candidates:
            if keeper in removed:
                # Already merged away: skip it, but keep scanning the rest.
                continue
            keeper_arr = np.array(keeper)
            for other in candidates:
                if other == keeper or other in removed:
                    continue
                if np.linalg.norm(keeper_arr - np.array(other)) <= self.radius:
                    removed.add(other)
                    break
        return [c for c in candidates if c not in removed]

    def fit(self, data):
        """Find the cluster centroids of ``data`` and classify every sample.

        Parameters:
            data: Array-like of shape (n_samples, n_features).

        Raises:
            ValueError: If ``data`` is empty or not 2-D, if
                ``radius_norm_step`` is below 1, or if the radius (given or
                derived from the data) is not positive.
        """
        data = np.asarray(data, dtype=float)
        if data.ndim != 2 or data.shape[0] == 0:
            raise ValueError(
                "data must be a non-empty 2-D array of shape "
                "(n_samples, n_features)"
            )
        if self.radius_norm_step < 1:
            raise ValueError("radius_norm_step must be at least 1")

        radius = self.radius
        if radius is None:
            # Scale the band width with the magnitude of the data's mean.
            all_data_centroid = np.average(data, axis=0)
            radius = np.linalg.norm(all_data_centroid) / self.radius_norm_step
        if not radius > 0:
            raise ValueError(
                "radius must be positive; pass an explicit radius when the "
                "data mean lies at the origin"
            )
        self.radius = radius

        # Squared weights per band, largest for the nearest band.
        squared_weights = np.arange(self.radius_norm_step)[::-1].astype(float) ** 2

        centroids = dict(enumerate(data))
        while True:
            shifted = [
                tuple(self._shift(centroids[i], data, squared_weights))
                for i in centroids
            ]
            uniques = self._merge_close(sorted(set(shifted)))

            prev_centroids = centroids
            centroids = {i: np.array(u) for i, u in enumerate(uniques)}

            # Converged once every surviving centroid equals the centroid that
            # held the same index before this iteration.
            if all(
                np.array_equal(prev_centroids[i], centroids[i])
                for i in centroids
            ):
                break

        self.centroids = centroids
        self.classifications = {i: [] for i in centroids}
        for featureset in data:
            self.classifications[self.predict(featureset)].append(featureset)

    def predict(self, data):
        """Return the index of the fitted centroid nearest to ``data``."""
        point = np.asarray(data, dtype=float)
        distances = [
            np.linalg.norm(point - self.centroids[centroid])
            for centroid in self.centroids
        ]
        return distances.index(min(distances))
# Cluster the demo samples and keep a handle on the resulting centres.
clf = Mean_Shift()
clf.fit(X)
centroids = clf.centroids

# Draw every sample as an 'x', coloured by the cluster it was assigned to.
for classification, members in clf.classifications.items():
    color = colors[classification]
    for featureset in members:
        plt.scatter(
            featureset[0],
            featureset[1],
            marker='x',
            color=color,
            s=150,
            lw=5,
        )

# Overlay each cluster centre as a black star.
for center in centroids.values():
    plt.scatter(center[0], center[1], color='k', marker='*', s=150)

plt.show()