5
5
6
6
7
7
class Cluster :
8
-
9
8
def __init__ (self , members = np .empty (shape = 1 ), distance_threshold = 0.01 ):
10
9
"""
11
10
Class for the implementation of the cluster in a FoF sense
@@ -38,34 +37,34 @@ def distance_threshold(self, value):
38
37
raise ValueError ("Distance Threshold not valid." )
39
38
else :
40
39
self ._distance_threshold = value
41
-
40
+
42
41
def update_clustering_index(self, distance):
    """Refresh the index that tracks the "goodness" of the cluster.

    The index is directly proportional to the number of members and
    inversely proportional to the supplied distance.

    :param distance: distance associated with the member being accounted for
    """
    member_count = len(self.members)

    # previous index divided by (member count - 1)
    carried_over = 1 / (member_count - 1) * self.clustering_index
    # the smaller the distance, the larger the contribution
    proximity_term = 1 / (distance)

    self.clustering_index = member_count * (carried_over + proximity_term)
49
+
49
50
def average_weights(self):
    """Compute the element-wise mean of the members' weight vectors.

    The result is stored in ``self.average_members_weights``; nothing is
    returned. The vector length is taken from the first member.
    """
    vector_length = len(self.members[0].weights)

    # sum() adds the members one after another onto the zero vector
    accumulated = sum(
        (neuron.weights for neuron in self.members),
        np.zeros(shape=vector_length),
    )

    self.average_members_weights = accumulated / len(self.members)
57
+
59
58
def add_member(self, new_member, distance):
    """Add a new member to the existing cluster.

    :param new_member: object appended to ``self.members``
    :param distance: value forwarded to ``update_clustering_index``
    """
    grown_members = np.append(self.members, new_member)
    self.members = grown_members

    # refresh the index now that the new member is included
    self.update_clustering_index(distance)
66
65
67
- class Neuron :
68
66
67
+ class Neuron :
69
68
def __init__ (self , x_0 , y_0 , weights ):
70
69
"""
71
70
Class which creates the single neurons of the SOM grid
@@ -103,9 +102,16 @@ def weights(self, value):
103
102
104
103
105
104
class SOM :
106
-
107
- def __init__ (self , x_size = 20 , y_size = 20 , size_neurons = 10000 , learning_rate_0 = 0.5 , radius_0 = 0.1 ,
108
- cluster_distance_threshold = 0.04 , input_data = None ):
105
+ def __init__ (
106
+ self ,
107
+ x_size = 20 ,
108
+ y_size = 20 ,
109
+ size_neurons = 10000 ,
110
+ learning_rate_0 = 0.5 ,
111
+ radius_0 = 0.1 ,
112
+ cluster_distance_threshold = 0.04 ,
113
+ input_data = None ,
114
+ ):
109
115
"""
110
116
Class for the implementation of the self-organizing maps
111
117
:type x_size: int
@@ -119,35 +125,27 @@ def __init__(self, x_size=20, y_size=20, size_neurons=10000, learning_rate_0=0.5
119
125
self .x_size = x_size
120
126
self .y_size = y_size
121
127
self .size_neurons = size_neurons
122
-
128
+
123
129
self .iteration = 0
124
130
self .time_constant = 200
125
131
self .learning_rate_0 = learning_rate_0
126
132
self .learning_rate = learning_rate_0
127
133
self .radius_0 = radius_0
128
134
self .radius = radius_0
129
-
135
+
130
136
self .cluster_distance_threshold = cluster_distance_threshold
131
-
137
+
132
138
self .input_data = input_data
133
-
134
- self .neuron_map = np .zeros (
135
- shape = (x_size , y_size ),
136
- dtype = object
137
- )
138
- self .clusters = np .array (
139
- [],
140
- dtype = object
141
- )
139
+
140
+ self .neuron_map = np .zeros (shape = (x_size , y_size ), dtype = object )
141
+ self .clusters = np .array ([], dtype = object )
142
142
self .matches_input_to_clusters = []
143
143
self .averaged_spectra_df = []
144
-
144
+
145
145
for i in range (self ._x_size ):
146
146
for j in range (self ._y_size ):
147
147
self ._neuron_map [i ][j ] = Neuron (
148
- i / x_size ,
149
- j / y_size ,
150
- np .random .uniform (1E-3 , 9E-4 , size_neurons )
148
+ i / x_size , j / y_size , np .random .uniform (1e-3 , 9e-4 , size_neurons )
151
149
)
152
150
153
151
@property
@@ -252,7 +250,7 @@ def input_data(self, value):
252
250
for vector in value :
253
251
if len (vector ) != len_0 :
254
252
raise ValueError ("Input data of different lengths." )
255
- if len (value ) < 300 : # this
253
+ if len (value ) < 300 : # this
256
254
raise ValueError ("Too few input data." )
257
255
self ._input_data = value
258
256
@@ -281,15 +279,17 @@ def matches_input_to_clusters(self, value):
281
279
self ._matches_input_to_clusters = value
282
280
283
281
def find_bmu(self, input_vector):
    """Locate the best-matching unit (BMU) for ``input_vector``.

    The Euclidean distance between ``input_vector`` and the weights of
    every neuron of the grid is computed; the neuron with the minimal
    distance is the BMU.

    :param input_vector: vector compared against each neuron's weights
    :return: ``[i, j]`` indices of the BMU in ``self.neuron_map``; when
        several neurons share the minimum, the first one in row-major
        order is returned
    """
    n_rows, n_cols = self.x_size, self.y_size
    grid = self.neuron_map

    # one row of distances per grid row
    distances = np.array(
        [
            [
                np.linalg.norm(grid[row][col].weights - input_vector)
                for col in range(n_cols)
            ]
            for row in range(n_rows)
        ]
    )

    # positions holding the smallest distance, in row-major order
    hit_rows, hit_cols = np.nonzero(distances == distances.min())
    return [hit_rows[0], hit_cols[0]]
@@ -302,16 +302,17 @@ def update_grid(self, input_vector):
302
302
for neuron_line in self .neuron_map :
303
303
for neuron in neuron_line :
304
304
# find each neuron that falls into the radius from the bmu at this iteration
305
- if (neuron .x - bmu .x ) ** 2 + (neuron .y - bmu .y ) ** 2 <= self .radius ** 2 :
305
+ if (neuron .x - bmu .x ) ** 2 + (
306
+ neuron .y - bmu .y
307
+ ) ** 2 <= self .radius ** 2 :
306
308
# update weights of the found neurons accordingly
307
309
neuron .weights = neuron .weights + self .learning_rate * (
308
- input_vector - neuron .weights )
310
+ input_vector - neuron .weights
311
+ )
309
312
310
313
# update positions of the found neurons accordingly
311
- neuron .x += self .learning_rate * (
312
- bmu .x - neuron .x )
313
- neuron .y += self .learning_rate * (
314
- bmu .y - neuron .y )
314
+ neuron .x += self .learning_rate * (bmu .x - neuron .x )
315
+ neuron .y += self .learning_rate * (bmu .y - neuron .y )
315
316
self .update_learning_rate ()
316
317
self .update_radius ()
317
318
self .iteration = self .iteration + 1
@@ -322,16 +323,14 @@ def update_radius(self):
322
323
323
324
def update_learning_rate(self):
    """Decay the learning rate exponentially with the iteration count.

    ``learning_rate = learning_rate_0 * exp(-iteration / time_constant)``
    """
    exponent = -self.iteration / self.time_constant
    decay_factor = np.exp(exponent)
    self.learning_rate = self.learning_rate_0 * decay_factor
326
329
327
330
def find_clusters (self ):
328
331
# FoF
329
332
# make list of valid points
330
- list_points = [
331
- [i , j ]
332
- for i in range (self .x_size )
333
- for j in range (self .y_size )
334
- ]
333
+ list_points = [[i , j ] for i in range (self .x_size ) for j in range (self .y_size )]
335
334
336
335
while list_points :
337
336
# choose random valid point to start with
@@ -341,8 +340,10 @@ def find_clusters(self):
341
340
cluster = Cluster ([start_neuron ], self .cluster_distance_threshold )
342
341
for point in list_points :
343
342
# calculate distance for each point to the starting neuron
344
- distance = np .sqrt ((self .neuron_map [point [0 ]][point [1 ]].x - start_neuron .x ) ** 2 + (
345
- self .neuron_map [point [0 ]][point [1 ]].y - start_neuron .y ) ** 2 )
343
+ distance = np .sqrt (
344
+ (self .neuron_map [point [0 ]][point [1 ]].x - start_neuron .x ) ** 2
345
+ + (self .neuron_map [point [0 ]][point [1 ]].y - start_neuron .y ) ** 2
346
+ )
346
347
if distance <= cluster .distance_threshold :
347
348
# add member to cluster
348
349
cluster .add_member (self .neuron_map [point [0 ]][point [1 ]], distance )
@@ -353,11 +354,17 @@ def find_clusters(self):
353
354
for j in range (1 , len (cluster .members )):
354
355
for point in list_points :
355
356
# calculate distance for each remaining point to the friends of the starting neuron
356
- distance = np .sqrt ((self .neuron_map [point [0 ]][point [1 ]].x - cluster .members [j ].x ) ** 2 + (
357
- self .neuron_map [point [0 ]][point [1 ]].y - cluster .members [j ].y ) ** 2 )
357
+ distance = np .sqrt (
358
+ (self .neuron_map [point [0 ]][point [1 ]].x - cluster .members [j ].x )
359
+ ** 2
360
+ + (self .neuron_map [point [0 ]][point [1 ]].y - cluster .members [j ].y )
361
+ ** 2
362
+ )
358
363
if distance <= cluster .distance_threshold :
359
364
# add member to cluster
360
- cluster .add_member (self .neuron_map [point [0 ]][point [1 ]], distance )
365
+ cluster .add_member (
366
+ self .neuron_map [point [0 ]][point [1 ]], distance
367
+ )
361
368
# remove indexes from list of valid points
362
369
list_points .remove (point )
363
370
# more or less subjective threshold for number of members
@@ -374,56 +381,71 @@ def find_clusters(self):
374
381
# self.clusters = sorted(self.clusters, key=lambda n: n.clustering_index)
375
382
376
383
def match_input_to_cluster(self):
    """Associate every input spectrum with its best-matching cluster.

    For each vector in ``self.input_data`` the Euclidean distance to the
    ``average_members_weights`` of every cluster in ``self.clusters`` is
    computed and the closest cluster is kept. The outcome is stored in
    ``self.matches_input_to_clusters`` as a DataFrame with the columns
    ``Cluster_number`` (position of the cluster in ``self.clusters``),
    ``Distance`` and ``Index`` (position of the spectrum in
    ``self.input_data``), sorted by cluster number, then by increasing
    distance, then by decreasing index.

    :raises ValueError: if there is input data but ``self.clusters`` is empty
    """
    columns = ["Cluster_number", "Distance", "Index"]

    # Collect plain rows and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-allocated the whole frame on every call.
    rows = []
    for index, spectrum in enumerate(self.input_data):
        distances = np.array(
            [
                np.linalg.norm(cluster.average_members_weights - spectrum)
                for cluster in self.clusters
            ]
        )
        # argmin returns the first position of the minimal distance
        best_cluster = np.argmin(distances)
        rows.append([best_cluster, distances[best_cluster], index])

    # Object dtype keeps each cell as stored, so selecting a row (e.g.
    # ``df.iloc[j].Index``) yields the integer index rather than a float
    # produced by upcasting a mixed int/float row.
    matches_df = pd.DataFrame(rows, columns=columns, dtype=object)

    # sort the results from lowest to highest distance for each cluster number
    self.matches_input_to_clusters = matches_df.sort_values(
        columns, ascending=[True, True, False]
    )
398
417
399
-
400
418
def average_spectra(self):
    """Average the input spectra assigned to each cluster.

    For every cluster position in ``self.clusters`` the rows of
    ``self.matches_input_to_clusters`` with that ``Cluster_number`` are
    selected, the corresponding vectors of ``self.input_data`` are summed
    and divided by their count. The result is stored in
    ``self.averaged_spectra_df`` with the columns ``Cluster_number`` and
    ``Avg_Spectrum`` (one row per cluster).

    A cluster without any matched spectrum gets a NaN-filled vector,
    which is what the 0/0 division produced before, minus the warning.
    """
    # The frame used to be seeded with space-prefixed column names that
    # never matched the appended rows, leaving two all-NaN columns; only
    # the names actually used for the rows are kept.
    columns = ["Cluster_number", "Avg_Spectrum"]
    matches = self.matches_input_to_clusters

    # Rows are collected and the frame is built once, because
    # DataFrame.append was removed in pandas 2.0.
    rows = []
    for cluster_number in range(len(self.clusters)):
        spectrum_length = len(self.input_data[0])

        # positions (in input_data) of the spectra matched to this cluster
        member_indices = matches.loc[
            matches["Cluster_number"] == cluster_number, "Index"
        ]

        if member_indices.empty:
            average = np.full(spectrum_length, np.nan)
        else:
            total = np.zeros(spectrum_length)
            for input_index in member_indices:
                # int() guards against an index stored as float/object
                total = total + self.input_data[int(input_index)]
            average = total / len(member_indices)

        rows.append([cluster_number, average])

    self.averaged_spectra_df = pd.DataFrame(rows, columns=columns)
420
442
421
443
def start (self , num_cycles = 1 ):
422
- # repeating the som cylce for a certain number of times,
444
+ # repeating the SOM cycle for a certain number of times,
423
445
# with decreasing impacting parameters
424
446
for n in range (0 , num_cycles ):
425
- self .radius = (1 / ( n + 1 ))* self .radius_0
426
- self .learning_rate = (1 / ( n + 1 ))* self .learning_rate_0
447
+ self .radius = (1 / ( n + 1 )) * self .radius_0
448
+ self .learning_rate = (1 / ( n + 1 )) * self .learning_rate_0
427
449
[self .update_grid (vector ) for vector in self .input_data ]
428
450
self .find_clusters ()
429
451
self .match_input_to_cluster ()
0 commit comments