Skip to content

Commit 7b7f99a

Browse files
committed
small reformatting
1 parent 8fd44cd commit 7b7f99a

File tree

3 files changed

+519
-358
lines changed

3 files changed

+519
-358
lines changed

SOM.py

Lines changed: 99 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66

77
class Cluster:
8-
98
def __init__(self, members=np.empty(shape=1), distance_threshold=0.01):
109
"""
1110
Class for the implementation of the cluster in a FoF sense
@@ -38,34 +37,34 @@ def distance_threshold(self, value):
3837
raise ValueError("Distance Threshold not valid.")
3938
else:
4039
self._distance_threshold = value
41-
40+
4241
def update_clustering_index(self, distance):
43-
# function to update an index to keep track of the "goodness" of the
44-
# cluster, directly proportional to the number of components and
42+
# function to update an index to keep track of the "goodness" of the
43+
# cluster, directly proportional to the number of components and
4544
# inversely to the distance
46-
47-
self.clustering_index = len(self.members)*(1/(len(self.members)-1)*self.clustering_index + 1/(distance))
48-
45+
46+
self.clustering_index = len(self.members) * (
47+
1 / (len(self.members) - 1) * self.clustering_index + 1 / (distance)
48+
)
49+
4950
def average_weights(self):
5051
# function to return the averaged weights of the cluster
51-
52-
member_weights = np.zeros(
53-
shape = len(self.members[0].weights)
54-
)
52+
53+
member_weights = np.zeros(shape=len(self.members[0].weights))
5554
for member in self.members:
5655
member_weights = member_weights + member.weights
57-
self.average_members_weights = member_weights/len(self.members)
58-
56+
self.average_members_weights = member_weights / len(self.members)
57+
5958
def add_member(self, new_member, distance):
6059
# function to add a new member to the existing cluster
61-
60+
6261
self.members = np.append(self.members, new_member)
63-
62+
6463
# update the index with the new member
6564
self.update_clustering_index(distance)
6665

67-
class Neuron:
6866

67+
class Neuron:
6968
def __init__(self, x_0, y_0, weights):
7069
"""
7170
Class which creates the single neurons of the SOM grid
@@ -103,9 +102,16 @@ def weights(self, value):
103102

104103

105104
class SOM:
106-
107-
def __init__(self, x_size=20, y_size=20, size_neurons=10000, learning_rate_0=0.5, radius_0=0.1,
108-
cluster_distance_threshold=0.04, input_data=None):
105+
def __init__(
106+
self,
107+
x_size=20,
108+
y_size=20,
109+
size_neurons=10000,
110+
learning_rate_0=0.5,
111+
radius_0=0.1,
112+
cluster_distance_threshold=0.04,
113+
input_data=None,
114+
):
109115
"""
110116
Class for the implementation of the self-organizing maps
111117
:type x_size: int
@@ -119,35 +125,27 @@ def __init__(self, x_size=20, y_size=20, size_neurons=10000, learning_rate_0=0.5
119125
self.x_size = x_size
120126
self.y_size = y_size
121127
self.size_neurons = size_neurons
122-
128+
123129
self.iteration = 0
124130
self.time_constant = 200
125131
self.learning_rate_0 = learning_rate_0
126132
self.learning_rate = learning_rate_0
127133
self.radius_0 = radius_0
128134
self.radius = radius_0
129-
135+
130136
self.cluster_distance_threshold = cluster_distance_threshold
131-
137+
132138
self.input_data = input_data
133-
134-
self.neuron_map = np.zeros(
135-
shape=(x_size, y_size),
136-
dtype=object
137-
)
138-
self.clusters = np.array(
139-
[],
140-
dtype=object
141-
)
139+
140+
self.neuron_map = np.zeros(shape=(x_size, y_size), dtype=object)
141+
self.clusters = np.array([], dtype=object)
142142
self.matches_input_to_clusters = []
143143
self.averaged_spectra_df = []
144-
144+
145145
for i in range(self._x_size):
146146
for j in range(self._y_size):
147147
self._neuron_map[i][j] = Neuron(
148-
i / x_size,
149-
j / y_size,
150-
np.random.uniform(1E-3, 9E-4, size_neurons)
148+
i / x_size, j / y_size, np.random.uniform(1e-3, 9e-4, size_neurons)
151149
)
152150

153151
@property
@@ -252,7 +250,7 @@ def input_data(self, value):
252250
for vector in value:
253251
if len(vector) != len_0:
254252
raise ValueError("Input data of different lengths.")
255-
if len(value) < 300: # this
253+
if len(value) < 300: # this
256254
raise ValueError("Too few input data.")
257255
self._input_data = value
258256

@@ -281,15 +279,17 @@ def matches_input_to_clusters(self, value):
281279
self._matches_input_to_clusters = value
282280

283281
def find_bmu(self, input_vector):
284-
# compute Euclidean distance from the input vector
282+
# compute Euclidean distance from the input vector
285283
# to the weight vector of the neurons
286284
distances = np.array(
287-
[np.linalg.norm(self.neuron_map[i][j].weights - input_vector)
288-
for i in range(self.x_size)
289-
for j in range(self.y_size)]
285+
[
286+
np.linalg.norm(self.neuron_map[i][j].weights - input_vector)
287+
for i in range(self.x_size)
288+
for j in range(self.y_size)
289+
]
290290
).reshape((self.x_size, self.y_size))
291-
292-
# return the index of the neuron
291+
292+
# return the index of the neuron
293293
# with minimal distance (a.k.a. the best-matching unit)
294294
minimal_distance = np.where(distances == np.amin(distances))
295295
return [minimal_distance[0][0], minimal_distance[1][0]]
@@ -302,16 +302,17 @@ def update_grid(self, input_vector):
302302
for neuron_line in self.neuron_map:
303303
for neuron in neuron_line:
304304
# find each neuron that falls into the radius from the bmu at this iteration
305-
if (neuron.x - bmu.x) ** 2 + (neuron.y - bmu.y) ** 2 <= self.radius ** 2:
305+
if (neuron.x - bmu.x) ** 2 + (
306+
neuron.y - bmu.y
307+
) ** 2 <= self.radius ** 2:
306308
# update weights of the found neurons accordingly
307309
neuron.weights = neuron.weights + self.learning_rate * (
308-
input_vector - neuron.weights)
310+
input_vector - neuron.weights
311+
)
309312

310313
# update positions of the found neurons accordingly
311-
neuron.x += self.learning_rate * (
312-
bmu.x - neuron.x)
313-
neuron.y += self.learning_rate * (
314-
bmu.y - neuron.y)
314+
neuron.x += self.learning_rate * (bmu.x - neuron.x)
315+
neuron.y += self.learning_rate * (bmu.y - neuron.y)
315316
self.update_learning_rate()
316317
self.update_radius()
317318
self.iteration = self.iteration + 1
@@ -322,16 +323,14 @@ def update_radius(self):
322323

323324
def update_learning_rate(self):
324325
# update the learning rate with the known formula
325-
self.learning_rate = self.learning_rate_0 * np.exp(-self.iteration / self.time_constant)
326+
self.learning_rate = self.learning_rate_0 * np.exp(
327+
-self.iteration / self.time_constant
328+
)
326329

327330
def find_clusters(self):
328331
# FoF
329332
# make list of valid points
330-
list_points = [
331-
[i, j]
332-
for i in range(self.x_size)
333-
for j in range(self.y_size)
334-
]
333+
list_points = [[i, j] for i in range(self.x_size) for j in range(self.y_size)]
335334

336335
while list_points:
337336
# choose random valid point to start with
@@ -341,8 +340,10 @@ def find_clusters(self):
341340
cluster = Cluster([start_neuron], self.cluster_distance_threshold)
342341
for point in list_points:
343342
# calculate distance for each point to the starting neuron
344-
distance = np.sqrt((self.neuron_map[point[0]][point[1]].x - start_neuron.x) ** 2 + (
345-
self.neuron_map[point[0]][point[1]].y - start_neuron.y) ** 2)
343+
distance = np.sqrt(
344+
(self.neuron_map[point[0]][point[1]].x - start_neuron.x) ** 2
345+
+ (self.neuron_map[point[0]][point[1]].y - start_neuron.y) ** 2
346+
)
346347
if distance <= cluster.distance_threshold:
347348
# add member to cluster
348349
cluster.add_member(self.neuron_map[point[0]][point[1]], distance)
@@ -353,11 +354,17 @@ def find_clusters(self):
353354
for j in range(1, len(cluster.members)):
354355
for point in list_points:
355356
# calculate distance for each remaining point to the friends of the starting neuron
356-
distance = np.sqrt((self.neuron_map[point[0]][point[1]].x - cluster.members[j].x) ** 2 + (
357-
self.neuron_map[point[0]][point[1]].y - cluster.members[j].y) ** 2)
357+
distance = np.sqrt(
358+
(self.neuron_map[point[0]][point[1]].x - cluster.members[j].x)
359+
** 2
360+
+ (self.neuron_map[point[0]][point[1]].y - cluster.members[j].y)
361+
** 2
362+
)
358363
if distance <= cluster.distance_threshold:
359364
# add member to cluster
360-
cluster.add_member(self.neuron_map[point[0]][point[1]], distance)
365+
cluster.add_member(
366+
self.neuron_map[point[0]][point[1]], distance
367+
)
361368
# remove indexes from list of valid points
362369
list_points.remove(point)
363370
# more or less subjective threshold for number of members
@@ -374,56 +381,71 @@ def find_clusters(self):
374381
# self.clusters = sorted(self.clusters, key=lambda n: n.clustering_index)
375382

376383
def match_input_to_cluster(self):
377-
matches_df = pd.DataFrame(
378-
columns=['Cluster_number', 'Distance', 'Index']
379-
)
384+
matches_df = pd.DataFrame(columns=["Cluster_number", "Distance", "Index"])
380385
# associate each spectrum to a cluster, plot them
381386
count = 0
382387
for spectrum in self.input_data:
383388
distances = np.array([])
384389
for cluster in self.clusters:
385-
distances = np.append(distances, np.linalg.norm(cluster.average_members_weights - spectrum))
386-
390+
distances = np.append(
391+
distances,
392+
np.linalg.norm(cluster.average_members_weights - spectrum),
393+
)
394+
387395
# store the best matching cluster with the minimal distance as an array of
388396
# [cluster_number, distance, index], where cluster_number is related to the ordering
389397
# in the clusters array, hence based on the best clustering index
390398
matches_df = matches_df.append(
391-
pd.DataFrame([[np.where(distances == np.amin(distances))[0][0],
392-
np.amin(distances), count]], columns=['Cluster_number', 'Distance', 'Index'],), ignore_index=True
399+
pd.DataFrame(
400+
[
401+
[
402+
np.where(distances == np.amin(distances))[0][0],
403+
np.amin(distances),
404+
count,
405+
]
406+
],
407+
columns=["Cluster_number", "Distance", "Index"],
408+
),
409+
ignore_index=True,
393410
)
394411
count += 1
395412

396413
# sort the results from lowest to highest distance for each cluster_number
397-
self.matches_input_to_clusters = matches_df.sort_values(['Cluster_number', 'Distance', 'Index'], ascending=[True, True, False])
414+
self.matches_input_to_clusters = matches_df.sort_values(
415+
["Cluster_number", "Distance", "Index"], ascending=[True, True, False]
416+
)
398417

399-
400418
def average_spectra(self):
401419
# create the dedicated dataframe for the averaged spectra per cluster
402420
self.averaged_spectra_df = pd.DataFrame(
403-
columns=['Cluster_number', 'Avg_Spectrum']
421+
columns=["Cluster_number", "Avg_Spectrum"]
404422
)
405423
# cycle through the clusters
406424
for i in range(len(self.clusters)):
407425
# mock spectra variable
408426
spectra = np.zeros(len(self.input_data[0]))
409-
427+
410428
# get spectra from i-th cluster
411-
df = self.matches_input_to_clusters.loc[self.matches_input_to_clusters['Cluster_number']
412-
== i]
413-
# cycle through the single spectra, average them and add them
429+
df = self.matches_input_to_clusters.loc[
430+
self.matches_input_to_clusters["Cluster_number"] == i
431+
]
432+
# cycle through the single spectra, average them and add them
414433
# to the dataframe
415434
for j in range(0, len(df)):
416435
spectra = spectra + self.input_data[df.iloc[j].Index]
417436
self.averaged_spectra_df = self.averaged_spectra_df.append(
418-
pd.DataFrame([[i, spectra/len(df)]], columns=['Cluster_number', 'Avg_Spectrum']), ignore_index=True
437+
pd.DataFrame(
438+
[[i, spectra / len(df)]], columns=["Cluster_number", "Avg_Spectrum"]
439+
),
440+
ignore_index=True,
419441
)
420442

421443
def start(self, num_cycles=1):
422-
# repeating the SOM cycle a certain number of times,
444+
# repeating the SOM cycle a certain number of times,
423445
# with decreasing impacting parameters
424446
for n in range(0, num_cycles):
425-
self.radius = (1/(n+1))*self.radius_0
426-
self.learning_rate = (1/(n+1))*self.learning_rate_0
447+
self.radius = (1 / (n + 1)) * self.radius_0
448+
self.learning_rate = (1 / (n + 1)) * self.learning_rate_0
427449
[self.update_grid(vector) for vector in self.input_data]
428450
self.find_clusters()
429451
self.match_input_to_cluster()

0 commit comments

Comments
 (0)