-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathdata_transforms.py
249 lines (213 loc) · 6.51 KB
/
data_transforms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
"""Collection of useful data transforms."""
# Imports
import numpy as np
import Neuron
import scipy
import scipy.linalg # SciPy Linear Algebra Library
from numpy.linalg import inv
def get_leaves(nodes, parents):
    """
    Find the nodes that never occur as a parent.

    Parameters
    ----------
    nodes: list
        labels of all nodes currently in the tree
    parents: list
        parent label of each node

    Returns
    -------
    leaves: numpy array
        labels present in `nodes` but absent from `parents`,
        in ascending order
    """
    childless = set(nodes).difference(parents)
    return np.sort(list(childless))
def encode_prufer(parents, verbose=0):
    """
    Convert the parents sequence to a prufer sequence.

    The lowest numbered leaf is removed repeatedly and the parent of each
    removed leaf is appended to the code, until two nodes remain.

    Parameters
    ----------
    parents: list
        list of parents for each node; entry i is the parent of node i.
        The input is not modified.
    verbose: bool
        print the intermediate state of every step. Default is False.

    Returns
    -------
    prufer: list
        corresponding prufer sequence, of length len(parents) - 2
        (empty when there are fewer than three nodes)
    """
    # Work on private copies: the caller's sequence must survive the call,
    # and an explicit list is needed because `range` objects do not support
    # item deletion on Python 3.
    remaining_parents = list(parents)
    n_nodes = len(remaining_parents)
    remaining_nodes = list(range(n_nodes))
    prufer = list()
    for _ in range(n_nodes - 2):
        # Recalculate all the leaves
        leaves = get_leaves(remaining_nodes, remaining_parents)
        if verbose:
            print('leaves %s' % (leaves,))

        # Add the parent of the lowest numbered leaf to the sequence
        leaf_idx = remaining_nodes.index(int(leaves[0]))
        prufer.append(remaining_parents[leaf_idx])
        if verbose:
            print('prufer %s' % (prufer,))

        # Remove the lowest numbered leaf and its corresponding parent
        del remaining_nodes[leaf_idx]
        del remaining_parents[leaf_idx]
        if verbose:
            print('nodes %s' % (remaining_nodes,))
            print('parents %s' % (remaining_parents,))
            print(60 * '-')
    return prufer
def decode_prufer(prufer, verbose=0):
    """
    Convert the prufer sequence to a parents sequence.

    Parameters
    ----------
    prufer: list
        prufer sequence. The input is not modified.
    verbose: bool
        print the intermediate state of every step. Default is False.

    Returns
    -------
    parents: list
        corresponding list of parents for each node, of length
        len(prufer) + 2; node 0 keeps the parent -1
    """
    # Work on private copies: the code is consumed from the front, which
    # must not empty the caller's sequence, and `range` objects do not
    # support item deletion on Python 3.
    remaining_code = list(prufer)
    n_prufer = len(remaining_code)
    n_nodes = n_prufer + 2
    remaining_nodes = list(range(n_nodes))
    parents = -1 * np.ones(n_nodes)
    for _ in range(n_prufer):
        if verbose:
            print(remaining_nodes)
            print(remaining_code)

        # Attach the lowest numbered leaf to the first entry of the code.
        # Node 0 is skipped so that it is never attached to another node.
        leaves = list(get_leaves(remaining_nodes, remaining_code))
        child = int(leaves[0])
        if child == 0:
            child = int(leaves[1])
        parent = remaining_code[0]
        if verbose:
            print('%s %s' % (child, parent))
        parents[child] = parent

        remaining_nodes.remove(child)
        del remaining_code[0]
        if verbose:
            print(60 * '-')

    # Two nodes are left: the higher numbered one hangs off the lower one.
    parents[remaining_nodes[1]] = remaining_nodes[0]
    return list(parents.astype(int))
def reordering_prufer(parents, locations):
    """
    Relabel a parents sequence and move the locations accordingly.

    The intent is that parent labels end up smaller than children labels.

    Parameters
    ----------
    parents: numpy array
        sequence of parents indices; the first entry belongs to the root
        and is not used as an index (expected to be -1)
    locations: numpy array
        n - 1 x 3, one row per non-root node

    Returns
    -------
    parents_reordered: numpy array
        sequence of parents indices, first entry set to -1
    locations_reordered: numpy array
        n - 1 x 3
    """
    n_nodes = len(parents)
    identity = np.eye(n_nodes)

    # edges[p, c] = 1 for every non-root node c whose parent is p
    edges = np.zeros([n_nodes, n_nodes])
    child_ids = range(1, n_nodes)
    edges[parents[1:], child_ids] = 1

    # Schur decomposition of inv(I - edges); the second output is the
    # matrix the original author treats as the permutation
    # NOTE(review): confirm it is a true permutation for unordered input
    closure = np.linalg.inv(identity - edges)
    schur_form, schur_vectors = scipy.linalg.schur(closure)

    # Read the new parent of each node off the columns of
    # I - inv(schur_form)
    parents_reordered = np.argmax(identity - np.linalg.inv(schur_form),
                                  axis=0)
    parents_reordered[0] = -1

    # Prepend a zero row for the root, transform, then drop the first row
    padded_locations = np.append([[0., 0., 0.]], locations, axis=0)
    locations_reordered = np.dot(schur_vectors, padded_locations)
    return parents_reordered, locations_reordered[1:, :]
def swc_to_neuron(matrix):
    """
    Return the Neuron object from swc matrix.

    Parameters
    ----------
    matrix: numpy array
        numpy array of the size n_nodes*7.

    Return
    ------
    Neuron: Neuron
        a neuron obj with the given swc format.
    """
    # `import Neuron` binds the module, and a module object is not callable,
    # so the class has to be looked up inside it. Falling back to the bound
    # name keeps this working if the import ever becomes
    # `from Neuron import Neuron`.
    # NOTE(review): assumes the class inside the module is also named
    # `Neuron` - confirm against the Neuron module.
    neuron_cls = getattr(Neuron, 'Neuron', Neuron)
    return neuron_cls(file_format='Matrix of swc', input_file=matrix)
def downsample_neuron(neuron,
                      method='random',
                      number=30):
    """
    Downsample a neuron to a given number of nodes.

    Parameters
    ----------
    neuron: Neuron
        given neuron to subsample.
    method: str
        the method to subsample. Only 'random' is implemented.
    number: int
        the number of nodes to keep.

    Return
    ------
    Neuron: Neuron
        a subsampled neuron with given number of nodes.

    Raises
    ------
    ValueError
        if `method` is not an implemented subsampling method.
    """
    if method == 'random':
        # NOTE(review): `subsample` is not among this module's visible
        # imports - confirm it is importable here, otherwise this line
        # raises NameError.
        return subsample.random_subsample(neuron, number)
    # Fail loudly rather than returning None, which only surfaces later as
    # an attribute error in the caller.
    raise ValueError(
        "Unsupported subsampling method: %r (only 'random' is implemented)"
        % (method,))
def get_data(neuron_database, method, subsampling_numbers):
    """
    Prepare the training data from a collection of neurons.

    Every neuron is subsampled to `subsampling_numbers` nodes; its parent
    sequence is stored as a prufer code and its node locations as
    coordinates.

    Parameters
    ----------
    neuron_database: list
        the elements of the list are Neuron obj.
    method: str
        the method to subsample.
    subsampling_numbers: int
        the number of nodes each neuron is subsampled to.

    Returns
    -------
    data: dic
        a dic of two classes: 'morphology' and 'geometry', each a dic with
        the single key 'n' + str(subsampling_numbers).
        'morphology' holds an array of size
        (datasize * subsampling_numbers - 2) with one prufer code per row.
        'geometry' holds an array of size
        (datasize * subsampling_numbers - 1 * 3).
    """
    n_neurons = len(neuron_database)
    prufer_codes = np.zeros([n_neurons, subsampling_numbers - 2])
    coordinates = np.zeros([n_neurons, subsampling_numbers - 1, 3])

    for row, neuron in enumerate(neuron_database):
        sampled = downsample_neuron(neuron=neuron,
                                    method=method,
                                    number=subsampling_numbers)
        # The first node is marked with parent -1 before encoding
        parent_index = sampled.parent_index
        parent_index[0] = -1
        prufer_codes[row, :] = encode_prufer(parent_index.tolist())
        # Drop the first column of the locations and transpose the rest
        coordinates[row, :, :] = sampled.location[:, 1:].T

    key = 'n' + str(subsampling_numbers)
    data = dict()
    data['morphology'] = {key: prufer_codes}
    data['geometry'] = {key: coordinates}
    return data
def make_swc_from_prufer_and_locations(data):
    """
    Build a Neuron from a prufer code and the node locations.

    Parameters
    ----------
    data: dict
        'morphology': prufer sequence of the tree.
        'geometry': locations of all nodes except the first,
        (n_nodes - 1) x 3.

    Returns
    -------
    Neuron: Neuron
        a neuron obj built from the assembled n_nodes x 7 swc matrix.
    """
    # The prufer code and the locations are given; recover the parents.
    parents_code = np.array(decode_prufer(list(data['morphology'])))
    n_nodes = len(parents_code)

    # Columns presumably follow the swc layout
    # (id, type, x, y, z, radius, parent) - TODO confirm.
    swc = np.zeros([n_nodes, 7])
    swc[:, 0] = np.arange(1, n_nodes + 1)  # ids start at 1
    swc[0, 1] = 1                          # first node gets type 1
    swc[1:, 1] = 2                         # all other nodes get type 2
    # The first node keeps location (0, 0, 0); column 5 is left at 0.
    swc[1:, 2:5] = data['geometry']
    # Shift parents to the 1-based ids; the first entry stays as decoded.
    parents_code[1:] = parents_code[1:] + 1
    swc[:, 6] = parents_code

    # `import Neuron` binds the module, and a module object is not callable,
    # so the class has to be looked up inside it. Falling back to the bound
    # name keeps this working if the import ever becomes
    # `from Neuron import Neuron`.
    # NOTE(review): assumes the class inside the module is also named
    # `Neuron` - confirm against the Neuron module.
    neuron_cls = getattr(Neuron, 'Neuron', Neuron)
    return neuron_cls(file_format='Matrix of swc', input_file=swc)