@@ -39,13 +39,13 @@ def __init__(self, in_c, filter_h, filter_w, filter_num, stride=1, padding=0, mo
         self.filter_num = filter_num
         self.stride = stride
         self.padding = padding
-        self.nesterov = nesterov

         self.W = \
             {'val': 0.01 * np.random.normal(loc=0, scale=1.0, size=(filter_h * filter_w * in_c, filter_num)),
              'grad': 0,
              'v': 0,
-             'momentum': momentum}
+             'momentum': momentum,
+             'nesterov': nesterov}
         self.b = {'val': 0.01 * np.random.normal(loc=0, scale=1.0, size=(1, filter_num)), 'grad': 0}
         self.a = None
         self.input_shape = None
@@ -84,18 +84,21 @@ def update(self, learning_rate=0, regularization_rate=0):
         v_prev = self.W['v']
         self.W['v'] = self.W['momentum'] * self.W['v'] - learning_rate * (
                 self.W['grad'] + regularization_rate * self.W['val'])
-        if self.nesterov:
+        if self.W['nesterov']:
             self.W['val'] += (1 + self.W['momentum']) * self.W['v'] - self.W['momentum'] * v_prev
         else:
             self.W['val'] += self.W['v']
         self.b['val'] -= learning_rate * (self.b['grad'])

     def get_params(self):
-        return {'W': self.W['val'], 'b': self.b['val']}
+        return {'W': self.W['val'], 'momentum': self.W['momentum'], 'nesterov': self.W['nesterov'], 'b': self.b['val']}

     def set_params(self, params):
-        self.W['val'] = params['W']
-        self.b['val'] = params['b']
+        self.W['val'] = params.get('W')
+        self.b['val'] = params.get('b')
+
+        self.W['momentum'] = params.get('momentum', 0.0)
+        self.W['nesterov'] = params.get('nesterov', False)


 class MaxPool(Layer):
@@ -159,11 +162,11 @@ def __init__(self, num_in, num_out, momentum=0, nesterov=False):
         assert isinstance(num_in, int) and num_in > 0
         assert isinstance(num_out, int) and num_out > 0

-        self.nesterov = nesterov
         self.W = {'val': 0.01 * np.random.normal(loc=0, scale=1.0, size=(num_in, num_out)),
                   'grad': 0,
                   'v': 0,
-                  'momentum': momentum}
+                  'momentum': momentum,
+                  'nesterov': nesterov}
         self.b = {'val': 0.01 * np.random.normal(loc=0, scale=1.0, size=(1, num_out)), 'grad': 0}
         self.inputs = None

@@ -189,18 +192,21 @@ def update(self, learning_rate=0, regularization_rate=0):
         v_prev = self.W['v']
         self.W['v'] = self.W['momentum'] * self.W['v'] - learning_rate * (
                 self.W['grad'] + regularization_rate * self.W['val'])
-        if self.nesterov:
+        if self.W['nesterov']:
             self.W['val'] += (1 + self.W['momentum']) * self.W['v'] - self.W['momentum'] * v_prev
         else:
             self.W['val'] += self.W['v']
         self.b['val'] -= learning_rate * self.b['grad']

     def get_params(self):
-        return {'W': self.W['val'], 'b': self.b['val']}
+        return {'W': self.W['val'], 'momentum': self.W['momentum'], 'nesterov': self.W['nesterov'], 'b': self.b['val']}

     def set_params(self, params):
-        self.W['val'] = params['W']
-        self.b['val'] = params['b']
+        self.W['val'] = params.get('W')
+        self.b['val'] = params.get('b')
+
+        self.W['momentum'] = params.get('momentum', 0.0)
+        self.W['nesterov'] = params.get('nesterov', False)


 class ReLU(Layer):
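
Both update() hunks above implement the same SGD-with-momentum step; the only change is that the Nesterov flag is now read from the layer's W dict rather than from a separate attribute. For reference, a minimal standalone sketch of that update rule, assuming NumPy arrays; the name sgd_momentum_step and its signature are illustrative, not part of this repo:

import numpy as np

def sgd_momentum_step(w, v, grad, lr, momentum=0.0, reg=0.0, nesterov=False):
    # Velocity accumulates the L2-regularized gradient, as in update() above.
    v_prev = v
    v = momentum * v - lr * (grad + reg * w)
    if nesterov:
        # Nesterov look-ahead expressed against the plain parameter w,
        # matching the diff's algebra: w += (1 + mu) * v_new - mu * v_prev.
        w = w + (1 + momentum) * v - momentum * v_prev
    else:
        w = w + v  # classic momentum step
    return w, v

w, v = sgd_momentum_step(np.zeros(3), np.zeros(3), np.ones(3), lr=0.1, momentum=0.9, nesterov=True)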
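The widened get_params()/set_params() pair also lets the optimizer settings survive a save/load round trip, while the params.get(...) defaults keep older checkpoints, which carry only 'W' and 'b', loadable. A brief usage sketch, where layer stands for any Conv or FC instance from this diff:

state = layer.get_params()    # now includes 'momentum' and 'nesterov'
layer.set_params(state)       # restores weights and both optimizer settings
layer.set_params({'W': state['W'], 'b': state['b']})
# An old-style dict still loads: momentum falls back to 0.0, nesterov to False.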