@@ -70,18 +70,22 @@ def __init__(self, learning_rate: float = 0.01, momentum: float = 0.9, **kwargs)
7070 for key , value in kwargs .items ():
7171 setattr (self , key , value )
7272
def update(self, layer_index: int, weights: np.ndarray, weights_grad: np.ndarray,
           bias: np.ndarray = None, bias_grad: np.ndarray = None):
    """Apply one SGD-with-momentum step in place to ``weights`` (and ``bias``).

    Velocity buffers are kept per layer in the ``self.velocity_w`` /
    ``self.velocity_b`` dicts keyed by ``layer_index``, so a single optimizer
    instance can drive several layers with differently shaped parameters.

    Args:
        layer_index: Key identifying which layer's state to use.
        weights: Weight array, updated in place.
        weights_grad: Gradient of the loss w.r.t. ``weights``.
        bias: Optional bias array, updated in place only when both ``bias``
            and ``bias_grad`` are provided.
        bias_grad: Gradient of the loss w.r.t. ``bias``.
    """
    # Lazily create a zero velocity buffer the first time a layer is seen.
    if layer_index not in self.velocity_w:
        self.velocity_w[layer_index] = np.zeros_like(weights)

    # v <- momentum * v - lr * grad ; w <- w + v
    self.velocity_w[layer_index] = (
        self.momentum * self.velocity_w[layer_index]
        - self.learning_rate * weights_grad
    )
    weights += self.velocity_w[layer_index]

    # Guard on bias_grad as well: a bias with no gradient must be a no-op,
    # not a TypeError inside the NumPy arithmetic (the pre-refactor code
    # checked both operands).
    if bias is not None and bias_grad is not None:
        if layer_index not in self.velocity_b:
            self.velocity_b[layer_index] = np.zeros_like(bias)
        self.velocity_b[layer_index] = (
            self.momentum * self.velocity_b[layer_index]
            - self.learning_rate * bias_grad
        )
        bias += self.velocity_b[layer_index]
8589
8690 def get_config (self ) -> dict :
8791 return {
@@ -115,17 +119,28 @@ def __init__(self, learning_rate: float = 0.01, rho: float = 0.9, epsilon: float
115119 for key , value in kwargs .items ():
116120 setattr (self , key , value )
117121
def update(self, layer_index: int, weights: np.ndarray, weights_grad: np.ndarray,
           bias: np.ndarray = None, bias_grad: np.ndarray = None):
    """Apply one RMSprop step in place to ``weights`` (and ``bias``).

    Running squared-gradient averages are kept per layer in the
    ``self.sq_grads_w`` / ``self.sq_grads_b`` dicts keyed by ``layer_index``,
    so one optimizer instance can serve multiple layers.

    Args:
        layer_index: Key identifying which layer's state to use.
        weights: Weight array, updated in place.
        weights_grad: Gradient of the loss w.r.t. ``weights``.
        bias: Optional bias array, updated in place only when both ``bias``
            and ``bias_grad`` are provided.
        bias_grad: Gradient of the loss w.r.t. ``bias``.
    """
    # Lazily create a zero accumulator the first time a layer is seen.
    if layer_index not in self.sq_grads_w:
        self.sq_grads_w[layer_index] = np.zeros_like(weights)

    # s <- rho * s + (1 - rho) * grad^2 ; w <- w - lr * grad / (sqrt(s) + eps)
    self.sq_grads_w[layer_index] = (
        self.rho * self.sq_grads_w[layer_index]
        + (1 - self.rho) * np.square(weights_grad)
    )
    weights -= (
        self.learning_rate * weights_grad
        / (np.sqrt(self.sq_grads_w[layer_index]) + self.epsilon)
    )

    # Guard on bias_grad as well: a bias with no gradient must be a no-op,
    # not a TypeError (the pre-refactor code checked both operands).
    if bias is not None and bias_grad is not None:
        if layer_index not in self.sq_grads_b:
            self.sq_grads_b[layer_index] = np.zeros_like(bias)
        self.sq_grads_b[layer_index] = (
            self.rho * self.sq_grads_b[layer_index]
            + (1 - self.rho) * np.square(bias_grad)
        )
        bias -= (
            self.learning_rate * bias_grad
            / (np.sqrt(self.sq_grads_b[layer_index]) + self.epsilon)
        )
129144
130145 def get_config (self ) -> dict :
131146 return {
0 commit comments