@@ -3,10 +3,13 @@
 from copy import deepcopy
 
 import numpy as np
+from scipy import optimize
 from scipy.special import expit
 from scipy.stats import norm
 
-from .utils import logger, set_log_level, power_iteration
+from functools import partial
+
+from .utils import logger, set_log_level
 from .base import BaseEstimator, is_classifier, check_version
 
 
@@ -192,17 +195,6 @@ def _grad_L2loss(distr, alpha, Tau, reg_lambda, X, y, eta, beta):
     return g
 
 
-def _learning_rate(distr, X, reg_lambda, alpha):
-    if distr == 'gaussian':
-        s = power_iteration(X.T.dot(X)) + reg_lambda * (1 - alpha)
-        return 0.99 / s
-    elif distr == 'binomial':
-        s = (np.linalg.norm(X.T.dot(X)) ** 2) / 4
-        return 0.99 / s
-    else:
-        return 1e-4
-
-
 def _gradhess_logloss_1d(distr, xk, y, z, eta):
     """
     Compute gradient (1st derivative)
@@ -380,8 +372,8 @@ class GLM(BaseEstimator):
         'cdfast' (Newton coordinate gradient descent).
         default: 'batch-gradient'
     learning_rate : float | 'auto'
-        learning rate for gradient descent. If "auto", it is 0.95 / L
-        where the differentiable part of the loss function is L-smooth.
+        learning rate for gradient descent. If "auto", the step size is
+        set at each update by a line search (scipy.optimize.line_search).
         default: "auto"
     max_iter : int
         maximum iterations for the model.
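A minimal usage sketch of the two learning_rate modes documented above. The top-level import path and the constructor arguments shown (distr, reg_lambda, solver) are assumptions based on the surrounding code, not a definitive description of the released API:

# Sketch only: assumes this module is importable as pyglmnet and exposes GLM.
from pyglmnet import GLM

# 'auto': the step size is chosen by a line search at every batch-gradient
# update, with a small fallback step when the search fails.
glm_auto = GLM(distr='binomial', reg_lambda=0.01,
               solver='batch-gradient', learning_rate='auto')

# A float fixes the same step size for every update.
glm_fixed = GLM(distr='binomial', reg_lambda=0.01,
                solver='batch-gradient', learning_rate=1e-3)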
@@ -627,12 +619,6 @@ def fit(self, X, y):
         self : instance of GLM
             The fitted model.
         """
-        if self.learning_rate == 'auto':
-            step_size = _learning_rate(self.distr, X,
-                                       self.reg_lambda, self.alpha)
-            print('Step size calculated as %f' % step_size)
-        else:
-            step_size = self.learning_rate
         np.random.RandomState(self.random_state)
 
         # checks for group
@@ -682,6 +668,17 @@ def fit(self, X, y):
                                     reg_lambda, X, y, self.eta,
                                     beta)
 
+                if self.learning_rate == 'auto':
+                    func = partial(_loss, self.distr, alpha, self.Tau,
+                                   reg_lambda, X, y, self.eta, self.group)
+                    fprime = partial(_grad_L2loss, self.distr, alpha,
+                                     self.Tau, reg_lambda, X, y, self.eta)
+                    step_size, _, _, _, _, _ = optimize.line_search(
+                        func, fprime, beta, -grad, grad, c1=1e-4)
+                    if step_size is None:
+                        step_size = 1e-4
+                else:
+                    step_size = self.learning_rate
                 beta = beta - step_size * grad
             elif self.solver == 'cdfast':
                 beta, z = \
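For context, a standalone sketch of scipy.optimize.line_search on a toy quadratic (illustrative only, not the GLM loss): it returns a 6-tuple whose first element is the accepted step size, or None when no acceptable step is found, which is what the fallback branch added above guards against.

import numpy as np
from scipy import optimize

def f(x):
    # toy smooth objective: 0.5 * ||x||^2
    return 0.5 * np.dot(x, x)

def fprime(x):
    # gradient of the toy objective
    return x

beta = np.array([3.0, -2.0])
grad = fprime(beta)

# Search along the descent direction -grad starting from beta.
step_size, _, _, _, _, _ = optimize.line_search(
    f, fprime, beta, -grad, grad, c1=1e-4)

if step_size is None:
    step_size = 1e-4  # conservative fallback, mirroring the branch above
beta = beta - step_size * grad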
@@ -698,16 +695,15 @@ def fit(self, X, y):
             # Compute and save loss
             L.append(_loss(self.distr, alpha, self.Tau, reg_lambda,
                            X, y, self.eta, self.group, beta))
-            print(L[-1])
-            # if t > 1:
-            #     DL.append(L[-1] - L[-2])
-            #     if np.abs(DL[-1] / L[-1]) < tol:
-            #         msg = ('\tConverged. Loss function:'
-            #                ' {0:.2f}').format(L[-1])
-            #         logger.info(msg)
-            #         msg = ('\tdL/L: {0:.6f}\n'.format(DL[-1] / L[-1]))
-            #         logger.info(msg)
-            #         break
+            if t > 1:
+                DL.append(L[-1] - L[-2])
+                if np.abs(DL[-1] / L[-1]) < tol:
+                    msg = ('\tConverged. Loss function:'
+                           ' {0:.2f}').format(L[-1])
+                    logger.info(msg)
+                    msg = ('\tdL/L: {0:.6f}\n'.format(DL[-1] / L[-1]))
+                    logger.info(msg)
+                    break
 
         # Update the estimated variables
         self.beta0_ = beta[0]
@@ -906,8 +902,8 @@ class GLMCV(object):
         'cdfast' (Newton coordinate gradient descent).
         default: 'batch-gradient'
     learning_rate : float | 'auto'
-        learning rate for gradient descent. If "auto", it is 0.95 / L
-        where the differentiable part of the loss function is L-smooth.
+        learning rate for gradient descent. If "auto", the step size is
+        set at each update by a line search (scipy.optimize.line_search).
         default: "auto"
     max_iter : int
         maximum iterations for the model.
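The same 'auto' step-size behavior applies inside GLMCV's regularization path; a minimal sketch under the same assumptions as the GLM example above (top-level import, reg_lambda passed as a list of strengths):

from pyglmnet import GLMCV  # assumed import path

# Cross-validated fit over a path of regularization strengths; each
# batch-gradient update picks its step size by line search.
glm_cv = GLMCV(distr='binomial', reg_lambda=[0.5, 0.1, 0.01],
               solver='batch-gradient', learning_rate='auto')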