Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/plot_community_crime.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
glm = GLM(distr='gaussian', alpha=0.05, score_metric='pseudo_R2')
glmcv = GridSearchCV(glm, param_grid, cv=cv)
glmcv.fit(X_train, y_train)
y_test_hat = glmcv.predict(X_test)

print("test set pseudo $R^2$ = %f" % glmcv.score(X_test, y_test))

Expand Down
33 changes: 27 additions & 6 deletions pyglmnet/pyglmnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@

import warnings
from copy import deepcopy
from functools import partial

import numpy as np

from scipy import optimize
from scipy.special import expit
from scipy.stats import norm

from .utils import logger, set_log_level, _check_params
from .base import BaseEstimator, is_classifier, check_version

Expand Down Expand Up @@ -468,9 +472,10 @@ class GLM(BaseEstimator):
'batch-gradient' (vanilla batch gradient descent)
'cdfast' (Newton coordinate gradient descent).
default: 'batch-gradient'
learning_rate: float
learning rate for gradient descent.
default: 2e-1
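learning_rate : float | 'auto'
learning rate (step size) for gradient descent. If 'auto', the
step size is chosen at every iteration by a line search using
scipy.optimize.line_search; if the line search does not return
a step size, 1e-4 is used.
default: 'auto'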
max_iter: int
maximum iterations for the model.
default: 1000
Expand Down Expand Up @@ -532,7 +537,7 @@ def __init__(self, distr='poisson', alpha=0.5,
Tau=None, group=None,
reg_lambda=0.1,
solver='batch-gradient',
learning_rate=2e-1, max_iter=1000,
learning_rate='auto', max_iter=1000,
tol=1e-3, eta=2.0, score_metric='deviance',
fit_intercept=True,
random_state=0, callback=None, verbose=False):
Expand Down Expand Up @@ -810,6 +815,8 @@ def fit(self, X, y):
ActiveSet = np.ones_like(beta)

# Iterative updates
L = [_loss(self.distr, alpha, self.Tau, reg_lambda,
X, y, self.eta, self.group, beta, self.fit_intercept)]
for t in range(0, self.max_iter):
self.n_iter_ += 1

Expand All @@ -823,7 +830,21 @@ def fit(self, X, y):
msg = ('\tConverged in {0:d} iterations'.format(t))
logger.info(msg)
break
beta = beta - self.learning_rate * grad

if self.learning_rate == 'auto':
func = partial(_loss, self.distr, alpha, self.Tau,
reg_lambda, X, y, self.eta, self.group,
fit_intercept=self.fit_intercept)
fprime = partial(_grad_L2loss, self.distr, alpha, self.Tau,
reg_lambda, X, y, self.eta,
fit_intercept=self.fit_intercept)
step_size, _, _, _, _, _ = optimize.linesearch.line_search(
func, fprime, beta, -grad, grad, L, c1=1e-4)
if step_size is None:
step_size = 1e-4
else:
step_size = self.learning_rate
beta = beta - step_size * grad

elif self.solver == 'cdfast':
beta_old = deepcopy(beta)
Expand Down Expand Up @@ -1116,7 +1137,7 @@ def __init__(self, distr='poisson', alpha=0.5,
Tau=None, group=None,
reg_lambda=None, cv=10,
solver='batch-gradient',
learning_rate=2e-1, max_iter=1000,
learning_rate='auto', max_iter=1000,
tol=1e-3, eta=2.0, score_metric='deviance',
fit_intercept=True,
random_state=0, verbose=False):
Expand Down
5 changes: 2 additions & 3 deletions tests/test_pyglmnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,12 +415,11 @@ def test_random_state_consistency():
ypred_a = glm_a.fit_predict(Xtrain, ytrain)
glm_b = GLM(distr="gaussian", random_state=1)
ypred_b = glm_b.fit_predict(Xtrain, ytrain)
ypred_c = glm_b.fit_predict(Xtrain, ytrain)
glm_a.fit(Xtrain, ytrain)
glm_b.fit(Xtrain, ytrain)

# Consistency between two different models
assert_array_equal(ypred_a, ypred_b)
# Consistency between different runs of the same model
assert_array_equal(ypred_b, ypred_c)

# Test also cross-validation
glm_cv_a = GLMCV(distr="gaussian", cv=3, random_state=1)
Expand Down