diff --git a/README.md b/README.md index e4319cf..a1594d5 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,8 @@ to run stability selection with complementary pairs bootstrapping. ## Feedback and contributing -Feedback and contributions are much appreciated. If you have any feedback, please post it on the [issue tracker](https://github.com/scikit-learn-contrib/stability-selection/issues). +* Feedback and contributions are much appreciated. If you have any feedback, please post it on the [issue tracker](https://github.com/scikit-learn-contrib/stability-selection/issues). + ## References @@ -115,3 +116,4 @@ Feedback and contributions are much appreciated. If you have any feedback, pleas error control: another look at stability selection. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 75(1), pp.55-80. +[3] https://github.com/scikit-learn-contrib/stability-selection \ No newline at end of file diff --git a/examples/plot_randomized_lasso_path.py b/examples/plot_randomized_lasso_path.py index fb7d4e7..3409642 100644 --- a/examples/plot_randomized_lasso_path.py +++ b/examples/plot_randomized_lasso_path.py @@ -40,9 +40,9 @@ def generate_experiment_data(n=200, p=200, rho=0.6, random_state=3245): lambda_grid = np.linspace(0.001, 0.5, num=100) for weakness in [0.2, 0.5, 1.0]: - estimator = RandomizedLasso(weakness=weakness) - selector = StabilitySelection(base_estimator=estimator, lambda_name='alpha', - lambda_grid=lambda_grid, threshold=0.9, verbose=1) + estimator = RandomizedLasso(weakness=weakness,normalize=True) + selector = StabilitySelection(base_estimator=estimator, lambda_name='alpha',lambda_grid=lambda_grid, + threshold=0.9, verbose=1) selector.fit(X, y) fig, ax = plot_stability_path(selector) diff --git a/examples/plot_stability_scores.py b/examples/plot_stability_scores.py index 0baeb37..0ee75bd 100644 --- a/examples/plot_stability_scores.py +++ b/examples/plot_stability_scores.py @@ -36,10 +36,9 @@ def _generate_dummy_classification_data(p=1000, n=1000, k=5, random_state=123321 base_estimator = Pipeline([ ('scaler', StandardScaler()), - ('model', LogisticRegression(penalty='l1')) + ('model', LogisticRegression()) ]) - selector = StabilitySelection(base_estimator=base_estimator, lambda_name='model__C', - lambda_grid=np.logspace(-5, -1, 50)) + selector = StabilitySelection(base_estimator=base_estimator, lambda_name='model__C',lambda_grid=np.logspace(-5, -1, 50)) selector.fit(X, y) fig, ax = plot_stability_path(selector) diff --git a/requirements.txt b/requirements.txt index b948a3f..aaeb911 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -nose>=1.1.2 -scikit-learn>=0.19 -matplotlib>=2.0.0 -numpy>=1.8.0 \ No newline at end of file +nose==1.3.7 +scikit-learn>=1.2.2 +matplotlib==3.7.1 +numpy==1.24.3 \ No newline at end of file diff --git a/stability_selection/randomized_lasso.py b/stability_selection/randomized_lasso.py index 72267fa..addad1e 100644 --- a/stability_selection/randomized_lasso.py +++ b/stability_selection/randomized_lasso.py @@ -18,7 +18,7 @@ from scipy.sparse import issparse from sklearn.linear_model import LogisticRegression, Lasso -from sklearn.linear_model.base import _preprocess_data +from sklearn.linear_model._base import _preprocess_data from sklearn.utils import check_X_y, check_random_state __all__ = ['RandomizedLogisticRegression', 'RandomizedLasso'] @@ -120,9 +120,9 @@ def __init__(self, weakness=0.5, alpha=1.0, fit_intercept=True, normalize=False, tol=1e-4, warm_start=False, positive=False, random_state=None, selection='cyclic'): self.weakness = weakness + self.normalize = normalize super(RandomizedLasso, self).__init__( - alpha=alpha, fit_intercept=fit_intercept, - normalize=normalize, precompute=precompute, copy_X=copy_X, + alpha=alpha, fit_intercept=fit_intercept, precompute=precompute, copy_X=copy_X, max_iter=max_iter, tol=tol, warm_start=warm_start, positive=positive, random_state=random_state, selection=selection) @@ -149,9 +149,10 @@ def fit(self, X, y): weights = weakness * random_state.randint(0, 1 + 1, size=(n_features,)) - # TODO: I am afraid this will do double normalization if set to true - #X, y, _, _ = _preprocess_data(X, y, self.fit_intercept, normalize=self.normalize, copy=False, - # sample_weight=None, return_mean=False) + # TODO: I am afraid this will do double normalization if set to true. + X, y, X_offset, y_offset, X_scale = _preprocess_data(X, y, self.fit_intercept, + normalize=self.normalize,copy=False, + sample_weight=None,check_input=True) # TODO: Check if this is a problem if it happens before standardization X_rescaled = _rescale_data(X, weights) diff --git a/stability_selection/stability_selection.py b/stability_selection/stability_selection.py index 1f95af8..6a62116 100644 --- a/stability_selection/stability_selection.py +++ b/stability_selection/stability_selection.py @@ -19,11 +19,12 @@ """ from warnings import warn - import matplotlib.pyplot as plt import numpy as np from sklearn.base import BaseEstimator, TransformerMixin, clone -from sklearn.externals.joblib import Parallel, delayed +from joblib import Parallel, delayed + + from sklearn.feature_selection import SelectFromModel from sklearn.linear_model import LogisticRegression from sklearn.pipeline import Pipeline