diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst
index 6d7e43058..070bd9feb 100644
--- a/docs/source/whats_new.rst
+++ b/docs/source/whats_new.rst
@@ -42,6 +42,7 @@ Enhancements
 - Adding :func:`moabb.analysis.plotting.dataset_bubble_plot` plus the corresponding tutorial (:gh:`753` by `Pierre Guetschel`_)
 - Adding :func:`moabb.datasets.utils.plot_all_datasets` and update the tutorial (:gh:`758` by `Pierre Guetschel`_)
 - Improve the dataset model cards in each API page (:gh:`765` by `Pierre Guetschel`_)
+- Refactor :class:`moabb.evaluations.CrossSessionEvaluation`, :class:`moabb.evaluations.CrossSubjectEvaluation` and :class:`moabb.evaluations.WithinSessionEvaluation` to use the new splitter classes (:gh:`769` by `Bruno Aristimunha`_)
 - Adding tutorial on using mne-features (:gh:`762` by `Alexander de Ranitz`_, `Luuk Neervens`_, `Charlynn van Osch`_ and `Bruno Aristimunha`_)
 - Creating tutorial to expose the pre-processing steps (:gh:`771` by `Bruno Aristimunha`_)
 - Add function to auto-generate tables for the paper results documentation page (:gh:`785` by `Lucas Heck`_)
diff --git a/examples/advanced_examples/plot_grid_search_withinsession.py b/examples/advanced_examples/plot_grid_search_withinsession.py
index a2f1aefb7..f63bb82be 100644
--- a/examples/advanced_examples/plot_grid_search_withinsession.py
+++ b/examples/advanced_examples/plot_grid_search_withinsession.py
@@ -9,7 +9,6 @@
 """

 import os
-from pickle import load

 import matplotlib.pyplot as plt
 import seaborn as sns
@@ -132,44 +131,3 @@
 )
 sns.pointplot(data=result, y="score", x="pipeline", ax=axes, palette="Set1")
 axes.set_ylabel("ROC AUC")
-
-##########################################################
-# Load Best Model Parameter
-# -------------------------
-# The best model are automatically saved in a pickle file, in the
-# results directory. It is possible to load those model for each
-# dataset, subject and session. Here, we could see that the grid
-# search found a l1_ratio that is different from the baseline
-# value.
-
-with open(
-    "./Results/Models_WithinSession/BNCI2014-001/1/1test/GridSearchEN/fitted_model_best.pkl",
-    "rb",
-) as pickle_file:
-    GridSearchEN_Session_E = load(pickle_file)
-
-print(
-    "Best Parameter l1_ratio Session_E GridSearchEN ",
-    GridSearchEN_Session_E.best_params_["LogistReg__l1_ratio"],
-)
-
-print(
-    "Best Parameter l1_ratio Session_E VanillaEN: ",
-    pipelines["VanillaEN"].steps[2][1].l1_ratio,
-)
-
-with open(
-    "./Results/Models_WithinSession/BNCI2014-001/1/0train/GridSearchEN/fitted_model_best.pkl",
-    "rb",
-) as pickle_file:
-    GridSearchEN_Session_T = load(pickle_file)
-
-print(
-    "Best Parameter l1_ratio Session_T GridSearchEN ",
-    GridSearchEN_Session_T.best_params_["LogistReg__l1_ratio"],
-)
-
-print(
-    "Best Parameter l1_ratio Session_T VanillaEN: ",
-    pipelines["VanillaEN"].steps[2][1].l1_ratio,
-)
diff --git a/moabb/evaluations/__init__.py b/moabb/evaluations/__init__.py
index 9f8eceff5..4a5695f48 100644
--- a/moabb/evaluations/__init__.py
+++ b/moabb/evaluations/__init__.py
@@ -10,4 +10,4 @@
     WithinSessionEvaluation,
 )
 from .splitters import CrossSessionSplitter, CrossSubjectSplitter, WithinSessionSplitter
-from .utils import create_save_path, save_model_cv, save_model_list
+from .utils import _create_save_path, _save_model_cv
diff --git a/moabb/evaluations/base.py b/moabb/evaluations/base.py
index c15c2699d..7f8d70158 100644
--- a/moabb/evaluations/base.py
+++ b/moabb/evaluations/base.py
@@ -3,29 +3,23 @@
 from warnings import warn

 import pandas as pd
+from joblib import Parallel, delayed
 from sklearn.base import BaseEstimator
-from sklearn.model_selection import GridSearchCV

 from moabb.analysis import Results
 from moabb.datasets.base import BaseDataset
-from moabb.evaluations.utils import _convert_sklearn_params_to_optuna
+from moabb.evaluations.utils import (
+    _convert_sklearn_params_to_optuna,
+    check_search_available,
+)
 from moabb.paradigms.base import BaseParadigm


+search_methods, optuna_available = check_search_available()
+
 log = logging.getLogger(__name__)

 # Making the optuna soft dependency
-try:
-    from optuna.integration import OptunaSearchCV
-
-    optuna_available = True
-except ImportError:
-    optuna_available = False
-
-if optuna_available:
-    search_methods = {"grid": GridSearchCV, "optuna": OptunaSearchCV}
-else:
-    search_methods = {"grid": GridSearchCV}


 class BaseEvaluation(ABC):
@@ -83,6 +77,8 @@ class BaseEvaluation(ABC):
         optuna, time_out parameters.
     """

+    search = False
+
     def __init__(
         self,
         paradigm,
@@ -201,7 +197,6 @@ def process(self, pipelines, param_grid=None, postprocess_pipeline=None):
             This pipeline must be "fixed" because it will not be trained,
             i.e. no call to ``fit`` will be made.

-
         Returns
         -------
         results: pd.DataFrame
@@ -216,26 +211,44 @@ def process(self, pipelines, param_grid=None, postprocess_pipeline=None):
             if not (isinstance(pipeline, BaseEstimator)):
                 raise (ValueError("pipelines must only contains Pipelines " "instance"))
-        res_per_db = []
-        for dataset in self.datasets:
-            log.info("Processing dataset: {}".format(dataset.code))
-            process_pipeline = self.paradigm.make_process_pipelines(
+        # Prepare dataset processing parameters
+        processing_params = [
+            (
                 dataset,
-                return_epochs=self.return_epochs,
-                return_raws=self.return_raws,
-                postprocess_pipeline=postprocess_pipeline,
-            )[0]
-            # (we only keep the pipeline for the first frequency band, better ideas?)
-
-            results = self.evaluate(
-                dataset,
-                pipelines,
-                param_grid=param_grid,
-                process_pipeline=process_pipeline,
-                postprocess_pipeline=postprocess_pipeline,
+                self.paradigm.make_process_pipelines(
+                    dataset,
+                    return_epochs=self.return_epochs,
+                    return_raws=self.return_raws,
+                    postprocess_pipeline=postprocess_pipeline,
+                )[0],
             )
+            for dataset in self.datasets
+        ]
+
+        # Parallel processing...
+        parallel_results = Parallel(n_jobs=self.n_jobs)(
+            delayed(
+                lambda d, p: list(
+                    self.evaluate(
+                        d,
+                        pipelines,
+                        param_grid=param_grid,
+                        process_pipeline=p,
+                        postprocess_pipeline=postprocess_pipeline,
+                    )
+                )
+            )(dataset, process_pipeline)
+            for dataset, process_pipeline in processing_params
+        )
+
+        res_per_db = []
+        # Process results in order
+        for (dataset, process_pipeline), results in zip(
+            processing_params, parallel_results
+        ):
             for res in results:
                 self.push_result(res, pipelines, process_pipeline)
+
             res_per_db.append(
                 self.results.to_dataframe(
                     pipelines=pipelines, process_pipeline=process_pipeline
                 )
@@ -316,9 +329,12 @@ def _grid_search(self, param_grid, name, grid_clf, inner_cv):
                     return_train_score=True,
                     **extra_params,
                 )
+                self.search = True
                 return search
             else:
+                self.search = True
                 return grid_clf

         else:
+            self.search = False
             return grid_clf
diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py
index a2e39800d..93d4d55f6 100644
--- a/moabb/evaluations/evaluations.py
+++ b/moabb/evaluations/evaluations.py
@@ -12,14 +12,21 @@
     LeaveOneGroupOut,
     StratifiedKFold,
     StratifiedShuffleSplit,
-    cross_validate,
 )
-from sklearn.model_selection._validation import _fit_and_score, _score
+from sklearn.model_selection._validation import _score
 from sklearn.preprocessing import LabelEncoder
 from tqdm import tqdm

 from moabb.evaluations.base import BaseEvaluation
-from moabb.evaluations.utils import create_save_path, save_model_cv, save_model_list
+from moabb.evaluations.splitters import (
+    CrossSessionSplitter,
+    CrossSubjectSplitter,
+    WithinSessionSplitter,
+)
+from moabb.evaluations.utils import (
+    _create_save_path,
+    _save_model_cv,
+)


 try:
@@ -29,6 +36,7 @@
 except ImportError:
     _carbonfootprint = False

+
 log = logging.getLogger(__name__)

 # Numpy ArrayLike is only available starting from Numpy 1.20 and Python 3.8
@@ -134,7 +142,6 @@ def __init__(
         super().__init__(**kwargs)

     # flake8: noqa: C901
-
     def _evaluate(
         self,
         dataset,
@@ -172,8 +179,13 @@
                         # Initialize CodeCarbon
                        tracker = EmissionsTracker(save_to_file=False, log_level="error")
                         tracker.start()
+
                     t_start = time()
-                    cv = StratifiedKFold(5, shuffle=True, random_state=self.random_state)
+                    self.cv = WithinSessionSplitter(
+                        n_folds=5,
+                        shuffle=True,
+                        random_state=self.random_state,
+                    )
                     inner_cv = StratifiedKFold(
                         3, shuffle=True, random_state=self.random_state
                     )
@@ -185,17 +197,6 @@

                     grid_clf = clone(clf)

-                    # Create folder for grid search results
-                    create_save_path(
-                        self.hdf5_path,
-                        dataset.code,
-                        subject,
-                        session,
-                        name,
-                        grid=True,
-                        eval_type="WithinSession",
-                    )
-
                     # Implement Grid Search
                     grid_clf = self._grid_search(
                         param_grid=param_grid,
@@ -203,64 +204,51 @@
                         grid_clf=grid_clf,
                         inner_cv=inner_cv,
                     )
+
                     if self.hdf5_path is not None and self.save_model:
-                        model_save_path = create_save_path(
+                        model_save_path = _create_save_path(
                             self.hdf5_path,
                             dataset.code,
                             subject,
                             session,
                             name,
-                            grid=False,
+                            grid=self.search,
                             eval_type="WithinSession",
                         )

-                    if isinstance(X, BaseEpochs):
-                        scorer = get_scorer(self.paradigm.scoring)
-                        acc = list()
-                        X_ = X[ix]
-                        y_ = y[ix] if self.mne_labels else y_cv
-                        for cv_ind, (train, test) in enumerate(cv.split(X_, y_)):
-                            cvclf = clone(grid_clf)
-                            cvclf.fit(X_[train], y_[train])
-                            acc.append(scorer(cvclf, X_[test], y_[test]))
-
-                            if self.hdf5_path is not None and self.save_model:
-                                save_model_cv(
-                                    model=cvclf,
-                                    save_path=model_save_path,
-                                    cv_index=cv_ind,
-                                )
+                    scorer = get_scorer(self.paradigm.scoring)
+                    acc = list()
+                    X_ = X[ix]
+                    y_ = y[ix] if self.mne_labels else y_cv
+                    meta_ = metadata[ix].reset_index(drop=True)
+
+                    for cv_ind, (train, test) in enumerate(self.cv.split(y_, meta_)):
+                        cvclf = clone(grid_clf)
+
+                        cvclf.fit(X_[train], y_[train])
+
+                        score = scorer(cvclf, X_[test], y_[test])
+
+                        acc.append(score)
-                        acc = np.array(acc)
-                        score = acc.mean()
-                    else:
-                        results = cross_validate(
-                            grid_clf,
-                            X[ix],
-                            y_cv,
-                            cv=cv,
-                            scoring=self.paradigm.scoring,
-                            n_jobs=self.n_jobs,
-                            error_score=self.error_score,
-                            return_estimator=True,
-                        )
-                        score = results["test_score"].mean()

                         if self.hdf5_path is not None and self.save_model:
-                            save_model_list(
-                                results["estimator"],
-                                score_list=results["test_score"],
+                            _save_model_cv(
+                                model=cvclf,
                                 save_path=model_save_path,
+                                cv_index=cv_ind,
                             )

+                    acc = np.array(acc)
+                    score = acc.mean()
+
                     if _carbonfootprint:
                         emissions = tracker.stop()
                         if emissions is None:
                             emissions = np.nan
                     duration = time() - t_start
-
                    nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1]
                     res = {
-                        "time": duration / 5.0,  # 5 fold CV
+                        "time": duration / self.cv.n_folds,  # 5 fold CV
                         "dataset": dataset,
                         "subject": subject,
                         "session": session,
@@ -519,7 +507,8 @@ def evaluate(
                     tracker.start()

                 # we want to store a results per session
-                cv = LeaveOneGroupOut()
+                self.cv = CrossSessionSplitter(random_state=self.random_state)
+
                 inner_cv = StratifiedKFold(
                     3, shuffle=True, random_state=self.random_state
                 )
@@ -532,62 +521,42 @@
                 )

                 if self.hdf5_path is not None and self.save_model:
-                    model_save_path = create_save_path(
+                    model_save_path = _create_save_path(
                         hdf5_path=self.hdf5_path,
                         code=dataset.code,
                         subject=subject,
                         session="",
                         name=name,
-                        grid=False,
+                        grid=self.search,
                         eval_type="CrossSession",
                     )

-                for cv_ind, (train, test) in enumerate(cv.split(X, y, groups)):
+                for cv_ind, (train, test) in enumerate(self.cv.split(y, metadata)):
                     model_list = []
                     if _carbonfootprint:
                         tracker.start()
                     t_start = time()
-                    if isinstance(X, BaseEpochs):
-                        cvclf = clone(grid_clf)
-                        cvclf.fit(X[train], y[train])
-                        model_list.append(cvclf)
-                        score = scorer(cvclf, X[test], y[test])
-                        if self.hdf5_path is not None and self.save_model:
-                            save_model_cv(
-                                model=cvclf,
-                                save_path=model_save_path,
-                                cv_index=str(cv_ind),
-                            )
-                    else:
-                        result = _fit_and_score(
-                            estimator=clone(grid_clf),
-                            X=X,
-                            y=y,
-                            scorer=scorer,
-                            train=train,
-                            test=test,
-                            verbose=False,
-                            parameters=None,
-                            fit_params=None,
-                            error_score=self.error_score,
-                            return_estimator=True,
-                            score_params={},
+                    cvclf = clone(grid_clf)
+
+                    cvclf.fit(X[train], y[train])
+
+                    model_list.append(cvclf)
+                    score = scorer(cvclf, X[test], y[test])
+
+                    if self.hdf5_path is not None and self.save_model:
+                        _save_model_cv(
+                            model=cvclf,
+                            save_path=model_save_path,
+                            cv_index=str(cv_ind),
                         )
-                        score = result["test_scores"]
-                        model_list = result["estimator"]
+
                     if _carbonfootprint:
                         emissions = tracker.stop()
                         if emissions is None:
                             emissions = 0
                     duration = time() - t_start
-                    if self.hdf5_path is not None and self.save_model:
-                        save_model_list(
-                            model_list=model_list,
-                            score_list=score,
-                            save_path=model_save_path,
-                        )

                    nchan = X.info["nchan"] if isinstance(X, BaseEpochs) else X.shape[1]
                     res = {
@@ -677,7 +646,6 @@ def evaluate(
         if len(run_pipes) == 0:
             return

-        # get the data
         X, y, metadata = self.paradigm.get_data(
             dataset=dataset,
             return_epochs=self.return_epochs,
@@ -686,8 +654,6 @@ def evaluate(
             postprocess_pipeline=postprocess_pipeline,
             process_pipelines=[process_pipeline],
         )
-
-        # encode labels
         le = LabelEncoder()
         y = y if self.mne_labels else le.fit_transform(y)

@@ -700,15 +666,20 @@ def evaluate(

         # perform leave one subject out CV
         if self.n_splits is None:
-            cv = LeaveOneGroupOut()
+            cv_class = LeaveOneGroupOut
+            cv_kwargs = {}
         else:
-            cv = GroupKFold(n_splits=self.n_splits)
+            cv_class = GroupKFold
+            cv_kwargs = {"n_splits": self.n_splits}
             n_subjects = self.n_splits

+        self.cv = CrossSubjectSplitter(
+            cv_class=cv_class, random_state=self.random_state, **cv_kwargs
+        )
+
         inner_cv = StratifiedKFold(3, shuffle=True, random_state=self.random_state)

         # Implement Grid Search
-
         if _carbonfootprint:
             # Initialise CodeCarbon
             tracker = EmissionsTracker(save_to_file=False, log_level="error")
@@ -716,7 +687,7 @@ def evaluate(
         # Progressbar at subject level
         for cv_ind, (train, test) in enumerate(
             tqdm(
-                cv.split(X, y, groups),
+                self.cv.split(y, metadata),
                 total=n_subjects,
                 desc=f"{dataset.code}-CrossSubject",
             )
@@ -734,7 +705,26 @@ def evaluate(
                 clf = self._grid_search(
                     param_grid=param_grid, name=name, grid_clf=clf, inner_cv=inner_cv
                 )
+
+                if self.hdf5_path is not None and self.save_model:
+                    # Save the best model from grid search
+                    model_save_path = _create_save_path(
+                        hdf5_path=self.hdf5_path,
+                        code=dataset.code,
+                        subject=subject,
+                        session="",
+                        name=name,
+                        grid=self.search,
+                        eval_type="CrossSubject",
+                    )
+                    _save_model_cv(
+                        model=clf,
+                        save_path=model_save_path,
+                        cv_index=str(cv_ind),
+                    )
+
                 model = deepcopy(clf).fit(X[train], y[train])
+
                 if _carbonfootprint:
                     emissions = tracker.stop()
                     if emissions is None:
@@ -742,17 +732,18 @@ def evaluate(
                 duration = time() - t_start

                 if self.hdf5_path is not None and self.save_model:
-                    model_save_path = create_save_path(
+
+                    model_save_path = _create_save_path(
                         hdf5_path=self.hdf5_path,
                         code=dataset.code,
                         subject=subject,
                         session="",
                         name=name,
-                        grid=False,
+                        grid=self.search,
                         eval_type="CrossSubject",
                     )
-                    save_model_cv(
+                    _save_model_cv(
                         model=model, save_path=model_save_path, cv_index=str(cv_ind)
                     )

                 # we eval on each session
diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py
index cc6fbc197..f642e1191 100644
--- a/moabb/evaluations/utils.py
+++ b/moabb/evaluations/utils.py
@@ -7,6 +7,7 @@
 from mne.utils.config import _open_lock
 from numpy import argmax
+from sklearn.model_selection import GridSearchCV
 from sklearn.pipeline import Pipeline


@@ -53,7 +54,7 @@ def _check_if_is_pytorch_steps(model):
     return skorch_valid


-def save_model_cv(model: object, save_path: str | Path, cv_index: str | int):
+def _save_model_cv(model: object, save_path: str | Path, cv_index: str | int):
     """Save a model fitted to a given fold from cross-validation.

     Parameters
@@ -95,7 +96,7 @@ def save_model_cv(model: object, save_path: str | Path, cv_index: str | int):
     dump(model, file, protocol=HIGHEST_PROTOCOL)


-def save_model_list(model_list: list | Pipeline, score_list: Sequence, save_path: str):
+def _save_model_list(model_list: list | Pipeline, score_list: Sequence, save_path: str):
     """Save a list of models fitted to a folder.

     Parameters
@@ -119,14 +120,14 @@ def save_model_list(model_list: list | Pipeline, score_list: Sequence, save_path

         model_list = [model_list]

     for cv_index, model in enumerate(model_list):
-        save_model_cv(model, save_path, str(cv_index))
+        _save_model_cv(model, save_path, str(cv_index))

     best_model = model_list[argmax(score_list)]
-    save_model_cv(best_model, save_path, "best")
+    _save_model_cv(best_model, save_path, "best")


-def create_save_path(
+def _create_save_path(
     hdf5_path,
     code: str,
     subject: int | str,
@@ -166,7 +167,7 @@
     if grid:
         path_save = (
             Path(hdf5_path)
-            / f"GridSearch_{eval_type}"
+            / f"Search_{eval_type}"
             / code
             / f"{str(subject)}"
             / str(session)
@@ -218,3 +219,20 @@ def _convert_sklearn_params_to_optuna(param_grid: dict) -> dict:
         except Exception as e:
             raise ValueError(f"Conversion failed for parameter {key}: {e}")
     return optuna_params
+
+
+def check_search_available():
+    """Check if optuna is available"""
+    try:
+        from optuna.integration import OptunaSearchCV
+
+        optuna_available = True
+    except ImportError:
+        optuna_available = False
+
+    if optuna_available:
+        search_methods = {"grid": GridSearchCV, "optuna": OptunaSearchCV}
+    else:
+        search_methods = {"grid": GridSearchCV}
+
+    return search_methods, optuna_available
diff --git a/moabb/tests/acceptance_tests/reference_results_dataset_BNCI2014_001.csv b/moabb/tests/acceptance_tests/reference_results_dataset_BNCI2014_001.csv
new file mode 100644
index 000000000..b029c526f
--- /dev/null
+++ b/moabb/tests/acceptance_tests/reference_results_dataset_BNCI2014_001.csv
@@ -0,0 +1,19 @@
+,score,time,samples,subject,session,channels,n_sessions,dataset,pipeline
+0,0.7430556,0.28345227,288.0,1,0train,22,2,BNCI2014-001,mdm
+1,0.6944444,0.2819698,288.0,1,1test,22,2,BNCI2014-001,mdm
+2,0.5486111,0.28295708,288.0,2,0train,22,2,BNCI2014-001,mdm
+3,0.5555556,0.28221202,288.0,2,1test,22,2,BNCI2014-001,mdm
+4,0.6527778,0.27323103,288.0,3,0train,22,2,BNCI2014-001,mdm
+5,0.6319444,0.28558397,288.0,3,1test,22,2,BNCI2014-001,mdm
+6,0.4652778,0.28424382,288.0,4,0train,22,2,BNCI2014-001,mdm
+7,0.6076389,0.28512216,288.0,4,1test,22,2,BNCI2014-001,mdm
+8,0.4340278,0.26603198,288.0,5,0train,22,2,BNCI2014-001,mdm
+9,0.47569445,0.2672441,288.0,5,1test,22,2,BNCI2014-001,mdm
+10,0.38194445,0.28032613,288.0,6,0train,22,2,BNCI2014-001,mdm
+11,0.4652778,0.29096103,288.0,6,1test,22,2,BNCI2014-001,mdm
+12,0.5625,0.26360798,288.0,7,0train,22,2,BNCI2014-001,mdm
+13,0.46875,0.26497293,288.0,7,1test,22,2,BNCI2014-001,mdm
+14,0.6041667,0.27954388,288.0,8,0train,22,2,BNCI2014-001,mdm
+15,0.6111111,0.29071403,288.0,8,1test,22,2,BNCI2014-001,mdm
+16,0.5451389,0.27546215,288.0,9,0train,22,2,BNCI2014-001,mdm
+17,0.7326389,0.2862649,288.0,9,1test,22,2,BNCI2014-001,mdm
diff --git a/moabb/tests/acceptance_tests/reference_results_dataset_BNCI2015_001.csv b/moabb/tests/acceptance_tests/reference_results_dataset_BNCI2015_001.csv
new file mode 100644
index 000000000..97d2c3265
--- /dev/null
+++ b/moabb/tests/acceptance_tests/reference_results_dataset_BNCI2015_001.csv
@@ -0,0 +1,29 @@
+,score,time,samples,subject,session,channels,n_sessions,dataset,pipeline
+0,0.9898,0.104274035,200.0,1,0A,13,2,BNCI2015-001,mdm
+1,0.996,0.109023094,200.0,1,1B,13,2,BNCI2015-001,mdm
+2,0.9822,0.11902189,200.0,2,0A,13,2,BNCI2015-001,mdm
+3,0.9817,0.10449815,200.0,2,1B,13,2,BNCI2015-001,mdm
+4,0.9411,0.10515785,200.0,3,0A,13,2,BNCI2015-001,mdm
+5,0.9713,0.10190797,200.0,3,1B,13,2,BNCI2015-001,mdm
+6,0.8777,0.107106924,200.0,4,0A,13,2,BNCI2015-001,mdm
+7,0.9653,0.10397911,200.0,4,1B,13,2,BNCI2015-001,mdm
+8,0.8416,0.105483055,200.0,5,0A,13,2,BNCI2015-001,mdm
+9,0.8118,0.10831189,200.0,5,1B,13,2,BNCI2015-001,mdm
+10,0.6624,0.12765789,200.0,6,0A,13,2,BNCI2015-001,mdm
+11,0.6314,0.10389686,200.0,6,1B,13,2,BNCI2015-001,mdm
+12,0.8948,0.10865617,200.0,7,0A,13,2,BNCI2015-001,mdm
+13,0.8931,0.09851694,200.0,7,1B,13,2,BNCI2015-001,mdm
+14,0.6032,0.18366313,400.0,8,0A,13,2,BNCI2015-001,mdm
+15,0.7523,0.19959378,400.0,8,1B,13,2,BNCI2015-001,mdm
+16,0.8488,0.18477702,400.0,8,2C,13,2,BNCI2015-001,mdm
+17,0.7601,0.1761918,400.0,9,0A,13,2,BNCI2015-001,mdm
+18,0.8687,0.17262912,400.0,9,1B,13,2,BNCI2015-001,mdm
+19,0.9154,0.17855692,400.0,9,2C,13,2,BNCI2015-001,mdm
+20,0.6787,0.21773195,400.0,10,0A,13,2,BNCI2015-001,mdm
+21,0.6402,0.20742917,400.0,10,1B,13,2,BNCI2015-001,mdm
+22,0.6116,0.19268918,400.0,10,2C,13,2,BNCI2015-001,mdm
+23,0.7974,0.20285797,400.0,11,0A,13,2,BNCI2015-001,mdm
+24,0.7403,0.20020509,400.0,11,1B,13,2,BNCI2015-001,mdm
+25,0.7949,0.18860793,400.0,11,2C,13,2,BNCI2015-001,mdm
+26,0.6574,0.10171008,200.0,12,0A,13,2,BNCI2015-001,mdm
+27,0.6693,0.10934806,200.0,12,1B,13,2,BNCI2015-001,mdm
diff --git a/moabb/tests/acceptance_tests/test_accurary.py b/moabb/tests/acceptance_tests/test_accurary.py
new file mode 100644
index 000000000..5b99ee760
--- /dev/null
+++ b/moabb/tests/acceptance_tests/test_accurary.py
@@ -0,0 +1,46 @@
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import pytest
+from pyriemann.classification import MDM
+from pyriemann.estimation import XdawnCovariances
+from sklearn.pipeline import make_pipeline
+from sklearn.utils import check_random_state
+
+from moabb.datasets import BNCI2014_001, BNCI2015_001
+from moabb.evaluations import CrossSessionEvaluation
+from moabb.paradigms import MotorImagery
+
+
+@pytest.mark.parametrize("dataset_class", [BNCI2014_001, BNCI2015_001])
+def test_decoding_performance_stable(dataset_class):
+    dataset_name = dataset_class.__name__
+    random_state = check_random_state(42)
+
+    dataset_cls = dataset_class
+    dataset = dataset_cls()
+    paradigm = MotorImagery()
+
+    # Simple pipeline
+    pipeline = make_pipeline(XdawnCovariances(nfilter=4), MDM(n_jobs=4))
+
+    # Evaluate
+    evaluation = CrossSessionEvaluation(
+        paradigm=paradigm, datasets=[dataset], overwrite=True, random_state=random_state
+    )
+    results = evaluation.process({"mdm": pipeline})
+    results.drop(columns=["time"], inplace=True)
+    results["score"] = results["score"].astype(np.float32)
+    results["samples"] = results["samples"].astype(int)
+    results["subject"] = results["subject"].astype(int)
+
+    folder_path = Path(__file__).parent / "reference_results_dataset_{}.csv".format(
+        dataset_name
+    )
+    reference_performance = pd.read_csv(folder_path)
+    reference_performance.drop(columns=["time", "Unnamed: 0"], inplace=True)
+    reference_performance["score"] = reference_performance["score"].astype(np.float32)
+    reference_performance["samples"] = reference_performance["samples"].astype(int)
+
+    pd.testing.assert_frame_equal(results, reference_performance)
diff --git a/moabb/tests/test_evaluations.py b/moabb/tests/test_evaluations.py
index 0dc4f98c0..6beb73edb 100644
--- a/moabb/tests/test_evaluations.py
+++ b/moabb/tests/test_evaluations.py
@@ -18,7 +18,8 @@
 from moabb.datasets.fake import FakeDataset
 from moabb.evaluations import evaluations as ev
 from moabb.evaluations.base import optuna_available
-from moabb.evaluations.utils import create_save_path, save_model_cv, save_model_list
+from moabb.evaluations.utils import _create_save_path as create_save_path
+from moabb.evaluations.utils import _save_model_cv as save_model_cv
 from moabb.paradigms.motor_imagery import FakeImageryParadigm


@@ -393,17 +394,6 @@ def test_save_model_cv(self):
         # Assert that the saved model file exists
         assert os.path.isfile(os.path.join(save_path, "fitted_model_0.pkl"))

-    def test_save_model_list(self):
-        step = Dummy()
-        model = Pipeline([("step", step)])
-        model_list = [model]
-        score_list = [0.8]
-        save_path = "test_save_path"
-        save_model_list(model_list, score_list, save_path)
-
-        # Assert that the saved model file for best model exists
-        assert os.path.isfile(os.path.join(save_path, "fitted_model_best.pkl"))
-
     def test_create_save_path(self):
         hdf5_path = "base_path"
         code = "evaluation_code"
@@ -454,21 +444,6 @@ def test_save_model_cv_with_pytorch_model(self):
         assert os.path.isfile(os.path.join(save_path, "step_fitted_0_history.json"))
         assert os.path.isfile(os.path.join(save_path, "step_fitted_0_criterion.pkl"))

-    def test_save_model_list_with_multiple_models(self):
-        model1 = Dummy()
-        model2 = Dummy()
-        model_list = [model1, model2]
-        score_list = [0.8, 0.9]
-        save_path = "test_save_path"
-        save_model_list(model_list, score_list, save_path)
-
-        # Assert that the saved model files for each model exist
-        assert os.path.isfile(os.path.join(save_path, "fitted_model_0.pkl"))
-        assert os.path.isfile(os.path.join(save_path, "fitted_model_1.pkl"))
-
-        # Assert that the saved model file for the best model exists
-        assert os.path.isfile(os.path.join(save_path, "fitted_model_best.pkl"))
-
     def test_create_save_path_with_cross_session_evaluation(self):
         hdf5_path = "base_path"
         code = "evaluation_code"
@@ -516,19 +491,6 @@ def test_save_model_cv_without_hdf5_path(self):
         with pytest.raises(IOError):
             save_model_cv(model, save_path, cv_index)

-    def test_save_model_list_with_single_model(self):
-        model = Dummy()
-        model_list = model
-        score_list = [0.8]
-        save_path = "test_save_path"
-        save_model_list(model_list, score_list, save_path)
-
-        # Assert that the saved model file for the single model exists
-        assert os.path.isfile(os.path.join(save_path, "fitted_model_0.pkl"))
-
-        # Assert that the saved model file for the best model exists
-        assert os.path.isfile(os.path.join(save_path, "fitted_model_best.pkl"))
-
     def test_create_save_path_with_cross_subject_evaluation(self):
         hdf5_path = "base_path"
         code = "evaluation_code"
diff --git a/pyproject.toml b/pyproject.toml
index 5bbb054b5..a65bf9a8b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ license = "BSD-3-Clause"

 [tool.poetry.dependencies]
 python = ">=3.10"
-numpy = "^2.0"
+numpy = ">=2.0"
 scipy = "^1.9.3"
 mne = "^1.10.0"
 pandas = ">=1.5.2"
@@ -30,7 +30,7 @@ memory-profiler = "^0.61.0"
 edflib-python = "^1.0.6"
 edfio = "^0.4.2"
 pytest = "^8.3.5"
-mne-bids = ">=0.14"
+mne-bids = ">=0.16"
 scikit-learn = "<1.6"

 # Optional dependencies for carbon emission
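
Illustrative usage sketch (not part of the patch): the snippet below mirrors the new acceptance test and shows how the refactored evaluation entry point is driven end to end; it only assumes names that already appear in this diff (moabb's CrossSessionEvaluation, MotorImagery, BNCI2014_001 and pyriemann's XdawnCovariances/MDM).

from pyriemann.classification import MDM
from pyriemann.estimation import XdawnCovariances
from sklearn.pipeline import make_pipeline
from sklearn.utils import check_random_state

from moabb.datasets import BNCI2014_001
from moabb.evaluations import CrossSessionEvaluation
from moabb.paradigms import MotorImagery

# Same setup as the acceptance test: one dataset and one simple covariance + MDM pipeline.
paradigm = MotorImagery()
pipeline = make_pipeline(XdawnCovariances(nfilter=4), MDM(n_jobs=4))

# After this refactor, CrossSessionEvaluation builds its folds through
# CrossSessionSplitter internally, so the caller-facing API is unchanged.
evaluation = CrossSessionEvaluation(
    paradigm=paradigm,
    datasets=[BNCI2014_001()],
    overwrite=True,
    random_state=check_random_state(42),
)
results = evaluation.process({"mdm": pipeline})  # pandas DataFrame, one row per subject/session
print(results[["dataset", "subject", "session", "score"]])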