diff --git a/man/Population.Rd b/man/Population.Rd index aa64dbd..6cc242f 100644 --- a/man/Population.Rd +++ b/man/Population.Rd @@ -34,7 +34,7 @@ aggregate predictions and feature importances across the population. \item{\code{filter_by_mask(mask)}}{Filter population using a logical vector (1/0). Parameter: mask (integer vector 1/0 indicating which individuals to keep). Returns filtered Population object.} \item{\code{filter_by_k(min_k, max_k)}}{Filter population by number of features (k). Parameters: min_k (minimum number of features), max_k (maximum number of features). Returns filtered Population object.} \item{\code{get_fbm(alpha)}}{Get Family of Best Models (FBM) using confidence interval selection. This method selects models with performance statistically equivalent to the best model. Parameters: alpha (confidence level, default 0.05 for 95\% confidence). If FBM selection fails, alpha\% to keep). Returns Population object containing the FBM.} -\item{\code{get_first_pct(pct)}}{Get first percentage of individuals sorted by fitness. Parameter: pct (percentage 0-100). Returns Population object containing the selected individuals. +\item{\code{get_first_pct(pct)}}{Get first percentage of individuals sorted by fitness. Parameter: pct (percentage 0-100). Returns Population object containing the selected individuals.} \item{\code{fit(data, param)}}{Compute fitness metrics for all individuals on new data or parameters and sort it. Parameters: data (new Data object to fit on), param (Param object containing fit function and penalties).} \item{\code{prune_by_threshold(threshold, n_perm, seed, min_k)}}{Prune all individuals by importance threshold. Parameters: threshold (importance threshold), n_perm (number of permutations, default 100), seed (base seed for RNG, default 4815162342), min_k (minimum features to keep, default 1). Returns new Population with pruned individuals.} \item{\code{prune_by_quantile(quantile, eps, n_perm, seed, min_k)}}{Prune all individuals by importance quantile. Parameters: quantile (quantile value 0-1), eps (epsilon value, default 0.0), n_perm (number of permutations, default 100), seed (base seed for RNG, default 4815162342), min_k (minimum features to keep, default 1). Returns new Population with pruned individuals.} diff --git a/man/caret-integration-Gpredomics-Summary-Function-for-Caret.Rd b/man/caret-integration-Gpredomics-Summary-Function-for-Caret.Rd index 9614509..5b7d211 100644 --- a/man/caret-integration-Gpredomics-Summary-Function-for-Caret.Rd +++ b/man/caret-integration-Gpredomics-Summary-Function-for-Caret.Rd @@ -1,9 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/gpredomics_caret.R -\name{caret-integration -Gpredomics Summary Function for Caret} -\alias{caret-integration -Gpredomics Summary Function for Caret} +\name{gpredomicsSummary} +\alias{gpredomicsSummary} \alias{gpredomicsSummary} \title{Caret Integration for gpredomicsR} \usage{ diff --git a/sample/param.yaml b/sample/param.yaml index 0ff5d25..b2be7a4 100644 --- a/sample/param.yaml +++ b/sample/param.yaml @@ -56,7 +56,7 @@ cv: importance: compute_importance: false # Should importance be computed? - n_permutations_oob: 100 # Number of permutations per feature for OOB importance. + n_permutations_mda: 100 # Number of permutations per feature for MDA importance. scaled_importance: true # Scale importance by feature prevalence inside folds. importance_aggregation: mean # Aggregation method for importances: "mean" or "median". @@ -93,7 +93,7 @@ beam: method: LimitedExhaustive # LimitedExhaustive: generate all combinations (k out of features_to_keep). ParallelForward: extend each extendable model by one feature chosen from features_to_keep. kmin: 2 # Number of variables used in the initial population. kmax: 100 # Maximum number of variables to consider in a single model (variable count limit for beam algorithm). - best_models_ci_alpha: 1e-5 # Alpha for the family of best models confidence interval based on the best fit. Smaller alpha -> larger best_model range. + best_models_criterion: 10 # If <=1: alpha for FBM confidence interval. If >1: keep that many best models per k. Smaller alpha -> larger best_model range. max_nb_of_models: 20000 # Limits the number of features_to_keep at each epoch according to the number of models made possible by them (truncated according to significance). mcmc: diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 0169414..42a736f 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -588,8 +588,8 @@ dependencies = [ [[package]] name = "gpredomics" -version = "0.7.5" -source = "git+https://github.com/predomics/gpredomics.git?tag=v0.7.5#79cc49a390abbe5082ed26ed851f33c33b4e0578" +version = "0.7.7" +source = "git+https://github.com/predomics/gpredomics.git?tag=v0.7.7#23b5d2e19ae3fb38db056882b9884154822b1dfd" dependencies = [ "argmin", "bincode", diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index f72457f..f99971f 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -9,7 +9,7 @@ name = 'rgpredomicsR' [dependencies] extendr-api = '*' -gpredomics = { git = "https://github.com/predomics/gpredomics.git", tag="v0.7.5" } +gpredomics = { git = "https://github.com/predomics/gpredomics.git", tag="v0.7.7" } flexi_logger = "0.27" chrono = "0.4" log = "0.4" diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index e75dec0..150c279 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -275,8 +275,8 @@ impl Param { ("max_epochs", Robj::from(self.intern.ga.max_epochs)), ("min_epochs", Robj::from(self.intern.ga.min_epochs)), ("max_age_best_model", Robj::from(self.intern.ga.max_age_best_model)), - ("kmin", Robj::from(self.intern.ga.kmin)), - ("kmax", Robj::from(self.intern.ga.kmax)), + ("kmin", Robj::from(self.intern.ga.k_min)), + ("kmax", Robj::from(self.intern.ga.k_max)), ("select_elite_pct", Robj::from(self.intern.ga.select_elite_pct)), ("select_niche_pct", Robj::from(self.intern.ga.select_niche_pct)), ("select_random_pct", Robj::from(self.intern.ga.select_random_pct)), @@ -292,8 +292,8 @@ impl Param { // Convert GA fields let beam = List::from_pairs(vec![ ("method", Robj::from(format!("{:?}", self.intern.beam.method))), - ("kmin", Robj::from(self.intern.beam.kmin)), - ("kmax", Robj::from(self.intern.beam.kmax)), + ("kmin", Robj::from(self.intern.beam.k_start)), + ("kmax", Robj::from(self.intern.beam.k_stop)), ("best_models_criterion", Robj::from(self.intern.beam.best_models_criterion)), ("max_nb_of_models", Robj::from(self.intern.beam.max_nb_of_models)), ]); @@ -311,7 +311,7 @@ impl Param { // Convert Importance fields let importance = List::from_pairs(vec![ ("compute_importance", Robj::from(self.intern.importance.compute_importance)), - ("n_permutations_oob",Robj::from(self.intern.importance.n_permutations_oob)), + ("n_permutations_mda",Robj::from(self.intern.importance.n_permutations_mda)), ("scaled_importance", Robj::from(self.intern.importance.scaled_importance)), ("importance_aggregation", Robj::from(format!("{:?}",self.intern.importance.importance_aggregation))) ]); @@ -389,8 +389,8 @@ impl Param { "max_epochs" => self.intern.ga.max_epochs = value as usize, "min_epochs" => self.intern.ga.min_epochs = value as usize, "max_age_best_model" => self.intern.ga.max_age_best_model = value as usize, - "k_min" => self.intern.ga.kmin = value as usize, - "k_max" => self.intern.ga.kmax = value as usize, + "k_min" => self.intern.ga.k_min = value as usize, + "k_max" => self.intern.ga.k_max = value as usize, "select_elite_pct" => self.intern.ga.select_elite_pct = value, "select_niche_pct" => self.intern.ga.select_niche_pct = value, "select_random_pct" => self.intern.ga.select_random_pct = value, @@ -428,7 +428,7 @@ impl Param { "threshold_windows_pct" => self.intern.voting.threshold_windows_pct = value, // Importance parameters - "n_permutations_oob" => self.intern.importance.n_permutations_oob = value as usize, + "n_permutations_mda" => self.intern.importance.n_permutations_mda = value as usize, // GPU parameters "max_total_memory_mb" => self.intern.gpu.max_total_memory_mb = value as u64, @@ -1900,7 +1900,7 @@ impl Individual { feature_seeds.insert(f, seeds); } - let ic: ImportanceCollection = self.intern.compute_oob_feature_importance(&data.intern, permutations, &features_to_process, &feature_seeds); + let ic: ImportanceCollection = self.intern.compute_mda_feature_importance(&data.intern, permutations, &features_to_process, &feature_seeds); let mut feature = Vec::with_capacity(ic.importances.len()); let mut importance = Vec::with_capacity(ic.importances.len()); @@ -2598,7 +2598,7 @@ impl Experiment { /// @name Experiment$compute_cv_importance /// @description /// Compute cross-validated feature importance (MDA-like) aggregated across CV folds. - /// This uses the native CV::compute_cv_oob_feature_importance logic on the original + /// This uses the native CV::compute_cv_mda_feature_importance logic on the original /// training data and CV folds stored in the Experiment. /// /// @param n_perm Number of permutations (default: 1000) @@ -2643,7 +2643,7 @@ impl Experiment { // 3) Native CV importance (agrégé across folds) let mut rng = ChaCha8Rng::seed_from_u64(seed); - let mut ic: ImportanceCollection = cv.compute_cv_oob_feature_importance( + let mut ic: ImportanceCollection = cv.compute_cv_mda_feature_importance( &self.param_arc, permutations, &mut rng, @@ -2845,7 +2845,7 @@ impl Experiment { let mut rng = ChaCha8Rng::seed_from_u64(base_seed); let mut ic: ImportanceCollection = last_pop - .compute_pop_oob_feature_importance( + .compute_pop_mda_feature_importance( &fold_data, permutations, &mut rng, @@ -4094,7 +4094,7 @@ impl Population { let pool = ThreadPoolBuilder::new().num_threads(threads).build().unwrap(); let mut ic: ImportanceCollection = pool.install(|| { - self.intern.compute_pop_oob_feature_importance( + self.intern.compute_pop_mda_feature_importance( &data.intern, permutations, &mut rng, @@ -4212,7 +4212,7 @@ impl Population { let mut col = vec![0.0_f64; n_feat]; - let ic = ind.compute_oob_feature_importance( + let ic = ind.compute_mda_feature_importance( &data.intern, permutations, &feats_for_ind,