diff --git a/CHANGELOG.md b/CHANGELOG.md
index cae23a694..9ceffd498 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,18 @@
+# v0.9.0 - Mar 2026
+
+### Major Changes
++ Add pipeline-based Compressor module for search space compression in high-dimensional BBO,
+ supporting dimension selection (SHAP / Correlation / Adaptive / Expert), range compression,
+ projection (REMBO / HesBO / KPCA), quantization, and filling strategies.
++ Add `SpaceAdapter` for flexible configuration space transformation.
++ Add `MFAdvisor` with a unified multi-fidelity scheduler and batched suggestion support in SMBO.
++ Refactor `AcquisitionOptimizer` with modular SearchGenerator and StrategySelector abstractions.
+
+### Bug Fixes
++ Fix transfer learning (TL) surrogate robustness for multi-fidelity data edge cases.
+
+
+
# v0.8.4 - Sep 9, 2024
### Major Changes
diff --git a/README.md b/README.md
index b17b807b0..1f915281d 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,8 @@
## OpenBox: Generalized and Efficient Blackbox Optimization System
**OpenBox** is an efficient and generalized blackbox optimization (BBO) system, which supports the following
characteristics: 1) **BBO with multiple objectives and constraints**, 2) **BBO with transfer learning**, 3)
-**BBO with distributed parallelization**, 4) **BBO with multi-fidelity acceleration** and 5) **BBO with early stops**.
+**BBO with distributed parallelization**, 4) **BBO with multi-fidelity acceleration**, 5) **BBO with early stops**
+and 6) **BBO with search space compression**.
OpenBox is designed and developed by the AutoML team from the [DAIR Lab](http://net.pku.edu.cn/~cuibin/) at Peking
University, and its goal is to make blackbox optimization easier to apply both in industry and academia, and help
facilitate data science.
@@ -116,8 +117,19 @@ The design of OpenBox follows the following principles:
Interleaved RS and LS
Differential Evolution
L-BFGS-B
+ CMA-ES
+ Upper Bound Maximizer
+
+ - Search Space Compressor
+
+ - Dimension Selection (SHAP, Correlation, Adaptive, Expert)
+ - Range Compression (Boundary, KDE, SHAP-weighted)
+ - Projection (REMBO, HesBO, KPCA)
+ - Quantization & Filling
+
+
diff --git a/README_zh_CN.md b/README_zh_CN.md
index 69a76b0a0..a7460d871 100644
--- a/README_zh_CN.md
+++ b/README_zh_CN.md
@@ -21,7 +21,7 @@
## OpenBox: 通用高效的黑盒优化系统
**OpenBox** 是解决黑盒优化(超参数优化)问题的高效且通用的开源系统,支持以下特性: 1) **多目标与带约束的黑盒优化**。2)
-**迁移学习**。3) **分布式并行验证**。4) **多精度优化加速**。5) **早停机制**。
+**迁移学习**。3) **分布式并行验证**。4) **多精度优化加速**。5) **早停机制**。6) **搜索空间压缩**。
OpenBox是由北京大学[DAIR实验室](http://net.pku.edu.cn/~cuibin/)自动化机器学习(AutoML)小组设计并开发的,目标是
使黑盒优化在学术界和工业界的应用更加便捷,并促进数据科学的发展。
@@ -108,8 +108,19 @@ OpenBox是一个提供通用黑盒优化服务的系统。用户可以使用REST
- Interleaved RS and LS
- Differential Evolution
- L-BFGS-B
+ - CMA-ES
+ - Upper Bound Maximizer
+
+ - Search Space Compressor
+
+ - Dimension Selection (SHAP, Correlation, Adaptive, Expert)
+ - Range Compression (Boundary, KDE, SHAP-weighted)
+ - Projection (REMBO, HesBO, KPCA)
+ - Quantization & Filling
+
+
|
diff --git a/openbox/acq_optimizer/base.py b/openbox/acq_optimizer/base.py
new file mode 100644
index 000000000..81897a23e
--- /dev/null
+++ b/openbox/acq_optimizer/base.py
@@ -0,0 +1,381 @@
+import numpy as np
+from abc import ABC, abstractmethod
+from typing import List, Tuple, Any
+from ConfigSpace import ConfigurationSpace, Configuration
+from openbox import logger
+from openbox.acquisition_function.acquisition import AbstractAcquisitionFunction
+from .generator import SearchGenerator, LocalSearchGenerator,CMAESGenerator
+from .utils import convert_configurations_to_array
+from .selector import StrategySelector, FixedSelector
+from openbox.utils.history import Observation, History
+
+class AcquisitionOptimizer(ABC):
+ def __init__(
+ self,
+ acquisition_function: AbstractAcquisitionFunction,
+ config_space: ConfigurationSpace,
+ rng: np.random.RandomState = np.random.RandomState(42)
+ ):
+ self.acq = acquisition_function
+ self.config_space = config_space
+
+ self.rng = rng
+ self.iter_id = 0
+
+ @abstractmethod
+ def _maximize(self, history:History, num_points: int, excluded_configs: List[Configuration] = [], **kwargs) -> List[Tuple]:
+ pass
+
+ def maximize(self, history:History, num_points: int, excluded_configs: List[Configuration] = [], **kwargs) -> List:
+ results = self._maximize(history, num_points, excluded_configs, **kwargs)
+ return [result[1] for result in results]
+
+ def _evaluate_batch(self, configs: List[Configuration], **kwargs) -> np.ndarray:
+ return self._acquisition_function(configs, **kwargs).flatten()
+
+ def _sort_configs_by_acq_value(self, configs, **kwargs):
+ acq_values = self._acquisition_function(configs, **kwargs).flatten()
+ random_values = self.rng.rand(len(acq_values))
+ # Sort by acquisition value (primary) and random tie-breaker (secondary)
+ # Last column is primary sort key
+ indices = np.lexsort((random_values.flatten(), acq_values.flatten()))
+ return [(acq_values[ind], configs[ind]) for ind in indices[::-1]]
+
+ def _acquisition_function(self, configs, **kwargs):
+ X = convert_configurations_to_array(configs)
+ return self.acq(X, **kwargs)
+
+
+ def _filter_excluded_configs(self, configs: List[Configuration], excluded_configs: List[Configuration]) -> List[Configuration]:
+ """Filter out excluded configurations from candidates
+
+ Parameters
+ ----------
+ configs : List[Configuration]
+ Candidate configurations
+ excluded_configs : List[Configuration]
+ Configurations to exclude
+
+ Returns
+ -------
+ List[Configuration]
+ Filtered configurations
+ """
+ if not excluded_configs:
+ return configs
+
+ excluded_set = set()
+ for config in excluded_configs:
+ excluded_set.add(tuple(sorted(config.get_dictionary().items())))
+
+ filtered = []
+ for config in configs:
+ config_key = tuple(sorted(config.get_dictionary().items()))
+ if config_key not in excluded_set:
+ filtered.append(config)
+
+ return filtered
+
+ def _prepare_observations_for_strategy(self, history:History, strategy, **kwargs) -> List[Any]:
+ """Prepare observations for strategy by sorting by actual y value (standard BO approach)
+
+ For LocalSearchGenerator, sort observations by actual y value (ascending, assuming minimization).
+
+ Parameters
+ ----------
+ observations : List[Observation]
+ Historical observations, each should have .y attribute
+ strategy : SearchGenerator
+ The strategy that will use these observations
+ **kwargs
+ Additional arguments (unused, kept for API compatibility)
+
+ Returns
+ -------
+ List[Observation]
+ Observations sorted by y value (ascending, best first), or original if sorting not needed
+ """
+ if isinstance(strategy, LocalSearchGenerator) and history:
+ observations=history.observations
+ sorted_observations = sorted(observations, key=lambda obs: obs.objectives[0])
+ return sorted_observations
+ return history.observations
+
+ def reset(self):
+ self.iter_id = 0
+
+
+class CompositeOptimizer(AcquisitionOptimizer):
+ """Composite Optimizer, use different search strategies [across] different iterations.
+
+ Each iteration, the optimizer will select configurations sorted by acquisition value
+
+ use the strategy pattern to combine multiple search strategies:
+ 1. use StrategySelector to select a strategy
+ 2. strategy generates candidate configurations
+ 3. batch evaluate all candidates' acquisition value
+ 4. select the best num_points configurations
+
+ Parameters
+ ----------
+ acquisition_function : AcquisitionFunction
+ acquisition function
+ config_space : ConfigurationSpace
+ configuration space
+ strategies : List[SearchGenerator]
+ strategy list
+ selector : StrategySelector
+ strategy selector, if not provided, use FixedSelector(0)
+ rng : np.random.RandomState
+ random number generator, if not provided, use np.random.RandomState(42)
+ candidate_multiplier : float, default=3.0
+ candidate multiplier, generate num_points * candidate_multiplier candidates
+ then select the best num_points configurations through acquisition function
+
+ Examples
+ --------
+ >>> from .generator import LocalSearchGenerator, RandomSearchGenerator
+ >>> from .selector import ProbabilisticSelector
+ >>>
+ >>> # create strategy
+ >>> local = LocalSearchGenerator(max_neighbors=50)
+ >>> random = RandomSearchGenerator()
+ >>>
+ >>> # create selector (85% local search, 15% random search)
+ >>> selector = ProbabilisticSelector([0.85, 0.15])
+ >>>
+ >>> # create composite optimizer
+ >>> optimizer = CompositeOptimizer(
+ ... acquisition_function=acq_func,
+ ... config_space=config_space,
+ ... strategies=[local, random],
+ ... selector=selector
+ ... )
+ >>>
+ >>> # use
+ >>> best_configs = optimizer.maximize(runhistory, num_points=10)
+ """
+
+ def __init__(self,
+ acquisition_function: AbstractAcquisitionFunction,
+ config_space: ConfigurationSpace,
+ strategies: List[SearchGenerator],
+ selector: StrategySelector = FixedSelector(0),
+ rng: np.random.RandomState = np.random.RandomState(42),
+ candidate_multiplier: float = 3.0):
+ super().__init__(acquisition_function, config_space, rng)
+
+ if not strategies:
+ raise ValueError("At least one strategy is required")
+
+ self.strategies = strategies
+ self.selector = selector
+ self.candidate_multiplier = candidate_multiplier
+
+ def _maximize(self, history:History, num_points: int, excluded_configs: List[Configuration] = [], **kwargs) -> List[Tuple]:
+ """use strategy to generate candidates, then batch evaluate and select the best num_points configurations
+
+ process:
+ 1. use selector to select a strategy
+ 2. strategy generates candidates (generate num_points * candidate_multiplier candidates)
+ 3. batch evaluate all candidates' acquisition value
+ 4. select the best num_points configurations
+
+ Parameters
+ ----------
+ observations : List[Any]
+ historical observations
+ num_points : int
+ number of configurations to return
+ excluded_configs : List[Configuration]
+ configurations to exclude from generation
+ **kwargs
+ additional arguments passed to acquisition function
+
+ Returns
+ -------
+ List[Tuple[float, Configuration]]
+ list of (acquisition_value, configuration) pairs
+ """
+ strategy = self.selector.select(self.strategies, self.iter_id)
+ logger.info(f"CompositeOptimizer: select strategy: {type(strategy).__name__}")
+
+ ##sorted_observations = self._prepare_observations_for_strategy(history, strategy, **kwargs)
+ n_candidates = int(num_points * self.candidate_multiplier)
+ candidates = strategy.generate(
+ history=history,
+ num_points=n_candidates,
+ rng=self.rng,
+ acq_function=self.acq,
+ **kwargs
+ )
+
+ if not candidates:
+ raise RuntimeError(
+ f"Strategy {type(strategy).__name__} generated no candidates. "
+ "This should not happen if sampling_strategy is properly configured."
+ )
+
+ candidates = self._filter_excluded_configs(candidates, excluded_configs)
+ if not candidates:
+ raise RuntimeError("All generated candidates were excluded. Consider increasing candidate_multiplier.")
+
+ scores = self._evaluate_batch(candidates, **kwargs)
+ sorted_indices = np.argsort(scores)[::-1][: num_points]
+ results = [(scores[idx], candidates[idx]) for idx in sorted_indices]
+ self.iter_id += 1
+
+ return results
+
+ def reset(self):
+ super().reset()
+ if hasattr(self.selector, 'reset'):
+ self.selector.reset()
+
+
+class QuotaCompositeOptimizer(AcquisitionOptimizer):
+ """Quota Composite Optimizer - ensure final configurations returned contain configurations from different strategies according to quotas
+
+ The difference between CompositeOptimizer:
+ - CompositeOptimizer: use different strategies across iterations, in each iteration, use the same strategy
+ - QuotaCompositeOptimizer: each strategy independently sort and select top-k according to quotas, ensure diversity
+
+ Parameters
+ ----------
+ acquisition_function : AcquisitionFunction
+ acquisition function
+ config_space : ConfigurationSpace
+ configuration space
+ strategies : List[SearchGenerator]
+ strategy list
+ quotas : List[int]
+ each strategy's quota, representing the number of points contributed by each strategy in the final return
+ for example, [3, 1] means in each 4 points, 3 points come from strategy 0, 1 point come from strategy 1
+ rng : np.random.RandomState
+ random number generator
+ candidate_multiplier : float, default=3.0
+ candidate multiplier, each strategy generates quota * candidate_multiplier candidates
+
+ Examples
+ --------
+ >>> local = LocalSearchGenerator(...)
+ >>> random = RandomSearchGenerator(...)
+ >>>
+ >>> # quotas=[3, 1] means in each 4 points, 3 points come from local, 1 point come from random
+ >>> optimizer = QuotaCompositeOptimizer(
+ ... acquisition_function=acq_func,
+ ... config_space=config_space,
+ ... strategies=[local, random],
+ ... quotas=[3, 1]
+ ... )
+ >>>
+ >>> # return 8 points: 6 points from local + 2 points from random, interleaved
+ >>> configs = optimizer.maximize(observations, num_points=8)
+ """
+
+ def __init__(self,
+ acquisition_function: AbstractAcquisitionFunction,
+ config_space: ConfigurationSpace,
+ strategies: List[SearchGenerator],
+ quotas: List[int],
+ rng: np.random.RandomState = np.random.RandomState(42),
+ candidate_multiplier: float = 3.0):
+ super().__init__(acquisition_function, config_space, rng)
+
+ if not strategies:
+ raise ValueError("At least one strategy is required")
+ if len(strategies) != len(quotas):
+ raise ValueError(f"Number of strategies ({len(strategies)}) must match number of quotas ({len(quotas)})")
+ if not all(q > 0 for q in quotas):
+ raise ValueError("All quotas must be positive integers")
+
+ self.strategies = strategies
+ self.quotas = quotas
+ self.total_quota = sum(quotas)
+ self.candidate_multiplier = candidate_multiplier
+
+ def _maximize(self,
+ history:History,
+ num_points: int,
+ excluded_configs: List[Configuration] = [],
+ **kwargs) -> List[Tuple]:
+ strategy_num_points = []
+ remaining = num_points
+ for i, quota in enumerate(self.quotas):
+ if i == len(self.quotas) - 1:
+ n = remaining
+ else:
+ n = int(np.ceil(num_points * quota / self.total_quota))
+ n = min(n, remaining)
+ strategy_num_points.append(n)
+ remaining -= n
+
+ strategy_results = [] # List[List[(score, config)]]
+ for i, (strategy, n_points) in enumerate(zip(self.strategies, strategy_num_points)):
+ if n_points <= 0:
+ strategy_results.append([])
+ continue
+
+ logger.info(f"QuotaCompositeOptimizer: strategy {type(strategy).__name__} generating {n_points} points")
+
+ ##sorted_observations = self._prepare_observations_for_strategy(history, strategy, **kwargs)
+
+ n_candidates = int(n_points * self.candidate_multiplier)
+ candidates = strategy.generate(
+ history=history,
+ num_points=n_candidates,
+ rng=self.rng,
+ acq_function=self.acq,
+ **kwargs
+ )
+
+ if not candidates:
+ logger.warning(f"Strategy {type(strategy).__name__} generated no candidates")
+ strategy_results.append([])
+ continue
+
+ candidates = self._filter_excluded_configs(candidates, excluded_configs)
+ if not candidates:
+ logger.warning(f"Strategy {type(strategy).__name__}: all candidates were excluded")
+ strategy_results.append([])
+ continue
+
+ scores = self._evaluate_batch(candidates, **kwargs)
+ sorted_indices = np.argsort(scores)[::-1][: n_points]
+ results = [(scores[idx], candidates[idx]) for idx in sorted_indices]
+ logger.info(f"QuotaCompositeOptimizer: strategy {type(strategy).__name__} generated {len(results)} points, sorted by acquisition value: {scores[sorted_indices]}")
+ strategy_results.append(results)
+
+ final_results = self._interleave_results(strategy_results)
+ self.iter_id += 1
+
+ return final_results[: num_points]
+
+ def _interleave_results(self, strategy_results: List[List[Tuple]]) -> List[Tuple]:
+ """interleave the results from different strategies according to quotas
+
+ for example, quotas=[3,1], strategy_results=[[a1,a2,a3,a4], [b1,b2]]
+ final result: [a1, a2, a3, b1, a4, b2, ...]
+ """
+ result = []
+ indices = [0] * len(self.strategies)
+
+ while True:
+ added_this_round = False
+
+ for strategy_idx, quota in enumerate(self.quotas):
+ results = strategy_results[strategy_idx]
+ idx = indices[strategy_idx]
+
+ for _ in range(quota):
+ if idx < len(results):
+ result.append(results[idx])
+ idx += 1
+ added_this_round = True
+
+ indices[strategy_idx] = idx
+
+ if not added_this_round:
+ break
+
+ return result
diff --git a/openbox/acq_optimizer/generator.py b/openbox/acq_optimizer/generator.py
new file mode 100644
index 000000000..8c25fa47b
--- /dev/null
+++ b/openbox/acq_optimizer/generator.py
@@ -0,0 +1,396 @@
+import numpy as np
+from abc import ABC, abstractmethod
+from typing import List, Optional, Any,Tuple
+from ConfigSpace import Configuration
+from ConfigSpace.util import get_one_exchange_neighbourhood
+from openbox.utils.history import Observation, History
+from openbox.utils.util_funcs import get_types
+import warnings
+from openbox.acquisition_function.acquisition import AbstractAcquisitionFunction
+from openbox.utils.constants import MAXINT
+from ..compressor.sampling import SamplingStrategy
+import scipy.optimize
+
+MAX_INT = 10000
+
+class SearchGenerator(ABC):
+ @abstractmethod
+ def generate(self,
+ history: History,
+ num_points: int,
+ rng: np.random.RandomState,
+ acq_function=None,
+ **kwargs) -> List[Configuration]:
+ pass
+
+class RandomSearchGenerator(SearchGenerator):
+ def __init__(self, sampling_strategy:SamplingStrategy=None,config_space=None,random_state=None,batch_size=None):
+ if sampling_strategy is not None:
+ self.sampling_strategy = sampling_strategy
+ self.config_space=sampling_strategy.get_spaces()[0]
+ elif config_space is not None:
+ self.config_space=config_space
+ else:
+ raise ValueError("sampling_strategy and config_space is required!")
+ if random_state is None:
+ self.random_state='high'
+ else:
+ self.random_state=random_state
+
+ if batch_size is None:
+ types, bounds = get_types(self.config_space)
+ dim = np.sum(types == 0)
+ self.batch_size = min(5000, max(2000, 200 * dim))
+ else:
+ self.batch_size = batch_size
+
+ def generate(self,
+ history:History,
+ num_points: int,
+ rng: np.random.RandomState,
+ acq_function=None,
+ **kwargs) -> List[Configuration]:
+ if self.random_state=='high':
+ configs = self.sampling_strategy.sample(num_points)
+ for config in configs:
+ config.origin = f'Random Search'
+
+ elif self.random_state=='medium':
+ from openbox.utils.samplers import SobolSampler
+ cur_idx = 0
+ configs = list()
+ while cur_idx < num_points:
+ batch_size = min(self.batch_size, num_points - cur_idx)
+ turbo_state = kwargs.get('turbo_state', None)
+ if turbo_state is None:
+ lower_bounds = None
+ upper_bounds = None
+ else:
+ num_objectives=history.num_objectives
+ if num_objectives > 1:
+ # TODO implement adaptive strategy to choose trust region center for MO
+ raise NotImplementedError()
+ else:
+ incumbent_config = rng.choice(history.get_incumbent_configs())
+ x_center = incumbent_config.get_array()
+ lower_bounds = x_center - turbo_state.length / 2.0
+ upper_bounds = x_center + turbo_state.length / 2.0
+
+ sobol_sampler = SobolSampler(self.config_space, batch_size,
+ lower_bounds, upper_bounds,
+ random_state=rng.randint(0, int(1e8)))
+ _configs = sobol_sampler.generate(return_config=True)
+ configs.extend([_configs[idx] for idx in range(len(_configs))])
+ cur_idx += self.batch_size
+ for config in configs:
+ config.origin = f'BatchMC Search'
+
+ elif self.random_state=='low':
+ if acq_function is None:
+ raise ValueError('acq_function is required!')
+ d=len(self.config_space.get_hyperparameters())
+ bound=(0.0,1.0)
+ configs=[]
+ x_tries = rng.uniform(bound[0], bound[1], size=(num_points, d))
+ for i in range(x_tries.shape[0]):
+ # convert array to Configuration
+ config = Configuration(self.config_space, vector=x_tries[i])
+ config.origin = 'MESMO Search'
+ configs.append(config)
+
+ else:
+ raise ValueError('Random_state is invalid!')
+
+ return configs
+
+class LocalSearchGenerator(SearchGenerator):
+ def __init__(self,
+ max_steps: Optional[int] = None,
+ n_steps_plateau_walk: int = 10,
+ remove_duplicates: bool = True,
+ config_space=None,
+ sampling_strategy:SamplingStrategy=None):
+ self.max_steps = max_steps
+ self.n_steps_plateau_walk = n_steps_plateau_walk#高原行走步数
+ self.remove_duplicates = remove_duplicates
+
+ if sampling_strategy is not None:
+ self.sampling_strategy=sampling_strategy
+ self.config_space=sampling_strategy.get_spaces()[0]
+ elif config_space is not None:
+ self.config_space=config_space
+ else:
+ raise ValueError("sampling_strategy and config_space is required!")
+
+ def generate(self,
+ history: History,
+ num_points: int,
+ rng: np.random.RandomState,
+ acq_function=None,
+ **kwargs) -> List[Configuration]:
+ init_points = self._get_initial_points(
+ rng,acq_function, num_points, history)
+
+ configs = []
+ # Start N local search from different random start points
+ for start_point in init_points:
+ acq_val, configuration = self._one_iter(
+ rng,acq_function, start_point, self.n_steps_plateau_walk,**kwargs)
+
+ configuration.origin = "Local Search"
+ configs.append(configuration)
+
+ if self.remove_duplicates:
+ configs=self._remove_duplicates(configs)
+
+ return configs
+
+ def _sort_configs_by_acq_value(
+ self,
+ rng,
+ acquisition_function,
+ configs: List[Configuration]
+ ) -> List[Tuple[float, Configuration]]:
+ """Sort the given configurations by acquisition value
+
+ Parameters
+ ----------
+ acquisition_function: AbstractAcquisitionFunction
+ acquisition function
+ configs : list(Configuration)
+
+ Returns
+ -------
+ list: (acquisition value, Candidate solutions),
+ ordered by their acquisition function value
+ """
+
+ acq_values = acquisition_function(configs)
+
+ # From here
+ # http://stackoverflow.com/questions/20197990/how-to-make-argsort-result-to-be-random-between-equal-values
+ random = rng.rand(len(acq_values))
+ # Last column is primary sort key!
+ indices = np.lexsort((random.flatten(), acq_values.flatten()))
+
+ # Cannot use zip here because the indices array cannot index the
+ # rand_configs list, because the second is a pure python list
+ return [(acq_values[ind][0], configs[ind]) for ind in indices[::-1]]
+
+ def _get_initial_points(self, rng,acquisition_function, num_points, history):
+
+ if history.empty():
+ init_points = self.config_space.sample_configuration(
+ size=num_points)
+ else:
+ # initiate local search with best configurations from previous runs
+ configs_previous_runs = history.configurations
+ configs_previous_runs_sorted = self._sort_configs_by_acq_value(
+ rng,acquisition_function, configs_previous_runs)
+ num_configs_local_search = int(min(
+ len(configs_previous_runs_sorted),
+ num_points)
+ )
+ init_points = list(
+ map(lambda x: x[1],
+ configs_previous_runs_sorted[:num_configs_local_search])
+ )
+
+ return init_points
+
+ def _one_iter(
+ self,
+ rng,
+ acquisition_function: AbstractAcquisitionFunction,
+ start_point: Configuration,
+ n_steps_plateau_walk,
+ **kwargs
+ ) -> Tuple[float, Configuration]:
+
+ incumbent = start_point
+ # Compute the acquisition value of the incumbent
+ acq_val_incumbent = acquisition_function([incumbent], **kwargs)[0]
+
+ plateau_step=0
+ local_search_steps = 0
+ neighbors_looked_at = 0
+ while True:
+
+ local_search_steps += 1
+ changed_inc = False
+
+ # Get one exchange neighborhood returns an iterator (in contrast of
+ # the previously returned list).
+ all_neighbors = get_one_exchange_neighbourhood(
+ incumbent, seed=rng.randint(MAXINT))
+
+ for neighbor in all_neighbors:
+ acq_val = acquisition_function([neighbor], **kwargs)
+ neighbors_looked_at += 1
+
+ if acq_val > acq_val_incumbent:
+ # logger.debug("Switch to one of the neighbors")
+ incumbent = neighbor
+ acq_val_incumbent = acq_val
+ changed_inc = True
+ plateau_step=0
+ break
+
+ if (self.max_steps is not None and local_search_steps == self.max_steps):
+ break
+
+ if not changed_inc:
+ if plateau_step>=n_steps_plateau_walk:
+ break
+ else:
+ plateau_step+=1
+
+ return acq_val_incumbent, incumbent
+
+ def _remove_duplicates(self, configs: List[Configuration]) -> List[Configuration]:
+ seen = set()
+ unique = []
+ for config in configs:
+ key = str(sorted(config.get_dictionary().items()))
+ if key not in seen:
+ seen.add(key)
+ unique.append(config)
+ return unique
+
+class CMAESGenerator(SearchGenerator):
+ def __init__(self,sampling_strategy:SamplingStrategy=None,config_space=None,sigma=0.5):
+ if sampling_strategy is not None:
+ self.sampling_strategy=sampling_strategy
+ self.config_space=sampling_strategy.get_spaces()[0]
+ elif config_space is not None:
+ self.config_space=config_space
+ else:
+ raise ValueError("sampling_strategy and config_space is required!")
+ self.sigma=sigma
+
+ def generate(self,
+ history:History,
+ num_points: int,
+ rng: np.random.RandomState,
+ acq_function=None,
+ **kwargs) -> List[Configuration]:
+ if acq_function is None:
+ raise ValueError("acq_function is required.")
+ try:
+ from cma import CMAEvolutionStrategy
+ except ImportError:
+ raise ImportError("Package cma is not installed!")
+
+ types, bounds = get_types(self.config_space)
+ assert all(types == 0)
+
+ # Check Constant Hyperparameter
+ const_idx = list()
+ for i, bound in enumerate(bounds):
+ if np.isnan(bound[1]):
+ const_idx.append(i)
+
+ hp_num = len(bounds) - len(const_idx)
+ es = CMAEvolutionStrategy(hp_num * [0], self.sigma, inopts={'bounds': [0, 1]})
+ eval_num = 0
+ configs = list()
+ while eval_num < num_points:
+ X = es.ask(number=es.popsize)
+ _X = X.copy()
+ for i in range(len(_X)):
+ for index in const_idx:
+ _X[i] = np.insert(_X[i], index, 0)
+ _X = np.asarray(_X)
+ values = acq_function(_X,**kwargs)
+ values = np.reshape(values, (-1,))
+ es.tell(X, values)
+ configs.extend([(values[i], _X[i]) for i in range(es.popsize)])
+ eval_num += es.popsize
+ configs.sort(reverse=True, key=lambda x: x[0])
+ Configs=[_[1] for _ in configs]
+ configs=[Configuration(self.config_space,vector=Configs[i],origin = f'CMAES Search') for i in range(num_points)]
+ return configs
+
+class ScipySearchGenerator(SearchGenerator):
+ def __init__(self,sampling_strategy:SamplingStrategy=None,config_space=None,method=None):
+ if sampling_strategy is not None:
+ self.sampling_strategy = sampling_strategy
+ self.config_space=self.sampling_strategy.get_spaces()[0]
+ elif config_space is not None:
+ self.config_space=config_space
+ else:
+ raise ValueError("sampling_strategy and config_space is required!")
+ self.method=method
+
+
+ def generate(self,
+ history:History,
+ num_points: int,
+ rng: np.random.RandomState,
+ acq_function=None,
+ initial_configs=None,
+ **kwargs) -> List[Configuration]:
+
+ if acq_function is None:
+ raise ValueError('acq_function is required!')
+
+
+ def negative_acq(x):
+ # shape of x = (d,)
+ x = np.clip(x, 0.0, 1.0) # fix numerical problem in L-BFGS-B
+ try:
+ # self.config_space._check_forbidden(x)
+ Configuration(self.config_space, vector=x).is_valid_configuration()
+ except ValueError:
+ return np.inf
+ return -acq_function(x,**kwargs)
+
+ if self.method=='local':
+ types, bounds = get_types(self.config_space) # todo: support constant hp in scipy optimizer
+ assert all(types == 0), 'Scipy optimizer (L-BFGS-B) only supports Integer and Float parameters.'
+ self.bounds = bounds
+ options = dict(disp=False, maxiter=1000)
+ self.scipy_config = dict(tol=None, method='L-BFGS-B', options=options)
+ if initial_configs is None:
+ if history is None:
+ initial_configs=self.sampling_strategy.sample(num_points)
+
+ for init_config in initial_configs:
+ initial_config=init_config.get_array()
+
+ configs=list()
+ with warnings.catch_warnings():
+ # ignore warnings of np.inf
+ warnings.filterwarnings("ignore", message="invalid value encountered in subtract", category=RuntimeWarning)
+ result = scipy.optimize.minimize(fun=negative_acq,
+ x0=initial_config,
+ bounds=self.bounds,
+ **self.scipy_config)
+ try:
+ x = np.clip(result.x, 0.0, 1.0) # fix numerical problem in L-BFGS-B
+ config = Configuration(self.config_space, vector=x,origin = f'Scipy Search')
+ config.is_valid_configuration()
+ configs.append(config)
+ except Exception:
+ pass
+ if not configs:
+ raise ValueError()
+
+ elif self.method=='global':
+ configs = []
+ result = scipy.optimize.differential_evolution(func=negative_acq,bounds=self.bounds)
+ try:
+ x = np.clip(result.x, 0.0, 1.0) # fix numerical problem in L-BFGS-B
+ config = Configuration(self.config_space, vector=x,origin = f'ScipyGlobal Search')
+ config.is_valid_configuration()
+ configs.append(config)
+ except Exception:
+ pass
+ if not configs:
+ raise ValueError()
+
+ else:
+ raise ValueError("method should be local or global")
+
+ return configs
+
\ No newline at end of file
diff --git a/openbox/acq_optimizer/selector.py b/openbox/acq_optimizer/selector.py
new file mode 100644
index 000000000..849616d33
--- /dev/null
+++ b/openbox/acq_optimizer/selector.py
@@ -0,0 +1,281 @@
+import numpy as np
+from abc import ABC, abstractmethod
+from typing import List, Optional
+from .generator import SearchGenerator
+
+
+class StrategySelector(ABC):
+ @abstractmethod
+ def select(self, strategies: List[SearchGenerator], iteration: int) -> SearchGenerator:
+ """Select a strategy from the list of strategies
+
+ Parameters
+ ----------
+ strategies : List[SearchGenerator]
+ List of strategies to choose from
+ iteration : int
+ Current iteration number
+
+ Returns
+ -------
+ SearchGenerator
+ Selected strategy
+ """
+ pass
+
+ def reset(self):
+ pass
+
+
+class FixedSelector(StrategySelector):
+ """Fixed selector
+
+ Always select the fixed strategy.
+
+ Suitable scenarios: only one strategy, or want to fix using a specific strategy.
+ """
+
+ def __init__(self, index: int = 0):
+ """
+ Parameters
+ ----------
+ index : int, default=0
+ Index of the fixed strategy to select
+ """
+ self.index = index
+
+ def select(self, strategies: List[SearchGenerator], iteration: int) -> SearchGenerator:
+ if self.index >= len(strategies):
+ raise ValueError(f"Index {self.index} out of range for {len(strategies)} strategies")
+ return strategies[self.index]
+
+
+class ProbabilisticSelector(StrategySelector):
+ """Probabilistic selector
+
+ Select a strategy from the list of strategies according to the given probability distribution.
+
+ Parameters
+ ----------
+ probabilities : List[float]
+ Probability of selecting each strategy, and must sum to 1
+ rng : np.random.RandomState, optional
+ Random number generator
+
+ Examples
+ --------
+ >>> # 85% probability select local search, 15% probability select random search
+    >>> selector = ProbabilisticSelector([0.85, 0.15], rng=np.random.RandomState(1))
+ >>> strategy = selector.select([local_strategy, random_strategy], iteration=0)
+ """
+
+ def __init__(self,
+ probabilities: List[float],
+ rng: np.random.RandomState):
+ sum_probs = sum(probabilities)
+ if abs(sum_probs - 1.0) > 1e-6:
+ probabilities = [prob / sum_probs for prob in probabilities]
+
+ self.probabilities = np.array(probabilities)
+ self.rng = rng
+
+ def select(self, strategies: List[SearchGenerator], iteration: int) -> SearchGenerator:
+ if len(strategies) != len(self.probabilities):
+ raise ValueError(
+ f"Number of strategies ({len(strategies)}) must match "
+ f"number of probabilities ({len(self.probabilities)})"
+ )
+
+ idx = self.rng.choice(len(strategies), p=self.probabilities)
+ return strategies[idx]
+
+
+class InterleavedSelector(StrategySelector):
+ """Interleaved selector
+
+ Select a strategy from the list of strategies according to the given weights.
+
+ Parameters
+ ----------
+ weights : List[int]
+ Weight of each strategy (execution count ratio)
+ For example, [4, 1] means 4 times out of 5, select strategy 0, and 1 time out of 5, select strategy 1
+
+ Examples
+ --------
+ >>> # 4 times local search, 1 time random search
+ >>> selector = InterleavedSelector([4, 1])
+ >>> for i in range(10):
+ ... strategy = selector.select([local, random], i)
+ # Result: local, local, local, local, random, local, local, local, local, random
+ """
+
+ def __init__(self, weights: List[int]):
+ if not all(w > 0 for w in weights):
+ raise ValueError("All weights must be positive")
+
+ self.weights = weights
+ self.total = sum(weights)
+ self._counter = 0
+
+ def select(self, strategies: List[SearchGenerator], iteration: int) -> SearchGenerator:
+ if len(strategies) != len(self.weights):
+ raise ValueError(
+ f"Number of strategies ({len(strategies)}) must match "
+ f"number of weights ({len(self.weights)})"
+ )
+
+ position = self._counter % self.total
+
+ cumsum = 0
+ for i, weight in enumerate(self.weights):
+ cumsum += weight
+ if position < cumsum:
+ self._counter += 1
+ return strategies[i]
+
+ self._counter += 1
+ return strategies[0]
+
+ def reset(self):
+ self._counter = 0
+
+
+class RoundRobinSelector(StrategySelector):
+ """RoundRobin selector
+
+ Select a strategy from the list of strategies in a round-robin manner (each strategy has the same weight).
+
+ Examples
+ --------
+ >>> selector = RoundRobinSelector()
+ >>> for i in range(6):
+ ... strategy = selector.select([s1, s2, s3], i)
+ # Result: s1, s2, s3, s1, s2, s3
+ """
+
+ def __init__(self):
+ self._counter = 0
+
+ def select(self, strategies: List[SearchGenerator], iteration: int) -> SearchGenerator:
+ idx = self._counter % len(strategies)
+ self._counter += 1
+ return strategies[idx]
+
+ def reset(self):
+ self._counter = 0
+
+
+class AdaptiveSelector(StrategySelector):
+ """Adaptive selector
+
+ Dynamically adjust the selection probabilities of strategies based on their historical performance.
+ Strategies with better performance will get higher selection probabilities.
+
+ Parameters
+ ----------
+ initial_probs : List[float]
+ Initial probability distribution
+ learning_rate : float, default=0.1
+ Learning rate, control the speed of probability update
+ temperature : float, default=1.0
+ Temperature parameter, control the balance between exploration and exploitation
+ - Temperature high: more uniform probabilities (more exploration)
+ - Temperature low: more concentrated probabilities (more exploitation)
+ rng : np.random.RandomState
+ Random number generator
+
+ Notes
+ -----
+ Need to provide feedback through the update() method to adjust the probabilities.
+
+ Examples
+ --------
+ >>> selector = AdaptiveSelector([0.5, 0.5])
+ >>>
+ >>> # Use the strategy and get the performance metric
+ >>> strategy = selector.select([s1, s2], iteration=0)
+ >>> improvement = evaluate_strategy_performance(strategy)
+ >>>
+ >>> # Update the probability (the larger the improvement, the better)
+ >>> selector.update(strategy_index=0, reward=improvement)
+ """
+
+ def __init__(self,
+ initial_probs: List[float],
+ learning_rate: float = 0.1,
+ temperature: float = 1.0,
+ rng: np.random.RandomState = None):
+ sum_probs = sum(initial_probs)
+        if abs(sum_probs - 1.0) > 1e-6:
+ initial_probs = [prob / sum_probs for prob in initial_probs]
+
+ self.probs = np.array(initial_probs, dtype=float)
+ self.learning_rate = learning_rate
+ self.temperature = temperature
+        self.rng = rng if rng is not None else np.random.RandomState()
+
+ self.rewards = np.zeros(len(initial_probs))
+ self.counts = np.zeros(len(initial_probs))
+
+ self._last_selected = None
+
+ def select(self, strategies: List[SearchGenerator], iteration: int) -> SearchGenerator:
+ if len(strategies) != len(self.probs):
+ raise ValueError(
+ f"Number of strategies ({len(strategies)}) must match "
+ f"number of probabilities ({len(self.probs)})"
+ )
+
+ temp_probs = self._apply_temperature(self.probs)
+
+ idx = self.rng.choice(len(strategies), p=temp_probs)
+ self._last_selected = idx
+ self.counts[idx] += 1
+
+ return strategies[idx]
+
+ def update(self, strategy_index: int, reward: float):
+ """Update the probability of a strategy
+
+ Parameters
+ ----------
+ strategy_index : int
+ Index of the strategy
+ reward : float
+ Reward value (the larger the better)
+ For example, can use improvement = old_best - new_best
+ """
+ self.rewards[strategy_index] += reward
+
+ avg_reward = self.rewards[strategy_index] / max(self.counts[strategy_index], 1)
+
+ # Simple update rule: strategies with better performance increase probability
+ self.probs[strategy_index] += self.learning_rate * avg_reward
+
+ self.probs = np.maximum(self.probs, 0.01) # keep minimum probability
+ self.probs = self.probs / self.probs.sum()
+
+ def _apply_temperature(self, probs: np.ndarray) -> np.ndarray:
+ if self.temperature == 1.0:
+ return probs
+
+ # softmax with temperature, scale the probabilities
+ log_probs = np.log(probs + 1e-10)
+ scaled = log_probs / self.temperature
+ exp_scaled = np.exp(scaled - np.max(scaled))
+ return exp_scaled / exp_scaled.sum()
+
+ def reset(self):
+ self.probs = np.ones(len(self.probs)) / len(self.probs)
+ self.rewards = np.zeros(len(self.probs))
+ self.counts = np.zeros(len(self.probs))
+ self._last_selected = None
+
+ def get_statistics(self) -> dict:
+ return {
+ 'probabilities': self.probs.tolist(),
+ 'avg_rewards': (self.rewards / np.maximum(self.counts, 1)).tolist(),
+ 'counts': self.counts.tolist()
+ }
+
diff --git a/openbox/acq_optimizer/upper_maximizer.py b/openbox/acq_optimizer/upper_maximizer.py
new file mode 100644
index 000000000..dd67a74c4
--- /dev/null
+++ b/openbox/acq_optimizer/upper_maximizer.py
@@ -0,0 +1,283 @@
+import abc
+import time
+import warnings
+from typing import Iterable, List, Union, Tuple, Optional,Any
+import random
+import scipy.optimize
+import numpy as np
+
+from openbox import logger
+from openbox.acquisition_function.acquisition import AbstractAcquisitionFunction
+from openbox.utils.config_space import get_one_exchange_neighbourhood, \
+ Configuration, ConfigurationSpace
+from openbox.utils.history import History, MultiStartHistory
+from openbox.utils.util_funcs import get_types
+from openbox.utils.constants import MAXINT
+from ..compressor.sampling import SamplingStrategy, StandardSamplingStrategy
+from . import generator
+from . import base
+from . import selector
+
+class AcquisitionFunctionMaximizer(object, metaclass=abc.ABCMeta):
+ """Abstract class for acquisition maximization.
+
+ In order to use this class it has to be subclassed and the method
+ ``_maximize`` must be implemented.
+
+ Parameters
+ ----------
+ config_space : ConfigurationSpace
+
+ rng : np.random.RandomState or int, optional
+ """
+
+ def __init__(
+ self,
+ config_space: ConfigurationSpace,
+ sampling_strategy:SamplingStrategy = None,
+ rng: Union[bool, np.random.RandomState] = None,
+ turbo_length=None,
+ ):
+ if sampling_strategy is None:
+ sampling_strategy = StandardSamplingStrategy(config_space, seed=getattr(rng, "randint", lambda *_: None)(MAXINT))
+ self.sampling_strategy = sampling_strategy
+ self.config_space = config_space
+ self.turbo_length=turbo_length
+
+ self.turbo_state=False
+ if self.turbo_length is not None:
+ self.turbo_state=True
+
+ if rng is None:
+ logger.debug('no rng given, using default seed of 1')
+ self.rng = np.random.RandomState(seed=1)
+ else:
+ self.rng = rng
+
+ def maximize(
+ self,
+ acquisition_function: AbstractAcquisitionFunction,
+ history: History,
+ num_points: int,
+ **kwargs
+ ) -> Iterable[Configuration]:
+ """Maximize acquisition function using ``_maximize``.
+
+ Parameters
+ ----------
+ history: openbox.utils.history.History
+ history object
+ num_points: int
+ number of points to be sampled
+ **kwargs
+
+ Returns
+ -------
+ iterable
+ An iterable consisting of :class:`openbox.config_space.Configuration`.
+ """
+ return [t[1] for t in self._maximize(acquisition_function, history, num_points, **kwargs)]
+
+ @abc.abstractmethod
+ def _maximize(
+ self,
+ acquisition_function: AbstractAcquisitionFunction,
+ history: History,
+ num_points: int,
+ **kwargs
+ ) -> Iterable[Tuple[float, Configuration]]:
+ """Implements acquisition function maximization.
+
+ In contrast to ``maximize``, this method returns an iterable of tuples,
+ consisting of the acquisition function value and the configuration. This
+ allows to plug together different acquisition function maximizers.
+
+ Parameters
+ ----------
+ acquisition_function: AbstractAcquisitionFunction
+ acquisition function
+ history: openbox.utils.history.History
+ history object
+ num_points: int
+ number of points to be sampled
+ **kwargs
+
+ Returns
+ -------
+ iterable
+            An iterable consisting of
+            tuple(acquisition_value, :class:`openbox.config_space.Configuration`).
+ """
+ raise NotImplementedError()
+
+ def _sort_configs_by_acq_value(
+ self,
+ acquisition_function,
+ configs: List[Configuration]
+ ) -> List[Tuple[float, Configuration]]:
+ """Sort the given configurations by acquisition value
+
+ Parameters
+ ----------
+ acquisition_function: AbstractAcquisitionFunction
+ acquisition function
+ configs : list(Configuration)
+
+ Returns
+ -------
+ list: (acquisition value, Candidate solutions),
+ ordered by their acquisition function value
+ """
+
+ acq_values = acquisition_function(configs)
+
+ # From here
+ # http://stackoverflow.com/questions/20197990/how-to-make-argsort-result-to-be-random-between-equal-values
+ random = self.rng.rand(len(acq_values))
+ # Last column is primary sort key!
+ indices = np.lexsort((random.flatten(), acq_values.flatten()))
+
+ # Cannot use zip here because the indices array cannot index the
+ # rand_configs list, because the second is a pure python list
+ return [(acq_values[ind][0], configs[ind]) for ind in indices[::-1]]
+
+ def fliter(
+ self,
+ incumbent_config,
+ challengers):
+ x_center = incumbent_config.get_array()
+ lower_bounds = x_center - self.turbo_length / 2.0
+ upper_bounds = x_center + self.turbo_length / 2.0
+ filtered_challengers = []
+ for config in challengers:
+ config_array = config.get_array()
+ if np.all(config_array >= lower_bounds) and np.all(config_array <= upper_bounds):
+ filtered_challengers.append(config)
+ return filtered_challengers
+
+
+class InterleavedLocalAndRandomSearchMaximizer(AcquisitionFunctionMaximizer):
+ """Implements openbox's default acquisition function optimization.
+
+ This acq_optimizer performs local search from the previous best points
+    according to the acquisition function, uses the acquisition function to
+ sort randomly sampled configurations and interleaves unsorted, randomly
+ sampled configurations in between.
+
+ Parameters
+ ----------
+ config_space : ConfigurationSpace
+
+ rng : np.random.RandomState or int, optional
+
+ max_steps: int
+ [LocalSearchMaximizer] Maximum number of steps that the local search will perform
+
+ n_steps_plateau_walk: int
+ [LocalSearchMaximizer] number of steps during a plateau walk before local search terminates
+
+ n_sls_iterations: int
+ [LocalSearchMaximizer] number of local search iterations
+
+ """
+
+ def __init__(
+ self,
+ config_space: ConfigurationSpace,
+ sampling_strategy:SamplingStrategy = None,
+ rng: Union[bool, np.random.RandomState] = None,
+ max_steps: Optional[int] = None,
+ n_steps_plateau_walk: int = 10,
+ n_sls_iterations: int = 10,
+ rand_prob=0.25,
+ turbo_length=None,
+ ):
+ super().__init__(config_space, sampling_strategy, rng, turbo_length)
+
+ self.random_generator = generator.RandomSearchGenerator(
+ config_space=config_space,
+ sampling_strategy=self.sampling_strategy,
+ random_state=None,
+ )
+ self.local_generator = generator.LocalSearchGenerator(
+ config_space=config_space,
+ sampling_strategy=self.sampling_strategy,
+ max_steps=max_steps,
+ n_steps_plateau_walk=n_steps_plateau_walk,
+ )
+ self.n_sls_iterations = n_sls_iterations
+ self.strategy=[self.local_generator,self.random_generator]
+
+ # =======================================================================
+ # self.local_search = DiffOpt(
+ # acquisition_function=acquisition_function,
+ # config_space=config_space,
+ # rng=rng
+ # )
+ # =======================================================================
+
+ def maximize(
+ self,
+ acquisition_function: AbstractAcquisitionFunction,
+ history: History,
+ num_points: int,
+ **kwargs
+ ) -> Iterable[Configuration]:
+ """Maximize acquisition function using ``_maximize``.
+
+ Parameters
+ ----------
+ history: openbox.utils.history.History
+ history object
+ num_points: int
+ number of points to be sampled
+ **kwargs
+ passed to acquisition function
+
+ Returns
+ -------
+ Iterable[Configuration]
+ List of configurations.
+ """
+ self.optimizer = base.QuotaCompositeOptimizer(acquisition_function=acquisition_function,
+ config_space=self.config_space,
+ strategies=self.strategy,
+ quotas=[self.n_sls_iterations, num_points - self.n_sls_iterations],
+ rng=self.rng,
+ candidate_multiplier=3)
+ results = self.optimizer._maximize(history=history,
+ num_points=num_points,
+ **kwargs)
+
+ challengers=[]
+
+ for _ in results:
+ config=_[1]
+ challengers.append(config)
+
+ if self.turbo_state and history:
+ incumbent_config = self.rng.choice(history.get_incumbent_configs())
+ flitered_challengers=self.fliter(incumbent_config=incumbent_config,challengers=challengers)
+ return flitered_challengers
+
+ return challengers
+
+
+ def _maximize(
+ self,
+        acquisition_function: AbstractAcquisitionFunction,
+        history: History,
+ num_points: int,
+ **kwargs
+ ) -> Iterable[Tuple[float, Configuration]]:
+ self.optimizer = base.QuotaCompositeOptimizer(acquisition_function=acquisition_function,
+ config_space=self.config_space,
+ strategies=self.strategy,
+ quotas=[self.n_sls_iterations, num_points - self.n_sls_iterations],
+ rng=self.rng,
+ candidate_multiplier=3)
+ results = self.optimizer._maximize(history=history,
+ num_points=num_points,
+ **kwargs)
+
+ return results
\ No newline at end of file
diff --git a/openbox/acq_optimizer/utils.py b/openbox/acq_optimizer/utils.py
new file mode 100644
index 000000000..9fbf7ca7c
--- /dev/null
+++ b/openbox/acq_optimizer/utils.py
@@ -0,0 +1,29 @@
+import numpy as np
+from typing import List
+from ConfigSpace import Configuration, ConfigurationSpace
+
+
+def convert_configurations_to_array(configs: List[Configuration]) -> np.ndarray:
+ if not configs:
+ raise ValueError("configs list cannot be empty")
+
+ configs_array = np.array(
+ [config.get_array() for config in configs],
+ dtype=np.float64
+ )
+ configuration_space = configs[0].configuration_space
+ return impute_default_values(configuration_space, configs_array)
+
+
+def impute_default_values(
+ configuration_space: ConfigurationSpace,
+ configs_array: np.ndarray
+) -> np.ndarray:
+ for hp in configuration_space.get_hyperparameters():
+ default = hp.normalized_default_value
+ idx = configuration_space.get_idx_by_hyperparameter_name(hp.name)
+
+ nonfinite_mask = ~np.isfinite(configs_array[:, idx])
+ configs_array[nonfinite_mask, idx] = default
+
+ return configs_array
\ No newline at end of file
diff --git a/openbox/compressor/__init__.py b/openbox/compressor/__init__.py
new file mode 100644
index 000000000..c52c2ae93
--- /dev/null
+++ b/openbox/compressor/__init__.py
@@ -0,0 +1,258 @@
+from typing import Type, Optional
+from ConfigSpace import ConfigurationSpace, Configuration
+from .compressor import Compressor
+from .pipeline import CompressionPipeline
+from .progress import OptimizerProgress
+from .step import CompressionStep
+
+from .update import (
+ UpdateStrategy,
+ PeriodicUpdateStrategy,
+ StagnationUpdateStrategy,
+ ImprovementUpdateStrategy,
+ HybridUpdateStrategy,
+ CompositeUpdateStrategy,
+)
+
+from .steps.dimension import (
+ DimensionSelectionStep,
+ SHAPDimensionStep,
+ ExpertDimensionStep,
+ CorrelationDimensionStep,
+ AdaptiveDimensionStep,
+)
+
+from .steps.range import (
+ RangeCompressionStep,
+ BoundaryRangeStep,
+ ExpertRangeStep,
+ SHAPBoundaryRangeStep,
+ KDEBoundaryRangeStep
+)
+
+from .steps.projection import (
+ TransformativeProjectionStep,
+ REMBOProjectionStep,
+ HesBOProjectionStep,
+ KPCAProjectionStep,
+ QuantizationProjectionStep,
+)
+
+from .sampling import (
+ SamplingStrategy,
+ StandardSamplingStrategy,
+ MixedRangeSamplingStrategy,
+)
+
+from .utils import (
+ load_expert_params,
+ create_space_from_ranges,
+)
+
+from .api import (
+ create_step_from_string,
+ create_steps_from_strings,
+ get_available_step_strings,
+ validate_step_string,
+ create_filling_from_string,
+ create_filling_from_config,
+ get_available_filling_strings,
+ validate_filling_string,
+ get_filling_info,
+ compress_from_config,
+)
+
+_COMPRESSOR_REGISTRY = {
+ 'pipeline': Compressor,
+ 'shap': None,
+ 'llamatune': None,
+ 'expert': None,
+ 'none': None,
+}
+
+
+def get_compressor(compressor_type: Optional[str] = None,
+ config_space: ConfigurationSpace = None,
+ **kwargs):
+ if compressor_type is None:
+ if 'adapter_alias' in kwargs or 'le_low_dim' in kwargs:
+ compressor_type = 'llamatune'
+ else:
+ compressor_type = kwargs.get('strategy', 'shap')
+            # the 'none' branch was a no-op; any strategy
+            # other than 'none' is treated as 'shap'
+            if compressor_type != 'none':
+                compressor_type = 'shap'
+
+ if 'steps' in kwargs:
+ steps = kwargs.pop('steps')
+ return Compressor(
+ config_space=config_space,
+ steps=steps,
+ **kwargs
+ )
+
+ if compressor_type == 'none':
+ class NoCompressor(Compressor):
+ def _compress_space_impl(self, space_history=None):
+ return config_space, config_space
+ def unproject_point(self, point):
+ if hasattr(point, 'get_dictionary'):
+ values = point.get_dictionary()
+ target_space = getattr(point, 'configuration_space', config_space)
+ elif isinstance(point, dict):
+ values = point
+ target_space = config_space
+ else:
+ values = dict(point)
+ target_space = config_space
+ return Configuration(target_space, values=values)
+ return NoCompressor(config_space=config_space, **kwargs)
+
+ steps = []
+
+ if compressor_type == 'shap' or compressor_type == 'expert':
+ strategy = kwargs.get('strategy', 'shap' if compressor_type == 'shap' else 'expert')
+
+ if strategy != 'none':
+ if strategy == 'expert':
+ steps.append(ExpertDimensionStep(
+ strategy='expert',
+ expert_params=kwargs.get('expert_params', []),
+ expert_config_file=kwargs.get('expert_config_file', None),
+ topk=kwargs.get('topk', 20),
+ ))
+ else:
+ steps.append(SHAPDimensionStep(
+ strategy='shap',
+ topk=kwargs.get('topk', 20),
+ ))
+
+ top_ratio = kwargs.get('top_ratio', 0.8)
+ sigma = kwargs.get('sigma', 2.0)
+ if top_ratio < 1.0 or sigma > 0:
+ steps.append(BoundaryRangeStep(
+ method='boundary',
+ top_ratio=top_ratio,
+ sigma=sigma,
+ enable_mixed_sampling=kwargs.get('enable_mixed_sampling', True),
+ initial_prob=kwargs.get('initial_prob', 0.9),
+ seed=kwargs.get('seed', 42),
+ ))
+
+ elif compressor_type == 'pipeline':
+ # Pipeline type: steps should be provided in kwargs
+ # This case is already handled above, but keep for clarity
+ raise ValueError("For 'pipeline' type, provide 'steps' in kwargs")
+
+ elif compressor_type == 'llamatune':
+ adapter_alias = kwargs.get('adapter_alias', 'none')
+ le_low_dim = kwargs.get('le_low_dim', 10)
+ max_num_values = kwargs.get('max_num_values', None)
+ seed = kwargs.get('seed', 42)
+
+ if max_num_values is not None:
+ steps.append(QuantizationProjectionStep(
+ method='quantization',
+ max_num_values=max_num_values,
+ seed=seed,
+ ))
+
+ if adapter_alias != 'none':
+ if adapter_alias == 'rembo':
+ steps.append(REMBOProjectionStep(
+ method='rembo',
+ low_dim=le_low_dim,
+ max_num_values=max_num_values,
+ seed=seed,
+ ))
+ elif adapter_alias == 'hesbo':
+ steps.append(HesBOProjectionStep(
+ method='hesbo',
+ low_dim=le_low_dim,
+ max_num_values=max_num_values,
+ seed=seed,
+ ))
+ else:
+ raise ValueError(f"Unknown adapter_alias: {adapter_alias}. Supported: 'rembo', 'hesbo'")
+
+ else:
+ raise ValueError(f"Unknown compressor type: {compressor_type}. "
+ f"Available types: {list(_COMPRESSOR_REGISTRY.keys())}")
+
+ if steps:
+ return Compressor(
+ config_space=config_space,
+ steps=steps,
+ **kwargs
+ )
+ else:
+ class NoCompressor(Compressor):
+ def _compress_space_impl(self, space_history=None):
+ return config_space, config_space
+ def unproject_point(self, point):
+ if hasattr(point, 'get_dictionary'):
+ values = point.get_dictionary()
+ target_space = getattr(point, 'configuration_space', config_space)
+ elif isinstance(point, dict):
+ values = point
+ target_space = config_space
+ else:
+ values = dict(point)
+ target_space = config_space
+ return Configuration(target_space, values=values)
+ return NoCompressor(config_space=config_space, **kwargs)
+
+
+__all__ = [
+ 'CompressionStep',
+ 'Compressor',
+ 'CompressionPipeline',
+ 'OptimizerProgress',
+
+ 'UpdateStrategy',
+ 'PeriodicUpdateStrategy',
+ 'StagnationUpdateStrategy',
+ 'ImprovementUpdateStrategy',
+ 'HybridUpdateStrategy',
+ 'CompositeUpdateStrategy',
+
+ 'DimensionSelectionStep',
+ 'SHAPDimensionStep',
+ 'ExpertDimensionStep',
+ 'CorrelationDimensionStep',
+ 'AdaptiveDimensionStep',
+
+ 'RangeCompressionStep',
+ 'BoundaryRangeStep',
+ 'ExpertRangeStep',
+ 'SHAPBoundaryRangeStep',
+ 'KDEBoundaryRangeStep',
+
+ 'TransformativeProjectionStep',
+ 'REMBOProjectionStep',
+ 'HesBOProjectionStep',
+ 'KPCAProjectionStep',
+ 'QuantizationProjectionStep',
+
+ 'SamplingStrategy',
+ 'StandardSamplingStrategy',
+ 'MixedRangeSamplingStrategy',
+
+ 'load_expert_params',
+ 'create_space_from_ranges',
+
+ 'get_compressor',
+
+ 'create_step_from_string',
+ 'create_steps_from_strings',
+ 'get_available_step_strings',
+ 'validate_step_string',
+ 'create_filling_from_string',
+ 'create_filling_from_config',
+ 'get_available_filling_strings',
+ 'validate_filling_string',
+ 'get_filling_info',
+
+ 'compress_from_config',
+]
diff --git a/openbox/compressor/api/__init__.py b/openbox/compressor/api/__init__.py
new file mode 100644
index 000000000..c2a95f87c
--- /dev/null
+++ b/openbox/compressor/api/__init__.py
@@ -0,0 +1,35 @@
+from .step_factory import (
+ create_step_from_string,
+ create_steps_from_strings,
+ get_available_step_strings,
+ validate_step_string,
+)
+from .filling_factory import (
+ create_filling_from_string,
+ create_filling_from_config,
+ get_available_filling_strings,
+ validate_filling_string,
+ get_filling_info,
+)
+from .compress_api import (
+ compress_from_config,
+ create_config_space_from_dict,
+ load_history_from_dict,
+ load_histories_from_dicts,
+)
+
+__all__ = [
+ 'create_step_from_string',
+ 'create_steps_from_strings',
+ 'get_available_step_strings',
+ 'validate_step_string',
+ 'create_filling_from_string',
+ 'create_filling_from_config',
+ 'get_available_filling_strings',
+ 'validate_filling_string',
+ 'get_filling_info',
+ 'compress_from_config',
+ 'create_config_space_from_dict',
+ 'load_history_from_dict',
+ 'load_histories_from_dicts',
+]
\ No newline at end of file
diff --git a/openbox/compressor/api/compress_api.py b/openbox/compressor/api/compress_api.py
new file mode 100644
index 000000000..3ffa4de55
--- /dev/null
+++ b/openbox/compressor/api/compress_api.py
@@ -0,0 +1,420 @@
+"""
+Compression API for Frontend Integration
+
+This module provides a high-level API for frontend applications to:
+1. Create compression steps from string identifiers
+2. Execute compression with user-provided configuration
+3. Return results in JSON format
+"""
+
+import json
+import sys
+import argparse
+import logging
+from typing import Dict, Any, List, Optional, Union
+from pathlib import Path
+
+from ConfigSpace import ConfigurationSpace, Configuration
+from ConfigSpace.hyperparameters import (
+ UniformFloatHyperparameter,
+ UniformIntegerHyperparameter,
+ CategoricalHyperparameter,
+)
+from openbox.utils.history import History, Observation
+from openbox.utils.constants import SUCCESS
+
+from .. import Compressor
+from .step_factory import (
+ validate_step_string,
+ create_steps_from_strings,
+)
+from .filling_factory import (
+ create_filling_from_string,
+ create_filling_from_config,
+)
+from openbox import logger
+
+
+def create_config_space_from_dict(config_dict: Dict[str, Any]) -> ConfigurationSpace:
+ """
+ Create a ConfigurationSpace from a dictionary definition.
+
+ Args:
+ config_dict: Dictionary with hyperparameter definitions.
+ Format: {
+ 'param_name': {
+ 'type': 'float' | 'integer' | 'int' | 'categorical',
+ 'min': float, # for float/integer/int
+ 'max': float, # for float/integer/int
+ 'default': value,
+ 'log': bool, # optional, for float/integer/int
+ 'choices': List, # for categorical
+ }
+ }
+
+ Returns:
+ ConfigurationSpace instance
+ """
+ cs = ConfigurationSpace()
+
+ for param_name, param_def in config_dict.items():
+ param_type = param_def.get('type', 'float').lower()
+
+ if param_type in ('float', 'real'):
+ hp = UniformFloatHyperparameter(
+ name=param_name,
+ lower=float(param_def['min']),
+ upper=float(param_def['max']),
+ default_value=param_def.get('default', (param_def['min'] + param_def['max']) / 2),
+ log=param_def.get('log', False)
+ )
+ elif param_type in ('int', 'integer'):
+ hp = UniformIntegerHyperparameter(
+ name=param_name,
+ lower=int(param_def['min']),
+ upper=int(param_def['max']),
+ default_value=param_def.get('default', int((param_def['min'] + param_def['max']) / 2)),
+ log=param_def.get('log', False)
+ )
+ elif param_type == 'categorical':
+ hp = CategoricalHyperparameter(
+ name=param_name,
+ choices=param_def['choices'],
+ default_value=param_def.get('default', param_def['choices'][0])
+ )
+ else:
+ raise ValueError(f"Unsupported parameter type: {param_type}. Supported: 'float', 'integer', 'int', 'categorical'")
+
+ cs.add_hyperparameter(hp)
+
+ return cs
+
+
+def load_history_from_dict(
+ history_data: List[Dict[str, Any]],
+ config_space: ConfigurationSpace
+) -> History:
+ """
+ Create a History object from dictionary data.
+
+ Supports two formats:
+ 1. Simple format:
+ {
+ 'config': Dict[str, Any],
+ 'objective': float, # single value
+ 'trial_state': str, # optional
+ 'elapsed_time': float # optional
+ }
+
+ 2. Full format (from History Type):
+ {
+ 'config': Dict[str, Any],
+ 'objectives': List[float], # array of objectives
+ 'constraints': List[float] | None, # optional
+ 'trial_state': int | str, # 0 or 'SUCCESS'
+ 'elapsed_time': float, # optional
+ 'create_time': str, # optional, ignored
+ 'extra_info': dict # optional, ignored
+ }
+
+ Args:
+ history_data: List of observations
+ config_space: ConfigurationSpace instance
+
+ Returns:
+ History instance
+ """
+ num_objectives = 1
+ num_constraints = 0
+ if history_data:
+ first_obs = history_data[0]
+ if 'objectives' in first_obs:
+ num_objectives = len(first_obs['objectives'])
+ if 'constraints' in first_obs and first_obs['constraints'] is not None:
+ num_constraints = len(first_obs['constraints'])
+
+ history = History(
+ task_id='frontend_task',
+ num_objectives=num_objectives,
+ num_constraints=num_constraints,
+ config_space=config_space
+ )
+
+ for obs_data in history_data:
+ config = Configuration(config_space, values=obs_data['config'])
+
+ if 'objectives' in obs_data:
+ objectives = obs_data['objectives']
+ elif 'objective' in obs_data:
+ objectives = [obs_data['objective']]
+ else:
+ raise ValueError("Observation must have either 'objective' or 'objectives' field")
+
+ constraints = obs_data.get('constraints', None)
+
+ trial_state = obs_data.get('trial_state', SUCCESS)
+ if isinstance(trial_state, int):
+ if trial_state == 0:
+ trial_state = SUCCESS
+
+ obs = Observation(
+ config=config,
+ objectives=objectives,
+ constraints=constraints,
+ trial_state=trial_state,
+ elapsed_time=obs_data.get('elapsed_time', 0.0)
+ )
+ history.update_observation(obs)
+
+ return history
+
+
+def load_histories_from_dicts(
+ histories_data: List[List[Dict[str, Any]]],
+ config_space: ConfigurationSpace
+) -> List[History]:
+ """
+ Load multiple History objects from list of observation dictionaries.
+
+ Args:
+ histories_data: List of history data, each is a list of observations.
+ Format: [
+ [obs1, obs2, ...], # History 1
+ [obs3, obs4, ...], # History 2
+ ...
+ ]
+ config_space: ConfigurationSpace instance
+
+ Returns:
+ List of History instances
+ """
+ histories = []
+ for i, history_data in enumerate(histories_data):
+ history = load_history_from_dict(history_data, config_space)
+ history.task_id = f'source_task_{i}'
+ histories.append(history)
+ logger.info(f"Loaded history {i+1}: {len(history.observations)} observations")
+ return histories
+
+
+def compress_from_config(
+ config_space_def: Dict[str, Any],
+ step_config: Dict[str, Any],
+ history_data: List[List[Dict[str, Any]]],
+ output_dir: Optional[str] = None,
+ save_info: bool = True
+) -> Dict[str, Any]:
+ """
+ Execute compression from configuration dictionary.
+
+ Args:
+ config_space_def: Configuration space definition (see create_config_space_from_dict)
+ step_config: Step configuration dictionary with format:
+ {
+ 'dimension_step': str, # e.g., 'd_shap', 'd_none'
+ 'range_step': str, # e.g., 'r_kde', 'r_none'
+ 'projection_step': str, # e.g., 'p_quant', 'p_none'
+ 'step_params': { # optional, parameter overrides
+ 'd_shap': {'topk': 10, 'exclude_params': ['param1']},
+ 'r_kde': {'source_top_ratio': 0.5}
+ },
+ 'filling_config': { # optional, filling strategy config
+ 'type': 'default',
+ 'fixed_values': { # optional, fixed parameter values
+ 'param1': value1,
+ 'param2': value2
+ }
+ }
+ }
+ history_data: Required history data. List[List[Dict]] format:
+ - Single history: [[...]] (one JSON file with list of observations)
+ - Multiple histories: [[...], [...], ...] (multiple JSON files, each with list of observations)
+ Each inner list (JSON file) will be converted to one History object.
+ Source similarities will be automatically set to 1/len(histories) for each history.
+ output_dir: Optional output directory for saving results
+ save_info: Whether to save compression info
+
+ Returns:
+ Dictionary with compression results
+ """
+ config_space = create_config_space_from_dict(config_space_def)
+ logger.info(f"Created configuration space with {len(config_space.get_hyperparameters())} parameters")
+
+
+ step_strings = []
+ step_params = step_config.get('step_params', {})
+
+ dim_step = step_config.get('dimension_step', 'd_none')
+ if validate_step_string(dim_step):
+ step_strings.append(dim_step)
+ else:
+ raise ValueError(f"Invalid dimension step: {dim_step}")
+
+ range_step = step_config.get('range_step', 'r_none')
+ if validate_step_string(range_step):
+ step_strings.append(range_step)
+ else:
+ raise ValueError(f"Invalid range step: {range_step}")
+
+ proj_step = step_config.get('projection_step', 'p_none')
+ if validate_step_string(proj_step):
+ step_strings.append(proj_step)
+ else:
+ raise ValueError(f"Invalid projection step: {proj_step}")
+
+ steps = create_steps_from_strings(step_strings, step_params=step_params)
+ logger.info(f"Created {len(steps)} compression steps")
+
+ # create filling strategy from config if provided
+ filling_strategy = None
+ filling_config = step_config.get('filling_config')
+ if filling_config:
+ filling_strategy = create_filling_from_config(filling_config)
+ logger.info(f"Created filling strategy: {type(filling_strategy).__name__}")
+ if filling_strategy.fixed_values:
+ logger.info(f"Fixed values: {list(filling_strategy.fixed_values.keys())}")
+
+ compressor = Compressor(
+ config_space=config_space,
+ steps=steps,
+ filling_strategy=filling_strategy,
+ save_compression_info=save_info,
+ output_dir=output_dir
+ )
+
+ space_history = []
+ for i, history_dict_list in enumerate(history_data):
+ history = load_history_from_dict(history_dict_list, config_space)
+ history.task_id = f'source_task_{i}'
+ space_history.append(history)
+ logger.info(f"Loaded history {i+1}: {len(history.observations)} observations")
+
+ num_histories = len(space_history)
+ source_similarities = {i: 1.0 / num_histories for i in range(num_histories)}
+ logger.info(f"Loaded {len(space_history)} histories with auto-calculated similarities: {source_similarities}")
+
+ surrogate_space, sample_space = compressor.compress_space(
+ space_history=space_history,
+ source_similarities=source_similarities
+ )
+
+ result = {
+ 'success': True,
+ 'original_dim': len(config_space.get_hyperparameters()),
+ 'surrogate_dim': len(surrogate_space.get_hyperparameters()),
+ 'sample_dim': len(sample_space.get_hyperparameters()),
+ 'compression_ratio': len(surrogate_space.get_hyperparameters()) / len(config_space.get_hyperparameters()),
+ 'original_params': config_space.get_hyperparameter_names(),
+ 'surrogate_params': surrogate_space.get_hyperparameter_names(),
+ 'sample_params': sample_space.get_hyperparameter_names(),
+ 'steps_used': [type(s).__name__ for s in steps],
+ 'output_dir': output_dir if save_info else None
+ }
+
+ try:
+ summary = compressor.get_compression_summary()
+ result['compression_summary'] = summary
+ except:
+ pass
+ logger.info(f"Compression completed: {result['original_dim']} -> {result['surrogate_dim']} dimensions")
+ return result
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description='Compression API - Execute compression from JSON configuration'
+ )
+ parser.add_argument(
+ '--config-space',
+ type=str,
+ required=True,
+ help='Path to JSON file with configuration space definition'
+ )
+ parser.add_argument(
+ '--steps',
+ type=str,
+ required=True,
+ help='Path to JSON file with step configuration'
+ )
+ parser.add_argument(
+ '--history',
+ type=str,
+ nargs='+',
+ required=True,
+ help='Path(s) to JSON file(s) with history data (required). Can specify multiple files for multi-source transfer learning. Source similarities will be auto-calculated as 1/len(histories) for each history.'
+ )
+ parser.add_argument(
+ '--output-dir',
+ type=str,
+ default=None,
+ help='Output directory for compression results (default: ./results/compression)'
+ )
+ parser.add_argument(
+ '--no-save',
+ action='store_true',
+ help='Do not save compression info'
+ )
+ parser.add_argument(
+ '--verbose',
+ action='store_true',
+ help='Enable verbose logging'
+ )
+
+ args = parser.parse_args()
+
+ if args.verbose:
+ logging.basicConfig(level=logging.INFO)
+ else:
+ logging.basicConfig(level=logging.WARNING)
+
+ config_space_path = Path(args.config_space)
+ if not config_space_path.exists():
+ print(f"Error: Config space file not found: {args.config_space}", file=sys.stderr)
+ sys.exit(1)
+ with open(config_space_path, 'r') as f:
+ config_space_def = json.load(f)
+
+ steps_path = Path(args.steps)
+ if not steps_path.exists():
+ print(f"Error: Steps config file not found: {args.steps}", file=sys.stderr)
+ sys.exit(1)
+ with open(steps_path, 'r') as f:
+ step_config = json.load(f)
+
+ histories_list = []
+ for hist_file in args.history:
+ hist_path = Path(hist_file)
+ if not hist_path.exists():
+ print(f"Error: History file not found: {hist_file}", file=sys.stderr)
+ sys.exit(1)
+ with open(hist_path, 'r') as f:
+ hist_data = json.load(f)
+ if isinstance(hist_data, dict) and 'observations' in hist_data:
+ hist_data = hist_data['observations']
+ histories_list.append(hist_data)
+ history_data = histories_list
+
+ try:
+ result = compress_from_config(
+ config_space_def=config_space_def,
+ step_config=step_config,
+ history_data=history_data,
+ output_dir=args.output_dir,
+ save_info=not args.no_save
+ )
+
+ print(json.dumps(result, indent=2))
+
+ except Exception as e:
+ error_result = {
+ 'success': False,
+ 'error': str(e),
+ 'error_type': type(e).__name__
+ }
+ print(json.dumps(error_result, indent=2), file=sys.stderr)
+ sys.exit(1)
+
+
+# Script entry point: only run when executed directly, not when imported.
+if __name__ == '__main__':
+    main()
+
diff --git a/openbox/compressor/api/filling_factory.py b/openbox/compressor/api/filling_factory.py
new file mode 100644
index 000000000..7cedafe5f
--- /dev/null
+++ b/openbox/compressor/api/filling_factory.py
@@ -0,0 +1,137 @@
+"""
+Filling Factory Module
+Provides functions to create filling strategy instances from string identifiers or configurations.
+"""
+
+from typing import Optional, Dict, Any
+from ..filling import FillingStrategy, DefaultValueFilling
+from openbox import logger
+
+# Registry mapping filling-strategy string identifiers to their implementation
+# class, default constructor parameters, and a human-readable description.
+_FILLING_REGISTRY = {
+    'default': {
+        'class': DefaultValueFilling,
+        'default_params': {},
+        'description': 'Default value filling strategy',
+    },
+}
+
+
+def get_available_filling_strings() -> list:
+ return list(_FILLING_REGISTRY.keys())
+
+def validate_filling_string(filling_str: str) -> bool:
+ return filling_str in _FILLING_REGISTRY
+
+def get_filling_info(filling_str: str) -> Optional[Dict[str, Any]]:
+ if not validate_filling_string(filling_str):
+ return None
+
+ info = _FILLING_REGISTRY[filling_str].copy()
+ if info['class'] is not None:
+ info['class_name'] = info['class'].__name__
+ else:
+ info['class_name'] = None
+ info.pop('class', None)
+ return info
+
+def create_filling_from_string(
+ filling_str: str = 'default',
+ fixed_values: Optional[Dict[str, Any]] = None,
+ **kwargs
+) -> FillingStrategy:
+ """
+ Create a filling strategy instance from a string identifier.
+
+ Args:
+ filling_str: Filling strategy string identifier (default: 'default')
+ fixed_values: Optional dictionary mapping parameter names to their fixed values.
+ Example: {'learning_rate': 0.001, 'batch_size': 32}
+ **kwargs: Additional parameters for the filling strategy
+
+ Returns:
+ FillingStrategy instance
+
+ Examples:
+ >>> # Create default filling strategy
+ >>> filling = create_filling_from_string('default')
+
+ >>> # Create filling strategy with fixed values
+ >>> filling = create_filling_from_string(
+ ... 'default',
+ ... fixed_values={'learning_rate': 0.001, 'batch_size': 32}
+ ... )
+ """
+ if not validate_filling_string(filling_str):
+ logger.error(f"Invalid filling strategy string identifier: {filling_str}")
+ logger.info(f"Available filling strings: {list(_FILLING_REGISTRY.keys())}")
+ raise ValueError(
+ f"Invalid filling strategy string identifier: {filling_str}. "
+ f"Available options: {list(_FILLING_REGISTRY.keys())}"
+ )
+
+ registry_entry = _FILLING_REGISTRY[filling_str]
+ filling_class = registry_entry['class']
+
+ if filling_class is None:
+ logger.error(f"Filling strategy '{filling_str}' maps to None")
+ raise ValueError(f"Filling strategy '{filling_str}' is not available")
+
+ default_params = registry_entry['default_params'].copy()
+ default_params.update(kwargs)
+ if fixed_values is not None:
+ default_params['fixed_values'] = fixed_values
+
+ try:
+ filling = filling_class(**default_params)
+ logger.debug(
+ f"Created {filling_class.__name__} from '{filling_str}' "
+ f"with parameters: {default_params}"
+ )
+ return filling
+ except Exception as e:
+ logger.error(
+ f"Failed to create filling strategy from '{filling_str}': {e}. "
+ f"Parameters: {default_params}"
+ )
+ raise
+
+
+def create_filling_from_config(
+ config: Optional[Dict[str, Any]] = None
+) -> FillingStrategy:
+ """
+ Create a filling strategy from a configuration dictionary.
+
+ Args:
+ config: Configuration dictionary with the following structure:
+ {
+ 'type': 'default', # filling strategy type
+ 'fixed_values': { # optional fixed values
+ 'param1': value1,
+ 'param2': value2,
+ }
+ }
+ If None, returns default filling strategy.
+
+ Returns:
+ FillingStrategy instance
+
+ Examples:
+ >>> # Create from config with fixed values
+ >>> config = {
+ ... 'type': 'default',
+ ... 'fixed_values': {'learning_rate': 0.001}
+ ... }
+ >>> filling = create_filling_from_config(config)
+ """
+ if config is None:
+ return create_filling_from_string('default')
+
+ filling_str = config.get('type', 'default')
+ fixed_values = config.get('fixed_values', None)
+
+ return create_filling_from_string(
+ filling_str=filling_str,
+ fixed_values=fixed_values
+ )
+
diff --git a/openbox/compressor/api/step_factory.py b/openbox/compressor/api/step_factory.py
new file mode 100644
index 000000000..ee2850e89
--- /dev/null
+++ b/openbox/compressor/api/step_factory.py
@@ -0,0 +1,480 @@
+"""
+Step Factory Module
+Provides functions to create compression step instances from string identifiers.
+"""
+
+from typing import Optional, List, Dict, Any
+from openbox import logger
+
+from ..steps.dimension import (
+ SHAPDimensionStep,
+ CorrelationDimensionStep,
+ ExpertDimensionStep,
+ AdaptiveDimensionStep,
+ SHAPImportanceCalculator,
+ CorrelationImportanceCalculator,
+)
+from ..steps.range import (
+ BoundaryRangeStep,
+ SHAPBoundaryRangeStep,
+ KDEBoundaryRangeStep,
+ ExpertRangeStep,
+)
+from ..steps.projection import (
+ QuantizationProjectionStep,
+ REMBOProjectionStep,
+ HesBOProjectionStep,
+ KPCAProjectionStep,
+)
+from .. import (
+ CompressionStep,
+ PeriodicUpdateStrategy,
+ StagnationUpdateStrategy,
+ ImprovementUpdateStrategy,
+ HybridUpdateStrategy,
+ CompositeUpdateStrategy,
+)
+
+
+def _create_importance_calculator_from_string(
+ importance_str: Optional[str] = None,
+ **kwargs
+) -> Optional[Any]:
+ if importance_str is None:
+ return None
+
+ importance_str = importance_str.lower()
+
+ if importance_str in ('shap', 'shap_importance'):
+ return SHAPImportanceCalculator()
+ elif importance_str in ('correlation', 'corr'):
+ method = kwargs.get('correlation_method', 'spearman')
+ return CorrelationImportanceCalculator(method=method)
+ elif importance_str == 'correlation_spearman':
+ return CorrelationImportanceCalculator(method='spearman')
+ elif importance_str == 'correlation_pearson':
+ return CorrelationImportanceCalculator(method='pearson')
+ else:
+ logger.warning(
+ f"Unknown importance calculator string: {importance_str}. "
+ f"Using default SHAPImportanceCalculator."
+ )
+ return SHAPImportanceCalculator()
+
+
+def _create_update_strategy_from_string(
+ update_str: Optional[str] = None,
+ **kwargs
+) -> Optional[Any]:
+ """
+ Create an update strategy instance from a string identifier.
+
+ Args:
+ update_str: String identifier for update strategy.
+ Options: 'periodic', 'stagnation', 'improvement', 'hybrid', 'composite', 'none'
+ **kwargs: Additional parameters for the strategy:
+ - For 'periodic': 'period' (default: 5)
+ - For 'stagnation': 'stagnation_threshold' (default: 5)
+ - For 'improvement': 'improvement_threshold' (default: 3)
+ - For 'hybrid': 'period', 'stagnation_threshold', 'improvement_threshold'
+ - For 'composite': expects list of strategy strings in 'composite_strategies'
+
+ Returns:
+ UpdateStrategy instance or None
+ """
+ if update_str is None or update_str.lower() == 'none':
+ return None
+
+ update_str = update_str.lower()
+
+ if update_str == 'periodic':
+ period = kwargs.get('period', 5)
+ return PeriodicUpdateStrategy(period=period)
+ elif update_str == 'stagnation':
+ threshold = kwargs.get('stagnation_threshold', 5)
+ return StagnationUpdateStrategy(threshold=threshold)
+ elif update_str == 'improvement':
+ threshold = kwargs.get('improvement_threshold', 3)
+ return ImprovementUpdateStrategy(threshold=threshold)
+ elif update_str == 'hybrid':
+ period = kwargs.get('period', 10)
+ stagnation_threshold = kwargs.get('stagnation_threshold', None)
+ improvement_threshold = kwargs.get('improvement_threshold', None)
+ return HybridUpdateStrategy(
+ period=period,
+ stagnation_threshold=stagnation_threshold,
+ improvement_threshold=improvement_threshold
+ )
+ elif update_str == 'composite':
+ composite_strategies = kwargs.get('composite_strategies', [])
+ if not composite_strategies:
+ logger.warning("No strategies provided for composite, using default periodic")
+ return PeriodicUpdateStrategy(period=5)
+
+ strategies = []
+ for strategy_str in composite_strategies:
+ strategy = _create_update_strategy_from_string(strategy_str, **kwargs)
+ if strategy is not None:
+ strategies.append(strategy)
+
+ if not strategies:
+ logger.warning("No valid strategies for composite, using default periodic")
+ return PeriodicUpdateStrategy(period=5)
+
+ return CompositeUpdateStrategy(*strategies)
+ else:
+ logger.warning(
+ f"Unknown update strategy string: {update_str}. "
+ f"Using default PeriodicUpdateStrategy(period=5)."
+ )
+ return PeriodicUpdateStrategy(period=5)
+
+
+# Registry mapping step string identifiers to their implementation class,
+# default constructor parameters, and a human-readable description.
+# Identifiers are prefixed by pipeline stage: 'd_' dimension selection,
+# 'r_' range compression, 'p_' projection. The '*_none' entries map to None
+# and signal "no step for this stage".
+_STEP_REGISTRY = {
+    # --- dimension selection steps ---
+    'd_shap': {
+        'class': SHAPDimensionStep,
+        'default_params': {
+            'strategy': 'shap',
+            'topk': 20,
+        },
+        'description': 'SHAP-based dimension selection',
+    },
+    'd_corr': {
+        'class': CorrelationDimensionStep,
+        'default_params': {
+            'method': 'spearman',
+            'topk': 20,
+        },
+        'description': 'Correlation-based dimension selection',
+    },
+    'd_expert': {
+        'class': ExpertDimensionStep,
+        'default_params': {
+            'strategy': 'expert',
+            'expert_params': [],
+        },
+        'description': 'Expert knowledge-based dimension selection',
+    },
+    'd_adaptive': {
+        'class': AdaptiveDimensionStep,
+        'default_params': {
+            'importance_calculator': 'shap',  # Options: 'shap', 'correlation', 'correlation_spearman', 'correlation_pearson'
+            'update_strategy': 'periodic',  # Options: 'periodic', 'stagnation', 'improvement', 'hybrid', 'composite', 'none'
+            'initial_topk': 30,
+            'reduction_ratio': 0.2,
+            'min_dimensions': 5,
+            'max_dimensions': None,
+        },
+        'description': 'Adaptive dimension selection with dynamic topk adjustment',
+    },
+    'd_none': {
+        'class': None,
+        'default_params': {},
+        'description': 'No dimension selection step',
+    },
+
+    # --- range compression steps ---
+    'r_boundary': {
+        'class': BoundaryRangeStep,
+        'default_params': {
+            'method': 'boundary',
+            'top_ratio': 0.8,
+            'sigma': 2.0,
+            'enable_mixed_sampling': True,
+            'initial_prob': 0.9,
+        },
+        'description': 'Simple boundary-based range compression',
+    },
+    'r_shap': {
+        'class': SHAPBoundaryRangeStep,
+        'default_params': {
+            'method': 'shap_boundary',
+            'top_ratio': 0.8,
+            'sigma': 2.0,
+            'enable_mixed_sampling': True,
+            'initial_prob': 0.9,
+        },
+        'description': 'SHAP-weighted boundary range compression',
+    },
+    'r_kde': {
+        'class': KDEBoundaryRangeStep,
+        'default_params': {
+            'method': 'kde_boundary',
+            'source_top_ratio': 0.3,
+            'kde_coverage': 0.6,
+            'enable_mixed_sampling': True,
+            'initial_prob': 0.9,
+        },
+        'description': 'KDE-based boundary range compression',
+    },
+    'r_expert': {
+        'class': ExpertRangeStep,
+        'default_params': {
+            'method': 'expert',
+            'expert_ranges': {},
+            'enable_mixed_sampling': False,
+            'initial_prob': 0.9,
+        },
+        'description': 'Expert-specified range compression',
+    },
+    'r_none': {
+        'class': None,
+        'default_params': {},
+        'description': 'No range compression step',
+    },
+
+    # --- projection steps ---
+    'p_quant': {
+        'class': QuantizationProjectionStep,
+        'default_params': {
+            'method': 'quantization',
+            'max_num_values': 10,
+            'seed': 42,
+            'adaptive': False,
+        },
+        'description': 'Quantization projection step',
+    },
+    'p_rembo': {
+        'class': REMBOProjectionStep,
+        'default_params': {
+            'method': 'rembo',
+            'low_dim': 10,
+            'seed': 42,
+        },
+        'description': 'REMBO projection step',
+    },
+    'p_hesbo': {
+        'class': HesBOProjectionStep,
+        'default_params': {
+            'method': 'hesbo',
+            'low_dim': 10,
+            'max_num_values': None,
+            'seed': 42,
+        },
+        'description': 'HesBO projection step',
+    },
+    'p_kpca': {
+        'class': KPCAProjectionStep,
+        'default_params': {
+            'method': 'kpca',
+            'n_components': 10,
+            'kernel': 'rbf',
+            'gamma': None,
+            'space_history': None,
+            'seed': 42,
+        },
+        'description': 'Kernel PCA projection step',
+    },
+    'p_none': {
+        'class': None,
+        'default_params': {},
+        'description': 'No projection step',
+    },
+}
+
+
+def get_available_step_strings() -> Dict[str, List[str]]:
+ dimension_steps = [k for k in _STEP_REGISTRY.keys() if k.startswith('d_')]
+ range_steps = [k for k in _STEP_REGISTRY.keys() if k.startswith('r_')]
+ projection_steps = [k for k in _STEP_REGISTRY.keys() if k.startswith('p_')]
+
+ return {
+ 'dimension': dimension_steps,
+ 'range': range_steps,
+ 'projection': projection_steps,
+ 'all': list(_STEP_REGISTRY.keys()),
+ }
+
+
+def validate_step_string(step_str: str) -> bool:
+ return step_str in _STEP_REGISTRY
+
+
+def get_step_info(step_str: str) -> Optional[Dict[str, Any]]:
+ if not validate_step_string(step_str):
+ return None
+
+ info = _STEP_REGISTRY[step_str].copy()
+ if info['class'] is not None:
+ info['class_name'] = info['class'].__name__
+ else:
+ info['class_name'] = None
+ info.pop('class', None)
+ return info
+
+
+def create_step_from_string(
+    step_str: str,
+    **kwargs
+) -> Optional[CompressionStep]:
+    """
+    Create a compression step instance from a string identifier.
+
+    Args:
+        step_str: Step string identifier (e.g., 'd_shap', 'r_kde', 'p_quant')
+        **kwargs: Additional parameters to override defaults or provide required parameters
+
+    Returns:
+        CompressionStep instance, or None if step_str is 'd_none', 'r_none' or 'p_none'.
+
+    Raises:
+        ValueError: if step_str is not a registered identifier.
+
+    Examples:
+        >>> # Create a SHAP dimension step with default parameters
+        >>> step = create_step_from_string('d_shap')
+
+        >>> # Create a SHAP dimension step with custom topk
+        >>> step = create_step_from_string('d_shap', topk=10)
+
+        >>> # Create an expert dimension step with expert parameters
+        >>> step = create_step_from_string('d_expert', expert_params=['param1', 'param2'])
+
+        >>> # Create a KDE range step with custom parameters
+        >>> step = create_step_from_string('r_kde', source_top_ratio=0.5, kde_coverage=0.7)
+
+        >>> # Create an adaptive dimension step with string-based importance and update strategy
+        >>> step = create_step_from_string('d_adaptive',
+        ...                               importance_calculator='shap',
+        ...                               update_strategy='periodic',
+        ...                               period=10)
+
+        >>> # Create an adaptive dimension step with correlation importance and stagnation strategy
+        >>> step = create_step_from_string('d_adaptive',
+        ...                               importance_calculator='correlation_spearman',
+        ...                               update_strategy='stagnation',
+        ...                               stagnation_threshold=5)
+
+        >>> # Create an adaptive dimension step with hybrid update strategy
+        >>> step = create_step_from_string('d_adaptive',
+        ...                               importance_calculator='shap',
+        ...                               update_strategy='hybrid',
+        ...                               period=10,
+        ...                               stagnation_threshold=5,
+        ...                               improvement_threshold=3)
+
+        >>> # Create a SHAP dimension step with excluded parameters
+        >>> step = create_step_from_string('d_shap',
+        ...                               topk=10,
+        ...                               exclude_params=['param1', 'param2'])
+
+        >>> # Create a correlation dimension step with excluded parameters
+        >>> step = create_step_from_string('d_corr',
+        ...                               method='spearman',
+        ...                               topk=15,
+        ...                               exclude_params=['learning_rate', 'batch_size'])
+
+        >>> # Create an expert dimension step with excluded parameters
+        >>> step = create_step_from_string('d_expert',
+        ...                               expert_params=['param1', 'param2', 'param3'],
+        ...                               exclude_params=['param2'])  # Exclude param2 from expert selection
+
+        >>> # Create an adaptive dimension step with excluded parameters
+        >>> step = create_step_from_string('d_adaptive',
+        ...                               importance_calculator='shap',
+        ...                               update_strategy='periodic',
+        ...                               exclude_params=['param1', 'param2'])
+
+        >>> # Return None for 'none' steps
+        >>> step = create_step_from_string('d_none')  # Returns None
+    """
+    if not validate_step_string(step_str):
+        logger.error(f"Invalid step string identifier: {step_str}")
+        logger.info(f"Available step strings: {list(_STEP_REGISTRY.keys())}")
+        raise ValueError(
+            f"Invalid step string identifier: {step_str}. "
+            f"Available options: {list(_STEP_REGISTRY.keys())}"
+        )
+
+    registry_entry = _STEP_REGISTRY[step_str]
+    step_class = registry_entry['class']
+
+    # '*_none' identifiers are valid but intentionally produce no step.
+    if step_class is None:
+        logger.debug(f"Step string '{step_str}' maps to None (no step)")
+        return None
+
+    # Copy registry defaults so the shared registry is never mutated;
+    # caller kwargs take precedence over the defaults.
+    default_params = registry_entry['default_params'].copy()
+    default_params.update(kwargs)
+
+    # Special handling for AdaptiveDimensionStep: convert string parameters to instances
+    if step_str == 'd_adaptive' and step_class == AdaptiveDimensionStep:
+        if 'importance_calculator' in default_params:
+            # Pop first so the remaining params can be forwarded as **kwargs
+            # without duplicating the 'importance_calculator' key.
+            importance_calc = default_params.pop('importance_calculator')
+            if isinstance(importance_calc, str):
+                importance_calculator = _create_importance_calculator_from_string(
+                    importance_calc,
+                    **default_params
+                )
+                default_params['importance_calculator'] = importance_calculator
+            else:
+                # Already an instance (or None): put it back unchanged.
+                default_params['importance_calculator'] = importance_calc
+
+        if 'update_strategy' in default_params:
+            # Same pop-then-restore pattern as above for the update strategy.
+            update_strategy = default_params.pop('update_strategy')
+            if isinstance(update_strategy, str):
+                update_strategy = _create_update_strategy_from_string(
+                    update_strategy,
+                    **default_params
+                )
+                default_params['update_strategy'] = update_strategy
+            else:
+                default_params['update_strategy'] = update_strategy
+
+    try:
+        step = step_class(**default_params)
+        logger.debug(
+            f"Created {step_class.__name__} from '{step_str}' "
+            f"with parameters: {default_params}"
+        )
+        return step
+    except Exception as e:
+        logger.error(
+            f"Failed to create step from '{step_str}': {e}. "
+            f"Parameters: {default_params}"
+        )
+        raise
+
+
+def create_steps_from_strings(
+ step_strings: List[str],
+ step_params: Optional[Dict[str, Dict[str, Any]]] = None
+) -> List[CompressionStep]:
+ """
+ Create multiple compression step instances from a list of string identifiers.
+
+ Args:
+ step_strings: List of step string identifiers (e.g., ['d_shap', 'r_kde', 'p_quant'])
+ step_params: Optional dictionary mapping step strings to their parameter dictionaries.
+ Example: {'d_shap': {'topk': 10}, 'r_kde': {'source_top_ratio': 0.5}}
+
+ Returns:
+ List of CompressionStep instances (None entries are filtered out for 'none' steps).
+
+ Examples:
+ >>> # Create steps with default parameters
+ >>> steps = create_steps_from_strings(['d_shap', 'r_kde', 'p_quant'])
+
+ >>> # Create steps with custom parameters
+ >>> steps = create_steps_from_strings(
+ ... ['d_shap', 'r_kde'],
+ ... step_params={
+ ... 'd_shap': {'topk': 10},
+ ... 'r_kde': {'source_top_ratio': 0.5, 'kde_coverage': 0.7}
+ ... }
+ ... )
+ """
+ if step_params is None:
+ step_params = {}
+
+ steps = []
+ for step_str in step_strings:
+ params = step_params.get(step_str, {})
+
+ try:
+ step = create_step_from_string(step_str, **params)
+ if step is not None:
+ steps.append(step)
+ else:
+ logger.debug(f"Skipping None step for '{step_str}'")
+ except Exception as e:
+ logger.error(f"Failed to create step '{step_str}': {e}")
+ raise
+
+ logger.info(f"Created {len(steps)} step(s) from {len(step_strings)} string identifier(s)")
+ return steps
+
diff --git a/openbox/compressor/compressor.py b/openbox/compressor/compressor.py
new file mode 100644
index 000000000..8a305c8a4
--- /dev/null
+++ b/openbox/compressor/compressor.py
@@ -0,0 +1,326 @@
+from abc import ABC
+from typing import Optional, Tuple, List, Dict, TYPE_CHECKING
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace, Configuration
+import json
+import os
+from datetime import datetime
+from openbox import logger
+
+if TYPE_CHECKING:
+ from .sampling import SamplingStrategy
+ from .filling import FillingStrategy
+ from .pipeline import CompressionPipeline
+ from .step import CompressionStep
+
+
+class Compressor(ABC):
+    def __init__(self,
+                 config_space: ConfigurationSpace,
+                 filling_strategy: Optional['FillingStrategy'] = None,
+                 pipeline: Optional['CompressionPipeline'] = None,
+                 steps: Optional[List['CompressionStep']] = None,
+                 save_compression_info: bool = False,
+                 output_dir: Optional[str] = None,
+                 **kwargs):
+        """
+        Args:
+            config_space: Original (uncompressed) configuration space.
+            filling_strategy: Strategy used to fill parameters removed by
+                compression; defaults to DefaultValueFilling when None.
+            pipeline: Pre-built CompressionPipeline; takes precedence over ``steps``.
+            steps: Compression steps used to build a pipeline when ``pipeline`` is None.
+            save_compression_info: Whether to persist compression info on each
+                compression/update event.
+            output_dir: Directory for saved results (default './results/compression').
+            **kwargs: Extra options; only 'seed' (default 42) is read here.
+        """
+        self.origin_config_space = config_space
+        # Compressed spaces are populated by compress_space().
+        self.sample_space: Optional[ConfigurationSpace] = None
+        self.surrogate_space: Optional[ConfigurationSpace] = None
+        self.unprojected_space: Optional[ConfigurationSpace] = None  # Target space after unprojection
+
+        self.save_compression_info = save_compression_info
+        self.output_dir = output_dir or './results/compression'
+        self.compression_history: List[dict] = []  # Track compression updates
+
+        # Parallel lists tracking surrogate dimensionality over iterations.
+        self._dimension_history: List[int] = []
+        self._iteration_history: List[int] = []
+
+        self._source_similarities: Optional[Dict[int, float]] = None
+
+        if filling_strategy is None:
+            from .filling import DefaultValueFilling
+            self.filling_strategy = DefaultValueFilling()
+        else:
+            self.filling_strategy = filling_strategy
+
+        # Prefer an explicit pipeline; otherwise build one from the steps.
+        self.pipeline: Optional['CompressionPipeline'] = None
+        self.seed = kwargs.get('seed', 42)
+        if pipeline is not None:
+            self.pipeline = pipeline
+            self.pipeline.original_space = config_space
+            self.pipeline.filling_strategy = self.filling_strategy
+        elif steps is not None:
+            from .pipeline import CompressionPipeline
+            self.pipeline = CompressionPipeline(steps, seed=self.seed, original_space=config_space)
+            self.pipeline.filling_strategy = self.filling_strategy
+
+
+    def compress_space(self,
+                       space_history: Optional[List] = None,
+                       source_similarities: Optional[Dict[int, float]] = None) -> Tuple[ConfigurationSpace, ConfigurationSpace]:
+        """
+        Compress the original configuration space.
+
+        Args:
+            space_history: Optional list of history objects forwarded to the pipeline.
+            source_similarities: Optional mapping from source-history index to
+                similarity weight, forwarded to the pipeline.
+
+        Returns:
+            Tuple of (surrogate_space, sample_space).
+        """
+        if self.pipeline is not None:
+            # Use pipeline mode
+            self.surrogate_space, self.sample_space = self.pipeline.compress_space(
+                self.origin_config_space, space_history, source_similarities
+            )
+            self.unprojected_space = self.pipeline.unprojected_space
+
+            # Initialize dimension history tracking
+            self._dimension_history = [len(self.surrogate_space.get_hyperparameters())]
+            self._iteration_history = [0]
+
+            self._source_similarities = source_similarities
+
+            if self.save_compression_info:
+                self._save_compression_info(event='initial_compression')
+
+            return self.surrogate_space, self.sample_space
+        else:
+            # No pipeline configured: defer to the subclass hook.
+            return self._compress_space_impl(space_history)
+
+ def get_unprojected_space(self) -> ConfigurationSpace:
+ return self.pipeline.unprojected_space
+
+ def _compress_space_impl(self, space_history: Optional[List] = None) -> Tuple[ConfigurationSpace, ConfigurationSpace]:
+ raise NotImplementedError(
+ "Subclasses must either provide pipeline/steps or implement _compress_space_impl"
+ )
+
+ def needs_unproject(self) -> bool:
+ if self.pipeline is not None:
+ return self.pipeline.needs_unproject()
+ return False
+
+ def unproject_points(self, points: List[Configuration]) -> List[Configuration]:
+ return [self.unproject_point(point) for point in points]
+
+    def unproject_point(self, point: Configuration) -> Configuration:
+        """
+        Map a single compressed-space point back to the unprojected space.
+
+        The compressed values are attached to the result as ``_low_dim_config``
+        and the point's ``origin`` is carried over when present.
+
+        Raises:
+            ValueError: if a pipeline exists but compress_space() was never
+                called, or if no target space can be determined.
+        """
+        target_space = None
+        compressed_values = None
+        if self.pipeline is not None:
+            if self.unprojected_space is None:
+                raise ValueError("Unprojected space not initialized. Call compress_space() first.")
+            unprojected_values = self.pipeline.unproject_point(point)
+            target_space = self.unprojected_space
+            compressed_values = point.get_dictionary()
+        else:
+            # Without a pipeline, unprojection is the identity: accept
+            # Configuration-like objects, plain dicts, or anything dict() accepts.
+            if hasattr(point, 'get_dictionary'):
+                unprojected_values = point.get_dictionary()
+                target_space = getattr(point, 'configuration_space', None)
+                compressed_values = point.get_dictionary()
+            elif isinstance(point, dict):
+                unprojected_values = point
+                compressed_values = point
+            else:
+                unprojected_values = dict(point)
+                compressed_values = dict(point)
+
+        if target_space is None:
+            # Fall back to whichever compressed space is available.
+            target_space = self.unprojected_space or self.sample_space
+
+        if target_space is None:
+            raise ValueError("Unable to determine target configuration space for unprojection.")
+
+        unprojected_config = Configuration(target_space, values=unprojected_values)
+        if hasattr(point, 'origin') and getattr(point, 'origin', None) is not None:
+            unprojected_config.origin = point.origin
+        # Keep the low-dimensional representation for later re-projection/debugging.
+        unprojected_config._low_dim_config = compressed_values
+        return unprojected_config
+
+ def project_point(self, point) -> dict:
+ if self.pipeline is not None:
+ return self.pipeline.project_point(point)
+ if hasattr(point, 'get_dictionary'):
+ return point.get_dictionary()
+ elif isinstance(point, dict):
+ return point
+ else:
+ return dict(point)
+
+ def convert_config_to_surrogate_space(self, config: Configuration) -> Configuration:
+ if hasattr(config, 'configuration_space') and config.configuration_space == self.surrogate_space:
+ return config
+
+ # project_point() handles all transformations: filtering, clipping, and filling
+ projected_dict = self.project_point(config)
+
+ projected_config = Configuration(self.surrogate_space, values=projected_dict)
+ if hasattr(config, 'origin') and config.origin is not None:
+ projected_config.origin = config.origin
+ return projected_config
+
+ def convert_config_to_sample_space(self, config: Configuration) -> Configuration:
+ if hasattr(config, 'configuration_space') and config.configuration_space == self.sample_space:
+ return config
+
+ # project_point() handles all transformations: filtering, clipping, and filling
+ projected_dict = self.project_point(config)
+
+ sample_config = Configuration(self.sample_space, values=projected_dict)
+ if hasattr(config, 'origin') and config.origin is not None:
+ sample_config.origin = config.origin
+ return sample_config
+
+    def update_compression(self, history: History) -> bool:
+        """
+        Ask the pipeline to adaptively update the compressed spaces.
+
+        Returns:
+            True when the pipeline changed the spaces; always False when no
+            pipeline is configured.
+        """
+        if self.pipeline is not None:
+            updated = self.pipeline.update_compression(history)
+            if updated:
+                # Pull the refreshed spaces out of the pipeline.
+                self.surrogate_space = self.pipeline.surrogate_space
+                self.sample_space = self.pipeline.sample_space
+                self.unprojected_space = self.pipeline.unprojected_space
+
+                if self.save_compression_info:
+                    # NOTE(review): 'iteration' is filled from history.num_objectives,
+                    # which reads like the objective count rather than an iteration
+                    # index — confirm whether the observation count was intended.
+                    self._save_compression_info(event='adaptive_update', iteration=history.num_objectives)
+
+                current_dims = len(self.surrogate_space.get_hyperparameters())
+                current_iter = len(self._iteration_history)  # Next iteration number
+                self._dimension_history.append(current_dims)
+                self._iteration_history.append(current_iter)
+
+            return updated
+        return False
+
+ def get_sampling_strategy(self) -> 'SamplingStrategy':
+ if self.pipeline is not None:
+ return self.pipeline.get_sampling_strategy()
+ from .sampling import StandardSamplingStrategy
+ if self.sample_space is None:
+ raise ValueError("Sample space not initialized. Call compress_space() first.")
+ return StandardSamplingStrategy(self.sample_space)
+
+ def transform_source_data(self, source_hpo_data: Optional[List[History]]) -> Optional[List[History]]:
+ if not source_hpo_data or not self.surrogate_space:
+ return source_hpo_data
+
+ logger.info(f"Transforming {len(source_hpo_data)} source histories to match surrogate space")
+
+ transformed = []
+ for history in source_hpo_data:
+ new_observations = []
+ for obs in history.observations:
+ new_config = self.convert_config_to_surrogate_space(obs.config)
+ from openbox.utils.history import Observation
+ new_obs = Observation(
+ config=new_config,
+ objectives=obs.objectives,
+ constraints=obs.constraints if hasattr(obs, 'constraints') else None,
+ trial_state=obs.trial_state if hasattr(obs, 'trial_state') else None,
+ )
+ new_observations.append(new_obs)
+
+ new_history = History(
+ task_id=history.task_id,
+ num_objectives=history.num_objectives,
+ num_constraints=history.num_constraints,
+ config_space=self.surrogate_space,
+ )
+ new_history.update_observations(new_observations)
+ transformed.append(new_history)
+
+ logger.info(f"Successfully transformed {len(transformed)} histories")
+ return transformed
+
+ def _save_compression_info(
+ self,
+ event: str = 'compression',
+ iteration: Optional[int] = None,
+ output_dir: Optional[str] = None
+ ):
+ if output_dir is None:
+ output_dir = self.output_dir
+
+ if not self.pipeline:
+ logger.warning("No pipeline configured, cannot save compression info")
+ return
+
+ info = {
+ 'timestamp': datetime.now().isoformat(),
+ 'event': event,
+ 'iteration': iteration,
+ 'spaces': {
+ 'original': {
+ 'n_parameters': len(self.origin_config_space.get_hyperparameters()),
+ 'parameters': self.origin_config_space.get_hyperparameter_names()
+ },
+ 'sample': {
+ 'n_parameters': len(self.sample_space.get_hyperparameters()) if self.sample_space else 0,
+ 'parameters': self.sample_space.get_hyperparameter_names() if self.sample_space else []
+ },
+ 'surrogate': {
+ 'n_parameters': len(self.surrogate_space.get_hyperparameters()) if self.surrogate_space else 0,
+ 'parameters': self.surrogate_space.get_hyperparameter_names() if self.surrogate_space else []
+ }
+ },
+ 'compression_ratios': {
+ 'sample_to_original': len(self.sample_space.get_hyperparameters()) / len(self.origin_config_space.get_hyperparameters()) if self.sample_space else 1.0,
+ 'surrogate_to_original': len(self.surrogate_space.get_hyperparameters()) / len(self.origin_config_space.get_hyperparameters()) if self.surrogate_space else 1.0
+ },
+ 'pipeline': {
+ 'n_steps': len(self.pipeline.steps),
+ 'steps': []
+ },
+ 'sampling_strategy': type(self.pipeline.get_sampling_strategy()).__name__ if self.pipeline else 'Unknown'
+ }
+
+ # Each step provides its own info through get_step_info()
+ for i, step in enumerate(self.pipeline.steps):
+ step_info = step.get_step_info()
+ step_info['step_index'] = i
+ info['pipeline']['steps'].append(step_info)
+
+ performance_metrics = {}
+ for step in self.pipeline.steps:
+ if hasattr(step, '_calculator') and hasattr(step._calculator, '_cache'):
+ cache = step._calculator._cache
+ if cache and cache.get('importances_per_task') is not None and cache.get('task_names') is not None:
+ importances_per_task = cache['importances_per_task']
+ task_names = cache['task_names']
+ performance_metrics['multi_task_importances'] = importances_per_task.tolist()
+ performance_metrics['task_names'] = task_names
+ break
+
+ if self._source_similarities is not None:
+ performance_metrics['source_similarities'] = {
+ str(performance_metrics['task_names'][int(i)]): v
+ for i, v in self._source_similarities.items()
+ }
+
+ if performance_metrics:
+ info['performance_metrics'] = performance_metrics
+
+ self.compression_history.append(info)
+
+ os.makedirs(output_dir, exist_ok=True)
+
+ timestamp_str = datetime.now().strftime('%Y%m%d_%H%M%S')
+
+ event_filename = f'compression_{event}_{timestamp_str}.json'
+ event_filepath = os.path.join(output_dir, event_filename)
+
+ with open(event_filepath, 'w') as f:
+ json.dump(info, f, indent=2)
+ logger.info(f"Saved compression info to {event_filepath}")
+
+ history_filename = 'compression_history.json'
+ history_filepath = os.path.join(output_dir, history_filename)
+
+ with open(history_filepath, 'w') as f:
+ json.dump({
+ 'total_updates': len(self.compression_history),
+ 'history': self.compression_history
+ }, f, indent=2)
+ logger.info(f"Updated compression history: {history_filepath}")
+
+ def get_compression_summary(self) -> dict:
+ if not self.sample_space or not self.surrogate_space:
+ return {}
+
+ return {
+ 'original_dimensions': len(self.origin_config_space.get_hyperparameters()),
+ 'sample_dimensions': len(self.sample_space.get_hyperparameters()),
+ 'surrogate_dimensions': len(self.surrogate_space.get_hyperparameters()),
+ 'sample_compression_ratio': len(self.sample_space.get_hyperparameters()) / len(self.origin_config_space.get_hyperparameters()),
+ 'surrogate_compression_ratio': len(self.surrogate_space.get_hyperparameters()) / len(self.origin_config_space.get_hyperparameters()),
+ 'n_updates': len(self.compression_history),
+ 'pipeline_steps': [step.name for step in self.pipeline.steps] if self.pipeline else []
+ }
\ No newline at end of file
diff --git a/openbox/compressor/filling/__init__.py b/openbox/compressor/filling/__init__.py
new file mode 100644
index 000000000..43b602d7f
--- /dev/null
+++ b/openbox/compressor/filling/__init__.py
@@ -0,0 +1,15 @@
+from .base import FillingStrategy
+from .default import DefaultValueFilling
+from .clipping import (
+ clip_values_to_space,
+ is_within_bounds,
+ get_out_of_bounds_params,
+)
+
+__all__ = [
+ 'FillingStrategy',
+ 'DefaultValueFilling',
+ 'clip_values_to_space',
+ 'is_within_bounds',
+ 'get_out_of_bounds_params',
+]
\ No newline at end of file
diff --git a/openbox/compressor/filling/base.py b/openbox/compressor/filling/base.py
new file mode 100644
index 000000000..b6a114a5a
--- /dev/null
+++ b/openbox/compressor/filling/base.py
@@ -0,0 +1,59 @@
+from abc import ABC, abstractmethod
+from typing import Dict, Any
+from ConfigSpace import ConfigurationSpace
+from openbox import logger
+
+
class FillingStrategy(ABC):
    """
    Base class for strategies that fill missing parameters when converting
    configurations between spaces.

    This is useful when:
    1. Small space -> Large space: Need to fill missing parameters
    2. Large space -> Small space: Already filtered, but may need to fill if space grows later

    All filling strategies support a fixed_values mapping that overrides
    specific parameter values regardless of sampling or space ranges.
    """

    def __init__(self, fixed_values: Dict[str, Any] = None):
        # Normalise None -> {} so a mutable default is never shared.
        self.fixed_values = fixed_values or {}
        if self.fixed_values:
            logger.debug(f"{self.__class__.__name__} initialized with {len(self.fixed_values)} fixed values: {list(self.fixed_values.keys())}")

    @abstractmethod
    def fill_missing_parameters(self,
                                config_dict: Dict[str, Any],
                                target_space: ConfigurationSpace) -> Dict[str, Any]:
        """Return *config_dict* completed with a value for every parameter of *target_space*."""
        pass

    def _apply_fixed_values(self,
                            filled_dict: Dict[str, Any],
                            target_space: ConfigurationSpace) -> Dict[str, Any]:
        """Overlay the configured fixed values on a copy of *filled_dict*."""
        if not self.fixed_values:
            return filled_dict

        result = dict(filled_dict)
        known_params = target_space.get_hyperparameter_names()
        for name, forced in self.fixed_values.items():
            if name not in known_params:
                logger.warning(f"Fixed parameter '{name}' not found in target space")
                continue
            previous = result.get(name)
            result[name] = forced
            if previous != forced:
                logger.debug(f"Overrode parameter '{name}': {previous} -> {forced}")
        return result

    def get_default_value(self, hp) -> Any:
        """Best-effort default: declared default, numeric midpoint, or first choice."""
        if getattr(hp, 'default_value', None) is not None:
            return hp.default_value
        if hasattr(hp, 'lower') and hasattr(hp, 'upper'):
            # Midpoint of the numeric range (float even for integer params).
            return (hp.lower + hp.upper) / 2.0
        if hasattr(hp, 'choices') and len(hp.choices) > 0:
            return hp.choices[0]
        logger.warning(f"Cannot determine default value for {hp.name}, using None")
        return None
+
diff --git a/openbox/compressor/filling/clipping.py b/openbox/compressor/filling/clipping.py
new file mode 100644
index 000000000..8a6ef58fb
--- /dev/null
+++ b/openbox/compressor/filling/clipping.py
@@ -0,0 +1,86 @@
+from typing import Dict, Any, Tuple, List
+from ConfigSpace import ConfigurationSpace
+from openbox import logger
+
+
def clip_values_to_space(values: dict, space: ConfigurationSpace,
                         report: bool = True) -> Dict[str, Any]:
    """Return a copy of *values* with every known parameter forced into *space*.

    Numeric parameters are clamped to [lower, upper]; categorical values not
    among the declared choices are replaced by the hyperparameter's default
    (or first choice). Parameters unknown to *space* pass through untouched.
    When *report* is True, one warning summarises everything clipped.
    """
    clipped = dict(values)
    notes = []
    known = space.get_hyperparameter_names()

    for name, value in values.items():
        if name not in known:
            # Parameter not in target space, will be filtered out elsewhere
            continue

        hp = space.get_hyperparameter(name)

        if hasattr(hp, 'lower') and hasattr(hp, 'upper'):
            # Numeric parameter: clamp into [lower, upper].
            if value < hp.lower:
                clipped[name] = hp.lower
                if report:
                    notes.append(
                        f"{name}({value:.4f} -> {hp.lower} [lower bound])"
                    )
            elif value > hp.upper:
                clipped[name] = hp.upper
                if report:
                    notes.append(
                        f"{name}({value:.4f} -> {hp.upper} [upper bound])"
                    )
        elif hasattr(hp, 'choices') and value not in hp.choices:
            # Categorical parameter: fall back to default (or first) choice.
            replacement = hp.default_value if hasattr(hp, 'default_value') else hp.choices[0]
            clipped[name] = replacement
            if report:
                notes.append(
                    f"{name}({value} -> {replacement} [invalid choice])"
                )

    if notes and report:
        logger.warning(f"Clipped {len(notes)} parameter(s) to space bounds: {', '.join(notes)}")

    return clipped
+
+
def is_within_bounds(values: dict, space: ConfigurationSpace) -> bool:
    """Return True when every parameter of *values* known to *space* is valid.

    Numeric parameters must lie inside [lower, upper]; categorical parameters
    must be one of the declared choices. Parameters *space* does not define
    are ignored (they get filtered elsewhere).

    Implemented on top of get_out_of_bounds_params() so the two functions
    cannot disagree about what "in bounds" means (the original duplicated
    the validity checks verbatim).
    """
    return not get_out_of_bounds_params(values, space)
+
+
def get_out_of_bounds_params(values: dict, space: ConfigurationSpace) -> List[str]:
    """List the parameter names in *values* whose value is invalid for *space*.

    A numeric value is invalid outside [lower, upper]; a categorical value is
    invalid when not among the declared choices. Parameters unknown to
    *space* are skipped entirely.
    """
    def _violates(hp, value) -> bool:
        # One predicate per hyperparameter kind; anything else is accepted.
        if hasattr(hp, 'lower') and hasattr(hp, 'upper'):
            return value < hp.lower or value > hp.upper
        if hasattr(hp, 'choices'):
            return value not in hp.choices
        return False

    known = space.get_hyperparameter_names()
    return [
        name
        for name, value in values.items()
        if name in known and _violates(space.get_hyperparameter(name), value)
    ]
+
diff --git a/openbox/compressor/filling/default.py b/openbox/compressor/filling/default.py
new file mode 100644
index 000000000..3957b44a6
--- /dev/null
+++ b/openbox/compressor/filling/default.py
@@ -0,0 +1,22 @@
+from typing import Dict, Any
+from ConfigSpace import ConfigurationSpace
+from openbox import logger
+
+from .base import FillingStrategy
+
+
class DefaultValueFilling(FillingStrategy):
    """Fill absent parameters with each hyperparameter's default value."""

    def __init__(self, fixed_values: Dict[str, Any] = None):
        super().__init__(fixed_values=fixed_values)

    def fill_missing_parameters(self,
                                config_dict: Dict[str, Any],
                                target_space: ConfigurationSpace) -> Dict[str, Any]:
        """Complete *config_dict* for *target_space*, then overlay any fixed values."""
        completed = dict(config_dict)
        for name in target_space.get_hyperparameter_names():
            if name in completed:
                continue
            completed[name] = self.get_default_value(target_space.get_hyperparameter(name))
        return self._apply_fixed_values(completed, target_space)
+
diff --git a/openbox/compressor/pipeline.py b/openbox/compressor/pipeline.py
new file mode 100644
index 000000000..603eb04dd
--- /dev/null
+++ b/openbox/compressor/pipeline.py
@@ -0,0 +1,170 @@
+import copy
+from typing import List, Optional, Tuple, Dict
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace
+from openbox import logger
+
+from .step import CompressionStep
+from .progress import OptimizerProgress
+from .sampling import SamplingStrategy, StandardSamplingStrategy
+
+
class CompressionPipeline:
    """Ordered chain of CompressionStep objects that shrinks a search space.

    compress_space() feeds the original space through every step and records
    the space after each one; from that sequence three views are derived:

    * ``surrogate_space``   — output of the final step (where the model is fit).
    * ``sample_space``      — output of the last step that affects sampling.
    * ``unprojected_space`` — input of the first step needing unprojection,
      i.e. the target space when mapping points back.
    """

    def __init__(self, steps: List[CompressionStep], seed: int = 42, original_space: Optional[ConfigurationSpace] = None):
        # steps: ordered list of compression stages to apply.
        self.steps = steps
        # seed: applied to every intermediate space for reproducible sampling.
        self.seed = seed
        self.original_space = original_space
        # Tracks incumbent progress across iterations for adaptive updates.
        self.progress = OptimizerProgress()

        # space_after_steps[i] is the space after step i; index 0 is the input.
        self.space_after_steps: List[ConfigurationSpace] = []
        self.sample_space: Optional[ConfigurationSpace] = None
        self.surrogate_space: Optional[ConfigurationSpace] = None
        self.unprojected_space: Optional[ConfigurationSpace] = None  # Target space after unprojection

        self.sampling_strategy: Optional[SamplingStrategy] = None
        # Set externally (by the compressor) before compress_space() runs.
        self.filling_strategy = None

    def compress_space(self,
                       original_space: ConfigurationSpace,
                       space_history: Optional[List] = None,
                       source_similarities: Optional[Dict[int, float]] = None) -> Tuple[ConfigurationSpace, ConfigurationSpace]:
        """Run every step in order and derive the surrogate/sample spaces.

        Returns (surrogate_space, sample_space).
        """
        # Remember the very first original space; later re-compressions may
        # pass an intermediate space as the starting point.
        if self.original_space is None:
            self.original_space = original_space

        logger.debug(f"Starting compression pipeline with {len(self.steps)} steps")

        # Deep-copy so the caller's space object is never mutated.
        current_space = copy.deepcopy(original_space)
        current_space.seed(self.seed)
        self.space_after_steps = [current_space]

        for i, step in enumerate(self.steps):
            input_dim = len(current_space.get_hyperparameters())
            logger.info(f"Step {i+1}/{len(self.steps)}: {step.name}")
            logger.info(f"  Input: {input_dim} parameters")

            # Wire the step into the chain before compressing.
            step.input_space = current_space
            step.filling_strategy = self.filling_strategy
            current_space = step.compress(current_space, space_history, source_similarities)
            current_space.seed(self.seed)
            step.output_space = current_space

            output_dim = len(current_space.get_hyperparameters())
            dimension_ratio = output_dim / input_dim if input_dim > 0 else 1.0

            # Steps may report an "effective" ratio (e.g. range compression
            # shrinks ranges without dropping dimensions).
            effective_ratio = dimension_ratio
            if hasattr(step, 'compression_info') and step.compression_info:
                if 'avg_compression_ratio' in step.compression_info:
                    effective_ratio = step.compression_info['avg_compression_ratio']
                    logger.info(f"  Output: {output_dim} parameters (dimension: {dimension_ratio:.2%}, effective: {effective_ratio:.2%})")
                else:
                    logger.info(f"  Output: {output_dim} parameters (compression ratio: {dimension_ratio:.2%})")
                logger.info(f"  Details: {step.compression_info}")
            else:
                logger.info(f"  Output: {output_dim} parameters (compression ratio: {dimension_ratio:.2%})")

            self.space_after_steps.append(current_space)

        self._determine_spaces()

        self._build_sampling_strategy(original_space)

        return self.surrogate_space, self.sample_space

    def _determine_spaces(self):
        """Derive sample/surrogate/unprojected spaces from the per-step outputs."""
        sample_space_idx = 0
        for i, step in enumerate(self.steps):
            if step.affects_sampling_space():
                sample_space_idx = i + 1

        # Surrogate space is always the final output
        self.surrogate_space = self.space_after_steps[-1]
        # Sample space is determined by the last step that affects it
        self.sample_space = self.space_after_steps[sample_space_idx]

        # Unprojected space is the input to the first transformative step (that needs unproject)
        # Default: stay in the current sampling space when no such step exists
        self.unprojected_space = self.sample_space
        for i, step in enumerate(self.steps):
            if step.needs_unproject():
                self.unprojected_space = self.space_after_steps[i]
                break

    def _build_sampling_strategy(self, original_space: ConfigurationSpace):
        """Pick the sampling strategy: the last step offering one wins, else standard."""
        # Check from last to first, only range compression can provide a mixed sampling strategy
        for step in reversed(self.steps):
            strategy = step.get_sampling_strategy()
            if strategy is not None:
                self.sampling_strategy = strategy
                return
        self.sampling_strategy = StandardSamplingStrategy(self.sample_space, seed=self.seed)

    def update_compression(self, history: History) -> bool:
        """Let adaptive steps react to *history*; rebuild spaces when any changed.

        Returns True when a re-compression actually happened.
        """
        self.progress.update_from_history(history)

        updated = False
        updated_steps = []
        for step in self.steps:
            if step.supports_adaptive_update():
                if step.update(self.progress, history):
                    updated = True
                    updated_steps.append(step)
                    logger.info(f"Step {step.name} updated compression strategy")

        if updated and self.original_space is not None:
            # Check if any step needs to increase dimensions
            # For dimension increase, we must start from original_space
            # For dimension decrease, we can use progressive compression from surrogate_space
            needs_original_space = False
            for step in updated_steps:
                if hasattr(step, 'current_topk') and hasattr(step, 'initial_topk'):
                    # If current_topk > number of params in surrogate_space, need original_space
                    if self.surrogate_space and step.current_topk > len(self.surrogate_space.get_hyperparameters()):
                        needs_original_space = True
                        break

            if needs_original_space:
                start_space = self.original_space
                logger.debug(f"Dimension increase detected, re-compressing from original space with {len(start_space.get_hyperparameters())} parameters")
            else:
                uses_progressive = all(step.uses_progressive_compression() for step in updated_steps)
                if uses_progressive:
                    start_space = self.surrogate_space
                    logger.debug(f"Using progressive compression, starting from {len(start_space.get_hyperparameters())} parameters")
                else:
                    start_space = self.original_space
                    logger.debug(f"Using re-compression, starting from {len(start_space.get_hyperparameters())} parameters")

            space_history = [history] if history else None
            # Note: source_similarities should be passed from the caller (compressor/advisor)
            # Here use None since we're updating based on current task's history
            self.compress_space(start_space, space_history, source_similarities=None)
            return True

        return False

    def get_sampling_strategy(self) -> SamplingStrategy:
        """Return the active strategy, lazily creating a standard one if needed."""
        if self.sampling_strategy is None:
            self.sampling_strategy = StandardSamplingStrategy(self.sample_space, seed=self.seed)
        return self.sampling_strategy

    def needs_unproject(self) -> bool:
        """True when any step requires points to be mapped back."""
        return any(step.needs_unproject() for step in self.steps)

    def unproject_point(self, point) -> dict:
        # Unproject a point through all steps (in reverse order)
        current_dict = point.get_dictionary() if hasattr(point, 'get_dictionary') else dict(point)

        for step in reversed(self.steps):
            if step.needs_unproject():
                current_dict = step.unproject_point(current_dict)
        return current_dict

    def project_point(self, point) -> dict:
        # project a point through all steps (in forward order)
        current_dict = point.get_dictionary() if hasattr(point, 'get_dictionary') else dict(point)

        for step in self.steps:
            # Steps never wired by compress_space() (input_space unset) are skipped.
            if step.input_space is not None:
                current_dict = step.project_point(current_dict)
        return current_dict
+
diff --git a/openbox/compressor/progress.py b/openbox/compressor/progress.py
new file mode 100644
index 000000000..215b7d471
--- /dev/null
+++ b/openbox/compressor/progress.py
@@ -0,0 +1,81 @@
+from typing import List, Optional
+from openbox.utils.history import History
+from openbox import logger
+
+
class OptimizerProgress:
    """Tracks incumbent progress across iterations for adaptive triggers.

    Stores the best value per iteration and keeps consecutive
    improvement/stagnation counters that update strategies can query.
    """

    def __init__(self):
        self.iteration = 0
        self.best_value_history: List[float] = []
        self.improvement_count = 0
        self.stagnation_count = 0
        self.last_best_value: Optional[float] = None
        self.minimize = True  # True for minimization, False for maximization

    def update(self, current_best_value: float, minimize: bool = True):
        """Record one iteration's incumbent and refresh the streak counters."""
        self.iteration += 1
        self.minimize = minimize

        previous = self.last_best_value
        if previous is None:
            # First iteration: no streak yet.
            self.improvement_count = 0
            self.stagnation_count = 0
        else:
            got_better = (current_best_value < previous) if minimize else (current_best_value > previous)
            if got_better:
                self.improvement_count += 1
                self.stagnation_count = 0
            else:
                self.stagnation_count += 1
                self.improvement_count = 0

        self.last_best_value = current_best_value
        self.best_value_history.append(current_best_value)

    def update_from_history(self, history: History):
        """Pull the incumbent out of *history* and feed it to update()."""
        if history is None or len(history) == 0:
            return
        incumbent_value = history.get_incumbent_value()
        if incumbent_value is None:
            return
        # Default, should be inferred from history if possible.
        self.update(incumbent_value, minimize=True)

    def has_improvement(self, threshold: int = 3) -> bool:
        """True after *threshold* consecutive improving iterations."""
        return self.improvement_count >= threshold

    def is_stagnant(self, threshold: int = 5) -> bool:
        """True after *threshold* consecutive non-improving iterations."""
        return self.stagnation_count >= threshold

    def should_periodic_update(self, period: int = 10) -> bool:
        """True on every *period*-th iteration (never on iteration 0)."""
        return self.iteration > 0 and self.iteration % period == 0

    def get_recent_trend(self, window: int = 5) -> str:
        """Classify the last *window* incumbents as 'improving', 'degrading' or 'stable'."""
        values = self.best_value_history
        if len(values) < window:
            return 'stable'

        first, last = values[-window], values[-1]
        if last == first:
            return 'stable'
        if self.minimize:
            return 'improving' if last < first else 'degrading'
        return 'improving' if last > first else 'degrading'

    def reset(self):
        """Forget all recorded progress."""
        self.iteration = 0
        self.best_value_history = []
        self.improvement_count = 0
        self.stagnation_count = 0
        self.last_best_value = None
+
diff --git a/openbox/compressor/sampling/__init__.py b/openbox/compressor/sampling/__init__.py
new file mode 100644
index 000000000..5e5ab6b8a
--- /dev/null
+++ b/openbox/compressor/sampling/__init__.py
@@ -0,0 +1,10 @@
+from .base import SamplingStrategy
+from .standard import StandardSamplingStrategy
+from .mixed import MixedRangeSamplingStrategy
+
+__all__ = [
+ 'SamplingStrategy',
+ 'StandardSamplingStrategy',
+ 'MixedRangeSamplingStrategy',
+]
+
diff --git a/openbox/compressor/sampling/base.py b/openbox/compressor/sampling/base.py
new file mode 100644
index 000000000..84e6d4167
--- /dev/null
+++ b/openbox/compressor/sampling/base.py
@@ -0,0 +1,17 @@
+from abc import ABC, abstractmethod
+from typing import List, Optional, Tuple
+from ConfigSpace import ConfigurationSpace, Configuration
+
+
class SamplingStrategy(ABC):
    """Interface for drawing configurations during optimization."""

    @abstractmethod
    def sample(self, n: int = 1) -> List[Configuration]:
        """Draw *n* configurations."""
        pass

    def update_probabilities(self, results: List[Tuple[Configuration, float]]):
        """Optional hook: adapt sampling from (config, performance) pairs. No-op by default."""
        pass

    @abstractmethod
    def get_spaces(self) -> Tuple[ConfigurationSpace, Optional[ConfigurationSpace]]:
        """Return (primary space, optional secondary space) used for sampling."""
        pass
+
diff --git a/openbox/compressor/sampling/mixed.py b/openbox/compressor/sampling/mixed.py
new file mode 100644
index 000000000..6fae0f4c7
--- /dev/null
+++ b/openbox/compressor/sampling/mixed.py
@@ -0,0 +1,85 @@
+import random
+from typing import List, Optional, Tuple, Dict, Any
+from ConfigSpace import ConfigurationSpace, Configuration
+from .base import SamplingStrategy
+from openbox import logger
+
+
class MixedRangeSamplingStrategy(SamplingStrategy):
    """Sample from a compressed space with probability p, else the original space.

    The probability adapts: whichever space yields better (lower) mean
    objective values gets sampled more often, bounded to [0.5, 0.95].
    """

    def __init__(self,
                 compressed_space: ConfigurationSpace,
                 original_space: ConfigurationSpace,
                 initial_prob: float = 0.9,
                 method: str = 'boundary',
                 seed: Optional[int] = None):
        """
        Args:
            compressed_space: Compressed configuration space
            original_space: Original configuration space
            initial_prob: Initial probability of sampling from compressed space
            method: Compression method ('boundary' or 'expert')
            seed: Random seed
        """
        self.compressed_space = compressed_space
        self.original_space = original_space
        self.compressed_prob = initial_prob
        self.method = method
        self.seed = seed

        # Bug fix: use a private RNG instead of random.seed(), which mutated
        # the process-global random state as a construction side effect.
        self._rng = random.Random(seed)
        if seed is not None:
            self.compressed_space.seed(seed)
            self.original_space.seed(seed)

        # Objective values observed per source space, for adaptation.
        self.compressed_results: List[float] = []
        self.original_results: List[float] = []

    def sample(self, n: int = 1) -> List[Configuration]:
        """Draw *n* configurations, tagging each with the space it came from."""
        configs = []
        for _ in range(n):
            if self._rng.random() < self.compressed_prob:
                config = self.compressed_space.sample_configuration()
                config._sampled_from = 'compressed'
            else:
                config = self.original_space.sample_configuration()
                config._sampled_from = 'original'
            configs.append(config)
        return configs

    def update_probabilities(self, results: List[Tuple[Configuration, float]]):
        """Shift sampling probability toward the better-performing space.

        Performance is the mean objective value (lower is better); only
        configurations tagged by sample() contribute.
        """
        compressed_perf = []
        original_perf = []
        for config, perf in results:
            source = getattr(config, '_sampled_from', None)
            if source == 'compressed':
                compressed_perf.append(perf)
            elif source == 'original':
                original_perf.append(perf)

        self.compressed_results.extend(compressed_perf)
        self.original_results.extend(original_perf)

        # Adapt only when both spaces produced fresh evidence.
        if compressed_perf and original_perf:
            compressed_mean = sum(compressed_perf) / len(compressed_perf)
            original_mean = sum(original_perf) / len(original_perf)

            if original_mean < compressed_mean:
                self.compressed_prob = max(0.5, self.compressed_prob - 0.1)
                logger.info(f"Original range performing better. Adjusting compressed_prob to {self.compressed_prob:.2f}")
            else:
                self.compressed_prob = min(0.95, self.compressed_prob + 0.05)
                logger.info(f"Compressed range performing better. Adjusting compressed_prob to {self.compressed_prob:.2f}")

    def get_spaces(self) -> Tuple[ConfigurationSpace, Optional[ConfigurationSpace]]:
        """Return (compressed_space, original_space)."""
        return (self.compressed_space, self.original_space)

    def get_statistics(self) -> Dict[str, Any]:
        """Current adaptation state: probability, sample counts and per-space means."""
        return {
            'compressed_prob': self.compressed_prob,
            'compressed_samples': len(self.compressed_results),
            'original_samples': len(self.original_results),
            'compressed_mean': sum(self.compressed_results) / len(self.compressed_results) if self.compressed_results else None,
            'original_mean': sum(self.original_results) / len(self.original_results) if self.original_results else None,
        }
+
diff --git a/openbox/compressor/sampling/standard.py b/openbox/compressor/sampling/standard.py
new file mode 100644
index 000000000..cbca80ccd
--- /dev/null
+++ b/openbox/compressor/sampling/standard.py
@@ -0,0 +1,24 @@
+import random
+from typing import List, Optional, Tuple
+from ConfigSpace import ConfigurationSpace, Configuration
+from .base import SamplingStrategy
+
+
class StandardSamplingStrategy(SamplingStrategy):
    """Plain sampling: draw configurations directly from a single space."""

    def __init__(self, space: ConfigurationSpace, seed: Optional[int] = None):
        self.space = space
        self.seed = seed
        if seed is not None:
            self.space.seed(seed)
            # NOTE(review): this seeds the *global* random module even though
            # this class never uses it — confirm the side effect is intended.
            random.seed(seed)

    def sample(self, n: int = 1) -> List[Configuration]:
        """Draw *n* configurations from the wrapped space."""
        return [self.space.sample_configuration() for _ in range(n)]

    def get_spaces(self) -> Tuple[ConfigurationSpace, Optional[ConfigurationSpace]]:
        """Return (space, None); there is no secondary space."""
        return (self.space, None)
+
diff --git a/openbox/compressor/step.py b/openbox/compressor/step.py
new file mode 100644
index 000000000..011e4bde6
--- /dev/null
+++ b/openbox/compressor/step.py
@@ -0,0 +1,96 @@
+"""
+Compression step base class and interface.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Optional, List, Dict, TYPE_CHECKING
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace
+from openbox import logger
+
+from .progress import OptimizerProgress
+if TYPE_CHECKING:
+ from .sampling import SamplingStrategy
+ from .filling import FillingStrategy
+
+
class CompressionStep(ABC):
    """One stage of a compression pipeline.

    A step turns an input ConfigurationSpace into an output space and knows
    how to move points between the two. Subclasses override the capability
    hooks (needs_unproject, affects_sampling_space, supports_adaptive_update,
    ...) to tell the pipeline how to treat them.
    """

    def __init__(self, name: str, **kwargs):
        self.name = name
        self.kwargs = kwargs
        self.input_space: Optional[ConfigurationSpace] = None
        self.output_space: Optional[ConfigurationSpace] = None
        self.filling_strategy: Optional['FillingStrategy'] = None  # Will be set by pipeline/compressor

    @abstractmethod
    def compress(self, input_space: ConfigurationSpace,
                 space_history: Optional[List[History]] = None,
                 source_similarities: Optional[Dict[int, float]] = None,
                 **kwargs) -> ConfigurationSpace:
        """Produce this step's output space from *input_space*."""
        pass

    @staticmethod
    def _as_dict(point) -> dict:
        # Accept Configuration-like objects, dicts, or anything dict() takes.
        if hasattr(point, 'get_dictionary'):
            return point.get_dictionary()
        if isinstance(point, dict):
            return point
        return dict(point)

    def project_point(self, point) -> dict:
        """Map a point from the input space toward the output space.

        Normalises *point* to a plain dict, then fills any parameters the
        output space expects but the point lacks (when both an output space
        and a filling strategy are available).
        """
        point_dict = self._as_dict(point)
        if self.output_space is not None and self.filling_strategy is not None:
            point_dict = self.filling_strategy.fill_missing_parameters(
                point_dict, self.output_space
            )
        return point_dict

    def unproject_point(self, point) -> dict:
        """Map a point back from the output space; the base class is the identity."""
        return self._as_dict(point)

    def needs_unproject(self) -> bool:
        """Whether points must be unprojected through this step (e.g. embeddings)."""
        return False

    def affects_sampling_space(self) -> bool:
        """Whether this step changes the space new configurations are drawn from."""
        return False

    def update(self, progress: 'OptimizerProgress', history: History) -> bool:
        # True if compression was updated and needs re-compression
        return False

    def get_sampling_strategy(self) -> Optional['SamplingStrategy']:
        """Custom sampling strategy provided by this step, if any."""
        return None

    def supports_adaptive_update(self) -> bool:
        """Whether update() can ever request a re-compression for this step."""
        return False

    def uses_progressive_compression(self) -> bool:
        """
        Whether this step uses progressive compression (compress on top of previous compression)
        or re-compression (compress from original space).

        Progressive: periodic dimension reduction (30d -> 24d -> 19d)
        Re-compression: re-evaluate from scratch based on new data
        """
        return False

    def get_step_info(self) -> dict:
        """Serializable snapshot of this step for compression reports."""
        description = {
            'name': self.name,
            'type': type(self).__name__,
            'input_space_params': len(self.input_space.get_hyperparameters()) if self.input_space else 0,
            'output_space_params': len(self.output_space.get_hyperparameters()) if self.output_space else 0,
            'supports_adaptive_update': self.supports_adaptive_update(),
            'uses_progressive_compression': self.uses_progressive_compression()
        }
        if hasattr(self, 'compression_info') and self.compression_info:
            description['compression_info'] = self.compression_info
        return description
+
diff --git a/openbox/compressor/steps/__init__.py b/openbox/compressor/steps/__init__.py
new file mode 100644
index 000000000..20785e219
--- /dev/null
+++ b/openbox/compressor/steps/__init__.py
@@ -0,0 +1,47 @@
+from .dimension import (
+ DimensionSelectionStep,
+ SHAPDimensionStep,
+ ExpertDimensionStep,
+ CorrelationDimensionStep,
+ AdaptiveDimensionStep,
+)
+
+from .projection import (
+ TransformativeProjectionStep,
+ REMBOProjectionStep,
+ HesBOProjectionStep,
+ KPCAProjectionStep,
+ QuantizationProjectionStep,
+)
+
+from .range import (
+ RangeCompressionStep,
+ BoundaryRangeStep,
+ ExpertRangeStep,
+ SHAPBoundaryRangeStep,
+ KDEBoundaryRangeStep,
+)
+
+__all__ = [
+ # Dimension selection steps
+ 'DimensionSelectionStep',
+ 'SHAPDimensionStep',
+ 'ExpertDimensionStep',
+ 'CorrelationDimensionStep',
+ 'AdaptiveDimensionStep',
+
+ # Projection steps
+ 'TransformativeProjectionStep',
+ 'REMBOProjectionStep',
+ 'HesBOProjectionStep',
+ 'KPCAProjectionStep',
+ 'QuantizationProjectionStep',
+
+ # Range compression steps
+ 'RangeCompressionStep',
+ 'BoundaryRangeStep',
+ 'ExpertRangeStep',
+ 'SHAPBoundaryRangeStep',
+ 'KDEBoundaryRangeStep',
+]
+
diff --git a/openbox/compressor/steps/dimension/__init__.py b/openbox/compressor/steps/dimension/__init__.py
new file mode 100644
index 000000000..5641a4b51
--- /dev/null
+++ b/openbox/compressor/steps/dimension/__init__.py
@@ -0,0 +1,22 @@
+from .base import DimensionSelectionStep
+from .shap import SHAPDimensionStep
+from .expert import ExpertDimensionStep
+from .correlation import CorrelationDimensionStep
+from .adaptive import AdaptiveDimensionStep
+from .importance import (
+ ImportanceCalculator,
+ SHAPImportanceCalculator,
+ CorrelationImportanceCalculator,
+)
+
+__all__ = [
+ 'DimensionSelectionStep',
+ 'SHAPDimensionStep',
+ 'ExpertDimensionStep',
+ 'CorrelationDimensionStep',
+ 'AdaptiveDimensionStep',
+ 'ImportanceCalculator',
+ 'SHAPImportanceCalculator',
+ 'CorrelationImportanceCalculator',
+]
+
diff --git a/openbox/compressor/steps/dimension/adaptive.py b/openbox/compressor/steps/dimension/adaptive.py
new file mode 100644
index 000000000..77af10e6e
--- /dev/null
+++ b/openbox/compressor/steps/dimension/adaptive.py
@@ -0,0 +1,131 @@
+import numpy as np
+from typing import Optional, List, Dict
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace
+
+from .base import DimensionSelectionStep
+from .importance import ImportanceCalculator, SHAPImportanceCalculator
+from ... import OptimizerProgress,UpdateStrategy, PeriodicUpdateStrategy
+from openbox import logger
+
+class AdaptiveDimensionStep(DimensionSelectionStep):
+ def __init__(self,
+ importance_calculator: Optional[ImportanceCalculator] = None,
+ update_strategy: Optional[UpdateStrategy] = None,
+ initial_topk: int = 30,
+ reduction_ratio: float = 0.2,
+ min_dimensions: int = 5,
+ max_dimensions: Optional[int] = None,
+ expert_params: Optional[List[str]] = None,
+ exclude_params: Optional[List[str]] = None,
+ **kwargs):
+ super().__init__(strategy='adaptive', expert_params=expert_params, exclude_params=exclude_params, **kwargs)
+
+ self.importance_calculator = importance_calculator or SHAPImportanceCalculator()
+ self.update_strategy = update_strategy or PeriodicUpdateStrategy(period=5)
+
+ self.current_topk = initial_topk
+ self.initial_topk = initial_topk
+ self.reduction_ratio = reduction_ratio
+ self.min_dimensions = min_dimensions
+ self.max_dimensions = max_dimensions
+
+ self.original_space: Optional[ConfigurationSpace] = None
+ self.space_history: Optional[List[History]] = None
+
+ if self.update_strategy:
+ logger.info(f"AdaptiveDimensionStep initialized: "
+ f"importance={self.importance_calculator.get_name()}, "
+ f"update={self.update_strategy.get_name()}, "
+ f"initial_topk={initial_topk}")
+ else:
+ logger.info(f"AdaptiveDimensionStep initialized: "
+ f"importance={self.importance_calculator.get_name()}, "
+ f"update=None (fixed topk={initial_topk})")
+
+ def compress(self, input_space: ConfigurationSpace,
+ space_history: Optional[List[History]] = None,
+ source_similarities: Optional[Dict[int, float]] = None) -> ConfigurationSpace:
+ self.original_space = input_space
+ self.space_history = space_history
+ # Use base class compress which handles expert_params and exclude_params
+ return super().compress(input_space, space_history, source_similarities)
+
+ def _select_parameters(self,
+ input_space: ConfigurationSpace,
+ space_history: Optional[List[History]] = None,
+ source_similarities: Optional[Dict[int, float]] = None) -> List[int]:
+ param_names, importances = self.importance_calculator.calculate_importances(
+ input_space, space_history, source_similarities
+ )
+
+ if len(param_names) == 0:
+ logger.warning("No numeric parameters detected, keeping all parameters")
+ return list(range(len(input_space.get_hyperparameter_names())))
+
+ # Return all parameters sorted by importance (not just topk)
+ # Base class will select current_topk from this sorted list
+ sorted_numeric_indices = np.argsort(importances).tolist()
+
+ all_param_names = input_space.get_hyperparameter_names()
+ sorted_indices = [all_param_names.index(param_names[i]) for i in sorted_numeric_indices]
+
+ # Calculate target topk for logging
+ target_topk = min(self.current_topk, len(param_names))
+ if self.max_dimensions is not None:
+ target_topk = min(target_topk, self.max_dimensions)
+ target_topk = max(target_topk, self.min_dimensions)
+
+ topk_indices = sorted_indices[:target_topk] if target_topk > 0 else []
+ topk_names = [all_param_names[i] for i in topk_indices]
+ topk_importances = importances[sorted_numeric_indices[:target_topk]] if target_topk > 0 else []
+
+ logger.debug(f"{self.importance_calculator.get_name()} sorted all {len(sorted_indices)} parameters by importance")
+ logger.debug(f"{self.importance_calculator.get_name()} target top-{target_topk} parameters: {topk_names}")
+ logger.debug(f"{self.importance_calculator.get_name()} target top-{target_topk} importances: {topk_importances}")
+ return sorted_indices
+
+ def supports_adaptive_update(self) -> bool:
+ return self.update_strategy is not None
+
+ def uses_progressive_compression(self) -> bool:
+ return True
+
+ def update(self, progress: OptimizerProgress, history: History) -> bool:
+ if not self.update_strategy:
+ return False
+
+ if not self.update_strategy.should_update(progress, history):
+ return False
+
+ old_topk = self.current_topk
+
+ new_topk, description = self.update_strategy.compute_new_topk(
+ current_topk=self.current_topk,
+ reduction_ratio=self.reduction_ratio,
+ min_dimensions=self.min_dimensions,
+ max_dimensions=self.max_dimensions,
+ progress=progress
+ )
+
+ if new_topk != old_topk:
+ self.current_topk = new_topk
+ logger.info(description)
+ return True
+
+ return False
+
+ def get_step_info(self) -> dict:
+ info = super().get_step_info()
+ info['importance_calculator'] = type(self.importance_calculator).__name__
+
+ if self.update_strategy:
+ info['update_strategy'] = self.update_strategy.get_name()
+
+ info['current_topk'] = self.current_topk
+ info['initial_topk'] = self.initial_topk
+ info['min_dimensions'] = self.min_dimensions
+ info['reduction_ratio'] = self.reduction_ratio
+ if self.max_dimensions:
+ info['max_dimensions'] = self.max_dimensions
+ return info
diff --git a/openbox/compressor/steps/dimension/base.py b/openbox/compressor/steps/dimension/base.py
new file mode 100644
index 000000000..c213ce02a
--- /dev/null
+++ b/openbox/compressor/steps/dimension/base.py
@@ -0,0 +1,239 @@
+import copy
+from typing import Optional, List, Dict
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace
+from openbox import logger
+
+from ... import CompressionStep
+
+
+class DimensionSelectionStep(CompressionStep):
+    """Base compression step that keeps a subset of a ConfigurationSpace's
+    hyperparameters.
+
+    The final selection merges three sources: user-forced ``expert_params``,
+    user-banned ``exclude_params``, and an importance-ranked list produced by
+    ``_select_parameters`` (overridden by SHAP/correlation/adaptive subclasses).
+    """
+
+    def __init__(self, strategy: str = 'shap',
+                 expert_params: Optional[List[str]] = None,
+                 exclude_params: Optional[List[str]] = None,
+                 **kwargs):
+        """
+        Args:
+            strategy: Selection strategy label; 'none' disables the step.
+            expert_params: Parameter names that must always be kept.
+            exclude_params: Parameter names that must never be kept.
+        """
+        super().__init__('dimension_selection', **kwargs)
+        self.strategy = strategy
+        self.expert_params = expert_params or []
+        self.exclude_params = exclude_params or []
+        # Populated by compress(); None until a compression has run.
+        self.selected_indices: Optional[List[int]] = None
+        self.selected_param_names: Optional[List[str]] = None
+
+    def compress(self, input_space: ConfigurationSpace,
+                 space_history: Optional[List[History]] = None,
+                 source_similarities: Optional[Dict[int, float]] = None) -> ConfigurationSpace:
+        """Select parameters and return a reduced ConfigurationSpace.
+
+        Pipeline: resolve excludes -> resolve experts -> ask the subclass for an
+        importance-ordered list -> merge with experts up to the topk budget.
+        Returns the input space unchanged when strategy is 'none' or when the
+        merge yields nothing.
+        """
+        if self.strategy == 'none':
+            logger.debug("Dimension selection disabled, returning input space")
+            return input_space
+
+        # Step 1: Get exclude_params indices
+        all_param_names = input_space.get_hyperparameter_names()
+        exclude_indices_set = set()
+        for exclude_name in self.exclude_params:
+            if exclude_name in all_param_names:
+                exclude_indices_set.add(all_param_names.index(exclude_name))
+
+        # Step 2: Get expert parameter indices
+        expert_indices = self._get_expert_param_indices(input_space)
+        # exclude_params wins over expert_params when a name appears in both.
+        expert_indices = [idx for idx in expert_indices if idx not in exclude_indices_set]
+
+        # Step 3: Get method-selected parameters (sorted by importance)
+        method_sorted_indices = self._select_parameters(input_space, space_history, source_similarities)
+        # Filter out exclude_params and expert_params from method selection
+        method_sorted_indices = [idx for idx in method_sorted_indices
+                                 if idx not in exclude_indices_set and idx not in expert_indices]
+
+        # Step 4: Merge: expert params as base, then supplement from method-sorted list
+        selected_indices = self._merge_expert_and_method_params(
+            expert_indices, method_sorted_indices, input_space
+        )
+
+        if not selected_indices:
+            logger.warning("No parameters selected, returning input space")
+            return input_space
+
+        compressed_space = self._create_compressed_space(input_space, selected_indices)
+        self.selected_indices = selected_indices
+        self.selected_param_names = [input_space.get_hyperparameter_names()[i] for i in selected_indices]
+        logger.debug(f"Dimension selection: {len(input_space.get_hyperparameters())} -> "
+                     f"{len(compressed_space.get_hyperparameters())} parameters")
+        logger.debug(f"Selected parameters: {self.selected_param_names}")
+        return compressed_space
+
+    def _get_expert_param_indices(self, input_space: ConfigurationSpace) -> List[int]:
+        """Map expert parameter names to indices, warning on unknown names."""
+        if not self.expert_params:
+            return []
+
+        all_param_names = input_space.get_hyperparameter_names()
+        expert_indices = []
+
+        for param_name in self.expert_params:
+            if param_name in all_param_names:
+                idx = all_param_names.index(param_name)
+                # De-duplicate while preserving first-seen order.
+                if idx not in expert_indices:
+                    expert_indices.append(idx)
+                    logger.debug(f"Including expert parameter: {param_name}")
+            else:
+                logger.warning(f"Expert parameter '{param_name}' not found in configuration space")
+
+        return expert_indices
+
+    def _get_target_topk(self) -> Optional[int]:
+        """Return the subclass's selection budget, or None for 'keep everything'.
+
+        Checks 'topk' (fixed-budget subclasses) before 'current_topk'
+        (AdaptiveDimensionStep).
+        """
+        if hasattr(self, 'topk') and self.topk > 0:
+            return self.topk
+        elif hasattr(self, 'current_topk') and self.current_topk > 0:
+            return self.current_topk
+        return None
+
+    def _merge_expert_and_method_params(self,
+                                        expert_indices: List[int],
+                                        method_sorted_indices: List[int],
+                                        input_space: ConfigurationSpace) -> List[int]:
+        """
+        Merge expert parameters and method-selected parameters.
+
+        Strategy:
+        1. Expert parameters as base (already filtered by exclude_params)
+        2. Supplement from method_sorted_indices (sorted by importance) until reaching topk
+
+        Args:
+            expert_indices: Expert parameter indices (already filtered)
+            method_sorted_indices: Method-selected parameter indices sorted by importance (already filtered)
+            input_space: Input configuration space
+
+        Returns:
+            Merged list of parameter indices
+        """
+        merged_indices = expert_indices.copy()
+
+        target_topk = self._get_target_topk()
+
+        if target_topk is not None:
+            # Experts count against the budget; method picks fill the remainder.
+            for idx in method_sorted_indices:
+                if len(merged_indices) >= target_topk:
+                    break
+                if idx not in merged_indices:
+                    merged_indices.append(idx)
+        else:
+            # No budget: keep every method-selected parameter as well.
+            for idx in method_sorted_indices:
+                if idx not in merged_indices:
+                    merged_indices.append(idx)
+
+        if expert_indices:
+            n_expert = len(expert_indices)
+            n_method_added = len(merged_indices) - n_expert
+            n_merged = len(merged_indices)
+            target_str = f" (target: {target_topk})" if target_topk else ""
+            logger.info(f"{self.strategy} dimension selection: {n_expert} expert + {n_method_added} method = {n_merged} total{target_str}")
+
+        # Sorted so the compressed space preserves the original parameter order.
+        return sorted(merged_indices)
+
+    def _select_parameters(self,
+                           input_space: ConfigurationSpace,
+                           space_history: Optional[List[History]] = None,
+                           source_similarities: Optional[Dict[int, float]] = None) -> List[int]:
+        """
+        Select parameters to keep using the specific method (e.g., SHAP, correlation).
+
+        Subclasses should override this method.
+        Note: Expert parameters will be automatically merged by the base class.
+
+        Args:
+            input_space: Input configuration space
+            space_history: Historical data
+            source_similarities: Source task similarities
+
+        Returns:
+            List of selected parameter indices (excluding expert params, which are handled separately)
+        """
+        # Default: keep all parameters
+        return list(range(len(input_space.get_hyperparameters())))
+
+    def _create_compressed_space(self,
+                                 input_space: ConfigurationSpace,
+                                 selected_indices: List[int]) -> ConfigurationSpace:
+        """Build a fresh ConfigurationSpace containing only the selected hyperparameters.
+
+        NOTE(review): only the hyperparameter objects are copied over; conditions
+        and forbidden clauses of the input space are not transferred -- confirm
+        this is intended for conditional spaces.
+        """
+        param_names = input_space.get_hyperparameter_names()
+        selected_names = [param_names[i] for i in selected_indices]
+
+        compressed_space = ConfigurationSpace()
+        for name in selected_names:
+            hp = input_space.get_hyperparameter(name)
+            compressed_space.add_hyperparameter(hp)
+
+        return compressed_space
+
+    def _apply_exclude_params(self,
+                              selected_indices: List[int],
+                              input_space: ConfigurationSpace,
+                              step_name: str = "dimension selection") -> List[int]:
+        """Remove excluded parameters from an index list (returned sorted).
+
+        NOTE(review): not invoked anywhere in this class -- compress() filters
+        excludes inline; presumably a helper for subclasses or external callers.
+        """
+        if not self.exclude_params:
+            return selected_indices
+
+        all_param_names = input_space.get_hyperparameter_names()
+        result_indices = selected_indices.copy()
+        excluded_count = 0
+
+        for exclude_name in self.exclude_params:
+            if exclude_name in all_param_names:
+                exclude_idx = all_param_names.index(exclude_name)
+                if exclude_idx in result_indices:
+                    result_indices.remove(exclude_idx)
+                    excluded_count += 1
+                    logger.debug(f"Excluded parameter '{exclude_name}' from {step_name}")
+                else:
+                    logger.debug(f"Parameter '{exclude_name}' was not in selected parameters, skipping exclusion")
+            else:
+                logger.warning(f"Exclude parameter '{exclude_name}' not found in configuration space")
+
+        if excluded_count > 0:
+            logger.info(f"{step_name}: Excluded {excluded_count} parameter(s) from selection")
+        return sorted(result_indices)
+
+    def project_point(self, point) -> dict:
+        # project a point from input_space to output_space.
+        # filter to selected parameters and fill missing ones.
+        # Accepts a ConfigSpace Configuration, a dict, or anything dict() accepts.
+        if hasattr(point, 'get_dictionary'):
+            point_dict = point.get_dictionary()
+        elif isinstance(point, dict):
+            point_dict = point
+        else:
+            point_dict = dict(point)
+
+        # filter to only selected parameters
+        if self.selected_param_names is None:
+            # compress() has not run yet; pass the point through unchanged.
+            filtered_dict = point_dict
+        else:
+            filtered_dict = {name: point_dict[name] for name in self.selected_param_names if name in point_dict}
+
+        # fill missing parameters if needed
+        # NOTE(review): output_space / filling_strategy are presumably provided
+        # by the CompressionStep base class -- confirm.
+        if self.output_space is not None and self.filling_strategy is not None:
+            filtered_dict = self.filling_strategy.fill_missing_parameters(
+                filtered_dict, self.output_space
+            )
+
+        return filtered_dict
+
+    def needs_unproject(self) -> bool:
+        # Dimension selection is one-way, no unprojection needed
+        return False
+
+    def affects_sampling_space(self) -> bool:
+        # Dimension selection affects sampling space
+        return True
+
+    def get_step_info(self) -> dict:
+        """Extend base step info with the selection outcome."""
+        info = super().get_step_info()
+        if self.selected_param_names:
+            info['selected_parameters'] = self.selected_param_names
+        if self.selected_indices:
+            info['selected_indices'] = self.selected_indices
+        # _calculator is set by subclasses (SHAP / correlation steps).
+        if hasattr(self, '_calculator') and self._calculator:
+            info['calculator'] = type(self._calculator).__name__
+        if self.expert_params:
+            info['expert_params'] = self.expert_params
+            info['n_expert_params'] = len(self.expert_params)
+        if self.exclude_params:
+            info['exclude_params'] = self.exclude_params
+        # NOTE(review): input_space is presumably set by CompressionStep; if it
+        # is set while selected_indices is still None (compress() not run or
+        # returned early), len(None) raises TypeError here -- confirm and guard.
+        if self.input_space:
+            info['compression_ratio'] = len(self.selected_indices) / len(self.input_space.get_hyperparameters())
+        return info
\ No newline at end of file
diff --git a/openbox/compressor/steps/dimension/correlation.py b/openbox/compressor/steps/dimension/correlation.py
new file mode 100644
index 000000000..97fa04b84
--- /dev/null
+++ b/openbox/compressor/steps/dimension/correlation.py
@@ -0,0 +1,70 @@
+import numpy as np
+from typing import Optional, List, Dict
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace
+from openbox import logger
+
+from .base import DimensionSelectionStep
+from .importance import CorrelationImportanceCalculator
+
+
+class CorrelationDimensionStep(DimensionSelectionStep):
+ def __init__(self,
+ method: str = 'spearman',
+ topk: int = 20,
+ expert_params: Optional[List[str]] = None,
+ exclude_params: Optional[List[str]] = None,
+ **kwargs):
+ super().__init__(strategy=method, expert_params=expert_params, exclude_params=exclude_params, **kwargs)
+ self.method = method
+ self.topk = 0 if method == 'none' else topk
+ self._calculator = CorrelationImportanceCalculator(method=method)
+
+ logger.debug(f"CorrelationDimensionStep initialized: method={method}, topk={topk}, expert_params={len(self.expert_params)}, exclude_params={len(self.exclude_params)}")
+
+ def get_step_info(self) -> dict:
+ info = super().get_step_info()
+ info['method'] = self.method
+ info['topk'] = self.topk
+ return info
+
+ def compress(self, input_space: ConfigurationSpace,
+ space_history: Optional[List[History]] = None,
+ source_similarities: Optional[Dict[int, float]] = None) -> ConfigurationSpace:
+ return super().compress(input_space, space_history, source_similarities)
+
+ def _select_parameters(self,
+ input_space: ConfigurationSpace,
+ space_history: Optional[List[History]] = None,
+ source_similarities: Optional[Dict[int, float]] = None) -> List[int]:
+ if self.topk <= 0:
+ logger.warning(f"No topk provided for {self.method} selection, keeping all parameters")
+ return list(range(len(input_space.get_hyperparameters())))
+
+ if not space_history:
+ logger.warning(f"No space history provided for {self.method} selection, keeping all parameters")
+ return list(range(len(input_space.get_hyperparameters())))
+
+ param_names, importances = self._calculator.calculate_importances(
+ input_space, space_history, source_similarities
+ )
+ if importances is None or np.size(importances) == 0:
+ logger.warning(f"{self.method} importances unavailable, keeping all parameters")
+ return list(range(len(input_space.get_hyperparameters())))
+
+ # Return all parameters sorted by importance (not just topk)
+ # Base class will select topk from this sorted list
+ sorted_numeric_indices = np.argsort(importances).tolist()
+ all_param_names = input_space.get_hyperparameter_names()
+ sorted_indices = [all_param_names.index(param_names[i]) for i in sorted_numeric_indices]
+
+ top_k = min(self.topk, len(sorted_indices))
+ topk_indices = sorted_indices[:top_k] if top_k > 0 else []
+ topk_names = [all_param_names[i] for i in topk_indices]
+ topk_importances = importances[sorted_numeric_indices[:top_k]] if top_k > 0 else []
+
+ logger.debug(f"{self.method.capitalize()} sorted all {len(sorted_indices)} parameters by importance")
+ logger.debug(f"{self.method.capitalize()} top-{top_k} parameters: {topk_names}")
+ logger.debug(f"{self.method.capitalize()} top-{top_k} importances: {topk_importances}")
+ return sorted_indices
+
diff --git a/openbox/compressor/steps/dimension/expert.py b/openbox/compressor/steps/dimension/expert.py
new file mode 100644
index 000000000..fb7f1d1cc
--- /dev/null
+++ b/openbox/compressor/steps/dimension/expert.py
@@ -0,0 +1,38 @@
+from typing import Optional, List, Dict
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace
+from openbox import logger
+
+from .base import DimensionSelectionStep
+from ...utils import load_expert_params
+
+
+class ExpertDimensionStep(DimensionSelectionStep):
+ def __init__(self,
+ strategy: str = 'expert',
+ expert_params: Optional[List[str]] = None,
+ exclude_params: Optional[List[str]] = None,
+ **kwargs):
+ super().__init__(strategy=strategy, expert_params=expert_params, exclude_params=exclude_params, **kwargs)
+
+ def get_step_info(self) -> dict:
+ # Base class already includes expert_params info
+ return super().get_step_info()
+
+ def _select_parameters(self,
+ input_space: ConfigurationSpace,
+ space_history: Optional[List[History]] = None,
+ source_similarities: Optional[Dict[int, float]] = None) -> List[int]:
+ """
+ Expert dimension selection: only use expert parameters.
+ The base class will automatically include expert_params, so we return empty list
+ to indicate no additional method-selected parameters.
+ """
+ if not self.expert_params:
+ logger.warning("No expert parameters provided, keeping all parameters")
+ return list(range(len(input_space.get_hyperparameters())))
+
+ # Return empty list - base class will handle expert params automatically
+ # This ensures only expert parameters are selected
+ return []
+
diff --git a/openbox/compressor/steps/dimension/importance.py b/openbox/compressor/steps/dimension/importance.py
new file mode 100644
index 000000000..bc7826575
--- /dev/null
+++ b/openbox/compressor/steps/dimension/importance.py
@@ -0,0 +1,265 @@
+import numpy as np
+import pandas as pd
+from abc import ABC, abstractmethod
+from typing import List, Optional, Tuple, Dict
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace
+from openbox import logger
+from ...utils import (
+ extract_numeric_hyperparameters,
+ extract_top_samples_from_history,
+)
+
+
+class ImportanceCalculator(ABC):
+    """Strategy interface for scoring hyperparameter importance from history.
+
+    Both bundled implementations (SHAP, correlation) negate score magnitudes,
+    so a LOWER score means MORE important and callers rank candidates with an
+    ascending ``np.argsort``.
+    """
+
+    @abstractmethod
+    def calculate_importances(self,
+                              input_space: ConfigurationSpace,
+                              space_history: Optional[List[History]] = None,
+                              source_similarities: Optional[Dict[int, float]] = None) -> Tuple[List[str], np.ndarray]:
+        """Return (numeric hyperparameter names, importance scores aligned to them)."""
+        pass
+
+    @abstractmethod
+    def get_name(self) -> str:
+        """Human-readable calculator name, used in log messages."""
+        pass
+
+
+class SHAPImportanceCalculator(ImportanceCalculator):
+ def __init__(self):
+ self._cache = {
+ 'models': None,
+ 'importances': None,
+ 'shap_values': None,
+ 'n_features': None,
+ 'importances_per_task': None, # Store per-task importances for multi-task visualization
+ 'task_names': None, # Store task names
+ }
+ self.numeric_hyperparameter_names: List[str] = []
+ self.numeric_hyperparameter_indices: List[int] = []
+
+ def calculate_importances(self,
+ input_space: ConfigurationSpace,
+ space_history: Optional[List[History]] = None,
+ source_similarities: Optional[Dict[int, float]] = None) -> Tuple[List[str], np.ndarray]:
+ self.numeric_hyperparameter_names, \
+ self.numeric_hyperparameter_indices = extract_numeric_hyperparameters(input_space)
+
+ current_n_features = len(self.numeric_hyperparameter_names)
+ cache_valid = (
+ self._cache['models'] is not None and
+ self._cache['importances'] is not None and
+ self._cache['n_features'] == current_n_features
+ )
+
+ if cache_valid:
+ logger.info(f"Using cached SHAP model (n_features={current_n_features})")
+ return self.numeric_hyperparameter_names, self._cache['importances']
+
+ importances = self._compute_shap_importances(space_history, input_space, source_similarities)
+ return self.numeric_hyperparameter_names, importances
+
+ def _compute_shap_importances(self,
+ space_history: List[History],
+ input_space: ConfigurationSpace,
+ source_similarities: Optional[Dict[int, float]] = None) -> np.ndarray:
+ import shap
+ from sklearn.ensemble import RandomForestRegressor
+
+ models = []
+ importances_list = []
+ shap_values = []
+
+ if len(space_history) == 0:
+ logger.warning("No historical data provided for SHAP")
+ return None
+
+ all_x, all_y = extract_top_samples_from_history(
+ space_history, self.numeric_hyperparameter_names, input_space,
+ top_ratio=1.0, normalize=True
+ )
+
+ for task_idx, (hist_x_numeric, hist_y) in enumerate(zip(all_x, all_y)):
+ if len(hist_x_numeric) == 0:
+ continue
+
+ model = RandomForestRegressor(n_estimators=100, random_state=42)
+ model.fit(hist_x_numeric, hist_y)
+
+ explainer = shap.Explainer(model)
+ shap_value = -np.abs(explainer(hist_x_numeric, check_additivity=False).values)
+ mean_shap = shap_value.mean(axis=0)
+
+ models.append(model)
+ importances_list.append(mean_shap)
+ shap_values.append(shap_value)
+
+ df = pd.DataFrame({
+ "feature": self.numeric_hyperparameter_names,
+ "importance": mean_shap,
+ }).sort_values("importance", ascending=True)
+ logger.debug(f"SHAP importance (task {task_idx}): {df.to_string()}")
+
+ if len(importances_list) == 0:
+ logger.warning("No SHAP importances computed")
+ return None
+
+ importances_array = np.array(importances_list)
+ if source_similarities:
+ weights = np.array([
+ source_similarities.get(task_idx, 1.0)
+ for task_idx in range(len(importances_list))
+ ])
+ weights_sum = weights.sum()
+ if weights_sum > 1e-10:
+ weights = weights / weights_sum
+ else:
+ weights = np.ones(len(importances_list)) / len(importances_list)
+ else:
+ weights = np.ones(len(importances_list)) / len(importances_list)
+
+ importances = np.average(importances_array, axis=0, weights=weights)
+
+ # Extract task names from history
+ task_names = []
+ for i, history in enumerate(space_history):
+ if hasattr(history, 'task_id') and history.task_id:
+ task_names.append(history.task_id)
+ else:
+ task_names.append(f'Task {i}')
+
+ self._cache.update({
+ 'models': models,
+ 'importances': importances,
+ 'shap_values': shap_values,
+ 'n_features': len(self.numeric_hyperparameter_names),
+ 'importances_per_task': importances_array if len(importances_array) > 1 else None, # Only save if multiple tasks
+ 'task_names': task_names if len(task_names) > 1 else None,
+ })
+
+ return importances
+
+ def get_name(self) -> str:
+ return "SHAP"
+
+
+class CorrelationImportanceCalculator(ImportanceCalculator):
+ def __init__(self, method: str = 'spearman'):
+ """
+ Args:
+ method: 'spearman' or 'pearson'
+ """
+ self.method = method
+ self._cache = {
+ 'importances': None,
+ 'importances_per_task': None,
+ 'task_names': None,
+ }
+ self.numeric_hyperparameter_names: List[str] = []
+
+ def calculate_importances(self,
+ input_space: ConfigurationSpace,
+ space_history: Optional[List[History]] = None,
+ source_similarities: Optional[Dict[int, float]] = None) -> Tuple[List[str], np.ndarray]:
+ numeric_param_names, _ = extract_numeric_hyperparameters(input_space)
+ self.numeric_hyperparameter_names = numeric_param_names
+
+ all_x, all_y = extract_top_samples_from_history(
+ space_history, numeric_param_names, input_space,
+ top_ratio=1.0, normalize=True
+ )
+ if len(all_x) == 0:
+ logger.warning("No data available for correlation")
+ return numeric_param_names, np.ones(len(numeric_param_names))
+
+ if not source_similarities:
+ source_similarities = {i: 1.0 for i in range(len(all_x))}
+
+ importances, importances_per_task = self._compute_weighted_correlations(
+ all_x, all_y, numeric_param_names, source_similarities
+ )
+
+ # Extract task names from history
+ task_names = []
+ for i, history in enumerate(space_history):
+ if hasattr(history, 'task_id') and history.task_id:
+ task_names.append(history.task_id)
+ else:
+ task_names.append(f'Task {i}')
+
+ self._cache.update({
+ 'importances': importances,
+ 'importances_per_task': importances_per_task if len(all_x) > 1 else None,
+ 'task_names': task_names if len(all_x) > 1 else None,
+ })
+
+ df = pd.DataFrame({
+ "feature": numeric_param_names,
+ "importance": importances,
+ }).sort_values("importance", ascending=True)
+ logger.debug(f"{self.method.capitalize()} correlation importance: {df.to_string()}")
+
+ return numeric_param_names, importances
+
+ def _compute_weighted_correlations(self, all_x, all_y, numeric_param_names,
+ source_similarities):
+ """
+ Compute weighted correlations based on task similarities.
+
+ Single task is treated as having similarity=1.0.
+ """
+ from scipy.stats import spearmanr, pearsonr
+
+ correlations_list = []
+ for task_idx, (hist_x_numeric, hist_y) in enumerate(zip(all_x, all_y)):
+ if len(hist_x_numeric) == 0:
+ continue
+
+ n_features = hist_x_numeric.shape[1]
+ correlations = np.zeros(n_features)
+
+ for i in range(n_features):
+ try:
+ if self.method == 'spearman':
+ corr, _ = spearmanr(hist_x_numeric[:, i], hist_y.flatten())
+ else:
+ corr, _ = pearsonr(hist_x_numeric[:, i], hist_y.flatten())
+
+ correlations[i] = abs(corr) if not np.isnan(corr) else 0.0
+ except Exception as e:
+ logger.warning(f"Failed to compute {self.method} correlation for feature {i}: {e}")
+ correlations[i] = 0.0
+
+ correlations_list.append(correlations)
+
+ df = pd.DataFrame({
+ "feature": numeric_param_names,
+ "correlation": correlations,
+ }).sort_values("correlation", ascending=False)
+ logger.debug(f"{self.method.capitalize()} correlations (task {task_idx}): {df.to_string()}")
+
+ if len(correlations_list) == 0:
+ logger.warning("No correlations computed")
+ return np.ones(len(numeric_param_names)), None
+
+ correlations_array = np.array(correlations_list)
+ weights = np.array([
+ source_similarities.get(task_idx, 1.0) # Default to 1.0 if not specified
+ for task_idx in range(len(correlations_list))
+ ])
+ weights_sum = weights.sum()
+
+ if weights_sum > 1e-10:
+ weights = weights / weights_sum
+ else:
+ weights = np.ones(len(correlations_list)) / len(correlations_list)
+
+ correlations = np.average(correlations_array, axis=0, weights=weights)
+
+ # Return weighted importance and per-task data (only for multiple tasks)
+ importances = -correlations
+ importances_per_task = -correlations_array if len(correlations_list) > 1 else None
+
+ return importances, importances_per_task
+
+ def get_name(self) -> str:
+ return f"Correlation({self.method})"
\ No newline at end of file
diff --git a/openbox/compressor/steps/dimension/shap.py b/openbox/compressor/steps/dimension/shap.py
new file mode 100644
index 000000000..5174581ff
--- /dev/null
+++ b/openbox/compressor/steps/dimension/shap.py
@@ -0,0 +1,69 @@
+import numpy as np
+from typing import Optional, List, Dict
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace
+from openbox import logger
+
+from .base import DimensionSelectionStep
+from .importance import SHAPImportanceCalculator
+
+
+class SHAPDimensionStep(DimensionSelectionStep):
+    """Keep the top-k hyperparameters ranked by SHAP importance computed from
+    historical data (see SHAPImportanceCalculator)."""
+
+    def __init__(self,
+                 strategy: str = 'shap',
+                 topk: int = 20,
+                 expert_params: Optional[List[str]] = None,
+                 exclude_params: Optional[List[str]] = None,
+                 **kwargs):
+        super().__init__(strategy=strategy, expert_params=expert_params, exclude_params=exclude_params, **kwargs)
+        # A strategy of 'none' disables SHAP-based selection.
+        self.topk = 0 if strategy == 'none' else topk
+        self._calculator = SHAPImportanceCalculator()
+        logger.debug(f"SHAPDimensionStep initialized: topk={topk}, expert_params={len(self.expert_params)}, exclude_params={len(self.exclude_params)}")
+
+    def get_step_info(self) -> dict:
+        info = super().get_step_info()
+        info['topk'] = self.topk
+        return info
+
+    def compress(self, input_space: ConfigurationSpace,
+                 space_history: Optional[List[History]] = None,
+                 source_similarities: Optional[Dict[int, float]] = None) -> ConfigurationSpace:
+        # No extra state here; the base-class pipeline drives the selection.
+        return super().compress(input_space, space_history, source_similarities)
+
+    def _select_parameters(self,
+                           input_space: ConfigurationSpace,
+                           space_history: Optional[List[History]] = None,
+                           source_similarities: Optional[Dict[int, float]] = None) -> List[int]:
+        """Return ALL parameter indices ordered most-important-first; the base
+        class trims the list to topk."""
+        if self.topk <= 0:
+            logger.warning("No topk provided for SHAP selection, keeping all parameters")
+            return list(range(len(input_space.get_hyperparameters())))
+
+        if not space_history:
+            logger.warning("No space history provided for SHAP selection, keeping all parameters")
+            return list(range(len(input_space.get_hyperparameters())))
+
+        param_names, importances = self._calculator.calculate_importances(
+            input_space, space_history, source_similarities
+        )
+
+        # The calculator returns None when no task yielded usable samples.
+        if importances is None or np.size(importances) == 0:
+            logger.warning("SHAP importances unavailable, keeping all parameters")
+            return list(range(len(input_space.get_hyperparameters())))
+
+        # Return all parameters sorted by importance (not just topk)
+        # Base class will select topk from this sorted list
+        # Importances are negated |SHAP| magnitudes, so an ascending argsort is
+        # already most-important-first (no reverse needed).
+        sorted_numeric_indices = np.argsort(importances).tolist()
+        # sorted_numeric_indices.reverse()
+        all_param_names = input_space.get_hyperparameter_names()
+        sorted_indices = [all_param_names.index(param_names[i]) for i in sorted_numeric_indices]
+
+        # Slice out the head purely for debug logging; the base class performs
+        # the real cut at topk.
+        top_k = min(self.topk, len(sorted_indices))
+        topk_indices = sorted_indices[:top_k] if top_k > 0 else []
+        topk_names = [all_param_names[i] for i in topk_indices]
+        topk_importances = importances[sorted_numeric_indices[:top_k]] if top_k > 0 else []
+
+        logger.debug(f"SHAP sorted all {len(sorted_indices)} parameters by importance")
+        logger.debug(f"SHAP top-{top_k} parameters: {topk_names}")
+        logger.debug(f"SHAP top-{top_k} importances: {topk_importances}")
+        return sorted_indices
diff --git a/openbox/compressor/steps/projection/__init__.py b/openbox/compressor/steps/projection/__init__.py
new file mode 100644
index 000000000..d54500dee
--- /dev/null
+++ b/openbox/compressor/steps/projection/__init__.py
@@ -0,0 +1,14 @@
+from .base import TransformativeProjectionStep
+from .rembo import REMBOProjectionStep
+from .hesbo import HesBOProjectionStep
+from .kpca import KPCAProjectionStep
+from .quantization import QuantizationProjectionStep
+
+__all__ = [
+ 'TransformativeProjectionStep',
+ 'REMBOProjectionStep',
+ 'HesBOProjectionStep',
+ 'KPCAProjectionStep',
+ 'QuantizationProjectionStep',
+]
+
diff --git a/openbox/compressor/steps/projection/base.py b/openbox/compressor/steps/projection/base.py
new file mode 100644
index 000000000..b31640066
--- /dev/null
+++ b/openbox/compressor/steps/projection/base.py
@@ -0,0 +1,73 @@
+from typing import Optional, List, Dict
+import numpy as np
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace
+import ConfigSpace.hyperparameters as CSH
+
+from ... import CompressionStep
+from openbox import logger
+
+
+class TransformativeProjectionStep(CompressionStep):
+    """Base class for projection steps that map the search space into a
+    different (typically lower-dimensional) space.
+
+    The base implementation is an identity projection; subclasses
+    (REMBO / HesBO / KPCA) override ``_build_projected_space``.
+    """
+    def __init__(self, method: str = 'rembo', **kwargs):
+        # `method` selects the projection algorithm; 'none' disables the step.
+        super().__init__('transformative_projection', **kwargs)
+        self.method = method
+
+    def compress(self, input_space: ConfigurationSpace,
+                 space_history: Optional[List[History]] = None,
+                 source_similarities: Optional[Dict[int, float]] = None,
+                 **kwargs) -> ConfigurationSpace:
+        """Build and return the projected space ('none' returns input unchanged)."""
+        if self.method == 'none':
+            logger.info("Projection disabled, returning input space")
+            return input_space
+
+        projected_space = self._build_projected_space(input_space)
+
+        logger.info(f"Projection compression: {len(input_space.get_hyperparameters())} -> "
+                    f"{len(projected_space.get_hyperparameters())} parameters")
+
+        return projected_space
+
+    def _build_projected_space(self, input_space: ConfigurationSpace) -> ConfigurationSpace:
+        # Identity projection by default; subclasses build the low-dim space here.
+        return input_space
+
+    def needs_unproject(self) -> bool:
+        # Points sampled in the projected space must be mapped back before evaluation.
+        return True
+
+    def affects_sampling_space(self) -> bool:
+        # Sampling happens in the projected space, not the original one.
+        return True
+
+    def get_step_info(self) -> dict:
+        """Report step metadata, including `low_dim` when a subclass defines it."""
+        info = super().get_step_info()
+        if hasattr(self, 'low_dim'):
+            info['low_dim'] = self.low_dim
+        return info
+
+    def _normalize_high_dim_config(self, high_dim_dict: dict, active_hps: List[CSH.Hyperparameter]) -> np.ndarray:
+        """Map a high-dimensional config dict to a [0, 1]^D vector, one entry per
+        hyperparameter in `active_hps`. Missing values fall back to the default
+        value / range midpoint / first choice.
+
+        NOTE(review): numeric values are normalized linearly here, while the
+        unprojection paths use `hp._transform` — for log-scaled
+        hyperparameters these are not inverses; confirm intended.
+        """
+        high_dim_values = []
+        for hp in active_hps:
+            value = high_dim_dict.get(hp.name)
+            if value is None:
+                if hasattr(hp, 'default_value'):
+                    value = hp.default_value
+                elif hasattr(hp, 'lower') and hasattr(hp, 'upper'):
+                    value = (hp.lower + hp.upper) / 2
+                elif hasattr(hp, 'choices'):
+                    value = hp.choices[0]
+                else:
+                    logger.warning(f"Cannot determine value for {hp.name}, using 0.5")
+                    high_dim_values.append(0.5)
+                    continue
+            # normalize to [0, 1]
+            if hasattr(hp, 'lower') and hasattr(hp, 'upper'):
+                normalized = (value - hp.lower) / (hp.upper - hp.lower)
+            elif hasattr(hp, 'choices'):
+                try:
+                    normalized = hp.choices.index(value) / max(1, len(hp.choices) - 1)
+                except ValueError:
+                    # value not among the declared choices: fall back to midpoint
+                    normalized = 0.5
+            else:
+                normalized = 0.5
+            high_dim_values.append(normalized)
+        return np.array(high_dim_values)
+
diff --git a/openbox/compressor/steps/projection/hesbo.py b/openbox/compressor/steps/projection/hesbo.py
new file mode 100644
index 000000000..938463545
--- /dev/null
+++ b/openbox/compressor/steps/projection/hesbo.py
@@ -0,0 +1,171 @@
+import numpy as np
+from typing import Optional, List
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace, Configuration
+import ConfigSpace as CS
+from sklearn.preprocessing import MinMaxScaler
+from openbox import logger
+from .base import TransformativeProjectionStep
+
+
+class HesBOProjectionStep(TransformativeProjectionStep):
+    """HesBO projection: optimization happens in a low-dimensional box
+    [-1, 1]^low_dim; points are expanded to the full space through a sparse
+    random hashing embedding (each high dimension copies one low dimension,
+    multiplied by a random sign).
+    """
+    def __init__(self,
+                 method: str = 'hesbo',
+                 low_dim: int = 10,
+                 max_num_values: Optional[int] = None,
+                 seed: int = 42,
+                 **kwargs):
+        super().__init__(method=method, **kwargs)
+        self.low_dim = low_dim
+        self._max_num_values = max_num_values
+        self.seed = seed
+        self._rs = np.random.RandomState(seed=seed)
+
+        # (-1, 1) -> (0, 1) scaler, fitted in _build_projected_space
+        self._scaler: Optional[MinMaxScaler] = None
+        # _h / _sigma implicitly define the hashing matrix S'
+        self._h: Optional[np.ndarray] = None
+        self._sigma: Optional[np.ndarray] = None
+        self.active_hps: List = []
+
+        # Caches make project/unproject exact inverses for already-seen points
+        self._low_to_high_cache: dict = {}
+        self._high_to_low_cache: dict = {}
+
+    def _build_projected_space(self, input_space: ConfigurationSpace) -> ConfigurationSpace:
+        """Create the [-1, 1]^low_dim space and draw the hashing embedding."""
+        self.active_hps = list(input_space.get_hyperparameters())
+
+        target = CS.ConfigurationSpace(
+            name=input_space.name,
+            seed=self.seed
+        )
+
+        if self._max_num_values is None:  # no quantization
+            hps = [
+                CS.UniformFloatHyperparameter(
+                    name=f'hesbo_{idx}',
+                    lower=-1,
+                    upper=1
+                )
+                for idx in range(self.low_dim)
+            ]
+        else:
+            # Use quantization, step size: 2. / max_num_values
+            logger.info(f'Using quantization: q={self._max_num_values}')
+            q = 2. / self._max_num_values
+            hps = [
+                CS.UniformFloatHyperparameter(
+                    name=f'hesbo_{idx}',
+                    lower=-1,
+                    upper=1,
+                    q=q
+                )
+                for idx in range(self.low_dim)
+            ]
+
+        target.add_hyperparameters(hps)
+        self.output_space = target
+
+        # (-1, 1) -> (0, 1) scaling
+        self._scaler = MinMaxScaler(feature_range=(0, 1))
+        ones = np.ones(len(self.active_hps))
+        # Use two points (minimum & maximum)
+        self._scaler.fit(
+            np.array([-ones, ones])
+        )
+
+        # Implicitly define matrix S' using hashing
+        # _h: maps each high-dim index to a low-dim index
+        # _sigma: sign for each high-dim dimension
+        self._h = self._rs.choice(
+            range(self.low_dim), len(self.active_hps)
+        )
+        self._sigma = self._rs.choice([-1, 1], len(self.active_hps))
+
+        return target
+
+    def unproject_point(self, point: Configuration) -> dict:
+        """Expand a low-dim point to a full-space configuration dict."""
+        low_dim_point = [
+            point.get(f'hesbo_{idx}') for idx in range(self.low_dim)
+        ]
+
+        low_dim_key = tuple(low_dim_point)
+
+        if low_dim_key in self._low_to_high_cache:
+            return self._low_to_high_cache[low_dim_key].copy()
+
+        high_dim_point = [
+            self._sigma[idx] * low_dim_point[self._h[idx]]
+            for idx in range(len(self.active_hps))
+        ]
+        high_dim_point = self._scaler.transform([high_dim_point])[0]
+
+        # Transform back to original space
+        high_dim_conf = {}
+        for hp, value in zip(self.active_hps, high_dim_point):
+            # HesBO does not project values outside of range
+            # NOTE: need this cause of weird floating point errors
+            value = max(0, min(1, value))
+
+            if isinstance(hp, CS.CategoricalHyperparameter):
+                index = int(value * len(hp.choices))
+                index = max(0, min(len(hp.choices) - 1, index))
+                value = hp.choices[index]
+            elif isinstance(hp, CS.hyperparameters.NumericalHyperparameter):
+                value = hp._transform(value)
+                value = max(hp.lower, min(hp.upper, value))
+            else:
+                raise NotImplementedError(f"Unsupported hyperparameter type: {type(hp)}")
+
+            high_dim_conf[hp.name] = value
+
+        self._low_to_high_cache[low_dim_key] = high_dim_conf.copy()
+        high_dim_key = tuple(sorted(high_dim_conf.items()))
+        self._high_to_low_cache[high_dim_key] = low_dim_key
+        return high_dim_conf
+
+    def _approximate_project(self, high_dim_dict: dict) -> dict:
+        """Approximate the low-dim preimage of a full-space configuration."""
+        high_dim_array = self._normalize_high_dim_config(high_dim_dict, self.active_hps)
+        high_dim_scaled = self._scaler.inverse_transform([high_dim_array])[0]
+
+        # approximate low_dim from high_dim using embedding structure
+        # low_dim[i] = high_dim[h[i]] / sigma[h[i]]
+        low_dim_approx = np.zeros(self.low_dim)
+        for i in range(self.low_dim):
+            # aggregate all dimensions that map to this low dimension
+            contributing_dims = [j for j in range(len(self.active_hps)) if self._h[j] == i]
+            if contributing_dims:  # average the contributions
+                contributions = [high_dim_scaled[j] / self._sigma[j] for j in contributing_dims]
+                low_dim_approx[i] = np.mean(contributions)
+            else:
+                low_dim_approx[i] = 0.0
+
+        # BUGFIX: clip to THIS step's low-dim space bounds, which are [-1, 1]
+        # (see _build_projected_space) — not REMBO's [-sqrt(low_dim),
+        # sqrt(low_dim)]. Clipping to the wider bound could return values
+        # outside the output space.
+        low_dim_approx = np.clip(low_dim_approx, -1.0, 1.0)
+        low_dim_result = {f'hesbo_{idx}': float(low_dim_approx[idx]) for idx in range(self.low_dim)}
+        logger.warning(f"Approximated HesBO projection: {len(high_dim_dict)} dims -> {self.low_dim} dims")
+        return low_dim_result
+
+    def project_point(self, point) -> dict:
+        """Map a full-space point to the low-dim space (cache hit is exact)."""
+        if isinstance(point, Configuration):
+            high_dim_dict = point.get_dictionary()
+        elif isinstance(point, dict):
+            high_dim_dict = point
+        else:
+            high_dim_dict = dict(point)
+
+        high_dim_key = tuple(sorted(high_dim_dict.items()))
+        if high_dim_key in self._high_to_low_cache:
+            low_dim_key = self._high_to_low_cache[high_dim_key]
+            low_dim_point = list(low_dim_key)
+            return {f'hesbo_{idx}': float(low_dim_point[idx]) for idx in range(self.low_dim)}
+        else:
+            logger.warning("Cache miss in project_point, using approximation")
+            low_dim_result = self._approximate_project(high_dim_dict)
+            low_dim_key = tuple(low_dim_result[f'hesbo_{i}'] for i in range(self.low_dim))
+            self._high_to_low_cache[high_dim_key] = low_dim_key
+
+        return low_dim_result
+
+    def get_step_info(self) -> dict:
+        """Report step metadata including low_dim and quantization setting."""
+        info = super().get_step_info()
+        info['low_dim'] = self.low_dim
+        if self._max_num_values is not None:
+            info['max_num_values'] = self._max_num_values
+        return info
\ No newline at end of file
diff --git a/openbox/compressor/steps/projection/kpca.py b/openbox/compressor/steps/projection/kpca.py
new file mode 100644
index 000000000..7702c2513
--- /dev/null
+++ b/openbox/compressor/steps/projection/kpca.py
@@ -0,0 +1,206 @@
+import copy
+import numpy as np
+from typing import Optional, List, Dict
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace, Configuration
+import ConfigSpace as CS
+from sklearn.decomposition import KernelPCA
+from sklearn.preprocessing import StandardScaler
+from openbox import logger
+from .base import TransformativeProjectionStep
+from ...utils import (
+ extract_numeric_hyperparameters,
+ extract_top_samples_from_history,
+)
+
+
+class KPCAProjectionStep(TransformativeProjectionStep):
+    """Kernel-PCA projection trained on historical observations.
+
+    Unlike REMBO/HesBO, sampling still happens in the ORIGINAL space:
+    `affects_sampling_space()` is False and `unproject_point` is the
+    identity. The fitted KPCA is only used by `project_point` to embed
+    configurations into the learned component space.
+    """
+    def __init__(self,
+                 method: str = 'kpca',
+                 n_components: int = 10,
+                 kernel: str = 'rbf',
+                 gamma: Optional[float] = None,
+                 space_history: Optional[List[History]] = None,
+                 seed: int = 42,
+                 **kwargs):
+        super().__init__(method=method, **kwargs)
+        self.n_components = n_components
+        self.kernel = kernel
+        self.gamma = gamma
+        self.seed = seed
+        self._rs = np.random.RandomState(seed=seed)
+
+        self._kpca: Optional[KernelPCA] = None
+        self._scaler: Optional[StandardScaler] = None
+        self.active_hps: List = []
+        self.numeric_param_names: List[str] = []
+        self.numeric_param_indices: List[int] = []
+        self._projected_samples: Optional[np.ndarray] = None
+
+        self.space_history = space_history
+
+    def compress(self, input_space: ConfigurationSpace,
+                 space_history: Optional[List[History]] = None,
+                 source_similarities: Optional[Dict[int, float]] = None,
+                 **kwargs) -> ConfigurationSpace:
+        """Train KPCA on history and return the component space.
+
+        Falls back to the unchanged input space when disabled, when no
+        history / numeric parameters are available, or when training fails.
+        FIX: `**kwargs` added so this override stays call-compatible with the
+        base-class `compress(..., **kwargs)` signature used by other steps.
+        """
+        if self.method == 'none':
+            logger.info("KPCA projection disabled, returning input space")
+            return input_space
+
+        if space_history is not None:
+            self.space_history = space_history
+
+        if not self.space_history:
+            logger.warning("No space history provided for KPCA, returning input space")
+            return input_space
+
+        self.numeric_param_names, self.numeric_param_indices = extract_numeric_hyperparameters(input_space)
+        self.active_hps = list(input_space.get_hyperparameters())
+
+        if len(self.numeric_param_names) == 0:
+            logger.warning("No numeric hyperparameters found for KPCA, returning input space")
+            return input_space
+
+        self._train_kpca(input_space)
+
+        if self._kpca is None:
+            logger.warning("Failed to train KPCA, returning input space")
+            return input_space
+
+        projected_space = self._build_projected_space(input_space)
+        logger.info(f"KPCA projection: {len(self.numeric_param_names)} -> "
+                    f"{self.n_components} components (kernel={self.kernel})")
+        return projected_space
+
+    def _train_kpca(self, input_space: ConfigurationSpace):
+        """Fit StandardScaler + KernelPCA on all historical samples."""
+        all_x, _ = extract_top_samples_from_history(
+            self.space_history, self.numeric_param_names, input_space,
+            top_ratio=1.0, normalize=True
+        )
+
+        if len(all_x) == 0:
+            logger.warning("No historical data available for KPCA training")
+            return
+
+        X_combined = np.vstack(all_x)
+
+        if X_combined.shape[0] < self.n_components:
+            logger.warning(
+                f"Insufficient samples for KPCA: {X_combined.shape[0]} < {self.n_components}. "
+                f"Using {X_combined.shape[0]} components instead."
+            )
+            n_components = X_combined.shape[0]
+        else:
+            n_components = min(self.n_components, X_combined.shape[1])
+
+        self._scaler = StandardScaler()
+        X_scaled = self._scaler.fit_transform(X_combined)
+
+        try:
+            self._kpca = KernelPCA(
+                n_components=n_components,
+                kernel=self.kernel,
+                gamma=self.gamma,
+                random_state=self.seed,
+                fit_inverse_transform=True
+            )
+            self._kpca.fit(X_scaled)
+
+            if hasattr(self._kpca, 'n_components_'):
+                self.n_components = self._kpca.n_components_
+            elif hasattr(self._kpca, 'alphas_'):
+                # BUGFIX: alphas_ has shape (n_samples, n_components);
+                # len(alphas_) is the SAMPLE count, not the component count.
+                self.n_components = self._kpca.alphas_.shape[1]
+            else:
+                self.n_components = n_components
+                logger.warning(f"Could not determine actual n_components, using requested: {n_components}")
+
+            self._projected_samples = self._kpca.transform(X_scaled)
+
+            logger.info(f"KPCA trained successfully: {X_scaled.shape[1]} features -> {self.n_components} components "
+                        f"(kernel={self.kernel}, samples={X_scaled.shape[0]})")
+        except Exception as e:
+            logger.error(f"Failed to train KPCA: {e}")
+            import traceback
+            logger.debug(traceback.format_exc())
+            self._kpca = None
+
+    def _build_projected_space(self, input_space: ConfigurationSpace) -> ConfigurationSpace:
+        """Create the `kpca_i` component space with symmetric heuristic bounds."""
+        target = CS.ConfigurationSpace(
+            name=f"{input_space.name}_kpca",
+            seed=self.seed
+        )
+
+        hps = [
+            CS.UniformFloatHyperparameter(
+                name=f'kpca_{idx}',
+                lower=-np.sqrt(len(self.numeric_param_names)),
+                upper=np.sqrt(len(self.numeric_param_names))
+            )
+            for idx in range(self.n_components)
+        ]
+
+        target.add_hyperparameters(hps)
+        self.output_space = target
+
+        return target
+
+    def project_point(self, point) -> dict:
+        """Embed a configuration into the learned KPCA component space."""
+        if self._kpca is None or self._scaler is None:
+            logger.warning("KPCA not trained, returning empty projection")
+            return {}
+
+        if isinstance(point, Configuration):
+            point_dict = point.get_dictionary()
+        elif isinstance(point, dict):
+            point_dict = point
+        else:
+            logger.warning(f"Unsupported point type: {type(point)}")
+            return {}
+
+        numeric_values = []
+        for param_name in self.numeric_param_names:
+            if param_name in point_dict:
+                value = point_dict[param_name]
+                # NOTE(review): `self.input_space` is presumably stored by the
+                # base CompressionStep during compress() — confirm.
+                hp = self.input_space.get_hyperparameter(param_name)
+                if hasattr(hp, 'lower') and hasattr(hp, 'upper'):
+                    normalized = (value - hp.lower) / (hp.upper - hp.lower)
+                    numeric_values.append(normalized)
+                else:
+                    numeric_values.append(0.0)
+            else:
+                numeric_values.append(0.0)
+
+        if len(numeric_values) != len(self.numeric_param_names):
+            logger.warning(f"Mismatch in numeric values: {len(numeric_values)} != {len(self.numeric_param_names)}")
+            return {}
+
+        X = np.array([numeric_values])
+        X_scaled = self._scaler.transform(X)
+        X_kpca = self._kpca.transform(X_scaled)[0]
+        return {f'kpca_{idx}': float(X_kpca[idx]) for idx in range(len(X_kpca))}
+
+    def unproject_point(self, point: Configuration) -> dict:
+        """
+        KPCA does not need unprojection for evaluation.
+        Sampling happens in original space.
+        """
+        if isinstance(point, Configuration):
+            return point.get_dictionary()
+        elif isinstance(point, dict):
+            return point
+        else:
+            return dict(point)
+
+    def needs_unproject(self) -> bool:
+        return False
+
+    def affects_sampling_space(self) -> bool:
+        # does not affect sampling space (sampling happens in original space)
+        return False
+
+    def get_step_info(self) -> dict:
+        """Report step metadata including component count and kernel settings."""
+        info = super().get_step_info()
+        info['n_components'] = self.n_components
+        info['kernel'] = self.kernel
+        if self.gamma is not None:
+            info['gamma'] = self.gamma
+        return info
diff --git a/openbox/compressor/steps/projection/quantization.py b/openbox/compressor/steps/projection/quantization.py
new file mode 100644
index 000000000..d99ad58e1
--- /dev/null
+++ b/openbox/compressor/steps/projection/quantization.py
@@ -0,0 +1,176 @@
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace, Configuration
+import ConfigSpace as CS
+import ConfigSpace.hyperparameters as CSH
+import numpy as np
+from sklearn.preprocessing import MinMaxScaler
+from openbox import logger
+from .base import TransformativeProjectionStep
+from ... import OptimizerProgress
+
+
+class QuantizationProjectionStep(TransformativeProjectionStep):
+    """Quantize wide numeric hyperparameters into a small integer grid.
+
+    Each quantized knob `name` is replaced by an integer knob `name|q` in
+    [1, max_num_values]; project/unproject translate between the two ranges
+    through a per-knob MinMaxScaler.
+    """
+    def __init__(self,
+                 method: str = 'quantization',
+                 max_num_values: int = 10,
+                 seed: int = 42, adaptive: bool = False,
+                 **kwargs):
+        super().__init__(method=method, **kwargs)
+        self._max_num_values = max_num_values
+        self.seed = seed
+        self._rs = np.random.RandomState(seed=seed)
+
+        # One MinMaxScaler per quantized knob: transform() maps
+        # [1, max_num_values] -> [lower, upper]; inverse_transform the reverse.
+        self._knobs_scalers: dict = {}
+        # When True, update() may grow/shrink the grid during optimization.
+        self.adaptive = adaptive
+
+    def _build_projected_space(self, input_space: ConfigurationSpace) -> ConfigurationSpace:
+        """Build the output space, replacing wide numeric knobs with `name|q` ints."""
+        self._knobs_scalers = {}
+        root_hyperparams = []
+        quantized_params = []
+        unchanged_params = []
+
+        for adaptee_hp in input_space.get_hyperparameters():
+            if not isinstance(adaptee_hp, CSH.UniformIntegerHyperparameter) and not isinstance(adaptee_hp, CSH.UniformFloatHyperparameter):
+                root_hyperparams.append(adaptee_hp)
+                unchanged_params.append(adaptee_hp.name)
+                continue
+
+            if not self._needs_quantization(adaptee_hp):
+                root_hyperparams.append(adaptee_hp)
+                unchanged_params.append(adaptee_hp.name)
+                continue
+
+            # Quantize knob
+            # original value: [lower, upper] => quantized value: [1, max_num_values]
+            lower, upper = adaptee_hp.lower, adaptee_hp.upper
+            scaler = MinMaxScaler(feature_range=(lower, upper))
+            scaler.fit([[1], [self._max_num_values]])
+            self._knobs_scalers[adaptee_hp.name] = scaler
+
+            # Map the original default into the quantized grid.
+            default_value = round(
+                scaler.inverse_transform([[adaptee_hp.default_value]])[0][0]
+            )
+            default_value = max(1, min(self._max_num_values, default_value))
+
+            quantized_hp = CSH.UniformIntegerHyperparameter(
+                f'{adaptee_hp.name}|q', 1, self._max_num_values,
+                default_value=default_value,
+            )
+            root_hyperparams.append(quantized_hp)
+
+            # Record quantization info
+            original_num_values = upper - lower + 1
+            compression_ratio = self._max_num_values / original_num_values
+            quantized_params.append({
+                'name': adaptee_hp.name,
+                'type': 'UniformIntegerHyperparameter',
+                'original_range': (int(lower), int(upper)),
+                'compressed_range': (1, self._max_num_values),
+                'original_num_values': original_num_values,
+                'quantized_num_values': self._max_num_values,
+                'compression_ratio': compression_ratio
+            })
+
+        root = CS.ConfigurationSpace(
+            name=input_space.name,
+            seed=self.seed,
+        )
+        root.add_hyperparameters(root_hyperparams)
+
+        if quantized_params:
+            avg_compression_ratio = sum(p['compression_ratio'] for p in quantized_params) / len(quantized_params)
+        else:
+            avg_compression_ratio = 1.0
+
+        self.compression_info = {
+            'compressed_params': quantized_params,
+            'unchanged_params': unchanged_params,
+            'total_quantized': len(quantized_params),
+            'avg_compression_ratio': avg_compression_ratio
+        }
+
+        return root
+
+    def _needs_quantization(self, hp: CSH.UniformIntegerHyperparameter) -> bool:
+        # NOTE(review): also called for UniformFloatHyperparameter, where the
+        # `upper - lower + 1` value-count formula only makes sense for
+        # integers — confirm float handling is intended.
+        return (hp.upper - hp.lower + 1) > self._max_num_values
+
+    def unproject_point(self, point: Configuration) -> dict:
+        """Map a point from the quantized space back to the original space."""
+        coords = point.get_dictionary() if hasattr(point, 'get_dictionary') else dict(point)
+        # NOTE(review): `self.input_space` is presumably set by the base
+        # CompressionStep before this is called — confirm.
+        valid_dim_names = [dim.name for dim in self.input_space.get_hyperparameters()]
+        unproject_coords = {}
+
+        for name, value in coords.items():
+            dequantize = name.endswith('|q')
+            if not dequantize:
+                unproject_coords[name] = value
+                continue
+
+            # De-quantize
+            dim_name = name[:-2]
+            if dim_name not in valid_dim_names or dim_name not in self._knobs_scalers:
+                logger.warning(f"Cannot dequantize {name}, keeping original value")
+                unproject_coords[name] = value
+                continue
+
+            scaler = self._knobs_scalers[dim_name]
+            lower, upper = scaler.feature_range
+
+            # NOTE(review): int() truncates while project_point uses round(),
+            # making the round-trip slightly asymmetric — confirm intended.
+            value = int(scaler.transform([[value]])[0][0])
+            value = max(lower, min(upper, value))
+            unproject_coords[dim_name] = value
+
+        return unproject_coords
+
+    def project_point(self, point) -> dict:
+        """Map a point from the original space into the quantized space."""
+        if isinstance(point, Configuration):
+            original_dict = point.get_dictionary()
+        elif isinstance(point, dict):
+            original_dict = point
+        else:
+            original_dict = dict(point)
+
+        quantized_dict = {}
+
+        for name, value in original_dict.items():
+            if name in self._knobs_scalers:
+                scaler = self._knobs_scalers[name]
+                # Use inverse_transform:
+                # original value [lower, upper] -> quantized value [1, max_num_values]
+                # The scaler maps [1, max_num_values] -> [lower, upper],
+                # so inverse maps [lower, upper] -> [1, max_num_values]
+                lower, upper = scaler.feature_range
+                value_clamped = max(lower, min(upper, value))
+                quantized_value = round(scaler.inverse_transform([[value_clamped]])[0][0])
+                quantized_value = max(1, min(self._max_num_values, quantized_value))
+                quantized_dict[f'{name}|q'] = quantized_value
+            else:
+                quantized_dict[name] = value
+
+        return quantized_dict
+
+    def supports_adaptive_update(self) -> bool:
+        # Adaptive grid resizing is opt-in via the `adaptive` flag.
+        return self.adaptive
+
+    def update(self, progress: OptimizerProgress, history: History) -> bool:
+        """Adaptively resize the quantization grid; return True if changed."""
+        # Stagnant: reduce quantization (increase max_num_values) to expand search space
+        if progress.is_stagnant(threshold=5):
+            old_max = self._max_num_values
+            self._max_num_values = min(100, self._max_num_values + 5)  # Increase by 5, cap at 100
+            if self._max_num_values != old_max:
+                logger.info(f"Stagnation detected, increasing quantization factor: {old_max} -> {self._max_num_values}")
+                return True
+
+        # Improving: can increase quantization (decrease max_num_values) to focus search
+        elif progress.has_improvement(threshold=3):
+            old_max = self._max_num_values
+            self._max_num_values = max(5, self._max_num_values - 2)  # Decrease by 2, floor at 5
+            if self._max_num_values != old_max:
+                logger.info(f"Improvement detected, decreasing quantization factor: {old_max} -> {self._max_num_values}")
+                return True
+
+        return False
+
+    def get_step_info(self) -> dict:
+        """Report step metadata including the current grid size."""
+        info = super().get_step_info()
+        info['max_num_values'] = self._max_num_values
+        return info
\ No newline at end of file
diff --git a/openbox/compressor/steps/projection/rembo.py b/openbox/compressor/steps/projection/rembo.py
new file mode 100644
index 000000000..f2599059e
--- /dev/null
+++ b/openbox/compressor/steps/projection/rembo.py
@@ -0,0 +1,194 @@
+import numpy as np
+from typing import Optional, List
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace, Configuration
+import ConfigSpace as CS
+import ConfigSpace.hyperparameters as CSH
+from sklearn.preprocessing import MinMaxScaler
+from openbox import logger
+from .base import TransformativeProjectionStep
+
+
+class REMBOProjectionStep(TransformativeProjectionStep):
+    """REMBO projection: optimization happens in a low-dimensional box
+    [-sqrt(low_dim), sqrt(low_dim)]^low_dim; points are expanded to the full
+    space through a dense Gaussian random matrix A.
+    """
+    def __init__(self,
+                 method: str = 'rembo',
+                 low_dim: int = 10,
+                 max_num_values: Optional[int] = None,
+                 seed: int = 42,
+                 **kwargs):
+        super().__init__(method=method, **kwargs)
+        self.low_dim = low_dim
+        self._max_num_values = max_num_values
+        self.seed = seed
+        self._rs = np.random.RandomState(seed=seed)
+
+        # Projection matrix and scalers
+        self._A: Optional[np.ndarray] = None
+        self._scaler: Optional[MinMaxScaler] = None
+        self._q_scaler: Optional[MinMaxScaler] = None
+        self.active_hps: List = []
+
+        # Caches make project/unproject exact inverses for already-seen points
+        self._low_to_high_cache: dict = {}
+        self._high_to_low_cache: dict = {}
+
+    def _build_projected_space(self, input_space: ConfigurationSpace) -> ConfigurationSpace:
+        """Create the low-dim space (optionally quantized) and draw matrix A."""
+        self.active_hps = list(input_space.get_hyperparameters())
+
+        # Create lower dimensionality configuration space
+        # Space bounds are [-sqrt(low_dim), sqrt(low_dim)] rather than [-1, 1]
+        box_bound = np.sqrt(self.low_dim)
+        target = CS.ConfigurationSpace(
+            name=input_space.name,
+            seed=self.seed
+        )
+
+        if self._max_num_values is None:
+            # Continuous low-dimensional space
+            hps = [
+                CS.UniformFloatHyperparameter(
+                    name=f'rembo_{idx}',
+                    lower=-box_bound,
+                    upper=box_bound
+                )
+                for idx in range(self.low_dim)
+            ]
+            self._q_scaler = None
+        else:
+            # Quantized low-dimensional space
+            logger.info(f'Using quantization: q={self._max_num_values}')
+            hps = [
+                CS.UniformIntegerHyperparameter(
+                    name=f'rembo_{idx}',
+                    lower=1,
+                    upper=self._max_num_values
+                )
+                for idx in range(self.low_dim)
+            ]
+            # (1, q) -> (-sqrt(low_dim), sqrt(low_dim)) scaling
+            self._q_scaler = MinMaxScaler(feature_range=(-box_bound, box_bound))
+            ones = np.ones(shape=self.low_dim)
+            self._q_scaler.fit([ones, ones * self._max_num_values])
+
+        target.add_hyperparameters(hps)
+        self.output_space = target
+
+        # (-sqrt, sqrt) -> (0, 1) scaling
+        self._scaler = MinMaxScaler(feature_range=(0, 1))
+        bbound_vector = np.ones(len(self.active_hps)) * box_bound
+        # Use two points (minimum & maximum)
+        self._scaler.fit(
+            np.array([-bbound_vector, bbound_vector])
+        )
+
+        self._A = self._rs.normal(
+            0, 1, (len(self.active_hps), self.low_dim)
+        )
+
+        return target
+
+    def unproject_point(self, point: Configuration) -> dict:
+        """Expand a low-dim point (dequantizing if needed) to the full space."""
+        low_dim_point_raw = np.array([
+            point.get(f'rembo_{idx}') for idx in range(self.low_dim)
+        ])
+
+        low_dim_key = tuple(low_dim_point_raw.tolist())
+        if low_dim_key in self._low_to_high_cache:
+            return self._low_to_high_cache[low_dim_key].copy()
+
+        # Dequantize if needed
+        if self._max_num_values is not None:
+            assert self._q_scaler is not None
+            # Dequantize: (1, q) -> (-sqrt(low_dim), sqrt(low_dim))
+            low_dim_point = self._q_scaler.transform([low_dim_point_raw])[0]
+        else:
+            low_dim_point = low_dim_point_raw
+
+        # Project: (-sqrt(low_dim), sqrt(low_dim)) -> (0, 1)
+        high_dim_point = [
+            np.dot(self._A[idx, :], low_dim_point)
+            for idx in range(len(self.active_hps))
+        ]
+        high_dim_point = self._scaler.transform([high_dim_point])[0]
+
+        # Transform back to original space
+        high_dim_conf = {}
+        dims_clipped = 0
+        for hp, value in zip(self.active_hps, high_dim_point):
+            if value <= 0 or value >= 1:
+                logger.debug(f'Point clipped in dim: {hp.name}')
+                dims_clipped += 1
+                # Clip value to [0, 1]
+                value = np.clip(value, 0., 1.)
+
+            if isinstance(hp, CS.CategoricalHyperparameter):
+                index = int(value * len(hp.choices))
+                index = max(0, min(len(hp.choices) - 1, index))
+                value = hp.choices[index]
+            elif isinstance(hp, CS.hyperparameters.NumericalHyperparameter):
+                value = hp._transform(value)
+                value = max(hp.lower, min(hp.upper, value))
+            else:
+                raise NotImplementedError(f"Unsupported hyperparameter type: {type(hp)}")
+
+            high_dim_conf[hp.name] = value
+
+        if dims_clipped > 0:
+            logger.warning(f'Clipped {dims_clipped} dimensions during unprojection')
+
+        self._low_to_high_cache[low_dim_key] = high_dim_conf.copy()
+        high_dim_key = tuple(sorted(high_dim_conf.items()))
+        self._high_to_low_cache[high_dim_key] = low_dim_key
+
+        return high_dim_conf
+
+    def _approximate_project(self, high_dim_dict: dict) -> dict:
+        """Approximate the low-dim preimage of a full-space configuration."""
+        high_dim_array = self._normalize_high_dim_config(high_dim_dict, self.active_hps)
+
+        # inverse transform: (0, 1) -> (-sqrt(low_dim), sqrt(low_dim))
+        high_dim_scaled = self._scaler.inverse_transform([high_dim_array])[0]
+
+        # pseudoinverse projection: A+ @ high_dim = low_dim
+        # where A+ = (A^T @ A)^(-1) @ A^T (Moore-Penrose pseudoinverse)
+        A_pinv = np.linalg.pinv(self._A)
+        low_dim_approx = A_pinv @ high_dim_scaled
+
+        if self._max_num_values is not None and self._q_scaler is not None:
+            low_dim_approx = self._q_scaler.inverse_transform([low_dim_approx])[0]
+            low_dim_approx = np.round(low_dim_approx).astype(int)
+            low_dim_approx = np.clip(low_dim_approx, 1, self._max_num_values)
+            low_dim_result = {f'rembo_{idx}': int(low_dim_approx[idx]) for idx in range(self.low_dim)}
+        else:
+            box_bound = np.sqrt(self.low_dim)
+            low_dim_approx = np.clip(low_dim_approx, -box_bound, box_bound)
+            low_dim_result = {f'rembo_{idx}': float(low_dim_approx[idx]) for idx in range(self.low_dim)}
+
+        logger.warning(f"Approximated projection: {len(high_dim_dict)} dims -> {self.low_dim} dims")
+        return low_dim_result
+
+    def project_point(self, point) -> dict:
+        """Map a full-space point to the low-dim space (cache hit is exact)."""
+        if isinstance(point, Configuration):
+            high_dim_dict = point.get_dictionary()
+        elif isinstance(point, dict):
+            high_dim_dict = point
+        else:
+            high_dim_dict = dict(point)
+
+        high_dim_key = tuple(sorted(high_dim_dict.items()))
+        if high_dim_key in self._high_to_low_cache:
+            low_dim_key = self._high_to_low_cache[high_dim_key]
+            # BUGFIX: keep the cached values' original types. In quantized
+            # mode the output space uses UniformIntegerHyperparameter, so the
+            # previous unconditional float() coercion produced values invalid
+            # for that space (the approximation path correctly returns ints).
+            return {f'rembo_{idx}': low_dim_key[idx] for idx in range(self.low_dim)}
+        else:
+            logger.warning("Cache miss in project_point, using pseudoinverse approximation")
+            low_dim_result = self._approximate_project(high_dim_dict)
+            low_dim_key = tuple(low_dim_result[f'rembo_{i}'] for i in range(self.low_dim))
+            self._high_to_low_cache[high_dim_key] = low_dim_key
+
+        return low_dim_result
+
+    def get_step_info(self) -> dict:
+        """Report step metadata including low_dim and quantization setting."""
+        info = super().get_step_info()
+        info['low_dim'] = self.low_dim
+        if self._max_num_values is not None:
+            info['max_num_values'] = self._max_num_values
+        return info
\ No newline at end of file
diff --git a/openbox/compressor/steps/range/__init__.py b/openbox/compressor/steps/range/__init__.py
new file mode 100644
index 000000000..4fc565b6c
--- /dev/null
+++ b/openbox/compressor/steps/range/__init__.py
@@ -0,0 +1,14 @@
+from .base import RangeCompressionStep
+from .boundary import BoundaryRangeStep
+from .expert import ExpertRangeStep
+from .shap import SHAPBoundaryRangeStep
+from .kde import KDEBoundaryRangeStep
+
+__all__ = [
+ 'RangeCompressionStep',
+ 'BoundaryRangeStep',
+ 'ExpertRangeStep',
+ 'SHAPBoundaryRangeStep',
+ 'KDEBoundaryRangeStep',
+]
+
diff --git a/openbox/compressor/steps/range/base.py b/openbox/compressor/steps/range/base.py
new file mode 100644
index 000000000..2082bc0b0
--- /dev/null
+++ b/openbox/compressor/steps/range/base.py
@@ -0,0 +1,130 @@
+import copy
+from typing import Optional, List, Dict
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace
+from openbox import logger
+
+from ... import CompressionStep
+
+
+class RangeCompressionStep(CompressionStep):
+    """Base class for range-compression steps: shrink hyperparameter ranges
+    (numeric bounds / categorical choices) without changing the set of
+    dimensions.
+    """
+    def __init__(self, method: str = 'boundary', **kwargs):
+        super().__init__('range_compression', **kwargs)
+        self.method = method
+        # Deep copy of the uncompressed input space, kept for reference.
+        self.original_space: Optional[ConfigurationSpace] = None
+
+    def compress(self, input_space: ConfigurationSpace,
+                 space_history: Optional[List[History]] = None,
+                 source_similarities: Optional[Dict[int, float]] = None,
+                 **kwargs) -> ConfigurationSpace:
+        """Compute the compressed space and record per-parameter details."""
+        if self.method == 'none':
+            logger.info("Range compression disabled, returning input space")
+            return input_space
+
+        self.original_space = copy.deepcopy(input_space)
+        compressed_space = self._compute_compressed_space(input_space, space_history, source_similarities)
+
+        self.compression_info = self._collect_compression_details(input_space, compressed_space)
+
+        logger.info(f"Range compression: {len(input_space.get_hyperparameters())} parameters compressed")
+        return compressed_space
+
+    def _collect_compression_details(self, input_space: ConfigurationSpace,
+                                     compressed_space: ConfigurationSpace) -> dict:
+        """Summarize which parameters were narrowed and by how much."""
+        details = {
+            'compressed_params': [],
+            'unchanged_params': [],
+            'avg_compression_ratio': 1.0
+        }
+
+        # PERF: hoisted out of the loop — rebuilding this name list per
+        # parameter made the scan O(n^2) in the number of hyperparameters.
+        compressed_names = {h.name for h in compressed_space.get_hyperparameters()}
+
+        for hp in input_space.get_hyperparameters():
+            name = hp.name
+            if name not in compressed_names:
+                continue
+
+            compressed_hp = compressed_space.get_hyperparameter(name)
+
+            if hasattr(hp, 'lower') and hasattr(hp, 'upper'):
+                original_range = (float(hp.lower), float(hp.upper))
+                compressed_range = (float(compressed_hp.lower), float(compressed_hp.upper))
+
+                if abs(original_range[0] - compressed_range[0]) > 1e-6 or abs(original_range[1] - compressed_range[1]) > 1e-6:
+                    compression_ratio = (compressed_range[1] - compressed_range[0]) / (original_range[1] - original_range[0])
+                    details['compressed_params'].append({
+                        'name': name,
+                        'type': type(hp).__name__,
+                        'original_range': original_range,
+                        'compressed_range': compressed_range,
+                        'compression_ratio': compression_ratio
+                    })
+                else:
+                    details['unchanged_params'].append(name)
+            elif hasattr(hp, 'choices'):
+                original_choices = list(hp.choices)
+                compressed_choices = list(compressed_hp.choices)
+
+                if original_choices != compressed_choices:
+                    details['compressed_params'].append({
+                        'name': name,
+                        'type': 'Categorical',
+                        'original_choices': original_choices,
+                        'compressed_choices': compressed_choices,
+                        'compression_ratio': len(compressed_choices) / len(original_choices)
+                    })
+                else:
+                    details['unchanged_params'].append(name)
+
+        if details['compressed_params']:
+            details['avg_compression_ratio'] = sum(p['compression_ratio'] for p in details['compressed_params']) / len(details['compressed_params'])
+        else:
+            details['avg_compression_ratio'] = 1.0
+
+        return details
+
+    def _get_fixed_params(self) -> set:
+        # NOTE(review): `filling_strategy` is presumably set by the base
+        # CompressionStep — confirm.
+        if self.filling_strategy is not None:
+            return set(self.filling_strategy.fixed_values.keys())
+        return set()
+
+    def _compute_compressed_space(self,
+                                  input_space: ConfigurationSpace,
+                                  space_history: Optional[List[History]] = None,
+                                  source_similarities: Optional[Dict[int, float]] = None,
+                                  **kwargs) -> ConfigurationSpace:
+        # Identity by default; subclasses implement the actual narrowing.
+        return copy.deepcopy(input_space)
+
+    def project_point(self, point) -> dict:
+        # project a point from input_space to output_space.
+        # clip values to compressed ranges and fill missing parameters.
+        if hasattr(point, 'get_dictionary'):
+            point_dict = point.get_dictionary()
+        elif isinstance(point, dict):
+            point_dict = point
+        else:
+            point_dict = dict(point)
+
+        # if no compression was applied (output_space not set), return as is
+        if self.output_space is None:
+            return point_dict
+
+        # clip values to the compressed ranges
+        from ...filling import clip_values_to_space
+        clipped_dict = clip_values_to_space(point_dict, self.output_space, report=False)
+
+        # fill missing parameters if needed
+        if self.filling_strategy is not None:
+            clipped_dict = self.filling_strategy.fill_missing_parameters(
+                clipped_dict, self.output_space
+            )
+
+        return clipped_dict
+
+    def needs_unproject(self) -> bool:
+        # Ranges are only narrowed, never transformed: no back-mapping needed.
+        return False
+
+    def affects_sampling_space(self) -> bool:
+        # Sampling happens in the narrowed space.
+        return True
+
+    def get_step_info(self) -> dict:
+        info = super().get_step_info()
+        return info
\ No newline at end of file
diff --git a/openbox/compressor/steps/range/boundary.py b/openbox/compressor/steps/range/boundary.py
new file mode 100644
index 000000000..7f6b067cc
--- /dev/null
+++ b/openbox/compressor/steps/range/boundary.py
@@ -0,0 +1,137 @@
+import copy
+import numpy as np
+import pandas as pd
+from typing import Optional, List, Tuple, Dict
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace
+from openbox import logger
+from .base import RangeCompressionStep
+from ...utils import (
+ create_space_from_ranges,
+ extract_numeric_hyperparameters,
+ extract_top_samples_from_history,
+)
+from ...sampling import MixedRangeSamplingStrategy
+
+
class BoundaryRangeStep(RangeCompressionStep):
    """Compress numeric parameter ranges to a band around the best samples.

    For every numeric hyperparameter, the top ``top_ratio`` fraction of
    observations (by objective) is collected across all histories, and the
    new range is the intersection of the observed min/max with a
    mean +/- ``sigma`` * std band, computed in normalized [0, 1] coordinates.
    """

    @staticmethod
    def _clamp_range_bounds(min_val: float, max_val: float,
                            param_values: np.ndarray,
                            original_space: ConfigurationSpace,
                            param_name: str) -> Tuple[float, float]:
        """Repair an inverted range using the observed values, then clamp
        both bounds into the parameter's original [lower, upper] interval."""
        if min_val > max_val:
            # Inverted band (e.g. when std is tiny): fall back to data extremes.
            min_val = np.min(param_values)
            max_val = np.max(param_values)

        hp = original_space.get_hyperparameter(param_name)
        original_min = hp.lower
        original_max = hp.upper

        min_val = max(min_val, original_min)
        max_val = min(max_val, original_max)
        return min_val, max_val

    def __init__(self,
                 method: str = 'boundary',
                 top_ratio: float = 0.8,
                 sigma: float = 2.0,
                 enable_mixed_sampling: bool = True,
                 initial_prob: float = 0.9,
                 seed: Optional[int] = None,
                 **kwargs):
        """
        Parameters
        ----------
        method : name of this compression method (forwarded to the base step).
        top_ratio : fraction of best observations kept per history.
        sigma : half-width (in std units) of the band kept around the mean.
        enable_mixed_sampling : if True, get_sampling_strategy() exposes a
            MixedRangeSamplingStrategy over the compressed + original spaces.
        initial_prob : initial probability of sampling the compressed space.
        seed : random seed forwarded to the sampling strategy.
        """
        super().__init__(method=method, **kwargs)
        self.top_ratio = top_ratio
        self.sigma = sigma
        self.enable_mixed_sampling = enable_mixed_sampling
        self.initial_prob = initial_prob
        self.seed = seed

    def _compute_compressed_space(self,
                                  input_space: ConfigurationSpace,
                                  space_history: Optional[List[History]] = None,
                                  source_similarities: Optional[Dict[int, float]] = None) -> ConfigurationSpace:
        """Return a copy of ``input_space`` with numeric ranges tightened from
        history; falls back to a plain copy when no usable data exists."""
        if not space_history:
            logger.warning("No space history provided for boundary compression, returning input space")
            return copy.deepcopy(input_space)

        numeric_param_names, _ = extract_numeric_hyperparameters(input_space)

        if not numeric_param_names:
            logger.warning("No numeric hyperparameters found, returning input space")
            return copy.deepcopy(input_space)

        compressed_ranges = self._compute_simple_ranges(
            space_history, numeric_param_names, input_space
        )

        compressed_space = create_space_from_ranges(input_space, compressed_ranges)
        logger.info(f"Boundary range compression: {len(compressed_ranges)} parameters compressed")
        return compressed_space

    def _compute_simple_ranges(self,
                               space_history: List[History],
                               numeric_param_names: List[str],
                               original_space: ConfigurationSpace) -> Dict[str, Tuple[float, float]]:
        """Compute per-parameter (min, max) ranges from the top samples."""
        all_x, _ = extract_top_samples_from_history(
            space_history, numeric_param_names, original_space,
            top_ratio=self.top_ratio, normalize=True
        )

        if len(all_x) == 0:
            return {}

        X_combined = np.vstack(all_x)

        # Parameters pinned by the filling strategy keep their full range.
        fixed_params = self._get_fixed_params()

        compressed_ranges = {}
        for i, param_name in enumerate(numeric_param_names):
            if param_name in fixed_params:
                logger.debug(f"Skipping range compression for fixed parameter '{param_name}'")
                continue
            values_norm = X_combined[:, i]

            # Band: observed extremes intersected with mean +/- sigma * std.
            mean = np.mean(values_norm)
            std = np.std(values_norm)
            min_val_norm = max(np.min(values_norm), mean - self.sigma * std)
            max_val_norm = min(np.max(values_norm), mean + self.sigma * std)

            # Map normalized bounds back to the original parameter scale.
            hp = original_space.get_hyperparameter(param_name)
            lower = hp.lower
            upper = hp.upper
            range_size = upper - lower

            min_val = lower + min_val_norm * range_size
            max_val = lower + max_val_norm * range_size

            values_original = lower + values_norm * range_size

            min_val, max_val = self._clamp_range_bounds(
                min_val, max_val, values_original, original_space, param_name
            )

            compressed_ranges[param_name] = (min_val, max_val)

        return compressed_ranges

    def get_sampling_strategy(self):
        """Return a MixedRangeSamplingStrategy, or None when mixed sampling is
        disabled or no original space has been recorded yet."""
        if self.enable_mixed_sampling and self.original_space is not None:
            # BUGFIX: explicit None check instead of truthiness. A
            # ConfigurationSpace's truth value can follow its length, so a
            # valid output space must not silently fall back to the original.
            compressed_space = self.output_space if self.output_space is not None else self.original_space
            return MixedRangeSamplingStrategy(
                compressed_space=compressed_space,
                original_space=self.original_space,
                initial_prob=self.initial_prob,
                method='boundary',
                seed=self.seed
            )
        return None

    def get_step_info(self) -> dict:
        """Extend the base step info with this step's configuration."""
        info = super().get_step_info()
        info['top_ratio'] = self.top_ratio
        info['sigma'] = self.sigma
        info['enable_mixed_sampling'] = self.enable_mixed_sampling
        info['initial_prob'] = self.initial_prob
        return info
\ No newline at end of file
diff --git a/openbox/compressor/steps/range/expert.py b/openbox/compressor/steps/range/expert.py
new file mode 100644
index 000000000..7e1371fa7
--- /dev/null
+++ b/openbox/compressor/steps/range/expert.py
@@ -0,0 +1,98 @@
+"""
+Expert-specified range compression step.
+"""
+
+import copy
+from typing import Optional, List, Dict, Tuple
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace
+from openbox import logger
+from .base import RangeCompressionStep
+from ...sampling import MixedRangeSamplingStrategy
+from ...utils import create_space_from_ranges
+
+
class ExpertRangeStep(RangeCompressionStep):
    """Range compression driven by user-supplied (expert) ranges.

    ``expert_ranges`` maps parameter names to (min, max) tuples; only ranges
    that name an existing, numeric, non-fixed parameter and remain valid
    after clamping to the original bounds are applied.
    """

    def __init__(self,
                 method: str = 'expert',
                 expert_ranges: Optional[Dict[str, Tuple[float, float]]] = None,
                 enable_mixed_sampling: bool = False,
                 initial_prob: float = 0.9,
                 seed: Optional[int] = None,
                 **kwargs):
        """
        Parameters
        ----------
        method : name of this compression method (forwarded to the base step).
        expert_ranges : {param_name: (min, max)} ranges to enforce.
        enable_mixed_sampling : if True, get_sampling_strategy() exposes a
            MixedRangeSamplingStrategy over the compressed + original spaces.
        initial_prob : initial probability of sampling the compressed space.
        seed : random seed forwarded to the sampling strategy.
        """
        super().__init__(method=method, **kwargs)

        self.expert_ranges = expert_ranges or {}
        self.enable_mixed_sampling = enable_mixed_sampling
        self.initial_prob = initial_prob
        self.seed = seed

    def _compute_compressed_space(self,
                                  input_space: ConfigurationSpace,
                                  space_history: Optional[List[History]] = None,
                                  source_similarities: Optional[Dict[int, float]] = None) -> ConfigurationSpace:
        """Apply the validated expert ranges; return a plain copy of
        ``input_space`` when no range survives validation."""
        if not self.expert_ranges:
            logger.warning("No expert ranges provided, returning input space")
            return copy.deepcopy(input_space)

        valid_ranges = {}
        param_names = input_space.get_hyperparameter_names()

        # Parameters pinned by the filling strategy keep their full range.
        fixed_params = self._get_fixed_params()

        for param_name, (min_val, max_val) in self.expert_ranges.items():
            if param_name not in param_names:
                logger.warning(f"Expert parameter '{param_name}' not found in configuration space")
                continue
            if param_name in fixed_params:
                logger.debug(f"Skipping range compression for fixed parameter '{param_name}'")
                continue

            if min_val >= max_val:
                logger.warning(f"Invalid expert range [{min_val}, {max_val}] for {param_name}, skipping")
                continue

            # Only numeric hyperparameters (those with lower/upper) can be narrowed.
            hp = input_space.get_hyperparameter(param_name)
            if not (hasattr(hp, 'lower') and hasattr(hp, 'upper')):
                logger.warning(f"Parameter '{param_name}' is not numeric, skipping")
                continue

            # Clamp the expert range into the original bounds; it may collapse.
            original_min = hp.lower
            original_max = hp.upper
            min_val = max(min_val, original_min)
            max_val = min(max_val, original_max)

            if min_val >= max_val:
                logger.warning(f"Expert range for {param_name} is invalid after clamping, skipping")
                continue
            valid_ranges[param_name] = (min_val, max_val)

        if not valid_ranges:
            logger.warning("No valid expert ranges, returning input space")
            return copy.deepcopy(input_space)

        compressed_space = create_space_from_ranges(input_space, valid_ranges)
        logger.info(f"Expert range compression: {len(valid_ranges)} parameters compressed")
        logger.info(f"Compressed parameters: {list(valid_ranges.keys())}")
        return compressed_space

    def get_sampling_strategy(self):
        """Return a MixedRangeSamplingStrategy, or None when mixed sampling is
        disabled or no original space has been recorded yet."""
        if self.enable_mixed_sampling and self.original_space is not None:
            # BUGFIX: explicit None check instead of truthiness. A
            # ConfigurationSpace's truth value can follow its length, so a
            # valid output space must not silently fall back to the original.
            compressed_space = self.output_space if self.output_space is not None else self.original_space
            return MixedRangeSamplingStrategy(
                compressed_space=compressed_space,
                original_space=self.original_space,
                initial_prob=self.initial_prob,
                method='expert',
                seed=self.seed
            )
        return None

    def get_step_info(self) -> dict:
        """Extend the base step info with this step's configuration."""
        info = super().get_step_info()
        info['n_expert_ranges'] = len(self.expert_ranges)
        info['expert_ranges'] = self.expert_ranges
        info['enable_mixed_sampling'] = self.enable_mixed_sampling
        info['initial_prob'] = self.initial_prob
        info['seed'] = self.seed
        return info
diff --git a/openbox/compressor/steps/range/kde.py b/openbox/compressor/steps/range/kde.py
new file mode 100644
index 000000000..2e838bffc
--- /dev/null
+++ b/openbox/compressor/steps/range/kde.py
@@ -0,0 +1,184 @@
+import copy
+import numpy as np
+from typing import Optional, List, Tuple, Dict
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace
+from scipy.stats import gaussian_kde
+from openbox import logger
+from .boundary import BoundaryRangeStep
+from ...utils import (
+ create_space_from_ranges,
+ extract_numeric_hyperparameters,
+ extract_numeric_values_from_configs,
+)
+
+
class KDEBoundaryRangeStep(BoundaryRangeStep):
    """Range compression based on a weighted Gaussian KDE over top samples.

    Per parameter, top samples from each source history are weighted by rank
    and task similarity, a KDE is fit over them, and the new range is the
    smallest grid region covering ``kde_coverage`` of the density mass.
    """

    def __init__(self,
                 method: str = 'kde_boundary',
                 kde_coverage: float = 0.6,  # Coverage ratio for KDE interval
                 enable_mixed_sampling: bool = True,
                 initial_prob: float = 0.9,
                 seed: Optional[int] = None,
                 top_ratio: Optional[float] = None,
                 **kwargs):
        """
        Parameters
        ----------
        method : name of this compression method (forwarded to the base step).
        kde_coverage : fraction of KDE density mass the new range must cover.
        enable_mixed_sampling / initial_prob / seed : see BoundaryRangeStep.
        top_ratio : fraction of best observations kept per history
            (defaults to 0.8 when not given).
        """
        # BUGFIX: top_ratio previously stayed None and crashed later in
        # _compute_kde_based_ranges (int(len * None)). Coerce to the
        # BoundaryRangeStep default while keeping the public signature.
        if top_ratio is None:
            top_ratio = 0.8
        super().__init__(
            method=method,
            top_ratio=top_ratio,
            sigma=2.0,  # Not used in KDE method
            enable_mixed_sampling=enable_mixed_sampling,
            initial_prob=initial_prob,
            seed=seed,
            **kwargs
        )
        self.kde_coverage = kde_coverage
        logger.info(f"KDEBoundaryRangeStep initialized with top_ratio={top_ratio}, kde_coverage={kde_coverage}")

    def _compute_compressed_space(self,
                                  input_space: ConfigurationSpace,
                                  space_history: Optional[List[History]] = None,
                                  source_similarities: Optional[Dict[int, float]] = None) -> ConfigurationSpace:
        """Return a copy of ``input_space`` with numeric ranges tightened by
        KDE coverage; falls back to a plain copy when no usable data exists."""
        if not space_history:
            logger.warning("No space history provided for KDE boundary compression, returning input space")
            return copy.deepcopy(input_space)

        numeric_param_names, _ = extract_numeric_hyperparameters(input_space)

        if not numeric_param_names:
            logger.warning("No numeric hyperparameters found, returning input space")
            return copy.deepcopy(input_space)

        compressed_ranges = self._compute_kde_based_ranges(
            space_history, numeric_param_names, input_space, source_similarities
        )

        compressed_space = create_space_from_ranges(input_space, compressed_ranges)
        logger.info(f"KDE boundary range compression: {len(compressed_ranges)} parameters compressed")

        return compressed_space

    def _compute_kde_based_ranges(self,
                                  space_history: List[History],
                                  numeric_param_names: List[str],
                                  original_space: ConfigurationSpace,
                                  source_similarities: Optional[Dict[int, float]] = None) -> Dict[str, Tuple[float, float]]:
        """Compute per-parameter (min, max) ranges via weighted KDE coverage.

        Weights combine per-history similarity (uniform when none is given)
        with the sample's rank among that history's top samples. On KDE
        failure the observed min/max (clamped) is used instead.
        """
        median_top_ratio = self.top_ratio

        compressed_ranges = {}
        fixed_params = self._get_fixed_params()
        for param_name in numeric_param_names:
            if param_name in fixed_params:
                logger.debug(f"Skipping range compression for fixed parameter '{param_name}'")
                continue

            weighted_values = []
            weights = []

            for task_idx, history in enumerate(space_history):
                if len(history) == 0:
                    continue

                # Per-history weight: supplied similarity, or uniform 1/n.
                if source_similarities:
                    similarity = source_similarities.get(task_idx, 0.0)
                    if similarity <= 0:
                        continue
                else:
                    n_histories = len(space_history)
                    similarity = 1.0 / n_histories if n_histories > 0 else 0.0
                    if similarity <= 0:
                        continue

                objectives = history.get_objectives()
                if len(objectives) == 0:
                    continue

                # Keep only finite objectives, then the best top_ratio of them.
                obj_flat = objectives.flatten()
                valid_mask = np.isfinite(obj_flat)
                valid_indices = np.where(valid_mask)[0]

                if len(valid_indices) == 0:
                    logger.warning(f"Task {task_idx}: all objectives are inf/nan, skipping")
                    continue

                valid_objectives = obj_flat[valid_indices]
                sorted_valid_indices = np.argsort(valid_objectives)

                top_n = max(1, int(len(valid_indices) * median_top_ratio))
                top_indices_in_valid = sorted_valid_indices[:top_n]

                top_indices = valid_indices[top_indices_in_valid]

                top_configs = [history.observations[idx].config for idx in top_indices]

                logger.debug(f"KDEBoundaryRangeStep: top_indices: {top_indices}, len(top_indices): {len(top_indices)}")
                param_values = extract_numeric_values_from_configs(
                    top_configs, [param_name], original_space, normalize=False
                )
                for rank, value in enumerate(param_values[:, 0]):
                    if np.isnan(value):
                        continue

                    # Rank-based weight scaled by task similarity.
                    weight = (rank + 1) * similarity
                    weighted_values.append(float(value))
                    weights.append(weight)

            if len(weighted_values) == 0:
                logger.warning(f"No weighted values for parameter {param_name}, skipping")
                continue

            weighted_values = np.array(weighted_values)
            weights = np.array(weights)

            weights = weights / (weights.sum() + 1e-10)

            try:
                kde = gaussian_kde(weighted_values, weights=weights)
            except Exception as e:
                # KDE can fail (e.g. degenerate data); fall back to data extremes.
                logger.warning(f"Failed to build KDE for {param_name}: {e}, using simple range")
                min_val = np.min(weighted_values)
                max_val = np.max(weighted_values)
                hp = original_space.get_hyperparameter(param_name)
                compressed_ranges[param_name] = (
                    max(min_val, hp.lower),
                    min(max_val, hp.upper)
                )
                continue

            hp = original_space.get_hyperparameter(param_name)
            original_min = hp.lower
            original_max = hp.upper

            # Evaluate the KDE on a uniform grid over the original range and
            # pick the densest points until kde_coverage of the mass is covered.
            grid_size = 1000
            grid = np.linspace(original_min, original_max, grid_size)
            kde_density = kde(grid)

            kde_density = kde_density / (kde_density.sum() + 1e-10)

            sorted_indices = np.argsort(kde_density)[::-1]
            cumulative_density = np.cumsum(kde_density[sorted_indices])

            n_points_needed = np.searchsorted(cumulative_density, self.kde_coverage) + 1
            n_points_needed = min(n_points_needed, len(grid))

            selected_indices = sorted_indices[:n_points_needed]
            selected_grid = grid[selected_indices]

            min_val = np.min(selected_grid)
            max_val = np.max(selected_grid)

            min_val, max_val = self._clamp_range_bounds(
                min_val, max_val, weighted_values, original_space, param_name
            )

            compressed_ranges[param_name] = (min_val, max_val)

        logger.info(f"KDE-based ranges computed for {len(compressed_ranges)} parameters")
        return compressed_ranges

    def get_step_info(self) -> dict:
        """Extend the base step info with this step's configuration."""
        info = super().get_step_info()
        info['top_ratio'] = self.top_ratio
        info['kde_coverage'] = self.kde_coverage
        return info
+
diff --git a/openbox/compressor/steps/range/shap.py b/openbox/compressor/steps/range/shap.py
new file mode 100644
index 000000000..4f23c2784
--- /dev/null
+++ b/openbox/compressor/steps/range/shap.py
@@ -0,0 +1,162 @@
+import copy
+import numpy as np
+from typing import Optional, List, Tuple, Dict
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace
+from sklearn.ensemble import RandomForestRegressor
+import shap
+from openbox import logger
+from .boundary import BoundaryRangeStep
+from ...utils import (
+ create_space_from_ranges,
+ extract_numeric_hyperparameters,
+ extract_top_samples_from_history,
+)
+
+
class SHAPBoundaryRangeStep(BoundaryRangeStep):
    """Range compression weighted by SHAP attributions.

    A random forest is fit on the top samples, SHAP values are computed,
    and per parameter a weighted mean +/- sigma*std band is derived where
    each sample's weight combines its SHAP magnitude with the source-task
    similarity.
    """

    def __init__(self,
                 method: str = 'shap_boundary',
                 top_ratio: float = 0.8,
                 sigma: float = 2.0,
                 enable_mixed_sampling: bool = True,
                 initial_prob: float = 0.9,
                 seed: Optional[int] = None,
                 **kwargs):
        """All parameters are forwarded to BoundaryRangeStep; see there."""
        super().__init__(
            method=method,
            top_ratio=top_ratio,
            sigma=sigma,
            enable_mixed_sampling=enable_mixed_sampling,
            initial_prob=initial_prob,
            seed=seed,
            **kwargs
        )

    def _compute_compressed_space(self,
                                  input_space: ConfigurationSpace,
                                  space_history: Optional[List[History]] = None,
                                  source_similarities: Optional[Dict[int, float]] = None) -> ConfigurationSpace:
        """Return a copy of ``input_space`` with SHAP-weighted ranges;
        falls back to a plain copy when no usable data exists."""
        if not space_history:
            logger.warning("No space history provided for SHAP boundary compression, returning input space")
            return copy.deepcopy(input_space)

        # Only parameter names are needed here (consistent with boundary.py).
        numeric_param_names, _ = extract_numeric_hyperparameters(input_space)

        if not numeric_param_names:
            logger.warning("No numeric hyperparameters found, returning input space")
            return copy.deepcopy(input_space)

        compressed_ranges = self._compute_shap_based_ranges(
            space_history, numeric_param_names, input_space, source_similarities
        )

        compressed_space = create_space_from_ranges(input_space, compressed_ranges)
        logger.info(f"SHAP boundary range compression: {len(compressed_ranges)} parameters compressed")

        return compressed_space

    def _compute_shap_based_ranges(self,
                                   space_history: List[History],
                                   numeric_param_names: List[str],
                                   original_space: ConfigurationSpace,
                                   source_similarities: Optional[Dict[int, float]] = None) -> Dict[str, Tuple[float, float]]:
        """Compute per-parameter (min, max) ranges from SHAP-weighted samples."""
        all_x, all_y, sample_history_indices = extract_top_samples_from_history(
            space_history, numeric_param_names, original_space,
            top_ratio=self.top_ratio, normalize=True, return_history_indices=True
        )

        if len(all_x) == 0:
            return {}

        X_combined = np.vstack(all_x)
        y_combined = np.concatenate(all_y)
        sample_history_indices = np.array(sample_history_indices)

        # BUGFIX: `self.seed or 42` discarded a legitimate seed of 0.
        model = RandomForestRegressor(n_estimators=100,
                                      random_state=42 if self.seed is None else self.seed)
        model.fit(X_combined, y_combined)

        explainer = shap.Explainer(model)
        shap_values = explainer(X_combined)
        # Negated absolute SHAP values: every entry is <= 0; magnitude encodes
        # attribution strength regardless of sign.
        shap_vals_array = -np.abs(shap_values.values)
        logger.debug(f"SHAP values: {shap_vals_array}")

        compressed_ranges = {}

        # Parameters pinned by the filling strategy keep their full range.
        fixed_params = self._get_fixed_params()

        for i, param_name in enumerate(numeric_param_names):
            if param_name in fixed_params:
                logger.debug(f"Skipping range compression for fixed parameter '{param_name}'")
                continue

            param_shap = shap_vals_array[:, i]  # Negated |SHAP|, all <= 0
            param_values = X_combined[:, i]

            # "Beneficial" = samples with non-zero SHAP attribution.
            beneficial_mask = param_shap < 0
            beneficial_values = param_values[beneficial_mask]
            beneficial_shap = param_shap[beneficial_mask]
            beneficial_history_indices = sample_history_indices[beneficial_mask]

            if len(beneficial_values) == 0:
                logger.warning(
                    f"Parameter {param_name} has no samples with SHAP < 0. "
                    f"Using all samples with uniform weights."
                )
                beneficial_values = param_values
                beneficial_shap = np.ones_like(param_values)
                beneficial_history_indices = sample_history_indices
            else:
                logger.debug(
                    f"Parameter {param_name}: {len(beneficial_values)}/{len(param_values)} samples "
                    f"have SHAP < 0 (beneficial)"
                )
                beneficial_shap = -beneficial_shap  # Convert negative to positive weights

            # Get similarity weights for beneficial samples
            if source_similarities:
                beneficial_similarities = np.array([
                    source_similarities.get(idx, 0.0) for idx in beneficial_history_indices
                ])
            else:
                beneficial_similarities = np.ones_like(beneficial_shap)

            # Combined weight = SHAP weight * similarity weight
            combined_weights = beneficial_shap * beneficial_similarities

            combined_weights_sum = combined_weights.sum()
            if combined_weights_sum < 1e-10:
                logger.warning(
                    f"Parameter {param_name} has zero combined weights. "
                    f"Using uniform weights for {len(beneficial_values)} beneficial samples."
                )
                weights = np.ones_like(combined_weights) / len(combined_weights)
            else:
                weights = combined_weights / combined_weights_sum

            # Weighted mean/std band in normalized coordinates.
            weighted_mean = np.average(beneficial_values, weights=weights)
            weighted_std = np.sqrt(np.average((beneficial_values - weighted_mean) ** 2, weights=weights))

            min_val_norm = max(np.min(beneficial_values), weighted_mean - self.sigma * weighted_std)
            max_val_norm = min(np.max(beneficial_values), weighted_mean + self.sigma * weighted_std)

            # Map normalized bounds back to the original parameter scale.
            hp = original_space.get_hyperparameter(param_name)
            lower = hp.lower
            upper = hp.upper
            range_size = upper - lower

            min_val = lower + min_val_norm * range_size
            max_val = lower + max_val_norm * range_size

            beneficial_values_original = lower + beneficial_values * range_size

            min_val, max_val = self._clamp_range_bounds(
                min_val, max_val, beneficial_values_original, original_space, param_name
            )
            compressed_ranges[param_name] = (min_val, max_val)

        logger.info(f"SHAP-based ranges computed for {len(compressed_ranges)} parameters")
        return compressed_ranges
+
diff --git a/openbox/compressor/update.py b/openbox/compressor/update.py
new file mode 100644
index 000000000..d291b0633
--- /dev/null
+++ b/openbox/compressor/update.py
@@ -0,0 +1,170 @@
+from abc import ABC, abstractmethod
+from typing import Optional, Tuple
+from openbox.utils.history import History
+from .progress import OptimizerProgress
+from openbox import logger
+
+
class UpdateStrategy(ABC):
    """Strategy interface deciding when and how to change the number of
    selected dimensions (top-k) during optimization."""

    @abstractmethod
    def should_update(self, progress: OptimizerProgress, history: History) -> bool:
        """Return True if the dimension count should be recomputed now."""
        pass

    @abstractmethod
    def compute_new_topk(self,
                         current_topk: int,
                         reduction_ratio: float,
                         min_dimensions: int,
                         max_dimensions: Optional[int],
                         progress: OptimizerProgress) -> Tuple[int, str]:
        """Return the new top-k and a human-readable description of the change."""
        pass

    @abstractmethod
    def get_name(self) -> str:
        """Return a short display name used for logging."""
        pass
+ pass
+
+
class PeriodicUpdateStrategy(UpdateStrategy):
    """Shrink the dimension count every fixed number of iterations."""

    def __init__(self, period: int = 10):
        self.period = period

    def should_update(self, progress: OptimizerProgress, history: History) -> bool:
        """Fire on every ``period``-th iteration (history is unused)."""
        return progress.should_periodic_update(period=self.period)

    def compute_new_topk(self,
                         current_topk: int,
                         reduction_ratio: float,
                         min_dimensions: int,
                         max_dimensions: Optional[int],
                         progress: OptimizerProgress) -> Tuple[int, str]:
        """Reduce top-k by ``reduction_ratio``, floored at ``min_dimensions``."""
        shrunk = current_topk - int(current_topk * reduction_ratio)
        new_topk = max(min_dimensions, shrunk)
        description = (f"Periodic update (iteration {progress.iteration}): "
                       f"reducing dimensions {current_topk} -> {new_topk}")
        return new_topk, description

    def get_name(self) -> str:
        return f"periodic(every {self.period} iters)"
+
+
class StagnationUpdateStrategy(UpdateStrategy):
    """Grow the dimension count when the search has stagnated."""

    def __init__(self, threshold: int = 5):
        self.threshold = threshold

    def should_update(self, progress: OptimizerProgress, history: History) -> bool:
        """Fire after ``threshold`` stagnant iterations (history is unused)."""
        return progress.is_stagnant(threshold=self.threshold)

    def compute_new_topk(self,
                         current_topk: int,
                         reduction_ratio: float,
                         min_dimensions: int,
                         max_dimensions: Optional[int],
                         progress: OptimizerProgress) -> Tuple[int, str]:
        """Increase top-k by ``reduction_ratio``, capped at ``max_dimensions``."""
        grown = current_topk + int(current_topk * reduction_ratio)
        new_topk = grown if max_dimensions is None else min(grown, max_dimensions)
        description = f"Stagnation detected, increasing dimensions: {current_topk} -> {new_topk}"
        return new_topk, description

    def get_name(self) -> str:
        return f"stagnation(threshold={self.threshold})"
+
+
class ImprovementUpdateStrategy(UpdateStrategy):
    """Shrink the dimension count when recent improvement is detected."""

    def __init__(self, threshold: int = 3):
        self.threshold = threshold

    def should_update(self, progress: OptimizerProgress, history: History) -> bool:
        """Fire when improvement exceeds ``threshold`` (history is unused)."""
        return progress.has_improvement(threshold=self.threshold)

    def compute_new_topk(self,
                         current_topk: int,
                         reduction_ratio: float,
                         min_dimensions: int,
                         max_dimensions: Optional[int],
                         progress: OptimizerProgress) -> Tuple[int, str]:
        """Reduce top-k by ``reduction_ratio``, floored at ``min_dimensions``."""
        shrunk = current_topk - int(current_topk * reduction_ratio)
        new_topk = max(min_dimensions, shrunk)
        description = f"Improvement detected, reducing dimensions: {current_topk} -> {new_topk}"
        return new_topk, description

    def get_name(self) -> str:
        return f"improvement(threshold={self.threshold})"
+
+
class CompositeUpdateStrategy(UpdateStrategy):
    """OR-combination of strategies: updates when any member fires and
    delegates top-k computation to the first member that fires."""

    def __init__(self, *strategies: UpdateStrategy):
        self.strategies = strategies

    def should_update(self, progress: OptimizerProgress, history: History) -> bool:
        """True if any member strategy wants an update."""
        return any(s.should_update(progress, history) for s in self.strategies)

    def compute_new_topk(self,
                         current_topk: int,
                         reduction_ratio: float,
                         min_dimensions: int,
                         max_dimensions: Optional[int],
                         progress: OptimizerProgress) -> Tuple[int, str]:
        """Delegate to the first firing member (declaration order = priority);
        return the unchanged top-k when none fires."""
        # check each strategy in order
        # NOTE(review): history is not available in this signature, so None is
        # passed to should_update; the strategies defined in this module ignore
        # history, but a custom strategy that reads it would break - confirm.
        for strategy in self.strategies:
            if strategy.should_update(progress, None):
                return strategy.compute_new_topk(
                    current_topk, reduction_ratio, min_dimensions, max_dimensions, progress
                )
        return current_topk, "No update triggered"

    def get_name(self) -> str:
        names = [s.get_name() for s in self.strategies]
        return f"composite({' OR '.join(names)})"
+
+
class HybridUpdateStrategy(UpdateStrategy):
    """Periodic updates combined with optional stagnation / improvement
    triggers, resolved with fixed priority: stagnation > improvement > periodic."""

    def __init__(self,
                 period: int = 10,
                 stagnation_threshold: Optional[int] = None,
                 improvement_threshold: Optional[int] = None):
        # A threshold of None disables the corresponding trigger entirely.
        self.period = period
        self.stagnation_threshold = stagnation_threshold
        self.improvement_threshold = improvement_threshold

        self.periodic_strategy = PeriodicUpdateStrategy(period)
        self.stagnation_strategy = StagnationUpdateStrategy(stagnation_threshold) if stagnation_threshold is not None else None
        self.improvement_strategy = ImprovementUpdateStrategy(improvement_threshold) if improvement_threshold is not None else None

        # Enabled triggers, used by should_update.
        self.strategies = [self.periodic_strategy]
        if self.stagnation_strategy:
            self.strategies.append(self.stagnation_strategy)
        if self.improvement_strategy:
            self.strategies.append(self.improvement_strategy)

    def should_update(self, progress: OptimizerProgress, history: History) -> bool:
        """True if any enabled trigger fires."""
        return any(s.should_update(progress, history) for s in self.strategies)

    def compute_new_topk(self,
                         current_topk: int,
                         reduction_ratio: float,
                         min_dimensions: int,
                         max_dimensions: Optional[int],
                         progress: OptimizerProgress) -> Tuple[int, str]:
        """Delegate to the highest-priority firing trigger; periodic is the
        unconditional fallback."""
        # Priority: stagnation => improvement => periodic
        # Check stagnation first (highest priority)
        # NOTE(review): should_update is re-invoked with history=None here;
        # safe for the strategies in this module (they ignore history) - confirm.
        if self.stagnation_strategy and self.stagnation_strategy.should_update(progress, None):
            return self.stagnation_strategy.compute_new_topk(
                current_topk, reduction_ratio, min_dimensions, max_dimensions, progress
            )
        if self.improvement_strategy and self.improvement_strategy.should_update(progress, None):
            return self.improvement_strategy.compute_new_topk(
                current_topk, reduction_ratio, min_dimensions, max_dimensions, progress
            )
        return self.periodic_strategy.compute_new_topk(
            current_topk, reduction_ratio, min_dimensions, max_dimensions, progress
        )

    def get_name(self) -> str:
        parts = [f"periodic({self.period})"]
        if self.stagnation_threshold is not None:
            parts.append(f"stagnant({self.stagnation_threshold})")
        if self.improvement_threshold is not None:
            parts.append(f"improve({self.improvement_threshold})")
        return " OR ".join(parts)
+
diff --git a/openbox/compressor/utils/__init__.py b/openbox/compressor/utils/__init__.py
new file mode 100644
index 000000000..8d318b581
--- /dev/null
+++ b/openbox/compressor/utils/__init__.py
@@ -0,0 +1,291 @@
+import json
+import copy
+import numpy as np
+import pandas as pd
+from typing import Dict, Any, Tuple, List, Optional, Union
+from openbox.utils.history import History
+from ConfigSpace import ConfigurationSpace, Configuration
+from ConfigSpace.hyperparameters import UniformIntegerHyperparameter, UniformFloatHyperparameter
+from openbox import space as sp, logger as _logger
+
def create_param(key, value):
    """Build an OpenBox hyperparameter from one JSON spec entry.

    ``value`` must carry ``type`` plus the type-specific fields
    (min/max/default for numeric types, enum_values/choices for
    enum/categorical); ``q`` is an optional quantization step.
    """
    param_type = value['type']
    quantization = value.get('q', None)

    if param_type == 'integer':
        return sp.Int(key, value['min'], value['max'], default_value=value['default'], q=quantization)
    if param_type == 'real':
        return sp.Real(key, value['min'], value['max'], default_value=value['default'], q=quantization)
    if param_type == 'enum':
        return sp.Categorical(key, value['enum_values'], default_value=value['default'])
    if param_type == 'categorical':
        return sp.Categorical(key, value['choices'], default_value=value['default'])
    raise ValueError(f"Unsupported type: {param_type}")
+
def parse_combined_space(json_file_origin, json_file_new):
    """Merge two search-space definitions into a single sp.Space.

    Each argument is either a path to a JSON spec (entries parsed via
    create_param) or an already-built space object. Parameters from the
    first source are added first; parameters from the second source are
    added only when the name is not yet present (first source wins).
    """
    if isinstance(json_file_origin, str):
        with open(json_file_origin, 'r') as f:
            conf = json.load(f)
        space = sp.Space()
        for key, value in conf.items():
            if key not in space.keys():
                para = create_param(key, value)
                space.add_variable(para)
    else:
        # Space-like object: deep-copy so the caller's space is not mutated.
        space = copy.deepcopy(json_file_origin)

    if isinstance(json_file_new, str):
        with open(json_file_new, 'r') as f:
            conf_new = json.load(f)
        for key, value in conf_new.items():
            if key not in space.keys():
                para = create_param(key, value)
                space.add_variable(para)
    else:
        # NOTE(review): assumes json_file_new exposes get_hyperparameters()
        # (ConfigurationSpace-like) - confirm against actual callers.
        for param in json_file_new.get_hyperparameters():
            if param.name not in space.keys():
                space.add_variable(param)

    return space
+
def create_space_from_ranges(
    original_space: ConfigurationSpace,
    compressed_ranges: Dict[str, Tuple[float, float]]
) -> ConfigurationSpace:
    """Build a copy of ``original_space`` whose numeric parameters are
    narrowed to the given ``{name: (min, max)}`` ranges.

    Ranges are sanitized before use: inverted/empty ranges are minimally
    widened inside the original bounds, quantization steps (``q``) are
    honored, and bounds stay within the original interval. Parameters that
    cannot be processed are left unchanged with a warning.
    """
    compressed_space = copy.deepcopy(original_space)

    for param_name, (min_val, max_val) in compressed_ranges.items():
        try:
            hp = compressed_space.get_hyperparameter(param_name)
            if hasattr(hp, 'lower') and hasattr(hp, 'upper'):
                original_lower = hp.lower
                original_upper = hp.upper

                # Handle invalid range (min_val >= max_val): widen by one
                # step (integers) or 1% of the original span (floats),
                # staying inside the original bounds.
                if min_val >= max_val:
                    if max_val < original_upper:
                        max_val = min_val + 1 \
                            if isinstance(hp, (sp.Int, UniformIntegerHyperparameter)) \
                            else max_val + (original_upper - original_lower) * 0.01
                        max_val = min(max_val, original_upper)
                    elif min_val > original_lower:
                        min_val = max_val - 1 \
                            if isinstance(hp, (sp.Int, UniformIntegerHyperparameter)) \
                            else max_val - (original_upper - original_lower) * 0.01
                        min_val = max(min_val, original_lower)
                    else:
                        min_val = original_lower
                        max_val = original_upper

                # Snap bounds outward to the quantization grid and make the
                # resulting span a non-zero multiple of q.
                q = hp.q if hasattr(hp, 'q') and hp.q is not None else None
                if q is not None:
                    min_val = np.floor(min_val / q) * q
                    max_val = np.ceil(max_val / q) * q
                    range_size = max_val - min_val
                    if range_size < q:
                        max_val = min_val + q
                    else:
                        range_size_rounded = np.round(range_size / q) * q
                        max_val = min_val + range_size_rounded

                # (The previous `... or isinstance(hp, sp.Int)` duplicates
                # were redundant; the tuple form already covers both types.)
                if isinstance(hp, (sp.Int, UniformIntegerHyperparameter)):
                    new_hp = UniformIntegerHyperparameter(
                        name=param_name,
                        lower=int(min_val),
                        upper=int(max_val),
                        default_value=int((min_val + max_val) / 2),
                        log=hp.log if hasattr(hp, 'log') else False,
                        q=q
                    )
                elif isinstance(hp, (sp.Real, UniformFloatHyperparameter)):
                    new_hp = UniformFloatHyperparameter(
                        name=param_name,
                        lower=float(min_val),
                        upper=float(max_val),
                        default_value=float((min_val + max_val) / 2),
                        q=q,
                        log=hp.log if hasattr(hp, 'log') else False
                    )
                else:
                    _logger.warning(f"Unsupported hyperparameter type for {param_name}: {type(hp)}")
                    continue
                # NOTE: relies on the private _hyperparameters dict to swap
                # the parameter in place; ConfigSpace has no public replace.
                compressed_space._hyperparameters.pop(param_name)
                compressed_space.add_hyperparameter(new_hp)

                _logger.info(
                    f"Compressed {param_name}: [{original_lower}, {original_upper}] -> "
                    f"[{new_hp.lower}, {new_hp.upper}]"
                )
        except Exception as e:
            _logger.warning(f"Failed to compress parameter {param_name}: {e}")

    return compressed_space
+
+
def load_performance_data(data_path: str) -> Optional[pd.DataFrame]:
    """Load a CSV of performance records.

    Returns the DataFrame, or None when reading fails (the error is logged);
    annotation corrected to Optional since the except path returns None.
    """
    try:
        data = pd.read_csv(data_path)
        _logger.debug(f"Loaded {len(data)} records from {data_path}")
        return data
    except Exception as e:
        _logger.error(f"Failed to load data from {data_path}: {e}")
        return None
+
def extract_top_samples_from_history(
    space_history: List[History],
    numeric_param_names: List[str],
    input_space: ConfigurationSpace,
    top_ratio: float = 1.0,
    normalize: bool = True,
    return_history_indices: bool = False
) -> Union[Tuple[List[np.ndarray], List[np.ndarray]], Tuple[List[np.ndarray], List[np.ndarray], List[int]]]:
    """Collect the best ``top_ratio`` fraction of samples from each history.

    Only observations with a finite first objective are considered; per
    history, samples are sorted ascending by objective (lower is better per
    the argsort) and the top fraction is kept. Returns per-history lists of
    value matrices and objective arrays, plus (optionally) the flat list of
    history indices each kept sample came from.
    """
    all_x = []
    all_y = []
    history_indices = [] if return_history_indices else None

    for task_idx, history in enumerate(space_history):
        if len(history) == 0:
            continue

        # Keep only observations whose first objective is finite.
        valid_configs = []
        valid_objectives = []

        for obs in history.observations:
            if obs.objectives and len(obs.objectives) > 0:
                obj_value = obs.objectives[0]
                if np.isfinite(obj_value):
                    valid_configs.append(obs.config)
                    valid_objectives.append(obj_value)

        if len(valid_configs) == 0:
            _logger.debug(f"Skipping history with no valid objectives")
            continue

        x_numeric = extract_numeric_values_from_configs(
            valid_configs, numeric_param_names, input_space, normalize=normalize
        )

        objectives_array = np.array(valid_objectives)

        if top_ratio < 1.0:
            # Keep at least one sample even for tiny histories.
            sorted_indices = np.argsort(objectives_array)
            top_n = max(1, int(len(sorted_indices) * top_ratio))
            top_indices = sorted_indices[: top_n]
            all_x.append(x_numeric[top_indices])
            all_y.append(objectives_array[top_indices])
            if return_history_indices:
                history_indices.extend([task_idx] * len(top_indices))
        else:
            all_x.append(x_numeric)
            all_y.append(objectives_array)
            if return_history_indices:
                history_indices.extend([task_idx] * len(x_numeric))

    return (all_x, all_y, history_indices) if return_history_indices else (all_x, all_y)
+
+
def extract_numeric_values_from_configs(
    configs: List[Union[Configuration, Dict]],
    numeric_param_names: List[str],
    input_space: ConfigurationSpace,
    normalize: bool = True
) -> np.ndarray:
    """Build an (n_configs, n_params) matrix of numeric parameter values.

    Missing values fall back to the parameter's default (with a warning).
    When ``normalize`` is True and the parameter has a non-zero span,
    values are scaled to [0, 1]. Columns of parameters missing from
    ``input_space`` or that are non-numeric stay zero (with a warning).
    """
    n_samples = len(configs)
    n_params = len(numeric_param_names)
    X = np.zeros((n_samples, n_params))

    for i, param_name in enumerate(numeric_param_names):
        try:
            hp = input_space.get_hyperparameter(param_name)
        except KeyError:
            _logger.warning(f"Parameter {param_name} not found in input_space, skipping")
            continue

        if not hasattr(hp, 'lower') or not hasattr(hp, 'upper'):
            _logger.warning(f"Parameter {param_name} is not numeric, skipping")
            continue

        lower = hp.lower
        upper = hp.upper
        range_size = upper - lower

        for j, config in enumerate(configs):
            # Configuration objects and plain dicts both expose .get(); the
            # attribute fallback covers namespace-like objects. (A previous
            # `elif isinstance(config, dict)` branch was dead code - dicts
            # have .get() and were already caught by the first branch.)
            value = None
            if hasattr(config, 'get'):
                value = config.get(param_name)
            elif hasattr(config, param_name):
                value = getattr(config, param_name, None)

            if value is None:
                value = hp.default_value
                _logger.warning(f"Parameter {param_name} not found in config {j}, using default {value}")

            if normalize and range_size > 0:
                X[j, i] = (value - lower) / range_size
            else:
                X[j, i] = value
    return X
+
+
def load_expert_params(expert_config_file: str, key: str = 'spark') -> List[str]:
    """Load the expert-recommended parameter list stored under ``key`` in a
    JSON config file; returns an empty list on any read/parse error."""
    try:
        with open(expert_config_file, "r") as f:
            catalog = json.load(f)
        return catalog.get(key, [])
    except FileNotFoundError:
        _logger.warning(f"Expert config file not found: {expert_config_file}")
        return []
    except json.JSONDecodeError as e:
        _logger.error(f"Error parsing expert config file: {e}")
        return []
    except Exception as e:
        _logger.error(f"Error loading expert parameters: {e}")
        return []
+
+
def collect_compression_details(original_space: ConfigurationSpace, compressed_space: ConfigurationSpace) -> Dict[str, Any]:
    """Summarize, per shared hyperparameter, how the space was compressed.

    For numeric parameters the compression_ratio is the span ratio
    (compressed / original); for categoricals it is the choice-count ratio.
    Parameters absent from ``compressed_space`` are omitted.
    """
    details = {}
    range_hp_names = [hp.name for hp in compressed_space.get_hyperparameters()]

    for hp in original_space.get_hyperparameters():
        name = hp.name
        if name in range_hp_names:
            original_hp = hp
            compressed_hp = compressed_space.get_hyperparameter(name)

            if hasattr(original_hp, 'lower') and hasattr(original_hp, 'upper'):  # Numeric hyperparameter
                details[name] = {
                    'type': 'numeric',
                    'original_range': [original_hp.lower, original_hp.upper],
                    'compressed_range': [compressed_hp.lower, compressed_hp.upper],
                    'original_default': original_hp.default_value,
                    'compressed_default': compressed_hp.default_value,
                    'compression_ratio': (compressed_hp.upper - compressed_hp.lower) / (original_hp.upper - original_hp.lower)
                }
            elif hasattr(original_hp, 'choices'):  # Categorical hyperparameter
                details[name] = {
                    'type': 'categorical',
                    'original_choices': list(original_hp.choices),
                    'compressed_choices': list(compressed_hp.choices),
                    'original_default': original_hp.default_value,
                    'compressed_default': compressed_hp.default_value,
                    'compression_ratio': len(compressed_hp.choices) / len(original_hp.choices)
                }
    return details
+
+
def extract_numeric_hyperparameters(space: ConfigurationSpace) -> Tuple[List[str], List[int]]:
    """Return (names, indices) of all numeric hyperparameters - those
    exposing both ``lower`` and ``upper`` bounds - in space order."""
    numeric = [
        (idx, hp.name)
        for idx, hp in enumerate(space.get_hyperparameters())
        if hasattr(hp, 'lower') and hasattr(hp, 'upper')
    ]
    names = [name for _, name in numeric]
    indices = [idx for idx, _ in numeric]
    return names, indices
\ No newline at end of file
diff --git a/openbox/compressor/viz.py b/openbox/compressor/viz.py
new file mode 100644
index 000000000..eb3bf40a3
--- /dev/null
+++ b/openbox/compressor/viz.py
@@ -0,0 +1,526 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from typing import List, Dict, Tuple, Optional
+from ConfigSpace import ConfigurationSpace
+import json
+import os
+from openbox import logger
+
+sns.set_style("whitegrid")
+plt.rcParams['figure.figsize'] = (12, 8)
+plt.rcParams['font.size'] = 10
+
+
def visualize_range_compression_step(step, step_index: int, save_dir: str):
    """Plot per-parameter range compression for one pipeline step.

    For each compressed parameter (at most 30), draws the original range as a
    normalized gray [0, 1] bar with the compressed (or quantized) range
    overlaid in a color encoding the compression ratio. The figure is saved
    to ``<save_dir>/range_compression_step_<step_index>.png``.

    Parameters
    ----------
    step : compression step; only plotted if it exposes a non-empty
        ``compression_info`` dict with a ``compressed_params`` list.
    step_index : int
        1-based step number used in the output filename.
    save_dir : str
        Existing directory where the PNG is written.
    """
    if not (hasattr(step, 'compression_info') and step.compression_info):
        return

    info = step.compression_info
    if 'compressed_params' not in info or len(info['compressed_params']) == 0:
        return

    plt.figure(figsize=(14, max(8, len(info['compressed_params']) * 0.4)))

    # Cap the number of plotted parameters so the chart stays readable.
    compressed_params = info['compressed_params'][:30]

    # Build every per-bar list in one filtered pass so names, ranges, ratios
    # and labels stay index-aligned. (Previously names were collected from
    # ALL params while ranges/ratios skipped entries without
    # 'original_range', which misaligned y-tick labels and annotations.)
    param_names = []
    original_ranges = []
    compressed_ranges = []
    compression_ratios = []
    param_labels = []

    for param in compressed_params:
        if 'original_range' not in param:
            continue
        param_names.append(param['name'].split('.')[-1])
        original_ranges.append(param['original_range'])
        compressed_ranges.append(param['compressed_range'])
        compression_ratios.append(param['compression_ratio'])
        # Quantization steps additionally report value counts; use them as the label.
        if 'original_num_values' in param:
            param_labels.append(f"{param['original_num_values']}→{param['quantized_num_values']} values")
        else:
            param_labels.append('')

    n_params = len(param_names)
    y_pos = np.arange(n_params)
    ax = plt.subplot(111)

    for idx, (orig, comp, label) in enumerate(zip(original_ranges, compressed_ranges, param_labels)):
        orig_min, orig_max = orig[0], orig[1]
        comp_min, comp_max = comp[0], comp[1]

        # The original range is always rendered as the full [0, 1] bar.
        norm_orig_start = 0.0
        norm_orig_end = 1.0

        # A non-empty label marks a quantization entry: the range is kept but
        # the number of distinct values is reduced, so draw it full-width.
        is_quantization = label != ''

        if is_quantization:
            norm_comp_start = 0.0
            norm_comp_end = 1.0
        else:
            if orig_max - orig_min > 0:
                norm_comp_start = (comp_min - orig_min) / (orig_max - orig_min)
                norm_comp_end = (comp_max - orig_min) / (orig_max - orig_min)
            else:
                norm_comp_start = 0.0
                norm_comp_end = 1.0

        ax.barh(idx, norm_orig_end - norm_orig_start, left=norm_orig_start, height=0.4,
                alpha=0.3, color='gray', label='Original' if idx == 0 else '')

        # Color the compressed bar by its ratio (red = heavy compression).
        ratio = compression_ratios[idx]
        color = plt.cm.RdYlGn_r(ratio)

        if is_quantization:
            ax.barh(idx, norm_comp_end - norm_comp_start, left=norm_comp_start, height=0.4,
                    alpha=0.5, color=color, edgecolor=color, linewidth=2, linestyle='--',
                    label='Quantized (mapped)' if idx == 0 and label else '')
        else:
            ax.barh(idx, norm_comp_end - norm_comp_start, left=norm_comp_start, height=0.4,
                    alpha=0.8, color=color, label='Compressed' if idx == 0 and not label else '')

        if label:
            ax.text(1.02, idx, f'{ratio:.1%} ({label})', va='center', fontsize=7)
            ax.text(0.5, idx - 0.35, f'→[{int(comp_min)}, {int(comp_max)}]',
                    va='top', ha='center', fontsize=12, color='black',
                    fontweight='bold', style='italic')
        else:
            ax.text(1.02, idx, f'{ratio:.1%}', va='center', fontsize=8)
            ax.text(0.5, idx - 0.35, f'→[{comp_min:.0f}, {comp_max:.0f}]',
                    va='top', ha='center', fontsize=12, color='black',
                    fontweight='bold')

        # Annotate the absolute original bounds at the ends of the gray bar.
        ax.text(-0.05, idx + 0.25, f'{orig_min:.0f}',
                va='center', ha='right', fontsize=7, color='gray', alpha=0.6)
        ax.text(1.05, idx + 0.25, f'{orig_max:.0f}',
                va='center', ha='left', fontsize=7, color='gray', alpha=0.6)

    ax.set_yticks(y_pos)
    ax.set_yticklabels(param_names, fontsize=9)
    ax.set_xlim(-0.15, 1.25)
    ax.set_xlabel('Normalized Range [0=lower, 1=upper]', fontsize=12, fontweight='bold')
    ax.set_title(f'{step.name}: Range Compression Details (Top {n_params} params)',
                 fontsize=14, fontweight='bold', pad=20)
    ax.legend(loc='upper right')
    ax.grid(axis='x', alpha=0.3)

    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, f'range_compression_step_{step_index}.png'),
                dpi=300, bbox_inches='tight')
    plt.close()
    print(f" Saved range_compression_step_{step_index}.png")
+
+
def visualize_compression_summary(pipeline, save_path: str):
    """
    Generate a 4-panel compression summary visualization.

    Panels:
        1. Dimension count after each pipeline step (bar chart).
        2. Per-step compression ratio relative to the original dimension.
        3. Compressed vs. unchanged parameter counts for steps exposing
           ``compression_info`` (stacked bars), or a placeholder if none do.
        4. A monospace text summary of the whole pipeline.

    Args:
        pipeline: CompressionPipeline with compression steps; must expose
            ``steps``, ``space_after_steps``, ``sample_space`` and
            ``surrogate_space``.
        save_path: Path to save the summary plot
    """

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Panel 1: Dimension Reduction Across Steps
    ax = axes[0, 0]
    step_names = ['Original'] + [step.name for step in pipeline.steps]
    # NOTE(review): space_after_steps is expected to hold the original space
    # followed by one space per step (len(steps)+1 entries) — Panel 4's
    # dimensions[i] / dimensions[i+1] indexing relies on this; confirm.
    dimensions = [len(space.get_hyperparameters()) for space in pipeline.space_after_steps]

    colors = plt.cm.viridis(np.linspace(0, 1, len(dimensions)))
    bars = ax.bar(range(len(dimensions)), dimensions, color=colors, alpha=0.8, edgecolor='black')

    # Label each bar with its dimension count.
    for bar, dim in zip(bars, dimensions):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height,
                f'{int(dim)}', ha='center', va='bottom', fontweight='bold')

    ax.set_xticks(range(len(step_names)))
    ax.set_xticklabels(step_names, rotation=45, ha='right')
    ax.set_ylabel('Number of Parameters', fontsize=11, fontweight='bold')
    ax.set_title('Dimension Reduction Across Steps', fontsize=12, fontweight='bold')
    ax.grid(axis='y', alpha=0.3)

    # Panel 2: Compression Ratio by Step
    # Ratio of each step's output dimension to the ORIGINAL dimension
    # (cumulative, not per-step incremental).
    ax = axes[0, 1]
    compression_ratios = [dim / dimensions[0] for dim in dimensions[1:]]
    step_names_no_orig = step_names[1:]

    colors = plt.cm.RdYlGn_r(compression_ratios)
    bars = ax.bar(range(len(compression_ratios)), compression_ratios, color=colors, alpha=0.8, edgecolor='black')

    for bar, ratio in zip(bars, compression_ratios):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height,
                f'{ratio:.1%}', ha='center', va='bottom', fontweight='bold')

    ax.set_xticks(range(len(step_names_no_orig)))
    ax.set_xticklabels(step_names_no_orig, rotation=45, ha='right')
    ax.set_ylabel('Compression Ratio', fontsize=11, fontweight='bold')
    ax.set_title('Compression Ratio by Step', fontsize=12, fontweight='bold')
    ax.axhline(y=1.0, color='red', linestyle='--', alpha=0.5, label='No compression')
    ax.legend()
    ax.grid(axis='y', alpha=0.3)

    # Panel 3: Range Compression Statistics
    # Only steps that expose a compression_info dict contribute here.
    ax = axes[1, 0]
    range_stats = []
    step_labels = []

    for i, step in enumerate(pipeline.steps):
        if hasattr(step, 'compression_info') and step.compression_info:
            info = step.compression_info
            if 'compressed_params' in info:
                n_compressed = len(info['compressed_params'])
                n_unchanged = len(info.get('unchanged_params', []))
                range_stats.append([n_compressed, n_unchanged])
                step_labels.append(f"Step {i+1}\n{step.name}")

    if range_stats:
        range_stats = np.array(range_stats)
        x = np.arange(len(step_labels))
        width = 0.35

        # Stacked bars: compressed count at the bottom, unchanged on top.
        ax.bar(x, range_stats[:, 0], width, label='Compressed', alpha=0.8, color='coral')
        ax.bar(x, range_stats[:, 1], width, bottom=range_stats[:, 0],
               label='Unchanged', alpha=0.8, color='lightblue')

        ax.set_ylabel('Number of Parameters', fontsize=11, fontweight='bold')
        ax.set_title('Range Compression Statistics', fontsize=12, fontweight='bold')
        ax.set_xticks(x)
        ax.set_xticklabels(step_labels, fontsize=9)
        ax.legend()
        ax.grid(axis='y', alpha=0.3)
    else:
        ax.text(0.5, 0.5, 'No range compression', ha='center', va='center',
                transform=ax.transAxes, fontsize=14)
        ax.axis('off')

    # Panel 4: Text Summary
    ax = axes[1, 1]
    ax.axis('off')

    summary_text = "Compression Summary\n" + "="*40 + "\n\n"
    summary_text += f"Original dimensions: {dimensions[0]}\n"
    summary_text += f"Final sample space: {len(pipeline.sample_space.get_hyperparameters())}\n"
    summary_text += f"Final surrogate space: {len(pipeline.surrogate_space.get_hyperparameters())}\n"
    summary_text += f"Overall compression: {len(pipeline.surrogate_space.get_hyperparameters())/dimensions[0]:.1%}\n\n"

    summary_text += "Steps:\n"
    for i, step in enumerate(pipeline.steps):
        input_dim = dimensions[i]
        output_dim = dimensions[i+1]
        dimension_ratio = output_dim / input_dim if input_dim > 0 else 1.0

        summary_text += f"{i+1}. {step.name}\n"

        # Steps with compression_info may report an effective (range-based)
        # ratio in addition to the raw dimension ratio.
        if hasattr(step, 'compression_info') and step.compression_info:
            info = step.compression_info
            if 'avg_compression_ratio' in info:
                effective_ratio = info['avg_compression_ratio']
                summary_text += f"   Dim: {input_dim} → {output_dim} ({dimension_ratio:.1%})\n"
                summary_text += f"   Effective: {effective_ratio:.1%}\n"
            else:
                summary_text += f"   {input_dim} → {output_dim} ({dimension_ratio:.1%})\n"
        else:
            summary_text += f"   {input_dim} → {output_dim} ({dimension_ratio:.1%})\n"

    ax.text(0.1, 0.9, summary_text, transform=ax.transAxes,
            fontsize=10, verticalalignment='top', fontfamily='monospace',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))

    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"  Saved compression_summary.png")
+
+
def visualize_compression_details(compressor, save_dir: str):
    """
    Intelligently visualize compression details based on the steps used.

    This is the main dispatcher that coordinates all visualization functions.
    It automatically generates relevant plots based on:
    - Compression steps used (dimension selection, range compression, projection)
    - Whether transfer learning is used (source similarities)
    - Whether adaptive updates are used
    - Whether multi-task data is available

    Automatically generates:
    - Compression summary (always)
    - Range compression details (if range compression steps are used)
    - Parameter importance (if importance-based dimension selection is used)
    - Multi-task importance heatmap (if multiple source tasks are used)
    - Dimension evolution (if adaptive dimension step with history is used)
    - Source task similarities (if transfer learning is used)

    Each optional plot is wrapped in try/except so that one failing plot
    does not prevent the remaining ones from being generated.
    """
    os.makedirs(save_dir, exist_ok=True)

    pipeline = compressor.pipeline
    if pipeline is None:
        logger.warning("No pipeline found in compressor")
        return

    # 1. Generate compression summary (always)
    visualize_compression_summary(
        pipeline=pipeline,
        save_path=os.path.join(save_dir, 'compression_summary.png')
    )

    # 2. Generate range compression details for each step
    # (the per-step function is a no-op for steps without compression_info)
    for i, step in enumerate(pipeline.steps):
        visualize_range_compression_step(
            step=step,
            step_index=i+1,
            save_dir=save_dir
        )

    # Conditional visualizations based on compressor state and step types.

    # 3. Generate source task similarity plot if using transfer learning
    if hasattr(compressor, '_source_similarities') and compressor._source_similarities:
        try:
            visualize_source_task_similarities(
                similarities=compressor._source_similarities,
                save_path=os.path.join(save_dir, 'source_task_similarities.png')
            )
        except Exception as e:
            logger.warning(f"Failed to generate source task similarity plot: {e}")

    # Check for importance-based dimension selection steps
    for i, step in enumerate(pipeline.steps):
        step_class_name = step.__class__.__name__

        # 4. Generate parameter importance plot for SHAP/Correlation/Adaptive steps
        if step_class_name in ['SHAPDimensionStep', 'CorrelationDimensionStep', 'AdaptiveDimensionStep']:
            # Importance data lives in the step's calculator cache, if present.
            if hasattr(step, '_calculator') and hasattr(step._calculator, '_cache'):
                cache = step._calculator._cache
                if cache and 'importances' in cache and cache['importances'] is not None:
                    importances = cache['importances']
                    if hasattr(step._calculator, 'numeric_hyperparameter_names'):
                        param_names = step._calculator.numeric_hyperparameter_names
                        if len(param_names) > 0 and len(importances) > 0:
                            # Single-task importance bar chart.
                            try:
                                visualize_parameter_importance(
                                    param_names=param_names,
                                    importances=importances,
                                    save_path=os.path.join(save_dir, f'parameter_importance_step_{i+1}.png'),
                                    topk=min(20, len(param_names))
                                )
                            except Exception as e:
                                logger.warning(f"Failed to generate parameter importance plot: {e}")

                            # 5. Multi-task heatmap when per-task importances exist.
                            if (cache.get('importances_per_task') is not None and
                                    cache.get('task_names') is not None):
                                try:
                                    visualize_importance_heatmap(
                                        param_names=param_names,
                                        importances=cache['importances_per_task'],
                                        save_path=os.path.join(save_dir, f'multi_task_importance_heatmap_step_{i+1}.png'),
                                        tasks=cache['task_names']
                                    )
                                except Exception as e:
                                    logger.warning(f"Failed to generate multi-task importance heatmap: {e}")

        # 6. Generate dimension evolution plot for Adaptive step
        if step_class_name == 'AdaptiveDimensionStep':
            # Only meaningful once the compressor has recorded update history.
            if hasattr(compressor, '_dimension_history') and hasattr(compressor, '_iteration_history'):
                iterations = compressor._iteration_history
                dimensions = compressor._dimension_history
                if len(iterations) > 1:  # Has at least initial + 1 update
                    try:
                        visualize_adaptive_dimension_evolution(
                            iterations=iterations,
                            dimensions=dimensions,
                            save_path=os.path.join(save_dir, 'dimension_evolution.png'),
                            title='Adaptive Dimension Evolution'
                        )
                    except Exception as e:
                        logger.warning(f"Failed to generate dimension evolution plot: {e}")
+
+
def visualize_parameter_importance(param_names: List[str], importances: List[float], save_path: str, topk: int = 20):
    """Save a horizontal bar chart of the top-k parameters by |importance|.

    Bars are sorted so the most important parameter appears at the top, and
    each bar is annotated with its absolute importance score.
    """
    magnitudes = np.abs(importances)
    # Ascending top-k: with barh this puts the largest score at the top row.
    top_order = np.argsort(magnitudes)[-topk:]
    selected_names = [param_names[pos] for pos in top_order]
    selected_scores = [magnitudes[pos] for pos in top_order]

    fig, ax = plt.subplots(figsize=(10, 8))

    positions = np.arange(len(selected_names))
    bars = ax.barh(positions, selected_scores, alpha=0.7, color='coral')

    ax.set_yticks(positions)
    ax.set_yticklabels(selected_names)
    ax.set_xlabel('Importance Score', fontsize=12)
    ax.set_title(f'Top-{topk} Parameter Importance', fontsize=14, fontweight='bold')
    ax.grid(axis='x', alpha=0.3)

    # Annotate each bar with its numeric score just past the bar end.
    for bar, score in zip(bars, selected_scores):
        ax.text(bar.get_width(), bar.get_y() + bar.get_height()/2.,
                f'{score:.4f}',
                ha='left', va='center', fontsize=9)

    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"Saved parameter importance plot to {save_path}")
+
+
def visualize_adaptive_dimension_evolution(iterations: List[int], dimensions: List[int],
                                           save_path: str, title: str = 'Adaptive Dimension Evolution'):
    """Save a line plot of how the search-space dimensionality evolved.

    Every recorded update is one point; iterations where the dimension count
    changed get a dashed vertical marker annotated with the new value and
    the signed delta.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    ax.plot(iterations, dimensions, marker='o', linewidth=2, markersize=8,
            color='steelblue', label='Dimensions')

    for pos in range(1, len(dimensions)):
        previous, current = dimensions[pos-1], dimensions[pos]
        if current == previous:
            continue
        ax.axvline(x=iterations[pos], color='red', linestyle='--', alpha=0.5, linewidth=1)
        # Annotate the change with its signed delta.
        delta = current - previous
        delta_text = f'+{delta}' if delta > 0 else str(delta)
        ax.annotate(f'{current}\n({delta_text})',
                    xy=(iterations[pos], current),
                    xytext=(5, 10), textcoords='offset points',
                    fontweight='bold', fontsize=9,
                    bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.7))

    ax.set_xlabel('Iteration', fontsize=12, fontweight='bold')
    ax.set_ylabel('Number of Dimensions', fontsize=12, fontweight='bold')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3)
    ax.legend(loc='best')

    # Dimension counts are whole numbers; force integer y-ticks.
    ax.yaxis.set_major_locator(plt.MaxNLocator(integer=True))

    if max(dimensions) - min(dimensions) > 0:
        ax.set_ylim([min(dimensions) - 0.5, max(dimensions) + 0.5])

    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"Saved adaptive dimension evolution plot to {save_path}")
+
+
def visualize_source_task_similarities(similarities: Dict[int, float],
                                       save_path: str,
                                       task_names: Optional[List[str]] = None):
    """Save a bar chart of source-task similarity scores to the target task.

    Parameters
    ----------
    similarities : dict mapping source-task index to similarity score.
    save_path : output PNG path.
    task_names : optional display labels; defaults to 'Source Task <idx>'.

    Does nothing when ``similarities`` is empty.
    """
    if not similarities:
        return

    ordered = sorted(similarities)
    scores = [similarities[index] for index in ordered]

    if task_names is None:
        task_names = [f'Source Task {index}' for index in ordered]

    # Widen the figure as the number of tasks grows.
    fig, ax = plt.subplots(figsize=(max(10, len(ordered) * 0.8), 6))

    # Color bars on a red-to-green scale by similarity.
    bar_colors = plt.cm.RdYlGn(np.array(scores))

    bars = ax.bar(range(len(ordered)), scores, color=bar_colors, alpha=0.8, edgecolor='black')

    for bar, score in zip(bars, scores):
        ax.text(bar.get_x() + bar.get_width()/2., bar.get_height(),
                f'{score:.3f}',
                ha='center', va='bottom', fontweight='bold', fontsize=10)

    ax.set_xlabel('Source Tasks', fontsize=12, fontweight='bold')
    ax.set_ylabel('Similarity Score', fontsize=12, fontweight='bold')
    ax.set_title('Source Task Similarity to Target Task', fontsize=14, fontweight='bold')
    ax.set_xticks(range(len(ordered)))
    ax.set_xticklabels(task_names, rotation=45, ha='right')
    ax.set_ylim([0, max(scores) * 1.1])
    ax.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"  Saved source_task_similarities.png")
+
+
def visualize_importance_heatmap(param_names: List[str], importances: np.ndarray,
                                 save_path: str, tasks: Optional[List[str]] = None):
    """
    Save a heatmap of absolute parameter importances across tasks.

    Args:
        param_names: One name per column of ``importances``.
        importances: Array of shape (n_tasks, n_params); a 1-D array is
            treated as a single task. Absolute values are taken and the
            matrix is normalized by its global maximum before plotting.
        save_path: Path to save the heatmap PNG.
        tasks: Optional row labels; defaults to 'Task 1', 'Task 2', ...
    """
    # Promote a single importance vector to a one-row matrix.
    if len(importances.shape) == 1:
        importances = importances.reshape(1, -1)

    importances = np.abs(importances)

    n_tasks, n_params = importances.shape

    if tasks is None:
        tasks = [f'Task {i+1}' for i in range(n_tasks)]

    # Keep only the 30 columns with the highest mean importance for readability.
    if n_params > 30:
        mean_importance = importances.mean(axis=0)
        top_indices = np.argsort(mean_importance)[-30:]
        importances = importances[:, top_indices]
        param_names = [param_names[i] for i in top_indices]
        n_params = 30

    # Shorten dotted names (keep the last component) when they are long.
    short_names = [name.split('.')[-1] if len(name) > 20 else name for name in param_names]

    fig, ax = plt.subplots(figsize=(max(14, n_params * 0.5), max(8, n_tasks * 0.6)))

    # Normalize to [0, 1] by the global max so the colorbar is comparable.
    if importances.max() > 0:
        normalized_importances = importances / importances.max()
    else:
        normalized_importances = importances

    # Choose colormap (feel free to change):
    # Option 1: 'RdYlGn_r' - Red (low) -> Yellow (medium) -> Green (high) [Intuitive]
    # Option 2: 'viridis' - Purple (low) -> Green -> Yellow (high) [Perceptually uniform]
    # Option 3: 'plasma' - Dark blue (low) -> Purple -> Orange -> Yellow (high) [Good contrast]
    # Option 4: 'rocket_r' - Black (low) -> Red -> Orange (high) [High contrast]
    # Option 5: 'mako_r' - Teal (low) -> Green -> Yellow (high) [Cool tones]
    cmap = sns.color_palette("RdYlGn_r", as_cmap=True)

    # Numeric cell annotations only for small matrices; otherwise they overlap.
    show_annotations = (n_tasks <= 5 and n_params <= 20)

    sns.heatmap(normalized_importances,
                annot=show_annotations,
                fmt='.2f' if show_annotations else '',
                cmap=cmap,
                xticklabels=short_names,
                yticklabels=tasks,
                cbar_kws={
                    'label': 'Normalized Importance Score',
                    'orientation': 'vertical',
                    'pad': 0.02
                },
                linewidths=0.5,
                linecolor='white',
                square=False,
                ax=ax)

    ax.set_xlabel('Parameters', fontsize=13, fontweight='bold', labelpad=10)
    ax.set_ylabel('Tasks', fontsize=13, fontweight='bold', labelpad=10)
    ax.set_title('Multi-Task Parameter Importance Heatmap',
                 fontsize=15, fontweight='bold', pad=15)

    plt.xticks(rotation=45, ha='right', fontsize=10)
    plt.yticks(rotation=0, fontsize=11)

    ax.set_facecolor('#f0f0f0')

    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close()

    print(f"  Saved multi_task_importance_heatmap.png")
diff --git a/openbox/core/__init__.py b/openbox/core/__init__.py
index a1e14ed13..bfa462863 100644
--- a/openbox/core/__init__.py
+++ b/openbox/core/__init__.py
@@ -1 +1,380 @@
# License: MIT
+# Author: LINGCHING TUNG
+
+from openbox import logger
+
+
# Advisor registry mapping advisor type to (class, required_conditions)
_ADVISOR_REGISTRY = {}


def register_advisor(name, advisor_class, required_conditions=None):
    """
    Register an advisor class with optional requirements.

    Parameters
    ----------
    name : str
        Advisor type name
    advisor_class : type
        Advisor class
    required_conditions : dict, optional
        Requirements like {'num_objectives': 1, 'num_constraints': 0}
    """
    conditions = required_conditions or {}
    entry = {
        'class': advisor_class,
        'conditions': conditions,
    }
    _ADVISOR_REGISTRY[name] = entry
+
+
def _check_advisor_conditions(advisor_type, num_objectives, num_constraints):
    """Validate the requested problem setting against a registered advisor.

    Advisor types that were never registered are accepted as-is (the
    built-in advisors perform their own validation). Raises ValueError when
    a registered requirement on num_objectives or num_constraints is not
    met; returns True otherwise.
    """
    if advisor_type not in _ADVISOR_REGISTRY:
        return True

    conditions = _ADVISOR_REGISTRY[advisor_type]['conditions']

    if 'num_objectives' in conditions and num_objectives != conditions['num_objectives']:
        raise ValueError(
            f"Advisor '{advisor_type}' requires num_objectives={conditions['num_objectives']}, "
            f"but got {num_objectives}"
        )

    if 'num_constraints' in conditions and num_constraints != conditions['num_constraints']:
        raise ValueError(
            f"Advisor '{advisor_type}' requires num_constraints={conditions['num_constraints']}, "
            f"but got {num_constraints}"
        )

    return True
+
+
def build_advisor(
        advisor_type='default',
        config_space=None,
        num_objectives=1,
        num_constraints=0,
        initial_trials=3,
        init_strategy='random_explore_first',
        initial_configurations=None,
        optimization_strategy='bo',
        surrogate_type='auto',
        acq_type='auto',
        acq_optimizer_type='auto',
        ref_point=None,
        transfer_learning_history=None,
        early_stop=False,
        early_stop_kwargs=None,
        task_id='OpenBox',
        output_dir='logs',
        random_state=None,
        logger_kwargs=None,
        scheduler_type=None,
        # Batch advisor specific parameters
        batch_size=None,
        batch_strategy='default',
        **advisor_kwargs
):
    """
    Factory function to build advisor instances.

    Parameters
    ----------
    advisor_type : str, default='default'
        Type of advisor to create:
        - 'default': Generic Bayesian Optimization advisor
        - 'mf': Multi-fidelity Bayesian Optimization advisor
        - 'mcadvisor': Monte Carlo advisor
        - 'tpe': Tree-structured Parzen Estimator
        - 'ea': Evolutionary Algorithm advisor
        - 'random': Random search advisor
        - 'sync_batch': Synchronous batch advisor
        - 'async_batch': Asynchronous batch advisor
    config_space : ConfigSpace
        Configuration space
    num_objectives : int
        Number of objectives
    num_constraints : int
        Number of constraints
    initial_trials : int
        Number of initial trials
    init_strategy : str
        Initialization strategy
    initial_configurations : list, optional
        Initial configurations
    optimization_strategy : str
        Optimization strategy
        - 'bo' (default)
        - 'random'
        - 'ea'
    surrogate_type : str
        Surrogate model type
    acq_type : str
        Acquisition function type
    acq_optimizer_type : str
        Acquisition optimizer type
    ref_point : list, optional
        Reference point for multi-objective
    transfer_learning_history : list, optional
        Transfer learning history
    early_stop : bool
        Enable early stopping
    early_stop_kwargs : dict, optional
        Early stop parameters
    task_id : str
        Task identifier
    output_dir : str
        Output directory
    random_state : int, optional
        Random seed
    logger_kwargs : dict, optional
        Logger parameters
    scheduler_type : str, optional
        Scheduler type. 'mfes'/'mfes_flatten' force advisor_type='mf' (and
        default surrogate_type to 'mfgpe'); 'full'/'fixed'/'bohb'/'flatten'/
        'bohb_flatten' force a regular BO advisor and reject the 'mfgpe'
        surrogate. Other values are passed through unchanged.
    batch_size : int, optional
        Batch size for batch advisors
    batch_strategy : str, optional
        Batch strategy for batch advisors
    **advisor_kwargs
        Additional advisor-specific parameters

    Returns
    -------
    advisor : BaseAdvisor
        Configured advisor instance

    Raises
    ------
    ValueError
        If advisor_type is invalid or requirements not met
    """
    advisor_type = advisor_type.lower()
    # scheduler_type may legitimately be None; only lowercase real strings.
    scheduler_type = scheduler_type.lower() if isinstance(scheduler_type, str) else scheduler_type

    # Reconcile scheduler and advisor choices: multi-fidelity schedulers need
    # the MF advisor (with the MFGPE surrogate by default), while regular BO
    # schedulers must use neither the MF advisor nor the MFGPE surrogate.
    # Mismatched combinations are overridden with a warning, not an error.
    mf_scheduler_types = {'mfes', 'mfes_flatten'}
    bo_scheduler_types = {'full', 'fixed', 'bohb', 'flatten', 'bohb_flatten'}
    if scheduler_type in mf_scheduler_types:
        if advisor_type != 'mf':
            logger.warning(
                'scheduler_type=%s requires mf advisor; override advisor_type from %s to mf.'
                % (scheduler_type, advisor_type)
            )
            advisor_type = 'mf'
        if surrogate_type == 'auto':
            surrogate_type = 'mfgpe'
    elif scheduler_type in bo_scheduler_types:
        if advisor_type == 'mf':
            logger.warning(
                'scheduler_type=%s should use regular BO advisor; override advisor_type from mf to default.'
                % scheduler_type
            )
            advisor_type = 'default'
        if surrogate_type == 'mfgpe':
            logger.warning(
                'scheduler_type=%s should not use mfgpe surrogate directly; override surrogate_type from mfgpe to auto.'
                % scheduler_type
            )
            surrogate_type = 'auto'

    # Explicit advisor_type='mf' without a scheduler also defaults to MFGPE.
    if advisor_type == 'mf' and surrogate_type == 'auto':
        surrogate_type = 'mfgpe'

    # Raises ValueError if a registered advisor's requirements are not met.
    _check_advisor_conditions(advisor_type, num_objectives, num_constraints)

    _logger_kwargs = logger_kwargs or {}
    if 'force_init' not in _logger_kwargs:
        _logger_kwargs['force_init'] = True

    # Advisor classes are imported lazily inside each branch to avoid
    # circular imports and to keep unused advisors' dependencies optional.
    if advisor_type == 'default':
        from openbox.core.generic_advisor import Advisor
        return Advisor(
            config_space,
            num_objectives=num_objectives,
            num_constraints=num_constraints,
            initial_trials=initial_trials,
            init_strategy=init_strategy,
            initial_configurations=initial_configurations,
            optimization_strategy=optimization_strategy,
            surrogate_type=surrogate_type,
            acq_type=acq_type,
            acq_optimizer_type=acq_optimizer_type,
            ref_point=ref_point,
            transfer_learning_history=transfer_learning_history,
            early_stop=early_stop,
            early_stop_kwargs=early_stop_kwargs,
            task_id=task_id,
            output_dir=output_dir,
            random_state=random_state,
            logger_kwargs=_logger_kwargs,
            **advisor_kwargs
        )

    elif advisor_type == 'mf':
        from openbox.core.mf_advisor import MFAdvisor
        return MFAdvisor(
            config_space,
            num_objectives=num_objectives,
            num_constraints=num_constraints,
            initial_trials=initial_trials,
            init_strategy=init_strategy,
            initial_configurations=initial_configurations,
            optimization_strategy=optimization_strategy,
            surrogate_type=surrogate_type,
            acq_type=acq_type,
            acq_optimizer_type=acq_optimizer_type,
            ref_point=ref_point,
            transfer_learning_history=transfer_learning_history,
            early_stop=early_stop,
            early_stop_kwargs=early_stop_kwargs,
            task_id=task_id,
            output_dir=output_dir,
            random_state=random_state,
            logger_kwargs=_logger_kwargs,
            **advisor_kwargs
        )

    elif advisor_type == 'mcadvisor':
        # NOTE: MCAdvisor takes no early_stop arguments — TODO confirm
        # whether early stopping is intentionally unsupported here.
        from openbox.core.mc_advisor import MCAdvisor
        return MCAdvisor(
            config_space,
            num_objectives=num_objectives,
            num_constraints=num_constraints,
            initial_trials=initial_trials,
            init_strategy=init_strategy,
            initial_configurations=initial_configurations,
            optimization_strategy=optimization_strategy,
            surrogate_type=surrogate_type,
            acq_type=acq_type,
            acq_optimizer_type=acq_optimizer_type,
            ref_point=ref_point,
            transfer_learning_history=transfer_learning_history,
            task_id=task_id,
            output_dir=output_dir,
            random_state=random_state,
            logger_kwargs=_logger_kwargs,
            **advisor_kwargs
        )

    elif advisor_type == 'tpe':
        from openbox.core.tpe_advisor import TPE_Advisor
        # TPE only supports single objective without constraints
        if num_objectives != 1 or num_constraints != 0:
            raise ValueError(
                f"TPE advisor only supports single objective without constraints, "
                f"but got num_objectives={num_objectives}, num_constraints={num_constraints}"
            )
        return TPE_Advisor(
            config_space,
            task_id=task_id,
            random_state=random_state,
            logger_kwargs=_logger_kwargs,
            **advisor_kwargs
        )

    elif advisor_type == 'ea':
        from openbox.core.ea_advisor import EA_Advisor
        # EA only supports single objective without constraints
        if num_objectives != 1 or num_constraints != 0:
            raise ValueError(
                f"EA advisor only supports single objective without constraints, "
                f"but got num_objectives={num_objectives}, num_constraints={num_constraints}"
            )
        return EA_Advisor(
            config_space,
            num_objectives=num_objectives,
            num_constraints=num_constraints,
            optimization_strategy=optimization_strategy,
            # EA manages its own population; default to serial suggestion.
            batch_size=batch_size if batch_size is not None else 1,
            task_id=task_id,
            output_dir=output_dir,
            random_state=random_state,
            logger_kwargs=_logger_kwargs,
            **advisor_kwargs
        )

    elif advisor_type == 'random':
        from openbox.core.random_advisor import RandomAdvisor
        return RandomAdvisor(
            config_space,
            num_objectives=num_objectives,
            num_constraints=num_constraints,
            ref_point=ref_point,
            task_id=task_id,
            output_dir=output_dir,
            random_state=random_state,
            logger_kwargs=_logger_kwargs,
            **advisor_kwargs
        )

    elif advisor_type == 'sync_batch':
        from openbox.core.sync_batch_advisor import SyncBatchAdvisor
        # NOTE(review): batch_size may be None here; SyncBatchAdvisor is
        # expected to supply its own default — TODO confirm.
        return SyncBatchAdvisor(
            config_space,
            num_objectives=num_objectives,
            num_constraints=num_constraints,
            batch_size=batch_size,
            batch_strategy=batch_strategy,
            initial_trials=initial_trials,
            initial_configurations=initial_configurations,
            init_strategy=init_strategy,
            transfer_learning_history=transfer_learning_history,
            optimization_strategy=optimization_strategy,
            surrogate_type=surrogate_type,
            acq_type=acq_type,
            acq_optimizer_type=acq_optimizer_type,
            ref_point=ref_point,
            task_id=task_id,
            output_dir=output_dir,
            random_state=random_state,
            logger_kwargs=_logger_kwargs,
            **advisor_kwargs
        )

    elif advisor_type == 'async_batch':
        from openbox.core.async_batch_advisor import AsyncBatchAdvisor
        return AsyncBatchAdvisor(
            config_space,
            num_objectives=num_objectives,
            num_constraints=num_constraints,
            batch_size=batch_size,
            batch_strategy=batch_strategy,
            initial_trials=initial_trials,
            initial_configurations=initial_configurations,
            init_strategy=init_strategy,
            transfer_learning_history=transfer_learning_history,
            optimization_strategy=optimization_strategy,
            surrogate_type=surrogate_type,
            acq_type=acq_type,
            acq_optimizer_type=acq_optimizer_type,
            ref_point=ref_point,
            task_id=task_id,
            output_dir=output_dir,
            random_state=random_state,
            logger_kwargs=_logger_kwargs,
            **advisor_kwargs
        )

    else:
        raise ValueError(
            f"Invalid advisor type: '{advisor_type}'. "
            f"Supported types: 'default', 'mcadvisor', 'tpe', 'ea', 'random', "
            f"'sync_batch', 'async_batch', 'mf'"
        )
+
def create_parallel_advisor(parallel_strategy, sample_strategy, **kwargs):
    """Create an advisor configured for parallel (batch) suggestion.

    Parameters
    ----------
    parallel_strategy : {'sync', 'async'}
        Whether workers are synchronized per batch or run asynchronously.
    sample_strategy : {'random', 'bo', 'ea'}
        Sampling scheme. 'random' and 'bo' map to the corresponding batch
        advisor; 'ea' always uses the evolutionary advisor regardless of the
        parallel strategy.
    **kwargs
        Forwarded to ``build_advisor``.

    Returns
    -------
    The advisor instance created by ``build_advisor``.

    Raises
    ------
    ValueError
        If either strategy name is invalid.
    """
    if parallel_strategy not in ['sync', 'async']:
        raise ValueError('Invalid parallel strategy: %s' % parallel_strategy)
    if sample_strategy not in ['random', 'bo', 'ea']:
        raise ValueError('Invalid sample strategy: %s' % sample_strategy)

    # Both branches mapped 'ea' to the EA advisor; only 'random'/'bo' depend
    # on the parallel strategy. (The original duplicated this mapping per
    # strategy and ended with an unreachable else-raise.)
    if sample_strategy == 'ea':
        advisor_type = 'ea'
    else:
        advisor_type = 'sync_batch' if parallel_strategy == 'sync' else 'async_batch'

    return build_advisor(advisor_type=advisor_type, **kwargs)
+
+# Register advisors (for future extensibility)
+# Users can register custom advisors like:
+# register_advisor('my_advisor', MyAdvisorClass, {'num_objectives': 1})
\ No newline at end of file
diff --git a/openbox/core/async_batch_advisor.py b/openbox/core/async_batch_advisor.py
index 0707172b1..2a8ba3cae 100644
--- a/openbox/core/async_batch_advisor.py
+++ b/openbox/core/async_batch_advisor.py
@@ -124,8 +124,7 @@ def _get_suggestion(self, history=None):
trial_state=SUCCESS, elapsed_time=None, extra_info=None)
batch_history.update_observation(observation)
- # use super class get_suggestion
- return super().get_suggestion(batch_history)
+ return super().get_suggestion(history=batch_history)
elif self.batch_strategy == 'local_penalization':
# local_penalization only supports single objective with no constraint
@@ -145,7 +144,7 @@ def _get_suggestion(self, history=None):
elif self.batch_strategy == 'default':
# select first N candidates
- candidates = super().get_suggestion(history, return_list=True)
+ candidates = self._get_bo_candidates(history)
self.early_stop_ei(history, challengers=candidates)
for config in candidates:
diff --git a/openbox/core/base_advisor.py b/openbox/core/base_advisor.py
index 2ff694a07..98858363b 100644
--- a/openbox/core/base_advisor.py
+++ b/openbox/core/base_advisor.py
@@ -10,6 +10,7 @@
from openbox.utils.early_stop import EarlyStopAlgorithm, EarlyStopException
from openbox.utils.history import Observation, History
from openbox.utils.constants import MAXINT
+from openbox.core.space_adapter import IdentitySpaceAdapter, CompressorSpaceAdapter
class BaseAdvisor(object, metaclass=abc.ABCMeta):
@@ -51,6 +52,9 @@ def __init__(
task_id='OpenBox',
random_state=None,
logger_kwargs: dict = None,
+ compressor=None,
+ compressor_type='none',
+ compressor_kwargs=None,
):
self.timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
@@ -68,6 +72,32 @@ def __init__(
self.config_space_seed = self.rng.randint(MAXINT)
self.config_space.seed(self.config_space_seed)
self.ref_point = ref_point
+ self.sample_space = self.config_space
+ self.surrogate_space = self.config_space
+
+ # space compression setting
+ if compressor is not None:
+ self.space_adapter = CompressorSpaceAdapter(
+ config_space=self.config_space,
+ compressor=compressor,
+ )
+ elif compressor_type is not None and str(compressor_type).lower() == 'none':
+            logger.info('compressor_type=none, using identity space adapter (any compressor_kwargs are ignored).')
+ self.space_adapter = IdentitySpaceAdapter(self.config_space)
+ elif compressor_type is not None or compressor_kwargs is not None:
+ compressor = self._build_compressor(
+ config_space=self.config_space,
+ compressor_type=compressor_type,
+ compressor_kwargs=compressor_kwargs,
+ seed=self.config_space_seed,
+ )
+ self.space_adapter = CompressorSpaceAdapter(
+ config_space=self.config_space,
+ compressor=compressor,
+ )
+ else:
+ logger.info('No compressor is provided, using identity space adapter.')
+ self.space_adapter = IdentitySpaceAdapter(self.config_space)
# init history
self.history = History(
@@ -83,6 +113,130 @@ def __init__(
if self.early_stop:
logger.info(f'Early stop is enabled.')
+ @staticmethod
+ def _build_compressor(config_space, compressor_type, compressor_kwargs, seed):
+ from openbox.compressor import Compressor
+ from openbox.compressor.api import (
+ create_steps_from_strings,
+ create_filling_from_config,
+ create_filling_from_string,
+ )
+
+ kwargs = dict(compressor_kwargs or {})
+ kwargs.setdefault('seed', seed)
+
+ step_params = kwargs.pop('step_params', {})
+ filling_config = kwargs.pop('filling_config', None)
+ filling_type = kwargs.pop('filling_type', None)
+ fixed_values = kwargs.pop('fixed_values', None)
+
+ filling_strategy = None
+ if filling_config is not None:
+ filling_strategy = create_filling_from_config(filling_config)
+ elif filling_type is not None or fixed_values is not None:
+ filling_strategy = create_filling_from_string(
+ filling_str=filling_type or 'default',
+ fixed_values=fixed_values,
+ )
+
+ raw_steps = kwargs.pop('steps', None)
+ if raw_steps is not None:
+ if len(raw_steps) > 0 and isinstance(raw_steps[0], str):
+ steps = create_steps_from_strings(raw_steps, step_params=step_params)
+ else:
+ steps = raw_steps
+ return Compressor(
+ config_space=config_space,
+ steps=steps,
+ filling_strategy=filling_strategy,
+ **kwargs,
+ )
+
+ step_strings = kwargs.pop('step_strings', None)
+ if step_strings is not None:
+ steps = create_steps_from_strings(step_strings, step_params=step_params)
+ return Compressor(
+ config_space=config_space,
+ steps=steps,
+ filling_strategy=filling_strategy,
+ **kwargs,
+ )
+
+ compressor_type = (compressor_type or 'none').lower()
+ step_strings = []
+ mapped_step_params = {}
+
+ if compressor_type == 'none':
+ step_strings = []
+ elif compressor_type == 'pipeline':
+ raise ValueError('compressor_type="pipeline" requires `steps` or `step_strings` in compressor_kwargs.')
+ elif compressor_type in ('shap', 'expert'):
+ if compressor_type == 'shap':
+ step_strings.append('d_shap')
+ mapped_step_params['d_shap'] = {
+ 'topk': kwargs.pop('topk', 20),
+ 'exclude_params': kwargs.pop('exclude_params', None),
+ }
+ else:
+ step_strings.append('d_expert')
+ mapped_step_params['d_expert'] = {
+ 'expert_params': kwargs.pop('expert_params', []),
+ 'exclude_params': kwargs.pop('exclude_params', None),
+ }
+ top_ratio = kwargs.pop('top_ratio', 0.8)
+ sigma = kwargs.pop('sigma', 2.0)
+ if top_ratio < 1.0 or sigma > 0:
+ step_strings.append('r_boundary')
+ mapped_step_params['r_boundary'] = {
+ 'top_ratio': top_ratio,
+ 'sigma': sigma,
+ 'enable_mixed_sampling': kwargs.pop('enable_mixed_sampling', True),
+ 'initial_prob': kwargs.pop('initial_prob', 0.9),
+ }
+ elif compressor_type == 'llamatune':
+ max_num_values = kwargs.pop('max_num_values', None)
+ adapter_alias = kwargs.pop('adapter_alias', 'none')
+ low_dim = kwargs.pop('le_low_dim', 10)
+
+ if max_num_values is not None:
+ step_strings.append('p_quant')
+ mapped_step_params['p_quant'] = {'max_num_values': max_num_values, 'seed': kwargs.get('seed', seed)}
+ if adapter_alias == 'rembo':
+ step_strings.append('p_rembo')
+ mapped_step_params['p_rembo'] = {
+ 'low_dim': low_dim,
+ 'max_num_values': max_num_values,
+ 'seed': kwargs.get('seed', seed),
+ }
+ elif adapter_alias == 'hesbo':
+ step_strings.append('p_hesbo')
+ mapped_step_params['p_hesbo'] = {
+ 'low_dim': low_dim,
+ 'max_num_values': max_num_values,
+ 'seed': kwargs.get('seed', seed),
+ }
+ elif adapter_alias != 'none':
+ raise ValueError(f'Unknown adapter_alias: {adapter_alias}.')
+ else:
+ raise ValueError(f'Unknown compressor_type: {compressor_type}.')
+
+ step_params = {**mapped_step_params, **step_params}
+ steps = create_steps_from_strings(step_strings, step_params=step_params)
+ return Compressor(
+ config_space=config_space,
+ steps=steps,
+ filling_strategy=filling_strategy,
+ **kwargs,
+ )
+
+ def setup_space_adapter(self, transfer_learning_history=None):
+ transformed_history = self.space_adapter.setup(transfer_learning_history)
+ self.sample_space = self.space_adapter.sample_space
+ self.surrogate_space = self.space_adapter.surrogate_space
+ self.sample_space.seed(self.config_space_seed)
+ self.surrogate_space.seed(self.config_space_seed)
+ return transformed_history
+
def early_stop_perf(self, history):
if not self.early_stop:
return
@@ -135,6 +289,7 @@ def update_observation(self, observation: Observation):
observation: Observation
Observation of the objective function.
"""
+ self.space_adapter.cache_observation(observation) # low_dim_config
return self.history.update_observation(observation)
def update_observations(self, observations: List[Observation]):
diff --git a/openbox/core/generic_advisor.py b/openbox/core/generic_advisor.py
index 1940f359d..a75eff223 100644
--- a/openbox/core/generic_advisor.py
+++ b/openbox/core/generic_advisor.py
@@ -5,12 +5,12 @@
from openbox import logger
from openbox.utils.util_funcs import deprecate_kwarg
from openbox.utils.history import History
-from openbox.utils.samplers import SobolSampler, LatinHypercubeSampler, HaltonSampler
from openbox.utils.multi_objective import NondominatedPartitioning
from openbox.utils.early_stop import EarlyStopException
from openbox.core.base import build_acq_func, build_surrogate
from openbox.acq_optimizer import build_acq_optimizer
from openbox.core.base_advisor import BaseAdvisor
+from openbox.core.initial_config import InitialConfigProvider
class Advisor(BaseAdvisor):
@@ -38,6 +38,14 @@ class Advisor(BaseAdvisor):
If provided, the initial configurations will be evaluated in initial iterations of optimization.
transfer_learning_history : List[History], optional
Historical data for transfer learning.
+ warm_start_strategy : str, default='topk'
+ How to select configs from transfer learning history:
+ - 'no': Do not use warm start even if transfer_learning_history is provided
+ - 'best': Select best config from each source
+ - 'topk': Select top-k configs from each history
+ warm_start_num : int, optional
+ Number of configs to extract from transfer learning.
+ If None, uses init_num by default.
rand_prob : float, default=0.1
Probability to sample random configurations.
surrogate_type : str, default='auto'
@@ -105,6 +113,8 @@ def __init__(
initial_configurations=None,
init_strategy='random_explore_first',
transfer_learning_history=None,
+ warm_start_strategy='topk',
+ warm_start_num=None,
rand_prob=0.1,
optimization_strategy='bo',
surrogate_type='auto',
@@ -130,6 +140,7 @@ def __init__(
task_id=task_id,
random_state=random_state,
logger_kwargs=logger_kwargs,
+ **kwargs,
)
# Basic components in Advisor.
@@ -138,20 +149,23 @@ def __init__(
# Init the basic ingredients in Bayesian optimization.
self.transfer_learning_history = transfer_learning_history
+ self.surrogate_transfer_learning_history = self.setup_space_adapter(self.transfer_learning_history)
self.surrogate_type = surrogate_type
self.constraint_surrogate_type = None
self.acq_type = acq_type
self.acq_optimizer_type = acq_optimizer_type
- # initial design
- self.init_num = initial_trials
self.init_strategy = init_strategy
- if initial_configurations is not None and len(initial_configurations) > 0:
- self.initial_configurations = initial_configurations
- self.init_num = len(initial_configurations)
- else:
- self.initial_configurations = self.create_initial_design(self.init_strategy)
- self.init_num = len(self.initial_configurations)
+ self.initial_configurations = self.create_initial_design(
+ init_strategy=init_strategy,
+ init_num=initial_trials,
+ initial_configurations=initial_configurations,
+ transfer_learning_history=transfer_learning_history,
+ warm_start_strategy=warm_start_strategy,
+ warm_start_num=warm_start_num,
+ rng=self.rng,
+ )
+ self.init_num = len(self.initial_config_provider)
self.surrogate_model = None
@@ -163,6 +177,28 @@ def __init__(
self.check_setup()
self.setup_bo_basics()
+ def create_initial_design(self, init_strategy=None, init_num=None, \
+ initial_configurations=None, transfer_learning_history=None, \
+ warm_start_strategy='no', warm_start_num=0, rng=None):
+ if init_strategy is None:
+ init_strategy = self.init_strategy
+ if init_num is None:
+ init_num = self.init_num
+ if rng is None:
+ rng = self.rng
+
+ self.initial_config_provider = InitialConfigProvider(
+ config_space=self.config_space,
+ init_num=init_num,
+ init_strategy=init_strategy,
+ initial_configurations=initial_configurations,
+ transfer_learning_history=transfer_learning_history,
+ warm_start_strategy=warm_start_strategy,
+ warm_start_num=warm_start_num,
+ rng=rng,
+ )
+ return self.initial_config_provider.config_queue
+
def algo_auto_selection(self):
from ConfigSpace import UniformFloatHyperparameter, UniformIntegerHyperparameter, \
CategoricalHyperparameter, OrdinalHyperparameter
@@ -299,9 +335,12 @@ def check_setup(self):
if not (self.num_objectives == 1 and self.num_constraints == 0):
raise NotImplementedError('Currently, transfer learning is only supported for single objective '
'optimization without constraints.')
- surrogate_str = self.surrogate_type.split('_')
- assert len(surrogate_str) == 3 and surrogate_str[0] == 'tlbo'
- assert surrogate_str[1] in ['rgpe', 'sgpr', 'topov3'] # todo: 'mfgpe'
+ if self.surrogate_type.startswith('mfgpe'):
+ pass
+ else:
+ surrogate_str = self.surrogate_type.split('_')
+ assert len(surrogate_str) == 3 and surrogate_str[0] == 'tlbo'
+ assert surrogate_str[1] in ['rgpe', 'sgpr', 'topov3', 'mfgpe']
# early stop
if self.early_stop:
@@ -316,20 +355,20 @@ def setup_bo_basics(self):
"""
if self.num_objectives == 1:
self.surrogate_model = build_surrogate(func_str=self.surrogate_type,
- config_space=self.config_space,
+ config_space=self.surrogate_space,
rng=self.rng,
- transfer_learning_history=self.transfer_learning_history)
+ transfer_learning_history=self.surrogate_transfer_learning_history)
elif self.acq_type == 'parego':
func_str = 'parego_' + self.surrogate_type
self.surrogate_model = build_surrogate(func_str=func_str,
- config_space=self.config_space,
+ config_space=self.surrogate_space,
rng=self.rng,
- transfer_learning_history=self.transfer_learning_history)
+ transfer_learning_history=self.surrogate_transfer_learning_history)
else: # multi-objectives
self.surrogate_model = [build_surrogate(func_str=self.surrogate_type,
- config_space=self.config_space,
+ config_space=self.surrogate_space,
rng=self.rng,
- transfer_learning_history=self.transfer_learning_history)
+ transfer_learning_history=self.surrogate_transfer_learning_history)
for _ in range(self.num_objectives)]
if self.num_constraints > 0:
@@ -341,7 +380,7 @@ def setup_bo_basics(self):
self.acquisition_function = build_acq_func(func_str=self.acq_type,
model=self.surrogate_model,
constraint_models=self.constraint_models,
- config_space=self.config_space)
+ config_space=self.surrogate_space)
else:
self.acquisition_function = build_acq_func(func_str=self.acq_type,
model=self.surrogate_model,
@@ -350,78 +389,7 @@ def setup_bo_basics(self):
if self.acq_type == 'usemo':
self.acq_optimizer_type = 'usemo_optimizer'
self.acq_optimizer = build_acq_optimizer(
- func_str=self.acq_optimizer_type, config_space=self.config_space, rng=self.rng)
-
- def create_initial_design(self, init_strategy='default'):
- """
- Create several configurations as initial design.
- Parameters
- ----------
- init_strategy: str
-
- Returns
- -------
- Initial configurations.
- """
- default_config = self.config_space.get_default_configuration()
- num_random_config = self.init_num - 1
- if init_strategy == 'random':
- initial_configs = self.sample_random_configs(self.config_space, self.init_num)
- elif init_strategy == 'default':
- initial_configs = [default_config] + self.sample_random_configs(self.config_space, num_random_config)
- elif init_strategy == 'random_explore_first':
- candidate_configs = self.sample_random_configs(self.config_space, 100)
- initial_configs = self.max_min_distance(default_config, candidate_configs, num_random_config)
- elif init_strategy == 'sobol':
- sobol = SobolSampler(self.config_space, num_random_config, random_state=self.rng)
- initial_configs = [default_config] + sobol.generate(return_config=True)
- elif init_strategy == 'latin_hypercube':
- lhs = LatinHypercubeSampler(self.config_space, num_random_config, criterion='maximin')
- initial_configs = [default_config] + lhs.generate(return_config=True)
- elif init_strategy == 'halton':
- halton = HaltonSampler(self.config_space, num_random_config, random_state=self.rng)
- initial_configs = [default_config] + halton.generate(return_config=True)
- else:
- raise ValueError('Unknown initial design strategy: %s.' % init_strategy)
-
- valid_configs = []
- for config in initial_configs:
- try:
- config.is_valid_configuration()
- except ValueError:
- continue
- valid_configs.append(config)
- if len(valid_configs) != len(initial_configs):
- logger.warning('Only %d/%d valid configurations are generated for initial design strategy: %s. '
- 'Add more random configurations.'
- % (len(valid_configs), len(initial_configs), init_strategy))
- num_random_config = self.init_num - len(valid_configs)
- valid_configs += self.sample_random_configs(self.config_space, num_random_config,
- excluded_configs=valid_configs)
- return valid_configs
-
- def max_min_distance(self, default_config, src_configs, num):
- min_dis = list()
- initial_configs = list()
- initial_configs.append(default_config)
-
- for config in src_configs:
- dis = np.linalg.norm(config.get_array() - default_config.get_array())
- min_dis.append(dis)
- min_dis = np.array(min_dis)
-
- for i in range(num):
- furthest_config = src_configs[np.argmax(min_dis)]
- initial_configs.append(furthest_config)
- min_dis[np.argmax(min_dis)] = -1
-
- for j in range(len(src_configs)):
- if src_configs[j] in initial_configs:
- continue
- updated_dis = np.linalg.norm(src_configs[j].get_array() - furthest_config.get_array())
- min_dis[j] = min(updated_dis, min_dis[j])
-
- return initial_configs
+ func_str=self.acq_optimizer_type, config_space=self.sample_space, rng=self.rng)
def early_stop_ei(self, history, challengers):
if not self.early_stop:
@@ -436,116 +404,155 @@ def early_stop_ei(self, history, challengers):
self.early_stop_algorithm.set_already_early_stopped(history)
raise EarlyStopException("Early stop triggered!")
- def get_suggestion(self, history: History = None, return_list: bool = False):
- """
- Generate a configuration (suggestion) for this query.
- Returns
- -------
- A configuration.
- """
+ def update_compression(self, history: History = None) -> bool:
if history is None:
history = self.history
+ if not self.space_adapter.update(history):
+ return False
+ self.sample_space = self.space_adapter.sample_space
+ self.surrogate_space = self.space_adapter.surrogate_space
+ self.surrogate_transfer_learning_history = self.space_adapter.setup(self.transfer_learning_history)
+ self.setup_bo_basics()
+ return True
- # if self.early_stop and self.early_stop_algorithm.decide_early_stop_before_suggest(history):
- # self.early_stop_algorithm.set_already_early_stopped(history)
- # raise EarlyStopException("Early stop triggered!")
- self.early_stop_perf(history)
-
- self.alter_model(history)
-
+ def _get_bo_candidates(self, history: History):
num_config_evaluated = len(history)
num_config_successful = history.get_success_count()
- if num_config_evaluated < self.init_num:
- res = self.initial_configurations[num_config_evaluated]
- return [res] if return_list else res
- if self.optimization_strategy == 'random':
- res = self.sample_random_configs(self.config_space, 1, excluded_configs=history.configurations)[0]
- return [res] if return_list else res
-
- if (not return_list) and self.rng.random() < self.rand_prob:
- logger.info('Sample random config. rand_prob=%f.' % self.rand_prob)
- res = self.sample_random_configs(self.config_space, 1, excluded_configs=history.configurations)[0]
- return res
+ if num_config_successful < max(self.init_num, 1):
+            logger.warning('Not enough successful initial trials! Sample random configuration.')
+ return self.sample_random_configs(self.config_space, 1, excluded_configs=history.configurations)
- X = history.get_config_array(transform='scale')
+ X = self.space_adapter.get_surrogate_array(history)
Y = history.get_objectives(transform='infeasible')
cY = history.get_constraints(transform='bilog')
- if self.optimization_strategy == 'bo':
- if num_config_successful < max(self.init_num, 1):
- logger.warning('No enough successful initial trials! Sample random configuration.')
- res = self.sample_random_configs(self.config_space, 1, excluded_configs=history.configurations)[0]
- return [res] if return_list else res
-
- # train surrogate model
- if self.num_objectives == 1:
- self.surrogate_model.train(X, Y[:, 0])
- elif self.acq_type == 'parego':
- self.surrogate_model.train(X, Y)
- else: # multi-objectives
- for i in range(self.num_objectives):
- self.surrogate_model[i].train(X, Y[:, i])
-
- # train constraint model
- for i in range(self.num_constraints):
- self.constraint_models[i].train(X, cY[:, i])
-
- # update acquisition function
- if self.num_objectives == 1:
- incumbent_value = history.get_incumbent_value()
+ # train surrogate model
+ if self.num_objectives == 1:
+ self.surrogate_model.train(X, Y[:, 0])
+ elif self.acq_type == 'parego':
+ self.surrogate_model.train(X, Y)
+ else: # multi-objectives
+ for i in range(self.num_objectives):
+ self.surrogate_model[i].train(X, Y[:, i])
+
+ # train constraint model
+ for i in range(self.num_constraints):
+ self.constraint_models[i].train(X, cY[:, i])
+
+ # update acquisition function
+ if self.num_objectives == 1:
+ incumbent_value = history.get_incumbent_value()
+ self.acquisition_function.update(model=self.surrogate_model,
+ constraint_models=self.constraint_models,
+ eta=incumbent_value,
+ num_data=num_config_evaluated)
+ else: # multi-objectives
+ mo_incumbent_values = history.get_mo_incumbent_values()
+ if self.acq_type == 'parego':
+ scalarized_obj = self.surrogate_model.get_scalarized_obj()
+ incumbent_value = scalarized_obj(np.atleast_2d(mo_incumbent_values))
self.acquisition_function.update(model=self.surrogate_model,
constraint_models=self.constraint_models,
eta=incumbent_value,
num_data=num_config_evaluated)
- else: # multi-objectives
- mo_incumbent_values = history.get_mo_incumbent_values()
- if self.acq_type == 'parego':
- scalarized_obj = self.surrogate_model.get_scalarized_obj()
- incumbent_value = scalarized_obj(np.atleast_2d(mo_incumbent_values))
- self.acquisition_function.update(model=self.surrogate_model,
- constraint_models=self.constraint_models,
- eta=incumbent_value,
- num_data=num_config_evaluated)
- elif self.acq_type.startswith('ehvi'):
- partitioning = NondominatedPartitioning(self.num_objectives, Y)
- cell_bounds = partitioning.get_hypercell_bounds(ref_point=self.ref_point)
- self.acquisition_function.update(model=self.surrogate_model,
- constraint_models=self.constraint_models,
- cell_lower_bounds=cell_bounds[0],
- cell_upper_bounds=cell_bounds[1])
- else:
- self.acquisition_function.update(model=self.surrogate_model,
- constraint_models=self.constraint_models,
- constraint_perfs=cY, # for MESMOC
- eta=mo_incumbent_values,
- num_data=num_config_evaluated,
- X=X, Y=Y)
-
- # optimize acquisition function
- challengers = self.acq_optimizer.maximize(
- acquisition_function=self.acquisition_function,
- history=history,
- num_points=5000,
- )
- if return_list:
- # Caution: return_list doesn't contain random configs sampled according to rand_prob
- return challengers
-
- # early stop
- # if self.early_stop:
- # max_acq_value = np.max(self.acquisition_function(challengers)).item()
- # if self.early_stop_algorithm.decide_early_stop_after_suggest(
- # history=history, max_acq_value=max_acq_value):
- # self.early_stop_algorithm.set_already_early_stopped(history)
- # raise EarlyStopException("Early stop triggered!")
- self.early_stop_ei(history, challengers=challengers)
-
- for config in challengers:
- if config not in history.configurations:
- return config
- logger.warning('Cannot get non duplicate configuration from BO candidates (len=%d). '
- 'Sample random config.' % (len(challengers), ))
+ elif self.acq_type.startswith('ehvi'):
+ partitioning = NondominatedPartitioning(self.num_objectives, Y)
+ cell_bounds = partitioning.get_hypercell_bounds(ref_point=self.ref_point)
+ self.acquisition_function.update(model=self.surrogate_model,
+ constraint_models=self.constraint_models,
+ cell_lower_bounds=cell_bounds[0],
+ cell_upper_bounds=cell_bounds[1])
+ else:
+ self.acquisition_function.update(model=self.surrogate_model,
+ constraint_models=self.constraint_models,
+ constraint_perfs=cY, # for MESMOC
+ eta=mo_incumbent_values,
+ num_data=num_config_evaluated,
+ X=X, Y=Y)
+
+ challengers = self.acq_optimizer.maximize(
+ acquisition_function=self.acquisition_function,
+ history=self.space_adapter.history_for_acq(history),
+ num_points=5000,
+ )
+ return [self.space_adapter.config_to_original(conf) for conf in challengers]
+
+ def get_suggestion(self, history: History = None):
+ if history is None:
+ history = self.history
+
+ self.early_stop_perf(history)
+ self.alter_model(history)
+
+ num_config_evaluated = len(history)
+ if num_config_evaluated < self.init_num:
+ return self.initial_config_provider.get_config(num_config_evaluated)
+ if self.optimization_strategy == 'random':
return self.sample_random_configs(self.config_space, 1, excluded_configs=history.configurations)[0]
- else:
+
+ if self.rng.random() < self.rand_prob:
+ logger.info('Sample random config. rand_prob=%f.' % self.rand_prob)
+ return self.sample_random_configs(self.config_space, 1, excluded_configs=history.configurations)[0]
+
+ if self.optimization_strategy != 'bo':
+ raise ValueError('Unknown optimization strategy: %s.' % self.optimization_strategy)
+
+ candidates = self._get_bo_candidates(history)
+ self.early_stop_ei(history, challengers=candidates)
+ for config in candidates:
+ if config not in history.configurations:
+ return config
+ logger.warning('Cannot get non duplicate configuration from BO candidates (len=%d). '
+ 'Sample random config.' % (len(candidates),))
+ return self.sample_random_configs(self.config_space, 1, excluded_configs=history.configurations)[0]
+
+
+ def get_suggestions(self, batch_size=None, history: History = None):
+ if batch_size is None:
+ batch_size = 1
+ batch_size = int(batch_size)
+ if batch_size <= 0:
+ return []
+ if history is None:
+ history = self.history
+
+ self.early_stop_perf(history)
+ self.alter_model(history)
+
+ num_config_evaluated = len(history)
+ num_config_successful = history.get_success_count()
+ if (
+ num_config_evaluated < self.init_num
+ or self.optimization_strategy == 'random'
+ or num_config_successful < max(self.init_num, 1)
+ ):
+ return self.sample_random_configs(
+ self.config_space,
+ num_configs=batch_size,
+ excluded_configs=history.configurations,
+ )
+
+ if self.optimization_strategy != 'bo':
raise ValueError('Unknown optimization strategy: %s.' % self.optimization_strategy)
+
+ candidates = self._get_bo_candidates(history)
+ self.early_stop_ei(history, challengers=candidates)
+
+ results = []
+ for config in candidates:
+ if config in history.configurations or config in results:
+ continue
+ results.append(config)
+ if len(results) >= batch_size:
+ return results
+
+ if len(results) < batch_size:
+ excluded = set(history.configurations)
+ excluded.update(results)
+ results.extend(self.sample_random_configs(
+ self.config_space,
+ num_configs=batch_size - len(results),
+ excluded_configs=excluded,
+ ))
+ return results
diff --git a/openbox/core/initial_config.py b/openbox/core/initial_config.py
new file mode 100644
index 000000000..1bf7b03f1
--- /dev/null
+++ b/openbox/core/initial_config.py
@@ -0,0 +1,526 @@
+# License: MIT
+# Author: LINGCHING TUNG
+
+"""
+Unified Initial Configuration Provider.
+
+Combines multiple sources of initial configurations:
+1. User-provided configurations (highest priority)
+2. Warm-start from transfer learning history
+3. Initial design strategies (sobol, latin_hypercube, etc.)
+4. Random sampling (fallback)
+"""
+
+from typing import List, Optional, Dict, Tuple
+from ConfigSpace import Configuration, ConfigurationSpace
+import numpy as np
+
+from openbox import logger
+from openbox.utils.history import History
+from openbox.utils.samplers import SobolSampler, LatinHypercubeSampler, HaltonSampler
+
+
+class InitialConfigProvider:
+ """
+ Unified initial configuration provider.
+
+ Combines multiple sources of initial configurations with priority:
+ 1. User-provided configurations (highest priority)
+ 2. Warm-start from transfer learning history
+ 3. Initial design strategies (sobol, latin_hypercube, etc.)
+ 4. Random sampling (fallback)
+
+ Parameters
+ ----------
+ config_space : ConfigurationSpace
+ Configuration space.
+ init_num : int, default=3
+ Total number of initial configurations to use.
+ init_strategy : str, default='random_explore_first'
+ Strategy for generating initial design configs:
+ - 'random': Pure random sampling
+ - 'default': Default config + random sampling
+ - 'random_explore_first': Max-min distance (space-filling)
+ - 'sobol': Sobol sequence sampling
+ - 'latin_hypercube': Latin hypercube sampling
+ - 'halton': Halton sequence sampling
+ initial_configurations : List[Configuration], optional
+ User-provided initial configurations (highest priority).
+ transfer_learning_history : List[History], optional
+ Historical data for warm start.
+ warm_start_strategy : str, default='topk'
+ How to select configs from transfer learning history:
+ - 'no': Do not use warm start even if transfer_learning_history is provided
+ - 'best': Select best config from each source
+ - 'topk': Select top-k configs from each history
+ warm_start_num : int, optional
+ Number of configs to extract from transfer learning (independent of init_num).
+ If None and has transfer learning history, uses init_num.
+ Note: warm_start configs may not all be used for init - some can be used
+ in multi-fidelity competition pool.
+ rng : RandomState, optional
+ Random number generator.
+
+ Attributes
+ ----------
+ init_num : int
+ Number of initial configurations.
+ config_queue : List[Configuration]
+ Queue of initial configurations.
+ config_sources : List[str]
+ Source of each configuration in the queue.
+
+ Examples
+ --------
+ >>> from openbox.core.initial_config import InitialConfigProvider
+ >>> provider = InitialConfigProvider(
+ ... config_space=cs,
+ ... init_num=5,
+ ... init_strategy='sobol',
+ ... transfer_learning_history=histories
+ ... )
+ >>> for i in range(len(provider)):
+ ... config = provider.get_config(i)
+ ... source = provider.get_config_source(i)
+ ... print(f"Config {i} from {source}")
+ """
+
+ def __init__(
+ self,
+ config_space: ConfigurationSpace,
+ init_num: int = 3,
+ init_strategy: str = 'random_explore_first',
+ initial_configurations: Optional[List[Configuration]] = None,
+ transfer_learning_history: Optional[List[History]] = None,
+        warm_start_strategy: str = 'topk',
+ warm_start_num: Optional[int] = None,
+ rng: np.random.RandomState = None,
+ ):
+ self.config_space = config_space
+ self._init_num = init_num
+ self.init_strategy = init_strategy
+ self.warm_start_strategy = warm_start_strategy
+ self.rng = rng if rng is not None else np.random.RandomState()
+
+ # Configuration queue and sources
+ self._config_queue: List[Configuration] = []
+ self._config_sources: List[str] = []
+
+ # Build the configuration queue
+ self._build_config_queue(
+ initial_configurations=initial_configurations,
+ transfer_learning_history=transfer_learning_history,
+ warm_start_num=warm_start_num
+ )
+
+ logger.info(f'InitialConfigProvider initialized: {len(self._config_queue)} configs ready. '
+ f'Sources: {self.get_source_summary()}')
+
+ def _build_config_queue(
+ self,
+ initial_configurations: Optional[List[Configuration]],
+ transfer_learning_history: Optional[List[History]],
+ warm_start_num: Optional[int]
+ ):
+ # 1. User-provided configurations (highest priority)
+ if initial_configurations is not None and len(initial_configurations) > 0:
+ for config in initial_configurations:
+ self._add_config(config, source='user_provided')
+ logger.info(f'Added {len(initial_configurations)} user-provided configurations')
+
+ # 2. Warm-start from transfer learning history (warm_start_num is independent of init_num)
+ if (transfer_learning_history is not None
+ and len(transfer_learning_history) > 0
+ and self.warm_start_strategy != 'no'):
+ num_warm = warm_start_num if warm_start_num is not None else self._init_num
+ warm_configs = self._extract_warm_start_configs(
+ histories=transfer_learning_history,
+ strategy=self.warm_start_strategy,
+ num=num_warm
+ )
+ for config in warm_configs:
+ self._add_config(config, source='warm_start')
+
+ logger.info(f'Warm start: extracted {len(warm_configs)} configs (warm_start_num={num_warm})')
+
+ # 3. Initial design configurations (fill remaining to reach init_num)
+ remaining = self._init_num - len(self._config_queue)
+ if remaining > 0:
+ design_configs = self._create_initial_design(remaining)
+ for config in design_configs:
+ self._add_config(config, source=f'initial_design:{self.init_strategy}')
+
+ # 4. Random fallback (if still not enough)
+ remaining = self._init_num - len(self._config_queue)
+ if remaining > 0:
+ logger.warning(f'Still need {remaining} configs, filling with random sampling')
+ random_configs = self._sample_random(
+ remaining,
+ excluded=set(self._config_queue)
+ )
+ for config in random_configs:
+ self._add_config(config, source='random_fallback')
+
+ def _add_config(self, config: Configuration, source: str) -> bool:
+ """
+ Add config to queue if valid and not duplicate.
+
+ Returns True if added successfully, False otherwise.
+ """
+ if config in self._config_queue:
+ logger.debug(f'Skipping duplicate config from {source}')
+ return False
+ config.origin = source
+ self._config_queue.append(config)
+ self._config_sources.append(source)
+ return True
+
+ def _extract_warm_start_configs(
+ self,
+ histories: List[History],
+ strategy: str,
+ num: int
+ ) -> List[Configuration]:
+ """
+ Extract configurations from transfer learning history.
+
+ Parameters
+ ----------
+ histories : List[History]
+ Historical data from source tasks.
+ strategy : str
+ Extraction strategy ('best', 'topk').
+ num : int
+ Number of configurations to extract.
+
+ Returns
+ -------
+ configs : List[Configuration]
+ Extracted configurations.
+ """
+ configs = []
+
+ if strategy == 'best':
+ for hist in histories:
+ if len(hist) > 0:
+ try:
+ incumbent_config = hist.get_incumbents()[0]
+ if incumbent_config is not None and incumbent_config not in configs:
+ configs.append(incumbent_config)
+ if len(configs) >= num:
+ break
+ except Exception as e:
+ logger.debug(f'Failed to get incumbent from history: {e}')
+ continue
+
+ elif strategy == 'topk':
+ # Select top-k configs from each history until reaching num
+ # k_per_hist: how many to take from each history per round
+ k_per_hist = max(1, (num + len(histories) - 1) // len(histories)) if len(histories) > 0 else num
+
+ # Round-robin selection: take top-k from each history until we have enough
+ # This ensures balanced selection across all histories
+ hist_iterators = []
+ for hist in histories:
+ if len(hist) == 0:
+ continue
+
+ hist_configs_with_perf: List[Tuple[Configuration, float]] = []
+ for obs in hist.observations:
+ if obs.objectives is not None and len(obs.objectives) > 0:
+ hist_configs_with_perf.append((obs.config, obs.objectives[0]))
+
+ hist_configs_with_perf.sort(key=lambda x: x[1])
+ hist_iterators.append(iter(hist_configs_with_perf))
+
+ # Take top-k from each history in round-robin fashion
+ while len(configs) < num and hist_iterators:
+ exhausted = []
+ for i, it in enumerate(hist_iterators):
+ count = 0
+ while count < k_per_hist and len(configs) < num:
+ try:
+ config, _ = next(it)
+ if config not in configs:
+ configs.append(config)
+ count += 1
+ except StopIteration:
+ exhausted.append(i)
+ break
+
+ for i in reversed(exhausted):
+ hist_iterators.pop(i)
+
+ remaining = num - len(configs)
+ if remaining > 0:
+ all_configs = []
+ for hist in histories:
+ all_configs.extend([obs.config for obs in hist.observations])
+
+ self.rng.shuffle(all_configs)
+ for config in all_configs:
+ if config not in configs:
+ configs.append(config)
+ if len(configs) >= num:
+ break
+ else:
+ raise ValueError(f'Unknown warm start strategy: {strategy}. '
+ f'Supported: best, topk')
+
+ logger.info(f'Warm start: extracted {len(configs)} configs using "{strategy}" strategy')
+ return configs
+
+ def _create_initial_design(self, num: int) -> List[Configuration]:
+ """
+ Generate initial design configurations.
+
+ Parameters
+ ----------
+ num : int
+ Number of configurations to generate.
+
+ Returns
+ -------
+ configs : List[Configuration]
+ Generated configurations.
+ """
+ if num <= 0:
+ return []
+
+ default_config = self.config_space.get_default_configuration()
+
+ num_random = max(0, num - 1)
+ configs = []
+
+ if self.init_strategy == 'random':
+ configs = self._sample_random(num, excluded=set(self._config_queue))
+
+ elif self.init_strategy == 'default':
+ if default_config not in self._config_queue:
+ configs = [default_config]
+ configs.extend(self._sample_random(
+ num - len(configs),
+ excluded=set(self._config_queue + configs)
+ ))
+
+ elif self.init_strategy == 'random_explore_first':
+ candidates = self._sample_random(max(100, num * 10), excluded=set(self._config_queue))
+ configs = self._max_min_distance(
+ default_config,
+ candidates,
+ num_random
+ )
+
+ elif self.init_strategy == 'sobol':
+ try:
+ sampler = SobolSampler(self.config_space, num_random, random_state=self.rng)
+ sobol_configs = sampler.generate(return_config=True)
+ if default_config not in self._config_queue:
+ configs = [default_config] + sobol_configs
+ else:
+ configs = sobol_configs
+ except Exception as e:
+ logger.warning(f'Sobol sampling failed: {e}. Falling back to random.')
+ configs = self._sample_random(num, excluded=set(self._config_queue))
+
+ elif self.init_strategy == 'latin_hypercube':
+ try:
+ sampler = LatinHypercubeSampler(self.config_space, num_random, criterion='maximin')
+ lhs_configs = sampler.generate(return_config=True)
+ if default_config not in self._config_queue:
+ configs = [default_config] + lhs_configs
+ else:
+ configs = lhs_configs
+ except Exception as e:
+ logger.warning(f'Latin hypercube sampling failed: {e}. Falling back to random.')
+ configs = self._sample_random(num, excluded=set(self._config_queue))
+
+ elif self.init_strategy == 'halton':
+ try:
+ sampler = HaltonSampler(self.config_space, num_random, random_state=self.rng)
+ halton_configs = sampler.generate(return_config=True)
+ if default_config not in self._config_queue:
+ configs = [default_config] + halton_configs
+ else:
+ configs = halton_configs
+ except Exception as e:
+ logger.warning(f'Halton sampling failed: {e}. Falling back to random.')
+ configs = self._sample_random(num, excluded=set(self._config_queue))
+ else:
+ raise ValueError(f'Unknown initial design strategy: {self.init_strategy}. '
+ f'Supported: random, default, random_explore_first, sobol, latin_hypercube, halton')
+
+ # Validate and filter
+ valid_configs = []
+ for config in configs:
+ if config in self._config_queue or config in valid_configs:
+ continue
+ valid_configs.append(config)
+
+ return valid_configs[: num]
+
+ def _sample_random(
+ self,
+ num: int,
+ excluded: Optional[set] = None
+ ) -> List[Configuration]:
+ if num <= 0:
+ return []
+
+ if excluded is None:
+ excluded = set()
+
+ configs = []
+ max_trials = max(1000, num * 20)
+ trials = 0
+
+ while len(configs) < num and trials < max_trials:
+ trials += 1
+ config = self.config_space.sample_configuration()
+ if config not in configs and config not in excluded:
+ configs.append(config)
+
+ if len(configs) < num:
+ logger.warning(f'Could only sample {len(configs)}/{num} random configs after {max_trials} trials')
+
+ return configs
+
+ def _max_min_distance(
+ self,
+ default_config: Configuration,
+ candidates: List[Configuration],
+ num: int,
+ ) -> List[Configuration]:
+ """
+ Select configurations maximizing minimum distance (space-filling).
+
+ Parameters
+ ----------
+ default_config : Configuration
+ Default configuration to start with.
+ candidates : List[Configuration]
+ Candidate configurations.
+ num : int
+ Number of configurations to select (excluding default_config).
+
+ Returns
+ -------
+ configs : List[Configuration]
+ Selected configurations (including default_config).
+ """
+ initial_configs = [default_config]
+
+ if len(candidates) == 0:
+ return initial_configs
+
+ min_dis = np.array([
+ np.linalg.norm(config.get_array() - default_config.get_array())
+ for config in candidates
+ ])
+
+ num_to_select = min(num, len(candidates))
+
+ for _ in range(num_to_select):
+ idx = np.argmax(min_dis)
+ if min_dis[idx] <= 0:
+ break
+
+ furthest_config = candidates[idx]
+ initial_configs.append(furthest_config)
+ min_dis[idx] = -1
+
+ for j in range(len(candidates)):
+ if min_dis[j] > 0: # Only update unselected configs
+ updated_dis = np.linalg.norm(candidates[j].get_array() - furthest_config.get_array())
+ min_dis[j] = min(updated_dis, min_dis[j])
+
+ return initial_configs
+
+ # ========== Public API ==========
+
+ def get_config(self, index: int) -> Optional[Configuration]:
+ """
+ Get initial configuration by index.
+
+ Parameters
+ ----------
+ index : int
+ Index of the configuration.
+
+ Returns
+ -------
+ config : Configuration or None
+ Configuration at the index, or None if exhausted.
+ """
+ if 0 <= index < len(self._config_queue):
+ return self._config_queue[index]
+ return None
+
+ def get_config_source(self, index: int) -> Optional[str]:
+ """
+ Get the source of configuration at index.
+
+ Parameters
+ ----------
+ index : int
+ Index of the configuration.
+
+ Returns
+ -------
+ source : str or None
+ Source string, or None if index out of range.
+ """
+ if 0 <= index < len(self._config_sources):
+ return self._config_sources[index]
+ return None
+
+ def is_exhausted(self, num_evaluated: int) -> bool:
+ """
+ Check if all initial configurations have been used.
+
+ Parameters
+ ----------
+ num_evaluated : int
+ Number of configurations already evaluated.
+
+ Returns
+ -------
+ exhausted : bool
+ True if all initial configs have been used.
+ """
+ return num_evaluated >= len(self._config_queue)
+
+ def get_source_summary(self) -> Dict[str, int]:
+ """
+ Get summary of configuration sources.
+
+ Returns
+ -------
+ summary : dict
+ Dictionary mapping source type to count.
+ """
+ summary = {}
+ for source in self._config_sources:
+ # Extract main source type (before ':')
+ key = source.split(':')[0]
+ summary[key] = summary.get(key, 0) + 1
+ return summary
+
+ @property
+ def config_queue(self) -> List[Configuration]:
+ """List of initial configurations (read-only copy)."""
+ return list(self._config_queue)
+
+ @property
+ def config_sources(self) -> List[str]:
+ """List of configuration sources (read-only copy)."""
+ return list(self._config_sources)
+
+ def __len__(self) -> int:
+ """Number of initial configurations."""
+ return len(self._config_queue)
+
+ def __repr__(self) -> str:
+ return (f"InitialConfigProvider(init_num={len(self)}, "
+ f"strategy='{self.init_strategy}', "
+ f"sources={self.get_source_summary()})")
diff --git a/openbox/core/mc_advisor.py b/openbox/core/mc_advisor.py
index 9ee665e9c..91b9d9c1d 100644
--- a/openbox/core/mc_advisor.py
+++ b/openbox/core/mc_advisor.py
@@ -181,7 +181,10 @@ def get_suggestion(self, history=None):
num_config_successful = history.get_success_count()
if num_config_evaluated < self.init_num:
- return self.initial_configurations[num_config_evaluated]
+ config = self.initial_config_provider.get_config(num_config_evaluated)
+ if config is not None:
+ return config
+ return self.sample_random_configs(self.config_space, 1, excluded_configs=history.configurations)[0]
if self.optimization_strategy == 'random':
return self.sample_random_configs(self.config_space, 1, excluded_configs=history.configurations)[0]
@@ -230,16 +233,6 @@ def get_suggestion(self, history=None):
num_points=5000,
turbo_state=self.turbo_state)
- # is_repeated_config = True
- # repeated_time = 0
- # cur_config = None
- # while is_repeated_config:
- # cur_config = challengers[repeated_time]
- # if cur_config in history.configurations:
- # repeated_time += 1
- # else:
- # is_repeated_config = False
- # return cur_config
for config in challengers:
if config not in history.configurations:
return config
@@ -250,6 +243,12 @@ def get_suggestion(self, history=None):
else:
raise ValueError('Unknown optimization strategy: %s.' % self.optimization_strategy)
+ def get_suggestions(self, batch_size=1, history=None):
+ if batch_size is None:
+ batch_size = 1
+ batch_size = int(batch_size)
+ return [self.get_suggestion(history=history) for _ in range(batch_size)]
+
def update_observation(self, observation: Observation):
super().update_observation(observation)
if self.use_trust_region:
diff --git a/openbox/core/mf_advisor.py b/openbox/core/mf_advisor.py
new file mode 100644
index 000000000..8a479b5ac
--- /dev/null
+++ b/openbox/core/mf_advisor.py
@@ -0,0 +1,97 @@
+# License: MIT
+
+from openbox.core.generic_advisor import Advisor
+from openbox.utils.history import Observation, History
+from openbox.utils.util_funcs import deprecate_kwarg
+
+
+class MFAdvisor(Advisor):
+ @deprecate_kwarg('num_objs', 'num_objectives', 'a future version')
+ def __init__(
+ self,
+ config_space,
+ num_objectives=1,
+ num_constraints=0,
+ initial_trials=3,
+ initial_configurations=None,
+ init_strategy='random_explore_first',
+ transfer_learning_history=None,
+ warm_start_strategy='topk',
+ warm_start_num=None,
+ rand_prob=0.1,
+ optimization_strategy='bo',
+ surrogate_type='mfgpe',
+ acq_type='ei',
+ acq_optimizer_type='local_random',
+ ref_point=None,
+ early_stop=False,
+ early_stop_kwargs=None,
+ output_dir='logs',
+ task_id='OpenBox',
+ random_state=None,
+ logger_kwargs: dict = None,
+ **kwargs,
+ ):
+ super().__init__(
+ config_space=config_space,
+ num_objectives=num_objectives,
+ num_constraints=num_constraints,
+ initial_trials=initial_trials,
+ initial_configurations=initial_configurations,
+ init_strategy=init_strategy,
+ transfer_learning_history=transfer_learning_history,
+ warm_start_strategy=warm_start_strategy,
+ warm_start_num=warm_start_num,
+ rand_prob=rand_prob,
+ optimization_strategy=optimization_strategy,
+ surrogate_type=surrogate_type,
+ acq_type=acq_type,
+ acq_optimizer_type=acq_optimizer_type,
+ ref_point=ref_point,
+ early_stop=early_stop,
+ early_stop_kwargs=early_stop_kwargs,
+ output_dir=output_dir,
+ task_id=task_id,
+ random_state=random_state,
+ logger_kwargs=logger_kwargs,
+ **kwargs,
+ )
+ self.history_list = []
+ self.resource_identifiers = []
+
+ @staticmethod
+ def _round_resource_ratio(resource_ratio):
+ return round(float(resource_ratio), 5)
+
+ def _get_or_create_history(self, resource_ratio):
+ if resource_ratio not in self.resource_identifiers:
+ self.resource_identifiers.append(resource_ratio)
+ history = History(
+ task_id=self.task_id,
+ num_objectives=self.num_objectives,
+ num_constraints=self.num_constraints,
+ config_space=self.config_space,
+ ref_point=self.ref_point,
+ )
+ self.history_list.append(history)
+ idx = self.resource_identifiers.index(resource_ratio)
+ return self.history_list[idx]
+
+ def _prepare_mf_surrogate(self):
+ self.surrogate_model.update_mf_trials(self.history_list)
+ self.surrogate_model.build_source_surrogates()
+
+ def get_suggestion(self, history=None):
+ self._prepare_mf_surrogate()
+ return super().get_suggestion(history=history)
+
+ def get_suggestions(self, batch_size=1, history=None):
+ self._prepare_mf_surrogate()
+ return super().get_suggestions(batch_size=batch_size, history=history)
+
+ def update_observation(self, observation: Observation, resource_ratio=1.0):
+ resource_ratio = self._round_resource_ratio(resource_ratio)
+ mf_history = self._get_or_create_history(resource_ratio)
+ mf_history.update_observation(observation)
+ if resource_ratio == self._round_resource_ratio(1.0):
+ return super().update_observation(observation)
diff --git a/openbox/core/mf_batch_advisor.py b/openbox/core/mf_batch_advisor.py
deleted file mode 100644
index d1a808e6f..000000000
--- a/openbox/core/mf_batch_advisor.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# License: MIT
-
-from openbox import logger
-from openbox.core.generic_advisor import Advisor
-from openbox.utils.history import Observation, History
-from openbox.utils.util_funcs import deprecate_kwarg
-
-
-class MFBatchAdvisor(Advisor):
- @deprecate_kwarg('num_objs', 'num_objectives', 'a future version')
- def __init__(
- self,
- config_space,
- num_objectives=1,
- num_constraints=0,
- batch_size=4,
- initial_trials=3,
- initial_configurations=None,
- init_strategy='random_explore_first',
- transfer_learning_history=None,
- rand_prob=0.1,
- optimization_strategy='bo',
- surrogate_type='mfgpe',
- acq_type='ei',
- acq_optimizer_type='local_random',
- ref_point=None,
- early_stop=False,
- early_stop_kwargs=None,
- output_dir='logs',
- task_id='OpenBox',
- random_state=None,
- logger_kwargs: dict = None,
- ):
-
- self.batch_size = batch_size
- super().__init__(config_space,
- num_objectives=num_objectives,
- num_constraints=num_constraints,
- initial_trials=initial_trials,
- initial_configurations=initial_configurations,
- init_strategy=init_strategy,
- transfer_learning_history=transfer_learning_history,
- rand_prob=rand_prob,
- optimization_strategy=optimization_strategy,
- surrogate_type=surrogate_type,
- acq_type=acq_type,
- acq_optimizer_type=acq_optimizer_type,
- ref_point=ref_point,
- early_stop=early_stop,
- early_stop_kwargs=early_stop_kwargs,
- output_dir=output_dir,
- task_id=task_id,
- random_state=random_state,
- logger_kwargs=logger_kwargs)
- self.history_list = list()
- self.resource_identifiers = list()
-
- def get_suggestions(self, batch_size=None, history=None):
- if batch_size is None:
- batch_size = self.batch_size
- assert batch_size >= 1
- if history is None:
- history = self.history
-
- self.early_stop_perf(history)
-
- num_config_evaluated = len(history)
- num_config_successful = history.get_success_count()
-
- if num_config_evaluated < self.init_num:
- if self.initial_configurations is not None: # self.init_num equals to len(self.initial_configurations)
- next_configs = self.initial_configurations[num_config_evaluated: num_config_evaluated + batch_size]
- if len(next_configs) < batch_size:
- next_configs.extend(self.sample_random_configs(
- self.config_space, batch_size - len(next_configs), excluded_configs=history.configurations))
- return next_configs
- else:
- return self.sample_random_configs(self.config_space, batch_size,
- excluded_configs=history.configurations)
-
- if self.optimization_strategy == 'random':
- return self.sample_random_configs(self.config_space, batch_size,
- excluded_configs=history.configurations)
-
- if num_config_successful < max(self.init_num, 1):
- logger.warning('No enough successful initial trials! Sample random configurations.')
- return self.sample_random_configs(self.config_space, batch_size,
- excluded_configs=history.configurations)
-
- batch_configs_list = list()
-
- # select first N candidates
- self.surrogate_model.update_mf_trials(self.history_list)
- self.surrogate_model.build_source_surrogates()
- candidates = super().get_suggestion(history, return_list=True) # replace
-
- self.early_stop_ei(history, challengers=candidates)
-
- idx = 0
- while len(batch_configs_list) < batch_size:
- if idx >= len(candidates):
- logger.warning('Cannot get non duplicate configuration from BO candidates (len=%d). '
- 'Sample random config.' % (len(candidates),))
- cur_config = self.sample_random_configs(
- self.config_space, 1, excluded_configs=history.configurations + batch_configs_list)[0]
- elif self.rng.random() < self.rand_prob:
- # sample random configuration proportionally
- logger.info('Sample random config. rand_prob=%f.' % self.rand_prob)
- cur_config = self.sample_random_configs(
- self.config_space, 1, excluded_configs=history.configurations + batch_configs_list)[0]
- else:
- cur_config = None
- while idx < len(candidates):
- conf = candidates[idx]
- idx += 1
- if conf not in batch_configs_list and conf not in history.configurations:
- cur_config = conf
- break
- if cur_config is not None:
- batch_configs_list.append(cur_config)
-
- return batch_configs_list
-
- def update_observation(self, observation: Observation, resource_ratio):
- if resource_ratio not in self.resource_identifiers:
- self.resource_identifiers.append(resource_ratio)
- history = History(task_id=self.task_id, num_objectives=self.num_objectives,
- num_constraints=self.num_constraints,
- config_space=self.config_space,
- ref_point=self.ref_point)
- self.history_list.append(history)
-
- self.history_list[self.resource_identifiers.index(resource_ratio)].update_observation(observation)
-
- if resource_ratio == 1:
- self.history.update_observation(observation)
diff --git a/openbox/core/random_advisor.py b/openbox/core/random_advisor.py
index 1e40ff5e5..bdd5c1654 100644
--- a/openbox/core/random_advisor.py
+++ b/openbox/core/random_advisor.py
@@ -1,12 +1,46 @@
# License: MIT
-from openbox.core.base_advisor import BaseAdvisor
+from openbox.core.generic_advisor import Advisor
from openbox.utils.util_funcs import deprecate_kwarg
-class RandomAdvisor(BaseAdvisor):
+class RandomAdvisor(Advisor):
"""
Random Advisor Class, which adopts the random policy to sample a configuration.
+
+ This is a convenience wrapper around Advisor with optimization_strategy='random'.
+ It supports initial configurations via InitialConfigProvider before falling back
+ to random sampling.
+
+ Parameters
+ ----------
+ config_space : openbox.space.Space or ConfigSpace.ConfigurationSpace
+ Configuration space.
+ num_objectives : int, default=1
+ Number of objectives in objective function.
+ num_constraints : int, default=0
+ Number of constraints in objective function.
+ initial_trials : int, default=0
+ Number of initial iterations before random sampling.
+ Set to 0 for pure random search without initial design.
+ init_strategy : str, default='random'
+ Strategy to generate configurations for initial iterations.
+ initial_configurations : List[Configuration], optional
+ User-provided initial configurations.
+ ref_point : List[float], optional
+ Reference point for multi-objective optimization.
+ early_stop : bool, default=False
+ Whether to enable early stop.
+ early_stop_kwargs : dict, optional
+ Options for early stop algorithm.
+ output_dir : str, default='logs'
+ Directory to save log files.
+ task_id : str, default='OpenBox'
+ Task identifier.
+ random_state : int, optional
+ Random seed for RNG.
+ logger_kwargs : dict, optional
+ Additional keyword arguments for logger.
"""
@deprecate_kwarg('num_objs', 'num_objectives', 'a future version')
@@ -15,6 +49,9 @@ def __init__(
config_space,
num_objectives=1,
num_constraints=0,
+ initial_trials=0,
+ init_strategy='random',
+ initial_configurations=None,
ref_point=None,
early_stop=False,
early_stop_kwargs=None,
@@ -27,6 +64,11 @@ def __init__(
config_space=config_space,
num_objectives=num_objectives,
num_constraints=num_constraints,
+ initial_trials=initial_trials,
+ init_strategy=init_strategy,
+ initial_configurations=initial_configurations,
+ rand_prob=1.0, # Always sample random after initial trials
+ optimization_strategy='random', # Use random strategy
ref_point=ref_point,
early_stop=early_stop,
early_stop_kwargs=early_stop_kwargs,
@@ -35,22 +77,3 @@ def __init__(
random_state=random_state,
logger_kwargs=logger_kwargs,
)
-
- # early stop
- if self.early_stop:
- self.early_stop_algorithm.check_setup(advisor=self)
-
- def get_suggestion(self, history=None):
- """
- Generate a configuration (suggestion) for this query.
-
- Returns
- -------
- A configuration.
- """
- if history is None:
- history = self.history
-
- self.early_stop_perf(history)
-
- return self.sample_random_configs(self.config_space, 1, excluded_configs=history.configurations)[0]
diff --git a/openbox/core/space_adapter.py b/openbox/core/space_adapter.py
new file mode 100644
index 000000000..ac48f65a6
--- /dev/null
+++ b/openbox/core/space_adapter.py
@@ -0,0 +1,137 @@
+from typing import List, Optional
+from ConfigSpace import Configuration, ConfigurationSpace
+
+from openbox import logger
+from openbox.utils.history import History, Observation
+
+
+class IdentitySpaceAdapter:
+ # Default adapter that keeps original Advisor behavior
+ def __init__(self, config_space: ConfigurationSpace):
+ self.original_space = config_space
+ self.sample_space = config_space
+ self.surrogate_space = config_space
+
+ def setup(self, transfer_learning_history: Optional[List[History]] = None):
+ return transfer_learning_history
+
+ def config_to_surrogate(self, config: Configuration) -> Configuration:
+ return config
+
+ def config_to_sample(self, config: Configuration) -> Configuration:
+ return config
+
+ def config_to_original(self, config: Configuration) -> Configuration:
+ return config
+
+ def history_for_acq(self, history: History) -> History:
+ return history
+
+ def get_surrogate_array(self, history: History):
+ return history.get_config_array(transform='scale')
+
+ def cache_observation(self, observation: Observation):
+ return
+
+ def update(self, history: History) -> bool:
+ return False
+
+
+class CompressorSpaceAdapter(IdentitySpaceAdapter):
+ # Adapter that bridges Advisor with the optional compressor module
+ def __init__(self, config_space: ConfigurationSpace, compressor):
+ super().__init__(config_space)
+ self.compressor = compressor
+
+ def setup(self, transfer_learning_history: Optional[List[History]] = None):
+ source_similarities = None
+ if transfer_learning_history:
+ n_history = len(transfer_learning_history)
+ source_similarities = {i: 1.0 / n_history for i in range(n_history)}
+ surrogate_space, sample_space = self.compressor.compress_space(
+ space_history=transfer_learning_history,
+ source_similarities=source_similarities
+ )
+ self.surrogate_space = surrogate_space
+ self.sample_space = sample_space
+ if getattr(self.compressor, 'surrogate_space', None) is None:
+ self.compressor.surrogate_space = surrogate_space
+ if getattr(self.compressor, 'sample_space', None) is None:
+ self.compressor.sample_space = sample_space
+ return self.compressor.transform_source_data(transfer_learning_history)
+
+ def config_to_surrogate(self, config: Configuration) -> Configuration:
+ return self.compressor.convert_config_to_surrogate_space(config)
+
+ def config_to_sample(self, config: Configuration) -> Configuration:
+ convert_fn = getattr(self.compressor, 'convert_config_to_sample_space', None)
+ if convert_fn is None:
+ return config
+ return convert_fn(config)
+
+ def config_to_original(self, config: Configuration) -> Configuration:
+ if getattr(config, 'configuration_space', None) == self.original_space:
+ return config
+ return self.compressor.unproject_point(config)
+
+ def _convert_history(self, history: History, target_space: ConfigurationSpace, converter):
+ converted_history = History(
+ task_id=history.task_id,
+ num_objectives=history.num_objectives,
+ num_constraints=history.num_constraints,
+ config_space=target_space,
+ ref_point=history.ref_point,
+ meta_info=history.meta_info,
+ )
+ for obs in history.observations:
+ converted_obs = Observation(
+ config=converter(obs.config),
+ objectives=obs.objectives,
+ constraints=obs.constraints,
+ trial_state=obs.trial_state,
+ elapsed_time=obs.elapsed_time,
+ extra_info=obs.extra_info,
+ )
+ converted_history.update_observation(converted_obs)
+ return converted_history
+
+ def history_for_acq(self, history: History) -> History:
+ if self.sample_space == self.original_space:
+ return history
+ return self._convert_history(
+ history=history,
+ target_space=self.sample_space,
+ converter=self.config_to_sample,
+ )
+
+ def get_surrogate_array(self, history: History):
+ if self.surrogate_space == self.original_space:
+ return history.get_config_array(transform='scale')
+ surrogate_history = self._convert_history(
+ history=history,
+ target_space=self.surrogate_space,
+ converter=self.config_to_surrogate,
+ )
+ return surrogate_history.get_config_array(transform='scale')
+
+ def cache_observation(self, observation: Observation):
+ config = observation.config
+ low_dim_cfg = getattr(config, '_low_dim_config', None)
+ if low_dim_cfg is None:
+ return
+ if observation.extra_info is None:
+ observation.extra_info = {}
+ observation.extra_info['low_dim_config'] = low_dim_cfg
+
+ def update(self, history: History) -> bool:
+ updated = self.compressor.update_compression(history)
+ if not updated:
+ return False
+ self.surrogate_space = self.compressor.surrogate_space
+ self.sample_space = self.compressor.sample_space
+ logger.info(
+ 'Compressor updated spaces: sample_dim=%d, surrogate_dim=%d',
+ len(self.sample_space.get_hyperparameters()),
+ len(self.surrogate_space.get_hyperparameters()),
+ )
+ return True
diff --git a/openbox/core/sync_batch_advisor.py b/openbox/core/sync_batch_advisor.py
index 549bba76a..5182cbe32 100644
--- a/openbox/core/sync_batch_advisor.py
+++ b/openbox/core/sync_batch_advisor.py
@@ -88,15 +88,16 @@ def get_suggestions(self, batch_size=None, history=None):
num_config_successful = history.get_success_count()
if num_config_evaluated < self.init_num:
- if self.initial_configurations is not None: # self.init_num equals to len(self.initial_configurations)
- next_configs = self.initial_configurations[num_config_evaluated: num_config_evaluated + batch_size]
- if len(next_configs) < batch_size:
- next_configs.extend(self.sample_random_configs(
- self.config_space, batch_size - len(next_configs), excluded_configs=history.configurations))
- return next_configs
- else:
- return self.sample_random_configs(
- self.config_space, batch_size, excluded_configs=history.configurations)
+ next_configs = []
+ for offset in range(batch_size):
+ config = self.initial_config_provider.get_config(num_config_evaluated + offset)
+ if config is None:
+ break
+ next_configs.append(config)
+ if len(next_configs) < batch_size:
+ next_configs.extend(self.sample_random_configs(
+ self.config_space, batch_size - len(next_configs), excluded_configs=history.configurations))
+ return next_configs
if self.optimization_strategy == 'random':
return self.sample_random_configs(self.config_space, batch_size, excluded_configs=history.configurations)
@@ -120,8 +121,7 @@ def get_suggestions(self, batch_size=None, history=None):
batch_history = copy.deepcopy(history)
for batch_i in range(batch_size):
- # use super class get_suggestion
- curr_batch_config = super().get_suggestion(batch_history)
+ curr_batch_config = super().get_suggestion(history=batch_history)
# imputation
observation = Observation(config=curr_batch_config, objectives=estimated_y, constraints=estimated_c,
@@ -163,8 +163,8 @@ def get_suggestions(self, batch_size=None, history=None):
self.config_space, 1, excluded_configs=history.configurations + batch_configs_list)[0]
else:
if not surrogate_trained:
- # set return_list=True to ensure surrogate trained
- candidates = super().get_suggestion(history, return_list=True)
+ # train surrogate once and get challenger list
+ candidates = self._get_bo_candidates(history)
surrogate_trained = True
else:
# re-optimize acquisition function
@@ -186,7 +186,7 @@ def get_suggestions(self, batch_size=None, history=None):
batch_configs_list.append(cur_config)
elif self.batch_strategy == 'default':
# select first N candidates
- candidates = super().get_suggestion(history, return_list=True)
+ candidates = self._get_bo_candidates(history)
self.early_stop_ei(history, challengers=candidates)
idx = 0
while len(batch_configs_list) < batch_size:
diff --git a/openbox/core/tpe_advisor.py b/openbox/core/tpe_advisor.py
index 47e94b7c8..3464ac317 100644
--- a/openbox/core/tpe_advisor.py
+++ b/openbox/core/tpe_advisor.py
@@ -188,6 +188,12 @@ def get_suggestion(self, history=None):
return config
+ def get_suggestions(self, batch_size=1, history=None):
+ if batch_size is None:
+ batch_size = 1
+ batch_size = int(batch_size)
+ return [self.get_suggestion(history=history) for _ in range(batch_size)]
+
def impute_conditional_data(self, array):
return_array = np.empty_like(array)
diff --git a/openbox/optimizer/base.py b/openbox/optimizer/base.py
index 60d5486f5..c3df4864a 100644
--- a/openbox/optimizer/base.py
+++ b/openbox/optimizer/base.py
@@ -4,7 +4,7 @@
import abc
import time
import numpy as np
-from typing import List
+from typing import List, Optional
from openbox import logger
from openbox.utils.util_funcs import check_random_state, deprecate_kwarg
from openbox.utils.history import History
@@ -62,3 +62,4 @@ def get_history(self) -> History:
def get_incumbents(self):
assert self.config_advisor is not None
return self.config_advisor.history.get_incumbents()
+
\ No newline at end of file
diff --git a/openbox/optimizer/generic_smbo.py b/openbox/optimizer/generic_smbo.py
index d2941004d..bc627307f 100644
--- a/openbox/optimizer/generic_smbo.py
+++ b/openbox/optimizer/generic_smbo.py
@@ -1,6 +1,7 @@
# License: MIT
import time
+import inspect
from typing import List
from tqdm import tqdm
import numpy as np
@@ -118,6 +119,45 @@ class SMBO(BOBase):
Additional keyword arguments for logger.
advisor_kwargs : dict, optional
Additional keyword arguments for advisor.
+ scheduler_type : str, default='full'
+ Type of fidelity scheduler for multi-fidelity optimization.
+ - 'full' (default): Full fidelity scheduler (resource_ratio=1.0). Behaves like standard single-fidelity BO.
+ - 'bohb': BOHB-style successive halving scheduler
+ - 'flatten': Flattened BOHB scheduler with expanded full-fidelity brackets
+ - 'mfes': MFES-style multi-fidelity scheduler
+ - 'mfes_flatten': Flattened MFES scheduler
+ - 'fixed': Fixed fidelity levels (requires scheduler_kwargs)
+ scheduler_kwargs : dict, optional
+ Additional keyword arguments for scheduler initialization.
+ For BOHB/MFES schedulers:
+ - R : int, default=9
+ Maximum resource allocation
+ - eta : int, default=3
+ Reduction factor for successive halving
+ - num_nodes : int, default=1
+ Number of parallel nodes (for distributed optimization)
+ For fixed scheduler:
+ - n_resources : List[int]
+ Number of configurations at each stage
+ - r_resources : List[int]
+ Resource allocations at each stage
+ - fidelity_levels : List[float]
+ Available fidelity levels
+
+ Notes
+ -----
+ Multi-Fidelity Optimization:
+ When using scheduler_type other than 'full', the objective function should accept
+ a `resource_ratio` keyword argument (float, 0.0 to 1.0) to control the evaluation fidelity.
+ For example, in hyperparameter optimization:
+ - resource_ratio=0.1: Train on 10% of data
+ - resource_ratio=1.0: Train on full dataset
+
+ The objective function signature should be:
+ def objective_function(config, resource_ratio=1.0):
+ # Use resource_ratio to control fidelity
+ ...
+ return result
"""
@deprecate_kwarg('num_objs', 'num_objectives', 'a future version')
@deprecate_kwarg('time_limit_per_trial', 'max_runtime_per_trial', 'a future version')
@@ -150,6 +190,8 @@ def __init__(
random_state=None,
logger_kwargs: dict = None,
advisor_kwargs: dict = None,
+ scheduler_type: str = 'full',
+ scheduler_kwargs: dict = None,
):
if task_id is None:
@@ -166,84 +208,60 @@ def __init__(
self.advisor_type = advisor_type
advisor_kwargs = advisor_kwargs or {}
- _logger_kwargs = {'force_init': False} # do not init logger in advisor
- if advisor_type == 'default':
- from openbox.core.generic_advisor import Advisor
- self.config_advisor = Advisor(config_space,
- num_objectives=num_objectives,
- num_constraints=num_constraints,
- initial_trials=initial_runs,
- init_strategy=init_strategy,
- initial_configurations=initial_configurations,
- optimization_strategy=sample_strategy,
- surrogate_type=surrogate_type,
- acq_type=acq_type,
- acq_optimizer_type=acq_optimizer_type,
- ref_point=ref_point,
- transfer_learning_history=transfer_learning_history,
- early_stop=early_stop,
- early_stop_kwargs=early_stop_kwargs,
- task_id=task_id,
- output_dir=logging_dir,
- random_state=random_state,
- logger_kwargs=_logger_kwargs,
- **advisor_kwargs)
- elif advisor_type == 'mcadvisor':
- from openbox.core.mc_advisor import MCAdvisor
- self.config_advisor = MCAdvisor(config_space,
- num_objectives=num_objectives,
- num_constraints=num_constraints,
- initial_trials=initial_runs,
- init_strategy=init_strategy,
- initial_configurations=initial_configurations,
- optimization_strategy=sample_strategy,
- surrogate_type=surrogate_type,
- acq_type=acq_type,
- acq_optimizer_type=acq_optimizer_type,
- ref_point=ref_point,
- transfer_learning_history=transfer_learning_history,
- task_id=task_id,
- output_dir=logging_dir,
- random_state=random_state,
- logger_kwargs=_logger_kwargs,
- **advisor_kwargs)
- elif advisor_type == 'tpe':
- from openbox.core.tpe_advisor import TPE_Advisor
- assert num_objectives == 1 and num_constraints == 0
- self.config_advisor = TPE_Advisor(config_space, task_id=task_id, random_state=random_state,
- logger_kwargs=_logger_kwargs, **advisor_kwargs)
- elif advisor_type == 'ea':
- from openbox.core.ea_advisor import EA_Advisor
- assert num_objectives == 1 and num_constraints == 0
- self.config_advisor = EA_Advisor(config_space,
- num_objectives=num_objectives,
- num_constraints=num_constraints,
- optimization_strategy=sample_strategy,
- batch_size=1,
- task_id=task_id,
- output_dir=logging_dir,
- random_state=random_state,
- logger_kwargs=_logger_kwargs,
- **advisor_kwargs)
- elif advisor_type == 'random':
- from openbox.core.random_advisor import RandomAdvisor
- self.config_advisor = RandomAdvisor(config_space,
- num_objectives=num_objectives,
- num_constraints=num_constraints,
- ref_point=ref_point,
- task_id=task_id,
- output_dir=logging_dir,
- random_state=random_state,
- logger_kwargs=_logger_kwargs,
- **advisor_kwargs)
- else:
- raise ValueError('Invalid advisor type!')
+
+ from openbox.optimizer.scheduler import build_scheduler, check_scheduler
+ scheduler_kwargs = scheduler_kwargs or {}
+ self.scheduler_type = scheduler_type
+ self.scheduler = build_scheduler(scheduler_type, **scheduler_kwargs)
+ logger.info(f'Using scheduler: {scheduler_type} with fidelity levels: '
+ f'{self.scheduler.get_fidelity_levels()}')
+ # Check if objective function supports resource_ratio for multi-fidelity optimization
+ self._supports_resource_ratio = check_scheduler(objective_function, scheduler_type)
+
+ from openbox.core import build_advisor
+ self.config_advisor = build_advisor(
+ advisor_type=advisor_type,
+ config_space=config_space,
+ num_objectives=num_objectives,
+ num_constraints=num_constraints,
+ initial_trials=initial_runs,
+ init_strategy=init_strategy,
+ initial_configurations=initial_configurations,
+ optimization_strategy=sample_strategy,
+ surrogate_type=surrogate_type,
+ acq_type=acq_type,
+ acq_optimizer_type=acq_optimizer_type,
+ ref_point=ref_point,
+ transfer_learning_history=transfer_learning_history,
+ early_stop=early_stop,
+ early_stop_kwargs=early_stop_kwargs,
+ task_id=task_id,
+ output_dir=logging_dir,
+ random_state=random_state,
+ logger_kwargs={'force_init': False}, # do not init logger in advisor
+ scheduler_type=scheduler_type,
+ **advisor_kwargs
+ )
self.visualizer = build_visualizer(
option=visualization, history=self.get_history(),
logging_dir=self.output_dir, optimizer=self, advisor=None, auto_open_html=auto_open_html,
)
self.visualizer.setup()
+ self._fidelity_support = self._check_fidelity_support()
+
+ def _check_fidelity_support(self) -> bool:
+ try:
+ signature = inspect.signature(self.config_advisor.update_observation)
+ except (TypeError, ValueError):
+ return False
+ return 'resource_ratio' in signature.parameters
+
+ def _update_advi_obs(self, observation: Observation, resource_ratio: float):
+ if self._fidelity_support:
+ self.config_advisor.update_observation(observation, resource_ratio=resource_ratio)
+ else:
+ self.config_advisor.update_observation(observation)
def run(self) -> History:
for idx in tqdm(range(self.iteration_id, self.max_runs)):
@@ -260,10 +278,18 @@ def run(self) -> History:
self.time_left -= runtime
return self.get_history()
- def iterate(self, time_left=None) -> Observation:
- # get configuration suggestion from advisor
- config = self.config_advisor.get_suggestion()
-
+ def _evaluate_single_config(self, config, resource_ratio=1.0, time_left=None) -> Observation:
+ """
+ Evaluate a single configuration with specified resource ratio.
+
+ Args:
+ config: Configuration to evaluate
+ resource_ratio: Resource ratio for multi-fidelity (0.0 to 1.0)
+ time_left: Remaining time budget
+
+ Returns:
+ Observation object containing evaluation results
+ """
if config in self.config_advisor.history.configurations:
logger.warning('Evaluating duplicated configuration: %s' % config)
@@ -277,7 +303,11 @@ def iterate(self, time_left=None) -> Observation:
timeout = None
# evaluate configuration on objective_function
- obj_args, obj_kwargs = (config,), dict()
+ # pass resource_ratio to objective function for multi-fidelity support
+ if self._supports_resource_ratio:
+ obj_args, obj_kwargs = (config,), dict(resource_ratio=resource_ratio)
+ else:
+ obj_args, obj_kwargs = (config,), dict()
result = run_obj_func(self.objective_function, obj_args, obj_kwargs, timeout)
# parse result
@@ -300,14 +330,80 @@ def iterate(self, time_left=None) -> Observation:
config=config, objectives=objectives, constraints=constraints,
trial_state=trial_state, elapsed_time=elapsed_time, extra_info=extra_info,
)
- self.config_advisor.update_observation(observation)
+
+ return observation
+ def iterate(self, time_left=None) -> Observation:
self.iteration_id += 1
- # Logging
- if self.num_constraints > 0:
- logger.info('Iter %d, objectives: %s. constraints: %s.' % (self.iteration_id, objectives, constraints))
- else:
- logger.info('Iter %d, objectives: %s.' % (self.iteration_id, objectives))
-
+
+ # Initial runs: always use full fidelity (resource_ratio=1.0)
+ if self.iteration_id <= self.config_advisor.init_num:
+ configs = self.config_advisor.get_suggestions(batch_size=1)
+ config = configs[0]
+ observation = self._evaluate_single_config(config, resource_ratio=1.0, time_left=time_left)
+ self._update_advi_obs(observation, resource_ratio=1.0)
+
+ if self.num_constraints > 0:
+ logger.info('Iter %d (init), objectives: %s, constraints: %s, resource_ratio: 1.0' %
+ (self.iteration_id, observation.objectives, observation.constraints))
+ else:
+ logger.info('Iter %d (init), objectives: %s, resource_ratio: 1.0' %
+ (self.iteration_id, observation.objectives))
+
+ self.visualizer.update()
+ return observation
+
+ # After initialization: use scheduler for multi-fidelity optimization
+ iter_full_eval_observations = []
+ candidates = []
+
+ # Get bracket index based on current iteration
+ s = self.scheduler.get_bracket_index(self.iteration_id - self.config_advisor.init_num - 1)
+
+ # Execute successive halving within the bracket
+ # For 'full' scheduler: s=0, only 1 stage, 1 config, ratio=1.0
+ for stage in range(s + 1):
+ n_configs, n_resource = self.scheduler.get_stage_params(s=s, stage=stage)
+ resource_ratio = self.scheduler.calculate_resource_ratio(n_resource=n_resource)
+
+ if self.scheduler_type != 'full':
+ logger.info(f'Bracket {s} Stage {stage}: n_configs={n_configs}, '
+ f'resource={n_resource}, ratio={resource_ratio:.3f}')
+
+ # First stage: sample new configurations
+ if stage == 0:
+ candidates = self.config_advisor.get_suggestions(batch_size=n_configs)
+ if self.scheduler_type != 'full' and len(candidates) > 1:
+ logger.info(f'Generated {len(candidates)} initial candidates for stage {stage}')
+
+ # Evaluate all candidates at current fidelity
+ observations = []
+ perfs = []
+ for config in candidates:
+ obs = self._evaluate_single_config(config, resource_ratio, time_left)
+ observations.append(obs)
+ perfs.append(obs.objectives[0]) # Use first objective for elimination
+
+ # Update advisor if scheduler says so
+ if self.scheduler.should_update_history(resource_ratio):
+ self._update_advi_obs(obs, resource_ratio=resource_ratio)
+ if self.num_constraints > 0:
+ logger.info('Iter %d, objectives: %s, constraints: %s, '
+ 'resource_ratio: %.3f' %
+ (self.iteration_id, obs.objectives, obs.constraints, resource_ratio))
+ else:
+ logger.info('Iter %d, objectives: %s, resource_ratio: %.3f' %
+ (self.iteration_id, obs.objectives, resource_ratio))
+
+ # Eliminate poor performing candidates for next stage
+ if stage < s:
+ candidates, perfs = self.scheduler.eliminate_candidates(
+ candidates, perfs, s=s, stage=stage
+ )
+ logger.info(f'After elimination: {len(candidates)} candidates remain')
+ else:
+ # Last stage: these are full-fidelity evaluations
+ iter_full_eval_observations.extend(observations)
+
self.visualizer.update()
- return observation
+ return iter_full_eval_observations[-1] if iter_full_eval_observations else None
diff --git a/openbox/optimizer/message_queue_smbo.py b/openbox/optimizer/message_queue_smbo.py
index 31d750f79..9abb6e0f7 100644
--- a/openbox/optimizer/message_queue_smbo.py
+++ b/openbox/optimizer/message_queue_smbo.py
@@ -63,49 +63,30 @@ def __init__(
self.master_messager = MasterMessager(ip, port, authkey, max_queue_len, max_queue_len)
advisor_kwargs = advisor_kwargs or {}
- _logger_kwargs = {'force_init': False} # do not init logger in advisor
- if parallel_strategy == 'sync':
- self.config_advisor = SyncBatchAdvisor(config_space,
- num_objectives=num_objectives,
- num_constraints=num_constraints,
- batch_size=batch_size,
- batch_strategy=batch_strategy,
- initial_trials=initial_runs,
- initial_configurations=initial_configurations,
- init_strategy=init_strategy,
- transfer_learning_history=transfer_learning_history,
- optimization_strategy=sample_strategy,
- surrogate_type=surrogate_type,
- acq_type=acq_type,
- acq_optimizer_type=acq_optimizer_type,
- ref_point=ref_point,
- task_id=task_id,
- output_dir=logging_dir,
- random_state=random_state,
- logger_kwargs=_logger_kwargs,
- **advisor_kwargs)
- elif parallel_strategy == 'async':
- self.config_advisor = AsyncBatchAdvisor(config_space,
- num_objectives=num_objectives,
- num_constraints=num_constraints,
- batch_size=batch_size,
- batch_strategy=batch_strategy,
- initial_trials=initial_runs,
- initial_configurations=initial_configurations,
- init_strategy=init_strategy,
- transfer_learning_history=transfer_learning_history,
- optimization_strategy=sample_strategy,
- surrogate_type=surrogate_type,
- acq_type=acq_type,
- acq_optimizer_type=acq_optimizer_type,
- ref_point=ref_point,
- task_id=task_id,
- output_dir=logging_dir,
- random_state=random_state,
- logger_kwargs=_logger_kwargs,
- **advisor_kwargs)
- else:
- raise ValueError('Invalid parallel strategy - %s.' % parallel_strategy)
+ from openbox.core import create_parallel_advisor
+ self.config_advisor = create_parallel_advisor(
+ parallel_strategy=parallel_strategy,
+ sample_strategy=sample_strategy,
+ config_space=config_space,
+ num_objectives=num_objectives,
+ num_constraints=num_constraints,
+ batch_size=batch_size,
+ batch_strategy=batch_strategy,
+ initial_trials=initial_runs,
+ initial_configurations=initial_configurations,
+ init_strategy=init_strategy,
+ transfer_learning_history=transfer_learning_history,
+ optimization_strategy=sample_strategy,
+ surrogate_type=surrogate_type,
+ acq_type=acq_type,
+ acq_optimizer_type=acq_optimizer_type,
+ ref_point=ref_point,
+ task_id=task_id,
+ output_dir=logging_dir,
+ random_state=random_state,
+ logger_kwargs={'force_init': False},
+ **advisor_kwargs
+ )
def async_run(self):
config_num = 0
diff --git a/openbox/optimizer/parallel_smbo.py b/openbox/optimizer/parallel_smbo.py
index 3acc88cb1..d67d80d59 100644
--- a/openbox/optimizer/parallel_smbo.py
+++ b/openbox/optimizer/parallel_smbo.py
@@ -10,9 +10,6 @@
from openbox.core.computation.parallel_process import ParallelEvaluation
from openbox.utils.limit import run_obj_func
from openbox.utils.util_funcs import parse_result, deprecate_kwarg
-from openbox.core.sync_batch_advisor import SyncBatchAdvisor
-from openbox.core.async_batch_advisor import AsyncBatchAdvisor
-from openbox.core.ea_advisor import EA_Advisor
from openbox.utils.history import Observation, History
from openbox.optimizer.base import BOBase
@@ -56,9 +53,9 @@ def __init__(
num_objectives=1,
num_constraints=0,
parallel_strategy='async',
+ sample_strategy: str = 'bo',
batch_size=4,
batch_strategy='default',
- sample_strategy: str = 'bo',
max_runs=100,
max_runtime_per_trial=None,
surrogate_type='auto',
@@ -91,80 +88,33 @@ def __init__(
self.batch_size = batch_size
advisor_kwargs = advisor_kwargs or {}
- _logger_kwargs = {'force_init': False} # do not init logger in advisor
- if parallel_strategy == 'sync':
- if sample_strategy in ['random', 'bo']:
- self.config_advisor = SyncBatchAdvisor(config_space,
- num_objectives=num_objectives,
- num_constraints=num_constraints,
- batch_size=batch_size,
- batch_strategy=batch_strategy,
- initial_trials=initial_runs,
- initial_configurations=initial_configurations,
- init_strategy=init_strategy,
- transfer_learning_history=transfer_learning_history,
- optimization_strategy=sample_strategy,
- surrogate_type=surrogate_type,
- acq_type=acq_type,
- acq_optimizer_type=acq_optimizer_type,
- ref_point=ref_point,
- task_id=task_id,
- output_dir=logging_dir,
- random_state=random_state,
- logger_kwargs=_logger_kwargs,
- **advisor_kwargs)
- elif sample_strategy == 'ea':
- assert num_objectives == 1 and num_constraints == 0
- self.config_advisor = EA_Advisor(config_space,
- num_objectives=num_objectives,
- num_constraints=num_constraints,
- optimization_strategy=sample_strategy,
- batch_size=batch_size,
- task_id=task_id,
- output_dir=logging_dir,
- random_state=random_state,
- logger_kwargs=_logger_kwargs,
- **advisor_kwargs)
- else:
- raise ValueError('Unknown sample_strategy: %s' % sample_strategy)
- elif parallel_strategy == 'async':
- self.advisor_lock = Lock()
- if sample_strategy in ['random', 'bo']:
- self.config_advisor = AsyncBatchAdvisor(config_space,
- num_objectives=num_objectives,
- num_constraints=num_constraints,
- batch_size=batch_size,
- batch_strategy=batch_strategy,
- initial_trials=initial_runs,
- initial_configurations=initial_configurations,
- init_strategy=init_strategy,
- transfer_learning_history=transfer_learning_history,
- optimization_strategy=sample_strategy,
- surrogate_type=surrogate_type,
- acq_type=acq_type,
- acq_optimizer_type=acq_optimizer_type,
- ref_point=ref_point,
- task_id=task_id,
- output_dir=logging_dir,
- random_state=random_state,
- logger_kwargs=_logger_kwargs,
- **advisor_kwargs)
- elif sample_strategy == 'ea':
- assert num_objectives == 1 and num_constraints == 0
- self.config_advisor = EA_Advisor(config_space,
- num_objectives=num_objectives,
- num_constraints=num_constraints,
- optimization_strategy=sample_strategy,
- batch_size=batch_size,
- task_id=task_id,
- output_dir=logging_dir,
- random_state=random_state,
- logger_kwargs=_logger_kwargs,
- **advisor_kwargs)
- else:
- raise ValueError('Unknown sample_strategy: %s' % sample_strategy)
- else:
- raise ValueError('Invalid parallel strategy - %s.' % parallel_strategy)
+
+ # Determine advisor type based on parallel and sample strategies
+ from openbox.core import create_parallel_advisor
+ self.config_advisor = create_parallel_advisor(
+ parallel_strategy=parallel_strategy,
+ sample_strategy=sample_strategy,
+ config_space=config_space,
+ num_objectives=num_objectives,
+ num_constraints=num_constraints,
+ batch_size=batch_size,
+ batch_strategy=batch_strategy,
+ initial_trials=initial_runs,
+ initial_configurations=initial_configurations,
+ init_strategy=init_strategy,
+ transfer_learning_history=transfer_learning_history,
+ optimization_strategy=sample_strategy,
+ surrogate_type=surrogate_type,
+ acq_type=acq_type,
+ acq_optimizer_type=acq_optimizer_type,
+ ref_point=ref_point,
+ task_id=task_id,
+ output_dir=logging_dir,
+ random_state=random_state,
+ logger_kwargs={'force_init': False}, # do not init logger in advisor
+ **advisor_kwargs
+ )
+ self.advisor_lock = Lock()
def callback(self, observation: Observation):
# Report the result, and remove the config from the running queue.
diff --git a/openbox/optimizer/scheduler/__init__.py b/openbox/optimizer/scheduler/__init__.py
new file mode 100644
index 000000000..777553467
--- /dev/null
+++ b/openbox/optimizer/scheduler/__init__.py
@@ -0,0 +1,92 @@
+# License: MIT
+# Author: LINGCHING TUNG
+from .base import BaseScheduler, FullFidelityScheduler
+from .fidelity import FixedFidelityScheduler, \
+ BOHBFidelityScheduler, MFESFidelityScheduler, \
+ FlattenFidelityScheduler, MFESFlattenFidelityScheduler
+
+schedulers = {
+ 'fixed': FixedFidelityScheduler,
+ 'bohb': BOHBFidelityScheduler,
+ 'full': FullFidelityScheduler,
+ 'mfes': MFESFidelityScheduler,
+ 'flatten': FlattenFidelityScheduler,
+ 'bohb_flatten': FlattenFidelityScheduler,
+ 'mfes_flatten': MFESFlattenFidelityScheduler
+}
+
+
+def build_scheduler(scheduler_type: str = 'full', **kwargs) -> BaseScheduler:
+ """
+ Factory function to create scheduler instance.
+
+ Args:
+ scheduler_type: Type of scheduler
+ - 'full': Full fidelity scheduler (default)
+ - 'bohb': BOHB-style successive halving
+ - 'mfes': MFES-style multi-fidelity
+ - 'flatten': Flattened BOHB scheduler
+ - 'bohb_flatten': Alias for flatten
+ - 'mfes_flatten': Flattened MFES scheduler
+ - 'fixed': Fixed fidelity levels
+ **kwargs: Additional arguments passed to scheduler constructor
+
+ Returns:
+ Scheduler instance
+
+ Raises:
+ ValueError: If scheduler_type is unknown
+ """
+ if scheduler_type not in schedulers:
+ raise ValueError(
+ f'Unknown scheduler_type: {scheduler_type}. '
+ f'Available options: {list(schedulers.keys())}'
+ )
+
+ scheduler_class = schedulers[scheduler_type]
+ return scheduler_class(**kwargs)
+
+
+def check_scheduler(objective_function: callable, scheduler_type: str = 'full') -> bool:
+ """
+ Check if the objective function supports the scheduler type.
+
+ Args:
+ objective_function: Objective function
+ scheduler_type: Type of scheduler
+ Returns:
+        True if resource_ratio should be passed to the objective function (non-'full' scheduler with support), False otherwise
+ """
+ import inspect
+ sig = inspect.signature(objective_function)
+ has_resource_ratio = 'resource_ratio' in sig.parameters
+ has_var_keyword = any(
+ p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()
+ )
+ supports_resource_ratio = has_resource_ratio or has_var_keyword
+
+ if scheduler_type != 'full' and not supports_resource_ratio:
+ raise ValueError(
+ f'Multi-fidelity scheduler "{scheduler_type}" requires objective function '
+ f'to accept "resource_ratio" parameter.\n'
+ f'Please modify your objective function signature to:\n'
+ f' def objective_function(config, resource_ratio=1.0):\n'
+ f' ...\n'
+ f'Or use scheduler_type="full" for single-fidelity optimization.'
+ )
+ # For full-fidelity scheduler, resource_ratio is not required and should not be passed.
+ return scheduler_type != 'full' and supports_resource_ratio
+
+
+__all__ = [
+ 'BaseScheduler',
+ 'FullFidelityScheduler',
+ 'FixedFidelityScheduler',
+ 'BOHBFidelityScheduler',
+ 'MFESFidelityScheduler',
+ 'FlattenFidelityScheduler',
+ 'MFESFlattenFidelityScheduler',
+ 'schedulers',
+ 'build_scheduler',
+ 'check_scheduler'
+]
\ No newline at end of file
diff --git a/openbox/optimizer/scheduler/base.py b/openbox/optimizer/scheduler/base.py
new file mode 100644
index 000000000..159a3d30e
--- /dev/null
+++ b/openbox/optimizer/scheduler/base.py
@@ -0,0 +1,58 @@
+# License: MIT
+# Author: LINGCHING TUNG
+import abc
+import numpy as np
+from typing import List, Tuple
+
+class BaseScheduler(abc.ABC):
+ def __init__(self, num_nodes: int = 1):
+ self.num_nodes = num_nodes
+ self.fidelity_levels = [round(float(1.0), 5)]
+
+ def get_bracket_index(self, iter_id: int) -> int:
+ # always return 0 when using full fidelity scheduler since there is only one fidelity level
+ return iter_id % len(self.fidelity_levels)
+
+ @abc.abstractmethod
+ def get_elimination_count(self) -> int:
+ pass
+
+ def eliminate_candidates(
+ self, candidates: List, perfs: List, **kwargs
+ ) -> Tuple[List, List]:
+ reduced_num = self.get_elimination_count(**kwargs)
+ indices = np.argsort(perfs)
+ sorted_candidates = [candidates[i] for i in indices]
+ sorted_perfs = [perfs[i] for i in indices]
+ return sorted_candidates[:reduced_num], sorted_perfs[:reduced_num]
+
+ def get_fidelity_levels(self) -> List[float]:
+ return self.fidelity_levels
+
+ @abc.abstractmethod
+ def calculate_resource_ratio(self) -> float:
+ pass
+
+ def get_stage_params(self, **kwargs) -> Tuple[int, int]:
+ return self.num_nodes, 1
+
+ def should_update_history(self, resource_ratio: float) -> bool:
+ """
+        Determine whether observations should be added to the advisor's history.
+ Notes:
+ - For SMBO (FullFidelityScheduler): always returns True
+ - For BOHB: only returns True when resource_ratio == 1.0
+        - For MFES: returns True
+ """
+ return True
+
+
+class FullFidelityScheduler(BaseScheduler):
+ def __init__(self, num_nodes: int = 1, **kwargs):
+ super().__init__(num_nodes)
+
+ def calculate_resource_ratio(self, **kwargs) -> float:
+ return round(float(1.0), 5)
+
+ def get_elimination_count(self, **kwargs) -> int:
+ return self.num_nodes
\ No newline at end of file
diff --git a/openbox/optimizer/scheduler/fidelity.py b/openbox/optimizer/scheduler/fidelity.py
new file mode 100644
index 000000000..eacebc40a
--- /dev/null
+++ b/openbox/optimizer/scheduler/fidelity.py
@@ -0,0 +1,258 @@
+# License: MIT
+# Author: LINGCHING TUNG
+import numpy as np
+from math import log, ceil
+from typing import List, Tuple
+from openbox import logger
+
+from .base import BaseScheduler
+
+class FixedFidelityScheduler(BaseScheduler):
+ def __init__(self,
+ n_resources: List[int],
+ r_resources: List[int],
+ fidelity_levels: List[float],
+ num_nodes: int = 1):
+ super().__init__(num_nodes)
+ self.n_resources = n_resources
+ self.r_resources = r_resources
+ self.fidelity_levels = fidelity_levels
+
+ for r in self.r_resources:
+ if r not in self.fidelity_levels:
+ raise ValueError(f"r_resource {r} not in fidelity_levels {self.fidelity_levels}")
+
+ def get_stage_params(self, stage: int, **kwargs) -> Tuple[int, int]:
+ assert stage < len(self.n_resources) and stage < len(self.r_resources), "Stage index out of range"
+ return self.n_resources[stage] * self.num_nodes, self.r_resources[stage]
+
+ def get_elimination_count(self, stage: int, **kwargs) -> int:
+ reduced_num = self.n_resources[stage + 1] if stage + 1 < len(self.n_resources) else self.n_resources[-1]
+ return reduced_num * self.num_nodes
+
+ def calculate_resource_ratio(self, n_resource: int) -> float:
+ return round(float(n_resource), 5)
+
+ def should_update_history(self, resource_ratio: float) -> bool:
+ return True
+
+class BOHBFidelityScheduler(BaseScheduler):
+ """
+ Multi-Fidelity Scheduler for BOHB optimization.
+
+ This class provides the core scheduling logic:
+ - Determines how many configurations to run (n)
+ - Determines how much resource to allocate (r)
+ - Manages bracket and stage structure
+ - Calculates elimination counts
+ """
+
+ def __init__(self,
+ num_nodes: int = 1,
+ R: int = 9, eta: int = 3):
+ super().__init__(num_nodes)
+ self.R = R
+ self.eta = eta
+ self.logeta = lambda x: log(x) / log(self.eta)
+ self.s_max = int(self.logeta(self.R))
+ self.B = (self.s_max + 1) * self.R
+ self.s_values = list(reversed(range(self.s_max + 1)))
+
+ self.fidelity_levels = [round(x / self.R, 5) for x in np.logspace(0, self.s_max, self.s_max + 1, base=self.eta)]
+ assert len(self.fidelity_levels) == self.s_max + 1, "Fidelity levels length mismatch"
+
+ logger.info("FidelityScheduler: run %d brackets with fidelity levels %s. s_max = [%d]. R = [%d], eta = [%d]"
+ % (len(self.s_values), self.get_fidelity_levels(), self.s_max, self.R, self.eta))
+
+ def get_bracket_index(self, iter_id: int) -> int:
+ return self.s_values[iter_id % len(self.s_values)]
+
+ def get_bracket_params(self, s: int) -> Tuple[int, int]:
+ """
+ Get bracket parameters for a given bracket index.
+
+ Args:
+ s: Bracket index (0 to s_max)
+
+ Returns:
+ Tuple of (n_configs, n_resource)
+ """
+ n_configs = int(ceil(self.B / self.R / (s + 1) * self.eta ** s)) * self.num_nodes
+ n_resource = int(self.R * self.eta ** (-s))
+ return n_configs, n_resource
+
+ def get_stage_params(self, s: int, stage: int) -> Tuple[int, int]:
+ """
+ Get stage parameters within a bracket.
+
+ Args:
+ s: Bracket index
+ stage: Stage index within bracket (0 to s)
+            Note: the returned n_configs is already scaled by self.num_nodes (via get_bracket_params)
+
+ Returns:
+ Tuple of (n_configs, n_resource)
+ """
+ n_configs, base_resource = self.get_bracket_params(s)
+ n_configs_stage = int(n_configs * self.eta ** (-stage))
+ n_resource_stage = int(base_resource * self.eta ** stage)
+
+ return n_configs_stage, n_resource_stage
+
+ def calculate_resource_ratio(self, n_resource: int) -> float:
+ """
+ Calculate resource ratio from resource allocation.
+
+ Args:
+ n_resource: Resource allocation
+
+ Returns:
+ Resource ratio (0.0 to 1.0)
+ """
+ return round(float(n_resource / self.R), 5)
+
+ def get_elimination_count(self, s: int, stage: int) -> int:
+ """
+ Get number of configurations to eliminate after a stage.
+
+ Args:
+ s: Bracket index
+ stage: Stage index
+
+ Returns:
+ Number of configurations to keep
+ """
+ n_configs, r_resource = self.get_stage_params(s, stage)
+ return int(n_configs / self.eta) if int(r_resource) != self.R else int(n_configs)
+
+ def should_update_history(self, resource_ratio: float) -> bool:
+ # only update history when resource_ratio == 1.0
+ return resource_ratio == round(float(1.0), 5)
+
+
+class MFESFidelityScheduler(BOHBFidelityScheduler):
+ def __init__(self,
+ num_nodes: int = 1,
+ R: int = 9, eta: int = 3):
+ super().__init__(num_nodes=num_nodes, R=R, eta=eta)
+
+ def should_update_history(self, resource_ratio: float) -> bool:
+        # always return True for MFES - let MFBO.update decide history vs history_list
+ return True
+
+
+class FlattenFidelityScheduler(BOHBFidelityScheduler):
+ """
+ Scheduler with expanded full-fidelity brackets.
+
+ This scheduler is similar to BOHBFidelityScheduler, but expands the last
+ full-fidelity bracket (s=0) into multiple single-configuration brackets.
+
+ For example, if the last bracket would be (r=27, n=4), it creates 4 separate
+ brackets each with (r=27, n=1). This allows more fine-grained scheduling of
+ full-fidelity evaluations.
+
+ The bracket structure stores explicit (n_configs, n_resource) tuples instead
+ of using s indices.
+ """
+
+ def __init__(self,
+ num_nodes: int = 1,
+ R: int = 9, eta: int = 3):
+ super().__init__(num_nodes=num_nodes, R=R, eta=eta)
+
+ self.brackets = []
+
+ for s in range(self.s_max, 0, -1):
+ n_configs = int(ceil(self.B / self.R / (s + 1) * self.eta ** s)) * self.num_nodes
+ n_resource = int(self.R * self.eta ** (-s))
+
+ stages = []
+ for stage in range(s + 1):
+ n_configs_stage = int(n_configs * self.eta ** (-stage))
+ n_resource_stage = int(n_resource * self.eta ** stage)
+ stages.append((n_configs_stage, n_resource_stage))
+
+ self.brackets.append({
+ 's': s,
+ 'n_configs': n_configs,
+ 'n_resource': n_resource,
+ 'stages': stages
+ })
+
+ # Expand the last bracket (s=0) into multiple single-config brackets
+ s = 0
+ n_configs_last = int(ceil(self.B / self.R / (s + 1) * self.eta ** s)) * self.num_nodes
+ n_resource_last = int(self.R * self.eta ** (-s))
+ num_expanded = n_configs_last // self.num_nodes
+
+ for i in range(num_expanded):
+ self.brackets.append({
+ 's': 0,
+ 'expanded_idx': i,
+ 'n_configs': self.num_nodes,
+ 'n_resource': n_resource_last,
+ 'stages': [(self.num_nodes, n_resource_last)]
+ })
+
+ logger.info(f"FlattenFidelityScheduler: Expanded last bracket (r={n_resource_last}, n={n_configs_last}) "
+ f"into {num_expanded} brackets of (r={n_resource_last}, n={self.num_nodes})")
+ logger.info(f"Total brackets: {len(self.brackets)}")
+
+ def get_bracket_index(self, iter_id: int) -> int:
+ bracket_idx = iter_id % len(self.brackets)
+ bracket = self.brackets[bracket_idx]
+ self._current_bracket_idx = bracket_idx
+ return bracket['s']
+
+ def get_bracket_params(self, s: int) -> Tuple[int, int]:
+ if hasattr(self, '_current_bracket_idx'):
+ bracket = self.brackets[self._current_bracket_idx]
+ if bracket['s'] == s:
+ return bracket['n_configs'], bracket['n_resource']
+ for bracket in self.brackets:
+ if bracket['s'] == s:
+ return bracket['n_configs'], bracket['n_resource']
+ raise ValueError(f"Bracket with s={s} not found")
+
+ def get_stage_params(self, s: int, stage: int) -> Tuple[int, int]:
+ if hasattr(self, '_current_bracket_idx'):
+ bracket = self.brackets[self._current_bracket_idx]
+ if bracket['s'] == s and stage < len(bracket['stages']):
+ return bracket['stages'][stage]
+ for bracket in self.brackets:
+ if bracket['s'] == s:
+ if stage < len(bracket['stages']):
+ return bracket['stages'][stage]
+ else:
+ raise ValueError(f"Stage {stage} out of range for bracket with s={s}")
+ raise ValueError(f"Bracket with s={s} not found or stage {stage} out of range")
+
+ def get_elimination_count(self, s: int, stage: int) -> int:
+ if hasattr(self, '_current_bracket_idx'):
+ bracket = self.brackets[self._current_bracket_idx]
+ if bracket['s'] == s and stage < len(bracket['stages']):
+ n_configs, r_resource = bracket['stages'][stage]
+ if stage == len(bracket['stages']) - 1 or r_resource == self.R:
+ return n_configs
+ else:
+ return int(n_configs / self.eta)
+ for bracket in self.brackets:
+ if bracket['s'] == s:
+ if stage < len(bracket['stages']):
+ n_configs, r_resource = bracket['stages'][stage]
+ if stage == len(bracket['stages']) - 1 or r_resource == self.R:
+ return n_configs
+ else:
+ return int(n_configs / self.eta)
+ raise ValueError(f"Bracket with s={s} not found or stage {stage} out of range")
+
+
+class MFESFlattenFidelityScheduler(FlattenFidelityScheduler):
+ def __init__(self,
+ num_nodes: int = 1,
+ R: int = 9, eta: int = 3):
+ super().__init__(num_nodes=num_nodes, R=R, eta=eta)
+
+ def should_update_history(self, resource_ratio: float) -> bool:
+ return True
\ No newline at end of file
diff --git a/openbox/surrogate/tlbo/base.py b/openbox/surrogate/tlbo/base.py
index f0555e50f..63351a657 100644
--- a/openbox/surrogate/tlbo/base.py
+++ b/openbox/surrogate/tlbo/base.py
@@ -72,15 +72,23 @@ def build_source_surrogates(self, normalize='scale'):
logger.info('Start to train base surrogates.')
start_time = time.time()
self.source_surrogates = list()
+ if self.num_src_hpo_trial is None or self.num_src_hpo_trial < 0:
+ end = None
+ else:
+ end = self.num_src_hpo_trial
for task_history in self.source_hpo_data:
assert isinstance(task_history, History)
+ X = task_history.get_config_array(transform=normalize)[:end]
+ y = task_history.get_objectives(transform='infeasible')[:end]
+ y = y.reshape(-1) # single objective
+
+ if y.shape[0] == 0:
+ logger.warning('Skip one empty source history when building TL surrogates.')
+ continue
+
model = build_surrogate(self.surrogate_type, self.config_space,
np.random.RandomState(self.random_seed))
- X = task_history.get_config_array(transform=normalize)[:self.num_src_hpo_trial]
- y = task_history.get_objectives(transform='infeasible')[:self.num_src_hpo_trial]
- y = y.reshape(-1) # single objective
-
if (y == y[0]).all():
y[0] += 1e-4
y, _, _ = zero_mean_unit_var_normalization(y)
diff --git a/openbox/surrogate/tlbo/mfgpe.py b/openbox/surrogate/tlbo/mfgpe.py
index 4c3310b4a..044ab7e00 100644
--- a/openbox/surrogate/tlbo/mfgpe.py
+++ b/openbox/surrogate/tlbo/mfgpe.py
@@ -28,9 +28,14 @@ def __init__(self, config_space, source_hpo_data, seed,
self.iteration_id = 0
def update_mf_trials(self, mf_hpo_data: List[History]):
- if self.K == 0:
- self.K = len(mf_hpo_data) - 1 # K is the number of low-fidelity groups
- self.w = [1. / self.K] * self.K + [0.]
+ if len(mf_hpo_data) == 0:
+ return
+ if self.K <= 0:
+ self.K = max(len(mf_hpo_data) - 1, 0)
+ if self.K > 0:
+ self.w = [1. / self.K] * self.K + [0.]
+ else:
+ self.w = [1.]
self.snapshot_w = self.w
self.source_hpo_data = mf_hpo_data
# Refit the base surrogates.
diff --git a/test/core/test_compressor.py b/test/core/test_compressor.py
new file mode 100644
index 000000000..aac1bfdbf
--- /dev/null
+++ b/test/core/test_compressor.py
@@ -0,0 +1,444 @@
+import pytest
+
+from openbox.core.generic_advisor import Advisor
+from openbox.core.space_adapter import IdentitySpaceAdapter, CompressorSpaceAdapter
+from openbox.compressor import Compressor
+from openbox.compressor.api.step_factory import create_steps_from_strings
+from openbox.utils.constants import SUCCESS
+from openbox.utils.history import Observation
+
+
+def test_generic_advisor_without_compressor_uses_identity_adapter(configspace_tiny):
+ advisor = Advisor(configspace_tiny, initial_trials=1)
+ assert isinstance(advisor.space_adapter, IdentitySpaceAdapter)
+ assert advisor.sample_space == configspace_tiny
+ assert advisor.surrogate_space == configspace_tiny
+
+
+def test_generic_advisor_with_compressor_adapter(configspace_tiny):
+ advisor = Advisor(
+ config_space=configspace_tiny,
+ initial_trials=1,
+ compressor_type='none', # no compressor, use identity space adapter
+ compressor_kwargs={},
+ )
+ assert isinstance(advisor.space_adapter, IdentitySpaceAdapter)
+ assert advisor.sample_space == configspace_tiny
+ assert advisor.surrogate_space == configspace_tiny
+
+ config = advisor.get_suggestion()
+ observation = Observation(
+ config=config,
+ objectives=[0.1],
+ trial_state=SUCCESS,
+ elapsed_time=0.1,
+ extra_info={},
+ )
+ advisor.update_observation(observation)
+ assert len(advisor.history) == 1
+
+
+def test_compressor_adapter_roundtrip_and_spaces(configspace_tiny):
+ advisor = Advisor(
+ config_space=configspace_tiny,
+ initial_trials=1,
+ compressor_type='pipeline',
+ compressor_kwargs={
+ 'step_strings': ['p_quant'],
+ 'step_params': {'p_quant': {'max_num_values': 4, 'seed': 3}},
+ 'seed': 3,
+ },
+ )
+ adapter = advisor.space_adapter
+ config = advisor.get_suggestion()
+
+ surrogate_config = adapter.config_to_surrogate(config)
+ sample_config = adapter.config_to_sample(config)
+ original_config = adapter.config_to_original(sample_config)
+
+ surrogate_dict = surrogate_config.get_dictionary()
+ sample_dict = sample_config.get_dictionary()
+ original_dict = original_config.get_dictionary()
+
+ assert set(surrogate_dict.keys()) == {'x1|q', 'x2|q'}
+ assert set(sample_dict.keys()) == {'x1|q', 'x2|q'}
+ assert all(1 <= int(v) <= 4 for v in surrogate_dict.values())
+ assert all(1 <= int(v) <= 4 for v in sample_dict.values())
+
+ # unproject returns original-space parameter names
+ assert set(original_dict.keys()) == {'x1', 'x2'}
+ assert advisor.sample_space != advisor.config_space
+ assert advisor.surrogate_space != advisor.config_space
+
+
+def test_compressor_adapter_history_and_update_compression(configspace_tiny):
+ advisor = Advisor(
+ config_space=configspace_tiny,
+ initial_trials=1,
+ compressor_type='none',
+ compressor_kwargs={},
+ )
+ adapter = advisor.space_adapter
+ assert isinstance(adapter, IdentitySpaceAdapter)
+ config = advisor.get_suggestion()
+ observation = Observation(
+ config=config,
+ objectives=[0.2],
+ trial_state=SUCCESS,
+ elapsed_time=0.1,
+ extra_info={},
+ )
+ advisor.update_observation(observation)
+ acq_history = adapter.history_for_acq(advisor.history) # identity adapter keeps acquisition history untouched
+ assert acq_history is advisor.history
+ # no adaptive update for identity adapter
+ assert adapter.update(advisor.history) is False
+ assert advisor.update_compression(advisor.history) is False
+
+
+def test_compressor_adapter_caches_low_dim_config(configspace_tiny):
+ advisor = Advisor(
+ config_space=configspace_tiny,
+ initial_trials=1,
+ compressor_type='pipeline',
+ compressor_kwargs={
+ 'step_strings': ['p_quant'],
+ 'step_params': {'p_quant': {'max_num_values': 4, 'seed': 5}},
+ 'seed': 5,
+ },
+ )
+ config = advisor.get_suggestion()
+ config._low_dim_config = {'x1': 0.0, 'x2': 1.0}
+
+ observation = Observation(
+ config=config,
+ objectives=[0.3],
+ trial_state=SUCCESS,
+ elapsed_time=0.1,
+ extra_info=None,
+ )
+ advisor.update_observation(observation)
+
+ assert len(advisor.history) == 1
+ assert 'low_dim_config' in advisor.history.observations[0].extra_info
+ assert advisor.history.observations[0].extra_info['low_dim_config'] == {'x1': 0.0, 'x2': 1.0}
+
+
+def test_compressor_adapter_with_transfer_learning_history(configspace_tiny, transfer_learning_history_single):
+ advisor = Advisor(
+ config_space=configspace_tiny,
+ initial_trials=1,
+ compressor_type='pipeline',
+ compressor_kwargs={
+ 'step_strings': ['p_quant'],
+ 'step_params': {'p_quant': {'max_num_values': 4, 'seed': 6}},
+ 'seed': 6,
+ },
+ transfer_learning_history=transfer_learning_history_single,
+ )
+
+ assert isinstance(advisor.space_adapter, CompressorSpaceAdapter)
+ assert advisor.surrogate_transfer_learning_history is not None
+ assert len(advisor.surrogate_transfer_learning_history) == len(transfer_learning_history_single)
+ assert all(
+ h.config_space == advisor.surrogate_space
+ for h in advisor.surrogate_transfer_learning_history
+ )
+
+
+def test_generic_advisor_with_llamatune_quantization(configspace_huge):
+ advisor = Advisor(
+ config_space=configspace_huge,
+ initial_trials=1,
+ compressor_type='llamatune',
+ compressor_kwargs={
+ 'adapter_alias': 'none',
+ 'max_num_values': 5,
+ 'seed': 1,
+ },
+ )
+
+ assert isinstance(advisor.space_adapter, CompressorSpaceAdapter)
+ assert len(advisor.surrogate_space.get_hyperparameters()) == len(configspace_huge.get_hyperparameters())
+ assert 'x1|q' in advisor.surrogate_space.get_hyperparameter_names()
+ assert 'x2|q' in advisor.surrogate_space.get_hyperparameter_names()
+ assert 'x4|q' in advisor.surrogate_space.get_hyperparameter_names()
+ assert 'x1' not in advisor.surrogate_space.get_hyperparameter_names()
+ assert 'x2' not in advisor.surrogate_space.get_hyperparameter_names()
+ assert 'x4' not in advisor.surrogate_space.get_hyperparameter_names()
+
+ config = advisor.get_suggestion()
+ observation = Observation(
+ config=config,
+ objectives=[0.15],
+ trial_state=SUCCESS,
+ elapsed_time=0.1,
+ extra_info={},
+ )
+ advisor.update_observation(observation)
+ assert len(advisor.history) == 1
+
+
+def test_generic_advisor_with_shap_dimension_compression(configspace_tiny, transfer_learning_history_single):
+ pytest.importorskip('shap')
+
+ advisor = Advisor(
+ config_space=configspace_tiny,
+ initial_trials=1,
+ compressor_type='shap',
+ compressor_kwargs={
+ 'strategy': 'shap',
+ 'topk': 1,
+ 'top_ratio': 1.0,
+ 'sigma': 0.0,
+ 'seed': 2,
+ },
+ transfer_learning_history=transfer_learning_history_single,
+ )
+
+ assert isinstance(advisor.space_adapter, CompressorSpaceAdapter)
+ assert advisor.space_adapter.compressor.pipeline is not None
+ assert any(type(step).__name__ == 'SHAPDimensionStep'
+ for step in advisor.space_adapter.compressor.pipeline.steps)
+ assert len(advisor.surrogate_space.get_hyperparameters()) == 1
+ assert advisor.surrogate_space.get_hyperparameter_names()[0] in ['x1', 'x2']
+
+ config = advisor.get_suggestion()
+ observation = Observation(
+ config=config,
+ objectives=[0.11],
+ trial_state=SUCCESS,
+ elapsed_time=0.1,
+ extra_info={},
+ )
+ advisor.update_observation(observation)
+ assert len(advisor.history) == 1
+
+
+def test_generic_advisor_with_custom_pipeline_steps(configspace_huge):
+ steps = create_steps_from_strings(
+ ['p_quant'],
+ step_params={'p_quant': {'max_num_values': 4, 'seed': 3}},
+ )
+ advisor = Advisor(
+ config_space=configspace_huge,
+ initial_trials=1,
+ compressor_type='pipeline',
+ compressor_kwargs={
+ 'steps': steps,
+ 'seed': 3,
+ },
+ )
+
+ assert isinstance(advisor.space_adapter, CompressorSpaceAdapter)
+ assert advisor.space_adapter.compressor.pipeline is not None
+ assert len(advisor.space_adapter.compressor.pipeline.steps) == 1
+ assert 'x4|q' in advisor.surrogate_space.get_hyperparameter_names()
+
+
+def test_generic_advisor_with_step_strings_api_build(configspace_huge):
+ advisor = Advisor(
+ config_space=configspace_huge,
+ initial_trials=1,
+ compressor_type='pipeline',
+ compressor_kwargs={
+ 'step_strings': ['p_quant'],
+ 'step_params': {'p_quant': {'max_num_values': 3, 'seed': 9}},
+ 'seed': 9,
+ },
+ )
+
+ assert isinstance(advisor.space_adapter, CompressorSpaceAdapter)
+ assert advisor.space_adapter.compressor.pipeline is not None
+ assert len(advisor.space_adapter.compressor.pipeline.steps) == 1
+ assert advisor.space_adapter.compressor.pipeline.steps[0].__class__.__name__ == 'QuantizationProjectionStep'
+ assert 'x4|q' in advisor.surrogate_space.get_hyperparameter_names()
+
+
+def test_generic_advisor_with_prebuilt_compressor_instance(configspace_huge):
+ steps = create_steps_from_strings(
+ ['p_quant'],
+ step_params={'p_quant': {'max_num_values': 6, 'seed': 7}},
+ )
+ compressor = Compressor(
+ config_space=configspace_huge,
+ steps=steps,
+ seed=7,
+ )
+ advisor = Advisor(
+ config_space=configspace_huge,
+ initial_trials=1,
+ compressor=compressor,
+ )
+
+ assert isinstance(advisor.space_adapter, CompressorSpaceAdapter)
+ assert advisor.space_adapter.compressor is compressor
+ assert 'x4|q' in advisor.surrogate_space.get_hyperparameter_names()
+
+
+def test_generic_advisor_with_expert_dimension_compression(configspace_huge):
+ advisor = Advisor(
+ config_space=configspace_huge,
+ initial_trials=1,
+ compressor_type='expert',
+ compressor_kwargs={
+ 'expert_params': ['x1', 'x4'],
+ 'top_ratio': 1.0,
+ 'sigma': 0.0,
+ 'seed': 11,
+ },
+ )
+
+ assert isinstance(advisor.space_adapter, CompressorSpaceAdapter)
+ assert advisor.space_adapter.compressor.pipeline is not None
+ assert any(type(step).__name__ == 'ExpertDimensionStep'
+ for step in advisor.space_adapter.compressor.pipeline.steps)
+ assert set(advisor.surrogate_space.get_hyperparameter_names()) == {'x1', 'x4'}
+
+
+def test_generic_advisor_with_llamatune_rembo_projection(configspace_huge):
+ advisor = Advisor(
+ config_space=configspace_huge,
+ initial_trials=1,
+ compressor_type='llamatune',
+ compressor_kwargs={
+ 'adapter_alias': 'rembo',
+ 'le_low_dim': 2,
+ 'max_num_values': 6,
+ 'seed': 13,
+ },
+ )
+
+ assert isinstance(advisor.space_adapter, CompressorSpaceAdapter)
+ assert advisor.space_adapter.compressor.pipeline is not None
+ assert advisor.space_adapter.compressor.needs_unproject()
+ assert len(advisor.sample_space.get_hyperparameters()) == 2
+ assert all(name.startswith('rembo_') for name in advisor.sample_space.get_hyperparameter_names())
+
+
+def test_generic_advisor_with_pipeline_steps_as_strings(configspace_huge):
+ advisor = Advisor(
+ config_space=configspace_huge,
+ initial_trials=1,
+ compressor_type='pipeline',
+ compressor_kwargs={
+ 'steps': ['p_quant'],
+ 'step_params': {'p_quant': {'max_num_values': 5, 'seed': 17}},
+ 'seed': 17,
+ },
+ )
+
+ assert isinstance(advisor.space_adapter, CompressorSpaceAdapter)
+ assert advisor.space_adapter.compressor.pipeline is not None
+ assert advisor.space_adapter.compressor.pipeline.steps[0].__class__.__name__ == 'QuantizationProjectionStep'
+ assert 'x4|q' in advisor.surrogate_space.get_hyperparameter_names()
+
+
+@pytest.mark.parametrize(
+ 'step_strings,step_params,expected_unprojected_names,uses_projection',
+ [
+ (
+ ['d_expert', 'r_boundary'],
+ {
+ 'd_expert': {'expert_params': ['x1']},
+ 'r_boundary': {'top_ratio': 0.8, 'sigma': 0.0},
+ },
+ {'x1'},
+ False,
+ ),
+ (
+ ['d_expert', 'r_boundary', 'p_rembo'],
+ {
+ 'd_expert': {'expert_params': ['x1']},
+ 'r_boundary': {'top_ratio': 0.8, 'sigma': 0.0},
+ 'p_rembo': {'low_dim': 1, 'seed': 19},
+ },
+ {'x1'},
+ True,
+ ),
+ (
+ ['d_expert', 'p_rembo'],
+ {
+ 'd_expert': {'expert_params': ['x1']},
+ 'p_rembo': {'low_dim': 1, 'seed': 23},
+ },
+ {'x1'},
+ True,
+ ),
+ (
+ ['r_boundary', 'p_rembo'],
+ {
+ 'r_boundary': {'top_ratio': 0.8, 'sigma': 0.0},
+ 'p_rembo': {'low_dim': 1, 'seed': 29},
+ },
+ {'x1', 'x2'},
+ True,
+ ),
+ ],
+ ids=[
+ 'dimension+boundary',
+ 'dimension+boundary+projection',
+ 'dimension+projection',
+ 'boundary+projection',
+ ],
+)
+def test_generic_advisor_pipeline_step_combinations(
+ configspace_tiny,
+ transfer_learning_history_single,
+ step_strings,
+ step_params,
+ expected_unprojected_names,
+ uses_projection,
+):
+ advisor = Advisor(
+ config_space=configspace_tiny,
+ initial_trials=1,
+ compressor_type='pipeline',
+ compressor_kwargs={
+ 'step_strings': step_strings,
+ 'step_params': step_params,
+ 'seed': 31,
+ },
+ transfer_learning_history=transfer_learning_history_single,
+ )
+
+ pipeline = advisor.space_adapter.compressor.pipeline
+ assert pipeline is not None
+ assert [step.__class__.__name__ for step in pipeline.steps] == [
+ 'ExpertDimensionStep' if s == 'd_expert'
+ else 'BoundaryRangeStep' if s == 'r_boundary'
+ else 'REMBOProjectionStep'
+ for s in step_strings
+ ]
+
+ config = advisor.get_suggestion()
+ surrogate_config = advisor.space_adapter.config_to_surrogate(config)
+ surrogate_names = set(surrogate_config.get_dictionary().keys())
+
+ if uses_projection:
+ assert all(name.startswith('rembo_') for name in surrogate_names)
+ else:
+ assert surrogate_names == expected_unprojected_names
+
+ unprojected_names = set(advisor.space_adapter.compressor.unprojected_space.get_hyperparameter_names())
+ assert unprojected_names == expected_unprojected_names
+
+ observation = Observation(
+ config=config,
+ objectives=[0.17],
+ trial_state=SUCCESS,
+ elapsed_time=0.1,
+ extra_info={},
+ )
+ advisor.update_observation(observation)
+ assert len(advisor.history) == 1
+
+
+def test_pipeline_type_requires_steps_or_step_strings(configspace_huge):
+ with pytest.raises(ValueError, match='requires `steps` or `step_strings`'):
+ Advisor(
+ config_space=configspace_huge,
+ initial_trials=1,
+ compressor_type='pipeline',
+ compressor_kwargs={},
+ )
diff --git a/test/core/test_generic_advisor.py b/test/core/test_generic_advisor.py
index 038884cf8..54d6055fe 100644
--- a/test/core/test_generic_advisor.py
+++ b/test/core/test_generic_advisor.py
@@ -16,7 +16,7 @@ def test_generic_advisor(configspace_tiny,configspace_cat,configspace_big,config
config_space_4 = configspace_mid
advisor_cat = Advisor(config_space_2,early_stop=True,early_stop_kwargs={"min_iter":1})
- config=advisor_cat.get_suggestion()
+ config = advisor_cat.get_suggestion()
observation = Observation(config, [0.1], trial_state=SUCCESS, elapsed_time=2.0, extra_info={})
advisor_cat.update_observation(observation)
@@ -87,8 +87,7 @@ def test_generic_advisor(configspace_tiny,configspace_cat,configspace_big,config
'''
让initial configuration有值,注意传参不能跳过默认参数,要么就用关键字形式赋值
'''
- initial_configurations = advisor.create_initial_design(advisor.init_strategy)
- new_advisor = Advisor(config_space,initial_configurations=initial_configurations)
+ _ = advisor.create_initial_design(init_strategy=advisor.init_strategy)
advisor.save_json("test/datas/test.json")
advisor.load_json("test/datas/test.json")
diff --git a/test/core/test_mf_advisor.py b/test/core/test_mf_advisor.py
new file mode 100644
index 000000000..d7a31a704
--- /dev/null
+++ b/test/core/test_mf_advisor.py
@@ -0,0 +1,50 @@
+from openbox.core import build_advisor
+from openbox.core.mf_advisor import MFAdvisor
+from openbox.utils.constants import SUCCESS
+from openbox.utils.history import Observation
+
+
+def test_mf_advisor_update_observation_by_resource_ratio(configspace_tiny, transfer_learning_history_single):
+ advisor = MFAdvisor(
+ config_space=configspace_tiny,
+ transfer_learning_history=transfer_learning_history_single,
+ initial_trials=1,
+ )
+
+ config = advisor.get_suggestion()
+ low_fidelity_obs = Observation(
+ config=config,
+ objectives=[0.5],
+ trial_state=SUCCESS,
+ elapsed_time=0.1,
+ extra_info={},
+ )
+ advisor.update_observation(low_fidelity_obs, resource_ratio=0.5)
+
+ assert len(advisor.history) == 0
+ assert len(advisor.history_list) == 1
+ assert advisor.resource_identifiers == [0.5]
+
+ full_fidelity_obs = Observation(
+ config=config,
+ objectives=[0.3],
+ trial_state=SUCCESS,
+ elapsed_time=0.2,
+ extra_info={},
+ )
+ advisor.update_observation(full_fidelity_obs, resource_ratio=1.0)
+
+ assert len(advisor.history) == 1
+ assert len(advisor.history_list) == 2
+ assert set(advisor.resource_identifiers) == {0.5, 1.0}
+
+
+def test_build_mf_advisor_from_factory(configspace_tiny, transfer_learning_history_single):
+ advisor = build_advisor(
+ advisor_type='mf',
+ config_space=configspace_tiny,
+ surrogate_type='mfgpe',
+ transfer_learning_history=transfer_learning_history_single,
+ initial_trials=1,
+ )
+ assert isinstance(advisor, MFAdvisor)
diff --git a/test/core/test_mf_batch_advisor.py b/test/core/test_mf_batch_advisor.py
deleted file mode 100644
index 88a1d374b..000000000
--- a/test/core/test_mf_batch_advisor.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import pytest
-from unittest.mock import MagicMock, patch
-from openbox.core.mf_batch_advisor import MFBatchAdvisor
-from openbox.utils.config_space import ConfigurationSpace
-from openbox.utils.history import Observation
-from openbox.utils.constants import MAXINT, SUCCESS
-
-
-def test_mf_batch_advisor(configspace_tiny, history_single_obs):
- config_space = configspace_tiny
- advisor = MFBatchAdvisor(config_space)
- assert advisor.config_space == config_space
- assert advisor.num_objectives == 1
- assert advisor.num_constraints == 0
- assert advisor.batch_size == 4
- assert advisor.init_num == 3
- assert advisor.init_strategy == 'random_explore_first'
- assert advisor.rand_prob == 0.1
- assert advisor.optimization_strategy == 'bo'
- assert advisor.surrogate_type == 'mfgpe'
- assert advisor.acq_type == 'ei'
- assert advisor.acq_optimizer_type == 'local_random'
- assert advisor.ref_point is None
- assert advisor.output_dir == 'logs'
- assert advisor.task_id == 'OpenBox'
- assert advisor.rng is not None
- assert advisor.history_list == []
- assert advisor.resource_identifiers == []
-
- resource_ratios = [0.2, 0.2, 0.2, 0.4, 0.4, 0.4, 0.8, 0.8, 1.0, 1.0]
-
- for i, obs in enumerate(history_single_obs.observations):
- advisor.update_observation(obs, resource_ratio=resource_ratios[i])
-
- suggestions = advisor.get_suggestions(batch_size=5, history=history_single_obs)
- assert len(suggestions) == 5
-
- observation = Observation(suggestions[0], [0.1], trial_state=SUCCESS, elapsed_time=2.0, extra_info={})
- advisor.update_observation(observation, resource_ratio=1)
- assert len(advisor.history) == 3
-
- configs = advisor.sample_random_configs(config_space, 5)
- assert len(configs) == 5
-
- assert len(advisor.history_list) == 4
- assert len(advisor.resource_identifiers) == 4
diff --git a/test/optimizer/test_generic_smbo.py b/test/optimizer/test_generic_smbo.py
index 0fee6831f..0c05477ef 100644
--- a/test/optimizer/test_generic_smbo.py
+++ b/test/optimizer/test_generic_smbo.py
@@ -1,10 +1,16 @@
import pytest
from openbox.optimizer.base import BOBase
from openbox.optimizer.generic_smbo import SMBO
+from openbox.core.generic_advisor import Advisor
+from openbox.core.mf_advisor import MFAdvisor
from openbox.utils.config_space import ConfigurationSpace
from openbox.utils.history import History, Observation
+def _objective_func(config, resource_ratio=1.0):
+ return [float(resource_ratio)]
+
+
def test_smbo(configspace_tiny, func_brain):
config_space = configspace_tiny
objective_function = func_brain
@@ -29,3 +35,238 @@ def test_smbo_default(configspace_tiny, func_brain):
smbo.run()
assert smbo.iteration_id == 2
assert len(smbo.config_advisor.history) == 2
+
+
+def test_smbo_multi_fidelity(configspace_tiny, monkeypatch):
+ class DummyAdvisor:
+ def __init__(self, config_space):
+ self.config_space = config_space
+ self.init_num = 0
+ self.history = History(
+ task_id='dummy',
+ num_objectives=1,
+ num_constraints=0,
+ config_space=config_space,
+ )
+ self.seen_resource_ratios = []
+
+ def get_suggestion(self):
+ return self.config_space.sample_configuration()
+
+ def get_suggestions(self, batch_size=1):
+ size = int(batch_size or 1)
+ return [self.get_suggestion() for _ in range(size)]
+
+ def update_observation(self, observation, resource_ratio=1.0):
+ self.seen_resource_ratios.append(round(float(resource_ratio), 5))
+ if resource_ratio == 1.0:
+ self.history.update_observation(observation)
+
+ dummy_advisor = DummyAdvisor(configspace_tiny)
+
+ def _build_dummy_advisor(*args, **kwargs):
+ return dummy_advisor
+
+ monkeypatch.setattr('openbox.core.build_advisor', _build_dummy_advisor)
+
+ smbo = SMBO(
+ objective_function=_objective_func,
+ config_space=configspace_tiny,
+ advisor_type='mf',
+ scheduler_type='mfes',
+ scheduler_kwargs={'R': 9, 'eta': 3},
+ max_runs=10,
+ initial_runs=0,
+ logging_dir='test/datas',
+ )
+ smbo.run()
+
+ assert smbo.iteration_id == 10
+ assert dummy_advisor.seen_resource_ratios.count(0.11111) == 9 * 4
+ assert dummy_advisor.seen_resource_ratios.count(0.33333) == 3 * 4 + 5 * 3
+ assert dummy_advisor.seen_resource_ratios.count(1.0) == 5 * 3 + 1
+ assert 1.0 in dummy_advisor.seen_resource_ratios
+ assert any(ratio < 1.0 for ratio in dummy_advisor.seen_resource_ratios)
+
+
+def test_smbo_batch_sampling(configspace_tiny, monkeypatch):
+ class DummyAdvisor:
+ def __init__(self, config_space):
+ self.config_space = config_space
+ self.init_num = 0
+ self.history = History(
+ task_id='dummy',
+ num_objectives=1,
+ num_constraints=0,
+ config_space=config_space,
+ )
+ self.batch_calls = 0
+ self.single_calls = 0
+
+ def get_suggestion(self):
+ self.single_calls += 1
+ return self.config_space.sample_configuration()
+
+ def get_suggestions(self, batch_size=1):
+ self.batch_calls += 1
+ size = int(batch_size or 1)
+ return [self.config_space.sample_configuration() for _ in range(size)]
+
+ def update_observation(self, observation, resource_ratio=1.0):
+ if resource_ratio == 1.0:
+ self.history.update_observation(observation)
+
+ dummy_advisor = DummyAdvisor(configspace_tiny)
+
+ def _build_dummy_advisor(*args, **kwargs):
+ return dummy_advisor
+
+ monkeypatch.setattr('openbox.core.build_advisor', _build_dummy_advisor)
+
+ smbo = SMBO(
+ objective_function=_objective_func,
+ config_space=configspace_tiny,
+ advisor_type='mf',
+ scheduler_type='mfes',
+ scheduler_kwargs={'R': 9, 'eta': 3},
+ max_runs=1,
+ initial_runs=0,
+ logging_dir='test/datas',
+ )
+ smbo.run()
+
+ # For first bracket in MFES (s=2), stage0 requests 9 configs once.
+ assert dummy_advisor.batch_calls == 1
+ assert dummy_advisor.single_calls == 0
+
+
+def _sample_unique_configs(config_space, n):
+ configs = []
+ while len(configs) < n:
+ conf = config_space.sample_configuration()
+ if conf not in configs:
+ configs.append(conf)
+ return configs
+
+
+@pytest.mark.parametrize('scheduler_type', ['bohb', 'mfes'])
+@pytest.mark.parametrize(
+ 'R,eta,stage_sizes,stage_ratios',
+ [
+ (9, 3, [9, 3, 1], [0.11111, 0.33333, 1.0]),
+ (27, 3, [27, 9, 3, 1], [0.03704, 0.11111, 0.33333, 1.0]),
+ ],
+)
+def test_smbo_scheduler(
+ configspace_tiny, monkeypatch, scheduler_type, R, eta, stage_sizes, stage_ratios
+):
+ stage0_n = stage_sizes[0]
+ stage0_candidates = _sample_unique_configs(configspace_tiny, stage0_n)
+ # Make best configs appear at the tail to ensure elimination logic is really exercised.
+ score_by_id = {id(cfg): float(stage0_n - i) for i, cfg in enumerate(stage0_candidates)}
+ eval_by_ratio = {ratio: [] for ratio in stage_ratios}
+
+ def objective_with_trace(config, resource_ratio=1.0):
+ ratio = round(float(resource_ratio), 5)
+ eval_by_ratio[ratio].append(config)
+ return [score_by_id[id(config)]]
+
+ class DummyAdvisor:
+ def __init__(self, config_space):
+ self.config_space = config_space
+ self.init_num = 0
+ self.history = History(
+ task_id='dummy',
+ num_objectives=1,
+ num_constraints=0,
+ config_space=config_space,
+ )
+ self.batch_calls = []
+ self.updated_ratios = []
+
+ def get_suggestion(self):
+ return self.config_space.sample_configuration()
+
+ def get_suggestions(self, batch_size=1):
+ self.batch_calls.append(int(batch_size))
+ if int(batch_size) != stage0_n:
+ raise AssertionError(f'Unexpected batch_size: {batch_size}, expected {stage0_n}')
+ return list(stage0_candidates)
+
+ def update_observation(self, observation, resource_ratio=1.0):
+ ratio = round(float(resource_ratio), 5)
+ self.updated_ratios.append(ratio)
+ if ratio == 1.0:
+ self.history.update_observation(observation)
+
+ dummy_advisor = DummyAdvisor(configspace_tiny)
+
+ def _build_dummy_advisor(*args, **kwargs):
+ return dummy_advisor
+
+ monkeypatch.setattr('openbox.core.build_advisor', _build_dummy_advisor)
+
+ smbo = SMBO(
+ objective_function=objective_with_trace,
+ config_space=configspace_tiny,
+ advisor_type='mf',
+ scheduler_type=scheduler_type,
+ scheduler_kwargs={'R': R, 'eta': eta},
+ max_runs=1,
+ initial_runs=0,
+ logging_dir='test/datas',
+ )
+ smbo.run()
+
+ # Stage0 should only query advisor once.
+ assert dummy_advisor.batch_calls == [stage0_n]
+
+ expected_by_ratio = {}
+ current = list(stage0_candidates)
+ expected_by_ratio[stage_ratios[0]] = list(current)
+ for idx in range(1, len(stage_sizes)):
+ keep_n = stage_sizes[idx]
+ current = sorted(current, key=lambda c: score_by_id[id(c)])[:keep_n]
+ expected_by_ratio[stage_ratios[idx]] = list(current)
+
+ for ratio in stage_ratios:
+ assert eval_by_ratio[ratio] == expected_by_ratio[ratio]
+
+ if scheduler_type == 'bohb':
+ assert dummy_advisor.updated_ratios == [1.0]
+ else:
+ expected_updates = []
+ for ratio, n in zip(stage_ratios, stage_sizes):
+ expected_updates.extend([ratio] * n)
+ assert dummy_advisor.updated_ratios == expected_updates
+
+
+def test_smbo_scheduler_advisor_mapping(configspace_tiny):
+ smbo_bohb = SMBO(
+ objective_function=_objective_func,
+ config_space=configspace_tiny,
+ advisor_type='mf',
+ scheduler_type='bohb',
+ surrogate_type='mfgpe',
+ scheduler_kwargs={'R': 9, 'eta': 3},
+ max_runs=1,
+ initial_runs=0,
+ logging_dir='test/datas',
+ )
+ assert isinstance(smbo_bohb.config_advisor, Advisor)
+ assert not isinstance(smbo_bohb.config_advisor, MFAdvisor)
+ assert smbo_bohb.config_advisor.surrogate_type != 'mfgpe'
+
+ smbo_mfes = SMBO(
+ objective_function=_objective_func,
+ config_space=configspace_tiny,
+ advisor_type='default',
+ scheduler_type='mfes',
+ surrogate_type='auto',
+ scheduler_kwargs={'R': 9, 'eta': 3},
+ max_runs=1,
+ initial_runs=0,
+ logging_dir='test/datas',
+ )
+ assert isinstance(smbo_mfes.config_advisor, MFAdvisor)
+ assert smbo_mfes.config_advisor.surrogate_type == 'mfgpe'
diff --git a/test/optimizer/test_scheduler.py b/test/optimizer/test_scheduler.py
new file mode 100644
index 000000000..05dc22a29
--- /dev/null
+++ b/test/optimizer/test_scheduler.py
@@ -0,0 +1,192 @@
+import pytest
+
+from openbox.optimizer.scheduler import build_scheduler, check_scheduler
+
+
+def test_fixed_scheduler():
+    """Fixed scheduler replays the user-supplied per-stage (n, ratio) lists verbatim."""
+    scheduler = build_scheduler(
+        'fixed',
+        n_resources=[4, 2, 1],
+        r_resources=[0.25, 0.5, 1.0],
+        fidelity_levels=[0.25, 0.5, 1.0],
+    )
+
+    assert scheduler.get_fidelity_levels() == [0.25, 0.5, 1.0]
+    # Stage i evaluates n_resources[i] configs at ratio r_resources[i].
+    assert scheduler.get_stage_params(stage=0) == (4, 0.25)
+    assert scheduler.get_stage_params(stage=1) == (2, 0.5)
+    assert scheduler.get_stage_params(stage=2) == (1, 1.0)
+    # Ratios are already normalized, so they pass through unchanged.
+    assert scheduler.calculate_resource_ratio(0.25) == 0.25
+    assert scheduler.calculate_resource_ratio(1.0) == 1.0
+    # The fixed scheduler records observations from every fidelity level.
+    assert scheduler.should_update_history(0.25)
+    assert scheduler.should_update_history(1.0)
+
+
+def test_bohb_scheduler():
+    """BOHB scheduler builds Hyperband brackets; history updates at full fidelity only."""
+    scheduler = build_scheduler('bohb', R=9, eta=3)
+
+    # R=9, eta=3 gives s_max+1 = 3 brackets, cycled in order s_max -> 0.
+    assert scheduler.s_values == [2, 1, 0]
+    assert [scheduler.get_bracket_index(i) for i in range(6)] == [2, 1, 0, 2, 1, 0]
+
+    # Successive-halving stages per bracket as (n_configs, n_resource).
+    expected_brackets = {
+        2: [(9, 1), (3, 3), (1, 9)],
+        1: [(5, 3), (1, 9)],
+        0: [(3, 9)],
+    }
+    for s, stages in expected_brackets.items():
+        for stage, expected in enumerate(stages):
+            n_configs, n_resource = scheduler.get_stage_params(s=s, stage=stage)
+            assert (n_configs, n_resource) == expected
+            # Ratio is n_resource / R, rounded to 5 decimal places.
+            assert scheduler.calculate_resource_ratio(n_resource) == round(n_resource / scheduler.R, 5)
+
+    assert scheduler.calculate_resource_ratio(1) == 0.11111
+    assert scheduler.calculate_resource_ratio(3) == 0.33333
+    assert scheduler.calculate_resource_ratio(9) == 1.0
+    # BOHB feeds only full-fidelity (ratio 1.0) results back into history.
+    assert scheduler.should_update_history(0.33333) is False
+    assert scheduler.should_update_history(1.0) is True
+
+
+def test_mfes_scheduler():
+    """MFES shares BOHB's bracket layout but records every fidelity level in history."""
+    scheduler = build_scheduler('mfes', R=9, eta=3)
+
+    # Same Hyperband bracket math as BOHB (R=9, eta=3).
+    expected_brackets = {
+        2: [(9, 1), (3, 3), (1, 9)],
+        1: [(5, 3), (1, 9)],
+        0: [(3, 9)],
+    }
+    for s, stages in expected_brackets.items():
+        for stage, expected in enumerate(stages):
+            assert scheduler.get_stage_params(s=s, stage=stage) == expected
+
+    # Unlike BOHB, low-fidelity observations also update the history.
+    assert scheduler.should_update_history(0.33333) is True
+    assert scheduler.should_update_history(1.0) is True
+
+
+def test_flatten_scheduler():
+    """Flatten scheduler pads the Hyperband bracket list with extra full-resource brackets."""
+    scheduler = build_scheduler('flatten', R=9, eta=3, num_nodes=1)
+
+    # Three regular Hyperband brackets (s=2,1,0) plus two padded s=0 ones.
+    assert len(scheduler.brackets) == 5
+    assert [b['s'] for b in scheduler.brackets] == [2, 1, 0, 0, 0]
+
+    assert scheduler.brackets[0]['stages'] == [(9, 1), (3, 3), (1, 9)]
+    assert scheduler.brackets[1]['stages'] == [(5, 3), (1, 9)]
+    # Padded brackets are single-stage, full-resource runs.
+    for bracket in scheduler.brackets[2:]:
+        assert bracket['stages'] == [(1, 9)]
+
+    # Bracket index cycles over all 5 brackets, then wraps around.
+    expected_cycle = [2, 1, 0, 0, 0, 2, 1]
+    for iter_id, expected_s in enumerate(expected_cycle):
+        s = scheduler.get_bracket_index(iter_id)
+        assert s == expected_s
+        n_configs, n_resource = scheduler.get_stage_params(s=s, stage=0)
+        if s == 2:
+            assert (n_configs, n_resource) == (9, 1)
+        elif s == 1:
+            assert (n_configs, n_resource) == (5, 3)
+        else:
+            assert (n_configs, n_resource) == (1, 9)
+
+    assert scheduler.calculate_resource_ratio(1) == 0.11111
+    assert scheduler.calculate_resource_ratio(3) == 0.33333
+    assert scheduler.calculate_resource_ratio(9) == 1.0
+    # BOHB-style history policy: only full-fidelity results are recorded.
+    assert scheduler.should_update_history(0.33333) is False
+    assert scheduler.should_update_history(1.0) is True
+
+
+def test_mfes_flatten_scheduler():
+    """mfes_flatten combines flatten's padded brackets with MFES's history policy."""
+    scheduler = build_scheduler('mfes_flatten', R=9, eta=3, num_nodes=1)
+
+    # Same padded bracket layout as the plain flatten scheduler.
+    assert len(scheduler.brackets) == 5
+    assert [b['s'] for b in scheduler.brackets] == [2, 1, 0, 0, 0]
+    # MFES-style history policy: every fidelity level is recorded.
+    assert scheduler.should_update_history(0.33333) is True
+    assert scheduler.should_update_history(1.0) is True
+
+
+def test_check_scheduler_requires_resource_ratio_for_mf():
+    """Multi-fidelity schedulers reject objectives lacking a 'resource_ratio' parameter."""
+    def objective_without_resource_ratio(config):
+        return [0.0]
+
+    with pytest.raises(ValueError, match='requires objective function to accept "resource_ratio"'):
+        check_scheduler(objective_without_resource_ratio, scheduler_type='mfes')
+
+
+@pytest.mark.parametrize('scheduler_type', ['bohb', 'mfes'])
+@pytest.mark.parametrize(
+    'R,eta,expected_brackets',
+    [
+        (
+            9,
+            3,
+            {
+                2: [(9, 1), (3, 3), (1, 9)],
+                1: [(5, 3), (1, 9)],
+                0: [(3, 9)],
+            },
+        ),
+        (
+            27,
+            3,
+            {
+                3: [(27, 1), (9, 3), (3, 9), (1, 27)],
+                2: [(12, 3), (4, 9), (1, 27)],
+                1: [(6, 9), (2, 27)],
+                0: [(4, 27)],
+            },
+        ),
+    ],
+)
+def test_mf_scheduler(
+    scheduler_type, R, eta, expected_brackets
+):
+    """Bracket math of BOHB/MFES schedulers matches hand-computed Hyperband tables.
+
+    Parametrized over two (R, eta) settings; expected_brackets maps each
+    bracket index s to its list of (n_configs, n_resource) stages.
+    """
+    scheduler = build_scheduler(scheduler_type, R=R, eta=eta)
+
+    # Bracket scheduling order should be s_max -> ... -> 0 and repeat cyclically.
+    expected_cycle = list(reversed(range(scheduler.s_max + 1)))
+    assert scheduler.s_values == expected_cycle
+    assert [scheduler.get_bracket_index(i) for i in range(len(expected_cycle) * 2)] == expected_cycle * 2
+
+    for s, stages in expected_brackets.items():
+        for stage, (expected_n, expected_r) in enumerate(stages):
+            n_configs, n_resource = scheduler.get_stage_params(s=s, stage=stage)
+            assert (n_configs, n_resource) == (expected_n, expected_r)
+            # Resource ratio normalizes n_resource by R (5-decimal rounding).
+            assert scheduler.calculate_resource_ratio(n_resource) == round(expected_r / R, 5)
+
+
+@pytest.mark.parametrize('scheduler_type', ['bohb', 'mfes'])
+@pytest.mark.parametrize(
+    'R,eta,bracket_s',
+    [
+        (9, 3, 2),
+        (27, 3, 3),
+    ],
+)
+def test_mf_scheduler_eliminate_candidates(
+    scheduler_type, R, eta, bracket_s
+):
+    """eliminate_candidates keeps the best (lowest-perf) configs at each stage.
+
+    Candidate 'cfg_i' is assigned perf n0-1-i, so the best candidates are the
+    ones with the highest index; each successive-halving stage must keep
+    exactly next_n of them, sorted by perf ascending.
+    """
+    scheduler = build_scheduler(scheduler_type, R=R, eta=eta)
+
+    n0, _ = scheduler.get_stage_params(s=bracket_s, stage=0)
+    total_n = n0
+    candidates = [f'cfg_{i}' for i in range(total_n)]
+    # perfs = [n0-1, n0-2, ..., 0]: cfg_0 is worst, cfg_{n0-1} is best.
+    perfs = list(reversed(range(n0)))
+
+    current_candidates = candidates
+    current_perfs = perfs
+    survivors_by_stage = []
+
+    for stage in range(bracket_s):
+        # Stage transition stage -> stage+1 must shrink to the next stage's n.
+        next_n, _ = scheduler.get_stage_params(s=bracket_s, stage=stage + 1)
+        current_candidates, current_perfs = scheduler.eliminate_candidates(
+            current_candidates, current_perfs, s=bracket_s, stage=stage
+        )
+        survivors_by_stage.append(list(current_candidates))
+
+        assert len(current_candidates) == next_n
+        assert len(current_perfs) == next_n
+        # Survivors come back ordered best-first (ascending perf).
+        assert current_perfs == sorted(current_perfs)
+
+    # Recompute expected survivors: the next_n highest-index (best) configs.
+    expected_survivors = []
+    for stage in range(bracket_s):
+        next_n, _ = scheduler.get_stage_params(s=bracket_s, stage=stage + 1)
+        expected_survivors.append([f'cfg_{i}' for i in range(total_n - 1, total_n - next_n - 1, -1)])
+
+    assert survivors_by_stage == expected_survivors
+
|