|
| 1 | +""" |
| 2 | +How to use this benchmark: |
| 3 | +-------------------------- |
| 4 | +
|
| 5 | +We recommend using the containerized version of this benchmark. |
| 6 | +If you want to use this benchmark locally (without running it via the corresponding container), |
| 7 | +you need to perform the following steps. |
| 8 | +
|
| 9 | +Prerequisites: 1) Install Conda |
| 10 | +=============================== |
| 11 | +Create a Conda environment in which HPOBench is installed (pip install .) and activate it: |
| 12 | +``` |
| 13 | +conda activate <Name_of_Conda_HPOBench_environment> |
| 14 | +``` |
| 15 | +
|
| 16 | +Prerequisites: 2) Install R |
| 17 | +=========================== |
| 18 | +
|
| 19 | +Install R (4.0.5 - IMPORTANT!) and the required dependencies: |
| 20 | +
|
| 21 | +``` bash |
| 22 | +Rscript -e 'install.packages("remotes", repos = "http://cran.r-project.org")' |
| 23 | +
|
| 24 | +# Install OpenML dependencies |
| 25 | +Rscript -e 'install.packages("curl", repos = "http://cran.r-project.org")' \ |
| 26 | +&& Rscript -e 'install.packages("httr", repos = "http://cran.r-project.org")' \ |
| 27 | +&& Rscript -e 'install.packages("farff", repos = "http://cran.r-project.org")' \ |
| 28 | +&& Rscript -e 'install.packages("OpenML", repos = "http://cran.r-project.org")' |
| 29 | +
|
| 30 | +# Install rbv2 dependencies |
| 31 | +Rscript -e 'remotes::install_version("BBmisc", version = "1.11", upgrade = "never", repos = "http://cran.r-project.org")' \ |
| 32 | +&& Rscript -e 'remotes::install_version("glmnet", version = "2.0-16", upgrade = "never", repos = "http://cran.r-project.org")' \ |
| 33 | +&& Rscript -e 'remotes::install_version("rpart", version = "4.1-13", upgrade = "never", repos = "http://cran.r-project.org")' \ |
| 34 | +&& Rscript -e 'remotes::install_version("e1071", version = "1.7-0.1", upgrade = "never", repos = "http://cran.r-project.org")' \ |
| 35 | +&& Rscript -e 'remotes::install_version("xgboost", version = "0.82.1", upgrade = "never", repos = "http://cran.r-project.org")' \ |
| 36 | +&& Rscript -e 'remotes::install_version("ranger", version = "0.11.2", upgrade = "never", repos = "http://cran.r-project.org")' \ |
| 37 | +&& Rscript -e 'remotes::install_version("RcppHNSW", version = "0.1.0", upgrade = "never", repos = "http://cran.r-project.org")' \ |
| 38 | +&& Rscript -e 'remotes::install_version("mlr", version = "2.14", upgrade = "never", repos = "http://cran.r-project.org")' \ |
| 39 | +&& Rscript -e 'remotes::install_github("mlr-org/mlr3misc", upgrade = "never", repos = "http://cran.r-project.org")' \ |
| 40 | +&& Rscript -e 'remotes::install_version("mlrCPO", version = "0.3.6", upgrade = "never", repos = "http://cran.r-project.org")' \ |
| 41 | +&& Rscript -e 'remotes::install_github("pfistfl/rbv2", upgrade = "never")' \ |
| 42 | +&& Rscript -e 'remotes::install_version("testthat", version = "3.1.4", upgrade = "never", repos = "http://cran.r-project.org")' \ |
| 43 | +&& Rscript -e 'remotes::install_github("sumny/iaml", upgrade = "never")' |
| 44 | +``` |
| 45 | +Prerequisites: 3) Install rpy2 |
| 46 | +============================== |
| 47 | +Installing the connector between R and python might be a little bit tricky. |
| 48 | +Official installation guide: https://rpy2.github.io/doc/latest/html/introduction.html |
| 49 | +
|
| 50 | +In some cases, we received the error: "/opt/R/4.0.5/lib/R/library/methods/libs/methods.so: undefined symbol". |
| 51 | +To solve this error, we had to execute the following command: |
| 52 | +``` |
| 53 | +export LD_LIBRARY_PATH=$(python -m rpy2.situation LD_LIBRARY_PATH):${LD_LIBRARY_PATH} |
| 54 | +``` |
| 55 | +
|
| 56 | +1. Download data: |
| 57 | +================= |
| 58 | +Normally, the data will be downloaded automatically. |
| 59 | +
|
| 60 | +If you want to download the data on your own, you can download the data with the following command: |
| 61 | +
|
| 62 | +``` bash |
| 63 | +git clone --depth 1 -b main https://github.com/pfistfl/yahpo_data.git |
| 64 | +``` |
| 65 | +
|
| 66 | +Afterwards, you have to tell yahpo where the downloaded data is located. |
| 67 | +
|
| 68 | +```python |
| 69 | +from yahpo_gym import local_config |
| 70 | +local_config.init_config() |
| 71 | +local_config.set_data_path("path-to-data") |
| 72 | +``` |
| 73 | +
|
| 74 | +The data consist of surrogates for different data sets. Each surrogate is a compressed ONNX neural network. |
| 75 | +
|
| 76 | +
|
| 77 | +2. Install HPOBench: |
| 78 | +==================== |
| 79 | +``` |
| 80 | +git clone https://github.com/automl/HPOBench.git |
| 81 | +cd /path/to/HPOBench |
| 82 | +pip install .[yahpo_gym_raw] |
| 83 | +``` |
| 84 | +
|
| 85 | +Changelog: |
| 86 | +========== |
| 87 | +0.0.1: |
| 88 | +* First implementation |
| 89 | +""" # noqa: E501 |
| 90 | + |
| 91 | +import logging |
| 92 | +from pathlib import Path |
| 93 | +from typing import Union, Dict, List |
| 94 | + |
| 95 | +import ConfigSpace as CS |
| 96 | +import numpy as np |
| 97 | +import rpy2.robjects as robjects |
| 98 | +from rpy2.robjects.packages import importr |
| 99 | +from yahpo_gym.benchmark_set import BenchmarkSet |
| 100 | + |
| 101 | +import hpobench.config |
| 102 | +from hpobench.abstract_benchmark import AbstractBenchmark, AbstractMultiObjectiveBenchmark |
| 103 | + |
| 104 | +__version__ = '0.0.1' |
| 105 | + |
| 106 | +logger = logging.getLogger('YAHPO-Raw') |
| 107 | + |
| 108 | + |
class YAHPOGymMORawBenchmark(AbstractMultiObjectiveBenchmark):
    """Multi-objective YAHPO Gym benchmark that evaluates configurations in R.

    Each call to ``objective_function`` converts the configuration and the fidelity to an R list and
    passes it to the ``eval_yahpo`` function of the ``rbv2`` or ``iaml`` R package via rpy2.
    """

    def __init__(self, scenario: str, instance: str,
                 rng: Union[np.random.RandomState, int, None] = None,
                 data_dir: Union[Path, str, None] = None):
        """
        Parameters
        ----------
        scenario : str
            Name for the learner. Must be one of [
            "rbv2_ranger", "rbv2_rpart", "rbv2_glmnet", "rbv2_xgboost", "rbv2_svm", "rbv2_aknn", "rbv2_super",
            "iaml_ranger", "iaml_rpart", "iaml_glmnet", "iaml_xgboost"
            ]
        instance : str
            A valid instance for the scenario. See `self.benchset.instances`.
            https://slds-lmu.github.io/yahpo_gym/scenarios.html#instances
        rng : np.random.RandomState, int, None
            Passed on to the constructor of the parent class.
        data_dir : Path, str, None
            Passed on to the `YAHPODataManager`.
            NOTE(review): presumably the directory holding the yahpo data; confirm against the data manager.
        """

        assert scenario.startswith('rbv2_') or scenario.startswith('iaml_'), \
            'Currently, we only support the experiments with rbv2_ and iaml from yahpo. ' \
            f'The scenario has to start with either rbv2_ or iaml_, but was {scenario}'

        # Imported here (not at module level), as in the original implementation.
        from hpobench.util.data_manager import YAHPODataManager
        self.data_manager = YAHPODataManager(data_dir=data_dir)
        self.data_manager.load()

        self.scenario = scenario
        self.instance = instance
        self.benchset = BenchmarkSet(scenario, active_session=True)
        self.benchset.set_instance(instance)

        logger.info(f'Start Benchmark for scenario {scenario} and instance {instance}')
        super().__init__(rng=rng)

    # pylint: disable=arguments-differ
    def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace:
        """ Return the search space of the benchmark set without its fidelity parameters. """
        return self.benchset.get_opt_space(drop_fidelity_params=True, seed=seed)

    # pylint: disable=arguments-differ
    def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace:
        """ Return the fidelity space of the benchmark set. """
        return self.benchset.get_fidelity_space(seed=seed)

    @AbstractMultiObjectiveBenchmark.check_parameters
    def objective_function(self, configuration: Union[CS.Configuration, Dict],
                           fidelity: Union[CS.Configuration, Dict, None] = None,
                           rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict:
        """
        Evaluate a configuration with the R package that belongs to the scenario.

        Parameters
        ----------
        configuration : CS.Configuration, Dict
        fidelity : CS.Configuration, Dict, None
            Merged with the configuration into a single R list. Both have to be mappings at this point.
            NOTE(review): presumably the `check_parameters` decorator takes care of that; confirm.
        rng : np.random.RandomState, int, None
            Not used by this method.
        kwargs
            Not used by this method.

        Returns
        -------
        Dict
            function_value : Dict - entries of the R result whose name is listed in `benchset.config.y_names`
            cost : the `timetrain` entry of the R result
            info : Dict
                fidelity : the used fidelity
                additional_info : Dict - all remaining entries of the R result

        Raises
        ------
        ValueError
            If the scenario starts with neither `rbv2_` nor `iaml_`.
        """

        # Cast python dict to R list:
        parameters = {**configuration, **fidelity}
        r_list = YAHPOGymMORawBenchmark.__cast_dict_to_rlist(parameters)

        # Call the random bot evaluation method
        if self.scenario.startswith('rbv2_'):
            # Establish a connection to the R package
            rbv2pkg = importr('rbv2')
            out = rbv2pkg.eval_yahpo(scenario=robjects.StrVector([self.scenario]), configuration=r_list)
        elif self.scenario.startswith('iaml_'):
            # We have to create a cache dir and initialize the cache
            _cache_dir = hpobench.config.config_file.cache_dir / 'R' / 'mlr3oml'
            oml = importr('mlr3oml')
            oml.initialize_cache(cache=robjects.StrVector([str(_cache_dir)]))

            iaml = importr('iaml')
            out = iaml.eval_yahpo(scenario=robjects.StrVector([self.scenario]), configuration=r_list)
        else:
            # The assertion in __init__ is stripped when python runs with -O. Fail with a clear message
            # instead of passing `None` on to the result conversion.
            raise ValueError('The scenario has to start with either rbv2_ or iaml_, '
                             f'but was {self.scenario}')

        # Cast the R list (result) back to a python dictionary
        result = YAHPOGymMORawBenchmark.__cast_to_dict(out)

        # Split the result into the optimization targets and everything else.
        y_names = self.benchset.config.y_names
        objectives = {target: value for target, value in result.items() if target in y_names}
        additional = {target: value for target, value in result.items() if target not in y_names}

        return {'function_value': objectives,
                "cost": result["timetrain"],
                'info': {'fidelity': fidelity, 'additional_info': additional}}

    @AbstractMultiObjectiveBenchmark.check_parameters
    def objective_function_test(self, configuration: Union[CS.Configuration, Dict],
                                fidelity: Union[CS.Configuration, Dict, None] = None,
                                rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict:
        """ Delegate to `objective_function`; there is no separate test evaluation. """
        return self.objective_function(configuration, fidelity=fidelity, rng=rng, **kwargs)

    @staticmethod
    def get_meta_information():
        """ Returns the meta information for the benchmark """
        return {'name': 'YAHPO Gym',
                'references': ['@misc{pfisterer2021yahpo,',
                               'title={YAHPO Gym -- Design Criteria and a new Multifidelity Benchmark '
                               '       for Hyperparameter Optimization},',
                               'author={Florian Pfisterer and Lennart Schneider and Julia Moosbauer '
                               '        and Martin Binder and Bernd Bischl},',
                               'eprint={2109.03670},',
                               'archivePrefix={arXiv},',
                               'year={2021}}'],
                'code': ['https://github.com/pfistfl/yahpo_gym/yahpo_gym',
                         'https://github.com/pfistfl/rbv2/',
                         'https://github.com/sumny/iaml']
                }

    # pylint: disable=arguments-differ
    def get_objective_names(self) -> List[str]:
        """ Return the names of the objectives, i.e. `benchset.config.y_names`. """
        return self.benchset.config.y_names

    @staticmethod
    def __cast_to_dict(r_list_object):
        """
        Convert an RPy2 ListVector to a Python dict.
        Source: https://ogeek.cn/qa/?qa=815151/

        Nested ListVectors are converted recursively. Entries of length one are unpacked to their single
        element; all other entries are kept as the RPy2 object.
        """
        result = {}
        for i, name in enumerate(r_list_object.names):
            entry = r_list_object[i]
            if isinstance(entry, robjects.ListVector):
                result[name] = YAHPOGymMORawBenchmark.__cast_to_dict(entry)
            elif len(entry) == 1:
                result[name] = entry[0]
            else:
                result[name] = entry
        return result

    @staticmethod
    def __to_r_literal(value) -> str:
        """ Render a single python value as the source code of an R literal. """
        # bool has to be checked explicitly: its string form `True` / `False` is not a valid R literal.
        if isinstance(value, (bool, np.bool_)):
            return 'TRUE' if value else 'FALSE'
        if isinstance(value, str):
            # Escape backslashes first, then double quotes, so that the value cannot terminate
            # the R string literal or be altered by R's escape processing.
            escaped = value.replace('\\', '\\\\').replace('"', '\\"')
            return f'"{escaped}"'
        return f'{value}'

    @staticmethod
    def __cast_dict_to_rlist(py_dict):
        """
        Convert a python dictionary to a RPy2 ListVector.

        The dictionary is rendered as the R expression `list(key = value,...)`, which is then evaluated by R.
        Strings are quoted and escaped, booleans become `TRUE` / `FALSE`, and every other value is inserted
        using its python string representation.
        """
        to_literal = YAHPOGymMORawBenchmark.__to_r_literal
        pairs = ",".join([f'{key} = ' + to_literal(value) for key, value in py_dict.items()])
        return robjects.r(f"list({pairs})")
| 237 | + |
| 238 | + |
class YAHPOGymRawBenchmark(AbstractBenchmark):
    """Single-objective view on `YAHPOGymMORawBenchmark`.

    All evaluations are delegated to a multi-objective backbone benchmark; this class only selects one of
    the returned objectives as the function value.
    """

    def __init__(self, scenario: str, instance: str, objective: Union[str, None] = None,
                 rng: Union[np.random.RandomState, int, None] = None,
                 data_dir: Union[Path, str, None] = None):
        """
        Parameters
        ----------
        scenario : str
            Name for the learner. The backbone benchmark only accepts scenarios
            that start with "rbv2_" or "iaml_".
        instance : str
            A valid instance for the scenario. See `self.backbone.benchset.instances`.
            https://slds-lmu.github.io/yahpo_gym/scenarios.html#instances
        objective : str, None
            Name of the (single-crit) objective. See `self.backbone.benchset.config.y_names`.
            If None, the first element in y_names is used.
        rng : np.random.RandomState, int, None
            Passed on to the backbone benchmark and to the constructor of the parent class.
        data_dir : Path, str, None
            Passed on to the backbone benchmark.

        Raises
        ------
        ValueError
            If `objective` is given but is not one of the objectives of the scenario.
        """
        # The backbone is created before the parent constructor is called, as in the original implementation.
        self.backbone = YAHPOGymMORawBenchmark(scenario=scenario, instance=instance, rng=rng, data_dir=data_dir)

        # Resolve the objective once, instead of silently mutating the attribute during the first evaluation.
        y_names = self.backbone.benchset.config.y_names
        if objective is None:
            objective = y_names[0]
        elif objective not in y_names:
            raise ValueError(f'Unknown objective {objective}. Must be one of {list(y_names)}')
        self.objective = objective

        super().__init__(rng=rng)

    @AbstractBenchmark.check_parameters
    def objective_function(self, configuration: Union[CS.Configuration, Dict],
                           fidelity: Union[Dict, CS.Configuration, None] = None,
                           rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict:
        """
        Evaluate a configuration with the backbone benchmark and pick the selected objective.

        Returns
        -------
        Dict
            function_value : value of the objective `self.objective`
            cost : the cost reported by the backbone benchmark
            info : Dict
                fidelity : the used fidelity
                additional_info : the additional info reported by the backbone benchmark
                objectives : Dict - all objectives reported by the backbone benchmark
        """

        mo_results = self.backbone.objective_function(configuration=configuration,
                                                      fidelity=fidelity,
                                                      rng=rng,
                                                      **kwargs)

        obj_value = mo_results['function_value'][self.objective]

        return {'function_value': obj_value,
                "cost": mo_results['cost'],
                'info': {'fidelity': fidelity,
                         'additional_info': mo_results['info']['additional_info'],
                         'objectives': mo_results['function_value']}}

    @AbstractBenchmark.check_parameters
    def objective_function_test(self, configuration: Union[CS.Configuration, Dict],
                                fidelity: Union[Dict, CS.Configuration, None] = None,
                                rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict:
        """ Delegate to `objective_function`; there is no separate test evaluation. """
        return self.objective_function(configuration, fidelity=fidelity, rng=rng, **kwargs)

    # pylint: disable=arguments-differ
    def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace:
        """ Return the configuration space of the backbone benchmark. """
        return self.backbone.get_configuration_space(seed=seed)

    # pylint: disable=arguments-differ
    def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace:
        """ Return the fidelity space of the backbone benchmark. """
        return self.backbone.get_fidelity_space(seed=seed)

    @staticmethod
    def get_meta_information() -> Dict:
        """ Returns the meta information of the multi-objective benchmark. """
        return YAHPOGymMORawBenchmark.get_meta_information()
0 commit comments