Skip to content

Commit c07fc4f

Browse files
committed
Merge branch 'yahpo_raw' into mo_experiments
2 parents 21dde0d + 5fbb0ca commit c07fc4f

File tree

8 files changed

+464
-3
lines changed

8 files changed

+464
-3
lines changed

extra_requirements/yahpo_gym.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
{
2-
"yahpo_gym": ["yahpo_gym@git+https://github.com/pfistfl/yahpo_gym#egg=yahpo_gym&subdirectory=yahpo_gym"]
2+
"yahpo_gym": ["yahpo_gym@git+https://github.com/pfistfl/yahpo_gym#egg=yahpo_gym&subdirectory=yahpo_gym"],
3+
"yahpo_gym_raw": ["yahpo_gym@git+https://github.com/pfistfl/yahpo_gym#egg=yahpo_gym&subdirectory=yahpo_gym", "rpy2>=3.5.0", "openml==0.10.2", "gitpython>=3.1"]
34
}

hpobench/benchmarks/ml/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
RandomForestBenchmarkMF
66
from hpobench.benchmarks.ml.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF
77
from hpobench.benchmarks.ml.tabular_benchmark import TabularBenchmark
8+
from hpobench.benchmarks.ml.yahpo_benchmark import YAHPOGymMORawBenchmark, YAHPOGymRawBenchmark
89

910
try:
1011
from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF
@@ -19,4 +20,5 @@
1920
'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF',
2021
'TabularBenchmark',
2122
'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF',
23+
'YAHPOGymMORawBenchmark', 'YAHPOGymRawBenchmark',
2224
]
Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
"""
2+
How to use this benchmark:
3+
--------------------------
4+
5+
We recommend using the containerized version of this benchmark.
6+
If you want to use this benchmark locally (without running it via the corresponding container),
7+
you need to perform the following steps.
8+
9+
Prerequisites: 1) Install Conda
10+
===============================
11+
You need a Conda environment in which HPOBench is installed (pip install .). Activate this environment:
12+
```
13+
conda activate <Name_of_Conda_HPOBench_environment>
14+
```
15+
16+
Prerequisites: 2) Install R
17+
===========================
18+
19+
Install R (4.0.5 - IMPORTANT!) and the required dependencies:
20+
21+
``` bash
22+
Rscript -e 'install.packages("remotes", repos = "http://cran.r-project.org")'
23+
24+
# Install OpenML dependencies
25+
Rscript -e 'install.packages("curl", repos = "http://cran.r-project.org")' \
26+
&& Rscript -e 'install.packages("httr", repos = "http://cran.r-project.org")' \
27+
&& Rscript -e 'install.packages("farff", repos = "http://cran.r-project.org")' \
28+
&& Rscript -e 'install.packages("OpenML", repos = "http://cran.r-project.org")' \
29+
30+
# Install rbv2 dependencies
31+
Rscript -e 'remotes::install_version("BBmisc", version = "1.11", upgrade = "never", repos = "http://cran.r-project.org")' \
32+
&& Rscript -e 'remotes::install_version("glmnet", version = "2.0-16", upgrade = "never", repos = "http://cran.r-project.org")' \
33+
&& Rscript -e 'remotes::install_version("rpart", version = "4.1-13", upgrade = "never", repos = "http://cran.r-project.org")' \
34+
&& Rscript -e 'remotes::install_version("e1071", version = "1.7-0.1", upgrade = "never", repos = "http://cran.r-project.org")' \
35+
&& Rscript -e 'remotes::install_version("xgboost", version = "0.82.1", upgrade = "never", repos = "http://cran.r-project.org")' \
36+
&& Rscript -e 'remotes::install_version("ranger", version = "0.11.2", upgrade = "never", repos = "http://cran.r-project.org")' \
37+
&& Rscript -e 'remotes::install_version("RcppHNSW", version = "0.1.0", upgrade = "never", repos = "http://cran.r-project.org")' \
38+
&& Rscript -e 'remotes::install_version("mlr", version = "2.14", upgrade = "never", repos = "http://cran.r-project.org")' \
39+
&& Rscript -e 'remotes::install_github("mlr-org/mlr3misc", upgrade = "never", repos = "http://cran.r-project.org")' \
40+
&& Rscript -e 'remotes::install_version("mlrCPO", version = "0.3.6", upgrade = "never", repos = "http://cran.r-project.org")' \
41+
&& Rscript -e 'remotes::install_github("pfistfl/rbv2", upgrade = "never")' \
42+
&& Rscript -e 'remotes::install_version("testthat", version = "3.1.4", upgrade = "never", repos = "http://cran.r-project.org")' \
43+
&& Rscript -e 'remotes::install_github("sumny/iaml", upgrade = "never")'
44+
```
45+
Prerequisites: 3) Install rpy2
46+
==============================
47+
Installing the connector between R and python might be a little bit tricky.
48+
Official installation guide: https://rpy2.github.io/doc/latest/html/introduction.html
49+
50+
In some cases, we received the error: "/opt/R/4.0.5/lib/R/library/methods/libs/methods.so: undefined symbol".
51+
To solve this error, we had to execute the following command:
52+
```
53+
export LD_LIBRARY_PATH=$(python -m rpy2.situation LD_LIBRARY_PATH):${LD_LIBRARY_PATH}
54+
```
55+
56+
1. Download data:
57+
=================
58+
Normally, the data will be downloaded automatically.
59+
60+
If you want to download the data on your own, you can download the data with the following command:
61+
62+
``` bash
63+
git clone --depth 1 -b main https://github.com/pfistfl/yahpo_data.git
64+
```
65+
66+
Afterwards, you have to tell yahpo_gym where the downloaded data is located.
67+
68+
```python
69+
from yahpo_gym import local_config
70+
local_config.init_config()
71+
local_config.set_data_path("path-to-data")
72+
```
73+
74+
The data consist of surrogates for different data sets. Each surrogate is a compressed ONNX neural network.
75+
76+
77+
2. Install HPOBench:
78+
====================
79+
```
80+
git clone HPOBench
81+
cd /path/to/HPOBench
82+
pip install .[yahpo_gym_raw]
83+
```
84+
85+
Changelog:
86+
==========
87+
0.0.1:
88+
* First implementation
89+
""" # noqa: E501
90+
91+
import logging
92+
from pathlib import Path
93+
from typing import Union, Dict, List
94+
95+
import ConfigSpace as CS
96+
import numpy as np
97+
import rpy2.robjects as robjects
98+
from rpy2.robjects.packages import importr
99+
from yahpo_gym.benchmark_set import BenchmarkSet
100+
101+
import hpobench.config
102+
from hpobench.abstract_benchmark import AbstractBenchmark, AbstractMultiObjectiveBenchmark
103+
104+
__version__ = '0.0.1'
105+
106+
logger = logging.getLogger('YAHPO-Raw')
107+
108+
109+
class YAHPOGymMORawBenchmark(AbstractMultiObjectiveBenchmark):
    """
    Multi-objective YAHPO Gym benchmark that evaluates configurations through R.

    A configuration is converted to an R list and handed to the `eval_yahpo`
    function of the R package `rbv2` (scenarios starting with `rbv2_`) or
    `iaml` (scenarios starting with `iaml_`) via rpy2. The returned R list is
    converted back into a python dictionary and split into objectives and
    additional information.
    """

    def __init__(self, scenario: str, instance: str,
                 rng: Union[np.random.RandomState, int, None] = None,
                 data_dir: Union[Path, str, None] = None):
        """
        Parameters
        ----------
        scenario : str
            Name for the learner. Must be one of [
            "rbv2_ranger", "rbv2_rpart", "rbv2_glmnet", "rbv2_xgboost", "rbv2_svm", "rbv2_aknn", "rbv2_super",
            "iaml_ranger", "iaml_rpart", "iaml_glmnet", "iaml_xgboost"
            ]
        instance : str
            A valid instance for the scenario. See `self.benchset.instances`.
            https://slds-lmu.github.io/yahpo_gym/scenarios.html#instances
        rng : np.random.RandomState, int, None
        data_dir : Path, str, None
            Passed unchanged to `YAHPODataManager`. The meaning of `None` is
            defined by the data manager, not by this class.

        Raises
        ------
        AssertionError
            If `scenario` starts with neither `rbv2_` nor `iaml_`.
        """

        assert scenario.startswith('rbv2_') or scenario.startswith('iaml_'), \
            'Currently, we only support the experiments with rbv2_ and iaml from yahpo. ' \
            f'The scenario has to start with either rbv2_ or iaml_, but was {scenario}'

        # Imported locally, as in the original implementation.
        from hpobench.util.data_manager import YAHPODataManager
        self.data_manager = YAHPODataManager(data_dir=data_dir)
        self.data_manager.load()

        self.scenario = scenario
        self.instance = instance
        self.benchset = BenchmarkSet(scenario, active_session=True)
        self.benchset.set_instance(instance)

        logger.info(f'Start Benchmark for scenario {scenario} and instance {instance}')
        super(YAHPOGymMORawBenchmark, self).__init__(rng=rng)

    # pylint: disable=arguments-differ
    def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace:
        """ Return the optimization space of the benchmark set without the fidelity parameters. """
        return self.benchset.get_opt_space(drop_fidelity_params=True, seed=seed)

    # pylint: disable=arguments-differ
    def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace:
        """ Return the fidelity space of the benchmark set. """
        return self.benchset.get_fidelity_space(seed=seed)

    @AbstractMultiObjectiveBenchmark.check_parameters
    def objective_function(self, configuration: Union[CS.Configuration, Dict],
                           fidelity: Union[CS.Configuration, Dict, None] = None,
                           rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict:
        """
        Evaluate a configuration with the R implementation that belongs to the scenario.

        Parameters
        ----------
        configuration : CS.Configuration, Dict
        fidelity : CS.Configuration, Dict, None
            Merged with the configuration before the call into R.
            NOTE(review): the body unpacks `fidelity` as a mapping, so it must not be
            None here - presumably the `check_parameters` decorator fills in the
            defaults; confirm against the decorator.
        rng : np.random.RandomState, int, None
            Not used by this method.
        kwargs
            Not used by this method.

        Returns
        -------
        Dict
            function_value : Dict - entries of the result whose name is in `benchset.config.y_names`
            cost : the `timetrain` entry of the result
            info : Dict
                fidelity : the used fidelity
                additional_info : all entries of the result that are not objectives

        Raises
        ------
        ValueError
            If `self.scenario` starts with neither `rbv2_` nor `iaml_`.
        """

        # Cast python dict to R list:
        parameters = {**configuration, **fidelity}
        r_list = YAHPOGymMORawBenchmark.__cast_dict_to_rlist(parameters)

        # Call the random bot evaluation method
        if self.scenario.startswith('rbv2_'):
            # Establish a connection to the R package
            rbv2pkg = importr('rbv2')
            out = rbv2pkg.eval_yahpo(scenario=robjects.StrVector([self.scenario]), configuration=r_list)
        elif self.scenario.startswith('iaml_'):
            # We have to create a cache dir and initialize the cache
            _cache_dir = hpobench.config.config_file.cache_dir / 'R' / 'mlr3oml'
            oml = importr('mlr3oml')
            oml.initialize_cache(cache=robjects.StrVector([str(_cache_dir)]))

            iaml = importr('iaml')
            out = iaml.eval_yahpo(scenario=robjects.StrVector([self.scenario]), configuration=r_list)
        else:
            # The constructor rejects such scenarios; this can only happen if the attribute
            # was changed afterwards. Fail with a clear message instead of an AttributeError
            # raised while converting `None`.
            raise ValueError('The scenario has to start with either rbv2_ or iaml_, '
                             f'but was {self.scenario}')

        # Cast the R list (result) back to a python dictionary
        result = YAHPOGymMORawBenchmark.__cast_to_dict(out)
        objective_names = self.benchset.config.y_names
        objectives = {target: value for target, value in result.items() if target in objective_names}
        additional = {target: value for target, value in result.items() if target not in objective_names}

        return {'function_value': objectives,
                "cost": result["timetrain"],
                'info': {'fidelity': fidelity, 'additional_info': additional}}

    @AbstractMultiObjectiveBenchmark.check_parameters
    def objective_function_test(self, configuration: Union[CS.Configuration, Dict],
                                fidelity: Union[CS.Configuration, Dict, None] = None,
                                rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict:
        """ Delegate to `objective_function`; there is no separate test evaluation. """
        return self.objective_function(configuration, fidelity=fidelity, rng=rng)

    @staticmethod
    def get_meta_information():
        """ Returns the meta information for the benchmark """
        return {'name': 'YAHPO Gym',
                'references': ['@misc{pfisterer2021yahpo,',
                               'title={YAHPO Gym -- Design Criteria and a new Multifidelity Benchmark '
                               ' for Hyperparameter Optimization},',
                               'author={Florian Pfisterer and Lennart Schneider and Julia Moosbauer '
                               ' and Martin Binder and Bernd Bischl},',
                               'eprint={2109.03670},',
                               'archivePrefix={arXiv},',
                               'year={2021}}'],
                'code': ['https://github.com/pfistfl/yahpo_gym/yahpo_gym',
                         'https://github.com/pfistfl/rbv2/',
                         'https://github.com/sumny/iaml']
                }

    # pylint: disable=arguments-differ
    def get_objective_names(self) -> List[str]:
        """ Return the names of the objectives as given by `benchset.config.y_names`. """
        return self.benchset.config.y_names

    @staticmethod
    def __cast_to_dict(r_list_object):
        """
        Convert an RPy2 ListVector to a Python dict.
        Source: https://ogeek.cn/qa/?qa=815151/

        Nested ListVectors are converted recursively. Entries of length one are
        unpacked to their single element; all other entries are kept as they are.
        """
        result = {}
        for i, name in enumerate(r_list_object.names):
            entry = r_list_object[i]
            if isinstance(entry, robjects.ListVector):
                result[name] = YAHPOGymMORawBenchmark.__cast_to_dict(entry)
            elif len(entry) == 1:
                result[name] = entry[0]
            else:
                result[name] = entry
        return result

    @staticmethod
    def _to_r_literal(value) -> str:
        """
        Render a single python value as R source text.

        Strings are wrapped in double quotes with backslashes and double quotes
        escaped, so that the value cannot terminate the literal. Booleans are
        written as `TRUE` / `FALSE`, because python's `True` / `False` are not
        valid R constants. Everything else is formatted with its default string
        representation.
        """
        if isinstance(value, str):
            escaped = value.replace('\\', '\\\\').replace('"', '\\"')
            return f'"{escaped}"'
        if isinstance(value, (bool, np.bool_)):
            return 'TRUE' if value else 'FALSE'
        return f'{value}'

    @staticmethod
    def __cast_dict_to_rlist(py_dict):
        """
        Convert a python dictionary to a RPy2 ListVector

        The dictionary is written as an R `list(key = value, ...)` expression,
        which is then evaluated by R.
        NOTE(review): the keys are inserted verbatim into the R expression, so
        they have to be syntactically valid R argument names.
        """
        pairs = [f'{key} = {YAHPOGymMORawBenchmark._to_r_literal(value)}'
                 for key, value in py_dict.items()]
        str_list = f"list({','.join(pairs)})"
        return robjects.r(str_list)
238+
239+
class YAHPOGymRawBenchmark(AbstractBenchmark):
240+
def __init__(self, scenario: str, instance: str, objective: str = None,
241+
rng: Union[np.random.RandomState, int, None] = None):
242+
"""
243+
Parameters
244+
----------
245+
scenario : str
246+
Name for the surrogate data. Must be one of ["lcbench", "fcnet", "nb301", "rbv2_svm",
247+
"rbv2_ranger", "rbv2_rpart", "rbv2_glmnet", "rbv2_aknn", "rbv2_xgboost", "rbv2_super"]
248+
instance : str
249+
A valid instance for the scenario. See `self.benchset.instances`.
250+
https://slds-lmu.github.io/yahpo_gym/scenarios.html#instances
251+
objective : str
252+
Name of the (single-crit) objective. See `self.benchset.config.y_names`.
253+
Initialized to None, picks the first element in y_names.
254+
rng : np.random.RandomState, int, None
255+
"""
256+
self.backbone = YAHPOGymMORawBenchmark(scenario=scenario, instance=instance, rng=rng)
257+
self.objective = objective
258+
super(YAHPOGymRawBenchmark, self).__init__(rng=rng)
259+
260+
@AbstractBenchmark.check_parameters
261+
def objective_function(self, configuration: Union[CS.Configuration, Dict],
262+
fidelity: Union[Dict, CS.Configuration, None] = None,
263+
rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict:
264+
265+
mo_results = self.backbone.objective_function(configuration=configuration,
266+
fidelity=fidelity,
267+
**kwargs)
268+
269+
# If not objective is set, we just grab the first returned entry.
270+
if self.objective is None:
271+
self.objective = self.backbone.benchset.config.y_names[0]
272+
273+
obj_value = mo_results['function_value'][self.objective]
274+
275+
return {'function_value': obj_value,
276+
"cost": mo_results['cost'],
277+
'info': {'fidelity': fidelity,
278+
'additional_info': mo_results['info']['additional_info'],
279+
'objectives': mo_results['function_value']}}
280+
281+
@AbstractBenchmark.check_parameters
282+
def objective_function_test(self, configuration: Union[CS.Configuration, Dict],
283+
fidelity: Union[Dict, CS.Configuration, None] = None,
284+
rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict:
285+
return self.objective_function(configuration, fidelity=fidelity, rng=rng)
286+
287+
# pylint: disable=arguments-differ
288+
def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace:
289+
return self.backbone.get_configuration_space(seed=seed)
290+
291+
# pylint: disable=arguments-differ
292+
def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace:
293+
return self.backbone.get_fidelity_space(seed=seed)
294+
295+
@staticmethod
296+
def get_meta_information() -> Dict:
297+
return YAHPOGymMORawBenchmark.get_meta_information()

hpobench/container/benchmarks/ml/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@
66
from hpobench.container.benchmarks.ml.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF
77
from hpobench.container.benchmarks.ml.tabular_benchmark import TabularBenchmark
88
from hpobench.container.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF
9-
9+
from hpobench.container.benchmarks.ml.yahpo_benchmark import YAHPOGymRawBenchmark, YAHPOGymMORawBenchmark
1010

1111
__all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF',
1212
'LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF',
1313
'NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF',
1414
'RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF',
1515
'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF',
1616
'TabularBenchmark',
17-
'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF']
17+
'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF',
18+
'YAHPOGymRawBenchmark', 'YAHPOGymMORawBenchmark']
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/usr/bin/python3
2+
# -*- coding: utf-8 -*-
3+
4+
from hpobench.container.client_abstract_benchmark import AbstractMOBenchmarkClient, \
5+
AbstractBenchmarkClient
6+
7+
8+
class YAHPOGymMORawBenchmark(AbstractMOBenchmarkClient):
    """ Client for the multi-objective raw YAHPO benchmark; fills in the default container settings. """

    def __init__(self, **kwargs):
        # Values supplied by the caller take precedence over the defaults.
        kwargs.setdefault('benchmark_name', 'YAHPOGymMORawBenchmark')
        kwargs.setdefault('container_name', 'yahpo_raw')
        # The tag is read from 'container_tag' but stored under the key 'latest'.
        kwargs['latest'] = kwargs.get('container_tag', '0.0.1')
        super().__init__(**kwargs)
14+
15+
16+
class YAHPOGymRawBenchmark(AbstractBenchmarkClient):
    """ Client for the single-objective raw YAHPO benchmark; fills in the default container settings. """

    def __init__(self, **kwargs):
        # Values supplied by the caller take precedence over the defaults.
        kwargs.setdefault('benchmark_name', 'YAHPOGymRawBenchmark')
        kwargs.setdefault('container_name', 'yahpo_raw')
        # The tag is read from 'container_tag' but stored under the key 'latest'.
        kwargs['latest'] = kwargs.get('container_tag', '0.0.1')
        super().__init__(**kwargs)

0 commit comments

Comments
 (0)