Source code for orion.benchmark.task.profet.profet_task

""" Base class for Tasks that are generated using the Profet algorithm.

For more information on Profet, see original paper at https://arxiv.org/abs/1905.12982.

Klein, Aaron, Zhenwen Dai, Frank Hutter, Neil Lawrence, and Javier Gonzalez. "Meta-surrogate
benchmarking for hyperparameter optimization." Advances in Neural Information Processing Systems 32
(2019): 6270-6280.
"""
import os
import random
import warnings
from abc import ABC
from contextlib import contextmanager
from dataclasses import asdict
from logging import getLogger as get_logger
from pathlib import Path
from typing import Any, ClassVar, Dict, List, Optional, Type, Union

import numpy as np

try:
    import torch
    from torch.distributions import Normal
except ImportError as err:
    warnings.warn(
        RuntimeWarning(
            f"The `profet` extras needs to be installed in order to use the Profet tasks.\n"
            f"Error: {err}\n"
            f"Use `pip install orion[profet]` to install the profet extras."
        )
    )
from orion.algo.space import Space
from orion.benchmark.task.base import BenchmarkTask
from orion.benchmark.task.profet.model_utils import MetaModelConfig
from orion.core.io.space_builder import SpaceBuilder
from orion.core.utils import compute_identity
from orion.core.utils.flatten import flatten
from orion.core.utils.format_trials import dict_to_trial
from orion.core.worker import transformer

logger = get_logger(__name__)


@contextmanager
def make_reproducible(seed: int):
    """Makes the random operations within a block of code reproducible for a given seed."""
    # First: Get the starting random state, and restore it after.
    start_random_state = random.getstate()
    start_np_rng_state = np.random.get_state()
    with torch.random.fork_rng():
        # Set the random state, using the given seed.
        random.seed(seed)

        np_seed = random.randint(0, 2**32 - 1)
        np.random.seed(np_seed)

        torch_seed = random.randint(0, 2**32 - 1)
        torch.random.manual_seed(torch_seed)

        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(torch_seed)

        yield

    # Restore the random state to the original state.
    np.random.set_state(start_np_rng_state)
    random.setstate(start_random_state)
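
# Illustrative usage sketch of `make_reproducible`: two blocks seeded identically produce
# identical draws from `random`, NumPy, and PyTorch, and the surrounding global RNG state is
# restored on exit.
#
#     with make_reproducible(seed=123):
#         a = torch.randn(3)
#     with make_reproducible(seed=123):
#         b = torch.randn(3)
#     assert torch.equal(a, b)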

class ProfetTask(BenchmarkTask, ABC):
    """Base class for Tasks that are generated using the Profet algorithm.

    For more information on Profet, see original paper at https://arxiv.org/abs/1905.12982.

    Klein, Aaron, Zhenwen Dai, Frank Hutter, Neil Lawrence, and Javier Gonzalez. "Meta-surrogate
    benchmarking for hyperparameter optimization." Advances in Neural Information Processing
    Systems 32 (2019): 6270-6280.

    Parameters
    ----------
    max_trials : int, optional
        Max number of trials to run, by default 100.
    input_dir : Union[Path, str], optional
        Input directory containing the data used to train the meta-model, by default
        "profet_data".
    checkpoint_dir : Union[Path, str], optional
        Directory used to save/load trained meta-models, by default None.
    model_config : MetaModelConfig, optional
        Configuration options for the training of the meta-model, by default None.
    device : str, optional
        The device to use for training, by default None.
    with_grad : bool, optional
        Whether the task should also return the gradients of the objective function with respect
        to the inputs. Defaults to `False`.
    """

    # Type of model config to use. Has to be overwritten by subclasses.
    ModelConfig: ClassVar[Type[MetaModelConfig]] = MetaModelConfig

    def __init__(
        self,
        max_trials: int = 100,
        input_dir: Union[Path, str] = "profet_data",
        checkpoint_dir: Union[Path, str] = None,
        model_config: MetaModelConfig = None,
        device: Union[str, Any] = None,
        with_grad: bool = False,
    ):
        super().__init__(max_trials=max_trials)
        self.input_dir = Path(input_dir)
        self.checkpoint_dir = Path(checkpoint_dir or self.input_dir / "checkpoints")

        # The config for the training of the meta-model.
        # NOTE: the train config is used to determine the hash of the task.
        if model_config is None:
            # NOTE: This type error is safe to ignore: the benchmark argument will have been set
            # in each ModelConfig subclass.
            self.model_config = self.ModelConfig()  # type: ignore
        elif isinstance(model_config, dict):
            self.model_config = self.ModelConfig(**model_config)
        elif not isinstance(model_config, self.ModelConfig):
            # If passed a model config, for example through deserializing the configuration,
            # then convert it back to the right type, so the class attributes are correct.
            self.model_config = self.ModelConfig(**asdict(model_config))
        else:
            self.model_config = model_config
        assert isinstance(self.model_config, self.ModelConfig)

        self.seed = self.model_config.seed
        self.with_grad = with_grad

        # The parameters that have an influence over the training of the meta-model are used to
        # create the filename where the model will be saved.
        task_hash_params = asdict(self.model_config)
        logger.info(f"Task hash params: {task_hash_params}")
        task_hash = compute_identity(**task_hash_params)

        filename = f"{task_hash}.pkl"

        self.checkpoint_file = self.checkpoint_dir / filename
        logger.info(f"Checkpoint file for this task: {self.checkpoint_file}")

        if isinstance(device, torch.device):
            self.device = device
        else:
            self.device = torch.device(
                device or ("cuda" if torch.cuda.is_available() else "cpu")
            )

        # NOTE: Need to control the randomness that's happening inside *both* the training
        # function, as well as the loading function (since `load_task_network` instantiates a
        # model and then loads the weights, it also affects the global rng state of pytorch).
        with make_reproducible(self.seed):
            if os.path.exists(self.checkpoint_file):
                logger.info(
                    f"Model has already been trained: loading it from file {self.checkpoint_file}."
                )
                self.net, h = self.model_config.load_task_network(self.checkpoint_file)
            else:
                warnings.warn(
                    RuntimeWarning(
                        f"Checkpoint file {self.checkpoint_file} doesn't exist: re-training the "
                        f"model. (This may take a *very* long time!)"
                    )
                )
                logger.info(f"Task hash params: {task_hash_params}")
                self.checkpoint_file.parent.mkdir(exist_ok=True, parents=True)
                # Need to re-train the meta-model and sample this task.
                self.net, h = self.model_config.get_task_network(self.input_dir)

        # Numpy random state. Currently only used in `sample()`
        self._np_rng_state = np.random.RandomState(self.seed)

        self.h: np.ndarray = np.array(h)
        self.model_config.save_task_network(self.checkpoint_file, self.net, self.h)

        self.net = self.net.to(device=self.device, dtype=torch.float32)
        self.net.eval()

        self.h_tensor = torch.as_tensor(self.h, dtype=torch.float32, device=self.device)

        self._space: Optional[Space] = None
        self.name = (
            f"profet.{type(self).__qualname__.lower()}_{self.model_config.task_id}"
        )

        self.transformed_space = transformer.build_required_space(
            self.space,
            type_requirement="real",
            shape_requirement="flattened",
            dist_requirement="linear",
        )

    @property
    def space(self) -> Space:
        if self._space is None:
            self._space = SpaceBuilder().build(self.get_search_space())
        return self._space
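
    # Illustrative sketch (assumes a concrete subclass, e.g. `ProfetSvmTask`, with its default
    # `model_config`): the checkpoint filename is derived from a hash of the model config, so
    # constructing the same task twice reuses the meta-model trained on the first construction
    # instead of re-training it.
    #
    #     task = ProfetSvmTask(max_trials=50, input_dir="profet_data")
    #     again = ProfetSvmTask(max_trials=50, input_dir="profet_data")
    #     assert task.checkpoint_file == again.checkpoint_file  # same {task_hash}.pkl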

    def call(self, **kwargs) -> List[Dict]:
        """Get the value of the sampled objective function at the given point (hyper-parameters).

        If `self.with_grad` is set, also returns the gradient of the objective function with
        respect to the inputs.

        Parameters
        ----------
        **kwargs
            Dictionary of hyper-parameters.

        Returns
        -------
        List[Dict]
            Result dictionaries: objective and optionally gradient.

        Raises
        ------
        ValueError
            If the input isn't of a supported type.
        """
        # A bit of gymnastics to convert the params Dict into a PyTorch tensor.
        trial = dict_to_trial(kwargs, self._space)
        flattened_trial = self.transformed_space.transform(trial)
        flattened_params = flatten(flattened_trial.params)
        flattened_point = np.array(
            [flattened_params[key] for key in self.transformed_space.keys()]
        )

        x_tensor = torch.as_tensor(flattened_point).type_as(self.h_tensor)
        if self.with_grad:
            x_tensor = x_tensor.requires_grad_(True)
        p_tensor = torch.cat([x_tensor, self.h_tensor])
        p_tensor = torch.atleast_2d(p_tensor)

        devices = [] if self.device.type == "cpu" else [self.device]
        # NOTE: Currently no way to locally seed the rng of torch distributions, hence forking
        # the rng for torch only here.
        with torch.random.fork_rng(devices=devices):
            torch.random.manual_seed(self.seed)
            if torch.cuda.is_available():
                torch.cuda.manual_seed_all(self.seed)

            # Forward pass:
            out = self.net(p_tensor)
            y_mean, y_log_std = out[0, 0], out[0, 1]
            y_std = torch.exp(y_log_std)

            # NOTE: Here we create a distribution over `y`, and use `rsample()`, so that we can
            # also return the gradients if need be.
            y_dist = Normal(loc=y_mean, scale=y_std)
            y_sample = y_dist.rsample()

            logger.debug(f"y_sample: {y_sample}")

        results: List[dict] = [
            dict(name=self.name, type="objective", value=y_sample.detach().cpu().item())
        ]

        if self.with_grad:
            self.net.zero_grad()
            y_sample.backward()
            assert x_tensor.grad is not None
            results.append(
                dict(name=self.name, type="gradient", value=x_tensor.grad.cpu().numpy())
            )

        return results
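
    # Illustrative sketch of the result format returned by `call` (hypothetical `task` instance
    # and `params` dict; values are made up): with `with_grad=True` the objective entry is
    # followed by a gradient entry.
    #
    #     results = task.call(**params)
    #     # [{"name": "profet.profetsvmtask_0", "type": "objective", "value": 0.123},
    #     #  {"name": "profet.profetsvmtask_0", "type": "gradient", "value": array([...])}]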

    @property
    def configuration(self):
        """Return the configuration of the task."""
        return {
            self.__class__.__qualname__: {
                "max_trials": self.max_trials,
                "input_dir": str(self.input_dir),
                "checkpoint_dir": str(self.checkpoint_dir),
                "model_config": asdict(self.model_config),
                "device": self.device.type,
                "with_grad": self.with_grad,
            }
        }
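
# Illustrative sketch of the `configuration` structure (values are hypothetical, for a
# hypothetical `ProfetSvmTask` instance): the dict is keyed by the class name and mirrors the
# constructor arguments.
#
#     {"ProfetSvmTask": {"max_trials": 100,
#                        "input_dir": "profet_data",
#                        "checkpoint_dir": "profet_data/checkpoints",
#                        "model_config": {...},
#                        "device": "cpu",
#                        "with_grad": False}}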