""" Base class for Tasks that are generated using the Profet algorithm.
For more information on Profet, see the original paper at https://arxiv.org/abs/1905.12982.
Klein, Aaron, Zhenwen Dai, Frank Hutter, Neil Lawrence, and Javier Gonzalez. "Meta-surrogate
benchmarking for hyperparameter optimization." Advances in Neural Information Processing Systems 32
(2019): 6270-6280.
"""
import os
import random
import warnings
from abc import ABC
from contextlib import contextmanager
from dataclasses import asdict
from logging import getLogger as get_logger
from pathlib import Path
from typing import Any, ClassVar, Dict, List, Optional, Type, Union
import numpy as np
try:
import torch
from torch.distributions import Normal
except ImportError as err:
warnings.warn(
RuntimeWarning(
f"The `profet` extras needs to be installed in order to use the Profet tasks.\n"
f"Error: {err}\n"
f"Use `pip install orion[profet]` to install the profet extras."
)
)
from orion.algo.space import Space
from orion.benchmark.task.base import BenchmarkTask
from orion.benchmark.task.profet.model_utils import MetaModelConfig
from orion.core.io.space_builder import SpaceBuilder
from orion.core.utils import compute_identity
from orion.core.utils.flatten import flatten
from orion.core.utils.format_trials import dict_to_trial
from orion.core.worker import transformer
logger = get_logger(__name__)
@contextmanager
def make_reproducible(seed: int):
"""Makes the random operations within a block of code reproducible for a given seed."""
# First: Get the starting random state, and restore it after.
start_random_state = random.getstate()
start_np_rng_state = np.random.get_state()
with torch.random.fork_rng():
# Set the random state, using the given seed.
random.seed(seed)
np_seed = random.randint(0, 2**32 - 1)
np.random.seed(np_seed)
torch_seed = random.randint(0, 2**32 - 1)
torch.random.manual_seed(torch_seed)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(torch_seed)
yield
# Restore the random state to the original state.
np.random.set_state(start_np_rng_state)
random.setstate(start_random_state)
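# A minimal usage sketch for `make_reproducible` (illustrative only, not executed here):
# every RNG touched inside the block (`random`, NumPy, and PyTorch, including CUDA) is
# seeded from the given seed, and the previous global RNG state is restored on exit.
#
#     with make_reproducible(seed=123):
#         x = np.random.rand(3)   # same values on every run with seed=123
#         w = torch.randn(3)      # torch RNG is forked, so the outer state is untouched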
class ProfetTask(BenchmarkTask, ABC):
"""Base class for Tasks that are generated using the Profet algorithm.
    For more information on Profet, see the original paper at https://arxiv.org/abs/1905.12982.
Klein, Aaron, Zhenwen Dai, Frank Hutter, Neil Lawrence, and Javier Gonzalez. "Meta-surrogate benchmarking for
hyperparameter optimization." Advances in Neural Information Processing Systems 32 (2019): 6270-6280.
Parameters
----------
max_trials : int, optional
Max number of trials to run, by default 100
input_dir : Union[Path, str], optional
Input directory containing the data used to train the meta-model, by default None.
checkpoint_dir : Union[Path, str], optional
Directory used to save/load trained meta-models, by default None.
model_config : MetaModelConfig, optional
Configuration options for the training of the meta-model, by default None
device : str, optional
The device to use for training, by default None.
with_grad : bool, optional
Whether the task should also return the gradients of the objective function with respect to
the inputs. Defaults to `False`.
"""
    # Type of model config to use. Must be overridden by subclasses.
ModelConfig: ClassVar[Type[MetaModelConfig]] = MetaModelConfig
def __init__(
self,
max_trials: int = 100,
input_dir: Union[Path, str] = "profet_data",
        checkpoint_dir: Optional[Union[Path, str]] = None,
        model_config: Optional[MetaModelConfig] = None,
device: Union[str, Any] = None,
with_grad: bool = False,
):
super().__init__(max_trials=max_trials)
self.input_dir = Path(input_dir)
self.checkpoint_dir = Path(checkpoint_dir or self.input_dir / "checkpoints")
# The config for the training of the meta-model.
# NOTE: the train config is used to determine the hash of the task.
if model_config is None:
# NOTE: This type error is safe to ignore: the benchmark argument will have been set in
# each ModelConfig subclass.
self.model_config = self.ModelConfig() # type: ignore
elif isinstance(model_config, dict):
self.model_config = self.ModelConfig(**model_config)
elif not isinstance(model_config, self.ModelConfig):
            # If passed a model config of a different type (e.g. after deserializing the
            # configuration), convert it to the expected type so the class attributes are correct.
self.model_config = self.ModelConfig(**asdict(model_config))
else:
self.model_config = model_config
assert isinstance(self.model_config, self.ModelConfig)
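        # At this point `self.model_config` is always an instance of `self.ModelConfig`,
        # whether the caller passed None, a dict of field values, or a (possibly more
        # generic) MetaModelConfig instance.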
self.seed = self.model_config.seed
self.with_grad = with_grad
# The parameters that have an influence over the training of the meta-model are used to
# create the filename where the model will be saved.
task_hash_params = asdict(self.model_config)
logger.info(f"Task hash params: {task_hash_params}")
task_hash = compute_identity(**task_hash_params)
filename = f"{task_hash}.pkl"
self.checkpoint_file = self.checkpoint_dir / filename
logger.info(f"Checkpoint file for this task: {self.checkpoint_file}")
if isinstance(device, torch.device):
self.device = device
else:
self.device = torch.device(
device or ("cuda" if torch.cuda.is_available() else "cpu")
)
        # NOTE: Need to control the randomness that happens inside *both* the training
        # function *and* the loading function (since `load_task_network` instantiates a model
        # and then loads the weights, it also affects the global RNG state of PyTorch).
with make_reproducible(self.seed):
if os.path.exists(self.checkpoint_file):
logger.info(
f"Model has already been trained: loading it from file {self.checkpoint_file}."
)
self.net, h = self.model_config.load_task_network(self.checkpoint_file)
else:
warnings.warn(
RuntimeWarning(
f"Checkpoint file {self.checkpoint_file} doesn't exist: re-training the "
f"model. (This may take a *very* long time!)"
)
)
logger.info(f"Task hash params: {task_hash_params}")
self.checkpoint_file.parent.mkdir(exist_ok=True, parents=True)
# Need to re-train the meta-model and sample this task.
self.net, h = self.model_config.get_task_network(self.input_dir)
# Numpy random state. Currently only used in `sample()`
self._np_rng_state = np.random.RandomState(self.seed)
self.h: np.ndarray = np.array(h)
self.model_config.save_task_network(self.checkpoint_file, self.net, self.h)
self.net = self.net.to(device=self.device, dtype=torch.float32)
self.net.eval()
self.h_tensor = torch.as_tensor(self.h, dtype=torch.float32, device=self.device)
self._space: Optional[Space] = None
self.name = (
f"profet.{type(self).__qualname__.lower()}_{self.model_config.task_id}"
)
self.transformed_space = transformer.build_required_space(
self.space,
type_requirement="real",
shape_requirement="flattened",
dist_requirement="linear",
)
@property
def space(self) -> Space:
if self._space is None:
self._space = SpaceBuilder().build(self.get_search_space())
return self._space
    def call(self, **kwargs) -> List[Dict]:
"""Get the value of the sampled objective function at the given point (hyper-parameters).
If `self.with_grad` is set, also returns the gradient of the objective function with respect
to the inputs.
Parameters
----------
**kwargs
Dictionary of hyper-parameters.
Returns
-------
List[Dict]
Result dictionaries: objective and optionally gradient.
Raises
------
ValueError
If the input isn't of a supported type.
"""
# A bit of gymnastics to convert the params Dict into a PyTorch tensor.
trial = dict_to_trial(kwargs, self._space)
flattened_trial = self.transformed_space.transform(trial)
flattened_params = flatten(flattened_trial.params)
flattened_point = np.array(
[flattened_params[key] for key in self.transformed_space.keys()]
)
x_tensor = torch.as_tensor(flattened_point).type_as(self.h_tensor)
if self.with_grad:
x_tensor = x_tensor.requires_grad_(True)
p_tensor = torch.cat([x_tensor, self.h_tensor])
p_tensor = torch.atleast_2d(p_tensor)
devices = [] if self.device.type == "cpu" else [self.device]
# NOTE: Currently no way to locally seed the rng of torch distributions, hence forking the
# rng for torch only here.
with torch.random.fork_rng(devices=devices):
torch.random.manual_seed(self.seed)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(self.seed)
# Forward pass:
out = self.net(p_tensor)
y_mean, y_log_std = out[0, 0], out[0, 1]
y_std = torch.exp(y_log_std)
            # NOTE: Here we create a distribution over `y` and use `rsample()`, so that we can
            # also return the gradients if need be.
y_dist = Normal(loc=y_mean, scale=y_std)
y_sample = y_dist.rsample()
logger.debug(f"y_sample: {y_sample}")
results: List[dict] = [
dict(name=self.name, type="objective", value=y_sample.detach().cpu().item())
]
if self.with_grad:
self.net.zero_grad()
y_sample.backward()
assert x_tensor.grad is not None
results.append(
dict(name=self.name, type="gradient", value=x_tensor.grad.cpu().numpy())
)
return results
@property
def configuration(self):
"""Return the configuration of the task."""
return {
self.__class__.__qualname__: {
"max_trials": self.max_trials,
"input_dir": str(self.input_dir),
"checkpoint_dir": str(self.checkpoint_dir),
"model_config": asdict(self.model_config),
"device": self.device.type,
"with_grad": self.with_grad,
}
}