# Source code for orion.core.worker.trial

# -*- coding: utf-8 -*-
# pylint: skip-file
"""
:mod:`orion.core.worker.trial` -- Container class for `Trial` entity
====================================================================

.. module:: trial
   :platform: Unix
   :synopsis: Describe a particular training run, parameters and results

"""
import hashlib
import logging

from orion.core.utils.flatten import unflatten


# Module-level logger, named after this module's import path (`__name__`).
log = logging.getLogger(__name__)


def validate_status(status):
    """
    Verify if given status is valid. Can be one of ``new``, ``reserved``, ``suspended``,
    ``completed``, ``interrupted``, or ``broken``.

    ``None`` is accepted as well and passes validation without any lookup.

    :param status: Candidate status, checked against `Trial.allowed_stati`.

    :raises ValueError: If `status` is neither ``None`` nor an allowed status.
    """
    # Guard clause: nothing to report for `None` or for any known status.
    if status is None or status in Trial.allowed_stati:
        return

    raise ValueError("Given status `{0}` not one of: {1}".format(
        status, Trial.allowed_stati))


class Trial:
    """Represents an entry in database/trials collection.

    Attributes
    ----------
    experiment : str
       Unique identifier for the experiment that produced this trial.
       Same as an `Experiment._id`.
    id_override: str
        Trial id returned by the database. It should be unique for a given
        set of parameters
    heartbeat : datetime.datetime
        Last time trial was identified as being alive.
    status : str
       Indicates how this trial is currently being used. Can take the following
       values:

       * 'new' : Denotes a fresh set of parameters suggested by an algorithm,
          not yet tried out.
       * 'reserved' : Indicates that this trial is currently being evaluated by
          a worker process, it was a 'new' trial that got selected.
       * 'suspended' : Means that an algorithm decided to stop the evaluation of
          a 'reserved' trial prematurely.
       * 'completed' : is the status of a previously 'reserved' trial that
          successfully got evaluated. `Trial.results` must contain the evaluation.
       * 'interrupted' : Indicates trials that are stopped from being evaluated
          by external *actors* (e.g. cluster timeout, KeyboardInterrupt, killing
          of the worker process).
       * 'broken' : Indicates a trial that was not successfully evaluated for an
          unexpected reason.
    worker : str
       Corresponds to worker's unique id that handled this trial.
    submit_time : `datetime.datetime`
       When was this trial suggested?
    start_time : `datetime.datetime`
       When was this trial first reserved?
    end_time : `datetime.datetime`
       When was this trial evaluated successfully?
    results : list of `Trial.Result`
       List of evaluated metrics for this particular set of params. One and only
       one of them is necessarily an *objective* function value. The other are
       *constraints*, the value of an expression desired to be larger/equal to 0.
    params : dict of params
       Dict of suggested values for the `Experiment` parameter space.
       Constitutes a sample to be evaluated.
    parents : list
       Starts out as an empty list; it is only stored and serialized by this class.
    working_dir : str
       Working directory of the trial. Never included in `to_dict`.

    """

    __slots__ = ('experiment', '_id', '_status', 'worker', '_working_dir', 'heartbeat',
                 'submit_time', 'start_time', 'end_time', '_results', '_params', 'parents',
                 'id_override')
    allowed_stati = ('new', 'reserved', 'suspended', 'completed', 'interrupted', 'broken')

    @classmethod
    def build(cls, trial_entries):
        """Builder method for a list of trials.

        :param trial_entries: List of trial representation in dictionary form,
           as expected to be saved in a database.

        :returns: a list of corresponding `Trial` objects.
        """
        return [cls(**entry) for entry in trial_entries]

    class Value:
        """Container for a value object.

        Attributes
        ----------
        name : str
           A possible name for the quality that this is quantifying.
        type : str
           An identifier with semantic importance for **Oríon**. See
           `Param.type` and `Result.type`.
        value : str or numerical
           value suggested for this dimension of the parameter space.

        """

        __slots__ = ('name', '_type', 'value')
        allowed_types = ()

        def __init__(self, **kwargs):
            """See attributes of `Value` for possible argument for `kwargs`.

            Every slot defaults to ``None``; `kwargs` then overrides them. ``type``
            goes through the validating property setter.

            :raises ValueError: If ``type`` is given and not in `allowed_types`.
            """
            # `self.__slots__` resolves to the most derived class, and `Result` /
            # `Param` declare an empty tuple. Walk the MRO so the slots declared on
            # `Value` are initialised for subclasses as well.
            for klass in type(self).__mro__:
                for attrname in getattr(klass, '__slots__', ()):
                    setattr(self, attrname, None)

            for attrname, value in kwargs.items():
                setattr(self, attrname, value)

            self._ensure_no_ndarray()

        def _ensure_no_ndarray(self):
            """Make sure the current value is not a `numpy.ndarray`.

            Any value exposing a ``tolist`` method is replaced by ``value.tolist()``.
            """
            current = getattr(self, 'value', None)
            if hasattr(current, 'tolist'):
                self.value = current.tolist()

        def to_dict(self):
            """Needed to be able to convert `Value` to `dict` form.

            :returns: dict with the keys ``name``, ``type`` and ``value``.
            """
            return dict(name=self.name, type=self.type, value=self.value)

        def __eq__(self, other):
            """Test equality based on name, type and value.

            Returns ``NotImplemented`` for objects that are not `Trial.Value`
            instances, so such comparisons evaluate to ``False`` instead of raising.
            """
            if not isinstance(other, Trial.Value):
                return NotImplemented
            return (self.name == other.name and
                    self.type == other.type and
                    self.value == other.value)

        def __str__(self):
            """Represent partially with a string."""
            return "{0}(name={1}, type={2}, value={3})".format(
                type(self).__name__, repr(self.name),
                repr(self.type), repr(self.value))

        __repr__ = __str__

        @property
        def type(self):
            """For meaning of property type, see `Value.type`."""
            return self._type

        @type.setter
        def type(self, type_):
            """Set the type; ``None`` or a member of `allowed_types` only."""
            if type_ is not None and type_ not in self.allowed_types:
                raise ValueError("Given type, {0}, not one of: {1}".format(
                    type_, self.allowed_types))
            self._type = type_

    class Result(Value):
        """Types for a `Result` can be either an evaluation of an 'objective'
        function or of an 'constraint' expression.

        'gradient', 'statistic' and 'lie' are accepted as well (see `allowed_types`).
        """

        __slots__ = ()
        allowed_types = ('objective', 'constraint', 'gradient', 'statistic', 'lie')

    class Param(Value):
        """Types for a `Param` can be either an integer (discrete value),
        floating precision numerical or a categorical expression (e.g. a string).

        'fidelity' is accepted as well (see `allowed_types`).
        """

        __slots__ = ()
        allowed_types = ('integer', 'real', 'categorical', 'fidelity')

    def __init__(self, **kwargs):
        """See attributes of `Trial` for meaning and possible arguments for `kwargs`.

        ``results`` and ``params`` are expected as iterables of dictionaries, each
        one being the keyword arguments of a `Trial.Result` / `Trial.Param`.
        ``_id``, if given, is stored as `id_override`.
        """
        for attrname in self.__slots__:
            if attrname in ('_results', '_params', 'parents'):
                setattr(self, attrname, [])
            else:
                setattr(self, attrname, None)

        self.status = 'new'

        # Store the id as an override to support different backends
        self.id_override = kwargs.pop('_id', None)

        for attrname, value in kwargs.items():
            if attrname == 'results':
                # Filled straight into the backing list: the `results` setter (and
                # its objective check) is deliberately not involved here.
                self._results.extend(self.Result(**item) for item in value)
            elif attrname == 'params':
                self._params.extend(self.Param(**item) for item in value)
            else:
                setattr(self, attrname, value)

    def to_dict(self):
        """Needed to be able to convert `Trial` to `dict` form.

        Keys are the slot names without leading underscores, except that the
        working directory is left out and the id is stored under ``_id``.
        ``results`` and ``params`` are lists of dictionaries rather than lists of
        `Value` objects.
        """
        trial_dictionary = dict()

        for slot in self.__slots__:
            if slot == "_working_dir":
                continue

            key = slot.lstrip("_")
            if key == 'results':
                trial_dictionary[key] = [result.to_dict() for result in self.results]
            elif key == 'params':
                # Serialize the stored `Param` objects directly; the `params`
                # property would build an unflattened dict that is not wanted here.
                trial_dictionary[key] = [param.to_dict() for param in self._params]
            else:
                trial_dictionary[key] = getattr(self, key)

        trial_dictionary['_id'] = trial_dictionary.pop('id')

        return trial_dictionary

    def __str__(self):
        """Represent partially with a string."""
        return "Trial(experiment={0}, status={1}, params={2})".format(
            repr(self.experiment), repr(self._status), self.format_params(self._params))

    __repr__ = __str__

    @property
    def params(self):
        """Parameters of the trial"""
        return unflatten({param.name: param.value for param in self._params})

    @property
    def results(self):
        """List of results of the trial"""
        return self._results

    @results.setter
    def results(self, results):
        """Verify results before setting the property

        :raises ValueError: If `results` holds no 'objective' result, or if the
           objective value is not a float/int.
        """
        objective = self._fetch_one_result_of_type('objective', results)

        if objective is None:
            raise ValueError('No objective found in results: {}'.format(results))
        if not isinstance(objective.value, (float, int)):
            raise ValueError(
                'Results must contain a type `objective` with type float/int: {}'.format(objective))

        self._results = results

    @property
    def working_dir(self):
        """Return the current working directory of the trial."""
        return self._working_dir

    @working_dir.setter
    def working_dir(self, value):
        """Change the current working directory of the trial."""
        self._working_dir = value

    @property
    def status(self):
        """For meaning of property type, see `Trial.status`."""
        return self._status

    @status.setter
    def status(self, status):
        """Validate `status` with `validate_status` before storing it."""
        validate_status(status)
        self._status = status

    @property
    def id(self):
        """Return hash_name which is also the database key `_id`.

        `id_override` takes precedence whenever it is set.
        """
        if self.id_override is None:
            return self.__hash__()
        return self.id_override

    @property
    def objective(self):
        """Return this trial's objective value if it is evaluated, else None.

        :rtype: `Trial.Result`
        """
        return self._fetch_one_result_of_type('objective')

    @property
    def lie(self):
        """Return this trial's fake objective value if it was set, else None.

        :rtype: `Trial.Result`
        """
        return self._fetch_one_result_of_type('lie')

    @property
    def gradient(self):
        """Return this trial's gradient value if it is evaluated, else None.

        :rtype: `Trial.Result`
        """
        return self._fetch_one_result_of_type('gradient')

    @property
    def constraints(self):
        """
        Return this trial's constraints

        Returns
        -------
        A list of ``Trial.Result`` of type 'constraint'
        """
        return self._fetch_results('constraint', self.results)

    @property
    def statistics(self):
        """
        Return this trial's statistics

        Returns
        -------
        A list of ``Trial.Result`` of type 'statistic'
        """
        return self._fetch_results('statistic', self.results)

    @property
    def hash_name(self):
        """Generate a unique name with an md5sum hash for this `Trial`.

        .. note:: Two trials that have the same `params` must have the same `hash_name`.
        """
        return self.compute_trial_hash(self, ignore_fidelity=False)

    @property
    def hash_params(self):
        """Generate a unique param md5sum hash for this `Trial`.

        .. note:: The params contributing to the hash do not include the fidelity.
           The lie, if any, is left out as well.
        """
        return self.compute_trial_hash(self, ignore_fidelity=True, ignore_lie=True)

    def __hash__(self):
        """Return the hashname for this trial

        .. note:: This is the md5 hex digest *string*, as used by `id`. Python
           requires an integer from ``__hash__``, so the builtin ``hash()`` raises
           ``TypeError`` on a `Trial`. Kept as is because `id` and direct callers
           expect the string.
        """
        return self.hash_name

    @property
    def full_name(self):
        """Generate a unique name using the full definition of parameters.

        :raises ValueError: If either the params or the experiment is not set.
        """
        if not self._params or not self.experiment:
            raise ValueError("Cannot distinguish this trial, as 'params' or 'experiment' "
                             "have not been set.")
        return self.format_values(self._params, sep='-').replace('/', '.')

    def _fetch_results(self, type, results):
        """Fetch results for the given type

        The parameter named `type` shadows the builtin; the name is kept so that
        keyword callers keep working.
        """
        return [result for result in results if result.type == type]

    def _fetch_one_result_of_type(self, result_type, results=None):
        """Return the first result of type `result_type`, or None if there is none.

        :param result_type: Type of result to look for.
        :param results: Results to search; defaults to the results of this trial.

        A warning is logged when more than one result matches.
        """
        if results is None:
            results = self.results

        matches = self._fetch_results(result_type, results)

        if not matches:
            return None

        if len(matches) > 1:
            log.warning("Found multiple results of '%s' type:\n%s",
                        result_type, matches)
            log.warning("Multi-objective optimization is not currently supported.\n"
                        "Optimizing according to the first one only: %s", matches[0])

        return matches[0]

    def _repr_values(self, values, sep=','):
        """Represent with a string the given values."""
        return Trial.format_values(values, sep)

    def params_repr(self, sep=',', ignore_fidelity=False):
        """Represent with a string the parameters contained in this `Trial` object.

        :param sep: Separator placed between two parameters.
        :param ignore_fidelity: If True, parameters of type 'fidelity' are left out.
        """
        return Trial.format_params(self._params, sep, ignore_fidelity)

    @staticmethod
    def format_values(values, sep=','):
        """Represent with a string the given values.

        Each value is rendered as ``name:value`` and the pieces are joined with `sep`.
        """
        return sep.join("{0.name}:{0.value}".format(value) for value in values)

    @staticmethod
    def format_params(params, sep=',', ignore_fidelity=False):
        """Represent with a string the given list of `Trial.Param` objects.

        :param params: Parameters to represent.
        :param sep: Separator placed between two parameters.
        :param ignore_fidelity: If True, parameters of type 'fidelity' are left out.
        """
        if ignore_fidelity:
            params = [param for param in params if param.type != 'fidelity']
        return Trial.format_values(params, sep)

    @staticmethod
    def compute_trial_hash(trial, ignore_fidelity=False, ignore_experiment=False,
                           ignore_lie=False):
        """Generate a unique param md5sum hash for a given `Trial`

        The hash covers the formatted params, then ``str(trial.experiment)`` and the
        formatted lie result, unless the corresponding ``ignore_*`` flag is set.

        :param trial: Trial to hash.
        :param ignore_fidelity: Leave parameters of type 'fidelity' out of the hash.
        :param ignore_experiment: Leave the experiment out of the hash.
        :param ignore_lie: Leave the lie result out of the hash.

        :raises ValueError: If the trial has neither params nor an experiment.
        """
        # NOTE(review): this only raises when *both* are missing, while the message
        # (and `full_name`) say "or". Left untouched since callers may rely on it.
        if not trial._params and not trial.experiment:
            raise ValueError("Cannot distinguish this trial, as 'params' or 'experiment' "
                             "have not been set.")

        params = Trial.format_params(trial._params, ignore_fidelity=ignore_fidelity)

        experiment_repr = ""
        if not ignore_experiment:
            experiment_repr = str(trial.experiment)

        lie_repr = ""
        if not ignore_lie and trial.lie:
            lie_repr = Trial.format_values([trial.lie])

        return hashlib.md5((params + experiment_repr + lie_repr).encode('utf-8')).hexdigest()