# -*- coding: utf-8 -*-
"""
:mod:`orion.core.resolve_config` -- Configuration parsing and resolving
=======================================================================
.. module:: resolve_config
:platform: Unix
:synopsis: How does orion resolve configuration settings?
How:
- Experiment name resolves like this:
* cmd-arg **>** cmd-provided orion_config **>** REQUIRED (no default is given)
- Database options resolve with the following precedence (high to low):
* cmd-provided orion_config **>** env vars **>** default files **>** defaults
.. seealso:: :const:`ENV_VARS`, :const:`ENV_VARS_DB`
- All other managerial, `Optimization` or `Dynamic` options resolve like this:
* cmd-args **>** cmd-provided orion_config **>** database (if experiment name
can be found) **>** default files
Default files are given as a list at :const:`orion.core.DEF_CONFIG_FILES_PATHS` and a
precedence is respected when building the settings dictionary:
* default orion example file **<** system-wide config **<** user-wide config
.. note:: `Optimization` entries are required, `Dynamic` entry is optional.
"""
import copy
import getpass
import hashlib
import logging
import os
import git
from numpy import inf as infinity
import yaml
import orion
import orion.core
from orion.core import config
from orion.core.io.orion_cmdline_parser import OrionCmdlineParser
from orion.core.utils.flatten import unflatten
[docs]def is_exe(path):
"""Test whether `path` describes an executable file."""
return os.path.isfile(path) and os.access(path, os.X_OK)
log = logging.getLogger(__name__)
################################################################################
# Default Settings and Environmental Variables #
################################################################################
# Default settings for command line arguments (option, description)
DEF_CMD_MAX_TRIALS = (infinity, 'inf/until preempted')
DEF_CMD_WORKER_TRIALS = (infinity, 'inf/until preempted')
DEF_CMD_POOL_SIZE = (1, str(1))
# list containing tuples of
# (environmental variable names, configuration keys, default values)
ENV_VARS_DB = [
('ORION_DB_NAME', 'name'),
('ORION_DB_TYPE', 'type'),
('ORION_DB_ADDRESS', 'host'),
('ORION_DB_PORT', 'port')
]
# TODO: Default resource from environmental (localhost)
# dictionary describing lists of environmental tuples (e.g. `ENV_VARS_DB`)
# by a 'key' to be used in the experiment's configuration dict
ENV_VARS = dict(
database=ENV_VARS_DB
)
def _convert_dashes(config, ref):
"""Convert dash in keys to underscores based on a reference dict.
The reference is used to avoid converting keys in dictionary that are values
of options.
"""
config = copy.deepcopy(config)
for key in list(config.keys()):
converted_key = key.replace('-', '_')
if converted_key in ref:
config[converted_key] = config.pop(key)
if all(isinstance(item[converted_key], dict) for item in [config, ref]):
config[converted_key] = _convert_dashes(config[converted_key], ref[converted_key])
return config
# NOTE: Silencing this pylint error for now, but seriously this function is quite horrible.
# We'll need to clean this up at some point...
# pylint:disable=too-many-branches
[docs]def fetch_config_from_cmdargs(cmdargs):
"""Turn flat cmdargs into nested dicts like orion.core.config."""
config_file = cmdargs.pop('config', None)
tmp_cmdargs = copy.deepcopy(cmdargs)
tmp_cmdargs['config'] = config_file
cmdargs['config'] = config_file
cmdargs = tmp_cmdargs
cmdargs_config = {}
if cmdargs.get('max_trials') is not None:
log.warning(
'--max-trials is deprecated and will be removed in v0.3. '
'Use --exp-max-trials instead')
cmdargs_config['experiment.max_trials'] = cmdargs.pop('max_trials')
if cmdargs.get('worker_trials') is not None:
log.warning(
'--worker-trials is deprecated and will be removed in v0.3. '
'Use --worker-max-trials instead')
cmdargs_config['worker.max_trials'] = cmdargs.pop('worker_trials')
mappings = dict(
experiment=dict(
exp_max_broken='max_broken',
exp_max_trials='max_trials'),
worker=dict(
worker_max_broken='max_broken',
worker_max_trials='max_trials'))
mappings = dict(
experiment=dict(
max_broken='exp_max_broken',
max_trials='exp_max_trials'),
worker=dict(
max_broken='worker_max_broken',
max_trials='worker_max_trials'))
global_config = config.to_dict()
for key in ['config', 'user_args']:
if cmdargs.get(key) not in [False, None]:
cmdargs_config[key] = cmdargs[key]
for key in ['name', 'user', 'version']:
if cmdargs.get(key) not in [False, None]:
cmdargs_config[f'experiment.{key}'] = cmdargs[key]
for key in ['branch_from', 'branch_to']:
if cmdargs.get(key) not in [False, None]:
cmdargs_config[f'evc.{key}'] = cmdargs[key]
# Apply config at the root
for key in ['debug']:
# Adapt to cli arguments
cli_key = mappings.get(key, key)
value = cmdargs.pop(cli_key, None)
if value is not None:
cmdargs_config[f'{key}'] = value
# Apply to subconfigs
for key in ['experiment', 'worker', 'evc']:
for subkey in global_config[key].keys():
# Adapt to cli arguments
cli_key = mappings.get(key, {}).get(subkey, subkey)
value = cmdargs.pop(cli_key, None)
if value is not None:
cmdargs_config[f'{key}.{subkey}'] = value
return unflatten(cmdargs_config)
[docs]def fetch_config(args):
"""Return the config inside the .yaml file if present."""
orion_file = args.get('config')
local_config = {}
if orion_file:
log.debug("Found orion configuration file at: %s", os.path.abspath(orion_file.name))
orion_file.seek(0)
tmp_config = yaml.safe_load(orion_file)
global_config = config.to_dict()
tmp_config = _convert_dashes(tmp_config, global_config)
# Fix deprecations first because some names are shared by experiment and worker
max_trials = tmp_config.pop('max_trials', None)
if max_trials is not None:
log.warning(
'(DEPRECATED) Option `max_trials` is deprecated '
'and will be removed in v0.3. Use instead the option'
'\nexperiment:\n max_trials: %s', max_trials)
local_config['experiment.max_trials'] = max_trials
worker_trials = tmp_config.get('experiment', {}).pop('worker_trials', None)
if worker_trials is not None:
log.warning(
'(DEPRECATED) Option `experiment.worker_trials` is deprecated '
'and will be removed in v0.3. Use instead the option'
'\nworker:\n max_trials: %s', worker_trials)
local_config['worker.max_trials'] = worker_trials
worker_trials = tmp_config.pop('worker_trials', None)
if worker_trials is not None:
log.warning(
'(DEPRECATED) Option `worker_trials` is deprecated '
'and will be removed in v0.3. Use instead the option'
'\nworker:\n max_trials: %s', worker_trials)
local_config['worker.max_trials'] = worker_trials
producer = tmp_config.pop('producer', None)
if producer is not None:
log.warning(
'(DEPRECATED) Option `producer` is deprecated '
'and will be removed in v0.3. Use instead the option'
'\nexperiment:\n strategy: %s', producer['strategy'])
local_config['experiment.strategy'] = producer['strategy']
producer = tmp_config.get('experiment', {}).pop('producer', None)
if producer is not None:
log.warning(
'(DEPRECATED) Option `experiment.producer` is deprecated '
'and will be removed in v0.3. Use instead the option'
'\nexperiment:\n strategy: %s', producer['strategy'])
local_config['experiment.strategy'] = producer['strategy']
local_config = unflatten(local_config)
# For backward compatibility
for key in ['storage', 'experiment', 'worker', 'evc']:
subkeys = list(global_config[key].keys())
# Arguments that are only supported locally
if key == 'experiment':
subkeys += ['name', 'version', 'user']
elif key == 'evc':
subkeys += ['branch_from', 'branch_to']
for subkey in subkeys:
# Backward compatibility
backward_value = tmp_config.pop(subkey, None)
if backward_value is not None:
log.warning(
'(DEPRECATED) Option `%s` and will be removed in v0.3. '
'Use instead the option' '\n%s:\n %s:\n %s',
subkey, key, subkey, yaml.dump(backward_value, indent=6))
value = tmp_config.get(key, {}).pop(subkey, backward_value)
if value is not None:
local_config.setdefault(key, {})
local_config[key][subkey] = value
return local_config
[docs]def fetch_default_options():
"""Create a dict with options from the default configuration files.
Respect precedence from application's default, to system's and
user's.
.. seealso:: :const:`orion.core.DEF_CONFIG_FILES_PATHS`
"""
default_config = dict()
# get some defaults
default_config['name'] = None
default_config['user'] = getpass.getuser()
default_config['max_trials'] = DEF_CMD_MAX_TRIALS[0]
default_config['worker_trials'] = DEF_CMD_WORKER_TRIALS[0]
default_config['pool_size'] = DEF_CMD_POOL_SIZE[0]
default_config['algorithms'] = 'random'
# get default options for some managerial variables (see :const:`ENV_VARS`)
for signifier, env_vars in ENV_VARS.items():
default_config[signifier] = {}
for _, key in env_vars:
default_config[signifier][key] = config[signifier][key]
# fetch options from default configuration files
for configpath in orion.core.DEF_CONFIG_FILES_PATHS:
try:
with open(configpath) as f:
cfg = yaml.safe_load(f)
if cfg is None:
continue
# implies that yaml must be in dict form
for k, v in cfg.items():
if k in ENV_VARS:
default_config[k] = {}
for vk, vv in v.items():
default_config[k][vk] = vv
else:
if k != 'name':
default_config[k] = v
except IOError as e: # default file could not be found
log.debug(e)
except AttributeError as e:
log.warning("Problem parsing file: %s", configpath)
log.warning(e)
return default_config
[docs]def fetch_env_vars():
"""Fetch environmental variables related to orion's managerial data."""
env_vars = {}
for signif, evars in ENV_VARS.items():
env_vars[signif] = {}
for var_name, key in evars:
value = os.getenv(var_name)
if value is not None:
env_vars[signif][key] = value
return env_vars
[docs]def merge_configs(*configs):
"""Merge configuration dictionaries following the given hierarchy
Suppose function is called as merge_configs(A, B, C). Then any pair (key, value) in C would
overwrite any previous value from A or B. Same apply for B over A.
If for some pair (key, value), the value is a dictionary, then it will either overwrite previous
value if it was not also a directory, or it will be merged following
`merge_configs(old_value, new_value)`.
.. warning:
Redefinition of subdictionaries may lead to confusing results because merges do not remove
data.
If for instance, we have {'a': {'b': 1, 'c': 2}} and we would like to update `'a'` such that
it only have `{'c': 3}`, it won't work with {'a': {'c': 3}}.
merge_configs({'a': {'b': 1, 'c': 2}}, {'a': {'c': 3}}) -> {'a': {'b': 1, 'c': 3}}
Examples
--------
.. code-block:: python
:linenos:
a = {'a': 1, 'b': {'c': 2}}
b = {'b': {'c': 3}}
c = {'b': {'c': {'d': 4}}}
m = resolve_config.merge_configs(a, b, c)
assert m == {'a': 1, 'b': {'c': {'d': 4}}}
a = {'a': 1, 'b': {'c': 2, 'd': 3}}
b = {'b': {'c': 4}}
c = {'b': {'c': {'e': 5}}}
m = resolve_config.merge_configs(a, b, c)
assert m == {'a': 1, 'b': {'c': {'e': 5}, 'd': 3}}
"""
merged_config = configs[0]
for config_i in configs[1:]:
for key, value in config_i.items():
if isinstance(value, dict) and isinstance(merged_config.get(key), dict):
merged_config[key] = merge_configs(merged_config[key], value)
elif value is not None:
merged_config[key] = value
return merged_config
[docs]def fetch_user_repo(user_script):
"""Fetch the GIT repo and its root path given user's script."""
dir_path = os.path.dirname(os.path.abspath(user_script))
try:
git_repo = git.Repo(dir_path, search_parent_directories=True)
except git.exc.InvalidGitRepositoryError:
git_repo = None
logging.warning('Script %s is not in a git repository. Code modification '
'won\'t be detected.', os.path.abspath(user_script))
return git_repo