Source code for orion.core.io.convert

"""
Parse and generate user script's configuration
==============================================

Defines and instantiates a converter for configuration file types.

Given a file path infer which configuration file parser/emitter it corresponds to.
Define `BaseConverter` classes with a common interface for many popular configuration
file types.

Currently supported:
    - YAML
    - JSON
    - See below, for configuration agnostic parsing

A `GenericConverter` is provided that tries and parses configuration
files, regardless of their type, according to predefined Oríon's markers.

"""
import importlib
import os
from abc import ABC, abstractmethod
from collections import deque

from orion.core.utils import GenericFactory, nesteddict


[docs]def infer_converter_from_file_type(config_path, regex=None, default_keyword=""): """Use filetype extension to infer and build the correct configuration file converter. """ _, ext_type = os.path.splitext(os.path.abspath(config_path)) for klass in config_converter_factory.get_classes().values(): if ext_type in klass.file_extensions: return klass() if regex is None: return GenericConverter(expression_prefix=default_keyword) return GenericConverter(regex, expression_prefix=default_keyword)
[docs]class BaseConverter(ABC): """Base class for configuration parsers/generators. Attributes ---------- file_extensions : list of strings Strings starting with '.' which identify usually a file type as a common convention. For instance, ``['.yml', '.yaml']`` for YAML files. """ file_extensions = []
[docs] def get_state_dict(self): """Give state dict that can be used to reconstruct the converter""" return {}
[docs] def set_state_dict(self, state): """Reset the converter based on previous state"""
[docs] @abstractmethod def parse(self, filepath): """Read dictionary out of the configuration file. Parameters ---------- filepath : str Full path to the original user script's configuration. """
[docs] @abstractmethod def generate(self, filepath, data): """Create a configuration file at `filepath` using dictionary `data`."""
[docs]class YAMLConverter(BaseConverter): """Converter for YAML files.""" file_extensions = [".yml", ".yaml"] def __init__(self): """Try to dynamically import yaml module.""" self.yaml = importlib.import_module("yaml")
[docs] def parse(self, filepath): """Read dictionary out of the configuration file. Parameters ---------- file : str Full path to the original user script's configuration. """ with open(filepath, encoding="utf8") as f: return self.yaml.safe_load(stream=f)
[docs] def generate(self, filepath, data): """Create a configuration file at `filepath` using dictionary `data`.""" with open(filepath, "w", encoding="utf8") as f: self.yaml.dump(data, stream=f)
[docs]class JSONConverter(BaseConverter): """Converter for JSON files.""" file_extensions = [".json"] def __init__(self): """Try to dynamically import json module.""" self.json = importlib.import_module("json")
[docs] def parse(self, filepath): """Read dictionary out of the configuration file. Parameters ---------- file : str Full path to the original user script's configuration. """ with open(filepath, encoding="utf8") as f: return self.json.load(f)
[docs] def generate(self, filepath, data): """Create a configuration file at `filepath` using dictionary `data`.""" with open(filepath, "w", encoding="utf8") as f: self.json.dump(data, f)
[docs]class GenericConverter(BaseConverter): """Generic converter for any configuration file type. For each parameter dimension declared here, one must necessarily provide a ``name`` keyword inside the `Dimension` building expression. Implementation details: As this class is supposed to provide with a generic text parser, semantics are going to be tied to their consequent usage. A template document is going to be created on `parse` and filled with values on `read`. This template document consists the state of this `BaseConverter` object. Dimension should be defined for instance as: ``meaningful_name~uniform(0, 4)`` """ def __init__( self, regex=r"([\/]?[\w|\/|-]+)~([\+]?.*\)|\-|\>[A-Za-z_]\w*)", expression_prefix="", ): """Initialize with the regex expression which will be searched for to define a `Dimension`. """ self.re_module = importlib.import_module("re") self.regex = self.re_module.compile(regex) self.expression_prefix = expression_prefix self.template = None self.has_leading = {} self.conflict_msg = "Namespace conflict in configuration file '{}', under '{}'"
[docs] def get_state_dict(self): """Give state dict that can be used to reconstruct the converter""" return dict( regex=self.regex.pattern, expression_prefix=self.expression_prefix, template=self.template, has_leading=self.has_leading, )
[docs] def set_state_dict(self, state): """Reset the converter based on previous state""" self.regex = self.re_module.compile(state["regex"]) self.expression_prefix = state["expression_prefix"] self.template = state["template"] self.has_leading = state["has_leading"]
def _raise_conflict(self, path, namespace): raise ValueError(self.conflict_msg.format(path, namespace))
[docs] def parse(self, filepath): r"""Read dictionary out of the configuration file. Create a template for Python 3 string format and save it as this object's state, by substituting '{\1}' wherever the pattern was matched. By default, the first matched group (\1) corresponds with a dimension's namespace. .. note:: Namespace in substitution templates does not contain the first '/'. Parameters ---------- filepath : str Full path to the original user script's configuration. """ with open(filepath, encoding="utf8") as f: self.template = f.read() # Search for Oríon semantic pattern pairs = self.regex.findall(self.template) ret = dict(pairs) # Every namespace given should be unique, # raise conflict if there are duplicates if len(pairs) != len(ret): namespaces = list(zip(*pairs))[0] for name in namespaces: if namespaces.count(name) != 1: self._raise_conflict(filepath, name) # Create template using each namespace as format key, # exactly as provided by the user subst = self.re_module.sub(r"{", r"{{", self.template) subst = self.re_module.sub(r"}", r"}}", subst) substituted, num_subs = self.regex.subn(r"{\1!s}", subst) assert len(ret) == num_subs, ( "This means an error in the regex. Report bug. Details::\n" f"original: {self.template}\n, regex:{self.regex}" ) self.template = substituted # Wrap it in style of what the rest of `BaseConverter`s return ret_nested = nesteddict() for namespace, expression in ret.items(): keys = namespace.split("/") if not keys[0]: # It means that user wrote a namespace starting from '/' keys = keys[1:] # Safe because of the regex pattern self.has_leading[namespace[1:]] = "/" stuff = ret_nested for i, key in enumerate(keys[:-1]): stuff = stuff[key] if isinstance(stuff, str): # If `stuff` is not a dictionary while traversing the # namespace path, then this amounts to a conflict which was # not sufficiently get caught self._raise_conflict(filepath, "/".join(keys[: i + 1])) # If final value is already filled, # then this must be also due to a conflict if stuff[keys[-1]]: self._raise_conflict(filepath, namespace) # Keep compatibility with `SpaceBuilder._build_from_config` stuff[keys[-1]] = self.expression_prefix + expression return ret_nested
[docs] def generate(self, filepath, data): """Create a configuration file at `filepath` using dictionary `data`.""" unnested_data = {} stack = deque() stack.append(([], data)) while True: try: namespace, stuff = stack.pop() except IndexError: break if isinstance(stuff, dict): for k, v in stuff.items(): stack.append((["/".join(namespace + [str(k)])], v)) else: name = namespace[0] unnested_data[self.has_leading.get(name, "") + name] = stuff document = self.template.format(**unnested_data) with open(filepath, "w", encoding="utf8") as f: f.write(document)
config_converter_factory = GenericFactory(BaseConverter)