# Source code for orion.core.io.convert

"""
Parse and generate user script's configuration
==============================================

Defines and instantiates a converter for configuration file types.

Given a file path infer which configuration file parser/emitter it corresponds to.
Define `BaseConverter` classes with a common interface for many popular configuration
file types.

Currently supported:
    - YAML
    - JSON
    - See below, for configuration agnostic parsing

A `GenericConverter` is provided that tries to parse configuration
files, regardless of their type, according to predefined Oríon markers.

"""
import importlib
import os
from abc import ABC, abstractmethod
from collections import deque

from orion.core.utils import GenericFactory, nesteddict


def infer_converter_from_file_type(config_path, regex=None, default_keyword=""):
    """Use the file extension to infer and build the matching configuration converter.

    Parameters
    ----------
    config_path : str
        Path to the user script's configuration file. Only its extension
        is inspected here; the file is not opened.
    regex : str, optional
        Pattern forwarded to `GenericConverter` when no registered converter
        lists the extension. When ``None``, `GenericConverter` falls back to
        its own default pattern.
    default_keyword : str, optional
        Forwarded to `GenericConverter` as ``expression_prefix``.

    Returns
    -------
    BaseConverter
        An instance of the first registered converter class whose
        ``file_extensions`` contains the extension, otherwise a
        `GenericConverter`.

    """
    _, ext_type = os.path.splitext(os.path.abspath(config_path))
    for klass in config_converter_factory.get_classes().values():
        if ext_type in klass.file_extensions:
            return klass()

    # No dedicated converter: fall back to the marker-based generic one.
    # `regex` is only passed along when the caller supplied it, so that the
    # default pattern of `GenericConverter` stays in charge otherwise.
    generic_kwargs = {"expression_prefix": default_keyword}
    if regex is not None:
        generic_kwargs["regex"] = regex
    return GenericConverter(**generic_kwargs)


class BaseConverter(ABC):
    """Base class for configuration parsers/generators.

    Subclasses must implement `parse` and `generate`. The state-dict hooks
    have no-op defaults for converters that carry no state.

    Attributes
    ----------
    file_extensions : list of strings
       Strings starting with '.' which usually identify a file type by
       common convention. For instance, ``['.yml', '.yaml']`` for YAML files.

    """

    file_extensions = []

    def get_state_dict(self):
        """Give state dict that can be used to reconstruct the converter.

        The base implementation returns an empty dictionary.
        """
        return {}

    def set_state_dict(self, state):
        """Reset the converter based on previous state.

        The base implementation ignores `state` and does nothing.
        """

    @abstractmethod
    def parse(self, filepath):
        """Read dictionary out of the configuration file.

        Parameters
        ----------
        filepath : str
           Full path to the original user script's configuration.

        """

    @abstractmethod
    def generate(self, filepath, data):
        """Create a configuration file at `filepath` using dictionary `data`."""


class YAMLConverter(BaseConverter):
    """Converter for YAML files."""

    file_extensions = [".yml", ".yaml"]

    def __init__(self):
        """Try to dynamically import yaml module.

        The import happens at construction time, so the ``yaml`` package is
        only required when a YAML converter is actually instantiated.
        """
        self.yaml = importlib.import_module("yaml")

    def parse(self, filepath):
        """Read dictionary out of the configuration file.

        Parameters
        ----------
        filepath : str
           Full path to the original user script's configuration.

        Returns
        -------
        The object produced by ``yaml.safe_load`` for the file's content.

        """
        with open(filepath, encoding="utf8") as f:
            return self.yaml.safe_load(stream=f)

    def generate(self, filepath, data):
        """Create a configuration file at `filepath` using dictionary `data`.

        An existing file at `filepath` is overwritten.
        """
        with open(filepath, "w", encoding="utf8") as f:
            self.yaml.dump(data, stream=f)


class JSONConverter(BaseConverter):
    """Converter for JSON files."""

    file_extensions = [".json"]

    def __init__(self):
        """Try to dynamically import json module."""
        self.json = importlib.import_module("json")

    def parse(self, filepath):
        """Read dictionary out of the configuration file.

        Parameters
        ----------
        filepath : str
           Full path to the original user script's configuration.

        Returns
        -------
        The object decoded by ``json.load`` from the file's content.

        """
        with open(filepath, encoding="utf8") as f:
            return self.json.load(f)

    def generate(self, filepath, data):
        """Create a configuration file at `filepath` using dictionary `data`.

        An existing file at `filepath` is overwritten.
        """
        with open(filepath, "w", encoding="utf8") as f:
            self.json.dump(data, f)


class GenericConverter(BaseConverter):
    """Generic converter for any configuration file type.

    For each parameter dimension declared here, one must necessarily
    provide a ``name`` keyword inside the `Dimension` building expression.

    Implementation details: As this class is supposed to provide a
    generic text parser, semantics are going to be tied to their consequent
    usage. A template document is created on `parse` and filled
    with values on `generate`. This template document constitutes the state
    of this `BaseConverter` object.

    Dimension should be defined for instance as:
    ``meaningful_name~uniform(0, 4)``

    Attributes
    ----------
    regex : compiled pattern
        Pattern whose first group is a dimension's namespace and whose
        second group is its expression.
    expression_prefix : str
        String prepended to every expression returned by `parse`.
    template : str or None
        ``str.format`` template built by `parse`; ``None`` until then.
    has_leading : dict
        Maps a namespace (without its leading '/') to ``"/"`` for every
        namespace the user wrote with a leading slash, so that `generate`
        can rebuild the exact template key.

    """

    def __init__(
        self,
        regex=r"([\/]?[\w|\/|-]+)~([\+]?.*\)|\-|\>[A-Za-z_]\w*)",
        expression_prefix="",
    ):
        """Initialize with the regex expression which will be searched for
        to define a `Dimension`.
        """
        self.re_module = importlib.import_module("re")
        self.regex = self.re_module.compile(regex)
        self.expression_prefix = expression_prefix
        self.template = None
        self.has_leading = {}
        self.conflict_msg = "Namespace conflict in configuration file '{}', under '{}'"

    def get_state_dict(self):
        """Give state dict that can be used to reconstruct the converter"""
        return dict(
            regex=self.regex.pattern,
            expression_prefix=self.expression_prefix,
            template=self.template,
            has_leading=self.has_leading,
        )

    def set_state_dict(self, state):
        """Reset the converter based on previous state.

        `state` must provide the keys returned by `get_state_dict`.
        """
        self.regex = self.re_module.compile(state["regex"])
        self.expression_prefix = state["expression_prefix"]
        self.template = state["template"]
        self.has_leading = state["has_leading"]

    def _raise_conflict(self, path, namespace):
        """Raise ``ValueError`` reporting a namespace conflict in file `path`."""
        raise ValueError(self.conflict_msg.format(path, namespace))

    def parse(self, filepath):
        r"""Read dictionary out of the configuration file.

        Create a template for Python 3 string format and save it as this
        object's state, by substituting '{\1!s}' wherever the pattern
        was matched. By default, the first matched group (\1) corresponds
        with a dimension's namespace.

        .. note:: Namespace in substitution templates is kept exactly as
           written by the user, while keys of the returned dictionary do not
           contain the first '/'.

        Parameters
        ----------
        filepath : str
           Full path to the original user script's configuration.

        Returns
        -------
        nesteddict
            Nested mapping from namespace components to the matched
            expression, prefixed with ``expression_prefix``.

        Raises
        ------
        ValueError
            If a namespace is declared more than once, or if one namespace
            is both a leaf and a parent of another.

        """
        with open(filepath, encoding="utf8") as f:
            self.template = f.read()

        # Leading-slash bookkeeping describes the template loaded just above;
        # start from scratch so entries of a previously parsed file cannot
        # leak into `generate`.
        self.has_leading = {}

        # Search for Oríon semantic pattern
        pairs = self.regex.findall(self.template)
        ret = dict(pairs)

        # Every namespace given should be unique,
        # raise conflict if there are duplicates
        if len(pairs) != len(ret):
            namespaces = [name for name, _ in pairs]
            for name in namespaces:
                if namespaces.count(name) != 1:
                    self._raise_conflict(filepath, name)

        # Create template using each namespace as format key,
        # exactly as provided by the user. Literal braces of the document are
        # doubled first so that `str.format` leaves them untouched.
        subst = self.template.replace("{", "{{").replace("}", "}}")
        substituted, num_subs = self.regex.subn(r"{\1!s}", subst)
        assert len(ret) == num_subs, (
            "This means an error in the regex. Report bug. Details::\n"
            f"original: {self.template}\n, regex:{self.regex}"
        )
        self.template = substituted

        # Wrap it in style of what the rest of `BaseConverter`s return
        ret_nested = nesteddict()
        for namespace, expression in ret.items():
            keys = namespace.split("/")
            if not keys[0]:  # It means that user wrote a namespace starting from '/'
                keys = keys[1:]  # Safe because of the regex pattern
                self.has_leading[namespace[1:]] = "/"

            node = ret_nested
            for depth, key in enumerate(keys[:-1], start=1):
                node = node[key]
                if isinstance(node, str):
                    # Hitting a string while walking down the namespace path
                    # means a parent of this namespace was already used as a
                    # leaf: a conflict the duplicate check above cannot catch.
                    self._raise_conflict(filepath, "/".join(keys[:depth]))

            leaf = keys[-1]
            # If final value is already filled,
            # then this must be also due to a conflict
            if node[leaf]:
                self._raise_conflict(filepath, namespace)

            # Keep compatibility with `SpaceBuilder._build_from_config`
            node[leaf] = self.expression_prefix + expression

        return ret_nested

    def generate(self, filepath, data):
        """Create a configuration file at `filepath` using dictionary `data`.

        `data` is flattened into '/'-joined namespaces, the leading '/'
        recorded by `parse` is restored where needed, and the result is used
        to fill the template.

        Raises
        ------
        TypeError
            If `data` is not a dictionary.

        """
        if not isinstance(data, dict):
            raise TypeError(
                f"`data` must be a dictionary, got {type(data).__name__}"
            )

        unnested_data = {}
        # Depth-first flattening; each entry is (namespace so far, value).
        # `None` marks the root, which contributes no namespace component.
        stack = deque([(None, data)])
        while stack:
            name, value = stack.pop()
            if isinstance(value, dict):
                for key, child in value.items():
                    child_name = str(key) if name is None else f"{name}/{key}"
                    stack.append((child_name, child))
            else:
                unnested_data[self.has_leading.get(name, "") + name] = value

        document = self.template.format(**unnested_data)

        with open(filepath, "w", encoding="utf8") as f:
            f.write(document)


# Module-level factory built over `BaseConverter`; `infer_converter_from_file_type`
# iterates its `get_classes()` mapping to match a file extension to a converter class.
# NOTE(review): presumably `GenericFactory` collects the `BaseConverter` subclasses
# defined in this module — confirm in `orion.core.utils`.
config_converter_factory = GenericFactory(BaseConverter)