Source code for orion.analysis.partial_dependency_utils

"""
Tools to compute Partial Dependency
===================================
"""
import copy
import itertools

import numpy
import pandas

from orion.analysis.base import flatten_numpy, flatten_params, to_numpy, train_regressor
from orion.core.utils import format_trials
from orion.core.worker.transformer import build_required_space


def partial_dependency(
    trials,
    space,
    params=None,
    model="RandomForestRegressor",
    n_grid_points=10,
    n_samples=50,
    **kwargs
):
    """
    Calculates the partial dependency of parameters in a collection of
    :class:`orion.core.worker.trial.Trial`.

    Parameters
    ----------
    trials: DataFrame or dict
        A dataframe of trials containing, at least, the columns 'objective' and 'id'. Or a dict
        equivalent. A dict is converted to a DataFrame before any processing.

    space: Space object
        A space object from an experiment.

    params: list of str, optional
        The parameters to include in the computation. All parameters are included by default.

    model: str
        Name of the regression model to use. Can be one of
        - AdaBoostRegressor
        - BaggingRegressor
        - ExtraTreesRegressor
        - GradientBoostingRegressor
        - RandomForestRegressor (Default)

    n_grid_points: int
        Number of points in the grid to compute partial dependency. Default is 10.

    n_samples: int
        Number of samples to randomly generate the grid used to compute the partial dependency.
        Default is 50.

    **kwargs
        Arguments for the regressor model.

    Returns
    -------
    dict
        Empty if there are no trials. Otherwise, keys are each parameter individually
        (dim1.name) and each combination of two parameters (dim1.name, dim2.name). Every value
        is a tuple ``(grid, averages, stds)`` where ``grid`` maps the parameter name(s) to their
        grid values brought back to the original space, and ``averages`` / ``stds`` are the
        arrays returned by :func:`partial_dependency_grid` for that grid.

    """
    params = flatten_params(space, params)

    flattened_space = build_required_space(
        space,
        dist_requirement="linear",
        type_requirement="numerical",
        shape_requirement="flattened",
    )

    # The documented dict form has neither ``.empty`` nor ``.shape``; normalise it so
    # the rest of the function only ever deals with a DataFrame.
    if isinstance(trials, dict):
        trials = pandas.DataFrame(trials)

    # ``DataFrame.empty`` is already True when there are zero rows.
    if trials.empty:
        return {}

    observations = flatten_numpy(to_numpy(trials, space), flattened_space)
    regressor = train_regressor(model, observations, **kwargs)

    # Random points of the flattened space on which the regressor is evaluated.
    samples = pandas.DataFrame(
        [
            format_trials.trial_to_tuple(trial, flattened_space)
            for trial in flattened_space.sample(n_samples)
        ],
        columns=flattened_space.keys(),
    )

    partial_dependencies = {}
    for x_i, x_name in enumerate(params):
        # Single-parameter dependency.
        grid, averages, stds = partial_dependency_grid(
            flattened_space, regressor, [x_name], samples, n_grid_points
        )
        grid = reverse(flattened_space, grid)
        partial_dependencies[x_name] = (grid, averages, stds)

        # Pairwise dependencies with every parameter that comes after x_name,
        # so each unordered pair is computed exactly once.
        for y_name in params[x_i + 1 :]:
            grid, averages, stds = partial_dependency_grid(
                flattened_space, regressor, [x_name, y_name], samples, n_grid_points
            )
            grid = reverse(flattened_space, grid)
            partial_dependencies[(x_name, y_name)] = (grid, averages, stds)

    return partial_dependencies
def reverse(transformed_space, grid):
    """Reverse transformations on the grid to bring back to original space

    For every parameter in ``grid``, each value is passed through the ``reverse``
    method of ``transformed_space[param].original_dimension`` and the results are
    stored as a list. ``grid`` is updated in place and the same object is returned.
    """
    for name in grid:
        source_dim = transformed_space[name].original_dimension
        grid[name] = [source_dim.reverse(point) for point in grid[name]]

    return grid
def make_grid(dim, n_points):
    """Build a grid of n_points for a dim

    When ``dim.prior_name`` is ``"choices"`` the grid is every integer step from the
    lower to the upper bound of ``dim.interval()`` (both included) and ``n_points``
    is ignored. Otherwise the grid is ``n_points`` evenly spaced values spanning the
    interval.
    """
    uses_choices = dim.prior_name == "choices"
    bounds = dim.interval()

    if uses_choices:
        first, last = bounds
        return numpy.arange(first, last + 1)

    return numpy.linspace(*bounds, num=n_points)
def partial_dependency_grid(space, model, params, samples, n_points=40):
    """Compute the dependency grid for a given set of params (1 or 2)

    A private copy of ``samples`` is made. For every combination of grid values, the
    columns named in ``params`` are overwritten with that combination, ``model.predict``
    is run on the modified samples, and the mean and standard deviation of the
    predictions are recorded.

    Returns a tuple ``(grids, averages, stds)``: ``grids`` maps each param name to its
    grid as built by ``make_grid``; ``averages`` and ``stds`` are filled with one axis
    per param, in ``params`` order, and then transposed before being returned.
    """
    # Work on a copy so the caller's samples are never overwritten.
    samples = copy.deepcopy(samples)

    grids = {name: make_grid(space[name], n_points) for name in params}
    lengths = [len(grids[name]) for name in params]

    averages = numpy.zeros(lengths)
    stds = numpy.zeros(lengths)

    # Two products walked in lockstep: one gives the position in the result
    # arrays, the other gives the grid values sitting at that position.
    positions = itertools.product(*(range(length) for length in lengths))
    value_sets = itertools.product(*grids.values())

    for z_idx, combination in zip(positions, value_sets):
        # Pin the selected params to this grid point for every sample.
        for slot, name in enumerate(params):
            samples[name] = combination[slot]

        predictions = model.predict(samples.to_numpy())
        averages[z_idx] = numpy.mean(predictions)
        stds[z_idx] = numpy.std(predictions)

    return grids, averages.T, stds.T