# Source code for orion.plotting.backend_plotly

"""
Plotly backend for plotting methods
===================================

"""
import functools
from collections.abc import Iterable

import numpy
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import orion.analysis
import orion.analysis.base
from orion.algo.space import Categorical, Fidelity
from orion.core.worker.transformer import build_required_space


def lpi(
    experiment,
    with_evc_tree=True,
    model="RandomForestRegressor",
    model_kwargs=None,
    n_points=20,
    n_runs=10,
    **kwargs,
):
    """Plotly implementation of `orion.plotting.lpi`

    Builds a bar chart from the frame returned by ``orion.analysis.lpi``:
    its index gives the bar labels, its ``LPI`` column the bar heights and
    its ``STD`` column the error bars. Only trials whose status is
    ``"completed"`` are analysed; when there are none an empty figure is
    returned.

    Raises
    ------
    ValueError
        If ``experiment`` is falsy.
    """
    if not experiment:
        raise ValueError("Parameter 'experiment' is None")

    regressor_options = {} if model_kwargs is None else model_kwargs

    trials = experiment.to_pandas(with_evc_tree=with_evc_tree)
    completed = trials.loc[trials["status"] == "completed"]
    if completed.empty:
        return go.Figure()

    importance = orion.analysis.lpi(
        completed,
        experiment.space,
        model=model,
        n_points=n_points,
        n_runs=n_runs,
        **regressor_options,
    )

    bars = go.Bar(
        x=importance.index.tolist(),
        y=importance["LPI"].tolist(),
        error_y=dict(type="data", array=importance["STD"].tolist()),
    )
    fig = go.Figure(data=[bars])
    fig.update_layout(
        title=f"LPI for experiment '{experiment.name}'",
        xaxis_title="Hyperparameters",
        yaxis_title="Local Parameter Importance (LPI)",
    )
    return fig
def parallel_coordinates(
    experiment, with_evc_tree=True, order=None, colorscale="YlOrRd", **kwargs
):
    """Plotly implementation of `orion.plotting.parallel_coordinates`

    One axis is drawn per flattened parameter, followed by a final axis for
    the objective, which also drives the line colors. Only trials whose
    status is ``"completed"`` are plotted; an empty figure is returned when
    there are none.

    Raises
    ------
    ValueError
        If ``experiment`` is falsy.
    """

    def build_frame():
        """Builds the dataframe for the plot"""
        names = list(experiment.space.keys())
        trials = experiment.to_pandas(with_evc_tree=with_evc_tree)
        trials = trials.loc[trials["status"] == "completed"]
        if trials.empty:
            return trials

        curate = functools.partial(_curate_params, space=experiment.space)
        trials[names] = trials[names].transform(curate)
        return _flatten_dims(trials, experiment.space)

    def infer_order(space, order):
        """Create order if not passed, otherwise verify it"""
        params = orion.analysis.base.flatten_params(space, order)
        if order is not None:
            return params

        # No explicit order: move the first fidelity dimension, if any, to
        # the front of the axes.
        fidelity = next(
            (
                dim.name
                for dim in experiment.space.values()
                if isinstance(dim, Fidelity)
            ),
            None,
        )
        if fidelity in params:
            params.remove(fidelity)
            params.insert(0, fidelity)
        return params

    def get_dimension(data, name, dim):
        """Describe one parallel-coordinates axis as a dict"""
        values = data[name]
        if dim.type == "categorical":
            categories = dim.interval()
            return dict(
                label=name,
                values=values,
                tickvals=list(range(len(categories))),
                ticktext=categories,
            )
        return dict(label=name, values=values, range=dim.interval())

    if not experiment:
        raise ValueError("Parameter 'experiment' is None")

    df = build_frame()
    if df.empty:
        return go.Figure()

    completed_trials = experiment.fetch_trials_by_status(
        "completed", with_evc_tree=with_evc_tree
    )
    trial = completed_trials[0]

    flattened_space = build_required_space(
        experiment.space, shape_requirement="flattened"
    )

    dimensions = [
        get_dimension(df, name, flattened_space[name])
        for name in infer_order(experiment.space, order)
    ]

    objective_name = trial.objective.name
    objectives = df["objective"]
    omin = min(objectives)
    omax = max(objectives)

    # The objective is always the last axis.
    dimensions.append(
        dict(label=objective_name, range=(omin, omax), values=objectives)
    )

    line_style = dict(
        color=objectives,
        colorscale=colorscale,
        showscale=True,
        cmin=omin,
        cmax=omax,
        colorbar=dict(title=objective_name),
    )
    fig = go.Figure(data=go.Parcoords(line=line_style, dimensions=dimensions))
    fig.update_layout(
        title=f"Parallel Coordinates Plot for experiment '{experiment.name}'"
    )
    return fig
def rankings(experiments, with_evc_tree=True, order_by="suggested", **kwargs):
    """Plotly implementation of `orion.plotting.rankings`

    ``experiments`` is accepted in the shapes handled by
    ``reformat_competitions`` below: a dict mapping names to experiments, a
    dict mapping names to iterables of experiments (one competition per
    position), a sequence of experiments, or a sequence of dicts.

    Raises
    ------
    ValueError
        If ``experiments`` is falsy or ``order_by`` is not one of
        ``"suggested"``, ``"reserved"``, ``"completed"``.
    """

    def reformat_competitions(experiments):
        """Normalize the input to one dict, or a list of dicts, of experiments"""
        if isinstance(experiments, dict) and isinstance(
            next(iter(experiments.values())), Iterable
        ):
            # name -> [exp_0, exp_1, ...] becomes
            # [{name: exp_0, ...}, {name: exp_1, ...}, ...]
            n_competitions = len(next(iter(experiments.values())))
            competitions = [
                {name: group[ith_competition] for name, group in experiments.items()}
                for ith_competition in range(n_competitions)
            ]
        elif isinstance(experiments, dict):
            competitions = experiments
        elif isinstance(experiments, Iterable) and not isinstance(experiments[0], dict):
            competitions = {
                f"{experiment.name}-v{experiment.version}": experiment
                for experiment in experiments
            }
        else:
            competitions = experiments

        return competitions

    def build_groups(competitions):
        """Rank a single competition, or average the ranks of several"""
        if isinstance(competitions, dict):
            return build_frame(competitions)

        df = pd.concat([build_frame(competition) for competition in competitions])
        return orion.analysis.average(
            df, group_by=["order", "name"], key="rank", return_var=True
        )

    def build_frame(competition):
        """Builds the dataframe for the plot"""
        frames = []
        for name, experiment in competition.items():
            df = experiment.to_pandas(with_evc_tree=with_evc_tree)
            df = df.loc[df["status"] == "completed"]
            df = df.sort_values(order_by)
            df = orion.analysis.regret(df)
            df["name"] = [name] * len(df)
            df["order"] = range(len(df))
            frames.append(df)

        df = pd.concat(frames)
        return orion.analysis.ranking(df)

    def get_objective_name(competition):
        """Infer name of objective based on trials of one experiment"""
        if not isinstance(competition, dict):
            return get_objective_name(competition[0])

        for experiment in competition.values():
            trials = experiment.fetch_trials_by_status("completed")
            if trials:
                return trials[0].objective.name
        return "objective"

    ORDER_KEYS = ["suggested", "reserved", "completed"]

    if not experiments:
        raise ValueError("Parameter 'experiment' is None")

    if order_by not in ORDER_KEYS:
        raise ValueError(f"Parameter 'order_by' is not one of {ORDER_KEYS}")

    competitions = reformat_competitions(experiments)
    df = build_groups(competitions)

    fig = go.Figure()

    if df.empty:
        return fig

    palette = px.colors.qualitative.G10
    names = set(df["name"])
    for i, name in enumerate(sorted(names)):
        # Cycle through the palette so that having more curves than palette
        # entries reuses colors instead of raising IndexError.
        color = palette[i % len(palette)]
        exp_data = df[df["name"] == name]
        if "rank_mean" in exp_data:
            y = exp_data["rank_mean"]
        else:
            y = exp_data["rank"]
        x = list(range(len(y)))
        fig.add_scatter(
            x=x,
            y=y,
            mode="lines",
            line=dict(color=color),
            name=name,
        )
        if "rank_var" in exp_data:
            # NOTE(review): the band half-width is the variance itself here,
            # while `regrets` uses its square root -- confirm this is intended.
            dy = exp_data["rank_var"]
            fig.add_scatter(
                x=list(x) + list(x)[::-1],
                y=list(y - dy) + list(y + dy)[::-1],
                fill="toself",
                showlegend=False,
                line=dict(color=color, width=0),
                name=name,
            )

    objective = get_objective_name(competitions)
    fig.update_layout(
        title="Average Rankings",
        xaxis_title=f"Trials ordered by {order_by} time",
        yaxis_title=f"Ranking based on {objective}",
        hovermode="x",
    )

    return fig
def partial_dependencies(
    experiment,
    with_evc_tree=True,
    params=None,
    smoothing=0.85,
    n_grid_points=10,
    n_samples=50,
    colorscale="Blues",
    model="RandomForestRegressor",
    model_kwargs=None,
    verbose_hover=True,
):
    """Plotly implementation of `orion.plotting.partial_dependencies`

    Draws a lower-triangular grid of subplots: each diagonal cell shows the
    marginal partial dependency of one parameter (mean line, shaded band and
    the completed trials), each cell below the diagonal shows a contour plot
    for a pair of parameters overlaid with the completed trials.

    An empty figure is returned when ``orion.analysis.partial_dependency``
    returns nothing.

    Raises
    ------
    ValueError
        If ``experiment`` is falsy.
    """

    def build_data():
        """Builds the dataframe for the plot"""
        df = experiment.to_pandas(with_evc_tree=with_evc_tree)

        names = list(experiment.space.keys())
        df["params"] = df[names].apply(_format_hyperparameters, args=(names,), axis=1)

        df = df.loc[df["status"] == "completed"]
        data = orion.analysis.partial_dependency(
            df,
            experiment.space,
            params=params,
            model=model,
            n_grid_points=n_grid_points,
            n_samples=n_samples,
            **model_kwargs,
        )
        df = _flatten_dims(df, experiment.space)
        return (df, data)

    def _set_scale(figure, dims, x, y):
        """Switch axes of subplot (row=y, col=x) to log scale where needed"""
        for axis, dim in zip("xy", dims):
            if "reciprocal" in dim.prior_name or dim.type == "fidelity":
                getattr(figure, f"update_{axis}axes")(type="log", row=y, col=x)

    def _plot_marginalized_avg(data, x_name):
        """Line trace plotting data[1] against the grid data[0][x_name]"""
        return go.Scatter(
            x=data[0][x_name],
            y=data[1],
            mode="lines",
            name=None,
            showlegend=False,
            line=dict(
                color=px.colors.qualitative.D3[0],
            ),
        )

    def _plot_marginalized_std(data, x_name):
        """Filled band spanning data[1] - data[2] to data[1] + data[2]"""
        return go.Scatter(
            x=list(data[0][x_name]) + list(data[0][x_name])[::-1],
            y=list(data[1] - data[2]) + list(data[1] + data[2])[::-1],
            mode="lines",
            name=None,
            fill="toself",
            showlegend=False,
            line=dict(
                color=px.colors.qualitative.D3[0],
                width=0,
            ),
        )

    def _plot_contour(data, x_name, y_name):
        """Contour trace of data[1] over the grids of a pair of parameters"""
        return go.Contour(
            x=data[0][x_name],
            y=data[0][y_name],
            z=data[1],
            connectgaps=True,
            # Share the same color range across contour plots
            coloraxis="coloraxis",
            line_smoothing=smoothing,
            # To show labels
            contours=dict(
                coloring="heatmap",
                showlabels=True,  # show labels on contours
                labelfont=dict(  # label font properties
                    size=12,
                    color="white",
                ),
            ),
        )

    def _plot_scatter(x, y, df):
        """Marker trace of the trials, with hover data taken from ``df``"""
        return go.Scatter(
            x=x,
            y=y,
            marker={
                "line": {"width": 0.5, "color": "Grey"},
                "color": "black",
                "size": 5,
            },
            mode="markers",
            opacity=0.5,
            showlegend=False,
            customdata=list(zip(df["id"], df["suggested"], df["params"])),
            hovertemplate=_template_trials(verbose_hover),
        )

    # Same validation as the other plotting entry points, instead of an
    # AttributeError surfacing from `experiment.to_pandas`.
    if not experiment:
        raise ValueError("Parameter 'experiment' is None")

    if model_kwargs is None:
        model_kwargs = {}

    df, data = build_data()

    if not data:
        return go.Figure()

    # String keys are single parameters; the other keys are parameter pairs.
    params = [
        param_names for param_names in data.keys() if isinstance(param_names, str)
    ]

    flattened_space = build_required_space(
        experiment.space,
        shape_requirement="flattened",
    )

    fig = make_subplots(
        rows=len(params),
        cols=len(params),
        shared_xaxes=True,
        shared_yaxes=False,
    )

    fig.update_layout(paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)")

    cmin = float("inf")
    cmax = -float("inf")

    for x_i, x_name in enumerate(params):
        # Diagonal cell: marginal dependency of a single parameter.
        fig.add_trace(
            _plot_marginalized_avg(data[x_name], x_name),
            row=x_i + 1,
            col=x_i + 1,
        )
        fig.add_trace(
            _plot_marginalized_std(data[x_name], x_name),
            row=x_i + 1,
            col=x_i + 1,
        )
        fig.add_trace(
            _plot_scatter(df[x_name], df["objective"], df),
            row=x_i + 1,
            col=x_i + 1,
        )

        _set_scale(fig, [flattened_space[x_name]], x_i + 1, x_i + 1)

        fig.update_xaxes(title_text=x_name, row=len(params), col=x_i + 1)
        if x_i > 0:
            fig.update_yaxes(title_text=x_name, row=x_i + 1, col=1)
        else:
            fig.update_yaxes(title_text="Objective", row=x_i + 1, col=x_i + 1)

        # Cells below the diagonal: dependency on a pair of parameters.
        for y_i in range(x_i + 1, len(params)):
            y_name = params[y_i]
            fig.add_trace(
                _plot_contour(
                    data[(x_name, y_name)],
                    x_name,
                    y_name,
                ),
                row=y_i + 1,
                col=x_i + 1,
            )
            fig.add_trace(
                _plot_scatter(df[x_name], df[y_name], df),
                row=y_i + 1,
                col=x_i + 1,
            )

            cmin = min(cmin, data[(x_name, y_name)][1].min())
            cmax = max(cmax, data[(x_name, y_name)][1].max())

            _set_scale(
                fig,
                [flattened_space[name] for name in [x_name, y_name]],
                x_i + 1,
                y_i + 1,
            )

    # Give the diagonal plots the value range of the contours. With a single
    # parameter no contour exists and (cmin, cmax) is still (inf, -inf); leave
    # the axis on autorange in that case rather than setting an invalid range.
    if cmin <= cmax:
        for x_i in range(len(params)):
            plot_id = len(params) * x_i + x_i + 1
            if plot_id > 1:
                key = f"yaxis{plot_id}_range"
            else:
                key = "yaxis_range"
            fig.update_layout(**{key: [cmin, cmax]})

    fig.update_layout(
        title=f"Partial dependencies for experiment '{experiment.name}'",
    )
    fig.layout.coloraxis.colorbar.title = "Objective"

    fig.update_layout(coloraxis=dict(colorscale=colorscale), showlegend=False)

    return fig
def regret(
    experiment, with_evc_tree=True, order_by="suggested", verbose_hover=True, **kwargs
):
    """Plotly implementation of `orion.plotting.regret`

    Plots the objective of every completed trial as markers and the
    ``best`` column produced by ``orion.analysis.regret`` as a line. An
    empty figure is returned when there is no completed trial.

    Raises
    ------
    ValueError
        If ``experiment`` is falsy or ``order_by`` is not one of
        ``"suggested"``, ``"reserved"``, ``"completed"``.
    """
    ORDER_KEYS = ["suggested", "reserved", "completed"]

    def build_frame():
        """Builds the dataframe for the plot"""
        trials = experiment.to_pandas(with_evc_tree=with_evc_tree)

        hyperparameters = list(experiment.space.keys())
        trials["params"] = trials[hyperparameters].apply(
            _format_hyperparameters, args=(hyperparameters,), axis=1
        )

        completed = trials.loc[trials["status"] == "completed"]
        return orion.analysis.regret(completed.sort_values(order_by))

    if not experiment:
        raise ValueError("Parameter 'experiment' is None")

    if order_by not in ORDER_KEYS:
        raise ValueError(f"Parameter 'order_by' is not one of {ORDER_KEYS}")

    df = build_frame()
    fig = go.Figure()
    if df.empty:
        return fig

    completed_trials = experiment.fetch_trials_by_status(
        "completed", with_evc_tree=with_evc_tree
    )
    trial = completed_trials[0]

    trial_hover = list(zip(df["id"], df[order_by], df["params"]))
    fig.add_scatter(
        y=df["objective"],
        mode="markers",
        name="trials",
        customdata=trial_hover,
        hovertemplate=_template_trials(verbose_hover),
    )

    best_hover = list(zip(df["best_id"], df["best"]))
    fig.add_scatter(
        y=df["best"],
        mode="lines",
        name="best-to-date",
        customdata=best_hover,
        hovertemplate=_template_best(),
    )

    y_axis_label = (
        "Objective unknown"
        if trial is None
        else f"{trial.objective.type.capitalize()} '{trial.objective.name}'"
    )
    fig.update_layout(
        title=f"Regret for experiment '{experiment.name}'",
        xaxis_title=f"Trials ordered by {order_by} time",
        yaxis_title=y_axis_label,
    )

    return fig
def build_parallel_frame(
    experiments, names=None, with_evc_tree=True, order_by="objective"
):
    """Builds the dataframe for the parallel assessment

    One row is produced per experiment that has at least one completed
    trial, with columns ``name``, ``objective``, ``n_workers`` and
    ``duration``. ``objective`` is the first value once the kept trials are
    sorted by ``order_by``; ``duration`` is the number of whole seconds
    between the first kept trial's ``suggested`` time and the last kept
    trial's ``completed`` time, plus one.

    Returns
    -------
    tuple
        ``(dataframe, group, average_key, x, y, x_title)``.
    """
    exp_parallels = dict(
        name=list(), objective=list(), n_workers=list(), duration=list()
    )
    average_key = "objective,duration"
    group = "n_workers"
    x = group
    x_title = "Number of workers"
    y = "objective"
    for i, experiment in enumerate(experiments):
        dfs = experiment.to_pandas(with_evc_tree=with_evc_tree)
        dfs = dfs.loc[dfs["status"] == "completed"]

        if len(dfs) > experiment.max_trials:
            dfs = dfs.head(experiment.max_trials)

        # Nothing to report for an experiment without completed trials;
        # skipping keeps all four columns the same length.
        if dfs.empty:
            continue

        start = dfs["suggested"].iloc[0]
        end = dfs["completed"].iloc[-1]
        # total_seconds() rather than `.seconds`: the latter is only the
        # seconds component and silently drops whole days.
        duration = int((end - start).total_seconds()) + 1
        dfs = dfs.sort_values(order_by)

        name = names[i] if names else f"{experiment.name}-v{experiment.version}"
        exp_parallels["duration"].append(duration)
        exp_parallels["name"].append(name)
        exp_parallels[group].append(experiment.executor.n_workers)
        exp_parallels["objective"].append(dfs["objective"].tolist()[0])

    return pd.DataFrame(exp_parallels), group, average_key, x, y, x_title
def build_durations_frame(
    experiments, names=None, with_evc_tree=True, order_by="suggested"
):
    """Builds the dataframe for the duration plot

    Delegates to ``build_regrets_frame`` and returns its result with the
    x column and x-axis title replaced by their duration counterparts.
    """
    frames, group, average_key, _regret_x, y, _regret_x_title = build_regrets_frame(
        experiments, names, with_evc_tree, order_by
    )
    return (
        frames,
        group,
        average_key,
        "duration",
        y,
        "Experiment duration by second(s)",
    )
def build_regrets_frame(
    experiments, names=None, with_evc_tree=True, order_by="suggested"
):
    """Builds the dataframe for regrets plot

    For every experiment the completed trials are sorted by ``order_by``,
    truncated to ``experiment.max_trials`` rows, passed through
    ``orion.analysis.regret`` and extended with the columns ``duration``,
    ``n_workers``, ``name`` and ``order``. ``duration`` is the number of
    whole seconds between the earliest ``suggested`` time of the kept
    trials and each trial's ``completed`` time, plus one.

    Returns
    -------
    tuple
        ``(dataframe, group, average_key, x, y, x_title)`` where the
        dataframe is the concatenation of the per-experiment frames.
    """
    frames = []
    average_key = "best,duration"
    group = "order"
    x = group
    x_title = f"Trials ordered by {order_by} time"
    y = "best"
    for i, experiment in enumerate(experiments):
        df = experiment.to_pandas(with_evc_tree=with_evc_tree)
        df = df.loc[df["status"] == "completed"]
        df = df.sort_values(order_by)
        if len(df) > experiment.max_trials:
            df = df.head(experiment.max_trials)

        df = orion.analysis.regret(df)

        if df.empty:
            # No completed trial: keep the (empty) frame so the final
            # concatenation still carries the expected columns.
            durations = []
        else:
            # The index still holds the labels from before filtering and
            # sorting, so `df["suggested"][0]` could raise KeyError or pick
            # an arbitrary row. The earliest suggestion is the start of the
            # experiment whatever `order_by` is.
            start = df["suggested"].min()
            # total_seconds() rather than `.seconds`, which is only the
            # seconds component and drops whole days.
            durations = [
                int(elapsed.total_seconds()) + 1
                for elapsed in (df["completed"] - start)
            ]

        df["duration"] = durations
        df["n_workers"] = experiment.executor.n_workers
        df["name"] = [
            names[i] if names else f"{experiment.name}-v{experiment.version}"
        ] * len(df)
        df[group] = list(range(len(df)))
        frames.append(df)

    return pd.concat(frames), group, average_key, x, y, x_title
def infer_unit_time(data, min_unit=3):
    """Pick a time unit for the durations in ``data`` and rescale them.

    The ``duration_mean`` column is used when present, ``duration``
    otherwise. The larger of the first and last durations is divided by the
    number of rows; the coarsest unit for which that value reaches
    ``min_unit`` units is selected.

    Returns
    -------
    tuple
        ``(durations, time_unit)``: the duration column expressed in the
        selected unit, and the unit label (``"day(s)"``, ``"hour(s)"``,
        ``"minute(s)"`` or ``"second(s)"``).
    """
    column = "duration_mean" if "duration_mean" in data else "duration"
    values = data[column].tolist()

    reference = values[0] if values[0] > values[-1] else values[-1]
    unit_time = reference / len(values)

    # Coarsest unit first; seconds are the fallback.
    units = (
        (60 * 60 * 24, "day(s)"),
        (60 * 60, "hour(s)"),
        (60, "minute(s)"),
    )
    for seconds_per_unit, time_unit in units:
        if unit_time >= seconds_per_unit * min_unit:
            return data[column] / seconds_per_unit, time_unit

    return data[column], "second(s)"
def regrets(
    experiments,
    with_evc_tree=True,
    order_by="suggested",
    build_frame_fn=build_regrets_frame,
    return_var=False,
    title="Average Regret",
    **kwargs,
):
    """Plotly implementation of `orion.plotting.regrets`

    ``experiments`` may be a sequence of experiments, a dict mapping names
    to experiments, or a dict mapping names to iterables of experiments. In
    the last case each group is averaged with ``orion.analysis.average``.
    ``build_frame_fn`` builds the per-experiment frames and decides which
    columns are plotted on the x and y axes.

    Raises
    ------
    ValueError
        If ``experiments`` is falsy or ``order_by`` is not one of
        ``"suggested"``, ``"reserved"``, ``"completed"``, ``"objective"``.
    """
    ORDER_KEYS = ["suggested", "reserved", "completed", "objective"]

    # Validate before inspecting `experiments`: peeking at the first value of
    # an empty dict would otherwise raise StopIteration instead of the
    # intended ValueError.
    if not experiments:
        raise ValueError("Parameter 'experiment' is None")

    if order_by not in ORDER_KEYS:
        raise ValueError(f"Parameter 'order_by' is not one of {ORDER_KEYS}")

    compute_average = bool(
        isinstance(experiments, dict)
        and isinstance(next(iter(experiments.values())), Iterable)
    )

    def build_groups():
        """Build dataframes for groups of experiments"""
        if compute_average:
            data_frames = dict()
            for name, members in experiments.items():
                frames, group_by, average_keys, x, y, x_title = build_frame_fn(
                    members, with_evc_tree=with_evc_tree, order_by=order_by
                )
                df = orion.analysis.average(
                    frames, group_by, average_keys, return_var=return_var
                )
                # Averaged columns are read back with a `_mean` suffix.
                if x in average_keys.split(","):
                    x = f"{x}_mean"
                if y in average_keys.split(","):
                    y = f"{y}_mean"
                df["name"] = [name] * len(df)
                if "n_workers" not in df:
                    df["n_workers"] = [members[0].executor.n_workers] * len(df)
                data_frames[name] = df
            data_frames = pd.concat(data_frames)
        elif isinstance(experiments, dict):
            data_frames, _, _, x, y, x_title = build_frame_fn(
                experiments.values(),
                list(experiments.keys()),
                with_evc_tree=with_evc_tree,
                order_by=order_by,
            )
        else:
            data_frames, _, _, x, y, x_title = build_frame_fn(
                experiments, with_evc_tree=with_evc_tree, order_by=order_by
            )

        return data_frames, x, y, x_title

    def get_objective_name(experiments):
        """Infer name of objective based on trials of one experiment"""
        if compute_average and isinstance(experiments, dict):
            return get_objective_name(sum(map(list, experiments.values()), []))

        if isinstance(experiments, dict):
            experiments = experiments.values()

        for experiment in experiments:
            trials = experiment.fetch_trials_by_status("completed")
            if trials:
                return trials[0].objective.name
        return "objective"

    df, x_col, y_col, x_title = build_groups()

    fig = go.Figure()

    if df.empty:
        return fig

    palette = px.colors.qualitative.G10
    names = set(df["name"])
    for i, name in enumerate(sorted(names)):
        # Cycle through the palette so that having more curves than palette
        # entries reuses colors instead of raising IndexError.
        color = palette[i % len(palette)]
        exp_data = df[df["name"] == name]
        y = exp_data[y_col]
        x = exp_data[x_col]
        duration, time_unit = infer_unit_time(exp_data, min_unit=3)
        if x_col in ["duration", "duration_mean"]:
            x = duration
            x_title = f"Experiment duration by {time_unit}"

        fig.add_scatter(
            x=x,
            y=y,
            mode="lines+markers",
            line=dict(color=color),
            name=name,
            legendgroup=name,
            customdata=list(zip(exp_data["n_workers"], duration)),
            hovertemplate=_template_regrets(name, time_unit),
        )
        if "best_var" in exp_data:
            dy = numpy.sqrt(exp_data["best_var"])
            fig.add_scatter(
                x=list(x) + list(x)[::-1],
                y=list(y - dy) + list(y + dy)[::-1],
                fill="toself",
                showlegend=False,
                line=dict(color=color),
                name=name,
                legendgroup=name,
            )

    fig.update_layout(
        title=title,
        xaxis_title=x_title,
        yaxis_title=get_objective_name(experiments),
        hovermode="x",
    )

    return fig
def _format_value(value):
    """
    Hyperparameter can have many types, sometimes they can even be lists.
    If one of the value is a float, it has to be compact.

    Always returns a string: strings are returned unchanged, floats are
    rendered with 5 significant digits, iterables are rendered as
    ``[item,item,...]`` with every item formatted recursively, and anything
    else goes through ``str``.
    """
    if isinstance(value, str):
        return value

    try:
        items = [_format_value(item) for item in value]
    except TypeError:
        # Not iterable: a scalar.
        if isinstance(value, float):
            return f"{value:.5G}"
        # Return text, not the raw object: the join below only accepts
        # strings, and a TypeError raised by it used to be mistaken for
        # "not iterable", so lists holding ints were returned unformatted.
        return str(value)

    return f"[{','.join(items)}]"


def _format_hyperparameters(hyperparameters, names):
    """Render ``name: value`` pairs as HTML lines for hover text"""
    return "".join(
        f"<br> {name}: {_format_value(value)}"
        for name, value in zip(names, hyperparameters)
    )


def _template_regrets(name, time_unit):
    """Hover template for the curves drawn by `regrets`"""
    template = (
        "algorithm: " + name + "<br>"
        "best: %{y}<br>"
        "workers: %{customdata[0]}<br>"
        "duration: %{customdata[1]} " + time_unit
    )
    template += "<extra></extra>"
    return template


def _template_trials(verbose_hover):
    """Hover template for trial markers; parameters shown only if verbose"""
    template = (
        "<b>ID: %{customdata[0]}</b><br>"
        "value: %{y}<br>"
        "time: %{customdata[1]|%Y-%m-%d %H:%M:%S}<br>"
    )

    if verbose_hover:
        template += "parameters: %{customdata[2]}"

    template += "<extra></extra>"

    return template


def _template_best():
    """Hover template for the best-to-date line"""
    return (
        "<b>Best ID: %{customdata[0]}</b><br>"
        "value: %{customdata[1]}"
        "<extra></extra>"
    )


def _curate_params(data, space):
    """Replace the values of a categorical column by category indices

    ``data`` is a column named after a dimension of ``space``. Columns of
    any other dimension type are returned unchanged.
    """
    dim = space[data.name]
    if isinstance(dim, Categorical):
        data = numpy.array(data.tolist())  # To unpack lists if dim shape > 1
        shape = data.shape
        assert len(shape) <= 2
        # Compare every value against every category; argmax returns the
        # position of the first match.
        idx = numpy.argmax(data.reshape(-1, 1) == numpy.array(dim.categories), axis=1)
        idx = idx.reshape(shape)
        if len(shape) > 1:
            return [list(idx[i]) for i in range(shape[0])]
        return idx
    return data


def _flatten_dims(data, space):
    """Append one ``key[i]`` column per element of every shaped dimension"""
    for key, dim in space.items():
        if dim.shape:
            assert len(dim.shape) == 1

            # expand the list-valued column into its own dataframe
            values = data[key].apply(pd.Series)

            # rename each hp
            values = values.rename(columns=lambda x: f"{key}[{x}]")

            data = pd.concat([data[:], values[:]], axis=1)

    return data