# Source code for meta_policy_search.baselines.base

import numpy as np


class Baseline:
    """
    Reward baseline interface.

    This class only declares the methods a baseline is expected to provide.
    Every method except ``log_diagnostics`` raises ``NotImplementedError``
    and must be overridden by a concrete subclass.
    """

    def get_param_values(self):
        """
        Returns the parameter values of the baseline object

        Raises:
            NotImplementedError: always; subclasses must override this method
        """
        raise NotImplementedError

    def set_params(self, value):
        """
        Sets the parameter values of the baseline object

        Args:
            value: parameter value to be set

        Raises:
            NotImplementedError: always; subclasses must override this method
        """
        raise NotImplementedError

    def fit(self, paths):
        """
        Fits the baseline model with the provided paths

        Args:
            paths: list of paths

        Raises:
            NotImplementedError: always; subclasses must override this method
        """
        raise NotImplementedError

    def predict(self, path):
        """
        Predicts the reward baselines for a provided trajectory / path

        Args:
            path: dict of lists/numpy array containing trajectory / path
                information such as "observations", "rewards", ...

        Returns:
            numpy array of the same length as path["observations"]
            specifying the reward baseline

        Raises:
            NotImplementedError: always; subclasses must override this method
        """
        raise NotImplementedError

    def log_diagnostics(self, paths, prefix):
        """
        Log extra information per iteration based on the collected paths

        The base implementation does nothing, so overriding it is optional.

        Args:
            paths: the collected paths
            prefix: presumably a string prepended to the logged keys --
                TODO confirm against callers
        """
        # Intentional no-op: the interface does not require diagnostics.
        pass