import numpy as np
class Baseline:
    """
    Reward baseline interface.

    Subclasses are expected to override ``get_param_values``, ``set_params``,
    ``fit`` and ``predict``; the versions defined here raise
    ``NotImplementedError``. ``log_diagnostics`` is an optional hook whose
    default implementation does nothing.
    """

    def get_param_values(self):
        """
        Returns the parameter values of the baseline object.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def set_params(self, value):
        """
        Sets the parameter values of the baseline object.

        Args:
            value: parameter value to be set

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def fit(self, paths):
        """
        Fits the baseline model with the provided paths.

        Args:
            paths: list of paths

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def predict(self, path):
        """
        Predicts the reward baselines for a provided trajectory / path.

        Args:
            path: dict of lists/numpy array containing trajectory / path
                information such as "observations", "rewards", ...

        Returns:
            numpy array of the same length as path["observations"]
            specifying the reward baseline

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def log_diagnostics(self, paths, prefix):
        """
        Log extra information per iteration based on the collected paths.

        The base implementation is intentionally a no-op so that subclasses
        only need to override it when they have something to report.

        Args:
            paths: the collected paths
            prefix: presumably a label prepended to the logged entries --
                TODO confirm against callers
        """
        pass