Source code for meta_policy_search.policies.distributions.diagonal_gaussian

import tensorflow as tf
import numpy as np
from meta_policy_search.policies.distributions.base import Distribution

class DiagonalGaussian(Distribution):
    """
    General methods for a diagonal Gaussian distribution of a given dimension
    """
    def __init__(self, dim):
        self._dim = dim

    @property
    def dim(self):
        return self._dim

    def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
        """
        Computes the symbolic representation of the KL divergence of two multivariate
        Gaussian distributions with diagonal covariance matrices

        Args:
            old_dist_info_vars (dict) : dict of old distribution parameters as tf.Tensor
            new_dist_info_vars (dict) : dict of new distribution parameters as tf.Tensor

        Returns:
            (tf.Tensor) : symbolic representation of the KL divergence (tensorflow op)
        """
        old_means = old_dist_info_vars["mean"]
        old_log_stds = old_dist_info_vars["log_std"]
        new_means = new_dist_info_vars["mean"]
        new_log_stds = new_dist_info_vars["log_std"]

        # assert ranks
        tf.assert_rank(old_means, 2), tf.assert_rank(old_log_stds, 2)
        tf.assert_rank(new_means, 2), tf.assert_rank(new_log_stds, 2)

        old_std = tf.exp(old_log_stds)
        new_std = tf.exp(new_log_stds)

        numerator = tf.square(old_means - new_means) + \
                    tf.square(old_std) - tf.square(new_std)
        denominator = 2 * tf.square(new_std) + 1e-8
        return tf.reduce_sum(
            numerator / denominator + new_log_stds - old_log_stds, axis=-1)
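
    # The sum above is the closed-form KL between two diagonal Gaussians: per
    # dimension, with means m and standard deviations s = exp(log_std),
    #   KL(p_old || p_new) = log(s_new/s_old) + (s_old^2 + (m_old - m_new)^2) / (2 s_new^2) - 1/2
    # The -1/2 term arises from (s_old^2 - s_new^2) in the numerator; the 1e-8
    # in the denominator only guards against division by zero.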

    def kl(self, old_dist_info, new_dist_info):
        """
        Compute the KL divergence of two multivariate Gaussian distributions with
        diagonal covariance matrices

        Args:
            old_dist_info (dict): dict of old distribution parameters as numpy array
            new_dist_info (dict): dict of new distribution parameters as numpy array

        Returns:
            (numpy array): kl divergence of distributions
        """
        old_means = old_dist_info["mean"]
        old_log_stds = old_dist_info["log_std"]
        new_means = new_dist_info["mean"]
        new_log_stds = new_dist_info["log_std"]

        old_std = np.exp(old_log_stds)
        new_std = np.exp(new_log_stds)

        numerator = np.square(old_means - new_means) + \
                    np.square(old_std) - np.square(new_std)
        denominator = 2 * np.square(new_std) + 1e-8
        return np.sum(
            numerator / denominator + new_log_stds - old_log_stds, axis=-1)
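
    # Usage sketch (illustrative values): identical distributions give zero KL.
    #   dist = DiagonalGaussian(2)
    #   old = {"mean": np.zeros((1, 2)), "log_std": np.zeros((1, 2))}
    #   dist.kl(old, old)  # -> array([0.])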

    def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars):
        """
        Symbolic likelihood ratio p_new(x)/p_old(x) of two distributions

        Args:
            x_var (tf.Tensor): variable at which to evaluate the likelihood ratio p_new(x)/p_old(x)
            old_dist_info_vars (dict) : dict of old distribution parameters as tf.Tensor
            new_dist_info_vars (dict) : dict of new distribution parameters as tf.Tensor

        Returns:
            (tf.Tensor): likelihood ratio
        """
        with tf.variable_scope("log_li_new"):
            logli_new = self.log_likelihood_sym(x_var, new_dist_info_vars)
        with tf.variable_scope("log_li_old"):
            logli_old = self.log_likelihood_sym(x_var, old_dist_info_vars)
        return tf.exp(logli_new - logli_old)
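
    # The ratio is formed in log space and exponentiated once at the end,
    # i.e. p_new(x)/p_old(x) = exp(log p_new(x) - log p_old(x)), which is
    # numerically more stable than dividing the two densities directly.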

    def log_likelihood_sym(self, x_var, dist_info_vars):
        """
        Symbolic log likelihood log p(x) of the distribution

        Args:
            x_var (tf.Tensor): variable at which to evaluate the log likelihood
            dist_info_vars (dict) : dict of distribution parameters as tf.Tensor

        Returns:
            (tf.Tensor): log likelihood
        """
        means = dist_info_vars["mean"]
        log_stds = dist_info_vars["log_std"]

        # assert ranks
        tf.assert_rank(x_var, 2), tf.assert_rank(means, 2), tf.assert_rank(log_stds, 2)

        zs = (x_var - means) / tf.exp(log_stds)
        return - tf.reduce_sum(log_stds, axis=-1) - \
               0.5 * tf.reduce_sum(tf.square(zs), axis=-1) - \
               0.5 * self.dim * np.log(2 * np.pi)
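
    # The expression above is the diagonal-Gaussian log density
    #   log p(x) = - sum_i log s_i - 1/2 * sum_i ((x_i - m_i)/s_i)^2 - (d/2) * log(2*pi)
    # with d = self.dim, m = mean and s = exp(log_std).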

    def log_likelihood(self, xs, dist_info):
        """
        Compute the log likelihood log p(x) of the distribution

        Args:
            xs (numpy array): variable at which to evaluate the log likelihood
            dist_info (dict) : dict of distribution parameters as numpy array

        Returns:
            (numpy array): log likelihood
        """
        means = dist_info["mean"]
        log_stds = dist_info["log_std"]
        zs = (xs - means) / np.exp(log_stds)
        return - np.sum(log_stds, axis=-1) - \
               0.5 * np.sum(np.square(zs), axis=-1) - \
               0.5 * self.dim * np.log(2 * np.pi)
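
    # Sanity-check sketch: for a standard normal, log p(0) = -(d/2) * log(2*pi).
    #   dist = DiagonalGaussian(2)
    #   info = {"mean": np.zeros((1, 2)), "log_std": np.zeros((1, 2))}
    #   dist.log_likelihood(np.zeros((1, 2)), info)  # -> approx. array([-1.8379])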

    def entropy_sym(self, dist_info_vars):
        """
        Symbolic entropy of the distribution

        Args:
            dist_info_vars (dict) : dict of distribution parameters as tf.Tensor

        Returns:
            (tf.Tensor): entropy
        """
        log_stds = dist_info_vars["log_std"]
        return tf.reduce_sum(log_stds + np.log(np.sqrt(2 * np.pi * np.e)), axis=-1)
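
    # Entropy of a diagonal Gaussian in closed form:
    #   H = sum_i (log s_i + 1/2 * log(2*pi*e))
    # i.e. roughly 1.4189 per dimension at unit standard deviation (log_std = 0).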

    def entropy(self, dist_info):
        """
        Compute the entropy of the distribution

        Args:
            dist_info (dict) : dict of distribution parameters as numpy array

        Returns:
            (numpy array): entropy
        """
        log_stds = dist_info["log_std"]
        return np.sum(log_stds + np.log(np.sqrt(2 * np.pi * np.e)), axis=-1)
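
    # Sanity-check sketch: DiagonalGaussian(2).entropy({"log_std": np.zeros((1, 2))})
    # evaluates to approx. array([2.8379]), i.e. 2 * 0.5 * log(2*pi*e).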

    def sample(self, dist_info):
        """
        Draws a sample from the distribution

        Args:
            dist_info (dict) : dict of distribution parameter instantiations as numpy array

        Returns:
            (obj): sample drawn from the corresponding instantiation
        """
        means = dist_info["mean"]
        log_stds = dist_info["log_std"]
        rnd = np.random.normal(size=means.shape)
        return rnd * np.exp(log_stds) + means
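
    # This is the reparameterization x = mean + exp(log_std) * eps with
    # eps ~ N(0, I); any leading batch dimensions of "mean" are preserved
    # because eps is drawn with means.shape.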

    @property
    def dist_info_specs(self):
        return [("mean", (self.dim,)), ("log_std", (self.dim,))]
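
# Minimal usage sketch: exercises only the numpy code paths, so no TensorFlow
# session is required. The shapes and parameter values below are illustrative.
if __name__ == "__main__":
    dist = DiagonalGaussian(dim=3)
    # 5 batched parameter instantiations of a standard normal
    dist_info = {"mean": np.zeros((5, 3)), "log_std": np.zeros((5, 3))}

    samples = dist.sample(dist_info)                     # shape (5, 3)
    log_probs = dist.log_likelihood(samples, dist_info)  # shape (5,)
    print("log likelihoods:", log_probs)
    print("entropy per instantiation:", dist.entropy(dist_info))
    print("KL to itself (should be ~0):", dist.kl(dist_info, dist_info))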