# Source code for meta_policy_search.optimizers.conjugate_gradient_optimizer

from meta_policy_search.utils import logger
import numpy as np
import tensorflow as tf
from collections import OrderedDict
from meta_policy_search.optimizers.base import Optimizer

class FiniteDifferenceHvp(Optimizer):
    """
    Approximates Hessian-vector products of a constraint objective with finite
    differences of its gradient.

    Args:
        base_eps (float) : finite-difference step size (stored as float32)
        symmetric (bool) : if True a central difference is used, otherwise a forward difference
        grad_clip : stored on the instance; it is not used anywhere in this class
    """

    def __init__(self, base_eps=1e-5, symmetric=True, grad_clip=None):
        # np.cast was removed in NumPy 2.0; np.float32 yields the same float32 value.
        self.base_eps = np.float32(base_eps)
        self.symmetric = symmetric
        self.grad_clip = grad_clip
        self._target = None
        self.reg_coeff = None
        self._constraint_gradient = None
        self._input_ph_dict = None

    def build_graph(self, constraint_obj, target, input_val_dict, reg_coeff):
        """
        Sets the objective function and target weights for the optimize function

        Args:
            constraint_obj (tf_op) : constraint objective
            target (Policy) : Policy whose values we are optimizing over
            input_val_dict (dict) : tf.placeholders for input data which may be subsampled. The first dimension
                corresponds to the number of data points
            reg_coeff (float): regularization coefficient
        """
        self._target = target
        self.reg_coeff = reg_coeff
        self._input_ph_dict = input_val_dict

        params = list(target.get_params().values())
        constraint_grads = tf.gradients(constraint_obj, xs=params)

        # tf.gradients returns None for parameters the objective does not depend on;
        # substitute zeros so that every parameter contributes a slice to the flat gradient.
        for idx, (grad, param) in enumerate(zip(constraint_grads, params)):
            if grad is None:
                constraint_grads[idx] = tf.zeros_like(param)

        constraint_gradient = tf.concat([tf.reshape(grad, [-1]) for grad in constraint_grads], axis=0)

        self._constraint_gradient = constraint_gradient

    def constraint_gradient(self, input_val_dict):
        """
        Computes the gradient of the constraint objective

        Args:
            input_val_dict (dict): inputs needed to compute the gradient

        Returns:
            (np.ndarray): flattened gradient
        """
        sess = tf.get_default_session()
        # create_feed_dict is not defined in this class -- presumably provided by the Optimizer base class.
        feed_dict = self.create_feed_dict(input_val_dict)
        constraint_gradient = sess.run(self._constraint_gradient, feed_dict)
        return constraint_gradient

    def Hx(self, input_val_dict, x):
        """
        Compute the second derivative of the constraint val in the direction of the vector x

        The gradient of the constraint is evaluated at parameters shifted by +/- eps * x
        (or at +eps * x and at the unshifted parameters when ``symmetric`` is False) and
        the difference quotient is returned. The target's parameters are restored to
        their original values before returning, also when an evaluation raises.

        Args:
            input_val_dict (dict): inputs needed to compute the gradient of the constraint objective
            x (np.ndarray): vector indicating the direction on which the Hessian has to be computed

        Returns: (np.ndarray): second derivative in the direction of x
        """
        assert isinstance(x, np.ndarray)

        param_vals = self._target.get_param_values().copy()
        flat_param_vals = _flatten_params(param_vals)
        eps = self.base_eps

        try:
            # The gradient op reads the target's current parameters, so the shifted
            # values have to be written into the target before each evaluation.
            params_plus_eps_vals = _unflatten_params(flat_param_vals + eps * x, params_example=param_vals)
            self._target.set_params(params_plus_eps_vals)
            constraint_grad_plus_eps = self.constraint_gradient(input_val_dict)

            if self.symmetric:
                params_minus_eps_vals = _unflatten_params(flat_param_vals - eps * x, params_example=param_vals)
                self._target.set_params(params_minus_eps_vals)
                constraint_grad_minus_eps = self.constraint_gradient(input_val_dict)
                hx = (constraint_grad_plus_eps - constraint_grad_minus_eps) / (2 * eps)
            else:
                self._target.set_params(param_vals)
                constraint_grad = self.constraint_gradient(input_val_dict)
                hx = (constraint_grad_plus_eps - constraint_grad) / eps
        finally:
            # never leave the target at a perturbed parameter vector
            self._target.set_params(param_vals)
        return hx

    def build_eval(self, inputs):
        """
        Build the Hessian evaluation function. It lets you evaluate the hessian of the constraint objective
        in any direction.

        Args:
            inputs (dict): inputs needed to compute the gradient of the constraint objective

        Returns:
            (function): function that evaluates the Hessian of the constraint objective in the input direction,
                with ``reg_coeff * x`` added to the result
        """
        def evaluate_hessian(x):
            return self.Hx(inputs, x) + self.reg_coeff * x

        return evaluate_hessian

class ConjugateGradientOptimizer(Optimizer):
    """
    Performs constrained optimization via line search. The search direction is computed using a conjugate gradient
    algorithm, which gives x = A^{-1}g, where A is a second order approximation of the constraint and g is the gradient
    of the loss function.

    Args:
        cg_iters (int) : The number of conjugate gradients iterations used to calculate A^-1 g
        reg_coeff (float) : A small value so that A -> A + reg*I
        subsample_factor (float) : Subsampling factor to reduce samples when using conjugate gradient. Since the
            computation time for the descent direction dominates, this can greatly reduce the overall computation time.
            (Stored on the instance; not read by any method of this class.)
        backtrack_ratio (float) : ratio for decreasing the step size for the line search
        max_backtracks (int) : maximum number of backtracking iterations for the line search
        debug_nan (bool) : if set to True, NanGuard will be added to the compilation, and ipdb will be invoked when
            nan is detected. (Stored on the instance; not read by any method of this class.)
        accept_violation (bool) : whether to accept the descent step if it violates the line search condition after
            exhausting all backtracking budgets
        hvp_approach (obj) : Hessian vector product approach. If None, a new FiniteDifferenceHvp is created for
            this optimizer.
    """

    def __init__(
            self,
            cg_iters=10,
            reg_coeff=0,
            subsample_factor=1.,
            backtrack_ratio=0.8,
            max_backtracks=15,
            debug_nan=False,
            accept_violation=False,
            hvp_approach=None,
            ):
        self._cg_iters = cg_iters
        self._reg_coeff = reg_coeff
        self._subsample_factor = subsample_factor
        self._backtrack_ratio = backtrack_ratio
        self._max_backtracks = max_backtracks

        self._target = None
        self._max_constraint_val = None
        self._constraint_name = "kl-div"
        self._debug_nan = debug_nan
        self._accept_violation = accept_violation
        # A default instance in the signature would be created once and shared by every
        # optimizer; build_graph mutates it, so each optimizer gets its own instance.
        self._hvp_approach = FiniteDifferenceHvp() if hvp_approach is None else hvp_approach
        self._loss = None
        self._gradient = None
        self._constraint_objective = None
        self._input_ph_dict = None

    def build_graph(self, loss, target, input_ph_dict, leq_constraint):
        """
        Sets the objective function and target weights for the optimize function

        Args:
            loss (tf_op) : minimization objective
            target (Policy) : Policy whose values we are optimizing over
            input_ph_dict (dict) : tf.placeholders for input data which may be subsampled. The first dimension
                corresponds to the number of data points
            leq_constraint (tuple) : A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        """
        assert isinstance(loss, tf.Tensor)
        assert hasattr(target, 'get_params')
        assert isinstance(input_ph_dict, dict)

        constraint_objective, constraint_value = leq_constraint

        self._target = target
        self._constraint_objective = constraint_objective
        self._max_constraint_val = constraint_value
        self._input_ph_dict = input_ph_dict
        self._loss = loss

        # build the graph of the hessian vector product (hvp)
        self._hvp_approach.build_graph(constraint_objective, target, self._input_ph_dict, self._reg_coeff)

        # build the graph of the gradients
        params = list(target.get_params().values())
        grads = tf.gradients(loss, xs=params)
        # tf.gradients returns None for parameters the loss does not depend on
        for idx, (grad, param) in enumerate(zip(grads, params)):
            if grad is None:
                grads[idx] = tf.zeros_like(param)
        gradient = tf.concat([tf.reshape(grad, [-1]) for grad in grads], axis=0)

        self._gradient = gradient

    def loss(self, input_val_dict):
        """
        Computes the value of the loss for given inputs

        Args:
            input_val_dict (dict): inputs needed to compute the loss function

        Returns:
            (float): value of the loss
        """
        sess = tf.get_default_session()
        # create_feed_dict is not defined in this class -- presumably provided by the Optimizer base class.
        feed_dict = self.create_feed_dict(input_val_dict)
        loss = sess.run(self._loss, feed_dict=feed_dict)
        return loss

    def constraint_val(self, input_val_dict):
        """
        Computes the value of the constraint objective (the KL-divergence between pre-update policies) for given
        inputs

        Args:
            input_val_dict (dict): inputs needed to compute the constraint objective

        Returns:
            (float): value of the constraint objective
        """
        sess = tf.get_default_session()
        feed_dict = self.create_feed_dict(input_val_dict)
        constrain_val = sess.run(self._constraint_objective, feed_dict)
        return constrain_val

    def gradient(self, input_val_dict):
        """
        Computes the gradient of the loss function

        Args:
            input_val_dict (dict): inputs needed to compute the gradient

        Returns:
            (np.ndarray): flattened gradient
        """
        sess = tf.get_default_session()
        feed_dict = self.create_feed_dict(input_val_dict)
        gradient = sess.run(self._gradient, feed_dict)
        return gradient

    def optimize(self, input_val_dict):
        """
        Carries out the optimization step

        Computes the descent direction with conjugate gradients, scales it so that the
        quadratic approximation of the constraint equals the allowed maximum, and then
        backtracks along that direction until the loss improves and the constraint holds.
        If no acceptable step is found (and ``accept_violation`` is False) the previous
        parameters are restored.

        Args:
            input_val_dict (dict): inputs for the optimization
        """
        logger.log("Start CG optimization")

        logger.log("computing loss before")
        loss_before = self.loss(input_val_dict)

        logger.log("performing update")

        logger.log("computing gradient")
        gradient = self.gradient(input_val_dict)
        logger.log("gradient computed")

        logger.log("computing descent direction")
        Hx = self._hvp_approach.build_eval(input_val_dict)
        descent_direction = conjugate_gradients(Hx, gradient, cg_iters=self._cg_iters)

        # Largest step s.t. 0.5 * step^2 * d^T H d <= max constraint value; 1e-8 guards against division by zero.
        initial_step_size = np.sqrt(
            2.0 * self._max_constraint_val * (1. / (descent_direction.dot(Hx(descent_direction)) + 1e-8))
        )
        if np.isnan(initial_step_size):
            logger.log("Initial step size is NaN! Rejecting the step!")
            return

        initial_descent_step = initial_step_size * descent_direction
        logger.log("descent direction computed")

        prev_params = self._target.get_param_values()
        prev_params_values = _flatten_params(prev_params)

        loss, constraint_val, n_iter, violated = 0, 0, 0, False
        for n_iter, ratio in enumerate(self._backtrack_ratio ** np.arange(self._max_backtracks)):
            cur_step = ratio * initial_descent_step
            cur_params_values = prev_params_values - cur_step
            cur_params = _unflatten_params(cur_params_values, params_example=prev_params)
            self._target.set_params(cur_params)

            loss, constraint_val = self.loss(input_val_dict), self.constraint_val(input_val_dict)
            if loss < loss_before and constraint_val <= self._max_constraint_val:
                break

        # ------------------- Logging Stuff --------------------------
        if np.isnan(loss):
            violated = True
            logger.log("Line search violated because loss is NaN")
        if np.isnan(constraint_val):
            violated = True
            logger.log("Line search violated because constraint %s is NaN" % self._constraint_name)
        if loss >= loss_before:
            violated = True
            logger.log("Line search violated because loss not improving")
        # Strict comparison: the line search above accepts constraint_val == max, so a step
        # sitting exactly on the bound must not be flagged (and reverted) here.
        if constraint_val > self._max_constraint_val:
            violated = True
            logger.log("Line search violated because constraint %s is violated" % self._constraint_name)

        if violated and not self._accept_violation:
            logger.log("Line search condition violated. Rejecting the step!")
            self._target.set_params(prev_params)

        logger.log("backtrack iters: %d" % n_iter)
        logger.log("computing loss after")
        logger.log("optimization finished")
def _unflatten_params(flat_params, params_example):
    """
    Splits a flat parameter vector back into named arrays.

    Args:
        flat_params (np.ndarray): 1-d vector holding all parameter values back to back
        params_example (dict): mapping from name to array; supplies the keys, their order and the shapes

    Returns:
        (OrderedDict): mapping from each key of params_example to the matching slice of flat_params,
            reshaped to the shape of the example array
    """
    unflat_params = []
    idx = 0
    for key, param in params_example.items():
        # int(): np.prod of an empty shape (scalar parameter) is a float, which cannot be used as a slice bound
        size_param = int(np.prod(param.shape))
        # shape passed positionally: the `newshape` keyword is deprecated in recent NumPy releases
        reshaped_param = np.reshape(flat_params[idx:idx + size_param], param.shape)
        unflat_params.append((key, reshaped_param))
        idx += size_param
    return OrderedDict(unflat_params)


def _flatten_params(params):
    """
    Concatenates all arrays of a parameter dict into one flat vector.

    Args:
        params (dict): mapping from name to np.ndarray

    Returns:
        (np.ndarray): 1-d array with the flattened values in the dict's iteration order
    """
    return np.concatenate([param.reshape(-1) for param in params.values()])


def conjugate_gradients(f_Ax, b, cg_iters=10, verbose=False, residual_tol=1e-10):
    """
    Approximately solves A x = b with the conjugate gradient method (Demmel p 312).

    Args:
        f_Ax (function): computes the product A p for a vector p
        b (np.ndarray): right hand side
        cg_iters (int): maximum number of iterations
        verbose (bool): if True, prints the squared residual norm and the solution norm per iteration
        residual_tol (float): iteration stops once the squared residual norm drops below this value

    Returns:
        (np.ndarray): approximate solution x, as a float32 array of the same shape as b
    """
    p = b.copy()
    r = b.copy()
    x = np.zeros_like(b, dtype=np.float32)
    rdotr = r.dot(r)

    fmtstr = "%10i %10.3g %10.3g"
    titlestr = "%10s %10s %10s"
    if verbose:
        print(titlestr % ("iter", "residual norm", "soln norm"))

    i = -1  # keeps the final report valid when cg_iters == 0
    for i in range(cg_iters):
        if verbose:
            print(fmtstr % (i, rdotr, np.linalg.norm(x)))
        z = f_Ax(p)
        v = rdotr / p.dot(z)
        x += v * p
        r -= v * z
        newrdotr = r.dot(r)
        mu = newrdotr / rdotr
        p = r + mu * p

        rdotr = newrdotr
        if rdotr < residual_tol:
            break

    if verbose:
        print(fmtstr % (i + 1, rdotr, np.linalg.norm(x)))
    return x