# plotting_utils.py

# jax
import time
from functools import partial

import ipdb
import jax
import jax.numpy as np
# import tk as tkinter
# import matplotlib
# matplotlib.use('Qt5Agg')
import matplotlib.pyplot as pl
# other, trivial stuff
import numpy as onp

cmap = 'viridis'

import pontryagin_utils
from misc import *


def plot_sol(sol, problem_params):
    """Plot a single ODE solution in a 2x2 grid of subplots.

    Subplot 221 shows the state x and 222 the control returned by
    ``pontryagin_utils.u_star_2d``. If ``sol.ys`` has a 'vxx' entry, 223
    shows the sorted real parts of its eigenvalues and 224 its raw entries;
    otherwise the function returns after the first two subplots.

    Every x axis uses the 't' entry stored in the solution, while the dense
    curves come from evaluating ``sol`` on a grid of the solver's own
    independent variable, so the plot does not care which variable the
    solver integrated over.
    (adapted from plot_forward_backward in ddp_optimizer)

    Args:
        sol: solution object providing ``t0``, ``t1``, ``evaluate(t)`` and a
            dict ``ys`` with keys 't', 'x', 'vx' and optionally 'vxx'.
        problem_params: dict. 'state_names' labels the state curves and 'nx'
            (state dimension) is used for the Hessian plots; the dict is
            also handed to ``u_star_2d`` unchanged.
    """
    nodes = sol.ys

    # dense grid in the solver's independent variable, and the solution
    # interpolated on it.
    dense_ts = np.linspace(sol.t0, sol.t1, 5001)
    dense = jax.vmap(sol.evaluate)(dense_ts)

    # control as a function of (x, vx), batched over the leading axis.
    batched_u_star = jax.vmap(pontryagin_utils.u_star_2d, in_axes=(0, 0, None))

    # --- states: dots at the solver nodes, lines for the interpolation.
    state_ax = pl.subplot(221)
    pl.plot(nodes['t'], nodes['x'], marker='.', linestyle='', alpha=1)
    pl.gca().set_prop_cycle(None)  # restart colours so lines match their dots
    pl.plot(dense['t'], dense['x'], alpha=0.5, label=problem_params['state_names'])
    pl.legend()

    # --- controls, same dots/lines layout.
    pl.subplot(222, sharex=state_ax)
    node_us = batched_u_star(nodes['x'], nodes['vx'], problem_params)
    dense_us = batched_u_star(dense['x'], dense['vx'], problem_params)
    pl.plot(nodes['t'], node_us, linestyle='', marker='.')
    pl.gca().set_prop_cycle(None)
    pl.plot(dense['t'], dense_us, label=('u_0', 'u_1'))
    pl.legend()

    # everything below needs the Hessian; stop here if it was not computed.
    if 'vxx' not in nodes:
        return

    n_states = problem_params['nx']

    def sorted_real_eigs(mat):
        eigenvalues = np.linalg.eig(mat)[0]
        return np.sort(eigenvalues.real)

    draw = pl.plot  # swap for pl.semilogy to get a logarithmic y axis

    # --- eigenvalues of the Hessian.
    pl.subplot(223, sharex=state_ax)
    node_eigs = jax.vmap(sorted_real_eigs)(nodes['vxx'])
    eig_labels = ['S(t) eigenvalues'] + [None] * (n_states - 1)
    draw(nodes['t'], node_eigs, color='C0', marker='.', linestyle='', label=eig_labels)
    # node values joined by a line as well: more accurate than the dashed
    # interpolation below when the timesteps become very small.
    draw(nodes['t'], node_eigs, color='C0')

    # interpolated eigenvalues (of limited use, since the backward solver
    # steps very closely to the non-differentiable points anyway).
    dense_eigs = jax.vmap(sorted_real_eigs)(dense['vxx'])
    draw(dense['t'], dense_eigs, color='C0', linestyle='--', alpha=.5)
    pl.legend()

    # --- raw Hessian entries.
    pl.subplot(224, sharex=state_ax)
    flat_hessians = dense['vxx'].reshape(-1, n_states**2)
    entry_labels = ['entries of Vxx(t)'] + [None] * (n_states**2 - 1)
    pl.plot(dense['t'], flat_hessians, label=entry_labels, color='green', alpha=.3)
    pl.legend()

    # second legend call kept from the original layout, where it followed an
    # optional plot of the riccati equation terms; it redraws the same legend.
    pl.legend()


def plot_us(sols, problem_params, rotate=True, c='C0', label=None):
    """Plot all control values of a vmapped solutions object in the u plane.

    The 'x' and 'vx' entries of ``sols.ys`` are flattened to
    ``(-1, problem_params['nx'])`` and passed pointwise to
    ``pontryagin_utils.u_star_2d``. Per the original note, the inf padding
    in the solutions breaks up the plotted line between trajectories.

    Args:
        sols: solutions object whose ``ys`` dict has 'x' and 'vx' entries.
        problem_params: dict with key 'nx'; also passed to ``u_star_2d``.
        rotate: if True, plot (u0 - u1, u0 + u1) instead of (u0, u1).
        c: matplotlib colour of the line and markers.
        label: optional legend label for the plotted line.
    """
    nx = problem_params['nx']
    flat_xs = sols.ys['x'].reshape(-1, nx)
    flat_costates = sols.ys['vx'].reshape(-1, nx)
    us = jax.vmap(pontryagin_utils.u_star_2d, in_axes=(0, 0, None))(
        flat_xs, flat_costates, problem_params
    )

    if rotate:
        # columns of this matrix are (1, -1) and (1, 1), so that
        # us @ diff_and_sum = [u0 - u1, u0 + u1].
        diff_and_sum = np.array([[1, -1], [1, 1]]).T
        us = us @ diff_and_sum
        pl.xlabel('u0 - u1')
        pl.ylabel('u0 + u1')
    else:
        pl.xlabel('u0')
        pl.ylabel('u1')

    pl.plot(us[:, 0], us[:, 1], alpha=0.1, marker='.', c=c, label=label)

    # Only draw a legend if something on these axes is labelled (either our
    # line or artists the caller added before). An unconditional legend()
    # call makes matplotlib warn and add an empty legend when nothing is.
    handles, _ = pl.gca().get_legend_handles_labels()
    if handles:
        pl.legend()


def plot_ellipse(Q, N_pts=101):
    """Plot the 2D ellipse S = {x | x.T Q x = 1} as a red line.

    Reasoning: x.T Q x = || Q^(1/2) x ||^2 = 1 means that Q^(1/2) x lies on
    the unit circle for every x in S, hence S = Q^(-1/2) (unit circle).

    Args:
        Q: 2x2 matrix. It should be positive definite for the ellipse to be
            well defined; this is not checked here.
        N_pts: number of points sampled along the boundary.
    """
    angles = np.linspace(0, 2*np.pi, N_pts)

    # unit circle as a (2, N_pts) array: first row cos, second row sin.
    unit_circle = np.stack([np.cos(angles), np.sin(angles)])

    # Q^(-1/2); sqrtm may return a complex array, only the real part is used.
    Q_inverse = np.linalg.inv(Q)
    circle_to_ellipse = jax.scipy.linalg.sqrtm(Q_inverse).real

    boundary = circle_to_ellipse @ unit_circle
    pl.plot(boundary[0, :], boundary[1, :], color='red', alpha=.5)


def plot_trajectory_vs_nn(sol, params, v_nn_unnormalised):
    """Compare v and vx along one trajectory with a neural network's values.

    Top subplot: the trajectory's 'v' (interpolated line plus dots at the
    solver nodes) and the NN value at the interpolated states. Bottom
    subplot: the same for 'vx' against the NN gradient w.r.t. x.

    All dense curves are drawn against the interpolated 't' entry of the
    solution rather than against the solver grid itself. The node markers
    use ``sol.ys['t']``, so this keeps curves and markers on the same axis
    even if the solver's independent variable is not t.

    Args:
        sol: solution object providing ``t0``, ``t1``, ``evaluate(t)`` and a
            dict ``ys`` with keys 't', 'x', 'v' and 'vx'.
        params: NN parameters, passed as first argument of the NN function.
        v_nn_unnormalised: callable ``(params, x) -> v`` for a single state.
            Build it outside, e.g.
            ``lambda params, x: normaliser.unnormalise_v(v_nn(params, normaliser.normalise_x(x)))``
    """
    nodes = sol.ys
    nx = nodes['x'].shape[-1]

    def onelabel(text):
        # label only the first of nx curves so the legend has a single entry.
        return [text] + [None] * (nx - 1)

    # evaluate on a grid of the solver's independent variable ...
    solver_grid = np.linspace(sol.t0, sol.t1, 1000)
    interp_ys = jax.vmap(sol.evaluate)(solver_grid)
    # ... but plot against the interpolated 't' stored in the solution.
    curve_ts = interp_ys['t']

    ax = pl.subplot(211)
    pl.plot(curve_ts, interp_ys['v'], alpha=.5, label='trajectory v(x(t))', c='C0')
    pl.plot(nodes['t'], nodes['v'], alpha=.5, linestyle='', marker='.', c='C0')

    # same with the NN
    nn_vs = jax.vmap(v_nn_unnormalised, in_axes=(None, 0))(params, interp_ys['x'])
    pl.plot(curve_ts, nn_vs, c='C1', label='NN v(x(t))')
    pl.legend()

    # and now the same for vx
    pl.subplot(212, sharex=ax)
    pl.plot(curve_ts, interp_ys['vx'], alpha=.5, label=onelabel('trajectory vx(x(t))'), c='C0')
    pl.plot(nodes['t'], nodes['vx'], alpha=.5, linestyle='', marker='.', c='C0')

    nn_vx_fct = jax.jacobian(v_nn_unnormalised, argnums=1)
    nn_vxs = jax.vmap(nn_vx_fct, in_axes=(None, 0))(params, interp_ys['x'])
    pl.plot(curve_ts, nn_vxs, label=onelabel('NN v_x(x(t))'), c='C1')

    pl.legend()


def plot_trajectory_vs_nn_ensemble(sol, vmapped_params, v_nn_unnormalised):
    """Compare v and vx along one trajectory with an NN ensemble.

    Top subplot: the trajectory's 'v' (interpolated line plus dots at the
    solver nodes) with the ensemble mean and a +/- ``confidence_width``
    standard deviation band of the NN value. Bottom subplot: the same for
    'vx' against the NN gradient w.r.t. x, one curve and band per state
    dimension.

    All dense curves are drawn against the interpolated 't' entry of the
    solution rather than against the solver grid itself. The node markers
    use ``sol.ys['t']``, so this keeps curves and markers on the same axis
    even if the solver's independent variable is not t.

    Args:
        sol: solution object providing ``t0``, ``t1``, ``evaluate(t)`` and a
            dict ``ys`` with keys 't', 'x', 'v' and 'vx'.
        vmapped_params: NN parameters with a leading ensemble axis on every
            leaf (they are vmapped over axis 0).
        v_nn_unnormalised: callable ``(params, x) -> v`` for a single
            ensemble member and a single state. Build it outside, e.g.
            ``lambda params, x: normaliser.unnormalise_v(v_nn(params, normaliser.normalise_x(x)))``
    """
    confidence_width = 1  # band half-width in std devs; make small to look better.

    nodes = sol.ys
    nx = nodes['x'].shape[-1]

    def over_ensemble(fct):
        # inner vmap over trajectory points, outer vmap over ensemble members.
        return jax.vmap(jax.vmap(fct, in_axes=(None, 0)), in_axes=(0, None))

    # evaluate on a grid of the solver's independent variable ...
    solver_grid = np.linspace(sol.t0, sol.t1, 2000)
    interp_ys = jax.vmap(sol.evaluate)(solver_grid)
    # ... but plot against the interpolated 't' stored in the solution.
    curve_ts = interp_ys['t']

    ax = pl.subplot(211)
    pl.plot(curve_ts, interp_ys['v'], alpha=.5, label='trajectory v(x(t))', c='C0')
    pl.plot(nodes['t'], nodes['v'], alpha=.5, linestyle='', marker='.', c='C0')

    # same with the NN ensemble: mean/std over the ensemble axis.
    nn_vs = over_ensemble(v_nn_unnormalised)(vmapped_params, interp_ys['x'])
    v_mean = nn_vs.mean(axis=0)
    v_std = nn_vs.std(axis=0)
    pl.plot(curve_ts, v_mean, c='C1', label='NN v mean')
    pl.fill_between(
        curve_ts,
        v_mean - confidence_width * v_std,
        v_mean + confidence_width * v_std,
        color='C1', alpha=.2, label=f'NN v {confidence_width}σ band',
    )
    pl.legend()

    # and now the same for vx
    pl.subplot(212, sharex=ax)
    trajectory_labels = ['trajectory vx(x(t))'] + [None] * (nx - 1)
    pl.plot(curve_ts, interp_ys['vx'], alpha=.5, label=trajectory_labels, c='C0')
    pl.plot(nodes['t'], nodes['vx'], alpha=.5, linestyle='', marker='.', c='C0')

    # shaped (N_params, N_ts, nx)
    nn_vx_fct = jax.jacobian(v_nn_unnormalised, argnums=1)
    nn_vxs = over_ensemble(nn_vx_fct)(vmapped_params, interp_ys['x'])
    vx_means = nn_vxs.mean(axis=0)
    vx_stds = nn_vxs.std(axis=0)

    # one state dimension at a time because fill_between does not take
    # vectorised data; only the first dimension gets legend entries.
    for j in range(nx):
        mean_j = vx_means[:, j]
        halfwidth_j = confidence_width * vx_stds[:, j]

        pl.plot(curve_ts, mean_j, label='NN vx mean' if j == 0 else None, c='C1')
        pl.fill_between(
            curve_ts, mean_j - halfwidth_j, mean_j + halfwidth_j,
            color='C1', alpha=.2,
            label=f'NN vx {confidence_width}σ band' if j == 0 else None,
        )

    pl.legend()


def plot_nn_train_outputs(outputs, subsample=256):
    """Plot NN training curves (learning rate and loss terms) on a log axis.

    Plots the UNSCALED loss terms, so don't fear the worst if some numbers
    seem rather high or low.

    Works for a single run (arrays shaped (N_steps,)) or an ensemble (every
    array has an additional leading dimension). A single run is treated as
    an ensemble with one member. The caller's ``outputs`` dict is not
    modified.

    Args:
        outputs: dict of arrays. Read keys: 'lr', 'lossterms' (itself a dict
            with one array per loss term) and optionally 'v_sweep' and
            'weight_norm'. Every array is sliced and reshaped like 'lr'.
        subsample: length of the chunks each curve is averaged over before
            plotting. Averaging is skipped for values <= 1 or when there
            are fewer than ``subsample`` steps.

    Raises:
        AssertionError: if ``outputs`` contains 'test_loss_terms', which is
            not supported here anymore.
    """
    # raised explicitly (instead of an assert statement) so that the check
    # is still performed when python runs with -O.
    if 'test_loss_terms' in outputs:
        raise AssertionError('not supported anymore here')

    # if single, act as if it was an ensemble (with 1 member).
    if len(outputs['lr'].shape) == 1:
        outputs = jax.tree_util.tree_map(lambda n: n[None, :], outputs)

    N_ensemble, N_steps = outputs['lr'].shape

    # x axis values, one row per ensemble member. Float, because the last
    # entry is overwritten with NaN below and an int array cannot hold NaN.
    iters = np.tile(np.arange(N_steps, dtype=float), (N_ensemble, 1))

    # build a new dict instead of writing into the one the caller passed in.
    outputs = {**outputs, 'iters': iters}

    if subsample > 1 and N_steps >= subsample:
        # chunk averaging makes the plots easier to view & less resource
        # hungry: drop the tail so the length is evenly divisible, reshape
        # (N_chunks * chunklen) -> (N_chunks, chunklen), then mean per chunk.
        chunklen = subsample
        N_chunks = N_steps // chunklen
        N_used = N_chunks * chunklen
        outputs = jax.tree_util.tree_map(
            lambda n: n[:, :N_used].reshape(N_ensemble, N_chunks, chunklen).mean(axis=2),
            outputs,
        )

    # NaN in the last spot of every row breaks up the line between ensemble
    # members once everything is flattened for plotting.
    outputs = jax.tree_util.tree_map(
        lambda n: n.at[:, -1].set(np.nan).reshape(-1), outputs
    )

    iters = outputs['iters']

    pl.semilogy(iters, outputs['lr'], label='learning rate', linestyle='--', color='gray', alpha=.5)

    if 'v_sweep' in outputs:
        pl.semilogy(iters, outputs['v_sweep'], label='v sweep', linestyle='--', alpha=.5)

    if 'weight_norm' in outputs:
        pl.semilogy(iters, outputs['weight_norm'], label='weight norm', linestyle='--', alpha=.5)

    for name, values in outputs['lossterms'].items():
        pl.semilogy(iters, values, alpha=.3, label=f'train {name}')

    pl.legend()
    pl.grid(True)
    pl.ylim([1e-5, 1e3])


def plot_proposals(v_means, v_stds, testpts_known, proposal_vmeans, proposal_vstds, v_k, v_next_target, algo_params):
    """Scatter predicted v mean against v std on log-log axes.

    Test points are split into 'known' and 'unknown' by ``testpts_known``
    and drawn in different colours, together with the proposed points,
    vertical lines at ``v_k`` and ``v_next_target`` and the curve
    sigma_max(v) = sigma_max_abs + v * sigma_max_rel.

    Args:
        v_means: predicted value means at the test points.
        v_stds: predicted value standard deviations at the test points.
        testpts_known: boolean mask, same shape as ``v_means``; True where
            the test point counts as known.
        proposal_vmeans: value means of the proposed points.
        proposal_vstds: value standard deviations of the proposed points.
        v_k: x position of the first vertical line.
        v_next_target: x position of the second vertical line.
        algo_params: dict with keys 'sigma_max_abs' and 'sigma_max_rel'.
    """
    pl.xlabel('v mean')
    pl.ylabel('v std')

    # Hide points by replacing their x value with NaN. This has to be done
    # with where(): multiplying NaN with the mask does not work, because
    # NaN * False is NaN * 0 = NaN, which would hide every point.
    known = np.asarray(testpts_known).astype(bool)
    unknown_vmeans = np.where(known, np.nan, v_means)
    known_vmeans = np.where(known, v_means, np.nan)

    pl.loglog(unknown_vmeans, v_stds, '. ', alpha=.1, c='C1', label='unknown points')
    pl.loglog(known_vmeans, v_stds, '. ', alpha=.1, c='C0', label='known points')

    pl.loglog(proposal_vmeans, proposal_vstds, '. ', alpha=.5, c='green', label='proposed points')

    # vertical marker lines spanning the observed range of std devs.
    std_range = [v_stds.min(), v_stds.max()]
    pl.loglog([v_k, v_k], std_range, linestyle='--', color='black', alpha=.2, label='v_k')
    pl.loglog([v_next_target, v_next_target], std_range, linestyle='--', color='black', alpha=.2, label='v_next_target')

    # sigma_max as a function of v, up to half a decade above the largest mean.
    vmax = v_means.max()
    plot_vs = np.logspace(-4, np.log10(vmax)+0.5, 200)
    plot_sig_maxs = algo_params['sigma_max_abs'] + plot_vs * algo_params['sigma_max_rel']
    pl.loglog(plot_vs, plot_sig_maxs, linestyle='--', alpha=.5, label='$σ_{max}(v)$')

    pl.legend()
    pl.xlim([1e-2, 1e5])
    pl.ylim([1e-2, 1e5])


def orbits_plot_all(xx, yy, v_means, v_stds, v_stds_new, vk, vnext, proposals, forward_sols, backward_sols, problem_params, algo_params):
    """Draw a 2x2 overview on a 2D grid: value, proposals, trajectories.

    Subplots (all sharing their axes):
        221: NN value mean with the relevant level sets
        222: previous relative uncertainty with the proposals
        223: previous relative uncertainty with the forward trajectories
        224: new relative uncertainty with the backward trajectories

    "Relative uncertainty" is log10(v_std / sigma_max) with
    sigma_max = sigma_max_abs + v_means * sigma_max_rel, so that values < 0
    are good and values > 0 are bad. At the end the view is zoomed to the
    region where v_means <= 2 * vnext.

    Args:
        xx, yy: grid coordinates for the contour plots.
        v_means: value mean on the grid.
        v_stds: value std dev on the grid before the update (222, 223).
        v_stds_new: value std dev on the grid after the update (224).
        vk, vnext: value levels; contours are drawn at vk, vnext and
            vnext + (vnext - vk).
        proposals: proposed points; ``proposals.T`` is unpacked into the
            x and y arguments of ``pl.plot``.
        forward_sols: object whose ``ys`` is indexed as ``ys[:, :, 0]`` and
            ``ys[:, :, 1]`` for the x/y coordinates.
        backward_sols: object whose ``ys['x']`` is indexed the same way.
        problem_params: not used by this function.
        algo_params: dict with keys 'sigma_max_abs' and 'sigma_max_rel'.
    """
    # depends only on v_means, so it is shared by all uncertainty subplots.
    sigma_max = algo_params['sigma_max_abs'] + v_means * algo_params['sigma_max_rel']

    def plot_common():
        # level sets of the value function
        pl.contour(xx, yy, v_means, levels=[vk, vnext, vnext + (vnext - vk)], colors='black')
        # unit circle
        thetas = np.linspace(-np.pi, np.pi, 300)
        circle = np.array([np.sin(thetas), np.cos(thetas)]).T
        pl.plot(circle[:, 0], circle[:, 1], c='black', alpha=.1, linestyle='--')

    def plot_rel_uncertainty(stds):
        # filled contours of log10(std / sigma_max) with a colour scale that
        # is symmetric around 0, plus colorbar.
        rel_vstds = np.log10(stds / sigma_max)  # so that <0 good and >0 bad
        vmax_abs = np.max(np.abs(rel_vstds))
        pl.contourf(xx, yy, rel_vstds, cmap='bwr', vmin=-vmax_abs, vmax=vmax_abs, levels=30)
        pl.colorbar()

    # value function
    ax = pl.subplot(221)
    ax.set_aspect('equal')
    pl.contourf(xx, yy, v_means, levels=np.linspace(0, 2 * vnext, 20))
    pl.colorbar()
    plot_common()
    pl.xlabel('v mean')

    # now the proposals. proposal samples ('pool') not yet \o/
    ax = pl.subplot(222, sharex=ax, sharey=ax)
    ax.set_aspect('equal')
    pl.plot(*proposals.T, 'x', c='black', alpha=.1, label='proposals')
    plot_rel_uncertainty(v_stds)
    pl.xlabel('proposals, previous log10(sigma_v / sigma_max)')
    plot_common()
    # the proposals carry a label, so show it like the other subplots do.
    pl.legend()

    # forward trajectories
    ax = pl.subplot(223, sharex=ax, sharey=ax)
    ax.set_aspect('equal')
    plot_rel_uncertainty(v_stds)
    pl.plot(forward_sols.ys[:, :, 0].flatten(), forward_sols.ys[:, :, 1].flatten(), '.-', c='black', alpha=.3, label='forward sols')
    pl.xlabel('forward trajectories, previous log10(sigma_v / sigma_max)')
    plot_common()
    pl.legend()

    # backward trajectories :)
    ax = pl.subplot(224, sharex=ax, sharey=ax)
    ax.set_aspect('equal')
    plot_rel_uncertainty(v_stds_new)
    pl.plot(backward_sols.ys['x'][:, :, 0].flatten(), backward_sols.ys['x'][:, :, 1].flatten(), '.-', c='black', alpha=.3, label='backward sols')
    pl.xlabel('backward trajectories, new log10(sigma_v / sigma_max)')
    plot_common()
    pl.legend()

    # zoom in to the relevant part (applies to all subplots via shared axes)
    is_relevant = v_means <= vnext * 2
    pl.xlim([xx[is_relevant].min(), xx[is_relevant].max()])
    pl.ylim([yy[is_relevant].min(), yy[is_relevant].max()])


def plot_calibration(all_ys, pred_v_means, pred_v_stds):
    """Draw a calibration plot: observed vs. predicted fraction of data.

    For thresholds s between -5 and 5, the predicted fraction is the
    standard normal cdf at s, and the observed fraction is the share of
    normalised errors (prediction - label) / std that lie below s. A well
    calibrated model gives normally distributed normalised errors, i.e. a
    curve on the dashed diagonal.

    It may be questioned whether this plot is relevant here: the data is
    basically deterministic (except ODE solver error) and the aim is just to
    distinguish "inside" the known set from "outside" of it.

    Args:
        all_ys: dict whose 'v' entry holds the value labels.
        pred_v_means: predicted value means, same number of entries.
        pred_v_stds: predicted value standard deviations, same number of
            entries.
    """
    thresholds = np.linspace(-5, 5, 300)
    expected_fractions = jax.scipy.stats.norm.cdf(thresholds)

    # prediction error in units of the predicted standard deviation.
    errors = pred_v_means.flatten() - all_ys['v'].flatten()
    z_scores = errors / pred_v_stds.flatten()

    # drop NaN entries before counting.
    is_nan = np.isnan(z_scores)
    z_scores = z_scores[~is_nan]

    # (N, 1) < (300,) broadcasts to (N, 300); the mean over N is the
    # fraction of scores below each threshold.
    is_below = z_scores[:, None] < thresholds
    observed_fractions = np.mean(is_below, axis=0)

    pl.plot(expected_fractions, observed_fractions, '.-')
    pl.plot([0, 1], [0, 1], '--', c='black', alpha=.1)  # perfect calibration
    pl.xlabel('predicted fraction')
    pl.ylabel('observed fraction')


def plot_loss_distribution(loss_means):
    """Plot the empirical cdf of every loss term on a logarithmic x axis.

    Gives insight into the loss distribution. Only the values passed in are
    looked at -- i.e. the loss at the predicted mean; the std. dev. is
    ignored (maybe this is not exactly relevant though...).

    Args:
        loss_means: dict mapping a loss term name to an array of values of
            that term (per the original note: auxiliary outputs of the loss
            function, meaned across the NN ensemble axis). Arrays of any
            shape are flattened.
    """
    for name, values in loss_means.items():
        # np.sort returns a sorted copy for JAX and NumPy inputs alike,
        # whereas the .sort() method of a NumPy array sorts in place and
        # returns None.
        sorted_values = np.sort(np.ravel(values))
        fractions = np.linspace(0, 1, sorted_values.shape[0])
        pl.semilogx(sorted_values, fractions, label=name)

    pl.legend()


def plot_manifold(v_meanstds, vx_meanstds, vmap_params, problem_params):
    """Plot value and value gradient while only the angle is varied.

    The states are [0, 0, sin(theta), cos(theta), 0, 0, 0] for 300 angles
    theta in [-pi, pi], i.e. everything except the angle stays fixed. Top
    subplot: value mean with a 1 sigma band; bottom subplot: the 7 gradient
    components, each with its own 1 sigma band.

    Args:
        v_meanstds: callable ``(xs, vmap_params) -> (means, stds)``.
        vx_meanstds: callable ``(xs, vmap_params) -> (means, stds)``, whose
            outputs are indexed as ``[:, j]`` for j = 0..6.
        vmap_params: passed through to both callables.
        problem_params: dict; 'state_names' labels the gradient curves.
    """
    angles = np.linspace(-np.pi, np.pi, 300)

    # one 7-dimensional state per angle, shaped (300, 7).
    zero = np.zeros_like(angles)
    states = np.stack(
        [zero, zero, np.sin(angles), np.cos(angles), zero, zero, zero],
        axis=1,
    )

    v_mu, v_sigma = v_meanstds(states, vmap_params)

    top_ax = pl.subplot(211)
    pl.plot(angles, v_mu, label='value mean')
    pl.fill_between(
        angles, v_mu - v_sigma, v_mu + v_sigma,
        color='C0', alpha=.2, label='value 1σ confidence',
    )
    pl.legend()

    grad_mu, grad_sigma = vx_meanstds(states, vmap_params)

    pl.subplot(212, sharex=top_ax)
    pl.plot(angles, grad_mu, label=problem_params['state_names'])

    # restart the colour cycle so every band gets the colour of its curve.
    pl.gca().set_prop_cycle(None)

    for j in range(7):
        lower = grad_mu[:, j] - grad_sigma[:, j]
        upper = grad_mu[:, j] + grad_sigma[:, j]
        pl.fill_between(angles, lower, upper, alpha=.2)

    pl.legend()