Hyperparameter Fitting (Empirical Bayes) Animated

[1]:
import matplotlib as mpl; mpl.use('pgf')
[2]:
%matplotlib inline
[3]:
import numpy as np

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

import tensorflow_probability as tfp

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

from collections import defaultdict

from matplotlib import animation
from IPython.display import HTML
from etudes.gaussian_processes import gp_sample_custom, dataframe_from_gp_samples
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/compat/v2_compat.py:88: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
Instructions for updating:
non-resource variables are not supported in the long term
[4]:
golden_ratio = 0.5 * (1 + np.sqrt(5))
golden_size = lambda width: (width, width / golden_ratio)
[5]:
width = 10

rc = {
    "figure.figsize": golden_size(width),
    "text.usetex": True,
}

sns.set(context="notebook", style="ticks", palette="colorblind", font="serif", rc=rc)
[6]:
# shortcuts
tfd = tfp.distributions
kernels = tfp.math.psd_kernels

# constants
n_features = 1 # dimensionality
n_index_points = 256 # nbr of index points
n_samples = 8 # nbr of GP prior samples
jitter = 1e-6
kernel_cls = kernels.ExponentiatedQuadratic

num_epochs = 250

seed = 42 # set random seed for reproducibility
random_state = np.random.RandomState(seed)

# index points
X_q = np.linspace(-1.0, 1.0, n_index_points).reshape(-1, n_features)

# kernel specification
amplitude_grid, length_scale_grid = np.ogrid[5e-2:4.0:100j, 1e-5:5e-1:100j]
kernel_grid = kernel_cls(amplitude=amplitude_grid, length_scale=length_scale_grid)

Synthetic dataset

[7]:
n_train = 20 # nbr training points in synthetic dataset
observation_noise_variance = 1e-1

f = lambda x: np.sin(12.0*x) + 0.66*np.cos(25.0*x)

X = random_state.rand(n_train, n_features) - 0.5
eps = observation_noise_variance * random_state.randn(n_train, n_features)
Y = np.squeeze(f(X) + eps)

fig, ax = plt.subplots()

ax.plot(X_q, f(X_q), label="true")
ax.scatter(X, Y, marker='x', color='k', label="noisy observations")

ax.legend()

ax.set_xlim(-0.5, 0.5)
ax.set_xlabel('$x$')
ax.set_ylabel('$y$')

plt.show()
../_images/gaussian_processes_old_gp_hyperparameter_fitting_empirical_bayes_animation_8_0.png

Log marginal likelihood

[8]:
gp_grid = tfd.GaussianProcess(
    kernel=kernel_grid,
    index_points=X,
    observation_noise_variance=observation_noise_variance
)
nll_grid = - gp_grid.log_prob(Y)

with tf.Session() as sess:
    nll_grid_value = sess.run(nll_grid)
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/linalg/linear_operator_lower_triangular.py:158: calling LinearOperator.__init__ (from tensorflow.python.ops.linalg.linear_operator) with graph_parents is deprecated and will be removed in a future version.
Instructions for updating:
Do not pass `graph_parents`.  They will  no longer be used.
[9]:
_amplitude_grid, _length_scale_grid = np.broadcast_arrays(amplitude_grid, length_scale_grid)

fig, ax = plt.subplots()

contours = ax.contour(_amplitude_grid, _length_scale_grid, nll_grid_value, cmap="Spectral_r")

fig.colorbar(contours, ax=ax)
ax.clabel(contours, fmt='%.1f')

ax.set_xlabel(r"amplitude $\sigma$")
ax.set_ylabel(r"lengthscale $\ell$")

plt.show()
../_images/gaussian_processes_old_gp_hyperparameter_fitting_empirical_bayes_animation_11_0.png

Empirical Bayes (maximizing the log marginal likelihood)

[10]:
amplitude = tf.exp(tf.Variable(np.float64(0)), name='amplitude')
length_scale = tf.exp(tf.Variable(np.float64(-1)), name='length_scale')
observation_noise_variance = tf.exp(tf.Variable(np.float64(-5)), name='observation_noise_variance')
kernel = kernel_cls(amplitude=amplitude, length_scale=length_scale)
gp = tfd.GaussianProcess(
    kernel=kernel,
    index_points=X,
    observation_noise_variance=observation_noise_variance
)
[11]:
nll = - gp.log_prob(Y)
nll
[11]:
<tf.Tensor 'Neg_1:0' shape=() dtype=float64>
[12]:
optimizer = tf.train.AdamOptimizer(learning_rate=.05, beta1=.5, beta2=.99)
optimize = optimizer.minimize(nll)
[13]:
history = defaultdict(list)

with tf.Session() as sess:

    sess.run(tf.global_variables_initializer())

    for i in range(num_epochs):
        (_, nll_value, amplitude_value, length_scale_value,
         observation_noise_variance_value) = sess.run([optimize, nll, amplitude, length_scale, observation_noise_variance])
        history["nll"].append(nll_value)
        history["amplitude"].append(amplitude_value)
        history["length_scale"].append(length_scale_value)
        history["observation_noise_variance"].append(observation_noise_variance_value)
[14]:
fig, ax = plt.subplots()

contours = ax.contour(_amplitude_grid, _length_scale_grid, nll_grid_value, cmap="Spectral_r")

sns.lineplot(x='amplitude', y='length_scale',
             sort=False, data=pd.DataFrame(history), alpha=0.8, ax=ax)

fig.colorbar(contours, ax=ax)
ax.clabel(contours, fmt='%.1f')

ax.set_xlabel(r"amplitude $\sigma$")
ax.set_ylabel(r"lengthscale $\ell$")

plt.show()
../_images/gaussian_processes_old_gp_hyperparameter_fitting_empirical_bayes_animation_17_0.png

GP posterior predictive animated

[15]:
kernel_history = kernel_cls(amplitude=history.get("amplitude"), length_scale=history.get("length_scale"))
gprm_history = tfd.GaussianProcessRegressionModel(
    kernel=kernel_history, index_points=X_q, observation_index_points=X, observations=Y,
    observation_noise_variance=history.get("observation_noise_variance"), jitter=jitter
)
gprm_mean = gprm_history.mean()
gprm_stddev = gprm_history.stddev()
[16]:
with tf.Session() as sess:
    gprm_mean_value, gprm_stddev_value = sess.run([gprm_mean, gprm_stddev])
[17]:
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(14, 5))

line_mean, = ax1.plot(X_q, gprm_mean_value[0], c="steelblue")
line_stddev_lower, = ax1.plot(X_q, gprm_mean_value[0] - 3*gprm_stddev_value[0],
                              c="steelblue", alpha=0.4)
line_stddev_upper, = ax1.plot(X_q, gprm_mean_value[0] + 3*gprm_stddev_value[0],
                              c="steelblue", alpha=0.4)

ax1.scatter(X, Y, marker='x', color='k', label="noisy observations")

ax1.set_xlabel('$x$')
ax1.set_ylabel('$y$')
ax1.set_ylim(-3, 3)

_amplitude_grid, _length_scale_grid = np.broadcast_arrays(amplitude_grid, length_scale_grid)
contours = ax2.contour(_amplitude_grid, _length_scale_grid, nll_grid_value, cmap="Spectral_r")

path, = ax2.plot([], [], c='k')
point, = ax2.plot([], [], 'ko')

text = ax2.annotate('angle,\nround4',
                    xy=(0.95, 0.95), xycoords='axes fraction',
                    ha="right", va="top",
                    bbox=dict(boxstyle="round", fc="1.0"))

fig.colorbar(contours, ax=ax2)
ax2.clabel(contours, fmt='%.1f')

ax2.set_xlabel(r"amplitude $\sigma$")
ax2.set_ylabel(r"lengthscale $\ell$")

plt.show()
../_images/gaussian_processes_old_gp_hyperparameter_fitting_empirical_bayes_animation_21_0.png
[18]:
def animate(i):

    line_mean.set_data(X_q, gprm_mean_value[i])
    line_stddev_lower.set_data(X_q, gprm_mean_value[i] - 3*gprm_stddev_value[i])
    line_stddev_upper.set_data(X_q, gprm_mean_value[i] + 3*gprm_stddev_value[i])

    path.set_data(history["amplitude"][:i], history["length_scale"][:i])
    point.set_data(history["amplitude"][i], history["length_scale"][i])

    text.set_text("epoch: {i:3d}\nnll: {nll:.2f}".format(i=i, nll=history["nll"][i]))

    return line_mean, line_stddev_lower, line_stddev_upper, path, point, text
[19]:
anim = animation.FuncAnimation(fig, animate, frames=num_epochs,
                               interval=60, repeat_delay=5, blit=True)
[20]:
HTML(anim.to_html5_video())
[20]:

GP prior samples animated

[21]:
gp_history = tfd.GaussianProcess(kernel=kernel_history, index_points=X_q, jitter=jitter)
gp_history_samples = gp_sample_custom(gp_history, n_samples, seed=seed)
[22]:
with tf.Session() as sess:
    gp_history_samples_value = sess.run(gp_history_samples)
[23]:
fig, ax = plt.subplots()

lines = ax.plot(X_q, gp_history_samples_value[::, -1].T)

ax.set_xlabel('$x$')
ax.set_ylabel('$y$')
ax.set_ylim(-3, 3)

plt.show()
../_images/gaussian_processes_old_gp_hyperparameter_fitting_empirical_bayes_animation_28_0.png
[24]:
def animate(i):
    for j, line in enumerate(lines):
        line.set_data(X_q, gp_history_samples_value[j, i])
    return lines
[25]:
anim = animation.FuncAnimation(fig, animate, frames=num_epochs,
                               interval=60, repeat_delay=5, blit=True)
[26]:
HTML(anim.to_html5_video())
[26]: