Base GP models on gpflow 2 #1

Open · wants to merge 11 commits into base: master
bayesian_benchmarks/models/variationally_sparse_gp/models.py (101 changes: 56 additions & 45 deletions)
@@ -1,9 +1,11 @@
 import gpflow
 import numpy as np
+import tensorflow as tf
 from scipy.cluster.vq import kmeans2
 from scipy.stats import norm
 
-class RegressionModel(object):
+
+class RegressionModel:
     def __init__(self, is_test=False, seed=0):
         if is_test:
             class ARGS:
@@ -19,43 +21,49 @@ class ARGS:
                 initial_likelihood_var = 0.01
         self.ARGS = ARGS
         self.model = None
+        self.model_objective = None
 
     def fit(self, X, Y):
-        if X.shape[0] > self.ARGS.num_inducing:
-            Z = kmeans2(X, self.ARGS.num_inducing, minit='points')[0]
+        num_data, input_dim = X.shape
+
+        if num_data > self.ARGS.num_inducing:
+            Z, _ = kmeans2(X, self.ARGS.num_inducing, minit='points')
         else:
             # pad with random values
-            Z = np.concatenate([X, np.random.randn(self.ARGS.num_inducing - X.shape[0], X.shape[1])], 0)
+            Z = np.concatenate([X, np.random.randn(self.ARGS.num_inducing - num_data, input_dim)],
+                               axis=0)
 
         # make model if necessary
-        if not self.model:
-            kern = gpflow.kernels.RBF(X.shape[1], lengthscales=float(X.shape[1])**0.5)
-            lik = gpflow.likelihoods.Gaussian()
-            lik.variance = self.ARGS.initial_likelihood_var
+        if self.model is None:
+            data = (tf.Variable(X, trainable=False), tf.Variable(Y, trainable=False))
+            lengthscales = np.full(input_dim, float(input_dim)**0.5)
+            kernel = gpflow.kernels.SquaredExponential(lengthscales=lengthscales)
+            # Gaussian likelihood: use SGPR
+            self.model = gpflow.models.SGPR(data, kernel, inducing_variable=Z,
+                                            noise_variance=self.ARGS.initial_likelihood_var)
 
-            self.model = gpflow.models.SGPR(X, Y, kern, feat=Z)
-            self.model.likelihood.variance = lik.variance.read_value()
-            self.sess = self.model.enquire_session()
-            self.opt = gpflow.train.ScipyOptimizer()
+            self.model_objective = self.model.training_loss_closure()
 
         # we might have new data
-        self.model.X.assign(X, session=self.sess)
-        self.model.Y.assign(Y, session=self.sess)
-        self.model.feature.Z.assign(Z, session=self.sess)
+        self.model.data[0].assign(X)
+        self.model.data[1].assign(Y)
+        self.model.inducing_variable.Z.assign(Z)
 
-        self.opt.minimize(self.model, session=self.sess, maxiter=self.ARGS.iterations)
+        opt = gpflow.optimizers.Scipy()
+        opt.minimize(self.model_objective, self.model.trainable_variables,
+                     options=dict(maxiter=self.ARGS.iterations))
 
     def predict(self, Xs):
-        return self.model.predict_y(Xs, session=self.sess)
+        return self.model.predict_y(Xs)
 
     def sample(self, Xs, num_samples):
         m, v = self.predict(Xs)
-        N, D = np.shape(m)
+        N, L = np.shape(m)
         m, v = np.expand_dims(m, 0), np.expand_dims(v, 0)
-        return m + np.random.randn(num_samples, N, D) * (v ** 0.5)
+        return m + np.random.randn(num_samples, N, L) * (v ** 0.5)
 
 
-class ClassificationModel(object):
+class ClassificationModel:
     def __init__(self, K, is_test=False, seed=0):
         if is_test:
             class ARGS:
@@ -73,51 +81,54 @@ class ARGS:
         self.ARGS = ARGS
         self.K = K
         self.model = None
+        self.model_objective = None
 
     def fit(self, X, Y):
-        Z = kmeans2(X, self.ARGS.num_inducing, minit='points')[0] if X.shape[0] > self.ARGS.num_inducing else X.copy()
+        num_data, input_dim = X.shape
 
-        if not self.model:
+        if num_data > self.ARGS.num_inducing:
+            Z, _ = kmeans2(X, self.ARGS.num_inducing, minit='points')
+        else:
+            Z = X.copy()
+
+        if self.model is None:
             if self.K == 2:
                 lik = gpflow.likelihoods.Bernoulli()
-                num_latent = 1
+                num_latent_gps = 1
             else:
                 lik = gpflow.likelihoods.MultiClass(self.K)
-                num_latent = self.K
+                num_latent_gps = self.K
 
-            kern = gpflow.kernels.RBF(X.shape[1], lengthscales=float(X.shape[1]) ** 0.5)
-            self.model = gpflow.models.SVGP(X, Y, kern, lik,
-                                            feat=Z,
-                                            whiten=False,
-                                            num_latent=num_latent,
-                                            minibatch_size=None)
-
-            self.sess = self.model.enquire_session()
-            self.opt = gpflow.train.ScipyOptimizer()
+            lengthscales = np.full(input_dim, float(input_dim)**0.5)
+            kernel = gpflow.kernels.SquaredExponential(lengthscales=lengthscales)
+            self.model = gpflow.models.SVGP(kernel, lik,
+                                            inducing_variable=Z,
+                                            num_latent_gps=num_latent_gps,
+                                            num_data=num_data)
 
             iters = self.ARGS.iterations
 
         else:
            iters = self.ARGS.small_iterations
 
         # we might have new data
-        self.model.X.assign(X, session=self.sess)
-        self.model.Y.assign(Y, session=self.sess)
-        self.model.feature.Z.assign(Z, session=self.sess)
+        self.model.inducing_variable.Z.assign(Z)
 
         num_outputs = self.model.q_sqrt.shape[0]
-        self.model.q_mu.assign(np.zeros((self.ARGS.num_inducing, num_outputs)), session=self.sess)
-        self.model.q_sqrt.assign(np.tile(np.eye(self.ARGS.num_inducing)[None], [num_outputs, 1, 1]), session=self.sess)
+        self.model.q_mu.assign(np.zeros((self.ARGS.num_inducing, num_outputs)))
+        self.model.q_sqrt.assign(np.tile(np.eye(self.ARGS.num_inducing)[None], [num_outputs, 1, 1]))
 
-        self.opt.minimize(self.model, maxiter=iters, session=self.sess)
+        data = (tf.constant(X), tf.constant(Y))
+        model_objective = self.model.training_loss_closure(data)
+
+        opt = gpflow.optimizers.Scipy()
+        opt.minimize(model_objective, self.model.trainable_variables,
+                     options=dict(maxiter=iters))
 
     def predict(self, Xs):
-        m, v = self.model.predict_y(Xs, session=self.sess)
+        m, v = self.model.predict_y(Xs)
         if self.K == 2:
-            # convert Bernoulli to onehot
-            return np.concatenate([1 - m, m], 1)
+            # convert Bernoulli to one-hot
+            return np.concatenate([1 - m, m], axis=1)
         else:
             return m
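For reference, the GPflow 2 pattern this file settles on can be exercised on its own. The sketch below is not part of the PR: the toy data, shapes, and hyperparameter values are invented for illustration, and only the API usage (SGPR over tf.Variable-wrapped data, training_loss_closure, gpflow.optimizers.Scipy) mirrors the diff.

```python
import gpflow
import numpy as np
import tensorflow as tf

# Toy regression data; shapes and values are placeholders.
X = np.random.randn(100, 2)
Y = np.sin(X[:, :1]) + 0.1 * np.random.randn(100, 1)
Z = X[:20].copy()  # inducing points (the PR picks these with kmeans2)

# Holding the data in non-trainable tf.Variables lets us swap in new
# observations later with .assign() instead of rebuilding the model.
data = (tf.Variable(X, trainable=False), tf.Variable(Y, trainable=False))

kernel = gpflow.kernels.SquaredExponential(lengthscales=np.full(2, 2.0 ** 0.5))
model = gpflow.models.SGPR(data, kernel, inducing_variable=Z, noise_variance=0.01)

# training_loss_closure() returns a zero-argument callable, which is the
# form gpflow.optimizers.Scipy expects.
opt = gpflow.optimizers.Scipy()
opt.minimize(model.training_loss_closure(), model.trainable_variables,
             options=dict(maxiter=100))

mean, var = model.predict_y(X[:5])

# New data of the same shape can be swapped in without rebuilding:
# model.data[0].assign(X_new); model.data[1].assign(Y_new)
```

The tf.Variable wrapping is what makes the `self.model.data[0].assign(X)` lines in fit() work when the benchmark calls it repeatedly with fresh data.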



bayesian_benchmarks/models/variationally_sparse_gp_minibatch/models.py (139 changes: 85 additions & 54 deletions)
@@ -1,9 +1,16 @@
 import gpflow
-import tensorflow as tf
 import numpy as np
+import tensorflow as tf
 from scipy.cluster.vq import kmeans2
 from scipy.stats import norm
 
-class RegressionModel(object):
+try:
+    from tqdm import trange
+except ImportError:
+    trange = range
+
+
+class RegressionModel:
     def __init__(self, is_test=False, seed=0):
         if is_test:
             class ARGS:
@@ -25,51 +32,67 @@ class ARGS:
                 initial_likelihood_var = 0.01
         self.ARGS = ARGS
         self.model = None
+        self.model_objective = None
+        self._adam_opt = None
+        self._natgrad_opt = None
 
     def fit(self, X, Y):
-        if X.shape[0] > self.ARGS.num_inducing:
-            Z = kmeans2(X, self.ARGS.num_inducing, minit='points')[0]
+        num_data, input_dim = X.shape
+
+        if num_data > self.ARGS.num_inducing:
+            Z, _ = kmeans2(X, self.ARGS.num_inducing, minit='points')
         else:
             # pad with random values
-            Z = np.concatenate([X, np.random.randn(self.ARGS.num_inducing - X.shape[0], X.shape[1])], 0)
+            Z = np.concatenate([X, np.random.randn(self.ARGS.num_inducing - num_data, input_dim)],
+                               axis=0)
 
         # make model if necessary
-        if not self.model:
-            kern = gpflow.kernels.RBF(X.shape[1], lengthscales=float(X.shape[1])**0.5)
-            lik = gpflow.likelihoods.Gaussian()
-            lik.variance = self.ARGS.initial_likelihood_var
-            mb_size = self.ARGS.minibatch_size if X.shape[0] > self.ARGS.minibatch_size else None
-            self.model = gpflow.models.SVGP(X, Y, kern, lik, feat=Z, minibatch_size=mb_size)
-
-            var_list = [[self.model.q_mu, self.model.q_sqrt]]
-            self.model.q_mu.set_trainable(False)
-            self.model.q_sqrt.set_trainable(False)
-            self.ng = gpflow.train.NatGradOptimizer(gamma=self.ARGS.gamma).make_optimize_tensor(self.model, var_list=var_list)
-            self.adam = gpflow.train.AdamOptimizer(self.ARGS.adam_lr).make_optimize_tensor(self.model)
+        if self.model is None:
+            lengthscales = np.full(input_dim, float(input_dim)**0.5)
+            kernel = gpflow.kernels.SquaredExponential(lengthscales=lengthscales)
+            lik = gpflow.likelihoods.Gaussian(variance=self.ARGS.initial_likelihood_var)
+            self.model = gpflow.models.SVGP(kernel, likelihood=lik, inducing_variable=Z, num_data=num_data)
 
-            self.sess = self.model.enquire_session()
+            gpflow.set_trainable(self.model.q_mu, False)
+            gpflow.set_trainable(self.model.q_sqrt, False)
+            self._natgrad_opt = gpflow.optimizers.NaturalGradient(gamma=self.ARGS.gamma)
+            self._adam_opt = tf.optimizers.Adam(learning_rate=self.ARGS.adam_lr)
 
             iters = self.ARGS.iterations
 
         else:
             iters = self.ARGS.small_iterations
 
         # we might have new data
-        self.model.X.assign(X, session=self.sess)
-        self.model.Y.assign(Y, session=self.sess)
-        self.model.feature.Z.assign(Z, session=self.sess)
+        self.model.inducing_variable.Z.assign(Z)
+        num_outputs = self.model.q_sqrt.shape[0]
+        self.model.q_mu.assign(np.zeros((self.ARGS.num_inducing, num_outputs)))
+        self.model.q_sqrt.assign(np.tile(np.eye(self.ARGS.num_inducing)[None], [num_outputs, 1, 1]))
 
-        self.model.q_mu.assign(np.zeros((self.ARGS.num_inducing, Y.shape[1])), session=self.sess)
-        self.model.q_sqrt.assign(np.tile(np.eye(self.ARGS.num_inducing)[None], [Y.shape[1], 1, 1]), session=self.sess)
+        if num_data < self.ARGS.minibatch_size:
+            model_objective = self.model.training_loss_closure((X, Y))
+        else:
+            train_dataset = tf.data.Dataset.from_tensor_slices((X, Y)) \
+                .prefetch(num_data).repeat().shuffle(num_data)
+            train_iter = iter(train_dataset.batch(self.ARGS.minibatch_size))
+            model_objective = self.model.training_loss_closure(train_iter)
+
+        variational_params = [(self.model.q_mu, self.model.q_sqrt)]
 
-        for _ in range(iters):
-            self.sess.run(self.ng)
-            self.sess.run(self.adam)
-        self.model.anchor(session=self.sess)
+        @tf.function
+        def natgrad_step():
+            self._natgrad_opt.minimize(model_objective, var_list=variational_params)
+
+        @tf.function
+        def adam_step():
+            self._adam_opt.minimize(model_objective, var_list=self.model.trainable_variables)
+
+        for _ in trange(iters):
+            natgrad_step()
+            adam_step()
 
     def predict(self, Xs):
-        return self.model.predict_y(Xs, session=self.sess)
+        return self.model.predict_y(Xs)
 
     def sample(self, Xs, num_samples):
         m, v = self.predict(Xs)
@@ -78,7 +101,7 @@ def sample(self, Xs, num_samples):
         return m + np.random.randn(num_samples, N, D) * (v ** 0.5)
 
 
-class ClassificationModel(object):
+class ClassificationModel:
     def __init__(self, K, is_test=False, seed=0):
         if is_test:
             class ARGS:
@@ -98,54 +121,62 @@ class ARGS:
 
         self.K = K
         self.model = None
+        self.model_objective = None
+        self.opt = None
 
     def fit(self, X, Y):
-        Z = kmeans2(X, self.ARGS.num_inducing, minit='points')[0] if X.shape[0] > self.ARGS.num_inducing else X.copy()
+        num_data, input_dim = X.shape
 
-        if not self.model:
-            # NB mb_size does not change once the model is created
-            mb_size = self.ARGS.minibatch_size if X.shape[0] >= self.ARGS.minibatch_size else None
+        if num_data > self.ARGS.num_inducing:
+            Z, _ = kmeans2(X, self.ARGS.num_inducing, minit='points')
+        else:
+            Z = X.copy()
+
+        if self.model is None:
             if self.K == 2:
                 lik = gpflow.likelihoods.Bernoulli()
-                num_latent = 1
+                num_latent_gps = 1
             else:
                 lik = gpflow.likelihoods.MultiClass(self.K)
-                num_latent = self.K
+                num_latent_gps = self.K
 
-            kern = gpflow.kernels.RBF(X.shape[1], lengthscales=float(X.shape[1]) ** 0.5)
-            self.model = gpflow.models.SVGP(X, Y, kern, lik,
-                                            feat=Z,
-                                            whiten=False,
-                                            num_latent=num_latent,
-                                            minibatch_size=mb_size)
+            lengthscales = np.full(input_dim, float(input_dim)**0.5)
+            kernel = gpflow.kernels.SquaredExponential(lengthscales=lengthscales)
+            self.model = gpflow.models.SVGP(kernel, likelihood=lik, inducing_variable=Z,
+                                            num_latent_gps=num_latent_gps, num_data=num_data)
 
-            self.opt = gpflow.train.AdamOptimizer(self.ARGS.adam_lr)
+            self.opt = tf.optimizers.Adam(self.ARGS.adam_lr)
 
-            self.sess = self.model.enquire_session()
             iters = self.ARGS.iterations
 
         else:
             iters = self.ARGS.small_iterations
 
         # we might have new data
-        self.model.X.assign(X, session=self.sess)
-        self.model.Y.assign(Y, session=self.sess)
-        self.model.feature.Z.assign(Z, session=self.sess)
-
+        self.model.inducing_variable.Z.assign(Z)
         num_outputs = self.model.q_sqrt.shape[0]
-        self.model.q_mu.assign(np.zeros((self.ARGS.num_inducing, num_outputs)), session=self.sess)
-        self.model.q_sqrt.assign(np.tile(np.eye(self.ARGS.num_inducing)[None], [num_outputs, 1, 1]), session=self.sess)
+        self.model.q_mu.assign(np.zeros((self.ARGS.num_inducing, num_outputs)))
+        self.model.q_sqrt.assign(np.tile(np.eye(self.ARGS.num_inducing)[None], [num_outputs, 1, 1]))
 
-        self.opt.minimize(self.model, maxiter=iters, session=self.sess)
+        if num_data < self.ARGS.minibatch_size:
+            model_objective = self.model.training_loss_closure((X, Y))
+        else:
+            train_dataset = tf.data.Dataset.from_tensor_slices((X, Y)) \
+                .prefetch(num_data).repeat().shuffle(num_data)
+            train_iter = iter(train_dataset.batch(self.ARGS.minibatch_size))
+            model_objective = self.model.training_loss_closure(train_iter)
+
+        @tf.function
+        def adam_step():
+            self.opt.minimize(model_objective, var_list=self.model.trainable_variables)
+
+        for _ in trange(iters):
+            adam_step()
 
     def predict(self, Xs):
-        m, v = self.model.predict_y(Xs, session=self.sess)
+        m, v = self.model.predict_y(Xs)
         if self.K == 2:
             # convert Bernoulli to onehot
-            return np.concatenate([1 - m, m], 1)
+            return np.concatenate([1 - m, m], axis=1)
         else:
             return m
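For the minibatch file, the core of the new training loop is the split between a natural-gradient step on the variational distribution q(u) and an Adam step on everything else. The following is a hedged, self-contained sketch of that pattern, not the PR's code: the data, gamma, learning rate, batch size, and iteration count are placeholders chosen for illustration.

```python
import gpflow
import numpy as np
import tensorflow as tf

# Toy regression data; shapes and values are placeholders.
num_data, input_dim = 500, 3
X = np.random.randn(num_data, input_dim)
Y = np.sin(X[:, :1]) + 0.1 * np.random.randn(num_data, 1)
Z = X[:50].copy()

model = gpflow.models.SVGP(
    kernel=gpflow.kernels.SquaredExponential(lengthscales=np.full(input_dim, input_dim ** 0.5)),
    likelihood=gpflow.likelihoods.Gaussian(variance=0.01),
    inducing_variable=Z,
    num_data=num_data)  # num_data rescales the minibatch ELBO to the full dataset

# Keep q(u) out of Adam's reach; the natural-gradient optimizer owns it.
gpflow.set_trainable(model.q_mu, False)
gpflow.set_trainable(model.q_sqrt, False)

dataset = tf.data.Dataset.from_tensor_slices((X, Y)).shuffle(num_data).repeat()
train_iter = iter(dataset.batch(100))
objective = model.training_loss_closure(train_iter)

natgrad = gpflow.optimizers.NaturalGradient(gamma=0.1)
adam = tf.optimizers.Adam(0.01)
variational_params = [(model.q_mu, model.q_sqrt)]

@tf.function
def step():
    # Natural-gradient step on the variational parameters, then an Adam
    # step on the kernel, likelihood, and inducing-point parameters.
    natgrad.minimize(objective, var_list=variational_params)
    adam.minimize(objective, var_list=model.trainable_variables)

for _ in range(100):
    step()
```

Because q_mu and q_sqrt are marked non-trainable, they drop out of model.trainable_variables, so the two optimizers never touch the same parameters.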