Robust surrogate model #483

Open · wants to merge 13 commits into dev
1 change: 1 addition & 0 deletions CHANGELOG.rst
@@ -1,6 +1,7 @@
Changelog
=========

- Add RobustGPyRegression model that can handle non-finite output values
- Use kernel copy to avoid pickle issue and allow BOLFI parallelisation with non-default kernel
- Restrict matplotlib version < 3.9 for compatibility with GPy
- Add option to use additive or multiplicative adjustment in any acquisition method
2 changes: 1 addition & 1 deletion elfi/__init__.py
@@ -26,7 +26,7 @@
from elfi.visualization.visualization import nx_draw as draw
from elfi.visualization.visualization import plot_params_vs_node
from elfi.visualization.visualization import plot_predicted_summaries
from elfi.methods.bo.gpy_regression import GPyRegression
from elfi.methods.bo.gpy_regression import GPyRegression, RobustGPyRegression

__author__ = 'ELFI authors'
__email__ = 'elfi-support@hiit.fi'
2 changes: 1 addition & 1 deletion elfi/methods/bo/acquisition.py
@@ -435,7 +435,7 @@ def evaluate_gradient(self, theta_new, t=None):
a = (self.eps - mean) / scale
b = np.sqrt(sigma2_n) / np.sqrt(sigma2_n + 2 * var)
grad_a = (-1. / scale) * grad_mean - \
((self.eps - mean) / (2. * (sigma2_n + var)**(1.5))) * grad_var
np.nan_to_num((self.eps - mean) / (2. * (sigma2_n + var)**(1.5))) * grad_var
grad_b = (-np.sqrt(sigma2_n) / (sigma2_n + 2 * var)**(1.5)) * grad_var

_phi_a = phi(a)
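For context (not part of the patch): a minimal sketch of why wrapping the term in np.nan_to_num matters. It assumes the RobustGPyRegression convention introduced below, where points predicted to fail get an infinite mean and zero predictive gradients, so without the wrapper the product -inf * 0 would put NaN into grad_a.

import numpy as np

eps, sigma2_n = 0.1, 1.0
mean, var, grad_var = np.inf, 1e-5, 0.0  # surrogate outputs at a predicted-failure point

naive = ((eps - mean) / (2. * (sigma2_n + var) ** 1.5)) * grad_var
safe = np.nan_to_num((eps - mean) / (2. * (sigma2_n + var) ** 1.5)) * grad_var

print(naive)  # nan: -inf multiplied by a zero gradient
print(safe)   # -0.0: the infinite term is clipped to a large finite value before the product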
249 changes: 248 additions & 1 deletion elfi/methods/bo/gpy_regression.py
@@ -41,7 +41,7 @@ def __init__(self,
Alternatives: "scg", "fmin_tnc", "simplex", "lbfgsb", "lbfgs", "sgd"
See also: paramz.Model.optimize()
max_opt_iters : int, optional
gp : GPy.model.GPRegression instance, optional
gp : GPy.models.GPRegression instance, optional
**gp_params
kernel : GPy.Kern
noise_var : float
@@ -362,3 +362,250 @@ def copy(self):
kopy.gp_params['mean_function'] = self.gp_params['mean_function'].copy()

return kopy


class RobustGPyRegression(GPyRegression):
"""Robust Gaussian Process regression using the GPy library.

Restricts the regression model to evidence with finite output values. Simulations that
produce non-finite output are considered failed.
"""

def __init__(self,
parameter_names=None,
bounds=None,
train_classifier=False,
thd=0,
clf=None,
clf_kernel=None,
**kwargs):
"""Initialize RobustGPyRegression.

Parameters
----------
parameter_names : list of str, optional
Names of parameter nodes. If None, sets dimension to 1.
bounds : dict, optional
The region where to estimate the posterior for each parameter in
model.parameters.
`{'parameter_name':(lower, upper), ... }`
If not supplied, defaults to (0, 1) bounds for all dimensions.
train_classifier : boolean, optional
Train a classifier to predict finite output probabilities.
thd : float, optional
Probability threshold: predictions at inputs whose predicted probability of finite
output falls below this value are treated as failed. A positive threshold implies
training the classifier.
clf : GPy.models.GPClassification instance, optional
Differentiates between inputs that return finite (1) and non-finite (0) outputs.
clf_kernel : GPy.Kern, optional
Kernel function, defaults to RBF.
**kwargs: see GPyRegression

"""
super().__init__(parameter_names, bounds, **kwargs)

self._X = np.zeros((0, self.input_dim))
self._Y = np.zeros((0, 1))

self.thd = thd
self.train_classifier = self.thd > 0 or train_classifier

self._clf = clf
self._clf_kernel = clf_kernel or GPy.kern.RBF(self.input_dim, ARD=True)

self.FAILED_OUTPUT = np.inf
self.FAILED_VAR = 0.00001

def success_proba(self, x):
"""Return predicted finite output probabilities at x.

Parameters
----------
x : np.array
numpy compatible (n, input_dim) array of points to evaluate

Returns
-------
np.array
with shape (x.shape[0], 1)

"""
# Ensure it's 2d for GPy
x = np.asanyarray(x).reshape((-1, self.input_dim))

if self._clf is not None:
return self._clf.predict(x)[0]
else:
return np.ones((len(x), 1))

def success_proba_gradients(self, x):
"""Return predicted finite output probability gradients at x.

Parameters
----------
x : np.array
numpy compatible (n, input_dim) array of points to evaluate

Returns
-------
np.array
with shape (x.shape[0], input_dim)

"""
# Ensure it's 2d for GPy
x = np.asanyarray(x).reshape((-1, self.input_dim))

if self._clf is not None:
return self._clf.predictive_gradients(x)[0][:, :, 0]
else:
return np.zeros_like(x)

def predict(self, x, **kwargs):
"""Return predicted mean and variance at x.

Parameters
----------
x : np.array
numpy compatible (n, input_dim) array of points to evaluate
if len(x.shape) == 1 will be cast to 2D with x[None, :]

Returns
-------
tuple
GP (mean, var) at x where
mean : np.array
with shape (x.shape[0], 1)
var : np.array
with shape (x.shape[0], 1)

"""
# Ensure it's 2d for GPy
x = np.asanyarray(x).reshape((-1, self.input_dim))
mean, var = super().predict(x)

if self.thd > 0 and self._clf is not None:
mask = (self.success_proba(x) < self.thd).reshape(-1)
mean[mask] = self.FAILED_OUTPUT
var[mask] = self.FAILED_VAR

return mean, var

def predictive_gradients(self, x):
"""Return the gradients of predicted mean and variance at x.

Parameters
----------
x : np.array
numpy compatible (n, input_dim) array of points to evaluate
if len(x.shape) == 1 will be cast to 2D with x[None, :]

Returns
-------
tuple
GP (grad_mean, grad_var) at x where
grad_mean : np.array
with shape (x.shape[0], input_dim)
grad_var : np.array
with shape (x.shape[0], input_dim)

"""
# Ensure it's 2d for GPy
x = np.asanyarray(x).reshape((-1, self.input_dim))
grad_mean, grad_var = super().predictive_gradients(x)

if self.thd > 0 and self._clf is not None:
mask = (self.success_proba(x) < self.thd).reshape(-1)
grad_mean[mask] = 0
grad_var[mask] = 0

return grad_mean, grad_var

def _make_classifier_instance(self, x, y, kernel):
return GPy.models.GPClassification(x, y, kernel=kernel)

def update(self, x, y, optimize=False):
"""Update the surrogate model with new data.

Parameters
----------
x : np.array
y : np.array
optimize : bool, optional
Whether to optimize hyperparameters.

"""
# Must cast these as 2d for GPy
x = x.reshape((-1, self.input_dim))
y = y.reshape((-1, 1))

self._X = np.r_[self._X, x]
self._Y = np.r_[self._Y, y]

# Update regression model without failed simulations
mask = np.isfinite(y).reshape(-1)
if mask.any():
super().update(x[mask], y[mask], optimize=False)
elif self._gp is None:
raise RuntimeError("No finite outputs available for model initialisation.")

# Update classification model
if self.train_classifier:
labels = np.isfinite(self._Y).astype(int)
if self._clf is not None:
# Reconstruct with new data
self._clf = self._make_classifier_instance(self._X, labels, self._clf.kern.copy())
elif not mask.all():
# Initialise
self._clf = self._make_classifier_instance(self._X, labels, self._clf_kernel)

if optimize:
self.optimize()

def optimize(self):
"""Optimize GP hyperparameters."""
super().optimize()
if self._clf is not None:
try:
self._clf.optimize(self.optimizer, max_iters=self.max_opt_iters)
except np.linalg.LinAlgError:
logger.warning("Numerical error in GP optimization. Stopping optimization")

@property
def n_evidence(self):
"""Return the number of observed samples."""
return len(self._Y)

@property
def n_valid_evidence(self):
"""Return the number of valid observed samples."""
return np.sum(np.isfinite(self._Y))

@property
def X(self):
"""Return input evidence."""
return self._X

@property
def Y(self):
"""Return output evidence."""
Y = self._Y.copy()
Y[~np.isfinite(Y)] = self.FAILED_OUTPUT
return Y

@property
def classifier_instance(self):
"""Return the classifier instance."""
return self._clf

def copy(self):
"""Return a copy of current instance."""
kopy = super().copy()

if self._clf:
x = self._clf.X.copy()
y = self._clf.Y.copy()
kopy._clf = self._make_classifier_instance(x, y, self._clf.kern.copy())

if self._clf_kernel:
kopy._clf_kernel = self._clf_kernel.copy()

return kopy
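A minimal usage sketch (not part of the diff), mirroring the functional test added below; the model m, its discrepancy node 'd' and the parameter bounds are illustrative placeholders:

import elfi

bounds = {'mu_1': (0, 5), 'mu_2': (0, 5)}
target_model = elfi.RobustGPyRegression(m.parameter_names, bounds=bounds, thd=0.5)
bolfi = elfi.BOLFI(m['d'], initial_evidence=20, target_model=target_model, seed=123)
bolfi.fit(n_evidence=100)

# Inputs whose predicted probability of finite output falls below thd are masked:
# predict() returns (FAILED_OUTPUT, FAILED_VAR) and predictive_gradients() returns zeros.
print(target_model.n_evidence, target_model.n_valid_evidence)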
13 changes: 11 additions & 2 deletions elfi/visualization/visualization.py
@@ -450,8 +450,10 @@ def plot_gp(gp, parameter_names, axes=None, resol=50,
shape = (n_plots, n_plots)
axes, kwargs = _create_axes(axes, shape, **kwargs)

x_evidence = gp.X
y_evidence = gp.Y
valid_inds = np.isfinite(gp.Y).squeeze()
x_evidence = gp.X[valid_inds]
y_evidence = gp.Y[valid_inds]
x_evidence_failed = gp.X[~valid_inds]
if const is None:
const = x_evidence[np.argmin(y_evidence), :]
bounds = bounds or gp.bounds
@@ -488,6 +490,13 @@
color="red",
alpha=0.7,
s=5)
if len(x_evidence_failed) > 0:
axes[jy, ix].scatter(x_evidence_failed[:, ix],
x_evidence_failed[:, jy],
marker="^",
color="blue",
alpha=0.5,
s=7)

if true_params is not None:
axes[jy, ix].plot([true_params[parameter_names[ix]],
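A short sketch (reusing the names from the example above) of how the pair plot would be produced with the robust surrogate; finite evidence is drawn as red dots and failed simulations as blue triangles:

from elfi.visualization.visualization import plot_gp

plot_gp(bolfi.target_model, m.parameter_names)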
54 changes: 54 additions & 0 deletions tests/functional/test_failure_robust.py
@@ -0,0 +1,54 @@
from functools import partial

import numpy as np
import pytest

import elfi
from elfi.examples.gauss import euclidean_multidim, gauss_nd_mean


def toy_sim(*mu, cov, batch_size=1, random_state=None):
out = gauss_nd_mean(*mu, cov_matrix=cov, batch_size=batch_size, random_state=random_state)
failed = np.sqrt(np.sum(np.power(mu, 2), axis=0)) < 2.5
out[failed] = np.nan
return out


def get_model(true_params, seed):
cov = (0.1 * np.diag(np.ones((2,))) + 0.5 * np.ones((2, 2)))
sim = partial(toy_sim, cov=cov)

random_state = np.random.RandomState(seed)
obs = sim(*true_params, random_state=random_state)

m = elfi.ElfiModel()
mu_1 = elfi.Prior('uniform', 0, 5, model=m)
mu_2 = elfi.Prior('uniform', 0, 5, model=m)
y = elfi.Simulator(sim, mu_1, mu_2, observed=obs)
mean = elfi.Summary(partial(np.mean, axis=1), y)
d = elfi.Discrepancy(euclidean_multidim, mean)
return m


@pytest.mark.slowtest
def test_failure_robust_BOLFI():
seed = 123
true_params = [3, 3]
m = get_model(true_params, seed)
bounds = {'mu_1': (0, 5), 'mu_2': (0, 5)}
target_model = elfi.RobustGPyRegression(m.parameter_names, bounds=bounds, thd=0.5)
bolfi = elfi.BOLFI(m['d'], initial_evidence=20, target_model=target_model, seed=seed)
post = bolfi.fit(n_evidence=100)

# check that optimisation avoided infeasible parameter combinations
assert bolfi.target_model.n_valid_evidence > 90

# check model minimum
res_1 = bolfi.extract_result()
assert np.isclose(res_1.x_min['mu_1'], 3, atol=0.35)
assert np.isclose(res_1.x_min['mu_2'], 3, atol=0.35)

# check posterior mean
res_2 = bolfi.sample(500)
assert np.isclose(np.mean(res_2.samples['mu_1']), 3, atol=0.35)
assert np.isclose(np.mean(res_2.samples['mu_2']), 3, atol=0.35)