From 3bf2b53286c9b6bd88dce54316925ac788b38b94 Mon Sep 17 00:00:00 2001 From: Henri Vuollekoski Date: Tue, 10 Apr 2018 21:13:50 +0300 Subject: [PATCH] Implemented model comparison. (#247) * Implemented model comparison. * Improve documentation * Address comment * Relax the requirement for equal n_samples --- CHANGELOG.rst | 8 +++- README.md | 2 + docs/api.rst | 7 ++++ docs/index.rst | 2 + elfi/__init__.py | 3 +- elfi/methods/model_selection.py | 59 ++++++++++++++++++++++++++++++ tests/unit/test_model_selection.py | 19 ++++++++++ 7 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 elfi/methods/model_selection.py create mode 100644 tests/unit/test_model_selection.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 71ca6094..fa65a4eb 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,9 +1,15 @@ Changelog ========= +dev +--- +- Implemented model selection (elfi.compare_models). See API documentation. + 0.7 (2017-11-30) ---------------- - +- Added new example: the stochastic Lotka-Volterra model +- Fix methods.bo.utils.minimize to be strictly within bounds +- Implemented the Two Stage Procedure, a method of summary-statistics diagnostics - Added the MaxVar acquisition method - Added the RandMaxVar acquisition method - Added the ExpIntVar acquisition method diff --git a/README.md b/README.md index fc6c944f..dc0b8d81 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ Other notable included algorithms and methods: - Bayesian Optimization - [No-U-Turn-Sampler](http://jmlr.org/papers/volume15/hoffman14a/hoffman14a.pdf), a Hamiltonian Monte Carlo MCMC sampler +ELFI also integrates tools for visualization, model comparison, diagnostics and post-processing. + See examples under [notebooks](https://github.com/elfi-dev/notebooks) to get started. Full documentation can be found at http://elfi.readthedocs.io/. 
Limited user-support may be asked from elfi-support.at.hiit.fi, but the diff --git a/docs/api.rst b/docs/api.rst index 913f4139..03791f1b 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -265,6 +265,13 @@ Inference API classes :members: :inherited-members: +**Model selection** + +.. currentmodule:: . + +.. autofunction:: elfi.compare_models + + Other ..... diff --git a/docs/index.rst b/docs/index.rst index c6a78876..d5102660 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -34,6 +34,8 @@ ELFI also has the following non LFI methods: .. _No-U-Turn-Sampler: http://jmlr.org/papers/volume15/hoffman14a/hoffman14a.pdf +Additionally, ELFI integrates tools for visualization, model comparison, diagnostics and post-processing. + .. toctree:: :maxdepth: 1 diff --git a/elfi/__init__.py b/elfi/__init__.py index 5187b668..508bc038 100644 --- a/elfi/__init__.py +++ b/elfi/__init__.py @@ -12,6 +12,7 @@ import elfi.model.tools as tools from elfi.client import get_client, set_client from elfi.methods.diagnostics import TwoStageSelection +from elfi.methods.model_selection import * from elfi.methods.parameter_inference import * from elfi.methods.post_processing import adjust_posterior from elfi.model.elfi_model import * @@ -24,4 +25,4 @@ __email__ = 'elfi-support@hiit.fi' # make sure __version_ is on the last non-empty line (read by setup.py) -__version__ = '0.7' +__version__ = '0.7_dev' diff --git a/elfi/methods/model_selection.py b/elfi/methods/model_selection.py new file mode 100644 index 00000000..794f6e4f --- /dev/null +++ b/elfi/methods/model_selection.py @@ -0,0 +1,59 @@ +"""This module contains methods for model comparison and selection.""" + +import numpy as np + + +def compare_models(sample_objs, model_priors=None): + """Find posterior probabilities for different models. + + The algorithm requires elfi.Sample objects from prerun inference methods. For example the + output from elfi.Rejection.sample is valid. 
The portions of samples for each model in the top + discrepancies are adjusted by each model's acceptance ratio and prior probability. + + The discrepancies (including summary statistics) must be comparable so that it is + meaningful to sort them! + + Parameters + ---------- + sample_objs : list of elfi.Sample + Resulting Sample objects from prerun inference models. The objects must include + a valid `discrepancies` attribute. + model_priors : array_like, optional + Prior probability of each model. Defaults to 1 / n_models. + + Returns + ------- + np.array + Posterior probabilities for the considered models. + + """ + n_models = len(sample_objs) + n_min = min([s.n_samples for s in sample_objs]) + + # concatenate discrepancy vectors + try: + discrepancies = np.concatenate([s.discrepancies for s in sample_objs]) + except ValueError: + raise ValueError("All Sample objects must include valid discrepancies.") + + # sort and take the smallest n_min + inds = np.argsort(discrepancies)[:n_min] + + # calculate the portions of accepted samples for each model in the top discrepancies + p_models = np.empty(n_models) + up_bound = 0 + for i in range(n_models): + low_bound = up_bound + up_bound += sample_objs[i].n_samples + p_models[i] = np.logical_and(inds >= low_bound, inds < up_bound).sum() + + # adjust by the number of simulations run + p_models[i] /= sample_objs[i].n_sim + + # adjust by the prior model probability + if model_priors is not None: + p_models[i] *= model_priors[i] + + p_models = p_models / p_models.sum() + + return p_models diff --git a/tests/unit/test_model_selection.py b/tests/unit/test_model_selection.py new file mode 100644 index 00000000..39f6b368 --- /dev/null +++ b/tests/unit/test_model_selection.py @@ -0,0 +1,19 @@ +import elfi +from elfi.examples import gauss, ma2 + + +def test_compare_models(): + m = gauss.get_model() + res1 = elfi.Rejection(m['d']).sample(100) + + # use less informative prior + m['mu'].become(elfi.Prior('uniform', -10, 50)) + res2 = 
elfi.Rejection(m['d']).sample(100) + + # use different simulator + m['gauss'].become(elfi.Simulator(ma2.MA2, m['mu'], m['sigma'], observed=m.observed['gauss'])) + res3 = elfi.Rejection(m['d']).sample(100) + + p = elfi.compare_models([res1, res2, res3]) + assert p[0] > p[1] + assert p[1] > p[2]