Implemented model comparison. (#247)

* Implemented model comparison. * Improve documentation * Address comment * Relax the requirement for equal n_samples
elfi-dev · Apr 10, 2018 · 3bf2b53 · 3bf2b53
1 parent 6b46971
commit 3bf2b53
Show file tree

Hide file tree

Showing 7 changed files with 98 additions and 2 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,9 +1,15 @@
 Changelog
 =========
 
+dev
+---
+- Implemented model selection (elfi.compare_models). See API documentation.
+
 0.7 (2017-11-30)
 ----------------
-
+- Added new example: the stochastic Lotka-Volterra model
+- Fix methods.bo.utils.minimize to be strictly within bounds
+- Implemented the Two Stage Procedure, a method of summary-statistics diagnostics
 - Added the MaxVar acquisition method
 - Added the RandMaxVar acquisition method
 - Added the ExpIntVar acquisition method

diff --git a/README.md b/README.md
@@ -28,6 +28,8 @@ Other notable included algorithms and methods:
 - Bayesian Optimization
 - [No-U-Turn-Sampler](http://jmlr.org/papers/volume15/hoffman14a/hoffman14a.pdf), a Hamiltonian Monte Carlo MCMC sampler
 
+ELFI also integrates tools for visualization, model comparison, diagnostics and post-processing.
+
 See examples under [notebooks](https://github.com/elfi-dev/notebooks) to get started. Full
 documentation can be found at http://elfi.readthedocs.io/. Limited user-support may be
 asked from elfi-support.at.hiit.fi, but the 

diff --git a/docs/api.rst b/docs/api.rst
@@ -265,6 +265,13 @@ Inference API classes
    :members:
    :inherited-members:
 
+**Model selection**
+
+.. currentmodule:: .
+
+.. autofunction:: elfi.compare_models
+
+
 Other
 .....
 

diff --git a/docs/index.rst b/docs/index.rst
@@ -34,6 +34,8 @@ ELFI also has the following non LFI methods:
 
 .. _No-U-Turn-Sampler: http://jmlr.org/papers/volume15/hoffman14a/hoffman14a.pdf
 
+Additionally, ELFI integrates tools for visualization, model comparison, diagnostics and post-processing.
+
 
 .. toctree::
     :maxdepth: 1

diff --git a/elfi/__init__.py b/elfi/__init__.py
@@ -12,6 +12,7 @@
 import elfi.model.tools as tools
 from elfi.client import get_client, set_client
 from elfi.methods.diagnostics import TwoStageSelection
+from elfi.methods.model_selection import *
 from elfi.methods.parameter_inference import *
 from elfi.methods.post_processing import adjust_posterior
 from elfi.model.elfi_model import *
@@ -24,4 +25,4 @@
 __email__ = 'elfi-support@hiit.fi'
 
 # make sure __version__ is on the last non-empty line (read by setup.py)
-__version__ = '0.7'
+__version__ = '0.7_dev'
diff --git a/elfi/methods/model_selection.py b/elfi/methods/model_selection.py
@@ -0,0 +1,59 @@
+"""This module contains methods for model comparison and selection."""
+
+import numpy as np
+
+
def compare_models(sample_objs, model_priors=None):
    """Find posterior probabilities for different models.

    The algorithm requires elfi.Sample objects from prerun inference methods. For example the
    output from elfi.Rejection.sample is valid. The portion of samples for each model in the top
    discrepancies is adjusted by each model's acceptance ratio and prior probability.

    The discrepancies (including summary statistics) must be comparable so that it is
    meaningful to sort them!

    Parameters
    ----------
    sample_objs : list of elfi.Sample
        Resulting Sample objects from prerun inference models. Each object must provide
        `n_samples`, `n_sim` and a valid `discrepancies` attribute holding `n_samples` values.
    model_priors : array_like, optional
        Prior probability of each model. Defaults to 1 / n_models.

    Returns
    -------
    np.array
        Posterior probabilities for the considered models.

    Raises
    ------
    ValueError
        If `sample_objs` is empty, if any object lacks valid discrepancies, if the number of
        discrepancies does not match the reported sample counts, if `model_priors` does not
        contain exactly one entry per model, or if the resulting weights cannot be normalized.

    """
    n_models = len(sample_objs)
    if n_models == 0:
        raise ValueError("At least one Sample object is required.")

    sizes = [s.n_samples for s in sample_objs]
    n_min = min(sizes)

    # concatenate discrepancy vectors
    try:
        discrepancies = np.concatenate([s.discrepancies for s in sample_objs])
    except ValueError as err:
        raise ValueError("All Sample objects must include valid discrepancies.") from err

    # Flatten so that column vectors are sorted as one sequence; argsort works along the last
    # axis and would otherwise return only zeros for an (n, 1) array.
    discrepancies = discrepancies.ravel()
    if discrepancies.size != sum(sizes):
        raise ValueError("The number of discrepancies must match n_samples in each Sample "
                         "object.")

    # sort and take the smallest n_min
    top = np.argsort(discrepancies)[:n_min]

    # label every concatenated position with the index of the model it came from and count
    # how many of the top discrepancies belong to each model
    owners = np.repeat(np.arange(n_models), sizes)
    weights = np.bincount(owners[top], minlength=n_models).astype(float)

    # adjust by the number of simulations run
    weights /= np.array([s.n_sim for s in sample_objs], dtype=float)

    # adjust by the prior model probability (a uniform prior cancels out in normalization)
    if model_priors is not None:
        priors = np.asarray(model_priors, dtype=float)
        if priors.shape != (n_models,):
            raise ValueError("model_priors must contain exactly one probability per model.")
        weights *= priors

    total = weights.sum()
    if not np.isfinite(total) or total <= 0:
        raise ValueError("Model probabilities cannot be normalized: the total weight is "
                         "zero or not finite.")

    return weights / total
diff --git a/tests/unit/test_model_selection.py b/tests/unit/test_model_selection.py
@@ -0,0 +1,19 @@
+import elfi
+from elfi.examples import gauss, ma2
+
+
def test_compare_models():
    """Check that compare_models ranks three progressively altered models in order."""
    n_samples = 100
    model = gauss.get_model()

    # baseline: the unmodified Gaussian example model
    samples = [elfi.Rejection(model['d']).sample(n_samples)]

    # same model, but with a less informative prior on mu
    wide_prior = elfi.Prior('uniform', -10, 50)
    model['mu'].become(wide_prior)
    samples.append(elfi.Rejection(model['d']).sample(n_samples))

    # additionally swap the simulator for MA2 while keeping the observed data
    observed = model.observed['gauss']
    ma2_sim = elfi.Simulator(ma2.MA2, model['mu'], model['sigma'], observed=observed)
    model['gauss'].become(ma2_sim)
    samples.append(elfi.Rejection(model['d']).sample(n_samples))

    probs = elfi.compare_models(samples)
    first, second, third = probs[0], probs[1], probs[2]
    assert first > second
    assert second > third