From 3bf2b53286c9b6bd88dce54316925ac788b38b94 Mon Sep 17 00:00:00 2001
From: Henri Vuollekoski <henri.vuollekoski@gmail.com>
Date: Tue, 10 Apr 2018 21:13:50 +0300
Subject: [PATCH] Implemented model comparison. (#247)

* Implemented model comparison.

* Improve documentation

* Address comment

* Relax the requirement for equal n_samples
---
 CHANGELOG.rst                      |  8 +++-
 README.md                          |  2 +
 docs/api.rst                       |  7 ++++
 docs/index.rst                     |  2 +
 elfi/__init__.py                   |  3 +-
 elfi/methods/model_selection.py    | 59 ++++++++++++++++++++++++++++++
 tests/unit/test_model_selection.py | 19 ++++++++++
 7 files changed, 98 insertions(+), 2 deletions(-)
 create mode 100644 elfi/methods/model_selection.py
 create mode 100644 tests/unit/test_model_selection.py

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 71ca6094..fa65a4eb 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,9 +1,15 @@
 Changelog
 =========
 
+dev
+---
+- Implemented model selection (elfi.compare_models). See API documentation.
+
 0.7 (2017-11-30)
 ----------------
-
+- Added new example: the stochastic Lotka-Volterra model
+- Fixed methods.bo.utils.minimize to be strictly within bounds
+- Implemented the Two Stage Procedure, a method of summary-statistics diagnostics
 - Added the MaxVar acquisition method
 - Added the RandMaxVar acquisition method
 - Added the ExpIntVar acquisition method
diff --git a/README.md b/README.md
index fc6c944f..dc0b8d81 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,8 @@ Other notable included algorithms and methods:
 - Bayesian Optimization
 - [No-U-Turn-Sampler](http://jmlr.org/papers/volume15/hoffman14a/hoffman14a.pdf), a Hamiltonian Monte Carlo MCMC sampler
 
+ELFI also integrates tools for visualization, model comparison, diagnostics and post-processing.
+
 See examples under [notebooks](https://github.com/elfi-dev/notebooks) to get started. Full
 documentation can be found at http://elfi.readthedocs.io/. Limited user-support may be
 asked from elfi-support.at.hiit.fi, but the 
diff --git a/docs/api.rst b/docs/api.rst
index 913f4139..03791f1b 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -265,6 +265,13 @@ Inference API classes
    :members:
    :inherited-members:
 
+**Model selection**
+
+.. currentmodule:: .
+
+.. autofunction:: elfi.compare_models
+
+
 Other
 .....
 
diff --git a/docs/index.rst b/docs/index.rst
index c6a78876..d5102660 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -34,6 +34,8 @@ ELFI also has the following non LFI methods:
 
 .. _No-U-Turn-Sampler: http://jmlr.org/papers/volume15/hoffman14a/hoffman14a.pdf
 
+Additionally, ELFI integrates tools for visualization, model comparison, diagnostics and post-processing.
+
 
 .. toctree::
     :maxdepth: 1
diff --git a/elfi/__init__.py b/elfi/__init__.py
index 5187b668..508bc038 100644
--- a/elfi/__init__.py
+++ b/elfi/__init__.py
@@ -12,6 +12,7 @@
 import elfi.model.tools as tools
 from elfi.client import get_client, set_client
 from elfi.methods.diagnostics import TwoStageSelection
+from elfi.methods.model_selection import *
 from elfi.methods.parameter_inference import *
 from elfi.methods.post_processing import adjust_posterior
 from elfi.model.elfi_model import *
@@ -24,4 +25,4 @@
 __email__ = 'elfi-support@hiit.fi'
 
 # make sure __version_ is on the last non-empty line (read by setup.py)
-__version__ = '0.7'
+__version__ = '0.7_dev'
diff --git a/elfi/methods/model_selection.py b/elfi/methods/model_selection.py
new file mode 100644
index 00000000..794f6e4f
--- /dev/null
+++ b/elfi/methods/model_selection.py
@@ -0,0 +1,59 @@
+"""This module contains methods for model comparison and selection."""
+
+import numpy as np
+
+
+def compare_models(sample_objs, model_priors=None):
+    """Find posterior probabilities for different models.
+
+    The algorithm requires elfi.Sample objects from prerun inference methods. For example the
+    output from elfi.Rejection.sample is valid. The portions of samples for each model in the top
+    discrepancies are adjusted by each model's acceptance ratio and prior probability.
+
+    The discrepancies (including summary statistics) must be comparable so that it is
+    meaningful to sort them!
+
+    Parameters
+    ----------
+    sample_objs : list of elfi.Sample
+        Resulting Sample objects from prerun inference models. The objects must include
+        a valid `discrepancies` attribute.
+    model_priors : array_like, optional
+        Prior probability of each model. Defaults to 1 / n_models.
+
+    Returns
+    -------
+    np.array
+        Posterior probabilities for the considered models.
+
+    Raises
+    ------
+    ValueError
+        If the inputs are empty or inconsistent, or if no model receives a positive weight.
+
+    """
+    sizes = [s.n_samples for s in sample_objs]
+    if not sizes:
+        raise ValueError("At least one Sample object is required.")
+    if model_priors is not None and len(model_priors) != len(sizes):
+        raise ValueError("model_priors must have one entry per Sample object.")
+
+    # concatenate discrepancy vectors; ravel so column-shaped arrays are ranked element-wise
+    try:
+        discrepancies = np.concatenate([s.discrepancies for s in sample_objs]).ravel()
+    except ValueError as err:
+        raise ValueError("All Sample objects must include valid discrepancies.") from err
+
+    # sort, take the smallest n_min and count how many of them originate from each model
+    inds = np.argsort(discrepancies)[:min(sizes)]
+    bounds = np.cumsum([0] + sizes)
+    p_models = np.array([np.count_nonzero((inds >= low) & (inds < up))
+                         for low, up in zip(bounds[:-1], bounds[1:])], dtype=float)
+    # adjust by the number of simulations run and by the prior model probability
+    p_models /= np.array([s.n_sim for s in sample_objs], dtype=float)
+    if model_priors is not None:
+        p_models *= np.asarray(model_priors, dtype=float)
+    total = p_models.sum()
+    if not total > 0:  # also true for NaN; dividing would silently return NaN probabilities
+        raise ValueError("No model received a positive weight; cannot normalize.")
+    return p_models / total
diff --git a/tests/unit/test_model_selection.py b/tests/unit/test_model_selection.py
new file mode 100644
index 00000000..39f6b368
--- /dev/null
+++ b/tests/unit/test_model_selection.py
@@ -0,0 +1,19 @@
+import elfi
+from elfi.examples import gauss, ma2
+
+
+def test_compare_models():
+    """Check that compare_models ranks three variants of the gauss example in the expected order."""
+    m = gauss.get_model()
+    res1 = elfi.Rejection(m['d']).sample(100)
+
+    # use less informative prior (the same model object is reused, so res1 is drawn first)
+    m['mu'].become(elfi.Prior('uniform', -10, 50))
+    res2 = elfi.Rejection(m['d']).sample(100)
+
+    # use different simulator, passing on the data observed for the 'gauss' node
+    m['gauss'].become(elfi.Simulator(ma2.MA2, m['mu'], m['sigma'], observed=m.observed['gauss']))
+    res3 = elfi.Rejection(m['d']).sample(100)
+    p = elfi.compare_models([res1, res2, res3])
+    assert p[0] > p[1]  # NOTE(review): no seed is set; this ordering is presumably stochastic
+    assert p[1] > p[2]