mcmc.py
"""MCMC sampling methods."""
import logging
import numpy as np
logger = logging.getLogger(__name__)
# TODO: combine ESS and Rhat? Consider transforming parameters to the allowed
# region to increase the acceptance ratio.
def eff_sample_size(chains):
"""Calculate the effective sample size for 1 or more chains.
See:
Gelman, Carlin, Stern, Dunson, Vehtari, Rubin: Bayesian Data Analysis, 2013.
Stan modeling language user's guide and reference manual, v. 2.14.0.
Parameters
----------
chains : np.array of shape (N,) or (M, N)
Samples of a parameter from an MCMC algorithm. No burn-in subtracted here!
Returns
-------
ess : float
"""
chains = np.atleast_2d(chains)
n_chains, n_samples = chains.shape
means = np.mean(chains, axis=1)
variances = np.var(chains, ddof=1, axis=1)
var_between = 0 if n_chains == 1 else n_samples * np.var(means, ddof=1)
var_within = np.mean(variances)
var_pooled = ((n_samples - 1.) * var_within + var_between) / n_samples
# autocovariances for lags 1..n_samples
# https://en.wikipedia.org/wiki/Autocorrelation#Estimation
n_padded = int(2**np.ceil(1 + np.log2(n_samples)))
freqs = np.fft.rfft(chains - means[:, None], n_padded)
autocov = np.fft.irfft(np.abs(freqs)**2)[:, :n_samples].real
autocov = autocov / np.arange(n_samples, 0, -1)
estimator_sum = 0.
lag = 1
while lag < n_samples:
# estimate multi-chain autocorrelation using variogram
temp = 1. - (var_within - np.mean(autocov[:, lag])) / var_pooled
# only use the first non-negative autocorrelations to avoid noise
if temp >= 0:
estimator_sum += temp
lag += 1
else:
break
ess = n_chains * n_samples / (1. + 2. * estimator_sum)
return ess
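# Minimal usage sketch (illustrative only): for two strongly autocorrelated AR(1)
# chains, the effective sample size should fall well below the raw count of
# 2 * 1000 draws. The AR coefficient 0.9 is an arbitrary demonstration choice.
if __name__ == "__main__":
    _rng = np.random.RandomState(42)
    _chains = np.zeros((2, 1000))
    for _t in range(1, 1000):
        # AR(1) recursion: each draw depends on the previous one
        _chains[:, _t] = 0.9 * _chains[:, _t - 1] + _rng.randn(2)
    print("ESS:", eff_sample_size(_chains))  # expect far fewer than 2000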
def gelman_rubin(chains):
r"""Calculate the Gelman--Rubin convergence statistic.
Also known as the potential scale reduction factor, or \hat{R}.
Uses the split version, as in Stan.
See:
Gelman, Carlin, Stern, Dunson, Vehtari, Rubin: Bayesian Data Analysis, 2013.
Gelman, A. and D. B. Rubin: Inference from iterative simulation using
multiple sequences (with discussion). Statistical Science, 7:457-511, 1992.
Stan modeling language user's guide and reference manual, v. 2.14.0.
Parameters
----------
chains : np.array of shape (M, N)
Samples of a parameter from an MCMC algorithm, 1 row per chain. No burn-in subtracted here!
Returns
-------
psrf : float
        Should be below 1.1, or at least below 1.2, for all parameters to support convergence.
"""
chains = np.atleast_2d(chains)
n_chains, n_samples = chains.shape
# split chains in the middle
n_chains *= 2
n_samples //= 2 # drop 1 if odd
chains = chains[:, :2 * n_samples].reshape((n_chains, n_samples))
means = np.mean(chains, axis=1)
variances = np.var(chains, ddof=1, axis=1)
var_between = n_samples * np.var(means, ddof=1)
var_within = np.mean(variances)
var_pooled = ((n_samples - 1.) * var_within + var_between) / n_samples
# potential scale reduction factor, should be close to 1
psrf = np.sqrt(var_pooled / var_within)
return psrf
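# Minimal usage sketch (illustrative only): four independent chains drawn from the
# same normal distribution are well mixed by construction, so the potential scale
# reduction factor should be close to 1.
if __name__ == "__main__":
    _rng = np.random.RandomState(0)
    print("R-hat:", gelman_rubin(_rng.randn(4, 500)))  # expect a value near 1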
def nuts(n_iter,
params0,
target,
grad_target,
n_adapt=None,
target_prob=0.6,
max_depth=5,
seed=0,
info_freq=100,
max_retry_inits=20,
stepsize=None):
r"""Sample the target using the NUTS algorithm.
No-U-Turn Sampler, an improved version of the Hamiltonian (Markov Chain) Monte Carlo sampler.
Based on Algorithm 6 in
    Hoffman & Gelman, JMLR 15, 1351-1381, 2014.
Parameters
----------
n_iter : int
        The number of iterations, including n_adapt and any other warmup iterations.
params0 : np.array
Initial values for sampled parameters.
target : function
The target's log density to sample (possibly unnormalized).
grad_target : function
The gradient of target.
n_adapt : int, optional
The number of automatic adjustments to stepsize. Defaults to n_iter/2.
target_prob : float, optional
Desired average acceptance probability. (Parameter \delta in the original paper.)
max_depth : int, optional
Maximum recursion depth.
seed : int, optional
Seed for pseudo-random number generator.
info_freq : int, optional
How often to log progress to loglevel INFO.
    max_retry_inits : int, optional
        How many times to retry finding the initial stepsize (if a trial steps outside the
        region allowed by the priors).
    stepsize : float, optional
        Initial stepsize (will still be adapted). If not given, it is found by trial and error.
Returns
-------
samples : np.array
Samples from the MCMC algorithm, including those during adaptation.
"""
random_state = np.random.RandomState(seed)
n_adapt = n_adapt if n_adapt is not None else n_iter // 2
logger.info("NUTS: Performing {} iterations with {} adaptation steps.".format(n_iter, n_adapt))
target0 = target(params0)
if np.isinf(target0):
raise ValueError("NUTS: Bad initialization point {}, logpdf -> -inf.".format(params0))
# ********************************
# Find reasonable initial stepsize
# ********************************
if stepsize is None:
grad0 = grad_target(params0)
logger.debug("NUTS: Trying to find initial stepsize from point {} with gradient {}.".
format(params0, grad0))
init_tries = 0
        while init_tries < max_retry_inits:  # might step into a region disallowed by the priors
stepsize = np.exp(-init_tries)
init_tries += 1
momentum0 = random_state.randn(*params0.shape)
# leapfrog
momentum1 = momentum0 + 0.5 * stepsize * grad0
params1 = params0 + stepsize * momentum1
momentum1 += 0.5 * stepsize * grad_target(params1)
joint0 = target0 - 0.5 * momentum0.dot(momentum0)
joint1 = target(params1) - 0.5 * momentum1.dot(momentum1)
if np.isfinite(joint1):
break
else:
if init_tries == max_retry_inits:
raise ValueError(
"NUTS: Cannot find acceptable stepsize starting from point {}. All "
"trials ended in region with 0 probability.".format(params0))
# logger.debug("momentum0 {}, momentum1 {}, params1 {}, joint0 {}, joint1 {}"
# .format(momentum0, momentum1, params1, joint0, joint1))
logger.debug("NUTS: Problem finding acceptable stepsize, now {}. Retrying {}/{}."
.format(stepsize, init_tries, max_retry_inits))
plusminus = 1 if np.exp(joint1 - joint0) > 0.5 else -1
factor = 2. if plusminus == 1 else 0.5
while factor * np.exp(plusminus * (joint1 - joint0)) > 1.:
stepsize *= factor
            if stepsize == 0. or stepsize > 1e7:  # bounds as in Stan
raise SystemExit("NUTS: Found invalid stepsize {} starting from point {}."
.format(stepsize, params0))
# leapfrog
momentum1 = momentum0 + 0.5 * stepsize * grad0
params1 = params0 + stepsize * momentum1
momentum1 += 0.5 * stepsize * grad_target(params1)
joint1 = target(params1) - 0.5 * momentum1.dot(momentum1)
logger.debug("NUTS: Set initial stepsize {}.".format(stepsize))
    # Some parameters from the NUTS paper, used for adapting the stepsize
    target_stepsize = np.log(10. * stepsize)
    log_avg_stepsize = 0.
    accept_ratio = 0.  # running average of (target_prob - observed acceptance); tends to 0
    shrinkage = 0.05  # controls shrinkage of log stepsize towards target_stepsize
    ii_offset = 10.  # stabilizes early adaptation
    discount = -0.75  # downweights early iterations in the stepsize average
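    # For reference, the dual-averaging updates applied below during adaptation
    # (Hoffman & Gelman 2014, Sec. 3.2), written in this code's variable names:
    #   accept_ratio <- (1 - 1/(ii + ii_offset)) * accept_ratio
    #                   + (target_prob - observed acceptance) / (ii + ii_offset)
    #   log(stepsize) <- target_stepsize - sqrt(ii) / shrinkage * accept_ratio
    #   log_avg_stepsize <- ii**discount * log(stepsize)
    #                       + (1 - ii**discount) * log_avg_stepsize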
# ********
# Sampling
# ********
samples = np.empty((n_iter + 1, ) + params0.shape)
samples[0, :] = params0
n_diverged = 0 # counter for proposals whose error diverged
n_outside = 0 # counter for proposals outside priors (pdf=0)
n_total = 0 # total number of proposals
for ii in range(1, n_iter + 1):
momentum0 = random_state.randn(*params0.shape)
samples_prev = samples[ii - 1, :]
log_joint0 = target(samples_prev) - 0.5 * momentum0.dot(momentum0)
log_slicevar = log_joint0 - random_state.exponential()
samples[ii, :] = samples_prev
params_left = samples_prev
params_right = samples_prev
momentum_left = momentum0
momentum_right = momentum0
depth = 0
n_ok = 1
        all_ok = True  # tracks the no-U-turn and no-divergence criteria
while all_ok and depth <= max_depth:
direction = 1 if random_state.rand() < 0.5 else -1
if direction == -1:
params_left, momentum_left, _, _, params1, n_sub, sub_ok, mh_ratio, n_steps, \
is_div, is_out = _build_tree_nuts(
params_left, momentum_left, log_slicevar, -stepsize, depth, log_joint0,
target, grad_target, random_state)
else:
_, _, params_right, momentum_right, params1, n_sub, sub_ok, mh_ratio, n_steps, \
is_div, is_out = _build_tree_nuts(
params_right, momentum_right, log_slicevar, stepsize, depth, log_joint0,
target, grad_target, random_state)
if sub_ok == 1:
if random_state.rand() < float(n_sub) / n_ok:
samples[ii, :] = params1 # accept proposal
n_ok += n_sub
            if not is_out:  # don't count proposals outside the allowed region as divergences
n_diverged += is_div
n_outside += is_out
n_total += n_steps
all_ok = sub_ok and ((params_right - params_left).dot(momentum_left) >= 0) \
and ((params_right - params_left).dot(momentum_right) >= 0)
depth += 1
if depth > max_depth:
logger.debug("NUTS: Maximum recursion depth {} exceeded.".format(max_depth))
# adjust stepsize according to target acceptance ratio
if ii <= n_adapt:
accept_ratio = (1. - 1. / (ii + ii_offset)) * accept_ratio \
+ (target_prob - float(mh_ratio) / n_steps) / (ii + ii_offset)
log_stepsize = target_stepsize - np.sqrt(ii) / shrinkage * accept_ratio
log_avg_stepsize = ii ** discount * log_stepsize + \
(1. - ii ** discount) * log_avg_stepsize
stepsize = np.exp(log_stepsize)
elif ii == n_adapt + 1: # adaptation/warmup finished
stepsize = np.exp(log_avg_stepsize) # final stepsize
n_diverged = 0
n_outside = 0
n_total = 0
logger.info("NUTS: Adaptation/warmup finished. Sampling...")
logger.debug("NUTS: Set final stepsize {}.".format(stepsize))
if ii % info_freq == 0 and ii < n_iter:
logger.info("NUTS: Iterations performed: {}/{}...".format(ii, n_iter))
info_str = "NUTS: Acceptance ratio: {:.3f}".format(float(n_iter - n_adapt) / n_total)
if n_outside > 0:
info_str += ". After warmup {} proposals were outside of the region allowed by priors " \
"and rejected, decreasing acceptance ratio.".format(n_outside)
logger.info(info_str)
if n_diverged > 0:
logger.warning("NUTS: Diverged proposals after warmup (i.e. n_adapt={} steps): {}".format(
n_adapt, n_diverged))
return samples[1:, :]
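# Minimal usage sketch (illustrative only): sampling a standard bivariate normal,
# whose log density and gradient are simple enough to write inline. Post-warmup
# sample means should be close to zero.
if __name__ == "__main__":
    _samples = nuts(
        n_iter=2000,
        params0=np.array([1., -1.]),
        target=lambda x: -0.5 * x.dot(x),  # log N(0, I) up to an additive constant
        grad_target=lambda x: -x,  # gradient of the log density
        seed=0)
    print("NUTS mean:", _samples[1000:].mean(axis=0))  # expect roughly [0, 0]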
def _build_tree_nuts(params, momentum, log_slicevar, step, depth, log_joint0, target, grad_target,
random_state):
"""Recursively build a balanced binary tree needed by NUTS.
Based on Algorithm 6 in
Hoffman & Gelman, JMLR 15, 1351-1381, 2014.
"""
# Base case: one leapfrog step
if depth == 0:
momentum1 = momentum + 0.5 * step * grad_target(params)
params1 = params + step * momentum1
momentum1 = momentum1 + 0.5 * step * grad_target(params1)
log_joint = target(params1) - 0.5 * momentum1.dot(momentum1)
n_ok = float(log_slicevar <= log_joint)
sub_ok = log_slicevar < (1000. + log_joint) # check for diverging error
is_out = False
if not sub_ok:
if np.isinf(target(params1)): # logpdf(params1) = -inf i.e. pdf(params1) = 0
is_out = True
else:
logger.debug(
"NUTS: Diverging error: log_joint={}, params={}, params1={}, momentum={}, "
"momentum1={}.".format(log_joint, params, params1, momentum, momentum1))
mh_ratio = 0. # reject
else:
mh_ratio = min(1., np.exp(log_joint - log_joint0))
return params1, momentum1, params1, momentum1, params1, n_ok, sub_ok, mh_ratio, 1., \
not sub_ok, is_out
else:
# Recursion to build subtrees, doubling size
params_left, momentum_left, params_right, momentum_right, params1, n_sub, sub_ok, \
mh_ratio, n_steps, is_div, is_out = _build_tree_nuts(
params, momentum, log_slicevar, step, depth - 1, log_joint0, target,
grad_target, random_state)
if sub_ok: # recurse further
if step < 0:
params_left, momentum_left, _, _, params2, n_sub2, sub_ok, mh_ratio2, n_steps2, \
is_div, is_out = _build_tree_nuts(
params_left, momentum_left, log_slicevar,
step, depth - 1, log_joint0, target, grad_target, random_state)
else:
_, _, params_right, momentum_right, params2, n_sub2, sub_ok, mh_ratio2, n_steps2, \
is_div, is_out = _build_tree_nuts(
params_right, momentum_right, log_slicevar,
step, depth - 1, log_joint0, target, grad_target, random_state)
if n_sub2 > 0:
if float(n_sub2) / (n_sub + n_sub2) > random_state.rand():
params1 = params2 # accept move
mh_ratio += mh_ratio2
n_steps += n_steps2
sub_ok = sub_ok and ((params_right - params_left).dot(momentum_left) >= 0) \
and ((params_right - params_left).dot(momentum_right) >= 0)
n_sub += n_sub2
return params_left, momentum_left, params_right, momentum_right, params1, n_sub, sub_ok, \
mh_ratio, n_steps, is_div, is_out
def metropolis(n_samples, params0, target, sigma_proposals, warmup=0, seed=0):
"""Sample the target with a Metropolis Markov Chain Monte Carlo using Gaussian proposals.
Parameters
----------
n_samples : int
The number of requested samples.
params0 : np.array
Initial values for each sampled parameter.
target : function
The target log density to sample (possibly unnormalized).
sigma_proposals : np.array
Standard deviations for Gaussian proposals of each parameter.
    warmup : int, optional
        Number of warmup samples to discard.
seed : int, optional
Seed for pseudo-random number generator.
Returns
-------
    samples : np.array
        Samples from the target distribution, warmup samples excluded.
"""
random_state = np.random.RandomState(seed)
samples = np.empty((n_samples + warmup + 1, ) + params0.shape)
samples[0, :] = params0
target_current = target(params0)
if np.isinf(target_current):
raise ValueError(
"Metropolis: Bad initialization point {},logpdf -> -inf.".format(params0))
n_accepted = 0
for ii in range(1, n_samples + warmup + 1):
samples[ii, :] = samples[ii - 1, :] + sigma_proposals * random_state.randn(*params0.shape)
target_prev = target_current
target_current = target(samples[ii, :])
if ((np.exp(target_current - target_prev) < random_state.rand())
or np.isinf(target_current)
or np.isnan(target_current)): # reject proposal
samples[ii, :] = samples[ii - 1, :]
target_current = target_prev
else:
n_accepted += 1
logger.info(
"{}: Total acceptance ratio: {:.3f}".format(__name__,
float(n_accepted) / (n_samples + warmup)))
return samples[(1 + warmup):, :]
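# Minimal usage sketch (illustrative only): Metropolis sampling of the same standard
# bivariate normal as above, with unit proposal standard deviations; the returned
# chain's mean should be close to zero.
if __name__ == "__main__":
    _samples = metropolis(
        n_samples=5000,
        params0=np.array([2., 2.]),
        target=lambda x: -0.5 * x.dot(x),  # log N(0, I) up to an additive constant
        sigma_proposals=np.array([1., 1.]),
        warmup=500,
        seed=0)
    print("Metropolis mean:", _samples.mean(axis=0))  # expect roughly [0, 0]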