diff --git a/NAMESPACE b/NAMESPACE index 0436c4cb..7d557ce7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -126,6 +126,7 @@ export(ppc_stat_freqpoly_grouped) export(ppc_stat_grouped) export(ppc_violin_grouped) export(rhat) +export(sbc_hist) export(scatter_style_np) export(theme_default) export(trace_style_np) diff --git a/R/sbc.R b/R/sbc.R new file mode 100644 index 00000000..65558848 --- /dev/null +++ b/R/sbc.R @@ -0,0 +1,188 @@ +#' Diagnostics for Simulation Based Calibration +#' +#' @export +#' @param ranks A list of matrices. See **Details**. +#' @param thin An integer indicating the thinning interval to use when plotting +#' so that the histograms consist of (close to) independent realizations. Set +#' the `thin` argument such that the resulting number of draws approximately +#' matches the effective sample size. +#' @param per_bin An integer indicating the number of histogram entries to +#' combine into a single bar. For best results, one plus the number of draws +#' from the posterior should be evenly divisible by the number of histogram +#' bins after thinning. For example, if there are `511` posterior draws (per +#' matrix in `ranks`) and `thin=4`, then after thinning there will be `128` +#' draws. If `per_bin=4`, then `128/4=32` histogram bars will be drawn. +#' @param worst How many parameters to show, or `NA` to plot all parameters. If +#' `worst` is not `NA` then parameters are ordered by the degree of +#' non-uniformity so, for example, `worst = 10` means to plot only the `10` +#' worst parameters. +#' @param prob The size of the interval plotted to show the expected behavior +#' under uniformity. The default is `prob=0.99`. +#' @param ... Currently ignored. +#' @template args-facet_args +#' +#' @details +#' Each element of the list `ranks` should be a matrix of rank comparison +#' results (encoded as `0` or `1`) associated with a single draw from the prior +#' distribution. (These are not actually "ranks" but can be used afterwards to +#' reconstruct (thinned) ranks.) The columns of each matrix are model parameters +#' and the rows are posterior draws from the model fit to the data generated +#' from the corresponding prior draw. An element is `0` or `1` depending on +#' whether the posterior draw is greater or less than the corresponding "true" +#' realization (from the prior). The matrices should have column names to +#' correctly label the parameters. +#' +#' @section Plot Descriptions: +#' \describe{ +#' \item{`sbc_hist()`}{ +#' SBC histogram from Talts et al. (2018). A separate plot (facet) is created +#' for each parameter. +#' } +#' } +#' +#' @template return-ggplot +#' +#' @references +#' Talts, S., Betancourt, M., Simpson, D., Vehtari, A., and Gelman, A. (2018). +#' Validating Bayesian Inference Algorithms with Simulation-Based Calibration. +#' arXiv preprint arXiv:1804.06788. \url{https://arxiv.org/abs/1804.06788} +#' +#' @seealso `rstan::sbc()` +#' +#' @examples +#' # create some fake inputs to use for sbc_hist() +#' set.seed(19) +#' pars <- paste0("beta[", 1:4, "]") +#' samples_per_prior <- 511 +#' n_replications <- 500 +#' ranks <- list() +#' for (n in 1:n_replications) { +#' r1 <- matrix(0, nrow=samples_per_prior, ncol=length(pars), +#' dimnames=list(NULL, pars)) +#' for (p1 in 1:length(pars)) { +#' r1[sample.int(samples_per_prior, floor(runif(1, 0, samples_per_prior))), p1] <- 1 +#' } +#' ranks[[n]] <- r1 +#' } +#' +#' color_scheme_set("purple") +#' sbc_hist(ranks) +#' sbc_hist(ranks, worst = NA) # uses original parameter ordering +#' sbc_hist(ranks, worst = 3, facet_args = list(labeller = ggplot2::label_parsed)) +#' +sbc_hist <- function(ranks, + ..., + thin = 4, + per_bin = 4, + worst = 16, + prob = 0.99, + facet_args = list()) { + check_ignored_arguments(...) + stopifnot(is.list(ranks), all(sapply(ranks, is.matrix))) + rows <- sapply(ranks, nrow) + cols <- sapply(ranks, ncol) + if (any(rows != rows[1]) || any(cols != cols[1])) { + stop("Not all matrices in 'ranks' have the same dimensions.") + } + + num_prior_draws <- length(ranks) + thinner <- seq(from = 1, to = nrow(ranks[[1]]), by = thin) + thinned_sample_size <- length(thinner) + u <- t(sapply(ranks, FUN = function(r) 1 + colSums(r[thinner, , drop = FALSE]))) + if (ncol(ranks[[1]]) == 1) { + u <- t(u) + dimnames(u) <- list(NULL, colnames(ranks[[1]])) + } + + if (!is.na(worst)) { + # order starting with worst (least uniform) + kl <- apply(u, 2, function(v) kl_uniform(v, num_prior_draws, thinned_sample_size)) + filter <- order(-kl)[1:min(worst, ncol(u))] + u <- u[, filter, drop = FALSE] + } + + pars <- parameter_names(u) + data <- data.frame( + Parameter = ordered(rep(pars, each = nrow(u)), levels = pars), + u = c(u) + ) + + num_bins <- thinned_sample_size / per_bin + if (thinned_sample_size %% per_bin != 0) { + warning(paste0("per_bin (", per_bin, ") does not evenly divide the ", + "number of samples per prior (", thinned_sample_size ,").")) + } + + # data for polygon showing expected behavior under uniformity + alpha <- 1 - prob + CI <- qbinom( + p = c(alpha / 2, 0.5, 1 - alpha / 2), + size = num_prior_draws, + prob = 1 / num_bins + ) + CI <- CI + c(-0.5, 0, 0.5) + offset <- 2 * per_bin + polygon_data <- data.frame( + x= c(-offset, 0, -offset, thinned_sample_size + offset, + thinned_sample_size, thinned_sample_size + offset, -offset), + y = c(CI[1], CI[2], CI[3], CI[3], CI[2], CI[1], CI[1]) + ) + + graph <- ggplot(data, aes_(x = ~ u)) + + geom_polygon( + aes_(x = ~ x, y = ~ y), + data = polygon_data, + fill = "lightgray", + color = NA, + alpha = 1 + ) + + geom_segment( + x = -offset, + xend = thinned_sample_size + offset, + y = CI[3], + yend = CI[3], + color = get_color("mid_highlight"), + alpha = 0.5, + size = 0.2 + ) + + geom_histogram( + bins = num_bins, + fill = get_color("mid"), + color = get_color("mid_highlight"), + size = 0.25, + na.rm = TRUE + ) + + geom_segment( + x = -offset, + xend = thinned_sample_size + offset, + y = CI[1], + yend = CI[1], + color = get_color("mid_highlight"), + alpha = 0.1, + size = 0.2 + ) + + scale_x_continuous( + name = "Rank statistic", + breaks = c(0, round(thinned_sample_size / 2), thinned_sample_size) + ) + + coord_cartesian(expand = FALSE) + + facet_args[["facets"]] <- ~ Parameter + graph + + do.call("facet_wrap", facet_args) + + bayesplot_theme_get() + + yaxis_text(FALSE) + + yaxis_title(FALSE) + + yaxis_ticks(FALSE) +} + + + +# internal ---------------------------------------------------------------- +kl_uniform <- function(v, num_prior_draws, samples_per_prior) { + # D_{KL}(v || uniform) + # https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence + expected_pr <- 1.0 / samples_per_prior + observed_pr <- table(v) / num_prior_draws + sum(observed_pr * log(observed_pr / expected_pr)) +} diff --git a/man/sbc_hist.Rd b/man/sbc_hist.Rd new file mode 100644 index 00000000..424534e2 --- /dev/null +++ b/man/sbc_hist.Rd @@ -0,0 +1,95 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sbc.R +\name{sbc_hist} +\alias{sbc_hist} +\title{Diagnostics for Simulation Based Calibration} +\usage{ +sbc_hist(ranks, ..., thin = 4, per_bin = 4, worst = 16, + prob = 0.99, facet_args = list()) +} +\arguments{ +\item{ranks}{A list of matrices. See \strong{Details}.} + +\item{...}{Currently ignored.} + +\item{thin}{An integer indicating the thinning interval to use when plotting +so that the histograms consist of (close to) independent realizations. Set +the \code{thin} argument such that the resulting number of draws approximately +matches the effective sample size.} + +\item{per_bin}{An integer indicating the number of histogram entries to +combine into a single bar. For best results, one plus the number of draws +from the posterior should be evenly divisible by the number of histogram +bins after thinning. For example, if there are \code{511} posterior draws (per +matrix in \code{ranks}) and \code{thin=4}, then after thinning there will be \code{128} +draws. If \code{per_bin=4}, then \code{128/4=32} histogram bars will be drawn.} + +\item{worst}{How many parameters to show, or \code{NA} to plot all parameters. If +\code{worst} is not \code{NA} then parameters are ordered by the degree of +non-uniformity so, for example, \code{worst = 10} means to plot only the \code{10} +worst parameters.} + +\item{prob}{The size of the interval plotted to show the expected behavior +under uniformity. The default is \code{prob=0.99}.} + +\item{facet_args}{A named list of arguments (other than \code{facets}) passed +to \code{\link[ggplot2:facet_wrap]{ggplot2::facet_wrap()}} or \code{\link[ggplot2:facet_grid]{ggplot2::facet_grid()}} +to control faceting.} +} +\value{ +A ggplot object that can be further customized using the \strong{ggplot2} package. +} +\description{ +Diagnostics for Simulation Based Calibration +} +\details{ +Each element of the list \code{ranks} should be a matrix of rank comparison +results (encoded as \code{0} or \code{1}) associated with a single draw from the prior +distribution. (These are not actually "ranks" but can be used afterwards to +reconstruct (thinned) ranks.) The columns of each matrix are model parameters +and the rows are posterior draws from the model fit to the data generated +from the corresponding prior draw. An element is \code{0} or \code{1} depending on +whether the posterior draw is greater or less than the corresponding "true" +realization (from the prior). The matrices should have column names to +correctly label the parameters. +} +\section{Plot Descriptions}{ + +\describe{ +\item{\code{sbc_hist()}}{ +SBC histogram from Talts et al. (2018). A separate plot (facet) is created +for each parameter. +} +} +} + +\examples{ +# create some fake inputs to use for sbc_hist() +set.seed(19) +pars <- paste0("beta[", 1:4, "]") +samples_per_prior <- 511 +n_replications <- 500 +ranks <- list() +for (n in 1:n_replications) { + r1 <- matrix(0, nrow=samples_per_prior, ncol=length(pars), + dimnames=list(NULL, pars)) + for (p1 in 1:length(pars)) { + r1[sample.int(samples_per_prior, floor(runif(1, 0, samples_per_prior))), p1] <- 1 + } + ranks[[n]] <- r1 +} + +color_scheme_set("purple") +sbc_hist(ranks) +sbc_hist(ranks, worst = NA) # uses original parameter ordering +sbc_hist(ranks, worst = 3, facet_args = list(labeller = ggplot2::label_parsed)) + +} +\references{ +Talts, S., Betancourt, M., Simpson, D., Vehtari, A., and Gelman, A. (2018). +Validating Bayesian Inference Algorithms with Simulation-Based Calibration. +arXiv preprint arXiv:1804.06788. \url{https://arxiv.org/abs/1804.06788} +} +\seealso{ +\code{rstan::sbc()} +} diff --git a/tests/testthat/test-sbc.R b/tests/testthat/test-sbc.R new file mode 100644 index 00000000..ccd27965 --- /dev/null +++ b/tests/testthat/test-sbc.R @@ -0,0 +1,44 @@ +library(bayesplot) +context("SBC") + +set.seed(19) +pars <- paste0("beta[", 1:2, "]") +samples_per_prior <- 511 +n_replications <- 50 +ranks <- list() +for (n in 1:n_replications) { + r1 <- matrix(0, nrow=samples_per_prior, ncol=length(pars), dimnames=list(NULL, pars)) + for (p1 in 1:length(pars)) { + r1[sample.int(samples_per_prior, floor(runif(1, 0, samples_per_prior))), p1] <- 1 + } + ranks[[n]] <- r1 +} +set.seed(NULL) + +test_that("sbc_hist returns a ggplot object", { + expect_gg(sbc_hist(ranks, worst = NA)) + + g <- sbc_hist(ranks) + expect_gg(g) + expect_equal(nlevels(g$data$Parameter), ncol(ranks[[1]])) + + g2 <- sbc_hist(ranks, worst = 1) + expect_gg(g2) + expect_equal(nlevels(g2$data$Parameter), 1) +}) + +test_that("sbc_hist throws correct warnings and errors", { + expect_warning(sbc_hist(ranks, per_bin = 27), + "does not evenly divide the number of samples per prior") + expect_warning(sbc_hist(ranks, thin = 3), + "does not evenly divide the number of samples per prior") + + ranks2 <- ranks + ranks2[[1]] <- ranks[[1]][, 1, drop=FALSE] + expect_error(sbc_hist(ranks2), "Not all matrices in 'ranks' have the same dimensions") + + ranks2[[1]] <- ranks[[1]][, 1, drop=TRUE] + expect_error(sbc_hist(ranks2), "all(sapply(ranks, is.matrix)) is not TRUE", fixed = TRUE) + + expect_error(sbc_hist(ranks[[1]]), "is.list(ranks) is not TRUE", fixed=TRUE) +})