diff --git a/DESCRIPTION b/DESCRIPTION index d5ed0fa50..e0dfc741b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: cmdstanr Title: R Interface to 'CmdStan' -Version: 0.2.2 -Date: 2020-12-03 +Version: 0.3.0 +Date: 2020-12-17 Authors@R: c(person(given = "Jonah", family = "Gabry", role = c("aut", "cre"), email = "jsg2201@columbia.edu"), @@ -26,11 +26,11 @@ Roxygen: list(markdown = TRUE, r6 = FALSE) SystemRequirements: CmdStan (https://mc-stan.org/users/interfaces/cmdstan) Imports: checkmate, + data.table, jsonlite (>= 1.2.0), posterior (>= 0.1.3), processx (>= 3.4.5), - R6 (>= 2.4.0), - data.table + R6 (>= 2.4.0) Suggests: bayesplot, knitr, diff --git a/NEWS.md b/NEWS.md index 74d1142a0..8d7cc822c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# Items for next tagged release +# cmdstanr 0.3.0 ### Bug fixes @@ -29,6 +29,10 @@ objects. (#390) * The `$optimize()` method now supports all of CmdStan's tolerance-related arguments for (L)BFGS. (#398) +* The documentation for the R6 methods now uses `@param`, which allows package +developers to import the CmdStanR documentation using roxygen2's +`@inheritParams`. (#408) + # cmdstanr 0.2.2 ### Bug fixes diff --git a/R/args.R b/R/args.R index aa03b1068..9f39bebe8 100644 --- a/R/args.R +++ b/R/args.R @@ -829,7 +829,7 @@ validate_seed <- function(seed, num_procs) { #' @return An integer vector of length `num_procs`. 
maybe_generate_seed <- function(seed, num_procs) { if (is.null(seed)) { - seed <- sample(.Machine$integer.max, num_procs) + seed <- base::sample(.Machine$integer.max, num_procs) } else if (length(seed) == 1 && num_procs > 1) { seed <- as.integer(seed) seed <- c(seed, seed + 1:(num_procs -1)) diff --git a/R/cmdstanr-package.R b/R/cmdstanr-package.R index a65a42c49..37822b29c 100644 --- a/R/cmdstanr-package.R +++ b/R/cmdstanr-package.R @@ -52,4 +52,4 @@ #' NULL -if (getRversion() >= "2.15.1") utils::globalVariables(c("self", "private")) +if (getRversion() >= "2.15.1") utils::globalVariables(c("self", "private", "super")) diff --git a/R/fit.R b/R/fit.R index 815e21e88..5720de1d9 100644 --- a/R/fit.R +++ b/R/fit.R @@ -11,86 +11,9 @@ CmdStanFit <- R6::R6Class( self$runset <- runset invisible(self) }, - - save_object = function(file, ...) { - self$draws() - try(self$sampler_diagnostics(), silent = TRUE) - try(self$init(), silent = TRUE) - saveRDS(self, file = file, ...) - invisible(self) - }, - num_procs = function() { self$runset$num_procs() }, - - time = function() { - self$runset$time() - }, - - draws = function(variables = NULL) { - if (!length(self$output_files(include_failed = FALSE))) { - stop("Fitting failed. Unable to retrieve the draws.", call. = FALSE) - } - # CmdStanMCMC has its own implementation, this is used for VB and MLE - if (is.null(private$draws_)) { - private$read_csv_() - } - posterior::subset_draws(private$draws_, variable = variables) - }, - - lp = function() { - lp__ <- self$draws(variables = "lp__") - lp__ <- posterior::as_draws_matrix(lp__) # if mcmc this combines all chains, otherwise does nothing - as.numeric(lp__) - }, - - metadata = function() { - if (!length(self$output_files(include_failed = FALSE))) { - stop("Fitting failed. Unable to retrieve the metadata.", call. 
= FALSE) - } - if (is.null(private$metadata_)) { - private$read_csv_() - } - private$metadata_ - }, - - init = function() { - if (is.null(private$init_)) { - init_paths <- self$metadata()$init - if (!is.character(init_paths) || any(!file.exists(init_paths))) { - stop("Can't find initial values files.", call. = FALSE) - } - private$init_ <- lapply(init_paths, jsonlite::read_json, simplifyVector = TRUE) - } - private$init_ - }, - - summary = function(variables = NULL, ...) { - draws <- self$draws(variables) - if (self$runset$method() == "sample") { - summary <- posterior::summarise_draws(draws, ...) - } else { - if (!length(list(...))) { - # if user didn't supply any args use default summary measures, - # which don't include MCMC-specific things - summary <- posterior::summarise_draws( - draws, - posterior::default_summary_measures() - ) - } else { - # otherwise use whatever the user specified via ... - summary <- posterior::summarise_draws(draws, ...) - } - } - if (self$runset$method() == "optimize") { - summary <- summary[, c("variable", "mean")] - colnames(summary) <- c("variable", "estimate") - } - summary - }, - - # print summary table without using tibbles print = function(variables = NULL, ..., digits = 2, max_rows = 10) { if (!length(self$output_files(include_failed = FALSE))) { stop("Fitting failed. Unable to print.", call. = FALSE) @@ -124,54 +47,12 @@ CmdStanFit <- R6::R6Class( opts <- options(max.print = prod(dim(out))) on.exit(options(max.print = opts$max.print), add = TRUE) - print(out, row.names=FALSE) + base::print(out, row.names=FALSE) if (max_rows < total_rows) { cat("\n # showing", max_rows, "of", total_rows, "rows (change via 'max_rows' argument)\n") } invisible(self) - }, - - cmdstan_summary = function(...) { - self$runset$run_cmdstan_tool("stansummary", ...) - }, - cmdstan_diagnose = function(...) { - self$runset$run_cmdstan_tool("diagnose", ...) 
- }, - output = function(id = NULL) { - # non-MCMC fit is obtained with one process only - # so fit$output() prints the output of that process - cat(paste(self$runset$procs$proc_output(1), collapse="\n")) - }, - output_files = function(include_failed = FALSE) { - self$runset$output_files(include_failed) - }, - latent_dynamics_files = function(include_failed = FALSE) { - self$runset$latent_dynamics_files(include_failed) - }, - data_file = function() { - self$runset$data_file() - }, - save_output_files = function(dir = ".", - basename = NULL, - timestamp = TRUE, - random = TRUE) { - self$runset$save_output_files(dir, basename, timestamp, random) - }, - save_latent_dynamics_files = function(dir = ".", - basename = NULL, - timestamp = TRUE, - random = TRUE) { - self$runset$save_latent_dynamics_files(dir, basename, timestamp, random) - }, - save_data_file = function(dir = ".", - basename = NULL, - timestamp = TRUE, - random = TRUE) { - self$runset$save_data_file(dir, basename, timestamp, random) - }, - return_codes = function() { - self$runset$procs$return_codes() } ), private = list( @@ -181,9 +62,6 @@ CmdStanFit <- R6::R6Class( ) ) - -# Document methods ---------------------------------------------------------- - #' Save fitted model object to a file #' #' @name fit-method-save_object @@ -194,14 +72,8 @@ CmdStanFit <- R6::R6Class( #' read into R lazily (i.e., as needed), the `$save_object()` method is the #' safest way to guarantee that everything has been read in before saving. #' -#' @section Usage: -#' ``` -#' $save_object(file, ...) -#' ``` -#' -#' @section Arguments: -#' * `file`: (string) Path where the file should be saved. -#' * `...`: Other arguments to pass to [base::saveRDS()] besides `object` and `file`. +#' @param file (string) Path where the file should be saved. +#' @param ... Other arguments to pass to [base::saveRDS()] besides `object` and `file`. 
#' #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`] #' @@ -217,7 +89,14 @@ CmdStanFit <- R6::R6Class( #' fit$summary() #' } #' -NULL +save_object <- function(file, ...) { + self$draws() + try(self$sampler_diagnostics(), silent = TRUE) + try(self$init(), silent = TRUE) + saveRDS(self, file = file, ...) + invisible(self) +} +CmdStanFit$set("public", name = "save_object", value = save_object) #' Extract posterior draws #' @@ -231,20 +110,12 @@ NULL #' `generated quantities` from the Stan program as well as `lp__`, the total #' log probability (`target`) accumulated in the `model` block. #' -#' @section Usage: -#' ``` -#' $draws(variables = NULL, inc_warmup = FALSE, ...) -#' ``` -#' @section Arguments: -#' * `variables`: (character vector) The variables (parameters and generated -#' quantities) to read in. If `NULL` (the default) then the draws of all -#' variables are included. -#' * `inc_warmup`: (logical) For MCMC only, should warmup draws be included? -#' Defaults to `FALSE`. -#' * `...`: Arguments passed on to -#' [`posterior::as_draws_array()`][posterior::draws_array]. -#' -#' @section Value: +#' @param variables (character vector) The variables to read in. If `NULL` (the +#' default) then all variables are included. +#' @param inc_warmup (logical) Should warmup draws be included? Defaults to +#' `FALSE`. Ignored except when used with [CmdStanMCMC] objects. +#' +#' @return #' * For [MCMC][model-method-sample], a 3-D #' [`draws_array`][posterior::draws_array] object (iteration x chain x #' variable). @@ -291,95 +162,35 @@ NULL #' mcmc_scatter(fit$draws(c("beta[1]", "beta[2]")), alpha = 0.3) #' } #' -NULL - -#' Extract sampler diagnostics -#' -#' @name fit-method-sampler_diagnostics -#' @aliases sampler_diagnostics -#' @description Extract the values of sampler diagnostics for each iteration and -#' chain of MCMC. -#' -#' @section Usage: -#' ``` -#' $sampler_diagnostics(inc_warmup = FALSE, ...) 
-#' ``` -#' @section Arguments: -#' * `inc_warmup`: (logical) Should warmup draws be included? Defaults to `FALSE`. -#' * `...`: Arguments passed on to -#' [`posterior::as_draws_array()`][posterior::draws_array]. -#' -#' @section Value: -#' A 3-D [`draws_array`][posterior::draws_array] object (iteration x chain x -#' variable). The variables for Stan's default MCMC algorithm are -#' `"accept_stat__"`, `"stepsize__"`, `"treedepth__"`, `"n_leapfrog__"`, -#' `"divergent__"`, `"energy__"`. -#' -#' @seealso [`CmdStanMCMC`] -#' -#' @examples -#' \dontrun{ -#' fit <- cmdstanr_example("logistic") -#' sampler_diagnostics <- fit$sampler_diagnostics() -#' str(sampler_diagnostics) -#' -#' library(posterior) -#' as_draws_df(sampler_diagnostics) -#' } -#' -NULL - -#' Extract inverse metric (mass matrix) -#' -#' @name fit-method-inv_metric -#' @aliases inv_metric -#' @description Return a list containing the inverse metric (mass matrix) for -#' each chain. -#' -#' @section Usage: -#' ``` -#' $inv_metric(matrix = TRUE) -#' ``` -#' @section Arguments: -#' * `matrix`: (logical) If a diagonal metric was used, setting `matrix = FALSE` -#' returns a list containing just the diagonals of the matrices instead of the -#' full matrices. Setting `matrix = FALSE` has no effect for dense metrics. -#' -#' @section Value: -#' A list of length equal to the number of MCMC chains. See the `matrix` -#' argument for details. -#' -#' @seealso [`CmdStanMCMC`] -#' -#' @examples -#' \dontrun{ -#' fit <- cmdstanr_example("logistic") -#' fit$inv_metric() -#' fit$inv_metric(matrix=FALSE) -#' -#' fit <- cmdstanr_example("logistic", metric = "dense_e") -#' fit$inv_metric() -#' } -#' -NULL +draws <- function(variables = NULL, inc_warmup = FALSE) { + # CmdStanMCMC and CmdStanGQ have separate implementations, + # this is used for CmdStanVB and CmdStanMLE + if (!length(self$output_files(include_failed = FALSE))) { + stop("Fitting failed. Unable to retrieve the draws.", call. 
= FALSE) + } + if (inc_warmup) { + warning("'inc_warmup' is ignored except when used with CmdStanMCMC objects.", + call. = FALSE) + } + if (is.null(private$draws_)) { + private$read_csv_() + } + posterior::subset_draws(private$draws_, variable = variables) +} +CmdStanFit$set("public", name = "draws", value = draws) -#' Extract initial values +#' Extract user-specified initial values #' #' @name fit-method-init #' @aliases init #' @description Return user-specified initial values. If the user provided #' initial values files or \R objects (list of lists or function) via the #' `init` argument when fitting the model then these are returned (always in -#' the list of lists format). Initial values generated by CmdStan are not -#' returned. -#' -#' @section Usage: -#' ``` -#' $init() -#' ``` +#' the list of lists format). Currently it is not possible to extract initial +#' values generated automatically by CmdStan, although CmdStan may support +#' this in the future. #' -#' @section Value: -#' A list of lists. See **Examples**. +#' @return A list of lists. See **Examples**. #' #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`] #' @@ -400,7 +211,17 @@ NULL #' str(fit$init()) #' } #' -NULL +init <- function() { + if (is.null(private$init_)) { + init_paths <- self$metadata()$init + if (!is.character(init_paths) || any(!file.exists(init_paths))) { + stop("Can't find initial values files.", call. = FALSE) + } + private$init_ <- lapply(init_paths, jsonlite::read_json, simplifyVector = TRUE) + } + private$init_ +} +CmdStanFit$set("public", name = "init", value = init) #' Extract log probability (target) #' @@ -416,12 +237,6 @@ NULL #' section of the Stan Reference Manual for details on when normalizing #' constants are dropped from log probability calculations. 
#' -#' @section Usage: -#' ``` -#' $lp() -#' $lp_approx() -#' ``` -#' #' @section Details: #' `lp__` is the unnormalized log density on Stan's [unconstrained #' space](https://mc-stan.org/docs/2_23/reference-manual/variable-transforms-chapter.html). @@ -434,9 +249,9 @@ NULL #' for performing the checks described in Yao et al. (2018) and implemented in #' the \pkg{loo} package. #' -#' @section Value: -#' A numeric vector with length equal to the number of (post-warmup) draws for -#' MCMC and variational inference, and length equal to `1` for optimization. +#' @return A numeric vector with length equal to the number of (post-warmup) +#' draws for MCMC and variational inference, and length equal to `1` for +#' optimization. #' #' @references #' Yao, Y., Vehtari, A., Simpson, D., and Gelman, A. (2018). Yes, but did it @@ -457,77 +272,36 @@ NULL #' plot(fit_vb$lp(), fit_vb$lp_approx()) #' } #' -NULL +lp <- function() { + lp__ <- self$draws(variables = "lp__") + lp__ <- posterior::as_draws_matrix(lp__) # if mcmc this combines all chains, otherwise does nothing + as.numeric(lp__) +} +CmdStanFit$set("public", name = "lp", value = lp) -#' Extract (penalized) maximum likelihood estimate after optimization -#' -#' @name fit-method-mle -#' @aliases mle -#' @description The `$mle()` method is only available for [`CmdStanMLE`] objects. -#' It returns the penalized maximum likelihood estimate (posterior mode) as a -#' numeric vector with one element per variable. The returned vector does not -#' include `lp__`, the total log probability (`target`) accumulated in the -#' `model` block of the Stan program, which is available via the -#' [`$lp()`][fit-method-lp] method and also included in the -#' [`$draws()`][fit-method-draws] method. -#' -#' @section Usage: -#' ``` -#' $mle(variables = NULL) -#' ``` -#' @section Arguments: -#' * `variables`: (character vector) The variables (parameters and generated -#' quantities) to include. 
If NULL (the default) then all variables are -#' included. -#' -#' @section Value: -#' A numeric vector. See **Examples**. -#' -#' @seealso [`CmdStanMLE`] -#' -#' @examples -#' \dontrun{ -#' fit <- cmdstanr_example("logistic", method = "optimize") -#' fit$mle() -#' fit$mle("alpha") -#' fit$mle("beta") -#' fit$mle("beta[2]") -#' } -#' -NULL -#' Compute a summary table of MCMC estimates and diagnostics +#' Compute a summary table of estimates and diagnostics #' #' @name fit-method-summary -#' @aliases summary print.CmdStanMCMC print.CmdStanMLE print.CmdStanVB +#' @aliases summary fit-method-print print.CmdStanMCMC print.CmdStanMLE print.CmdStanVB #' @description The `$summary()` method runs #' [`summarise_draws()`][posterior::draws_summary] from the \pkg{posterior} -#' package and returns the output. For MCMC only post-warmup draws are included -#' in the summary. -#' -#' The `$print()` method prints the same summary stats but removes the extra -#' formatting used for printing tibbles and returns the fitted model object -#' itself. The `$print()` method may also be faster than `$summary()` because -#' it is designed to only compute the summary statistics for the variables -#' that will actually fit in the printed output (see argument `max_rows`) -#' whereas `$summary()` will compute them for all of the specified variables -#' in order to be able to return them to the user. -#' -#' @section Usage: -#' ``` -#' $summary(variables = NULL, ...) -#' $print(variables = NULL, ..., digits = 2, max_rows = 10) -#' ``` -#' @section Arguments: -#' * `variables`: (character vector) The variables to include. -#' * `...`: Optional arguments to pass to -#' [`posterior::summarise_draws()`][posterior::draws_summary]. -#' * `digits`: (integer) For `print` only, the number of digits to use for -#' rounding. -#' * `max_rows`: (integer) For `print` only, the maximum number of rows to print. 
-#' -#' @section Value: -#' The `$summary()` method returns the tibble created by +#' package and returns the output. For MCMC, only post-warmup draws are +#' included in the summary. +#' +#' There is also a `$print()` method that prints the same summary stats but +#' removes the extra formatting used for printing tibbles and returns the +#' fitted model object itself. The `$print()` method may also be faster than +#' `$summary()` because it is designed to only compute the summary statistics +#' for the variables that will actually fit in the printed output whereas +#' `$summary()` will compute them for all of the specified variables in order +#' to be able to return them to the user. See **Examples**. +#' +#' @param variables (character vector) The variables to include. +#' @param ... Optional arguments to pass to [`posterior::summarise_draws()`][posterior::draws_summary]. +#' +#' @return +#' The `$summary()` method returns the tibble data frame created by #' [`posterior::summarise_draws()`][posterior::draws_summary]. #' #' The `$print()` method returns the fitted model object itself (invisibly), @@ -554,31 +328,52 @@ NULL #' fit$summary("beta", prob_gt_0 = ~ mean(. > 0)) #' } #' -NULL - +summary <- function(variables = NULL, ...) { + draws <- self$draws(variables) + if (self$runset$method() == "sample") { + summary <- posterior::summarise_draws(draws, ...) + } else { + if (!length(list(...))) { + # if user didn't supply any args use default summary measures, + # which don't include MCMC-specific things + summary <- posterior::summarise_draws( + draws, + posterior::default_summary_measures() + ) + } else { + # otherwise use whatever the user specified via ... + summary <- posterior::summarise_draws(draws, ...) 
+ } + } + if (self$runset$method() == "optimize") { + summary <- summary[, c("variable", "mean")] + colnames(summary) <- c("variable", "estimate") + } + summary +} +CmdStanFit$set("public", name = "summary", value = summary) -#' Run CmdStan's `stansummary` and `diagnose` -#' -#' @description -#' Run CmdStan's `stansummary` and `diagnose` utilities. These are -#' documented in the CmdStan Guide: -#' * https://mc-stan.org/docs/cmdstan-guide/stansummary.html -#' * https://mc-stan.org/docs/cmdstan-guide/diagnose.html +#' Run CmdStan's `stansummary` and `diagnose` utilities #' #' @name fit-method-cmdstan_summary #' @aliases fit-method-cmdstan_diagnose cmdstan_summary cmdstan_diagnose -#' @note Although these methods also work for models fit using the +#' @description Run CmdStan's `stansummary` and `diagnose` utilities. These are +#' documented in the CmdStan Guide: +#' * https://mc-stan.org/docs/cmdstan-guide/stansummary.html +#' * https://mc-stan.org/docs/cmdstan-guide/diagnose.html +#' +#' Although these methods can be used for models fit using the #' [`$variational()`][model-method-variational] method, much of the output is -#' only relevant for models fit using the [`$sample()`][model-method-sample] -#' method. +#' currently only relevant for models fit using the +#' [`$sample()`][model-method-sample] method. #' -#' @section Usage: -#' ``` -#' $cmdstan_summary() -#' $cmdstan_diagnose() -#' ``` +#' See the [`$summary()`][fit-method-summary] method for computing similar +#' summaries in R rather than calling CmdStan's utilities. #' -#' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`] +#' @param flags An optional character vector of flags (e.g. +#' `flags = c("--sig_figs=1")`). 
+#' +#' @seealso [`CmdStanMCMC`], [fit-method-summary] #' #' @examples #' \dontrun{ @@ -587,7 +382,16 @@ NULL #' fit$cmdstan_summary() #' } #' -NULL +cmdstan_summary <- function(flags = NULL) { + self$runset$run_cmdstan_tool("stansummary", flags = flags) +} +CmdStanFit$set("public", name = "cmdstan_summary", value = cmdstan_summary) + +#' @rdname fit-method-cmdstan_summary +cmdstan_diagnose <- function() { + self$runset$run_cmdstan_tool("diagnose") +} +CmdStanFit$set("public", name = "cmdstan_diagnose", value = cmdstan_diagnose) #' Save output and data files #' @@ -607,23 +411,11 @@ NULL #' The versions without the `save_` prefix (e.g., `$output_files()`) return #' the current file paths without moving any files. #' -#' @section Usage: -#' ``` -#' $save_output_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) -#' $save_latent_dynamics_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) -#' $save_data_file(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) -#' -#' $output_files() -#' $latent_dynamics_files() -#' $data_file() -#' ``` -#' -#' @section Arguments: -#' * `dir`: (string) Path to directory where the files should be saved. -#' * `basename`: (string) Base filename to use. See __Details__. -#' * `timestamp`: (logical) Should a timestamp be added to the file name(s)? +#' @param dir (string) Path to directory where the files should be saved. +#' @param basename (string) Base filename to use. See __Details__. +#' @param timestamp (logical) Should a timestamp be added to the file name(s)? #' Defaults to `TRUE`. See __Details__. -#' * `random`: (logical) Should random alphanumeric characters be added to the +#' @param random (logical) Should random alphanumeric characters be added to the #' end of the file name(s)? Defaults to `TRUE`. See __Details__. 
#' #' @section Details: @@ -641,7 +433,7 @@ NULL #' For `$save_data_file()` no `id` is included in the file name because even #' with multiple MCMC chains the data file is the same. #' -#' @section Value: +#' @return #' The `$save_*` methods print a message with the new file paths and (invisibly) #' return a character vector of the new paths (or `NA` for any that couldn't be #' copied). They also have the side effect of setting the internal paths in the @@ -652,7 +444,65 @@ NULL #' #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`] #' -NULL +#' @examples +#' \dontrun{ +#' fit <- cmdstanr_example() +#' fit$output_files() +#' fit$data_file() +#' +#' # just using tempdir for the example +#' my_dir <- tempdir() +#' fit$save_output_files(dir = my_dir, basename = "banana") +#' fit$save_output_files(dir = my_dir, basename = "tomato", timestamp = FALSE) +#' fit$save_output_files(dir = my_dir, basename = "lettuce", timestamp = FALSE, random = FALSE) +#' } +#' +save_output_files <- function(dir = ".", + basename = NULL, + timestamp = TRUE, + random = TRUE) { + self$runset$save_output_files(dir, basename, timestamp, random) +} +CmdStanFit$set("public", name = "save_output_files", value = save_output_files) + +#' @rdname fit-method-save_output_files +save_latent_dynamics_files <- function(dir = ".", + basename = NULL, + timestamp = TRUE, + random = TRUE) { + self$runset$save_latent_dynamics_files(dir, basename, timestamp, random) +} +CmdStanFit$set("public", name = "save_latent_dynamics_files", value = save_latent_dynamics_files) + +#' @rdname fit-method-save_output_files +save_data_file <- function(dir = ".", + basename = NULL, + timestamp = TRUE, + random = TRUE) { + self$runset$save_data_file(dir, basename, timestamp, random) +} +CmdStanFit$set("public", name = "save_data_file", value = save_data_file) + + +#' @rdname fit-method-save_output_files +#' @param include_failed Should CmdStan runs that failed also be included? 
The +#' default is `FALSE`. +output_files <- function(include_failed = FALSE) { + self$runset$output_files(include_failed) +} +CmdStanFit$set("public", name = "output_files", value = output_files) + +#' @rdname fit-method-save_output_files +latent_dynamics_files <- function(include_failed = FALSE) { + self$runset$latent_dynamics_files(include_failed) +} +CmdStanFit$set("public", name = "latent_dynamics_files", value = latent_dynamics_files) + +#' @rdname fit-method-save_output_files +data_file <- function() { + self$runset$data_file() +} +CmdStanFit$set("public", name = "data_file", value = data_file) #' Report timing of CmdStan runs #' @@ -662,12 +512,7 @@ NULL #' is provided about the run times of individual chains and the warmup and #' sampling phases. #' -#' @section Usage: -#' ``` -#' $time() -#' ``` -#' -#' @section Value: +#' @return #' A list with elements #' * `total`: (scalar) the total run time. #' * `chains`: (data frame) for MCMC only, timing info for the individual @@ -688,26 +533,25 @@ NULL #' fit_vb$time() #' } #' -NULL +time <- function() { + self$runset$time() +} +CmdStanFit$set("public", name = "time", value = time) #' Access console output #' #' @name fit-method-output #' @aliases output -#' @description For MCMC the `$output()` method returns the stdout and stderr of -#' all chains as a list of character vectors. If the `id` argument is specified -#' it pretty prints the console output for a single chain. +#' @description For MCMC, the `$output()` method returns the stdout and stderr +#' of all chains as a list of character vectors if `id=NULL`. If the `id` +#' argument is specified it instead pretty prints the console output for a +#' single chain. #' -#' For optimization and variational inference `$output()` just pretty prints the -#' console output. +#' For optimization and variational inference `$output()` just pretty prints +#' the console output. 
#' -#' @section Usage: -#' ``` -#' $output(id = NULL) -#' ``` -#' -#' @section Arguments: -#' * `id`: (integer) For MCMC only, the chain id. +#' @param id (integer) The chain id. Ignored if the model was not fit using +#' MCMC. #' #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`] #' @@ -725,7 +569,12 @@ NULL #' fit_vb$output() #' } #' -NULL +output <- function(id = NULL) { + # MCMC has separate implementation but doc is shared + # Non-MCMC fit is obtained with one process only so id is ignored + cat(paste(self$runset$procs$proc_output(1), collapse="\n")) +} +CmdStanFit$set("public", name = "output", value = output) #' Extract metadata from CmdStan CSV files #' @@ -735,11 +584,6 @@ NULL #' from the CSV output files, including the CmdStan configuration used when #' fitting the model. See **Examples** and [read_cmdstan_csv()]. #' -#' @section Usage: -#' ``` -#' $metadata() -#' ``` -#' #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`] #' #' @examples @@ -754,7 +598,16 @@ NULL #' str(fit_vb$metadata()) #' } #' -NULL +metadata <- function() { + if (!length(self$output_files(include_failed = FALSE))) { + stop("Fitting failed. Unable to retrieve the metadata.", call. = FALSE) + } + if (is.null(private$metadata_)) { + private$read_csv_() + } + private$metadata_ +} +CmdStanFit$set("public", name = "metadata", value = metadata) #' Extract return codes from CmdStan #' @@ -762,11 +615,8 @@ NULL #' @aliases return_codes #' @description The `$return_codes()` method returns a vector of return codes #' from the CmdStan run(s). A return code of 0 indicates a successful run. -#' -#' @section Usage: -#' ``` -#' $return_codes() -#' ``` +#' @return An integer vector of return codes with length equal to the number of +#' CmdStan runs (number of chains for MCMC and one otherwise). 
#' #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`] #' @@ -781,56 +631,13 @@ NULL #' fit_opt$return_codes() # should be non-zero #' } #' -NULL - -#' Leave-one-out cross-validation (LOO-CV) -#' -#' @name fit-method-loo -#' @aliases loo -#' @description The `$loo()` method computes approximate LOO-CV using the -#' \pkg{loo} package. This is a simple wrapper around [loo::loo.array()] -#' provided for convenience and requires computing the pointwise -#' log-likelihood in your Stan program. See the \pkg{loo} package -#' [vignettes](https://mc-stan.org/loo/articles/) for details. -#' -#' @section Usage: -#' ``` -#' $loo(variables = "log_lik", r_eff = NULL, ...) -#' ``` -#' -#' @param variables (character vector) The name(s) of the variable(s) in the -#' Stan program containing the pointwise log-likelihood. The default is to -#' look for `"log_lik"`. This argument is passed to the -#' [`$draws()][fit-method-draws] method. -#' @param r_eff There are several options: -#' * `TRUE` (the default) will automatically call [loo::relative_eff.array()] -#' to compute the `r_eff` argument to pass to [loo::loo.array()]. -#' * `FALSE` or `NULL` will avoid computing `r_eff` (which can sometimes be slow) -#' but will result in a warning from the \pkg{loo} package. -#' * If `r_eff` is anything else, that object will be passed as the `r_eff` -#' argument to [loo::loo.array()]. -#' @param ... Other arguments (e.g., `cores`, `save_psis`, etc.) passed to -#' [loo::loo.array()]. -#' -#' @section Value: The object returned by [loo::loo.array()]. -#' -#' @seealso The \pkg{loo} package website with -#' [documentation](https://mc-stan.org/loo/reference/index.html) and -#' [vignettes](https://mc-stan.org/loo/articles/). 
-#' -#' @examples -#' -#' \dontrun{ -#' # the "logistic" example model has "log_lik" in generated quantities -#' fit <- cmdstanr_example("logistic") -#' fit$loo(cores = 2) -#' } -#' -NULL +return_codes <- function() { + self$runset$procs$return_codes() +} +CmdStanFit$set("public", name = "return_codes", value = return_codes) # CmdStanMCMC ------------------------------------------------------------- - #' CmdStanMCMC objects #' #' @name CmdStanMCMC @@ -855,6 +662,7 @@ NULL #' [`$inv_metric()`][fit-method-inv_metric] | Return the inverse metric for each chain. | #' [`$init()`][fit-method-init] | Return user-specified initial values. | #' [`$metadata()`][fit-method-metadata] | Return a list of metadata gathered from the CmdStan CSV files. | +#' [`$num_chains()`][fit-method-num_chains] | Returns the number of MCMC chains. | #' #' ## Summarize inferences and diagnostics #' @@ -878,8 +686,8 @@ NULL #' #' |**Method**|**Description**| #' |:----------|:---------------| -#' [`$time()`][fit-method-time] | Report total and chain-specific run times. | #' [`$output()`][fit-method-output] | Return the stdout and stderr of all chains or pretty print the output for a single chain. | +#' [`$time()`][fit-method-time] | Report total and chain-specific run times. | #' [`$return_codes()`][fit-method-return_codes] | Return the return codes from the CmdStan runs. 
| #' NULL @@ -888,6 +696,7 @@ CmdStanMCMC <- R6::R6Class( classname = "CmdStanMCMC", inherit = CmdStanFit, public = list( + # override the CmdStanFit initialize method initialize = function(runset) { super$initialize(runset) if (!length(self$output_files())) { @@ -910,11 +719,7 @@ CmdStanMCMC <- R6::R6Class( } } }, - - num_chains = function() { - super$num_procs() - }, - + # override the CmdStanFit output method output = function(id = NULL) { if (is.null(id)) { self$runset$procs$proc_output() @@ -923,6 +728,7 @@ CmdStanMCMC <- R6::R6Class( } }, + # override the CmdStanFit draws method draws = function(variables = NULL, inc_warmup = FALSE) { if (!length(self$output_files(include_failed = FALSE))) { stop("No chains finished successfully. Unable to retrieve the draws.", call. = FALSE) @@ -955,67 +761,7 @@ CmdStanMCMC <- R6::R6Class( } else { private$draws_[,,variables] } - }, - - sampler_diagnostics = function(inc_warmup = FALSE) { - if (!length(self$output_files(include_failed = FALSE))) { - stop("No chains finished successfully. Unable to retrieve the sampler diagnostics.", call. = FALSE) - } - to_read <- remaining_columns_to_read( - requested = NULL, - currently_read = dimnames(private$sampler_diagnostics_)$variable, - all = private$metadata_$sampler_diagnostics - ) - if (is.null(to_read) || any(nzchar(to_read))) { - private$read_csv_(variables = "", sampler_diagnostics = NULL) - } - if (inc_warmup) { - if (!private$metadata_$save_warmup) { - stop("Warmup sampler diagnostics were requested from a fit object without them! ", - "Please rerun the model with save_warmup = TRUE.", call. = FALSE) - } - posterior::bind_draws( - private$warmup_sampler_diagnostics_, - private$sampler_diagnostics_, - along="iteration" - ) - } else { - private$sampler_diagnostics_ - } - }, - - # returns list of inverse metrics - inv_metric = function(matrix = TRUE) { - if (!length(self$output_files(include_failed = FALSE))) { - stop("No chains finished successfully. 
Unable to retrieve the inverse metrics.", call. = FALSE) - } - if (is.null(private$inv_metric_)) { - private$read_csv_(variables = "", sampler_diagnostics = "") - } - out <- private$inv_metric_ - if (matrix && !is.matrix(out[[1]])) { - # convert each vector to a diagonal matrix - out <- lapply(out, diag) - } - out - }, - # approximate loo-cv using the loo package - loo = function(variables = "log_lik", r_eff = TRUE, ...) { - if (!requireNamespace("loo", quietly = TRUE)) { - stop("Please install the loo package to use this method.", call. = FALSE) - } - LLarray <- self$draws(variables) - if (is.logical(r_eff)) { - if (isTRUE(r_eff)) { - r_eff_cores <- list(...)[["cores"]] %||% getOption("mc.cores", 1) - r_eff <- loo::relative_eff(exp(LLarray), cores = r_eff_cores) - } else { - r_eff <- NULL - } - } - loo::loo.array(LLarray, r_eff = r_eff, ...) } - ), private = list( # also inherits draws_ and metadata_ from CmdStanFit @@ -1075,9 +821,181 @@ CmdStanMCMC <- R6::R6Class( ) ) +#' Leave-one-out cross-validation (LOO-CV) +#' +#' @name fit-method-loo +#' @aliases loo +#' @description The `$loo()` method computes approximate LOO-CV using the +#' \pkg{loo} package. This is a simple wrapper around [loo::loo.array()] +#' provided for convenience and requires computing the pointwise +#' log-likelihood in your Stan program. See the \pkg{loo} package +#' [vignettes](https://mc-stan.org/loo/articles/) for details. +#' +#' @param variables (character vector) The name(s) of the variable(s) in the +#' Stan program containing the pointwise log-likelihood. The default is to +#' look for `"log_lik"`. This argument is passed to the +#' [`$draws()`][fit-method-draws] method. +#' @param r_eff There are several options: +#' * `TRUE` (the default) will automatically call [loo::relative_eff.array()] +#' to compute the `r_eff` argument to pass to [loo::loo.array()].
+#' * `FALSE` or `NULL` will avoid computing `r_eff` (which can sometimes be slow) +#' but will result in a warning from the \pkg{loo} package. +#' * If `r_eff` is anything else, that object will be passed as the `r_eff` +#' argument to [loo::loo.array()]. +#' @param ... Other arguments (e.g., `cores`, `save_psis`, etc.) passed to +#' [loo::loo.array()]. +#' +#' @return The object returned by [loo::loo.array()]. +#' +#' @seealso The \pkg{loo} package website with +#' [documentation](https://mc-stan.org/loo/reference/index.html) and +#' [vignettes](https://mc-stan.org/loo/articles/). +#' +#' @examples +#' +#' \dontrun{ +#' # the "logistic" example model has "log_lik" in generated quantities +#' fit <- cmdstanr_example("logistic") +#' loo_result <- fit$loo(cores = 2) +#' print(loo_result) +#' } +#' +loo <- function(variables = "log_lik", r_eff = TRUE, ...) { + if (!requireNamespace("loo", quietly = TRUE)) { + stop("Please install the loo package to use this method.", call. = FALSE) + } + LLarray <- self$draws(variables) + if (is.logical(r_eff)) { + if (isTRUE(r_eff)) { + r_eff_cores <- list(...)[["cores"]] %||% getOption("mc.cores", 1) + r_eff <- loo::relative_eff(exp(LLarray), cores = r_eff_cores) + } else { + r_eff <- NULL + } + } + loo::loo.array(LLarray, r_eff = r_eff, ...) +} +CmdStanMCMC$set("public", name = "loo", value = loo) + +#' Extract sampler diagnostics after MCMC +#' +#' @name fit-method-sampler_diagnostics +#' @aliases sampler_diagnostics +#' @description Extract the values of sampler diagnostics for each iteration and +#' chain of MCMC. +#' +#' @param inc_warmup (logical) Should warmup draws be included? Defaults to `FALSE`. +#' +#' @return +#' A 3-D [`draws_array`][posterior::draws_array] object (iteration x chain x +#' variable). The variables for Stan's default MCMC algorithm are +#' `"accept_stat__"`, `"stepsize__"`, `"treedepth__"`, `"n_leapfrog__"`, +#' `"divergent__"`, `"energy__"`. 
+#' +#' @seealso [`CmdStanMCMC`] +#' +#' @examples +#' \dontrun{ +#' fit <- cmdstanr_example("logistic") +#' sampler_diagnostics <- fit$sampler_diagnostics() +#' str(sampler_diagnostics) +#' +#' library(posterior) +#' as_draws_df(sampler_diagnostics) +#' } +#' +sampler_diagnostics <- function(inc_warmup = FALSE) { + if (!length(self$output_files(include_failed = FALSE))) { + stop("No chains finished successfully. Unable to retrieve the sampler diagnostics.", call. = FALSE) + } + to_read <- remaining_columns_to_read( + requested = NULL, + currently_read = dimnames(private$sampler_diagnostics_)$variable, + all = private$metadata_$sampler_diagnostics + ) + if (is.null(to_read) || any(nzchar(to_read))) { + private$read_csv_(variables = "", sampler_diagnostics = NULL) + } + if (inc_warmup) { + if (!private$metadata_$save_warmup) { + stop("Warmup sampler diagnostics were requested from a fit object without them! ", + "Please rerun the model with save_warmup = TRUE.", call. = FALSE) + } + posterior::bind_draws( + private$warmup_sampler_diagnostics_, + private$sampler_diagnostics_, + along="iteration" + ) + } else { + private$sampler_diagnostics_ + } +} +CmdStanMCMC$set("public", name = "sampler_diagnostics", value = sampler_diagnostics) + +#' Extract inverse metric (mass matrix) after MCMC +#' +#' @name fit-method-inv_metric +#' @aliases inv_metric +#' @description Extract the inverse metric (mass matrix) for each MCMC chain. +#' +#' @param matrix (logical) If a diagonal metric was used, setting `matrix = +#' FALSE` returns a list containing just the diagonals of the matrices instead +#' of the full matrices. Setting `matrix = FALSE` has no effect for dense +#' metrics. +#' +#' @return A list of length equal to the number of MCMC chains. See the `matrix` +#' argument for details. 
+#' +#' @seealso [`CmdStanMCMC`] +#' +#' @examples +#' \dontrun{ +#' fit <- cmdstanr_example("logistic") +#' fit$inv_metric() +#' fit$inv_metric(matrix=FALSE) +#' +#' fit <- cmdstanr_example("logistic", metric = "dense_e") +#' fit$inv_metric() +#' } +#' +inv_metric <- function(matrix = TRUE) { + if (!length(self$output_files(include_failed = FALSE))) { + stop("No chains finished successfully. Unable to retrieve the inverse metrics.", call. = FALSE) + } + if (is.null(private$inv_metric_)) { + private$read_csv_(variables = "", sampler_diagnostics = "") + } + out <- private$inv_metric_ + if (matrix && !is.matrix(out[[1]])) { + # convert each vector to a diagonal matrix + out <- lapply(out, diag) + } + out +} +CmdStanMCMC$set("public", name = "inv_metric", value = inv_metric) + +#' Extract number of chains after MCMC +#' +#' @name fit-method-num_chains +#' @aliases num_chains +#' @description The `$num_chains()` method returns the number of MCMC chains. +#' @return An integer. +#' +#' @seealso [`CmdStanMCMC`] +#' +#' @examples +#' \dontrun{ +#' fit_mcmc <- cmdstanr_example(chains = 2) +#' fit_mcmc$num_chains() +#' } +#' +num_chains = function() { + super$num_procs() +} +CmdStanMCMC$set("public", name = "num_chains", value = num_chains) -# CmdStanMLE ------------------------------------------------------------- +# CmdStanMLE ------------------------------------------------------------- #' CmdStanMLE objects #' #' @name CmdStanMLE @@ -1127,14 +1045,7 @@ NULL CmdStanMLE <- R6::R6Class( classname = "CmdStanMLE", inherit = CmdStanFit, - public = list( - mle = function(variables = NULL) { - x <- self$draws(variables) - x <- x[, colnames(x) != "lp__"] - estimate <- setNames(as.numeric(x), nm = posterior::variables(x)) - estimate - } - ), + public = list(), private = list( # inherits draws_ and metadata_ slots from CmdStanFit read_csv_ = function() { @@ -1149,8 +1060,43 @@ CmdStanMLE <- R6::R6Class( ) ) -# CmdStanVB 
--------------------------------------------------------------- +#' Extract (penalized) maximum likelihood estimate after optimization +#' +#' @name fit-method-mle +#' @aliases mle +#' @description The `$mle()` method is only available for [`CmdStanMLE`] objects. +#' It returns the penalized maximum likelihood estimate (posterior mode) as a +#' numeric vector with one element per variable. The returned vector does *not* +#' include `lp__`, the total log probability (`target`) accumulated in the +#' `model` block of the Stan program, which is available via the +#' [`$lp()`][fit-method-lp] method and also included in the +#' [`$draws()`][fit-method-draws] method. +#' +#' @param variables (character vector) The variables (parameters, transformed +#' parameters, and generated quantities) to include. If NULL (the default) +#' then all variables are included. +#' +#' @return A numeric vector. See **Examples**. +#' +#' @seealso [`CmdStanMLE`] +#' +#' @examples +#' \dontrun{ +#' fit <- cmdstanr_example("logistic", method = "optimize") +#' fit$mle("alpha") +#' fit$mle("beta") +#' fit$mle("beta[2]") +#' } +#' +mle <- function(variables = NULL) { + x <- self$draws(variables) + x <- x[, colnames(x) != "lp__"] + stats::setNames(as.numeric(x), posterior::variables(x)) +} +CmdStanMLE$set("public", name = "mle", value = mle) + +# CmdStanVB --------------------------------------------------------------- #' CmdStanVB objects #' #' @name CmdStanVB @@ -1203,11 +1149,7 @@ NULL CmdStanVB <- R6::R6Class( classname = "CmdStanVB", inherit = CmdStanFit, - public = list( - lp_approx = function() { - as.numeric(self$draws()[, "lp_approx__"]) - } - ), + public = list(), private = list( # inherits draws_ and metadata_ slots from CmdStanFit read_csv_ = function() { @@ -1222,8 +1164,14 @@ CmdStanVB <- R6::R6Class( ) ) -# CmdStanGQ --------------------------------------------------------------- +#' @rdname fit-method-lp +lp_approx <- function() { + as.numeric(self$draws()[, "lp_approx__"]) +} 
+CmdStanVB$set("public", name = "lp_approx", value = lp_approx) + +# CmdStanGQ --------------------------------------------------------------- #' CmdStanGQ objects #' #' @name CmdStanGQ @@ -1278,10 +1226,15 @@ CmdStanGQ <- R6::R6Class( num_chains = function() { super$num_procs() }, - draws = function(variables = NULL) { + # override CmdStanFit draws method + draws = function(variables = NULL, inc_warmup = FALSE) { if (!length(self$output_files(include_failed = FALSE))) { stop("Generating quantities for all MCMC chains failed. Unable to retrieve the generated quantities.", call. = FALSE) } + if (inc_warmup) { + warning("'inc_warmup' is ignored except when used with CmdStanMCMC objects.", + call. = FALSE) + } to_read <- remaining_columns_to_read( requested = variables, currently_read = dimnames(private$draws_)$variable, @@ -1302,6 +1255,7 @@ CmdStanGQ <- R6::R6Class( } private$draws_[,,variables] }, + # override CmdStanFit output method output = function(id = NULL) { if (is.null(id)) { self$runset$procs$proc_output() diff --git a/R/knitr.R b/R/knitr.R index 21c4fc55e..f6a60e044 100644 --- a/R/knitr.R +++ b/R/knitr.R @@ -5,8 +5,11 @@ #' [R Markdown CmdStan Engine](https://mc-stan.org/cmdstanr/articles/r-markdown.html) #' for a demonstration. #' -#' @param override Override knitr's built-in, RStan-based engine for `stan`. -#' See below for details. +#' @export +#' +#' @param override Override knitr's built-in, RStan-based engine for Stan? The +#' default is `TRUE`. See **Details**. +#' #' @details #' If `override = TRUE` (default), this registers CmdStanR's knitr engine as the #' engine for `stan` chunks, replacing knitr's built-in, RStan-based engine. If @@ -30,10 +33,11 @@ #' If you would like to keep `stan` chunks as `stan` chunks, it is possible to #' specify `engine = "cmdstan"` in the chunk options after registering the #' `cmdstan` engine with `override = FALSE`. 
+#' #' @references -#' - [Register a custom language engine](https://bookdown.org/yihui/rmarkdown-cookbook/custom-engine.html) -#' - [Stan language engine](https://bookdown.org/yihui/rmarkdown/language-engines.html#stan) -#' @export +#' * [Register a custom language engine for knitr](https://bookdown.org/yihui/rmarkdown-cookbook/custom-engine.html) +#' * [knitr's built-in Stan language engine](https://bookdown.org/yihui/rmarkdown/language-engines.html#stan) +#' register_knitr_engine <- function(override = TRUE) { if (!requireNamespace("knitr", quietly = TRUE)) { stop("Please install the knitr package.", call. = FALSE) diff --git a/R/model.R b/R/model.R index f08fcd9a4..1ccc0a54f 100644 --- a/R/model.R +++ b/R/model.R @@ -276,61 +276,43 @@ CmdStanModel <- R6::R6Class( #' `$hpp_file()` methods. The default is to create the executable in the same #' directory as the Stan program and to write the generated C++ code in a #' temporary directory. To save the C++ code to a non-temporary location use -#' `$save_hpp_file()`. +#' `$save_hpp_file(dir)`. #' -#' @section Usage: -#' ``` -#' $compile( -#' quiet = TRUE, -#' dir = NULL, -#' pedantic = FALSE, -#' include_paths = NULL, -#' cpp_options = list(), -#' stanc_options = list(), -#' force_recompile = FALSE -#' ) -#' $exe_file() -#' $hpp_file() -#' $save_hpp_file(dir = NULL) -#' ``` -#' -#' @section Arguments: -#' Leaving all arguments at their defaults should be fine for most users, but -#' optional arguments are provided to enable features in CmdStan (and the Stan -#' Math library). See the CmdStan manual for more details. -#' * `quiet`: (logical) Should the verbose output from CmdStan during +#' @param quiet (logical) Should the verbose output from CmdStan during #' compilation be suppressed? The default is `TRUE`, but if you encounter an #' error we recommend trying again with `quiet=FALSE` to see more of the #' output. 
-#' * `dir`: (string) The path to the directory in which to store the CmdStan +#' @param dir (string) The path to the directory in which to store the CmdStan #' executable (or `.hpp` file if using `$save_hpp_file()`). The default is the #' same location as the Stan program. -#' * `pedantic`: (logical) Should pedantic mode be turned on? The default is +#' @param pedantic (logical) Should pedantic mode be turned on? The default is #' `FALSE`. Pedantic mode attempts to warn you about potential issues in your #' Stan program beyond syntax errors. For details see the [*Pedantic mode* #' chapter](https://mc-stan.org/docs/reference-manual/pedantic-mode.html) in #' the Stan Reference Manual. **Note:** to do a pedantic check for a model #' that is already compiled use the #' [`$check_syntax()`][model-method-check_syntax] method instead. -#' * `include_paths`: (character vector) Paths to directories where Stan +#' @param include_paths (character vector) Paths to directories where Stan #' should look for files specified in `#include` directives in the Stan #' program. -#' * `cpp_options`: (list) Any makefile options to be used when compiling the +#' @param cpp_options (list) Any makefile options to be used when compiling the #' model (`STAN_THREADS`, `STAN_MPI`, `STAN_OPENCL`, etc.). Anything you would #' otherwise write in the `make/local` file. -#' * `stanc_options`: (list) Any Stan-to-C++ transpiler options to be used +#' @param stanc_options (list) Any Stan-to-C++ transpiler options to be used #' when compiling the model. See the **Examples** section below as well as the #' `stanc` chapter of the CmdStan Guide for more details on available options: #' https://mc-stan.org/docs/cmdstan-guide/stanc.html. -#' * `force_recompile`: (logical) Should the model be recompiled even if was +#' @param force_recompile (logical) Should the model be recompiled even if it was #' not modified since last compiled. The default is `FALSE`.
+#' @param threads Deprecated and will be removed in a future release. Please +#' turn on threading via `cpp_options = list(stan_threads = TRUE)` instead. #' #' @section Value: The `$compile()` method is called for its side effect of #' creating the executable and adding its path to the [`CmdStanModel`] object, #' but it also returns the [`CmdStanModel`] object invisibly. #' -#' The `$exe_file()`, `$hpp_file()`, and `$save_hpp_file()` methods all return -#' file paths. +#' After compilation, the `$exe_file()`, `$hpp_file()`, and `$save_hpp_file()` +#' methods can be used and return file paths. #' #' @template seealso-docs #' @@ -361,17 +343,15 @@ CmdStanModel <- R6::R6Class( #' #' } #' -NULL - -compile_method <- function(quiet = TRUE, - dir = NULL, - pedantic = FALSE, - include_paths = NULL, - cpp_options = list(), - stanc_options = list(), - force_recompile = FALSE, - #deprecated - threads = FALSE) { +compile <- function(quiet = TRUE, + dir = NULL, + pedantic = FALSE, + include_paths = NULL, + cpp_options = list(), + stanc_options = list(), + force_recompile = FALSE, + #deprecated + threads = FALSE) { if (length(cpp_options) == 0 && !is.null(private$precompile_cpp_options_)) { cpp_options <- private$precompile_cpp_options_ } @@ -531,7 +511,7 @@ compile_method <- function(quiet = TRUE, private$precompile_include_paths_ <- NULL invisible(self) } -CmdStanModel$set("public", name = "compile", value = compile_method) +CmdStanModel$set("public", name = "compile", value = compile) #' Check syntax of a Stan program #' @@ -543,29 +523,18 @@ CmdStanModel$set("public", name = "compile", value = compile_method) #' checks the Stan program for syntax errors and returns `TRUE` (invisibly) if #' parsing succeeds. If invalid syntax in found an error is thrown. 
#' -#' @section Usage: -#' ``` -#' $check_syntax( -#' pedantic = FALSE, -#' include_paths = NULL, -#' stanc_options = list(), -#' quiet = FALSE -#' ) -#' ``` -#' -#' @section Arguments: -#' * `pedantic`: (logical) Should pedantic mode be turned on? The default is +#' @param pedantic (logical) Should pedantic mode be turned on? The default is #' `FALSE`. Pedantic mode attempts to warn you about potential issues in your #' Stan program beyond syntax errors. For details see the [*Pedantic mode* #' chapter](https://mc-stan.org/docs/reference-manual/pedantic-mode.html) in #' the Stan Reference Manual. -#' * `include_paths`: (character vector) Paths to directories where Stan +#' @param include_paths (character vector) Paths to directories where Stan #' should look for files specified in `#include` directives in the Stan #' program. -#' * `stanc_options`: (list) Any other Stan-to-C++ transpiler options to be +#' @param stanc_options (list) Any other Stan-to-C++ transpiler options to be #' used when compiling the model. See the documentation for the #' [`$compile()`][model-method-compile] method for details. -#' * `quiet`: (logical) Should informational messages be suppressed? The +#' @param quiet (logical) Should informational messages be suppressed? The #' default is `FALSE`, which will print a message if the Stan program is valid #' or the compiler error message if there are syntax errors. If `TRUE`, only #' the error message will be printed. 
@@ -600,12 +569,10 @@ CmdStanModel$set("public", name = "compile", value = compile_method) #' mod$check_syntax(pedantic = TRUE) #' } #' -NULL - -check_syntax_method <- function(pedantic = FALSE, - include_paths = NULL, - stanc_options = list(), - quiet = FALSE) { +check_syntax <- function(pedantic = FALSE, + include_paths = NULL, + stanc_options = list(), + quiet = FALSE) { if (length(stanc_options) == 0 && !is.null(private$precompile_stanc_options_)) { stanc_options <- private$precompile_stanc_options_ } @@ -674,7 +641,7 @@ check_syntax_method <- function(pedantic = FALSE, } invisible(TRUE) } -CmdStanModel$set("public", name = "check_syntax", value = check_syntax_method) +CmdStanModel$set("public", name = "check_syntax", value = check_syntax) #' Run Stan's MCMC algorithms #' @@ -687,192 +654,58 @@ CmdStanModel$set("public", name = "check_syntax", value = check_syntax_method) #' a set of draws from the posterior distribution of a model conditioned on #' some data. #' -#' @section Usage: -#' ``` -#' $sample( -#' data = NULL, -#' seed = NULL, -#' refresh = NULL, -#' init = NULL, -#' save_latent_dynamics = FALSE, -#' output_dir = NULL, -#' sig_figs = NULL, -#' chains = 4, -#' parallel_chains = getOption("mc.cores", 1), -#' chain_ids = seq_len(chains), -#' threads_per_chain = NULL, -#' iter_warmup = NULL, -#' iter_sampling = NULL, -#' save_warmup = FALSE, -#' thin = NULL, -#' max_treedepth = NULL, -#' adapt_engaged = TRUE, -#' adapt_delta = NULL, -#' step_size = NULL, -#' metric = NULL, -#' metric_file = NULL, -#' inv_metric = NULL, -#' init_buffer = NULL, -#' term_buffer = NULL, -#' window = NULL, -#' fixed_param = FALSE, -#' validate_csv = TRUE, -#' show_messages = TRUE -#' ) -#' ``` +#' Any argument left as `NULL` will default to the default value used by the +#' installed version of CmdStan. See the +#' [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/) +#' for more details. 
#' #' @template model-common-args -#' @section Arguments unique to the `sample` method: In addition to the -#' arguments above, the `$sample()` method also has its own set of arguments. -#' -#' The following three arguments are offered by CmdStanR but do not correspond -#' to arguments in CmdStan: -#' -#' * `chains`: (positive integer) The number of Markov chains to run. The -#' default is 4. -#' -#' * `parallel_chains`: (positive integer) The _maximum_ number of MCMC chains -#' to run in parallel. If `parallel_chains` is not specified then the default -#' is to look for the option `"mc.cores"`, which can be set for an entire \R -#' session by `options(mc.cores=value)`. If the `"mc.cores"` option has not -#' been set then the default is `1`. +#' @template model-sample-args +#' @param cores,num_cores,num_chains,num_warmup,num_samples,save_extra_diagnostics,max_depth,stepsize +#' Deprecated and will be removed in a future release. #' -#' * `chain_ids`: (vector) A vector of chain IDs. Must contain `chains` unique -#' positive integers. If not set, the default chain IDs are used (integers -#' starting from `1`). -#' -#' * `threads_per_chain`: (positive integer) If the model was -#' [compiled][model-method-compile] with threading support, the number of -#' threads to use in parallelized sections _within_ an MCMC chain (e.g., when -#' using the Stan functions `reduce_sum()` or `map_rect()`). This is in -#' contrast with `parallel_chains`, which specifies the number of chains to -#' run in parallel. The actual number of CPU cores used use is -#' `parallel_chains*threads_per_chain`. For an example of using threading see -#' the Stan case study [Reduce Sum: A Minimal -#' Example](https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html). -#' -#' * `show_messages`: (logical) When `TRUE` (the default), prints all -#' informational messages, for example rejection of the current proposal. 
-#' Disable if you wish silence these messages, but this is not recommended -#' unless you are very sure that the model is correct up to numerical error. -#' If the messages are silenced then the `$output()` method of the resulting -#' fit object can be used to display all the silenced messages. -#' -#' * `validate_csv`: (logical) When `TRUE` (the default), validate the -#' sampling results in the csv files. Disable if you wish to manually read in -#' the sampling results and validate them yourself, for example using -#' [read_cmdstan_csv()]. -#' -#' -#' The rest of the arguments correspond to arguments offered by CmdStan, -#' although some names are slightly different. They are described briefly here -#' and in greater detail in the CmdStan manual. Arguments left at `NULL` -#' default to the default used by the installed version of CmdStan. -#' The latest [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/) -#' will have the default values for the latest version of CmdStan. -#' -#' * `iter_sampling`: (positive integer) The number of post-warmup iterations to -#' run per chain. -#' * `iter_warmup`: (positive integer) The number of warmup iterations to run -#' per chain. -#' * `save_warmup`: (logical) Should warmup iterations be saved? The default -#' is `FALSE`. If `save_warmup=TRUE` then you can use -#' [$draws(inc_warmup=TRUE)][fit-method-draws] to include warmup when -#' accessing the draws. -#' * `thin`: (positive integer) The period between saved samples. This should -#' be left at its default (no thinning) unless memory is a problem. -#' * `max_treedepth`: (positive integer) The maximum allowed tree depth for the -#' NUTS engine. See the _Tree Depth_ section of the CmdStan manual for more -#' details. -#' * `adapt_engaged`: (logical) Do warmup adaptation? The default is `TRUE`. 
-#' If a precomputed inverse metric is specified via the `inv_metric` argument -#' (or `metric_file`) then, if `adapt_engaged=TRUE`, Stan will use the -#' provided inverse metric just as an initial guess during adaptation. To turn -#' off adaptation when using a precomputed inverse metric set -#' `adapt_engaged=FALSE`. -#' * `adapt_delta`: (real in `(0,1)`) The adaptation target acceptance -#' statistic. -#' * `step_size`: (positive real) The _initial_ step size for the discrete -#' approximation to continuous Hamiltonian dynamics. This is further tuned -#' during warmup. -#' * `metric`: (character) One of `"diag_e"`, `"dense_e"`, or `"unit_e"`, -#' specifying the geometry of the base manifold. See the _Euclidean Metric_ -#' section of the CmdStan documentation for more details. To specify a -#' precomputed (inverse) metric, see the `inv_metric` argument below. -#' * `metric_file`: (character) A character vector containing paths to JSON or -#' Rdump files (one per chain) compatible with CmdStan that contain -#' precomputed inverse metrics. The `metric_file` argument is inherited from -#' CmdStan but is confusing in that the entry in JSON or Rdump file(s) must be -#' named `inv_metric`, referring to the _inverse_ metric. We recommend instead -#' using CmdStanR's `inv_metric` argument (see below) to specify an inverse -#' metric directly using a vector or matrix from your \R session. -#' * `inv_metric`: (vector, matrix) A vector (if `metric='diag_e'`) or a -#' matrix (if `metric='dense_e'`) for initializing the inverse metric, which -#' can be used as an alternative to the `metric_file` argument. A vector is -#' interpreted as a diagonal metric. The inverse metric is usually set to an -#' estimate of the posterior covariance. See the `adapt_engaged` argument -#' above for details on (and control over) how specifying a precomputed -#' inverse metric interacts with adaptation. 
-#' * `init_buffer`: (nonnegative integer) Width of initial fast timestep -#' adaptation interval during warmup. -#' * `term_buffer`: (nonnegative integer) Width of final fast timestep -#' adaptation interval during warmup. -#' * `window`: (nonnegative integer) Initial width of slow timestep/metric -#' adaptation interval. -#' * `fixed_param`: (logical) When `TRUE`, call CmdStan with argument -#' `"algorithm=fixed_param"`. The default is `FALSE`. The fixed parameter -#' sampler generates a new sample without changing the current state of the -#' Markov chain; only generated quantities may change. This can be useful -#' when, for example, trying to generate pseudo-data using the generated -#' quantities block. If the parameters block is empty then using -#' `fixed_param=TRUE` is mandatory. When `fixed_param=TRUE` the `chains` and -#' `parallel_chains` arguments will be set to `1`. -#' -#' @section Value: The `$sample()` method returns a [`CmdStanMCMC`] object. +#' @section Value: A [`CmdStanMCMC`] object. 
#' #' @template seealso-docs #' @inherit cmdstan_model examples #' -NULL - -sample_method <- function(data = NULL, - seed = NULL, - refresh = NULL, - init = NULL, - save_latent_dynamics = FALSE, - output_dir = NULL, - sig_figs = NULL, - chains = 4, - parallel_chains = getOption("mc.cores", 1), - chain_ids = seq_len(chains), - threads_per_chain = NULL, - iter_warmup = NULL, - iter_sampling = NULL, - save_warmup = FALSE, - thin = NULL, - max_treedepth = NULL, - adapt_engaged = TRUE, - adapt_delta = NULL, - step_size = NULL, - metric = NULL, - metric_file = NULL, - inv_metric = NULL, - init_buffer = NULL, - term_buffer = NULL, - window = NULL, - fixed_param = FALSE, - validate_csv = TRUE, - show_messages = TRUE, - # deprecated - cores = NULL, - num_cores = NULL, - num_chains = NULL, - num_warmup = NULL, - num_samples = NULL, - save_extra_diagnostics = NULL, - max_depth = NULL, - stepsize = NULL) { - +sample <- function(data = NULL, + seed = NULL, + refresh = NULL, + init = NULL, + save_latent_dynamics = FALSE, + output_dir = NULL, + sig_figs = NULL, + chains = 4, + parallel_chains = getOption("mc.cores", 1), + chain_ids = seq_len(chains), + threads_per_chain = NULL, + iter_warmup = NULL, + iter_sampling = NULL, + save_warmup = FALSE, + thin = NULL, + max_treedepth = NULL, + adapt_engaged = TRUE, + adapt_delta = NULL, + step_size = NULL, + metric = NULL, + metric_file = NULL, + inv_metric = NULL, + init_buffer = NULL, + term_buffer = NULL, + window = NULL, + fixed_param = FALSE, + validate_csv = TRUE, + show_messages = TRUE, + # deprecated + cores = NULL, + num_cores = NULL, + num_chains = NULL, + num_warmup = NULL, + num_samples = NULL, + save_extra_diagnostics = NULL, + max_depth = NULL, + stepsize = NULL) { # temporary deprecation warnings if (!is.null(cores)) { warning("'cores' is deprecated. 
Please use 'parallel_chains' instead.") @@ -972,7 +805,7 @@ sample_method <- function(data = NULL, runset$run_cmdstan() CmdStanMCMC$new(runset) } -CmdStanModel$set("public", name = "sample", value = sample_method) +CmdStanModel$set("public", name = "sample", value = sample) #' Run Stan's MCMC algorithms with MPI #' @@ -987,15 +820,15 @@ CmdStanModel$set("public", name = "sample", value = sample_method) #' [`$sample()`][model-method-sample] method provides both parallelization of #' chains and threading support for within-chain parallelization. #' -#' @details In order to use MPI with Stan, an MPI implementation must be +#' In order to use MPI with Stan, an MPI implementation must be #' installed. For Unix systems the most commonly used implementations are #' MPICH and OpenMPI. The implementations provide an MPI C++ compiler wrapper #' (for example mpicxx), which is required to compile the model. #' #' An example of compiling with MPI: #' ``` -#' mpi_options <- list(STAN_MPI=TRUE, CXX="mpicxx", TBB_CXX_TYPE="gcc") -#' mod <- cmdstan_model("model.stan", cpp_options = mpi_options) +#' mpi_options = list(STAN_MPI=TRUE, CXX="mpicxx", TBB_CXX_TYPE="gcc") +#' mod = cmdstan_model("model.stan", cpp_options = mpi_options) #' ``` #' The C++ options that must be supplied to the #' [compile][model-method-compile] call are: @@ -1004,59 +837,21 @@ CmdStanModel$set("public", name = "sample", value = sample_method) #' - `TBB_CXX_TYPE`: The C++ compiler the MPI wrapper wraps. Typically `"gcc"` #' on Linux and `"clang"` on macOS. #' -#' In the call to the `$sample_mpi()` method we can also provide the name of -#' the MPI launcher (`mpi_cmd`, defaulting to `"mpiexec"`) and any other -#' MPI launch arguments. In most cases, it is enough to only define the number -#' of processes with `mpi_args = list("n" = 4)`. 
-#' -#' @section Usage: -#' ``` -#' $sample_mpi( -#' data = NULL, -#' mpi_cmd = "mpiexec", -#' mpi_args = NULL, -#' seed = NULL, -#' refresh = NULL, -#' init = NULL, -#' save_latent_dynamics = FALSE, -#' output_dir = NULL, -#' sig_figs = NULL, -#' chains = 4, -#' parallel_chains = getOption("mc.cores", 1), -#' chain_ids = seq_len(chains), -#' iter_warmup = NULL, -#' iter_sampling = NULL, -#' save_warmup = FALSE, -#' thin = NULL, -#' max_treedepth = NULL, -#' adapt_engaged = TRUE, -#' adapt_delta = NULL, -#' step_size = NULL, -#' metric = NULL, -#' metric_file = NULL, -#' inv_metric = NULL, -#' init_buffer = NULL, -#' term_buffer = NULL, -#' window = NULL, -#' fixed_param = FALSE, -#' validate_csv = TRUE, -#' show_messages = TRUE -#' ) -#' ``` -#' -#' @section Arguments unique to the `sample_mpi` method: -#' * `mpi_cmd`: (character vector) The MPI launcher used for launching MPI processes. -#' The default launcher is `"mpiexec"`. -#' * `mpi_args`: (list) A list of arguments to use when launching MPI processes. -#' For example, `mpi_args = list("n" = 4)` launches the executable as -#' `mpiexec -n 4 model_executable`, followed by CmdStan arguments -#' for the model executable. +#' In the call to the `$sample_mpi()` method it is also possible to provide +#' the name of the MPI launcher (`mpi_cmd`, defaulting to `"mpiexec"`) and any +#' other MPI launch arguments (`mpi_args`). In most cases, it is enough to +#' only define the number of processes. To use `n_procs` processes specify +#' `mpi_args = list("n" = n_procs)`. #' -#' All other arguments are the same as for [`$sample()`][model-method-sample] -#' except `$sample_mpi()` does not have arguments `threads_per_chain` or -#' `parallel_chains`. +#' @inheritParams model-method-sample +#' @param mpi_cmd (character vector) The MPI launcher used for launching MPI +#' processes. The default launcher is `"mpiexec"`. +#' @param mpi_args (list) A list of arguments to use when launching MPI +#' processes. 
For example, `mpi_args = list("n" = 4)` launches the executable +#' as `mpiexec -n 4 model_executable`, followed by CmdStan arguments for the +#' model executable. #' -#' @section Value: The `$sample_mpi()` method returns a [`CmdStanMCMC`] object. +#' @section Value: A [`CmdStanMCMC`] object. #' #' @template seealso-docs #' @seealso The Stan Math Library's MPI documentation @@ -1070,36 +865,34 @@ CmdStanModel$set("public", name = "sample", value = sample_method) #' # fit <- mod$sample_mpi(..., mpi_args = list("n" = 4)) #' } #' -NULL - -sample_mpi_method <- function(data = NULL, - mpi_cmd = "mpiexec", - mpi_args = NULL, - seed = NULL, - refresh = NULL, - init = NULL, - save_latent_dynamics = FALSE, - output_dir = NULL, - chains = 1, - chain_ids = seq_len(chains), - iter_warmup = NULL, - iter_sampling = NULL, - save_warmup = FALSE, - thin = NULL, - max_treedepth = NULL, - adapt_engaged = TRUE, - adapt_delta = NULL, - step_size = NULL, - metric = NULL, - metric_file = NULL, - inv_metric = NULL, - init_buffer = NULL, - term_buffer = NULL, - window = NULL, - fixed_param = FALSE, - sig_figs = NULL, - validate_csv = TRUE, - show_messages = TRUE) { +sample_mpi <- function(data = NULL, + mpi_cmd = "mpiexec", + mpi_args = NULL, + seed = NULL, + refresh = NULL, + init = NULL, + save_latent_dynamics = FALSE, + output_dir = NULL, + chains = 1, + chain_ids = seq_len(chains), + iter_warmup = NULL, + iter_sampling = NULL, + save_warmup = FALSE, + thin = NULL, + max_treedepth = NULL, + adapt_engaged = TRUE, + adapt_delta = NULL, + step_size = NULL, + metric = NULL, + metric_file = NULL, + inv_metric = NULL, + init_buffer = NULL, + term_buffer = NULL, + window = NULL, + fixed_param = FALSE, + sig_figs = NULL, + validate_csv = TRUE, + show_messages = TRUE) { if (fixed_param) { chains <- 1 save_warmup <- FALSE @@ -1147,7 +940,7 @@ sample_mpi_method <- function(data = NULL, runset$run_cmdstan_mpi(mpi_cmd, mpi_args) CmdStanMCMC$new(runset) } -CmdStanModel$set("public", name = 
"sample_mpi", value = sample_mpi_method) +CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi) #' Run Stan's optimization algorithms #' @@ -1159,6 +952,11 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi_method) #' Stan's optimizer to obtain a posterior mode (penalized maximum likelihood) #' estimate. #' +#' Any argument left as `NULL` will default to the default value used by the +#' installed version of CmdStan. See the +#' [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/) +#' for more details. +#' #' @details CmdStan can find the posterior mode (assuming there is one). If the #' posterior is not convex, there is no guarantee Stan will be able to find #' the global mode as opposed to a local optimum of log probability. For @@ -1168,81 +966,48 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi_method) #' #' -- [*CmdStan User's Guide*](https://mc-stan.org/docs/cmdstan-guide/) #' -#' @section Usage: -#' ``` -#' $optimize( -#' data = NULL, -#' seed = NULL, -#' refresh = NULL, -#' init = NULL, -#' save_latent_dynamics = FALSE, -#' output_dir = NULL, -#' sig_figs = NULL, -#' threads = NULL, -#' algorithm = NULL, -#' init_alpha = NULL, -#' iter = NULL, -#' tol_obj = NULL, -#' tol_rel_obj = NULL, -#' tol_grad = NULL, -#' tol_rel_grad = NULL, -#' tol_param = NULL, -#' history_size = NULL -#' ) -#' ``` -#' #' @template model-common-args -#' @section Arguments unique to the `optimize` method: In addition to the -#' arguments above, the `$optimize()` method also has its own set of -#' arguments. These arguments are described briefly here and in greater detail -#' in the CmdStan manual. Arguments left at `NULL` default to the default used -#' by the installed version of CmdStan. -#' The latest [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/) -#' will have the defaults for the latest version of CmdStan. 
-#' -#' * `threads`: (positive integer) If the model was +#' @param threads (positive integer) If the model was #' [compiled][model-method-compile] with threading support, the number of #' threads to use in parallelized sections (e.g., when #' using the Stan functions `reduce_sum()` or `map_rect()`). -#' * `iter`: (positive integer) The maximum number of iterations. -#' * `algorithm`: (string) The optimization algorithm. One of `"lbfgs"`, +#' @param iter (positive integer) The maximum number of iterations. +#' @param algorithm (string) The optimization algorithm. One of `"lbfgs"`, #' `"bfgs"`, or `"newton"`. The control parameters below are only available #' for `"lbfgs"` and `"bfgs"`. For their default values and more details see #' the CmdStan User's Guide. The default values can also be obtained by #' running `cmdstanr_example(method="optimize")$metadata()`. -#' * `init_alpha`: (positive real) The initial step size parameter. -#' * `tol_obj`: (positive real) Convergence tolerance on changes in objective function value. -#' * `tol_rel_obj`: (positive real) Convergence tolerance on relative changes in objective function value. -#' * `tol_grad`: (positive real) Convergence tolerance on the norm of the gradient. -#' * `tol_rel_grad`: (positive real) Convergence tolerance on the relative norm of the gradient. -#' * `tol_param`: (positive real) Convergence tolerance on changes in parameter value. -#' * `history_size`: (positive integer) The size of the history used when +#' @param init_alpha (positive real) The initial step size parameter. +#' @param tol_obj (positive real) Convergence tolerance on changes in objective function value. +#' @param tol_rel_obj (positive real) Convergence tolerance on relative changes in objective function value. +#' @param tol_grad (positive real) Convergence tolerance on the norm of the gradient. +#' @param tol_rel_grad (positive real) Convergence tolerance on the relative norm of the gradient. 
+#' @param tol_param (positive real) Convergence tolerance on changes in parameter value. +#' @param history_size (positive integer) The size of the history used when #' approximating the Hessian. Only available for L-BFGS. #' -#' @section Value: The `$optimize()` method returns a [`CmdStanMLE`] object. +#' @section Value: A [`CmdStanMLE`] object. #' #' @template seealso-docs #' @inherit cmdstan_model examples #' -NULL - -optimize_method <- function(data = NULL, - seed = NULL, - refresh = NULL, - init = NULL, - save_latent_dynamics = FALSE, - output_dir = NULL, - sig_figs = NULL, - threads = NULL, - algorithm = NULL, - init_alpha = NULL, - iter = NULL, - tol_obj = NULL, - tol_rel_obj = NULL, - tol_grad = NULL, - tol_rel_grad = NULL, - tol_param = NULL, - history_size = NULL) { +optimize <- function(data = NULL, + seed = NULL, + refresh = NULL, + init = NULL, + save_latent_dynamics = FALSE, + output_dir = NULL, + sig_figs = NULL, + threads = NULL, + algorithm = NULL, + init_alpha = NULL, + iter = NULL, + tol_obj = NULL, + tol_rel_obj = NULL, + tol_grad = NULL, + tol_rel_grad = NULL, + tol_param = NULL, + history_size = NULL) { checkmate::assert_integerish(threads, lower = 1, len = 1, null.ok = TRUE) if (is.null(self$cpp_options()[["stan_threads"]])) { if (!is.null(threads)) { @@ -1292,7 +1057,7 @@ optimize_method <- function(data = NULL, runset$run_cmdstan() CmdStanMLE$new(runset) } -CmdStanModel$set("public", name = "optimize", value = optimize_method) +CmdStanModel$set("public", name = "optimize", value = optimize) #' Run Stan's variational approximation algorithms @@ -1304,6 +1069,11 @@ CmdStanModel$set("public", name = "optimize", value = optimize_method) #' @description The `$variational()` method of a [`CmdStanModel`] object runs #' Stan's variational Bayes (ADVI) algorithms. #' +#' Any argument left as `NULL` will default to the default value used by the +#' installed version of CmdStan. 
See the +#' [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/) +#' for more details. +#' #' @details CmdStan can fit a variational approximation to the posterior. The #' approximation is a Gaussian in the unconstrained variable space. Stan #' implements two variational algorithms. The `algorithm="meanfield"` option @@ -1313,84 +1083,52 @@ CmdStanModel$set("public", name = "optimize", value = optimize_method) #' #' -- [*CmdStan Interface User's Guide*](https://github.com/stan-dev/cmdstan/releases/latest) #' -#' @section Usage: -#' ``` -#' $variational( -#' data = NULL, -#' seed = NULL, -#' refresh = NULL, -#' init = NULL, -#' save_latent_dynamics = FALSE, -#' output_dir = NULL, -#' sig_figs = NULL, -#' threads = NULL, -#' algorithm = NULL, -#' iter = NULL, -#' grad_samples = NULL, -#' elbo_samples = NULL, -#' eta = NULL, -#' adapt_engaged = NULL, -#' adapt_iter = NULL, -#' tol_rel_obj = NULL, -#' eval_elbo = NULL, -#' output_samples = NULL -#' ) -#' ``` -#' #' @template model-common-args -#' @section Arguments unique to the `variational` method: In addition to the -#' arguments above, the `$variational()` method also has its own set of -#' arguments. These arguments are described briefly here and in greater detail -#' in the CmdStan manual. Arguments left at `NULL` default to the default used -#' by the installed version of CmdStan. -#' -#' * `threads`: (positive integer) If the model was +#' @param threads (positive integer) If the model was #' [compiled][model-method-compile] with threading support, the number of -#' threads to use in parallelized sections (e.g., when -#' using the Stan functions `reduce_sum()` or `map_rect()`). -#' * `algorithm`: (string) The algorithm. Either `"meanfield"` or `"fullrank"`. -#' * `iter`: (positive integer) The _maximum_ number of iterations. 
-#' * `grad_samples`: (positive integer) The number of samples for Monte Carlo +#' threads to use in parallelized sections (e.g., when using the Stan +#' functions `reduce_sum()` or `map_rect()`). +#' @param algorithm (string) The algorithm. Either `"meanfield"` or +#' `"fullrank"`. +#' @param iter (positive integer) The _maximum_ number of iterations. +#' @param grad_samples (positive integer) The number of samples for Monte Carlo #' estimate of gradients. -#' * `elbo_samples`: (positive integer) The number of samples for Monte Carlo +#' @param elbo_samples (positive integer) The number of samples for Monte Carlo #' estimate of ELBO (objective function). -#' * `eta`: (positive real) The step size weighting parameter for adaptive +#' @param eta (positive real) The step size weighting parameter for adaptive #' step size sequence. -#' * `adapt_engaged`: (logical) Do warmup adaptation? -#' * `adapt_iter`: (positive integer) The _maximum_ number of adaptation +#' @param adapt_engaged (logical) Do warmup adaptation? +#' @param adapt_iter (positive integer) The _maximum_ number of adaptation #' iterations. -#' * `tol_rel_obj`: (positive real) Convergence tolerance on the relative norm +#' @param tol_rel_obj (positive real) Convergence tolerance on the relative norm #' of the objective. -#' * `eval_elbo`: (positive integer) Evaluate ELBO every Nth iteration. -#' * `output_samples:` (positive integer) Number of posterior samples to draw -#' and save. +#' @param eval_elbo (positive integer) Evaluate ELBO every Nth iteration. +#' @param output_samples (positive integer) Number of approximate posterior +#' samples to draw and save. #' -#' -#' @section Value: The `$variational()` method returns a [`CmdStanVB`] object. +#' @section Value: A [`CmdStanVB`] object. 
#' #' @template seealso-docs #' @inherit cmdstan_model examples #' -NULL - -variational_method <- function(data = NULL, - seed = NULL, - refresh = NULL, - init = NULL, - save_latent_dynamics = FALSE, - output_dir = NULL, - sig_figs = NULL, - threads = NULL, - algorithm = NULL, - iter = NULL, - grad_samples = NULL, - elbo_samples = NULL, - eta = NULL, - adapt_engaged = NULL, - adapt_iter = NULL, - tol_rel_obj = NULL, - eval_elbo = NULL, - output_samples = NULL) { +variational <- function(data = NULL, + seed = NULL, + refresh = NULL, + init = NULL, + save_latent_dynamics = FALSE, + output_dir = NULL, + sig_figs = NULL, + threads = NULL, + algorithm = NULL, + iter = NULL, + grad_samples = NULL, + elbo_samples = NULL, + eta = NULL, + adapt_engaged = NULL, + adapt_iter = NULL, + tol_rel_obj = NULL, + eval_elbo = NULL, + output_samples = NULL) { checkmate::assert_integerish(threads, lower = 1, len = 1, null.ok = TRUE) if (is.null(self$cpp_options()[["stan_threads"]])) { if (!is.null(threads)) { @@ -1441,7 +1179,7 @@ variational_method <- function(data = NULL, runset$run_cmdstan() CmdStanVB$new(runset) } -CmdStanModel$set("public", name = "variational", value = variational_method) +CmdStanModel$set("public", name = "variational", value = variational) #' Run Stan's standalone generated quantities method #' @@ -1453,29 +1191,15 @@ CmdStanModel$set("public", name = "variational", value = variational_method) #' runs Stan's standalone generated quantities to obtain generated quantities #' based on previously fitted parameters. #' -#' @section Usage: -#' ``` -#' $generate_quantities( -#' fitted_params, -#' data = NULL, -#' seed = NULL, -#' output_dir = NULL, -#' sig_figs = NULL, -#' parallel_chains = getOption("mc.cores", 1), -#' threads_per_chain = NULL -#' ) -#' ``` +#' @inheritParams model-method-sample +#' @param fitted_params (multiple options) The parameter draws to use. One of +#' the following: +#' * A [CmdStanMCMC] or [CmdStanVB] fitted model object. 
+#' * A [posterior::draws_array] (for MCMC) or [posterior::draws_matrix] (for +#' VB) object returned by CmdStanR's [`$draws()`][fit-method-draws] method. +#' * A character vector of paths to CmdStan CSV output files. #' -#' @section Arguments: -#' * `fitted_params`: (multiple options) The parameter draws to use. One of the following: -#' - A [CmdStanMCMC] or [CmdStanVB] fitted model object. -#' - A [posterior::draws_array] (for MCMC) or [posterior::draws_matrix] (for VB) -#' object returned by CmdStanR's [`$draws()`][fit-method-draws] method. -#' - A character vector of paths to CmdStan CSV output files. -#' * `data`, `seed`, `output_dir`, `sig_figs`, `parallel_chains`, `threads_per_chain`: -#' Same as for the [`$sample()`][model-method-sample] method. -#' -#' @section Value: The `$generate_quantities()` method returns a [`CmdStanGQ`] object. +#' @section Value: A [`CmdStanGQ`] object. #' #' @template seealso-docs #' @@ -1522,15 +1246,13 @@ CmdStanModel$set("public", name = "variational", value = variational_method) #' as_draws_df(fit_gq$draws()) #' } #' -NULL - -generate_quantities_method <- function(fitted_params, - data = NULL, - seed = NULL, - output_dir = NULL, - sig_figs = NULL, - parallel_chains = getOption("mc.cores", 1), - threads_per_chain = NULL) { +generate_quantities <- function(fitted_params, + data = NULL, + seed = NULL, + output_dir = NULL, + sig_figs = NULL, + parallel_chains = getOption("mc.cores", 1), + threads_per_chain = NULL) { checkmate::assert_integerish(parallel_chains, lower = 1, null.ok = TRUE) checkmate::assert_integerish(threads_per_chain, lower = 1, len = 1, null.ok = TRUE) if (is.null(self$cpp_options()[["stan_threads"]])) { @@ -1572,4 +1294,4 @@ generate_quantities_method <- function(fitted_params, runset$run_cmdstan() CmdStanGQ$new(runset) } -CmdStanModel$set("public", name = "generate_quantities", value = generate_quantities_method) +CmdStanModel$set("public", name = "generate_quantities", value = generate_quantities) diff --git 
a/docs/404.html b/docs/404.html index 43b2a3f6a..f82b192b0 100644 --- a/docs/404.html +++ b/docs/404.html @@ -79,7 +79,7 @@ cmdstanr - 0.2.0 + 0.3.0 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index b96e169fb..11f3941a8 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -79,7 +79,7 @@ cmdstanr - 0.2.2 + 0.3.0 diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 81d4b0e51..105fe6888 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -79,7 +79,7 @@ cmdstanr - 0.2.2 + 0.3.0 diff --git a/docs/articles/cmdstanr-internals.html b/docs/articles/cmdstanr-internals.html index c03ca23e8..8ef4ae7e3 100644 --- a/docs/articles/cmdstanr-internals.html +++ b/docs/articles/cmdstanr-internals.html @@ -38,7 +38,7 @@ cmdstanr - 0.2.1 + 0.3.0 @@ -229,14 +229,14 @@

Compiling Stan program... Warning: The parameter lambda has no priors. -Warning at '/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp6z0m2V/model-14ba25f3a526f.stan', line 11, column 14 to column 20: +Warning at '/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/model-34da50fdf88f.stan', line 11, column 14 to column 20: A poisson distribution is given parameter lambda as a rate parameter (argument 1), but lambda was not constrained to be strictly positive.

To turn on pedantic mode separately from compilation use the pedantic argument to the $check_syntax() method.

mod_pedantic$check_syntax(pedantic = TRUE) 
 Warning:
   The parameter lambda has no priors.
-Warning at '/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp6z0m2V/file14ba25b2cde39.stan', line 11, column 14 to column 20:
+Warning at '/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/file34da6f12ccb5.stan', line 11, column 14 to column 20:
   A poisson distribution is given parameter lambda as a rate parameter
   (argument 1), but lambda was not constrained to be strictly positive.
 Stan program is syntactically correct
@@ -249,7 +249,7 @@

mod_pedantic$check_syntax(pedantic = TRUE) Warning: The parameter lambda has no priors. -Warning at '/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp6z0m2V/file14ba25b2cde39.stan', line 11, column 14 to column 20: +Warning at '/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/file34da6f12ccb5.stan', line 11, column 14 to column 20: A poisson distribution is given parameter lambda as a rate parameter (argument 1), but lambda was not constrained to be strictly positive. Stan program is syntactically correct @@ -325,10 +325,10 @@

fit <- mod$sample(data = data_list)

When fitting a model, the default behavior is to write the output from CmdStan to CSV files in a temporary directory:

fit$output_files()
-
[1] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp6z0m2V/bernoulli-202011241551-1-9165a8.csv"
-[2] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp6z0m2V/bernoulli-202011241551-2-9165a8.csv"
-[3] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp6z0m2V/bernoulli-202011241551-3-9165a8.csv"
-[4] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp6z0m2V/bernoulli-202011241551-4-9165a8.csv"
+
[1] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/bernoulli-202012171340-1-34baed.csv"
+[2] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/bernoulli-202012171340-2-34baed.csv"
+[3] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/bernoulli-202012171340-3-34baed.csv"
+[4] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/bernoulli-202012171340-4-34baed.csv"

These files will be lost if you end your R session or if you remove the fit object and force (or wait for) garbage collection.

files <- fit$output_files()
 file.exists(files)
@@ -336,8 +336,8 @@

rm(fit)
 gc()
          used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
-Ncells  856930 45.8    1341254 71.7         NA  1341254 71.7
-Vcells 1593242 12.2    8388608 64.0      16384  2363425 18.1
+Ncells 830780 44.4 1339817 71.6 NA 1339817 71.6 +Vcells 1540850 11.8 8388608 64.0 16384 2443368 18.7
file.exists(files)
[1] FALSE FALSE FALSE FALSE
@@ -365,14 +365,15 @@

Inherits from: <CmdStanFit> Public: clone: function (deep = FALSE) - cmdstan_diagnose: function (...) - cmdstan_summary: function (...) + cmdstan_diagnose: function () + cmdstan_summary: function (flags = NULL) data_file: function () draws: function (variables = NULL, inc_warmup = FALSE) init: function () initialize: function (runset) inv_metric: function (matrix = TRUE) latent_dynamics_files: function (include_failed = FALSE) + loo: function (variables = "log_lik", r_eff = TRUE, ...) lp: function () metadata: function () num_chains: function () @@ -405,14 +406,15 @@

Inherits from: <CmdStanFit> Public: clone: function (deep = FALSE) - cmdstan_diagnose: function (...) - cmdstan_summary: function (...) + cmdstan_diagnose: function () + cmdstan_summary: function (flags = NULL) data_file: function () draws: function (variables = NULL, inc_warmup = FALSE) init: function () initialize: function (runset) inv_metric: function (matrix = TRUE) latent_dynamics_files: function (include_failed = FALSE) + loo: function (variables = "log_lik", r_eff = TRUE, ...) lp: function () metadata: function () num_chains: function () @@ -430,7 +432,7 @@

summary: function (variables = NULL, ...) time: function () Private: - draws_: -6.78913 -6.76301 -6.76814 -7.29383 -7.03635 -7.40541 -8 ... + draws_: -7.52422 -9.11823 -8.53862 -6.81816 -6.9535 -8.32874 -7. ... init_: NULL inv_metric_: list metadata_: list @@ -468,12 +470,12 @@

..$ stepsize_jitter : num 0 ..$ id : num [1:4] 1 2 3 4 ..$ init : num [1:4] 2 2 2 2 - ..$ seed : num [1:4] 2.10e+08 1.69e+09 7.13e+08 7.69e+08 + ..$ seed : num [1:4] 1.23e+09 1.35e+09 6.17e+08 1.43e+09 ..$ refresh : num 100 ..$ sig_figs : num -1 ..$ sampler_diagnostics : chr [1:6] "accept_stat__" "stepsize__" "treedepth__" "n_leapfrog__" ... ..$ model_params : chr [1:2] "lp__" "theta" - ..$ step_size_adaptation: num [1:4] 0.859 0.921 1.049 1.023 + ..$ step_size_adaptation: num [1:4] 1.09 0.841 0.927 0.874 ..$ model_name : chr "bernoulli_model" ..$ adapt_engaged : num 1 ..$ adapt_delta : num 0.8 @@ -486,23 +488,23 @@

.. ..$ theta: num 1 ..$ stan_variables : chr [1:2] "lp__" "theta" $ inv_metric :List of 4 - ..$ 1: num 0.542 - ..$ 2: num 0.525 - ..$ 3: num 0.444 - ..$ 4: num 0.52 + ..$ 1: num 0.411 + ..$ 2: num 0.582 + ..$ 3: num 0.437 + ..$ 4: num 0.483 $ step_size :List of 4 - ..$ 1: num 0.859 - ..$ 2: num 0.921 - ..$ 3: num 1.05 - ..$ 4: num 1.02 + ..$ 1: num 1.09 + ..$ 2: num 0.841 + ..$ 3: num 0.927 + ..$ 4: num 0.874 $ warmup_draws : NULL - $ post_warmup_draws : 'draws_array' num [1:1000, 1:4, 1:2] -6.79 -6.76 -6.77 -7.29 -7.04 ... + $ post_warmup_draws : 'draws_array' num [1:1000, 1:4, 1:2] -7.52 -9.12 -8.54 -6.82 -6.95 ... ..- attr(*, "dimnames")=List of 3 .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... .. ..$ chain : chr [1:4] "1" "2" "3" "4" .. ..$ variable : chr [1:2] "lp__" "theta" $ warmup_sampler_diagnostics : NULL - $ post_warmup_sampler_diagnostics: 'draws_array' num [1:1000, 1:4, 1:6] 0.994 1 0.995 0.923 1 ... + $ post_warmup_sampler_diagnostics: 'draws_array' num [1:1000, 1:4, 1:6] 0.925 0.823 1 1 0.969 ... ..- attr(*, "dimnames")=List of 3 .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... .. ..$ chain : chr [1:4] "1" "2" "3" "4" @@ -515,36 +517,36 @@

CmdStanR does not yet provide a special method for processing these files, but they can be read into R using R’s standard CSV reading functions:

fit <- mod$sample(data = data_list, save_latent_dynamics = TRUE)
fit$latent_dynamics_files()
-
[1] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp6z0m2V/bernoulli-diagnostic-202011241551-1-55cc49.csv"
-[2] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp6z0m2V/bernoulli-diagnostic-202011241551-2-55cc49.csv"
-[3] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp6z0m2V/bernoulli-diagnostic-202011241551-3-55cc49.csv"
-[4] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp6z0m2V/bernoulli-diagnostic-202011241551-4-55cc49.csv"
+
[1] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/bernoulli-diagnostic-202012171340-1-33b8ec.csv"
+[2] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/bernoulli-diagnostic-202012171340-2-33b8ec.csv"
+[3] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/bernoulli-diagnostic-202012171340-3-33b8ec.csv"
+[4] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/bernoulli-diagnostic-202012171340-4-33b8ec.csv"
# read one of the files in
 x <- utils::read.csv(fit$latent_dynamics_files()[1], comment.char = "#")
 head(x)
      lp__ accept_stat__ stepsize__ treedepth__ n_leapfrog__ divergent__
-1 -6.77163      1.000000    1.01917           1            1           0
-2 -7.08782      0.901299    1.01917           1            3           0
-3 -6.82835      0.998086    1.01917           1            3           0
-4 -7.17254      0.961877    1.01917           2            3           0
-5 -6.79864      0.663324    1.01917           2            3           0
-6 -9.34056      0.564600    1.01917           2            3           0
-  energy__     theta   p_theta   g_theta
-1  6.78126 -0.955446 -0.195418  0.333491
-2  7.26475 -1.677310  0.837281 -1.110570
-3  7.02601 -1.372220  0.884978 -0.572866
-4  7.23579 -0.510811 -0.500586  1.500040
-5  8.68073 -0.890040  2.730830  0.493219
-6  9.36160 -2.899070  0.288769 -2.373610
+1 -7.86236 0.603036 0.987064 2 3 0 +2 -7.12202 1.000000 0.987064 1 1 0 +3 -7.12202 0.330321 0.987064 1 1 0 +4 -6.77903 1.000000 0.987064 1 3 0 +5 -7.40531 0.811543 0.987064 1 3 0 +6 -6.95963 0.986487 0.987064 1 3 0 + energy__ theta p_theta g_theta +1 9.22511 -0.165477 2.36755 2.504700 +2 7.65762 -0.545627 -1.48427 1.402550 +3 9.20892 -0.545627 -2.92983 1.402550 +4 7.02219 -0.934776 -1.00008 0.383482 +5 7.83726 -1.922470 -1.33294 -1.468960 +6 7.32140 -1.549900 -1.21986 -0.898786

The column lp__ is also provided via fit$draws(), and the columns accept_stat__, stepsize__, treedepth__, n_leapfrog__, divergent__, and energy__ are also provided by fit$sampler_diagnostics(), but there are several columns unique to the latent dynamics file:

head(x[, c("theta", "p_theta", "g_theta")])
-
      theta   p_theta   g_theta
-1 -0.955446 -0.195418  0.333491
-2 -1.677310  0.837281 -1.110570
-3 -1.372220  0.884978 -0.572866
-4 -0.510811 -0.500586  1.500040
-5 -0.890040  2.730830  0.493219
-6 -2.899070  0.288769 -2.373610
+
      theta  p_theta   g_theta
+1 -0.165477  2.36755  2.504700
+2 -0.545627 -1.48427  1.402550
+3 -0.545627 -2.92983  1.402550
+4 -0.934776 -1.00008  0.383482
+5 -1.922470 -1.33294 -1.468960
+6 -1.549900 -1.21986 -0.898786

Our model has a single parameter theta and the three columns above correspond to theta in the unconstrained space (theta on the constrained space is accessed via fit$draws()), the auxiliary momentum p_theta, and the gradient g_theta. In general, each of these three columns will exist for every parameter in the model.

@@ -557,15 +559,107 @@

We can check that this worked by removing fit and loading it back in from the save file:

rm(fit); gc()
          used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
-Ncells  873638 46.7    1654670 88.4         NA  1341254 71.7
-Vcells 1696076 13.0    8388608 64.0      16384  3066776 23.4
+Ncells 846633 45.3 1339817 71.6 NA 1339817 71.6 +Vcells 1640837 12.6 8388608 64.0 16384 3011182 23.0
fit <- readRDS(temp_rds_file)
 fit$summary()
# A tibble: 2 x 10
   variable   mean median    sd   mad      q5    q95  rhat ess_bulk ess_tail
   <chr>     <dbl>  <dbl> <dbl> <dbl>   <dbl>  <dbl> <dbl>    <dbl>    <dbl>
-1 lp__     -7.26  -6.99  0.761 0.325 -8.73   -6.75   1.00    1963.    2302.
-2 theta     0.247  0.235 0.118 0.120  0.0819  0.465  1.00    1587.    2049.
+1 lp__ -7.28 -6.99 0.758 0.333 -8.85 -6.75 1.00 1753. 1889. +2 theta 0.251 0.238 0.121 0.123 0.0784 0.473 1.00 1381. 1459. + +
+

+Developing using CmdStanR

+

CmdStanR can of course be used for developing other packages that require compiling and running Stan models as well as using new or custom Stan features available through CmdStan.

+
+

+Troubleshooting and debugging

+

When developing or testing new features, it might be useful to have more information on how CmdStan is called internally and to see more information printed when compiling or running models. This can be enabled for an entire R session by setting the option "cmdstanr_verbose" to TRUE.

+
options("cmdstanr_verbose"=TRUE)
+
+mod <- cmdstan_model(stan_file, force_recompile = TRUE)
+
Compiling Stan program...
+
Running make \
+  /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/model-34da6df292c7 \
+  "STANCFLAGS += --name='bernoulli_model'"
+
+--- Translating Stan model to C++ code ---
+bin/stanc --name='bernoulli_model' --o=/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/model-34da6df292c7.hpp /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/model-34da6df292c7.stan
+
+--- Compiling, linking C++ code ---
+clang++ -std=c++1y -Wno-unknown-warning-option -Wno-tautological-compare -Wno-sign-compare -D_REENTRANT -Wno-ignored-attributes      -I stan/lib/stan_math/lib/tbb_2019_U8/include   -O3 -I src -I stan/src -I lib/rapidjson_1.1.0/ -I stan/lib/stan_math/ -I stan/lib/stan_math/lib/eigen_3.3.7 -I stan/lib/stan_math/lib/boost_1.72.0 -I stan/lib/stan_math/lib/sundials_5.2.0/include    -DBOOST_DISABLE_ASSERTS        -c -include-pch stan/src/stan/model/model_header.hpp.gch -x c++ -o /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/model-34da6df292c7.o /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/model-34da6df292c7.hpp
+clang++ -std=c++1y -Wno-unknown-warning-option -Wno-tautological-compare -Wno-sign-compare -D_REENTRANT -Wno-ignored-attributes      -I stan/lib/stan_math/lib/tbb_2019_U8/include   -O3 -I src -I stan/src -I lib/rapidjson_1.1.0/ -I stan/lib/stan_math/ -I stan/lib/stan_math/lib/eigen_3.3.7 -I stan/lib/stan_math/lib/boost_1.72.0 -I stan/lib/stan_math/lib/sundials_5.2.0/include    -DBOOST_DISABLE_ASSERTS              -Wl,-L,"/Users/jgabry/.cmdstanr/cmdstan-2.25.0/stan/lib/stan_math/lib/tbb" -Wl,-rpath,"/Users/jgabry/.cmdstanr/cmdstan-2.25.0/stan/lib/stan_math/lib/tbb"      /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/model-34da6df292c7.o src/cmdstan/main.o         stan/lib/stan_math/lib/sundials_5.2.0/lib/libsundials_nvecserial.a stan/lib/stan_math/lib/sundials_5.2.0/lib/libsundials_cvodes.a stan/lib/stan_math/lib/sundials_5.2.0/lib/libsundials_idas.a stan/lib/stan_math/lib/sundials_5.2.0/lib/libsundials_kinsol.a  stan/lib/stan_math/lib/tbb/libtbb.dylib stan/lib/stan_math/lib/tbb/libtbbmalloc.dylib stan/lib/stan_math/lib/tbb/libtbbmalloc_proxy.dylib -o /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/model-34da6df292c7
+rm -f /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/model-34da6df292c7.o
+
fit <- mod$sample(
+  data = data_list,
+  chains = 1,
+  iter_warmup = 100,
+  iter_sampling = 100
+)
+
Running MCMC with 1 chain...
+
+Running ./bernoulli 'id=1' random 'seed=1610984816' data \
+  'file=/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/standata-34da50827f35.json' \
+  output \
+  'file=/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/bernoulli-202012171340-1-88a07d.csv' \
+  'method=sample' 'num_samples=100' 'num_warmup=100' 'save_warmup=0' \
+  'algorithm=hmc' 'engine=nuts' adapt 'engaged=1'
+Chain 1 method = sample (Default) 
+Chain 1   sample 
+Chain 1     num_samples = 100 
+Chain 1     num_warmup = 100 
+Chain 1     save_warmup = 0 (Default) 
+Chain 1     thin = 1 (Default) 
+Chain 1     adapt 
+Chain 1       engaged = 1 (Default) 
+Chain 1       gamma = 0.050000000000000003 (Default) 
+Chain 1       delta = 0.80000000000000004 (Default) 
+Chain 1       kappa = 0.75 (Default) 
+Chain 1       t0 = 10 (Default) 
+Chain 1       init_buffer = 75 (Default) 
+Chain 1       term_buffer = 50 (Default) 
+Chain 1       window = 25 (Default) 
+Chain 1     algorithm = hmc (Default) 
+Chain 1       hmc 
+Chain 1         engine = nuts (Default) 
+Chain 1           nuts 
+Chain 1             max_depth = 10 (Default) 
+Chain 1         metric = diag_e (Default) 
+Chain 1         metric_file =  (Default) 
+Chain 1         stepsize = 1 (Default) 
+Chain 1         stepsize_jitter = 0 (Default) 
+Chain 1 id = 1 
+Chain 1 data 
+Chain 1   file = /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/standata-34da50827f35.json 
+Chain 1 init = 2 (Default) 
+Chain 1 random 
+Chain 1   seed = 1610984816 
+Chain 1 output 
+Chain 1   file = /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpHQoi2h/bernoulli-202012171340-1-88a07d.csv 
+Chain 1   diagnostic_file =  (Default) 
+Chain 1   refresh = 100 (Default) 
+Chain 1   sig_figs = -1 (Default) 
+Chain 1 Gradient evaluation took 9e-06 seconds 
+Chain 1 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds. 
+Chain 1 Adjust your expectations accordingly! 
+Chain 1 WARNING: There aren't enough warmup iterations to fit the 
+Chain 1          three stages of adaptation as currently configured. 
+Chain 1          Reducing each adaptation stage to 15%/75%/10% of 
+Chain 1          the given number of warmup iterations: 
+Chain 1            init_buffer = 15 
+Chain 1            adapt_window = 75 
+Chain 1            term_buffer = 10 
+Chain 1 Iteration:   1 / 200 [  0%]  (Warmup) 
+Chain 1 Iteration: 100 / 200 [ 50%]  (Warmup) 
+Chain 1 Iteration: 101 / 200 [ 50%]  (Sampling) 
+Chain 1 Iteration: 200 / 200 [100%]  (Sampling) 
+Chain 1  Elapsed Time: 0 seconds (Warm-up) 
+Chain 1                0.001 seconds (Sampling) 
+Chain 1                0.001 seconds (Total) 
+Chain 1 finished in 0.0 seconds.
+
diff --git a/docs/articles/cmdstanr.html b/docs/articles/cmdstanr.html index 95047e6ef..238beb201 100644 --- a/docs/articles/cmdstanr.html +++ b/docs/articles/cmdstanr.html @@ -38,7 +38,7 @@ cmdstanr - 0.2.2 + 0.3.0 @@ -347,7 +347,7 @@

CmdStan utilities

The $cmdstan_diagnose() and $cmdstan_summary() methods call CmdStan’s diagnose and stansummary utilities:

fit$cmdstan_diagnose()
-
Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpzEjL6R/bernoulli-202012151349-1-91a8ba.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpzEjL6R/bernoulli-202012151349-2-91a8ba.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpzEjL6R/bernoulli-202012151349-3-91a8ba.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpzEjL6R/bernoulli-202012151349-4-91a8ba.csv
+
Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpnnYljv/bernoulli-202012171341-1-2e6207.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpnnYljv/bernoulli-202012171341-2-2e6207.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpnnYljv/bernoulli-202012171341-3-2e6207.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpnnYljv/bernoulli-202012171341-4-2e6207.csv
 
 Checking sampler transitions treedepth.
 Treedepth satisfactory for all transitions.
@@ -364,24 +364,24 @@ 

Processing complete, no problems detected.

-
Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpzEjL6R/bernoulli-202012151349-1-91a8ba.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpzEjL6R/bernoulli-202012151349-2-91a8ba.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpzEjL6R/bernoulli-202012151349-3-91a8ba.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpzEjL6R/bernoulli-202012151349-4-91a8ba.csv
+
Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpnnYljv/bernoulli-202012171341-1-2e6207.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpnnYljv/bernoulli-202012171341-2-2e6207.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpnnYljv/bernoulli-202012171341-3-2e6207.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpnnYljv/bernoulli-202012171341-4-2e6207.csv
 Inference for Stan model: bernoulli_model
 4 chains: each with iter=(1000,1000,1000,1000); warmup=(0,0,0,0); thin=(1,1,1,1); 4000 iterations saved.
 
-Warmup took (0.0060, 0.0060, 0.0060, 0.0060) seconds, 0.024 seconds total
-Sampling took (0.015, 0.014, 0.013, 0.013) seconds, 0.055 seconds total
+Warmup took (0.0070, 0.0060, 0.0070, 0.0060) seconds, 0.026 seconds total
+Sampling took (0.017, 0.017, 0.015, 0.016) seconds, 0.065 seconds total
 
                 Mean     MCSE  StdDev     5%   50%   95%    N_Eff  N_Eff/s    R_hat
 
-lp__            -7.3  2.0e-02    0.76   -8.8  -7.0  -6.8     1431    26014      1.0
-accept_stat__   0.90  2.5e-03    0.15   0.57  0.97   1.0  3.7e+03  6.7e+04  1.0e+00
-stepsize__       1.1  8.8e-02    0.12   0.93   1.2   1.3  2.0e+00  3.6e+01  3.5e+13
-treedepth__      1.4  8.2e-03    0.51    1.0   1.0   2.0  3.8e+03  6.9e+04  1.0e+00
-n_leapfrog__     2.5  1.9e-01     1.3    1.0   3.0   3.0  4.3e+01  7.9e+02  1.0e+00
+lp__            -7.3  2.0e-02    0.76   -8.8  -7.0  -6.8     1431    22012      1.0
+accept_stat__   0.90  2.5e-03    0.15   0.57  0.97   1.0  3.7e+03  5.7e+04  1.0e+00
+stepsize__       1.1  8.8e-02    0.12   0.93   1.2   1.3  2.0e+00  3.1e+01  3.5e+13
+treedepth__      1.4  8.2e-03    0.51    1.0   1.0   2.0  3.8e+03  5.8e+04  1.0e+00
+n_leapfrog__     2.5  1.9e-01     1.3    1.0   3.0   3.0  4.3e+01  6.7e+02  1.0e+00
 divergent__     0.00      nan    0.00   0.00  0.00  0.00      nan      nan      nan
-energy__         7.8  2.7e-02     1.0    6.8   7.4   9.9  1.5e+03  2.7e+04  1.0e+00
+energy__         7.8  2.7e-02     1.0    6.8   7.4   9.9  1.5e+03  2.3e+04  1.0e+00
 
-theta           0.25  3.1e-03    0.12  0.080  0.24  0.48     1542    28043      1.0
+theta           0.25  3.1e-03    0.12  0.080  0.24  0.48     1542    23728      1.0
 
 Samples were drawn using hmc with nuts.
 For each parameter, N_Eff is a crude measure of effective sample size,
@@ -434,8 +434,8 @@ 

This procedure has not been thoroughly tested and may be unstable or buggy. The interface is subject to change. ------------------------------------------------------------ -Gradient evaluation took 9e-06 seconds -1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds. +Gradient evaluation took 1.1e-05 seconds +1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds. Adjust your expectations accordingly! Begin eta adaptation. Iteration: 1 / 250 [ 0%] (Adaptation) diff --git a/docs/articles/index.html b/docs/articles/index.html index afa9d5019..42f8de56e 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -79,7 +79,7 @@ cmdstanr - 0.2.0 + 0.3.0 diff --git a/docs/articles/r-markdown.html b/docs/articles/r-markdown.html index 0bfb385eb..75174d80f 100644 --- a/docs/articles/r-markdown.html +++ b/docs/articles/r-markdown.html @@ -38,7 +38,7 @@ cmdstanr - 0.2.0 + 0.3.0 @@ -125,7 +125,7 @@ -
+
diff --git a/docs/index.html b/docs/index.html index 3d212d98e..f988ff4b2 100644 --- a/docs/index.html +++ b/docs/index.html @@ -42,7 +42,7 @@ cmdstanr - 0.2.2 + 0.3.0
diff --git a/docs/news/index.html b/docs/news/index.html index 58997c0d4..9e96a30e5 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -79,7 +79,7 @@ cmdstanr - 0.2.2 + 0.3.0
@@ -173,20 +173,45 @@

Changelog

Source: NEWS.md
+
+

+cmdstanr 0.3.0

+
+

+Bug fixes

+
    +
  • Fixed reading inverse mass matrix with values written in scientific format in the CSV. (#394)

  • +
  • Fixed error caused by an empty data list. Previously if a model didn’t require data then data had to either be NULL or be a non-empty list, but now list() is allowed. (#403)

  • +
+
+
+

+New features

+
    +
  • Added $sample_mpi() for MCMC sampling with MPI. (#350)

  • +
  • Added informative messages on compile errors caused by precompiled headers (PCH). (#384)

  • +
  • Added the cmdstanr_verbose option for verbose mode. Intended for troubleshooting, debugging and development. See the end of the How does CmdStanR work? vignette for details. (#392)

  • +
  • New $loo() method for CmdStanMCMC objects. Requires computing pointwise log-likelihood in Stan program. (#366)

  • +
  • The fitted_params argument to the $generate_quantities() method now also accepts CmdStanVB, posterior::draws_array, and posterior::draws_matrix objects. (#390)

  • +
  • The $optimize() method now supports all of CmdStan’s tolerance-related arguments for (L)BFGS. (#398)

  • +
  • The documentation for the R6 methods now uses @param, which allows package developers to import the CmdStanR documentation using roxygen2’s @inheritParams. (#408)

  • +
+
+

cmdstanr 0.2.2

-
+

-Bug fixes

+Bug fixes

  • Fixed bug with reading Stan CSV when grep used coloring by default (#364,#371)

  • Depend on posterior v0.1.3 to avoid a potential error in $summary(). (#383)

-
+

-New features

+New features
  • Added support for native execution on macOS with the M1 ARM-based CPU. (#375)

  • Added threading support via threads argument for $optimize() and $variational() (was already available via threads_per_chain for $sample()). (#369)

  • @@ -196,17 +221,17 @@

    cmdstanr 0.2.1

    -
    +

    -Bug fixes

    +Bug fixes

-
+

-New features

+New features
  • compile() and check_syntax() methods gain argument pedantic for turning on pedantic mode, which warns about issues with the model beyond syntax errors. (#361)
  • @@ -216,9 +241,9 @@

    cmdstanr 0.2.0

    -
    +

    -Bug fixes

    +Bug fixes

    • Fix potential indexing error if using read_cmdstan_csv() with CSV files created by CmdStan without CmdStanR. (#291, #292, @johnlees)

    • Fix error when returning draws or sampler diagnostics for a fit with only warmup and no samples. (#288, #293)

    • @@ -230,9 +255,9 @@

    • Fix missing include_paths in $syntax_check(). (#335, @mike-lawrence)

-
+

-New features

+New features
  • CSV reading is now faster by using data.table::fread(). (#318)

  • install_cmdstan() gains argument version for specifying which version of CmdStan to install. (#300, #308)

  • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 39cec2e81..48f642925 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,11 +1,11 @@ -pandoc: 2.7.3 +pandoc: 2.11.1.1 pkgdown: 1.5.1 pkgdown_sha: ~ articles: cmdstanr-internals: cmdstanr-internals.html cmdstanr: cmdstanr.html r-markdown: r-markdown.html -last_built: 2020-11-12T21:15Z +last_built: 2020-12-17T20:37Z urls: reference: https://mc-stan.org/cmdstanr/reference article: https://mc-stan.org/cmdstanr/articles diff --git a/docs/pull_request_template.html b/docs/pull_request_template.html index 40f340543..b9a96aa66 100644 --- a/docs/pull_request_template.html +++ b/docs/pull_request_template.html @@ -79,7 +79,7 @@ cmdstanr - 0.2.2 + 0.3.0
diff --git a/docs/reference/CmdStanGQ.html b/docs/reference/CmdStanGQ.html index dbb6e9555..84926df0d 100644 --- a/docs/reference/CmdStanGQ.html +++ b/docs/reference/CmdStanGQ.html @@ -82,7 +82,7 @@ cmdstanr - 0.2.0 + 0.3.0
@@ -270,7 +270,7 @@

Examp #> #> All 4 chains finished successfully. #> Mean chain execution time: 0.0 seconds. -#> Total execution time: 13.5 seconds.

+#> Total execution time: 0.7 seconds.
# stan program for standalone generated quantities # (could keep model block, but not necessary so removing it) gq_program <- write_stan_file( @@ -295,12 +295,12 @@

Examp #> #> All 4 chains finished successfully. #> Mean chain execution time: 0.0 seconds. -#> Total execution time: 3.8 seconds.

str(fit_gq$draws())
#> 'draws_array' int [1:1000, 1:4, 1:10] 0 0 0 0 1 0 1 0 0 0 ... +#> Total execution time: 0.6 seconds.
str(fit_gq$draws())
#> 'draws_array' int [1:1000, 1:4, 1:10] 0 0 0 0 1 0 1 0 0 0 ... #> - attr(*, "dimnames")=List of 3 #> ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... #> ..$ chain : chr [1:4] "1" "2" "3" "4" #> ..$ variable : chr [1:10] "y_rep[1]" "y_rep[2]" "y_rep[3]" "y_rep[4]" ...
-library(posterior)
#> This is posterior version 0.1.2
as_draws_df(fit_gq$draws())
#> # A draws_df: 1000 iterations, 4 chains, and 10 variables +library(posterior)
#> This is posterior version 0.1.3
as_draws_df(fit_gq$draws())
#> # A draws_df: 1000 iterations, 4 chains, and 10 variables #> y_rep[1] y_rep[2] y_rep[3] y_rep[4] y_rep[5] y_rep[6] y_rep[7] y_rep[8] #> 1 0 0 0 0 0 0 0 0 #> 2 0 1 1 1 1 1 1 1 diff --git a/docs/reference/CmdStanMCMC.html b/docs/reference/CmdStanMCMC.html index 8fc0c84cd..91516226f 100644 --- a/docs/reference/CmdStanMCMC.html +++ b/docs/reference/CmdStanMCMC.html @@ -83,7 +83,7 @@ cmdstanr - 0.2.0 + 0.3.0
@@ -199,6 +199,7 @@

Methods $inv_metric()Return the inverse metric for each chain. $init()Return user-specified initial values. $metadata()Return a list of metadata gathered from the CmdStan CSV files. +$num_chains()Returns the number of MCMC chains. @@ -210,6 +211,7 @@

Summarize inferences and diagnostics

$summary()Run posterior::summarise_draws(). $cmdstan_summary()Run and print CmdStan's bin/stansummary. $cmdstan_diagnose()Run and print CmdStan's bin/diagnose. +$loo()Run loo::loo.array() for approximate LOO-CV @@ -230,8 +232,8 @@

Save fitted model object and temporary files

Report run times, console output, return codes

- +
MethodDescription
$time()Report total and chain-specific run times.
$output()Return the stdout and stderr of all chains or pretty print the output for a single chain.
$time()Report total and chain-specific run times.
$return_codes()Return the return codes from the CmdStan runs.
diff --git a/docs/reference/CmdStanMLE.html b/docs/reference/CmdStanMLE.html index 299b6ed0f..a68a72b93 100644 --- a/docs/reference/CmdStanMLE.html +++ b/docs/reference/CmdStanMLE.html @@ -81,7 +81,7 @@ cmdstanr - 0.2.0 + 0.3.0
diff --git a/docs/reference/CmdStanModel-1.png b/docs/reference/CmdStanModel-1.png index b248a3f89..08beb3c62 100644 Binary files a/docs/reference/CmdStanModel-1.png and b/docs/reference/CmdStanModel-1.png differ diff --git a/docs/reference/CmdStanModel-2.png b/docs/reference/CmdStanModel-2.png index 35db9ff48..df8262149 100644 Binary files a/docs/reference/CmdStanModel-2.png and b/docs/reference/CmdStanModel-2.png differ diff --git a/docs/reference/CmdStanModel.html b/docs/reference/CmdStanModel.html index e3b649c8b..1de0ac2d9 100644 --- a/docs/reference/CmdStanModel.html +++ b/docs/reference/CmdStanModel.html @@ -83,7 +83,7 @@ cmdstanr - 0.2.1 + 0.3.0 @@ -218,6 +218,7 @@

Model fitting

+ @@ -240,7 +241,8 @@

See a

Examples

# \dontrun{ library(cmdstanr) -library(posterior)
#> This is posterior version 0.1.2
library(bayesplot)
#> This is bayesplot version 1.7.2.9000
#> - Online documentation and vignettes at mc-stan.org/bayesplot
#> - bayesplot theme set to bayesplot::theme_default()
#> * Does _not_ affect other ggplot2 plots
#> * See ?bayesplot_theme_set for details on theme setting
#> +library(posterior) +library(bayesplot)
#> This is bayesplot version 1.7.2
#> - Online documentation and vignettes at mc-stan.org/bayesplot
#> - bayesplot theme set to bayesplot::theme_default()
#> * Does _not_ affect other ggplot2 plots
#> * See ?bayesplot_theme_set for details on theme setting
#> #> Attaching package: ‘bayesplot’
#> The following object is masked from ‘package:posterior’: #> #> rhat
color_scheme_set("brightblue") @@ -325,7 +327,7 @@

Examp #> #> Both chains finished successfully. #> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.2 seconds.

+#> Total execution time: 0.1 seconds.
# Use 'posterior' package for summaries fit_mcmc$summary()
#> # A tibble: 2 x 10 #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail @@ -374,7 +376,7 @@

Examp # Plot posterior using bayesplot (ggplot2) mcmc_hist(fit_mcmc$draws("theta"))

#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Call CmdStan's diagnose and stansummary utilities -fit_mcmc$cmdstan_diagnose()
#> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpCe3KhV/bernoulli-202011241550-1-0d9499.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpCe3KhV/bernoulli-202011241550-2-0d9499.csv +fit_mcmc$cmdstan_diagnose()
#> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171337-1-34ccc3.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171337-2-34ccc3.csv #> #> Checking sampler transitions treedepth. #> Treedepth satisfactory for all transitions. @@ -389,24 +391,24 @@

Examp #> #> Split R-hat values satisfactory all parameters. #> -#> Processing complete, no problems detected.

fit_mcmc$cmdstan_summary()
#> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpCe3KhV/bernoulli-202011241550-1-0d9499.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpCe3KhV/bernoulli-202011241550-2-0d9499.csv +#> Processing complete, no problems detected.
fit_mcmc$cmdstan_summary()
#> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171337-1-34ccc3.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171337-2-34ccc3.csv #> Inference for Stan model: bernoulli_model #> 2 chains: each with iter=(1000,1000); warmup=(0,0); thin=(1,1); 2000 iterations saved. #> #> Warmup took (0.0050, 0.0050) seconds, 0.010 seconds total -#> Sampling took (0.014, 0.013) seconds, 0.027 seconds total +#> Sampling took (0.015, 0.013) seconds, 0.028 seconds total #> #> Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat #> -#> lp__ -7.3 3.1e-02 0.74 -8.8 -7.0 -6.8 586 21709 1.0 -#> accept_stat__ 0.92 5.0e-03 0.14 0.61 0.97 1.0 7.3e+02 2.7e+04 1.0e+00 -#> stepsize__ 1.0 9.0e-02 0.090 0.93 1.1 1.1 1.0e+00 3.7e+01 2.6e+13 -#> treedepth__ 1.4 1.2e-02 0.52 1.0 1.0 2.0 1.9e+03 6.9e+04 1.0e+00 -#> n_leapfrog__ 2.6 4.0e-01 1.5 1.0 3.0 7.0 1.4e+01 5.2e+02 1.0e+00 +#> lp__ -7.3 3.1e-02 0.74 -8.8 -7.0 -6.8 586 20934 1.0 +#> accept_stat__ 0.92 5.0e-03 0.14 0.61 0.97 1.0 7.3e+02 2.6e+04 1.0e+00 +#> stepsize__ 1.0 9.0e-02 0.090 0.93 1.1 1.1 1.0e+00 3.6e+01 2.6e+13 +#> treedepth__ 1.4 1.2e-02 0.52 1.0 1.0 2.0 1.9e+03 6.6e+04 1.0e+00 +#> n_leapfrog__ 2.6 4.0e-01 1.5 1.0 3.0 7.0 1.4e+01 5.0e+02 1.0e+00 #> divergent__ 0.00 nan 0.00 0.00 0.00 0.00 nan nan nan -#> energy__ 7.8 4.0e-02 1.0 6.8 7.4 10.0 6.9e+02 2.6e+04 1.0e+00 +#> energy__ 7.8 4.0e-02 1.0 6.8 7.4 10.0 6.9e+02 2.5e+04 1.0e+00 #> -#> theta 0.25 4.5e-03 0.12 0.081 0.23 0.49 755 27971 1.00 +#> theta 0.25 4.5e-03 0.12 0.081 0.23 0.49 755 26972 1.00 #> #> Samples were drawn using hmc with nuts. #> For each parameter, N_Eff is a crude measure of effective sample size, @@ -439,8 +441,8 @@

Examp #> This procedure has not been thoroughly tested and may be unstable #> or buggy. The interface is subject to change. #> ------------------------------------------------------------ -#> Gradient evaluation took 9e-06 seconds -#> 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds. +#> Gradient evaluation took 1.1e-05 seconds +#> 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds. #> Adjust your expectations accordingly! #> Begin eta adaptation. #> Iteration: 1 / 250 [ 0%] (Adaptation) diff --git a/docs/reference/CmdStanVB.html b/docs/reference/CmdStanVB.html index 64ed85b9a..f13b687fd 100644 --- a/docs/reference/CmdStanVB.html +++ b/docs/reference/CmdStanVB.html @@ -82,7 +82,7 @@ cmdstanr - 0.2.0 + 0.3.0

diff --git a/docs/reference/cmdstan_default_install_path.html b/docs/reference/cmdstan_default_install_path.html index 9b40e4c5c..33ac56b7a 100644 --- a/docs/reference/cmdstan_default_install_path.html +++ b/docs/reference/cmdstan_default_install_path.html @@ -80,7 +80,7 @@ cmdstanr - 0.2.0 + 0.3.0 diff --git a/docs/reference/cmdstan_default_path.html b/docs/reference/cmdstan_default_path.html index 48cd7959e..ebcdd29ae 100644 --- a/docs/reference/cmdstan_default_path.html +++ b/docs/reference/cmdstan_default_path.html @@ -80,7 +80,7 @@ cmdstanr - 0.2.0 + 0.3.0 diff --git a/docs/reference/cmdstan_model.html b/docs/reference/cmdstan_model.html index b54ee3325..da7d5a19a 100644 --- a/docs/reference/cmdstan_model.html +++ b/docs/reference/cmdstan_model.html @@ -82,7 +82,7 @@ cmdstanr - 0.2.0 + 0.3.0 @@ -358,10 +358,7 @@

Examp # Plot posterior using bayesplot (ggplot2) mcmc_hist(fit_mcmc$draws("theta"))
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Call CmdStan's diagnose and stansummary utilities -fit_mcmc$cmdstan_diagnose()
#> Running bin/diagnose \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-1-7b8a20.csv \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-2-7b8a20.csv -#> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-1-7b8a20.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-2-7b8a20.csv +fit_mcmc$cmdstan_diagnose()
#> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171337-1-7b719e.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171337-2-7b719e.csv #> #> Checking sampler transitions treedepth. #> Treedepth satisfactory for all transitions. @@ -376,27 +373,24 @@

Examp #> #> Split R-hat values satisfactory all parameters. #> -#> Processing complete, no problems detected.

fit_mcmc$cmdstan_summary()
#> Running bin/stansummary \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-1-7b8a20.csv \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-2-7b8a20.csv -#> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-1-7b8a20.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-2-7b8a20.csv +#> Processing complete, no problems detected.
fit_mcmc$cmdstan_summary()
#> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171337-1-7b719e.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171337-2-7b719e.csv #> Inference for Stan model: bernoulli_model #> 2 chains: each with iter=(1000,1000); warmup=(0,0); thin=(1,1); 2000 iterations saved. #> -#> Warmup took (0.0080, 0.0070) seconds, 0.015 seconds total -#> Sampling took (0.019, 0.018) seconds, 0.037 seconds total +#> Warmup took (0.0080, 0.0080) seconds, 0.016 seconds total +#> Sampling took (0.019, 0.016) seconds, 0.035 seconds total #> #> Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat #> -#> lp__ -7.3 3.1e-02 0.74 -8.8 -7.0 -6.8 586 15842 1.0 -#> accept_stat__ 0.92 5.0e-03 0.14 0.61 0.97 1.0 7.3e+02 2.0e+04 1.0e+00 -#> stepsize__ 1.0 9.0e-02 0.090 0.93 1.1 1.1 1.0e+00 2.7e+01 2.6e+13 -#> treedepth__ 1.4 1.2e-02 0.52 1.0 1.0 2.0 1.9e+03 5.0e+04 1.0e+00 -#> n_leapfrog__ 2.6 4.0e-01 1.5 1.0 3.0 7.0 1.4e+01 3.8e+02 1.0e+00 +#> lp__ -7.3 3.1e-02 0.74 -8.8 -7.0 -6.8 586 16747 1.0 +#> accept_stat__ 0.92 5.0e-03 0.14 0.61 0.97 1.0 7.3e+02 2.1e+04 1.0e+00 +#> stepsize__ 1.0 9.0e-02 0.090 0.93 1.1 1.1 1.0e+00 2.9e+01 2.6e+13 +#> treedepth__ 1.4 1.2e-02 0.52 1.0 1.0 2.0 1.9e+03 5.3e+04 1.0e+00 +#> n_leapfrog__ 2.6 4.0e-01 1.5 1.0 3.0 7.0 1.4e+01 4.0e+02 1.0e+00 #> divergent__ 0.00 nan 0.00 0.00 0.00 0.00 nan nan nan -#> energy__ 7.8 4.0e-02 1.0 6.8 7.4 10.0 6.9e+02 1.9e+04 1.0e+00 +#> energy__ 7.8 4.0e-02 1.0 6.8 7.4 10.0 6.9e+02 2.0e+04 1.0e+00 #> -#> theta 0.25 4.5e-03 0.12 0.081 0.23 0.49 755 20411 1.00 +#> theta 0.25 4.5e-03 0.12 0.081 0.23 0.49 755 21577 1.00 #> #> Samples were drawn using hmc with nuts. #> For each parameter, N_Eff is a crude measure of effective sample size, @@ -429,8 +423,8 @@

Examp #> This procedure has not been thoroughly tested and may be unstable #> or buggy. The interface is subject to change. #> ------------------------------------------------------------ -#> Gradient evaluation took 1.1e-05 seconds -#> 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds. +#> Gradient evaluation took 1.3e-05 seconds +#> 1000 transitions using 10 leapfrog steps per transition would take 0.13 seconds. #> Adjust your expectations accordingly! #> Begin eta adaptation. #> Iteration: 1 / 250 [ 0%] (Adaptation) diff --git a/docs/reference/cmdstanr-package.html b/docs/reference/cmdstanr-package.html index 34ae9de1f..09bb5f92b 100644 --- a/docs/reference/cmdstanr-package.html +++ b/docs/reference/cmdstanr-package.html @@ -82,7 +82,7 @@ cmdstanr - 0.2.0 + 0.3.0

@@ -319,7 +319,7 @@

Examp #> #> Both chains finished successfully. #> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.2 seconds.
+#> Total execution time: 0.1 seconds.
# Use 'posterior' package for summaries fit_mcmc$summary()
#> # A tibble: 2 x 10 #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail @@ -368,10 +368,7 @@

Examp # Plot posterior using bayesplot (ggplot2) mcmc_hist(fit_mcmc$draws("theta"))

#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Call CmdStan's diagnose and stansummary utilities -fit_mcmc$cmdstan_diagnose()
#> Running bin/diagnose \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-1-4e68fe.csv \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-2-4e68fe.csv -#> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-1-4e68fe.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-2-4e68fe.csv +fit_mcmc$cmdstan_diagnose()
#> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171337-1-4e507c.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171337-2-4e507c.csv #> #> Checking sampler transitions treedepth. #> Treedepth satisfactory for all transitions. @@ -386,27 +383,24 @@

Examp #> #> Split R-hat values satisfactory all parameters. #> -#> Processing complete, no problems detected.

fit_mcmc$cmdstan_summary()
#> Running bin/stansummary \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-1-4e68fe.csv \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-2-4e68fe.csv -#> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-1-4e68fe.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121416-2-4e68fe.csv +#> Processing complete, no problems detected.
fit_mcmc$cmdstan_summary()
#> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171337-1-4e507c.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171337-2-4e507c.csv #> Inference for Stan model: bernoulli_model #> 2 chains: each with iter=(1000,1000); warmup=(0,0); thin=(1,1); 2000 iterations saved. #> -#> Warmup took (0.0070, 0.0070) seconds, 0.014 seconds total -#> Sampling took (0.018, 0.017) seconds, 0.035 seconds total +#> Warmup took (0.0070, 0.0060) seconds, 0.013 seconds total +#> Sampling took (0.016, 0.015) seconds, 0.031 seconds total #> #> Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat #> -#> lp__ -7.3 3.1e-02 0.74 -8.8 -7.0 -6.8 586 16747 1.0 -#> accept_stat__ 0.92 5.0e-03 0.14 0.61 0.97 1.0 7.3e+02 2.1e+04 1.0e+00 -#> stepsize__ 1.0 9.0e-02 0.090 0.93 1.1 1.1 1.0e+00 2.9e+01 2.6e+13 -#> treedepth__ 1.4 1.2e-02 0.52 1.0 1.0 2.0 1.9e+03 5.3e+04 1.0e+00 -#> n_leapfrog__ 2.6 4.0e-01 1.5 1.0 3.0 7.0 1.4e+01 4.0e+02 1.0e+00 +#> lp__ -7.3 3.1e-02 0.74 -8.8 -7.0 -6.8 586 18908 1.0 +#> accept_stat__ 0.92 5.0e-03 0.14 0.61 0.97 1.0 7.3e+02 2.4e+04 1.0e+00 +#> stepsize__ 1.0 9.0e-02 0.090 0.93 1.1 1.1 1.0e+00 3.2e+01 2.6e+13 +#> treedepth__ 1.4 1.2e-02 0.52 1.0 1.0 2.0 1.9e+03 6.0e+04 1.0e+00 +#> n_leapfrog__ 2.6 4.0e-01 1.5 1.0 3.0 7.0 1.4e+01 4.5e+02 1.0e+00 #> divergent__ 0.00 nan 0.00 0.00 0.00 0.00 nan nan nan -#> energy__ 7.8 4.0e-02 1.0 6.8 7.4 10.0 6.9e+02 2.0e+04 1.0e+00 +#> energy__ 7.8 4.0e-02 1.0 6.8 7.4 10.0 6.9e+02 2.2e+04 1.0e+00 #> -#> theta 0.25 4.5e-03 0.12 0.081 0.23 0.49 755 21577 1.00 +#> theta 0.25 4.5e-03 0.12 0.081 0.23 0.49 755 24361 1.00 #> #> Samples were drawn using hmc with nuts. #> For each parameter, N_Eff is a crude measure of effective sample size, @@ -439,8 +433,8 @@

Examp #> This procedure has not been thoroughly tested and may be unstable #> or buggy. The interface is subject to change. #> ------------------------------------------------------------ -#> Gradient evaluation took 1e-05 seconds -#> 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds. +#> Gradient evaluation took 1.4e-05 seconds +#> 1000 transitions using 10 leapfrog steps per transition would take 0.14 seconds. #> Adjust your expectations accordingly! #> Begin eta adaptation. #> Iteration: 1 / 250 [ 0%] (Adaptation) diff --git a/docs/reference/cmdstanr_example.html b/docs/reference/cmdstanr_example.html index 62fe8db43..4569c9333 100644 --- a/docs/reference/cmdstanr_example.html +++ b/docs/reference/cmdstanr_example.html @@ -80,7 +80,7 @@ cmdstanr - 0.2.0 + 0.3.0

@@ -240,31 +240,52 @@

Examp #> target += normal_lpdf(alpha | 0, 1); #> target += normal_lpdf(beta | 0, 1); #> target += bernoulli_logit_glm_lpmf(y | X, alpha, beta); -#> }
fit_logistic_mcmc <- cmdstanr_example("logistic", chains = 2)
#> Compiling Stan program...
fit_logistic_mcmc$summary()
#> # A tibble: 5 x 10 -#> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 lp__ -65.9 -65.6 1.41 1.18 -68.8 -64.2 1.00 1006. 1575. -#> 2 alpha 0.385 0.382 0.220 0.217 0.0236 0.761 1.00 2337. 1515. -#> 3 beta[1] -0.662 -0.654 0.242 0.241 -1.06 -0.276 1.00 1987. 1563. -#> 4 beta[2] -0.279 -0.278 0.221 0.216 -0.648 0.0864 1.00 1949. 1558. -#> 5 beta[3] 0.680 0.673 0.266 0.271 0.270 1.12 1.00 1976. 1416.
-fit_logistic_optim <- cmdstanr_example("logistic", method = "optimize")
#> Model executable is up to date!
fit_logistic_optim$summary()
#> # A tibble: 5 x 2 -#> variable estimate -#> <chr> <dbl> -#> 1 lp__ -63.9 -#> 2 alpha 0.364 -#> 3 beta[1] -0.632 -#> 4 beta[2] -0.259 -#> 5 beta[3] 0.648
-fit_logistic_vb <- cmdstanr_example("logistic", method = "variational")
#> Model executable is up to date!
fit_logistic_vb$summary()
#> # A tibble: 6 x 7 -#> variable mean median sd mad q5 q95 -#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 lp__ -66.2 -65.9 1.56 1.46 -69.2 -64.4 -#> 2 lp_approx__ -2.03 -1.72 1.44 1.27 -4.83 -0.311 -#> 3 alpha 0.410 0.406 0.230 0.221 0.0365 0.806 -#> 4 beta[1] -0.790 -0.795 0.222 0.220 -1.16 -0.401 -#> 5 beta[2] -0.219 -0.228 0.248 0.262 -0.618 0.196 -#> 6 beta[3] 0.761 0.753 0.252 0.246 0.333 1.16
+#> } +#> generated quantities { +#> vector[N] log_lik; +#> for (n in 1:N) log_lik[n] = bernoulli_logit_lpmf(y[n] | alpha + X[n] * beta); +#> }
fit_logistic_mcmc <- cmdstanr_example("logistic", chains = 2)
#> Compiling Stan program...
fit_logistic_mcmc$summary()
#> # A tibble: 105 x 10 +#> variable mean median sd mad q5 q95 rhat ess_bulk +#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> +#> 1 lp__ -65.9 -65.6 1.41 1.18 -68.8 -64.2 1.00 1006. +#> 2 alpha 0.385 0.382 0.220 0.217 0.0236 0.761 1.00 2337. +#> 3 beta[1] -0.662 -0.654 0.242 0.241 -1.06 -0.276 1.00 1987. +#> 4 beta[2] -0.279 -0.278 0.221 0.216 -0.648 0.0864 1.00 1949. +#> 5 beta[3] 0.680 0.673 0.266 0.271 0.270 1.12 1.00 1976. +#> 6 log_lik… -0.514 -0.509 0.0984 0.0965 -0.687 -0.361 1.00 2280. +#> 7 log_lik… -0.406 -0.389 0.146 0.137 -0.666 -0.204 1.00 2144. +#> 8 log_lik… -0.499 -0.463 0.213 0.202 -0.903 -0.207 1.00 2098. +#> 9 log_lik… -0.449 -0.438 0.148 0.146 -0.717 -0.233 1.00 1869. +#> 10 log_lik… -1.18 -1.16 0.280 0.273 -1.68 -0.763 1.00 2465. +#> # … with 95 more rows, and 1 more variable: ess_tail <dbl>
+fit_logistic_optim <- cmdstanr_example("logistic", method = "optimize")
#> Model executable is up to date!
fit_logistic_optim$summary()
#> # A tibble: 105 x 2 +#> variable estimate +#> <chr> <dbl> +#> 1 lp__ -63.9 +#> 2 alpha 0.364 +#> 3 beta[1] -0.632 +#> 4 beta[2] -0.259 +#> 5 beta[3] 0.648 +#> 6 log_lik[1] -0.515 +#> 7 log_lik[2] -0.394 +#> 8 log_lik[3] -0.469 +#> 9 log_lik[4] -0.442 +#> 10 log_lik[5] -1.14 +#> # … with 95 more rows
+fit_logistic_vb <- cmdstanr_example("logistic", method = "variational")
#> Model executable is up to date!
fit_logistic_vb$summary()
#> # A tibble: 106 x 7 +#> variable mean median sd mad q5 q95 +#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> +#> 1 lp__ -66.2 -65.9 1.56 1.46 -69.2 -64.4 +#> 2 lp_approx__ -2.03 -1.72 1.44 1.27 -4.83 -0.311 +#> 3 alpha 0.410 0.406 0.230 0.221 0.0365 0.806 +#> 4 beta[1] -0.790 -0.795 0.222 0.220 -1.16 -0.401 +#> 5 beta[2] -0.219 -0.228 0.248 0.262 -0.618 0.196 +#> 6 beta[3] 0.761 0.753 0.252 0.246 0.333 1.16 +#> 7 log_lik[1] -0.488 -0.482 0.0980 0.0954 -0.660 -0.338 +#> 8 log_lik[2] -0.350 -0.328 0.143 0.126 -0.615 -0.160 +#> 9 log_lik[3] -0.411 -0.373 0.204 0.192 -0.807 -0.151 +#> 10 log_lik[4] -0.450 -0.431 0.148 0.144 -0.725 -0.239 +#> # … with 96 more rows
print_example_program("schools")
#> data { #> int<lower=1> J; #> vector<lower=0>[J] sigma; diff --git a/docs/reference/eng_cmdstan.html b/docs/reference/eng_cmdstan.html index 330873a00..3951485d4 100644 --- a/docs/reference/eng_cmdstan.html +++ b/docs/reference/eng_cmdstan.html @@ -85,7 +85,7 @@ cmdstanr - 0.2.0 + 0.3.0
diff --git a/docs/reference/fit-method-cmdstan_summary.html b/docs/reference/fit-method-cmdstan_summary.html index 5648af9f2..c6458eda2 100644 --- a/docs/reference/fit-method-cmdstan_summary.html +++ b/docs/reference/fit-method-cmdstan_summary.html @@ -6,7 +6,7 @@ -Run CmdStan's <code>stansummary</code> and <code>diagnose</code> — fit-method-cmdstan_summary • cmdstanr +Run CmdStan's <code>stansummary</code> and <code>diagnose</code> utilities — fit-method-cmdstan_summary • cmdstanr @@ -46,13 +46,19 @@ - + + +Although these methods can be used for models fit using the +$variational() method, much of the output is +currently only relevant for models fit using the +$sample() method. +See the $summary() method for computing similar summaries in +R rather than calling CmdStan's utilities." /> @@ -85,7 +91,7 @@ cmdstanr - 0.2.0 + 0.3.0 @@ -175,7 +181,7 @@
@@ -187,35 +193,35 @@

Run CmdStan's stansummary and diagnose

  • https://mc-stan.org/docs/cmdstan-guide/diagnose.html

  • +

    Although these methods can be used for models fit using the +$variational() method, much of the output is +currently only relevant for models fit using the +$sample() method.

    +

    See the $summary() method for computing similar summaries in +R rather than calling CmdStan's utilities.

    +
    cmdstan_summary(flags = NULL)
     
    +cmdstan_diagnose()
    -

    Note

    - -

    Although these methods also work for models fit using the -$variational() method, much of the output is -only relevant for models fit using the $sample() -method.

    -

    Usage

    - - -
    $cmdstan_summary()
    -$cmdstan_diagnose()
    -
    +

    Arguments

    +

    MethodDescription
    $sample()Run CmdStan's "sample" method, return CmdStanMCMC object.
    $sample_mpi()Run CmdStan's "sample" method with MPI, return CmdStanMCMC object.
    $optimize()Run CmdStan's "optimize" method, return CmdStanMLE object.
    $variational()Run CmdStan's "variational" method, return CmdStanVB object.
    $generate_quantities()Run CmdStan's "generate quantities" method, return CmdStanGQ object.
    + + + + + +
    flags

    An optional character vector of flags (e.g. +flags = c("--sig_figs=1")).

    See also

    - +

    Examples

    # \dontrun{ -fit <- cmdstanr_example("logistic")
    #> Model executable is up to date!
    fit$cmdstan_diagnose()
    #> Running bin/diagnose \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-1-4d4c69.csv \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-2-4d4c69.csv \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-3-4d4c69.csv \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-4-4d4c69.csv -#> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-1-4d4c69.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-2-4d4c69.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-3-4d4c69.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-4-4d4c69.csv +fit <- cmdstanr_example("logistic")
    #> Model executable is up to date!
    fit$cmdstan_diagnose()
    #> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171338-1-4d33e7.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171338-2-4d33e7.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171338-3-4d33e7.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171338-4-4d33e7.csv #> #> Checking sampler transitions treedepth. #> Treedepth satisfactory for all transitions. @@ -230,32 +236,127 @@

    Examp #> #> Split R-hat values satisfactory all parameters. #> -#> Processing complete, no problems detected.

    fit$cmdstan_summary()
    #> Running bin/stansummary \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-1-4d4c69.csv \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-2-4d4c69.csv \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-3-4d4c69.csv \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-4-4d4c69.csv -#> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-1-4d4c69.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-2-4d4c69.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-3-4d4c69.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-4-4d4c69.csv +#> Processing complete, no problems detected.
    fit$cmdstan_summary()
    #> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171338-1-4d33e7.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171338-2-4d33e7.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171338-3-4d33e7.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171338-4-4d33e7.csv #> Inference for Stan model: logistic_model #> 4 chains: each with iter=(1000,1000,1000,1000); warmup=(0,0,0,0); thin=(1,1,1,1); 4000 iterations saved. #> -#> Warmup took (0.039, 0.035, 0.032, 0.034) seconds, 0.14 seconds total -#> Sampling took (0.073, 0.047, 0.037, 0.050) seconds, 0.21 seconds total +#> Warmup took (0.029, 0.028, 0.026, 0.043) seconds, 0.13 seconds total +#> Sampling took (0.11, 0.099, 0.095, 0.17) seconds, 0.48 seconds total #> -#> Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat +#> Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat #> -#> lp__ -66 3.2e-02 1.5 -69 -66 -64 2048 9893 1.0 -#> accept_stat__ 0.91 6.2e-03 0.10 0.70 0.95 1.0 2.7e+02 1.3e+03 1.0e+00 -#> stepsize__ 0.74 4.6e-02 0.065 0.65 0.75 0.83 2.0e+00 9.7e+00 2.5e+13 -#> treedepth__ 2.4 7.7e-02 0.52 2.0 2.0 3.0 4.6e+01 2.2e+02 1.0e+00 -#> n_leapfrog__ 5.3 3.1e-01 2.0 3.0 7.0 7.0 4.1e+01 2.0e+02 1.0e+00 -#> divergent__ 0.00 nan 0.00 0.00 0.00 0.00 nan nan nan -#> energy__ 68 4.9e-02 2.0 65 68 72 1.7e+03 8.1e+03 1.0e+00 +#> lp__ -6.6e+01 3.2e-02 1.5 -69 -6.6e+01 -6.4e+01 2048 4311 1.0 +#> accept_stat__ 0.91 6.2e-03 0.10 0.70 0.95 1.0 2.7e+02 5.8e+02 1.0e+00 +#> stepsize__ 0.74 4.6e-02 0.065 0.65 0.75 0.83 2.0e+00 4.2e+00 2.5e+13 +#> treedepth__ 2.4 7.7e-02 0.52 2.0 2.0 3.0 4.6e+01 9.7e+01 1.0e+00 +#> n_leapfrog__ 5.3 3.1e-01 2.0 3.0 7.0 7.0 4.1e+01 8.6e+01 1.0e+00 +#> divergent__ 0.00 nan 0.00 0.00 0.00 0.00 nan nan nan +#> energy__ 68 4.9e-02 2.0 65 68 72 1.7e+03 3.5e+03 1.0e+00 #> -#> alpha 0.38 3.5e-03 0.22 0.027 0.37 0.75 3858 18636 1.0 -#> beta[1] -0.67 4.1e-03 0.26 -1.1 -0.67 -0.25 3893 
18805 1.0 -#> beta[2] -0.27 3.4e-03 0.23 -0.64 -0.26 0.088 4345 20992 1.00 -#> beta[3] 0.68 4.2e-03 0.27 0.24 0.67 1.1 4049 19562 1.00 +#> alpha 3.8e-01 3.5e-03 0.22 0.027 3.7e-01 7.5e-01 3858 8121 1.0 +#> beta[1] -6.7e-01 4.1e-03 0.26 -1.1 -6.7e-01 -2.5e-01 3893 8195 1.0 +#> beta[2] -2.7e-01 3.4e-03 0.23 -0.64 -2.6e-01 8.8e-02 4345 9148 1.00 +#> beta[3] 6.8e-01 4.2e-03 0.27 0.24 6.7e-01 1.1e+00 4049 8525 1.00 +#> log_lik[1] -5.1e-01 1.6e-03 0.098 -0.68 -5.1e-01 -3.6e-01 3960 8337 1.00 +#> log_lik[2] -4.0e-01 2.3e-03 0.15 -0.68 -3.8e-01 -2.0e-01 4327 9110 1.00 +#> log_lik[3] -4.9e-01 3.4e-03 0.22 -0.90 -4.6e-01 -2.0e-01 4235 8915 1.00 +#> log_lik[4] -4.5e-01 2.4e-03 0.15 -0.74 -4.4e-01 -2.4e-01 3879 8167 1.00 +#> log_lik[5] -1.2e+00 4.3e-03 0.29 -1.7 -1.2e+00 -7.5e-01 4422 9310 1.00 +#> log_lik[6] -6.0e-01 3.0e-03 0.19 -0.94 -5.7e-01 -3.2e-01 4225 8895 1.0 +#> log_lik[7] -6.4e-01 1.9e-03 0.12 -0.86 -6.3e-01 -4.4e-01 4341 9139 1.00 +#> log_lik[8] -2.8e-01 2.1e-03 0.13 -0.53 -2.6e-01 -1.1e-01 4099 8630 1.0 +#> log_lik[9] -7.0e-01 2.6e-03 0.17 -1.00 -6.8e-01 -4.5e-01 4154 8746 1.00 +#> log_lik[10] -7.3e-01 3.6e-03 0.23 -1.1 -7.0e-01 -4.0e-01 4173 8786 1.00 +#> log_lik[11] -2.8e-01 2.0e-03 0.13 -0.52 -2.6e-01 -1.2e-01 3979 8376 1.0 +#> log_lik[12] -5.0e-01 3.8e-03 0.24 -0.99 -4.6e-01 -1.9e-01 4096 8622 1.00 +#> log_lik[13] -6.5e-01 3.3e-03 0.21 -1.0 -6.3e-01 -3.5e-01 4156 8749 1.0 +#> log_lik[14] -3.6e-01 2.8e-03 0.17 -0.67 -3.2e-01 -1.3e-01 3922 8257 1.00 +#> log_lik[15] -2.8e-01 1.7e-03 0.11 -0.48 -2.6e-01 -1.3e-01 4228 8902 1.0 +#> log_lik[16] -2.8e-01 1.4e-03 0.087 -0.43 -2.7e-01 -1.5e-01 3994 8408 1.0 +#> log_lik[17] -1.6e+00 4.8e-03 0.29 -2.1 -1.6e+00 -1.2e+00 3563 7500 1.0 +#> log_lik[18] -4.8e-01 1.6e-03 0.11 -0.67 -4.7e-01 -3.2e-01 4265 8978 1.0 +#> log_lik[19] -2.3e-01 1.2e-03 0.075 -0.37 -2.2e-01 -1.2e-01 4039 8502 1.0 +#> log_lik[20] -1.1e-01 1.3e-03 0.079 -0.26 -9.1e-02 -2.8e-02 3861 8128 1.00 +#> log_lik[21] -2.1e-01 1.4e-03 0.088 -0.38 -2.0e-01 -9.6e-02 
3943 8300 1.0 +#> log_lik[22] -5.7e-01 2.3e-03 0.15 -0.84 -5.6e-01 -3.5e-01 4113 8658 1.0 +#> log_lik[23] -3.3e-01 2.1e-03 0.14 -0.59 -3.1e-01 -1.5e-01 4448 9365 1.0 +#> log_lik[24] -1.4e-01 1.1e-03 0.067 -0.26 -1.2e-01 -5.1e-02 3896 8202 1.0 +#> log_lik[25] -4.6e-01 1.9e-03 0.12 -0.67 -4.4e-01 -2.7e-01 4163 8764 1.00 +#> log_lik[26] -1.5e+00 5.4e-03 0.34 -2.1 -1.5e+00 -1.0e+00 4027 8478 1.0 +#> log_lik[27] -3.1e-01 2.0e-03 0.12 -0.53 -2.9e-01 -1.4e-01 3970 8358 1.0 +#> log_lik[28] -4.4e-01 1.3e-03 0.082 -0.59 -4.4e-01 -3.2e-01 3905 8222 1.0 +#> log_lik[29] -7.3e-01 3.4e-03 0.23 -1.1 -7.0e-01 -4.0e-01 4650 9790 1.00 +#> log_lik[30] -7.0e-01 2.9e-03 0.19 -1.0 -6.8e-01 -4.2e-01 4297 9046 1.00 +#> log_lik[31] -4.9e-01 2.6e-03 0.16 -0.78 -4.7e-01 -2.6e-01 4043 8512 1.0 +#> log_lik[32] -4.2e-01 1.7e-03 0.11 -0.62 -4.1e-01 -2.7e-01 4274 8998 1.0 +#> log_lik[33] -4.1e-01 2.0e-03 0.13 -0.64 -3.9e-01 -2.2e-01 4098 8628 1.00 +#> log_lik[34] -6.4e-02 8.4e-04 0.051 -0.16 -5.0e-02 -1.3e-02 3733 7860 1.0 +#> log_lik[35] -5.9e-01 2.7e-03 0.19 -0.93 -5.6e-01 -3.2e-01 4732 9961 1.0 +#> log_lik[36] -3.3e-01 1.9e-03 0.13 -0.57 -3.1e-01 -1.5e-01 4729 9955 1.0 +#> log_lik[37] -6.9e-01 3.4e-03 0.23 -1.1 -6.7e-01 -3.8e-01 4486 9445 1.00 +#> log_lik[38] -3.2e-01 2.4e-03 0.15 -0.60 -2.9e-01 -1.2e-01 4017 8457 1.00 +#> log_lik[39] -1.8e-01 1.8e-03 0.11 -0.39 -1.5e-01 -5.2e-02 3948 8311 1.00 +#> log_lik[40] -6.8e-01 2.0e-03 0.13 -0.90 -6.7e-01 -4.9e-01 4095 8622 1.00 +#> log_lik[41] -1.1e+00 4.0e-03 0.25 -1.6 -1.1e+00 -7.5e-01 3927 8267 1.00 +#> log_lik[42] -9.3e-01 3.1e-03 0.20 -1.3 -9.2e-01 -6.4e-01 4060 8548 1.00 +#> log_lik[43] -4.1e-01 4.0e-03 0.26 -0.92 -3.5e-01 -1.0e-01 4264 8977 1.0 +#> log_lik[44] -1.2e+00 3.2e-03 0.19 -1.5 -1.2e+00 -8.9e-01 3535 7442 1.0 +#> log_lik[45] -3.6e-01 1.9e-03 0.12 -0.57 -3.4e-01 -1.8e-01 3909 8230 1.00 +#> log_lik[46] -5.8e-01 1.9e-03 0.13 -0.81 -5.7e-01 -3.8e-01 4348 9153 1.00 +#> log_lik[47] -3.1e-01 2.1e-03 0.13 -0.54 -2.9e-01 -1.3e-01 3879 8165 1.00 
+#> log_lik[48] -3.2e-01 1.3e-03 0.082 -0.47 -3.2e-01 -2.0e-01 4058 8543 1.0 +#> log_lik[49] -3.2e-01 1.3e-03 0.079 -0.46 -3.1e-01 -2.0e-01 3862 8131 1.0 +#> log_lik[50] -1.3e+00 5.2e-03 0.34 -1.9 -1.3e+00 -8.0e-01 4084 8598 1.0 +#> log_lik[51] -2.9e-01 1.4e-03 0.093 -0.46 -2.8e-01 -1.5e-01 4280 9011 1.0 +#> log_lik[52] -8.4e-01 2.3e-03 0.14 -1.1 -8.3e-01 -6.2e-01 3855 8117 1.00 +#> log_lik[53] -4.0e-01 2.1e-03 0.13 -0.64 -3.9e-01 -2.2e-01 4032 8488 1.0 +#> log_lik[54] -3.7e-01 2.3e-03 0.14 -0.63 -3.5e-01 -1.7e-01 3881 8171 1.00 +#> log_lik[55] -3.9e-01 2.1e-03 0.14 -0.63 -3.7e-01 -1.9e-01 4199 8839 1.00 +#> log_lik[56] -3.2e-01 2.8e-03 0.19 -0.69 -2.8e-01 -9.6e-02 4702 9899 1.0 +#> log_lik[57] -6.5e-01 1.8e-03 0.12 -0.86 -6.5e-01 -4.8e-01 4134 8703 1.00 +#> log_lik[58] -9.5e-01 5.2e-03 0.36 -1.6 -9.1e-01 -4.4e-01 4738 9974 1.00 +#> log_lik[59] -1.4e+00 5.5e-03 0.34 -2.0 -1.3e+00 -8.5e-01 3911 8233 1.00 +#> log_lik[60] -9.8e-01 2.6e-03 0.16 -1.3 -9.7e-01 -7.4e-01 3688 7764 1.00 +#> log_lik[61] -5.4e-01 1.5e-03 0.097 -0.70 -5.3e-01 -3.9e-01 3957 8331 1.00 +#> log_lik[62] -8.9e-01 5.1e-03 0.32 -1.5 -8.5e-01 -4.4e-01 3900 8210 1.00 +#> log_lik[63] -1.2e-01 1.2e-03 0.074 -0.26 -1.0e-01 -3.4e-02 3706 7803 1.0 +#> log_lik[64] -9.0e-01 3.7e-03 0.25 -1.3 -8.7e-01 -5.4e-01 4518 9512 1.00 +#> log_lik[65] -2.0e+00 9.6e-03 0.59 -3.0 -2.0e+00 -1.1e+00 3848 8100 1.00 +#> log_lik[66] -5.1e-01 2.0e-03 0.14 -0.76 -4.9e-01 -3.1e-01 4532 9541 1.00 +#> log_lik[67] -2.8e-01 1.3e-03 0.081 -0.42 -2.7e-01 -1.6e-01 4172 8783 1.0 +#> log_lik[68] -1.1e+00 3.6e-03 0.23 -1.5 -1.0e+00 -7.1e-01 4159 8756 1.00 +#> log_lik[69] -4.4e-01 1.3e-03 0.083 -0.58 -4.3e-01 -3.1e-01 3903 8216 1.0 +#> log_lik[70] -6.4e-01 3.4e-03 0.23 -1.1 -6.1e-01 -3.1e-01 4663 9818 1.00 +#> log_lik[71] -6.1e-01 3.2e-03 0.21 -1.00 -5.8e-01 -3.0e-01 4420 9306 1.0 +#> log_lik[72] -4.6e-01 2.7e-03 0.17 -0.78 -4.4e-01 -2.2e-01 3995 8410 1.00 +#> log_lik[73] -1.5e+00 5.9e-03 0.37 -2.1 -1.5e+00 -9.3e-01 3874 8157 1.00 +#> 
log_lik[74] -9.5e-01 3.1e-03 0.20 -1.3 -9.4e-01 -6.5e-01 4177 8793 1.00 +#> log_lik[75] -1.2e+00 6.4e-03 0.40 -1.9 -1.1e+00 -5.8e-01 3846 8098 1.0 +#> log_lik[76] -3.7e-01 2.2e-03 0.14 -0.62 -3.5e-01 -1.8e-01 3995 8411 1.00 +#> log_lik[77] -8.8e-01 2.2e-03 0.14 -1.1 -8.7e-01 -6.6e-01 4064 8556 1.0 +#> log_lik[78] -4.8e-01 2.7e-03 0.17 -0.80 -4.6e-01 -2.4e-01 4122 8677 1.00 +#> log_lik[79] -7.6e-01 2.9e-03 0.19 -1.1 -7.4e-01 -4.8e-01 4267 8983 1.00 +#> log_lik[80] -5.4e-01 2.9e-03 0.20 -0.91 -5.1e-01 -2.7e-01 4700 9894 1.00 +#> log_lik[81] -1.6e-01 1.6e-03 0.10 -0.36 -1.4e-01 -4.8e-02 3848 8101 1.0 +#> log_lik[82] -2.2e-01 2.2e-03 0.14 -0.49 -1.9e-01 -6.3e-02 4165 8769 1.00 +#> log_lik[83] -3.4e-01 1.3e-03 0.081 -0.49 -3.4e-01 -2.2e-01 3873 8155 1.0 +#> log_lik[84] -2.7e-01 1.4e-03 0.092 -0.44 -2.6e-01 -1.5e-01 4079 8588 1.0 +#> log_lik[85] -1.3e-01 1.2e-03 0.075 -0.28 -1.1e-01 -4.0e-02 3907 8225 1.0 +#> log_lik[86] -1.1e+00 4.9e-03 0.32 -1.7 -1.1e+00 -6.5e-01 4192 8826 1.00 +#> log_lik[87] -8.2e-01 2.0e-03 0.13 -1.0 -8.2e-01 -6.3e-01 4067 8562 1.00 +#> log_lik[88] -7.7e-01 3.8e-03 0.24 -1.2 -7.4e-01 -4.2e-01 4159 8755 1.00 +#> log_lik[89] -1.3e+00 5.1e-03 0.32 -1.8 -1.3e+00 -8.0e-01 3946 8308 1.00 +#> log_lik[90] -2.6e-01 2.1e-03 0.14 -0.53 -2.4e-01 -9.3e-02 4130 8694 1.0 +#> log_lik[91] -3.9e-01 2.0e-03 0.13 -0.62 -3.7e-01 -2.0e-01 4115 8663 1.0 +#> log_lik[92] -1.5e+00 5.5e-03 0.35 -2.1 -1.5e+00 -9.7e-01 4068 8564 1.00 +#> log_lik[93] -7.5e-01 3.6e-03 0.22 -1.1 -7.3e-01 -4.3e-01 3814 8029 1.00 +#> log_lik[94] -3.2e-01 1.4e-03 0.088 -0.47 -3.1e-01 -1.9e-01 3945 8305 1.0 +#> log_lik[95] -3.9e-01 1.7e-03 0.11 -0.59 -3.8e-01 -2.3e-01 4064 8555 1.0 +#> log_lik[96] -1.6e+00 4.9e-03 0.29 -2.1 -1.6e+00 -1.1e+00 3451 7265 1.0 +#> log_lik[97] -4.3e-01 1.5e-03 0.099 -0.61 -4.3e-01 -2.8e-01 4403 9269 1.0 +#> log_lik[98] -1.0e+00 5.5e-03 0.38 -1.7 -1.0e+00 -5.1e-01 4727 9951 1.00 +#> log_lik[99] -6.9e-01 2.1e-03 0.14 -0.94 -6.8e-01 -4.8e-01 4346 9149 1.00 +#> log_lik[100] 
-3.9e-01 1.5e-03 0.096 -0.56 -3.8e-01 -2.4e-01 4036 8496 1.0 #> #> Samples were drawn using hmc with nuts. #> For each parameter, N_Eff is a crude measure of effective sample size, diff --git a/docs/reference/fit-method-draws.html b/docs/reference/fit-method-draws.html index bcad43c51..789ea3b20 100644 --- a/docs/reference/fit-method-draws.html +++ b/docs/reference/fit-method-draws.html @@ -85,7 +85,7 @@ cmdstanr - 0.2.0 + 0.3.0
    @@ -189,32 +189,26 @@

    Extract posterior draws

    log probability (target) accumulated in the model block.

    - - -

    Usage

    - - -
    $draws(variables = NULL, inc_warmup = FALSE, ...)
    -
    +
    draws(variables = NULL, inc_warmup = FALSE)

    Arguments

    - - - -
      -
    • variables: (character vector) The variables (parameters and generated -quantities) to read in. If NULL (the default) then the draws of all -variables are included.

    • -
    • inc_warmup: (logical) For MCMC only, should warmup draws be included? -Defaults to FALSE.

    • -
    • ...: Arguments passed on to -posterior::as_draws_array().

    • -
    + + + + + + + + + + +
    variables

    (character vector) The variables to read in. If NULL (the +default) then all variables are included.

    inc_warmup

    (logical) Should warmup draws be included? Defaults to +FALSE. Ignored except when used with CmdStanMCMC objects.

    Value

    -
    • For MCMC, a 3-D draws_array object (iteration x chain x @@ -247,25 +241,25 @@

      Examp fit <- cmdstanr_example("logistic")
      #> Model executable is up to date!
      # returned as 3-D array (see ?posterior::draws_array) draws <- fit$draws() -dim(draws)
      #> [1] 1000 4 5
      str(draws)
      #> 'draws_array' num [1:1000, 1:4, 1:5] -67.7 -67.3 -66.9 -67.2 -68.1 ... +dim(draws)
      #> [1] 1000 4 105
      str(draws)
      #> 'draws_array' num [1:1000, 1:4, 1:105] -67.7 -67.3 -66.9 -67.2 -68.1 ... #> - attr(*, "dimnames")=List of 3 #> ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... #> ..$ chain : chr [1:4] "1" "2" "3" "4" -#> ..$ variable : chr [1:5] "lp__" "alpha" "beta[1]" "beta[2]" ...
      +#> ..$ variable : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ...
      # can easily convert to other formats (data frame, matrix, list) -as_draws_df(draws) # see also as_draws_matrix, as_draws_list
      #> # A draws_df: 1000 iterations, 4 chains, and 5 variables -#> lp__ alpha beta[1] beta[2] beta[3] -#> 1 -68 0.56 -1.19 -0.581 0.65 -#> 2 -67 0.15 -0.85 -0.681 0.46 -#> 3 -67 0.60 -0.54 0.191 0.66 -#> 4 -67 0.14 -0.71 -0.748 0.97 -#> 5 -68 0.43 -0.64 0.340 0.73 -#> 6 -66 0.42 -0.75 0.116 0.79 -#> 7 -65 0.58 -0.56 -0.056 0.78 -#> 8 -64 0.42 -0.75 -0.122 0.82 -#> 9 -65 0.34 -0.40 -0.508 0.46 -#> 10 -65 0.65 -0.59 -0.022 0.64 -#> # ... with 3990 more draws +as_draws_df(draws) # see also as_draws_matrix, as_draws_list
      #> # A draws_df: 1000 iterations, 4 chains, and 105 variables +#> lp__ alpha beta[1] beta[2] beta[3] log_lik[1] log_lik[2] log_lik[3] +#> 1 -68 0.56 -1.19 -0.581 0.65 -0.41 -0.42 -0.51 +#> 2 -67 0.15 -0.85 -0.681 0.46 -0.61 -0.46 -1.00 +#> 3 -67 0.60 -0.54 0.191 0.66 -0.40 -0.39 -0.18 +#> 4 -67 0.14 -0.71 -0.748 0.97 -0.69 -0.27 -1.01 +#> 5 -68 0.43 -0.64 0.340 0.73 -0.43 -0.28 -0.14 +#> 6 -66 0.42 -0.75 0.116 0.79 -0.45 -0.27 -0.20 +#> 7 -65 0.58 -0.56 -0.056 0.78 -0.43 -0.38 -0.27 +#> 8 -64 0.42 -0.75 -0.122 0.82 -0.48 -0.30 -0.31 +#> 9 -65 0.34 -0.40 -0.508 0.46 -0.57 -0.59 -0.84 +#> 10 -65 0.65 -0.59 -0.022 0.64 -0.39 -0.46 -0.25 +#> # ... with 3990 more draws, and 97 more variables #> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
      # can select specific parameters fit$draws("alpha")
      #> # A draws_array: 1000 iterations, 4 chains, and 1 variables diff --git a/docs/reference/fit-method-init.html b/docs/reference/fit-method-init.html index d8760df5a..5c5d7c1a9 100644 --- a/docs/reference/fit-method-init.html +++ b/docs/reference/fit-method-init.html @@ -6,7 +6,7 @@ -Extract initial values — fit-method-init • cmdstanr +Extract user-specified initial values — fit-method-init • cmdstanr @@ -46,12 +46,13 @@ - + +the list of lists format). Currently it is not possible to extract initial +values generated automatically by CmdStan, although CmdStan may support +this in the future." /> @@ -84,7 +85,7 @@ cmdstanr - 0.2.0 + 0.3.0
      @@ -174,7 +175,7 @@
      @@ -183,23 +184,17 @@

      Extract initial values

      Return user-specified initial values. If the user provided initial values files or R objects (list of lists or function) via the init argument when fitting the model then these are returned (always in -the list of lists format). Initial values generated by CmdStan are not -returned.

      +the list of lists format). Currently it is not possible to extract initial +values generated automatically by CmdStan, although CmdStan may support +this in the future.

      +
      init()
      -

      Usage

      - - -
      $init()
      -
      -

      Value

      - - -

      A list of lists. See Examples.

      +

      A list of lists. See Examples.

      See also

      diff --git a/docs/reference/fit-method-inv_metric.html b/docs/reference/fit-method-inv_metric.html index 024d3f4a8..c408e68f3 100644 --- a/docs/reference/fit-method-inv_metric.html +++ b/docs/reference/fit-method-inv_metric.html @@ -6,7 +6,7 @@ -Extract inverse metric (mass matrix) — fit-method-inv_metric • cmdstanr +Extract inverse metric (mass matrix) after MCMC — fit-method-inv_metric • cmdstanr @@ -46,9 +46,8 @@ - - + + @@ -81,7 +80,7 @@ cmdstanr - 0.2.0 + 0.3.0
      @@ -171,39 +170,31 @@
      -

      Return a list containing the inverse metric (mass matrix) for -each chain.

      +

      Extract the inverse metric (mass matrix) for each MCMC chain.

      - - -

      Usage

      - - -
      $inv_metric(matrix = TRUE)
      -
      +
      inv_metric(matrix = TRUE)

      Arguments

      - - - -
        -
      • matrix: (logical) If a diagonal metric was used, setting matrix = FALSE -returns a list containing just the diagonals of the matrices instead of the -full matrices. Setting matrix = FALSE has no effect for dense metrics.

      • -
      + + + + + + +
      matrix

      (logical) If a diagonal metric was used, setting matrix = FALSE returns a list containing just the diagonals of the matrices instead +of the full matrices. Setting matrix = FALSE has no effect for dense +metrics.

      Value

      - - -

      A list of length equal to the number of MCMC chains. See the matrix +

      A list of length equal to the number of MCMC chains. See the matrix argument for details.

      See also

      diff --git a/docs/reference/fit-method-loo.html b/docs/reference/fit-method-loo.html new file mode 100644 index 000000000..af19a1053 --- /dev/null +++ b/docs/reference/fit-method-loo.html @@ -0,0 +1,275 @@ + + + + + + + + +Leave-one-out cross-validation (LOO-CV) — fit-method-loo • cmdstanr + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      +
      + + + + +
      + +
      +
      + + +
      +

      The $loo() method computes approximate LOO-CV using the +loo package. This is a simple wrapper around loo::loo.array() +provided for convenience and requires computing the pointwise +log-likelihood in your Stan program. See the loo package +vignettes for details.

      +
      + +
      loo(variables = "log_lik", r_eff = TRUE, ...)
      + +

      Arguments

      + + + + + + + + + + + + + + +
      variables

    (character vector) The name(s) of the variable(s) in the +Stan program containing the pointwise log-likelihood. The default is to +look for "log_lik". This argument is passed to the +$draws() method.

      r_eff

      There are several options:

        +
      • TRUE (the default) will automatically call loo::relative_eff.array() +to compute the r_eff argument to pass to loo::loo.array().

      • +
      • FALSE or NULL will avoid computing r_eff (which can sometimes be slow) +but will result in a warning from the loo package.

      • +
      • If r_eff is anything else, that object will be passed as the r_eff +argument to loo::loo.array().

      • +
      ...

      Other arguments (e.g., cores, save_psis, etc.) passed to +loo::loo.array().

      + +

      Value

      + +

      The object returned by loo::loo.array().

      +

      See also

      + +

      The loo package website with +documentation and +vignettes.

      + +

      Examples

      +
      +# \dontrun{ +# the "logistic" example model has "log_lik" in generated quantities +fit <- cmdstanr_example("logistic")
      #> Model executable is up to date!
      loo_result <- fit$loo(cores = 2) +print(loo_result)
      #> +#> Computed from 4000 by 100 log-likelihood matrix +#> +#> Estimate SE +#> elpd_loo -63.7 4.1 +#> p_loo 4.0 0.5 +#> looic 127.4 8.3 +#> ------ +#> Monte Carlo SE of elpd_loo is 0.0. +#> +#> All Pareto k estimates are good (k < 0.5). +#> See help('pareto-k-diagnostic') for details.
      # } + +
      +
      + +
      + + +
      + + +
      +

      Site built with pkgdown 1.5.1.

      +
      + +
      +
      + + + + + + + + diff --git a/docs/reference/fit-method-lp-1.png b/docs/reference/fit-method-lp-1.png index 3b5f89942..ed00d5622 100644 Binary files a/docs/reference/fit-method-lp-1.png and b/docs/reference/fit-method-lp-1.png differ diff --git a/docs/reference/fit-method-lp.html b/docs/reference/fit-method-lp.html index 50bb2a86e..b7cd9145f 100644 --- a/docs/reference/fit-method-lp.html +++ b/docs/reference/fit-method-lp.html @@ -86,7 +86,7 @@ cmdstanr - 0.2.0 + 0.3.0
      @@ -191,15 +191,16 @@

      Extract log probability (target)

      constants are dropped from log probability calculations.

      +
      lp()
       
      +lp_approx()
      -

      Usage

      - -
      $lp()
      -$lp_approx()
      -
      +

      Value

      +

      A numeric vector with length equal to the number of (post-warmup) +draws for MCMC and variational inference, and length equal to 1 for +optimization.

      Details

      @@ -212,12 +213,6 @@

      Details (also on the unconstrained space). It is exposed in the variational method for performing the checks described in Yao et al. (2018) and implemented in the loo package.

      -

      Value

      - - - -

      A numeric vector with length equal to the number of (post-warmup) draws for -MCMC and variational inference, and length equal to 1 for optimization.

      References

      Yao, Y., Vehtari, A., Simpson, D., and Gelman, A. (2018). Yes, but did it @@ -229,7 +224,7 @@

      See a

      Examples

      # \dontrun{ -fit_mcmc <- cmdstanr_example("logistic")
      #> Model executable is up to date!
      head(fit_mcmc$lp())
      #> Error: Don't know how to transform an object of class 'numeric' to any supported draws format.
      +fit_mcmc <- cmdstanr_example("logistic")
      #> Model executable is up to date!
      head(fit_mcmc$lp())
      #> [1] -67.0068 -67.0346 -67.1231 -65.7535 -65.5887 -65.4090
      fit_mle <- cmdstanr_example("logistic", method = "optimize")
      #> Model executable is up to date!
      fit_mle$lp()
      #> [1] -63.9218
      fit_vb <- cmdstanr_example("logistic", method = "variational")
      #> Model executable is up to date!
      plot(fit_vb$lp(), fit_vb$lp_approx())
      # } diff --git a/docs/reference/fit-method-metadata.html b/docs/reference/fit-method-metadata.html index 142579b83..913c34d0a 100644 --- a/docs/reference/fit-method-metadata.html +++ b/docs/reference/fit-method-metadata.html @@ -82,7 +82,7 @@ cmdstanr - 0.2.0 + 0.3.0
      @@ -183,14 +183,9 @@

      Extract metadata from CmdStan CSV files

      fitting the model. See Examples and read_cmdstan_csv().

      +
      metadata()
      -

      Usage

      - - -
      $metadata()
      -
      -

      See also

      @@ -216,12 +211,12 @@

      Examp #> $ stepsize_jitter : num 0 #> $ id : num [1:4] 1 2 3 4 #> $ init : num [1:4] 2 2 2 2 -#> $ seed : num [1:4] 1.51e+09 1.29e+09 1.91e+08 2.03e+09 +#> $ seed : num [1:4] 7.77e+07 2.10e+09 1.87e+09 1.79e+09 #> $ refresh : num 100 #> $ sig_figs : num -1 #> $ sampler_diagnostics : chr [1:6] "accept_stat__" "stepsize__" "treedepth__" "n_leapfrog__" ... -#> $ model_params : chr [1:5] "lp__" "alpha" "beta[1]" "beta[2]" ... -#> $ step_size_adaptation: num [1:4] 0.783 0.71 0.765 0.854 +#> $ model_params : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ... +#> $ step_size_adaptation: num [1:4] 0.729 0.751 0.632 0.796 #> $ model_name : chr "logistic_model" #> $ adapt_engaged : num 1 #> $ adapt_delta : num 0.8 @@ -229,11 +224,12 @@

      Examp #> $ step_size : num [1:4] 1 1 1 1 #> $ iter_warmup : num 1000 #> $ iter_sampling : num 1000 -#> $ stan_variable_dims :List of 3 -#> ..$ lp__ : num 1 -#> ..$ alpha: num 1 -#> ..$ beta : num 3 -#> $ stan_variables : chr [1:3] "lp__" "alpha" "beta"
      +#> $ stan_variable_dims :List of 4 +#> ..$ lp__ : num 1 +#> ..$ alpha : num 1 +#> ..$ beta : num 3 +#> ..$ log_lik: num 100 +#> $ stan_variables : chr [1:4] "lp__" "alpha" "beta" "log_lik"
      fit_mle <- cmdstanr_example("logistic", method = "optimize")
      #> Model executable is up to date!
      str(fit_mle$metadata())
      #> List of 24 #> $ stan_version_major : num 2 #> $ stan_version_minor : num 25 @@ -251,17 +247,18 @@

      Examp #> $ save_iterations : num 0 #> $ id : num 1 #> $ init : num 2 -#> $ seed : num 1.37e+09 +#> $ seed : num 1.1e+09 #> $ refresh : num 100 #> $ sig_figs : num -1 #> $ sampler_diagnostics: chr(0) -#> $ model_params : chr [1:5] "lp__" "alpha" "beta[1]" "beta[2]" ... +#> $ model_params : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ... #> $ model_name : chr "logistic_model" -#> $ stan_variable_dims :List of 3 -#> ..$ lp__ : num 1 -#> ..$ alpha: num 1 -#> ..$ beta : num 3 -#> $ stan_variables : chr [1:3] "lp__" "alpha" "beta"

      +#> $ stan_variable_dims :List of 4 +#> ..$ lp__ : num 1 +#> ..$ alpha : num 1 +#> ..$ beta : num 3 +#> ..$ log_lik: num 100 +#> $ stan_variables : chr [1:4] "lp__" "alpha" "beta" "log_lik"
      fit_vb <- cmdstanr_example("logistic", method = "variational")
      #> Model executable is up to date!
      str(fit_vb$metadata())
      #> List of 23 #> $ stan_version_major : num 2 #> $ stan_version_minor : num 25 @@ -277,19 +274,20 @@

      Examp #> $ output_samples : num 1000 #> $ id : num 1 #> $ init : num 2 -#> $ seed : num 7.14e+08 +#> $ seed : num 2.03e+09 #> $ refresh : num 100 #> $ sig_figs : num -1 #> $ sampler_diagnostics: chr(0) -#> $ model_params : chr [1:6] "lp__" "lp_approx__" "alpha" "beta[1]" ... +#> $ model_params : chr [1:106] "lp__" "lp_approx__" "alpha" "beta[1]" ... #> $ model_name : chr "logistic_model" #> $ adapt_engaged : num 1 -#> $ stan_variable_dims :List of 4 +#> $ stan_variable_dims :List of 5 #> ..$ lp__ : num 1 #> ..$ lp_approx__: num 1 #> ..$ alpha : num 1 #> ..$ beta : num 3 -#> $ stan_variables : chr [1:4] "lp__" "lp_approx__" "alpha" "beta"

      # } +#> ..$ log_lik : num 100 +#> $ stan_variables : chr [1:5] "lp__" "lp_approx__" "alpha" "beta" ...
      # }

      diff --git a/docs/reference/fit-method-mle.html b/docs/reference/fit-method-mle.html index 0138e36f5..ef027d7d4 100644 --- a/docs/reference/fit-method-mle.html +++ b/docs/reference/fit-method-mle.html @@ -86,7 +86,7 @@ cmdstanr - 0.2.0 + 0.3.0 @@ -184,47 +184,39 @@

      Extract (penalized) maximum likelihood estimate after optimization

      The $mle() method is only available for CmdStanMLE objects. It returns the penalized maximum likelihood estimate (posterior mode) as a -numeric vector with one element per variable. The returned vector does not +numeric vector with one element per variable. The returned vector does not include lp__, the total log probability (target) accumulated in the model block of the Stan program, which is available via the $lp() method and also included in the $draws() method.

      - - -

      Usage

      - - -
      $mle(variables = NULL)
      -
      +
      mle(variables = NULL)

      Arguments

      - - - -
        -
      • variables: (character vector) The variables (parameters and generated -quantities) to include. If NULL (the default) then all variables are -included.

      • -
      + + + + + + +
      variables

      (character vector) The variables (parameters, transformed +parameters, and generated quantities) to include. If NULL (the default) +then all variables are included.

      Value

      - - -

      A numeric vector. See Examples.

      +

      A numeric vector. See Examples.

      See also

      Examples

      # \dontrun{ -fit <- cmdstanr_example("logistic", method = "optimize")
      #> Model executable is up to date!
      fit$mle()
      #> alpha beta[1] beta[2] beta[3] -#> 0.364454 -0.631561 -0.258958 0.648495
      fit$mle("alpha")
      #> alpha -#> 0.364454
      fit$mle("beta")
      #> beta[1] beta[2] beta[3] -#> -0.631561 -0.258958 0.648495
      fit$mle("beta[2]")
      #> beta[2] -#> -0.258958
      # } +fit <- cmdstanr_example("logistic", method = "optimize")
      #> Model executable is up to date!
      fit$mle("alpha")
      #> alpha +#> 0.364448
      fit$mle("beta")
      #> beta[1] beta[2] beta[3] +#> -0.631556 -0.258966 0.648501
      fit$mle("beta[2]")
      #> beta[2] +#> -0.258966
      # }
      diff --git a/docs/reference/fit-method-num_chains.html b/docs/reference/fit-method-num_chains.html new file mode 100644 index 000000000..19ce9a36e --- /dev/null +++ b/docs/reference/fit-method-num_chains.html @@ -0,0 +1,224 @@ + + + + + + + + +Extract number of chains after MCMC — fit-method-num_chains • cmdstanr + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      +
      + + + + +
      + +
      +
      + + +
      +

      The $num_chains() method returns the number of MCMC chains.

      +
      + +
      num_chains()
      + + +

      Value

      + +

      An integer.

      +

      See also

      + + + +

      Examples

      +
      # \dontrun{ +fit_mcmc <- cmdstanr_example(chains = 2)
      #> Model executable is up to date!
      fit_mcmc$num_chains()
      #> [1] 2
      # } + +
      +
      + +
      + + +
      + + +
      +

      Site built with pkgdown 1.5.1.

      +
      + +
      +
      + + + + + + + + diff --git a/docs/reference/fit-method-output.html b/docs/reference/fit-method-output.html index 7db863b67..ffd902bed 100644 --- a/docs/reference/fit-method-output.html +++ b/docs/reference/fit-method-output.html @@ -47,11 +47,12 @@ - + @@ -84,7 +85,7 @@ cmdstanr - 0.2.0 + 0.3.0 @@ -180,28 +181,25 @@

      Access console output

      -

      For MCMC the $output() method returns the stdout and stderr of -all chains as a list of character vectors. If the id argument is specified -it pretty prints the console output for a single chain.

      -

      For optimization and variational inference $output() just pretty prints the -console output.

      +

      For MCMC, the $output() method returns the stdout and stderr +of all chains as a list of character vectors if id=NULL. If the id +argument is specified it instead pretty prints the console output for a +single chain.

      +

      For optimization and variational inference $output() just pretty prints +the console output.

      - - -

      Usage

      - - -
      $output(id = NULL)
      -
      +
      output(id = NULL)

      Arguments

      - - - -
        -
      • id: (integer) For MCMC only, the chain id.

      • -
      + + + + + + +
      id

      (integer) The chain id. Ignored if the model was not fit using +MCMC.

      See also

      @@ -236,19 +234,19 @@

      Examp #> stepsize_jitter = 0 (Default) #> id = 1 #> data -#> file = /private/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmppTMpzn/temp_libpath35d63c053564/cmdstanr/logistic.data.json +#> file = /private/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpOUu4d5/temp_libpath1f1e474979aa/cmdstanr/logistic.data.json #> init = 2 (Default) #> random -#> seed = 699507595 +#> seed = 984374382 #> output -#> file = /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-1-4ff592.csv +#> file = /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171338-1-74a0f0.csv #> diagnostic_file = (Default) #> refresh = 100 (Default) #> sig_figs = -1 (Default) #> #> -#> Gradient evaluation took 2.3e-05 seconds -#> 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds. +#> Gradient evaluation took 2.8e-05 seconds +#> 1000 transitions using 10 leapfrog steps per transition would take 0.28 seconds. #> Adjust your expectations accordingly! #> #> @@ -275,9 +273,9 @@

      Examp #> Iteration: 1900 / 2000 [ 95%] (Sampling) #> Iteration: 2000 / 2000 [100%] (Sampling) #> -#> Elapsed Time: 0.032 seconds (Warm-up) -#> 0.049 seconds (Sampling) -#> 0.081 seconds (Total)
      out <- fit_mcmc$output() +#> Elapsed Time: 0.035 seconds (Warm-up) +#> 0.137 seconds (Sampling) +#> 0.172 seconds (Total)
      out <- fit_mcmc$output() str(out)
      #> List of 4 #> $ : chr [1:70] "" "method = sample (Default)" " sample" " num_samples = 1000 (Default)" ... #> $ : chr [1:70] "" "method = sample (Default)" " sample" " num_samples = 1000 (Default)" ... @@ -299,19 +297,19 @@

      Examp #> save_iterations = 0 (Default) #> id = 1 #> data -#> file = /private/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmppTMpzn/temp_libpath35d63c053564/cmdstanr/logistic.data.json +#> file = /private/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpOUu4d5/temp_libpath1f1e474979aa/cmdstanr/logistic.data.json #> init = 2 (Default) #> random -#> seed = 1879167869 +#> seed = 432754710 #> output -#> file = /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-1-1cc375.csv +#> file = /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171338-1-30f448.csv #> diagnostic_file = (Default) #> refresh = 100 (Default) #> sig_figs = -1 (Default) #> -#> Initial log joint probability = -81.5538 +#> Initial log joint probability = -103.426 #> Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes -#> 6 -63.9218 0.000306238 0.000762331 1 1 8 +#> 7 -63.9218 0.000113707 0.000210786 1 1 9 #> Optimization terminated normally: #> Convergence detected: relative gradient magnitude is below tolerance

      fit_vb <- cmdstanr_example("logistic", method = "variational")
      #> Model executable is up to date!
      fit_vb$output()
      #> @@ -331,12 +329,12 @@

      Examp #> output_samples = 1000 (Default) #> id = 1 #> data -#> file = /private/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmppTMpzn/temp_libpath35d63c053564/cmdstanr/logistic.data.json +#> file = /private/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpOUu4d5/temp_libpath1f1e474979aa/cmdstanr/logistic.data.json #> init = 2 (Default) #> random -#> seed = 2071003765 +#> seed = 1062574477 #> output -#> file = /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/logistic-202011121417-1-42bc7b.csv +#> file = /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171338-1-7dd145.csv #> diagnostic_file = (Default) #> refresh = 100 (Default) #> sig_figs = -1 (Default) @@ -349,8 +347,8 @@

      Examp #> #> #> -#> Gradient evaluation took 2.3e-05 seconds -#> 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds. +#> Gradient evaluation took 2.8e-05 seconds +#> 1000 transitions using 10 leapfrog steps per transition would take 0.28 seconds. #> Adjust your expectations accordingly! #> #> @@ -364,13 +362,9 @@

      Examp #> #> Begin stochastic gradient ascent. #> iter ELBO delta_ELBO_mean delta_ELBO_med notes -#> 100 -67.817 1.000 1.000 -#> 200 -66.301 0.511 1.000 -#> 300 -66.246 0.341 0.023 -#> 400 -66.847 0.258 0.023 -#> 500 -66.131 0.209 0.011 -#> 600 -66.097 0.174 0.011 -#> 700 -66.152 0.149 0.009 MEDIAN ELBO CONVERGED +#> 100 -66.324 1.000 1.000 +#> 200 -66.614 0.502 1.000 +#> 300 -66.607 0.335 0.004 MEDIAN ELBO CONVERGED #> #> Drawing a sample of size 1000 from the approximate posterior... #> COMPLETED.

      # } diff --git a/docs/reference/fit-method-return_codes.html b/docs/reference/fit-method-return_codes.html index 1f3e7cd14..6cd30a87a 100644 --- a/docs/reference/fit-method-return_codes.html +++ b/docs/reference/fit-method-return_codes.html @@ -81,7 +81,7 @@ cmdstanr - 0.2.0 + 0.3.0
      @@ -181,14 +181,13 @@

      Extract return codes from CmdStan

      from the CmdStan run(s). A return code of 0 indicates a successful run.

      +
      return_codes()
      -

      Usage

      - - -
      $return_codes()
      -
      +

      Value

      +

      An integer vector of return codes with length equal to the number of +CmdStan runs (number of chains for MCMC and one otherwise).

      See also

      @@ -196,8 +195,8 @@

      See a

      Examples

      # \dontrun{ # example with return codes all zero -fit_mcmc <- cmdstanr_example("schools", method = "sample")
      #> Model executable is up to date!
      #> -#> Warning: 141 of 4000 (4.0%) transitions ended with a divergence. +fit_mcmc <- cmdstanr_example("schools", method = "sample")
      #> Model executable is up to date!
      #> Chain 1 Informational Message: The current Metropolis proposal is about to be rejected because of the following issue:
      #> Chain 1 Exception: normal_lpdf: Scale parameter is 0, but must be > 0! (in '/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/model-2ed21886c09b.stan', line 14, column 2 to column 41)
      #> Chain 1 If this warning occurs sporadically, such as for highly constrained variable types like covariance matrices, then the sampler is fine,
      #> Chain 1 but if this warning occurs often then your model may be either severely ill-conditioned or misspecified.
      #> Chain 1
      #> +#> Warning: 114 of 4000 (3.0%) transitions ended with a divergence. #> This may indicate insufficient exploration of the posterior distribution. #> Possible remedies include: #> * Increasing adapt_delta closer to 1 (default is 0.8) diff --git a/docs/reference/fit-method-sampler_diagnostics.html b/docs/reference/fit-method-sampler_diagnostics.html index 2622a0c05..6a3667236 100644 --- a/docs/reference/fit-method-sampler_diagnostics.html +++ b/docs/reference/fit-method-sampler_diagnostics.html @@ -6,7 +6,7 @@ -Extract sampler diagnostics — fit-method-sampler_diagnostics • cmdstanr +Extract sampler diagnostics after MCMC — fit-method-sampler_diagnostics • cmdstanr @@ -46,7 +46,7 @@ - + @@ -81,7 +81,7 @@ cmdstanr - 0.2.0 + 0.3.0
      @@ -171,7 +171,7 @@
      @@ -181,29 +181,20 @@

      Extract sampler diagnostics

      chain of MCMC.

      - - -

      Usage

      - - -
      $sampler_diagnostics(inc_warmup = FALSE, ...)
      -
      +
      sampler_diagnostics(inc_warmup = FALSE)

      Arguments

      - - - -
        -
      • inc_warmup: (logical) Should warmup draws be included? Defaults to FALSE.

      • -
      • ...: Arguments passed on to -posterior::as_draws_array().

      • -
      + + + + + + +
      inc_warmup

      (logical) Should warmup draws be included? Defaults to FALSE.

      Value

      - - -

      A 3-D draws_array object (iteration x chain x +

      A 3-D draws_array object (iteration x chain x variable). The variables for Stan's default MCMC algorithm are "accept_stat__", "stepsize__", "treedepth__", "n_leapfrog__", "divergent__", "energy__".

      @@ -214,7 +205,7 @@

      See a

      Examples

      # \dontrun{ fit <- cmdstanr_example("logistic")
      #> Model executable is up to date!
      sampler_diagnostics <- fit$sampler_diagnostics() -str(sampler_diagnostics)
      #> 'draws_array' num [1:1000, 1:4, 1:6] 1 0.783 0.876 0.911 0.984 ... +str(sampler_diagnostics)
      #> 'draws_array' num [1:1000, 1:4, 1:6] 0.936 0.942 0.996 0.979 0.859 ... #> - attr(*, "dimnames")=List of 3 #> ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... #> ..$ chain : chr [1:4] "1" "2" "3" "4" @@ -222,16 +213,16 @@

      Examp library(posterior) as_draws_df(sampler_diagnostics)

      #> # A draws_df: 1000 iterations, 4 chains, and 6 variables #> accept_stat__ stepsize__ treedepth__ n_leapfrog__ divergent__ energy__ -#> 1 1.00 0.66 2 3 0 66 -#> 2 0.78 0.66 2 3 0 68 -#> 3 0.88 0.66 3 7 0 68 -#> 4 0.91 0.66 2 3 0 68 -#> 5 0.98 0.66 2 3 0 68 -#> 6 1.00 0.66 2 3 0 69 -#> 7 0.88 0.66 3 7 0 68 -#> 8 0.88 0.66 3 7 0 68 -#> 9 0.99 0.66 3 7 0 67 -#> 10 0.79 0.66 2 7 0 71 +#> 1 0.94 0.68 3 7 0 67 +#> 2 0.94 0.68 3 7 0 69 +#> 3 1.00 0.68 3 7 0 66 +#> 4 0.98 0.68 3 7 0 67 +#> 5 0.86 0.68 3 7 0 69 +#> 6 0.90 0.68 2 7 0 71 +#> 7 0.96 0.68 2 7 0 69 +#> 8 1.00 0.68 3 7 0 68 +#> 9 0.82 0.68 3 7 0 70 +#> 10 1.00 0.68 3 7 0 70 #> # ... with 3990 more draws #> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
      # } diff --git a/docs/reference/fit-method-save_object.html b/docs/reference/fit-method-save_object.html index 999c7f028..197867805 100644 --- a/docs/reference/fit-method-save_object.html +++ b/docs/reference/fit-method-save_object.html @@ -84,7 +84,7 @@ cmdstanr - 0.2.0 + 0.3.0
      @@ -187,22 +187,20 @@

      Save fitted model object to a file

      safest way to guarantee that everything has been read in before saving.

      - - -

      Usage

      - - -
      $save_object(file, ...)
      -
      +
      save_object(file, ...)

      Arguments

      - - - -
        -
      • file: (string) Path where the file should be saved.

      • -
      • ...: Other arguments to pass to base::saveRDS() besides object and file.

      • -
      + + + + + + + + + + +
      file

      (string) Path where the file should be saved.

      ...

      Other arguments to pass to base::saveRDS() besides object and file.

      See also

      @@ -216,14 +214,20 @@

      Examp rm(fit) fit <- readRDS(temp_rds_file) -fit$summary()
      #> # A tibble: 5 x 10 -#> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 lp__ -66.0 -65.7 1.49 1.30 -69.0 -64.3 1.00 2094. 2922. -#> 2 alpha 0.382 0.381 0.223 0.223 0.0195 0.751 1.00 4059. 2939. -#> 3 beta[1] -0.661 -0.658 0.247 0.247 -1.09 -0.269 1.00 4235. 2991. -#> 4 beta[2] -0.280 -0.276 0.231 0.224 -0.663 0.103 1.00 3985. 2968. -#> 5 beta[3] 0.678 0.672 0.272 0.262 0.232 1.14 1.00 4119. 2911.
      # } +fit$summary()
      #> # A tibble: 105 x 10 +#> variable mean median sd mad q5 q95 rhat ess_bulk +#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> +#> 1 lp__ -66.0 -65.7 1.45 1.26 -68.8 -64.3 1.00 2256. +#> 2 alpha 0.377 0.377 0.220 0.215 0.0127 0.746 1.00 4115. +#> 3 beta[1] -0.666 -0.663 0.247 0.250 -1.07 -0.267 1.00 4477. +#> 4 beta[2] -0.276 -0.278 0.228 0.233 -0.658 0.0915 1.00 4296. +#> 5 beta[3] 0.681 0.672 0.277 0.269 0.247 1.15 1.00 4142. +#> 6 log_lik… -0.517 -0.512 0.0995 0.101 -0.694 -0.368 1.00 4197. +#> 7 log_lik… -0.403 -0.381 0.151 0.141 -0.684 -0.194 1.00 4422. +#> 8 log_lik… -0.500 -0.465 0.215 0.207 -0.904 -0.210 1.00 4562. +#> 9 log_lik… -0.449 -0.429 0.156 0.154 -0.730 -0.228 1.00 4022. +#> 10 log_lik… -1.18 -1.16 0.282 0.281 -1.67 -0.754 1.00 4546. +#> # … with 95 more rows, and 1 more variable: ess_tail <dbl>
      # }

      diff --git a/docs/reference/fit-method-save_output_files.html b/docs/reference/fit-method-save_output_files.html index 05a22e931..3ae2d64b5 100644 --- a/docs/reference/fit-method-save_output_files.html +++ b/docs/reference/fit-method-save_output_files.html @@ -87,7 +87,7 @@ cmdstanr - 0.2.0 + 0.3.0 @@ -193,33 +193,59 @@

      Save output and data files

      the current file paths without moving any files.

      +
      save_output_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE)
       
      +save_latent_dynamics_files(
      +  dir = ".",
      +  basename = NULL,
      +  timestamp = TRUE,
      +  random = TRUE
      +)
       
      -    

      Usage

      +save_data_file(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) - -
      $save_output_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE)
      -$save_latent_dynamics_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE)
      -$save_data_file(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE)
      +output_files(include_failed = FALSE)
       
      -$output_files()
      -$latent_dynamics_files()
      -$data_file()
      -
      +latent_dynamics_files(include_failed = FALSE) -

      Arguments

      +data_file()
      - +

      Arguments

      + + + + + + + + + + + + + + + + + + + + + + +
      dir

      (string) Path to directory where the files should be saved.

      basename

      (string) Base filename to use. See Details.

      timestamp

      (logical) Should a timestamp be added to the file name(s)? +Defaults to TRUE. See Details.

      random

      (logical) Should random alphanumeric characters be added to the +end of the file name(s)? Defaults to TRUE. See Details.

      include_failed

    (logical) Should CmdStan runs that failed also be included? The +default is FALSE.

      -
        -
      • dir: (string) Path to directory where the files should be saved.

      • -
      • basename: (string) Base filename to use. See Details.

      • -
      • timestamp: (logical) Should a timestamp be added to the file name(s)? -Defaults to TRUE. See Details.

      • -
      • random: (logical) Should random alphanumeric characters be added to the -end of the file name(s)? Defaults to TRUE. See Details.

      • -
      +

      Value

      +

      The $save_* methods print a message with the new file paths and (invisibly) +return a character vector of the new paths (or NA for any that couldn't be +copied). They also have the side effect of setting the internal paths in the +fitted model object to the new paths.

      +

      The methods without the save_ prefix return character vectors of file +paths without moving any files.

      Details

      @@ -237,20 +263,33 @@

      Details file name after basename.

      For $save_data_file() no id is included in the file name because even with multiple MCMC chains the data file is the same.

      -

      Value

      - - - -

      The $save_* methods print a message with the new file paths and (invisibly) -return a character vector of the new paths (or NA for any that couldn't be -copied). They also have the side effect of setting the internal paths in the -fitted model object to the new paths.

      -

      The methods without the save_ prefix return character vectors of file -paths without moving any files.

      See also

      +

      Examples

      +
      # \dontrun{ +fit <- cmdstanr_example()
      #> Model executable is up to date!
      fit$output_files()
      #> [1] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171339-1-578cb5.csv" +#> [2] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171339-2-578cb5.csv" +#> [3] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171339-3-578cb5.csv" +#> [4] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/logistic-202012171339-4-578cb5.csv"
      fit$data_file()
      #> [1] "/private/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpOUu4d5/temp_libpath1f1e474979aa/cmdstanr/logistic.data.json"
      +# just using tempdir for the example +my_dir <- tempdir() +fit$save_output_files(dir = my_dir, basename = "banana")
      #> Moved 4 files and set internal paths to new locations: +#> - /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/banana-202012171339-1-518bfc.csv +#> - /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/banana-202012171339-2-518bfc.csv +#> - /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/banana-202012171339-3-518bfc.csv +#> - /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/banana-202012171339-4-518bfc.csv
      fit$save_output_files(dir = my_dir, basename = "tomato", timestamp = FALSE)
      #> Moved 4 files and set internal paths to new locations: +#> - /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/tomato-1-2c2672.csv +#> - /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/tomato-2-2c2672.csv +#> - /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/tomato-3-2c2672.csv +#> - /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/tomato-4-2c2672.csv
      fit$save_output_files(dir = my_dir, basename = "lettuce", timestamp = FALSE, random = FALSE)
      #> Moved 4 files and set internal paths to new locations: +#> - /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/lettuce-1.csv +#> - /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/lettuce-2.csv +#> - /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/lettuce-3.csv +#> - /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/lettuce-4.csv
      # } + +
      @@ -180,7 +180,7 @@
      @@ -188,44 +188,35 @@

      Compute a summary table of MCMC estimates and diagnostics

      The $summary() method runs summarise_draws() from the posterior -package and returns the output. For MCMC only post-warmup draws are included -in the summary.

      -

      The $print() method prints the same summary stats but removes the extra -formatting used for printing tibbles and returns the fitted model object -itself. The $print() method may also be faster than $summary() because -it is designed to only compute the summary statistics for the variables -that will actually fit in the printed output (see argument max_rows) -whereas $summary() will compute them for all of the specified variables -in order to be able to return them to the user.

      +package and returns the output. For MCMC, only post-warmup draws are +included in the summary.

      +

      There is also a $print() method that prints the same summary stats but +removes the extra formatting used for printing tibbles and returns the +fitted model object itself. The $print() method may also be faster than +$summary() because it is designed to only compute the summary statistics +for the variables that will actually fit in the printed output whereas +$summary() will compute them for all of the specified variables in order +to be able to return them to the user. See Examples.

      - - -

      Usage

      - - -
      $summary(variables = NULL, ...)
      -$print(variables = NULL, ..., digits = 2, max_rows = 10)
      -
      +
      summary(variables = NULL, ...)

      Arguments

      - - - -
        -
      • variables: (character vector) The variables to include.

      • -
      • ...: Optional arguments to pass to -posterior::summarise_draws().

      • -
      • digits: (integer) For print only, the number of digits to use for -rounding.

      • -
      • max_rows: (integer) For print only, the maximum number of rows to print.

      • -
      + + + + + + + + + + +
      variables

      (character vector) The variables to include.

      ...

      Optional arguments to pass to posterior::summarise_draws().

      Value

      - - -

      The $summary() method returns the tibble created by +

      The $summary() method returns the tibble data frame created by posterior::summarise_draws().

      The $print() method returns the fitted model object itself (invisibly), which is the standard behavior for print methods in R.

      @@ -235,49 +226,68 @@

      See a

      Examples

      # \dontrun{ -fit <- cmdstanr_example("logistic")
      #> Model executable is up to date!
      fit$summary()
      #> # A tibble: 5 x 10 -#> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 lp__ -66.0 -65.7 1.45 1.25 -68.8 -64.3 1.00 2057. 2482. -#> 2 alpha 0.380 0.381 0.218 0.217 0.0250 0.742 1.00 4479. 3075. -#> 3 beta[1] -0.672 -0.667 0.252 0.250 -1.09 -0.261 1.00 4021. 2727. -#> 4 beta[2] -0.274 -0.272 0.229 0.225 -0.662 0.101 1.00 4216. 2960. -#> 5 beta[3] 0.685 0.676 0.266 0.264 0.248 1.12 1.00 4041. 2789.
      fit$print()
      #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> lp__ -65.98 -65.67 1.45 1.25 -68.83 -64.28 1.00 2056 2482 -#> alpha 0.38 0.38 0.22 0.22 0.02 0.74 1.00 4479 3075 -#> beta[1] -0.67 -0.67 0.25 0.25 -1.09 -0.26 1.00 4020 2726 -#> beta[2] -0.27 -0.27 0.23 0.22 -0.66 0.10 1.00 4216 2960 -#> beta[3] 0.68 0.68 0.27 0.26 0.25 1.12 1.00 4040 2789
      fit$print(max_rows = 2) # same as print(fit, max_rows = 2)
      #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> lp__ -65.98 -65.67 1.45 1.25 -68.83 -64.28 1.00 2056 2482 -#> alpha 0.38 0.38 0.22 0.22 0.02 0.74 1.00 4479 3075 +fit <- cmdstanr_example("logistic")
      #> Model executable is up to date!
      fit$summary()
      #> # A tibble: 105 x 10 +#> variable mean median sd mad q5 q95 rhat ess_bulk +#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> +#> 1 lp__ -65.9 -65.6 1.45 1.22 -68.8 -64.3 1.00 2041. +#> 2 alpha 0.379 0.381 0.220 0.220 0.0269 0.753 1.00 4096. +#> 3 beta[1] -0.659 -0.655 0.250 0.252 -1.08 -0.261 1.00 3764. +#> 4 beta[2] -0.273 -0.271 0.225 0.216 -0.646 0.0888 1.00 4440. +#> 5 beta[3] 0.681 0.677 0.261 0.259 0.271 1.12 1.00 4279. +#> 6 log_lik… -0.517 -0.510 0.0988 0.0989 -0.686 -0.367 1.00 3990. +#> 7 log_lik… -0.403 -0.385 0.146 0.135 -0.677 -0.200 1.00 4584. +#> 8 log_lik… -0.499 -0.466 0.220 0.204 -0.891 -0.210 1.00 4330. +#> 9 log_lik… -0.450 -0.432 0.152 0.150 -0.726 -0.238 1.00 4179. +#> 10 log_lik… -1.18 -1.16 0.282 0.284 -1.68 -0.756 1.00 4453. +#> # … with 95 more rows, and 1 more variable: ess_tail <dbl>
      fit$print()
      #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail +#> lp__ -65.95 -65.59 1.45 1.22 -68.80 -64.29 1.00 2040 2693 +#> alpha 0.38 0.38 0.22 0.22 0.03 0.75 1.00 4096 2697 +#> beta[1] -0.66 -0.65 0.25 0.25 -1.08 -0.26 1.00 3764 2833 +#> beta[2] -0.27 -0.27 0.22 0.22 -0.65 0.09 1.00 4440 2925 +#> beta[3] 0.68 0.68 0.26 0.26 0.27 1.12 1.00 4279 2926 +#> log_lik[1] -0.52 -0.51 0.10 0.10 -0.69 -0.37 1.00 3989 2987 +#> log_lik[2] -0.40 -0.39 0.15 0.13 -0.68 -0.20 1.00 4583 2861 +#> log_lik[3] -0.50 -0.47 0.22 0.20 -0.89 -0.21 1.00 4329 3207 +#> log_lik[4] -0.45 -0.43 0.15 0.15 -0.73 -0.24 1.00 4178 3105 +#> log_lik[5] -1.18 -1.16 0.28 0.28 -1.68 -0.76 1.00 4452 3084 +#> +#> # showing 10 of 105 rows (change via 'max_rows' argument)
      fit$print(max_rows = 2) # same as print(fit, max_rows = 2)
      #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail +#> lp__ -65.95 -65.59 1.45 1.22 -68.80 -64.29 1.00 2040 2693 +#> alpha 0.38 0.38 0.22 0.22 0.03 0.75 1.00 4096 2697 #> -#> # showing 2 of 5 rows (change via 'max_rows' argument)
      +#> # showing 2 of 105 rows (change via 'max_rows' argument)
      # include only certain variables fit$summary("beta")
      #> # A tibble: 3 x 10 -#> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 beta[1] -0.672 -0.667 0.252 0.250 -1.09 -0.261 1.00 4021. 2727. -#> 2 beta[2] -0.274 -0.272 0.229 0.225 -0.662 0.101 1.00 4216. 2960. -#> 3 beta[3] 0.685 0.676 0.266 0.264 0.248 1.12 1.00 4041. 2789.
      fit$print(c("alpha", "beta[2]"))
      #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> alpha 0.38 0.38 0.22 0.22 0.02 0.74 1.00 4479 3075 -#> beta[2] -0.27 -0.27 0.23 0.22 -0.66 0.10 1.00 4216 2960
      +#> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail +#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> +#> 1 beta[1] -0.659 -0.655 0.250 0.252 -1.08 -0.261 1.00 3764. 2834. +#> 2 beta[2] -0.273 -0.271 0.225 0.216 -0.646 0.0888 1.00 4440. 2926. +#> 3 beta[3] 0.681 0.677 0.261 0.259 0.271 1.12 1.00 4279. 2926.
      fit$print(c("alpha", "beta[2]"))
      #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail +#> alpha 0.38 0.38 0.22 0.22 0.03 0.75 1.00 4096 2697 +#> beta[2] -0.27 -0.27 0.22 0.22 -0.65 0.09 1.00 4440 2925
      # include all variables but only certain summaries -fit$summary(NULL, c("mean", "sd"))
      #> # A tibble: 5 x 3 -#> variable mean sd -#> <chr> <dbl> <dbl> -#> 1 lp__ -66.0 1.45 -#> 2 alpha 0.380 0.218 -#> 3 beta[1] -0.672 0.252 -#> 4 beta[2] -0.274 0.229 -#> 5 beta[3] 0.685 0.266
      +fit$summary(NULL, c("mean", "sd"))
      #> # A tibble: 105 x 3 +#> variable mean sd +#> <chr> <dbl> <dbl> +#> 1 lp__ -65.9 1.45 +#> 2 alpha 0.379 0.220 +#> 3 beta[1] -0.659 0.250 +#> 4 beta[2] -0.273 0.225 +#> 5 beta[3] 0.681 0.261 +#> 6 log_lik[1] -0.517 0.0988 +#> 7 log_lik[2] -0.403 0.146 +#> 8 log_lik[3] -0.499 0.220 +#> 9 log_lik[4] -0.450 0.152 +#> 10 log_lik[5] -1.18 0.282 +#> # … with 95 more rows
      # can use functions created from formulas # for example, calculate Pr(beta > 0) fit$summary("beta", prob_gt_0 = ~ mean(. > 0))
      #> # A tibble: 3 x 2 #> variable prob_gt_0 #> <chr> <dbl> #> 1 beta[1] 0.003 -#> 2 beta[2] 0.115 -#> 3 beta[3] 0.996
      # } +#> 2 beta[2] 0.112 +#> 3 beta[3] 0.995
      # }
      diff --git a/docs/reference/fit-method-time.html b/docs/reference/fit-method-time.html index a77ea15c8..b4c3d5b62 100644 --- a/docs/reference/fit-method-time.html +++ b/docs/reference/fit-method-time.html @@ -82,7 +82,7 @@ cmdstanr - 0.2.0 + 0.3.0
      @@ -183,19 +183,12 @@

      Report timing of CmdStan runs

      sampling phases.

      +
      time()
      -

      Usage

      - - -
      $time()
      -
      -

      Value

      - - -

      A list with elements

        +

        A list with elements

        • total: (scalar) the total run time.

        • chains: (data frame) for MCMC only, timing info for the individual chains. The data frame has columns "chain_id", "warmup", "sampling", @@ -209,20 +202,20 @@

          See a

          Examples

          # \dontrun{ fit_mcmc <- cmdstanr_example("logistic", method = "sample")
          #> Model executable is up to date!
          fit_mcmc$time()
          #> $total -#> [1] 0.4920871 +#> [1] 0.9395189 #> #> $chains #> chain_id warmup sampling total -#> 1 1 0.031 0.060 0.091 -#> 2 2 0.027 0.036 0.063 -#> 3 3 0.034 0.041 0.075 -#> 4 4 0.032 0.039 0.071 +#> 1 1 0.028 0.124 0.152 +#> 2 2 0.035 0.109 0.144 +#> 3 3 0.028 0.101 0.129 +#> 4 4 0.042 0.149 0.191 #>
          fit_mle <- cmdstanr_example("logistic", method = "optimize")
          #> Model executable is up to date!
          fit_mle$time()
          #> $total -#> [1] 0.1188149 +#> [1] 0.1158781 #>
          fit_vb <- cmdstanr_example("logistic", method = "variational")
          #> Model executable is up to date!
          fit_vb$time()
          #> $total -#> [1] 0.118211 +#> [1] 0.1155372 #>
          # }
          diff --git a/docs/reference/index.html b/docs/reference/index.html index bfb18e39e..5ed14f79a 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -79,7 +79,7 @@ cmdstanr - 0.2.1 + 0.3.0 @@ -251,37 +251,43 @@

          model-method-check_syntax

          +

          check_syntax()

          Check syntax of a Stan program

          -

          model-method-compile

          +

          compile()

          Compile a Stan program

          -

          model-method-generate-quantities

          +

          generate_quantities()

          Run Stan's standalone generated quantities method

          -

          model-method-optimize

          +

          optimize()

          Run Stan's optimization algorithms

          -

          model-method-sample

          +

          sample()

          Run Stan's MCMC algorithms

          -

          model-method-variational

          +

          sample_mpi()

          + +

          Run Stan's MCMC algorithms with MPI

          + + + +

          variational()

          Run Stan's variational approximation algorithms

          @@ -330,85 +336,97 @@

          fit-method-cmdstan_summary

          +

          cmdstan_summary() cmdstan_diagnose()

          -

          Run CmdStan's stansummary and diagnose

          +

          Run CmdStan's stansummary and diagnose utilities

          -

          fit-method-draws

          +

          draws()

          Extract posterior draws

          -

          fit-method-init

          +

          init()

          + +

          Extract user-specified initial values

          + + + +

          inv_metric()

          -

          Extract initial values

          +

          Extract inverse metric (mass matrix) after MCMC

          -

          fit-method-inv_metric

          +

          loo()

          -

          Extract inverse metric (mass matrix)

          +

          Leave-one-out cross-validation (LOO-CV)

          -

          fit-method-lp

          +

          lp() lp_approx()

          Extract log probability (target)

          -

          fit-method-metadata

          +

          metadata()

          Extract metadata from CmdStan CSV files

          -

          fit-method-mle

          +

          mle()

          Extract (penalized) maximum likelihood estimate after optimization

          -

          fit-method-output

          +

          num_chains()

          + +

          Extract number of chains after MCMC

          + + + +

          output()

          Access console output

          -

          fit-method-return_codes

          +

          return_codes()

          Extract return codes from CmdStan

          -

          fit-method-sampler_diagnostics

          +

          sampler_diagnostics()

          -

          Extract sampler diagnostics

          +

          Extract sampler diagnostics after MCMC

          -

          fit-method-save_object

          +

          save_object()

          Save fitted model object to a file

          -

          fit-method-save_output_files

          +

          save_output_files() save_latent_dynamics_files() save_data_file() output_files() latent_dynamics_files() data_file()

          Save output and data files

          -

          fit-method-summary

          +

          summary()

          -

          Compute a summary table of MCMC estimates and diagnostics

          +

          Compute a summary table of estimates and diagnostics

          -

          fit-method-time

          +

          time()

          Report timing of CmdStan runs

          diff --git a/docs/reference/install_cmdstan.html b/docs/reference/install_cmdstan.html index b8f3257af..dd8d66c2d 100644 --- a/docs/reference/install_cmdstan.html +++ b/docs/reference/install_cmdstan.html @@ -97,7 +97,7 @@ cmdstanr - 0.2.0 + 0.3.0 diff --git a/docs/reference/model-method-check_syntax.html b/docs/reference/model-method-check_syntax.html index 4260ef1fa..694e906b1 100644 --- a/docs/reference/model-method-check_syntax.html +++ b/docs/reference/model-method-check_syntax.html @@ -82,7 +82,7 @@ cmdstanr - 0.2.1 + 0.3.0 @@ -183,39 +183,43 @@

          Check syntax of a Stan program

          parsing succeeds. If invalid syntax is found an error is thrown.

          - - -

          Usage

          - - -
          $check_syntax(
          -  pedantic = FALSE,
          -  include_paths = NULL,
          -  stanc_options = list(),
          -  quiet = FALSE
          -)
          -
          +
          check_syntax(
          +  pedantic = FALSE,
          +  include_paths = NULL,
          +  stanc_options = list(),
          +  quiet = FALSE
          +)

          Arguments

          - - - -
            -
          • pedantic: (logical) Should pedantic mode be turned on? The default is + + + + + + + + + + + + + + + + + + +
            pedantic

            (logical) Should pedantic mode be turned on? The default is FALSE. Pedantic mode attempts to warn you about potential issues in your Stan program beyond syntax errors. For details see the Pedantic mode chapter in -the Stan Reference Manual.

            -
          • include_paths: (character vector) Paths to directories where Stan +the Stan Reference Manual.

          • include_paths

            (character vector) Paths to directories where Stan should look for files specified in #include directives in the Stan -program.

            -
          • stanc_options: (list) Any other Stan-to-C++ transpiler options to be +program.

          • stanc_options

            (list) Any other Stan-to-C++ transpiler options to be used when compiling the model. See the documentation for the -$compile() method for details.

            -
          • quiet: (logical) Should informational messages be suppressed? The +$compile() method for details.

          • quiet

            (logical) Should informational messages be suppressed? The default is FALSE, which will print a message if the Stan program is valid or the compiler error message if there are syntax errors. If TRUE, only -the error message will be printed.

            - +the error message will be printed.

            Value

            @@ -235,6 +239,7 @@

            See a model-method-compile, model-method-generate-quantities, model-method-optimize, +model-method-sample_mpi, model-method-sample, model-method-variational

            @@ -259,7 +264,7 @@

            Examp mod$check_syntax()
            #> Stan program is syntactically correct
            # pedantic mode will warn that lambda should be constrained to be positive # and that lambda has no prior distribution -mod$check_syntax(pedantic = TRUE)
            #> Warning:
            #> The parameter lambda has no priors.
            #> Warning at '/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpCe3KhV/file1424ccfbed68.stan', line 11, column 14 to column 20:
            #> A poisson distribution is given parameter lambda as a rate parameter
            #> (argument 1), but lambda was not constrained to be strictly positive.
            #> Stan program is syntactically correct
            # } +mod$check_syntax(pedantic = TRUE)
            #> Warning:
            #> The parameter lambda has no priors.
            #> Warning at '/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/file2ed27f29a77b.stan', line 11, column 14 to column 20:
            #> A poisson distribution is given parameter lambda as a rate parameter
            #> (argument 1), but lambda was not constrained to be strictly positive.
            #> Stan program is syntactically correct
            # }

    diff --git a/docs/reference/model-method-compile.html b/docs/reference/model-method-compile.html index a6158f90c..33bc55a2f 100644 --- a/docs/reference/model-method-compile.html +++ b/docs/reference/model-method-compile.html @@ -58,7 +58,7 @@ $hpp_file() methods. The default is to create the executable in the same directory as the Stan program and to write the generated C++ code in a temporary directory. To save the C++ code to a non-temporary location use -$save_hpp_file()." /> +$save_hpp_file(dir)." /> @@ -91,7 +91,7 @@ cmdstanr - 0.2.1 + 0.3.0 @@ -198,69 +198,83 @@

    Compile a Stan program

    $hpp_file() methods. The default is to create the executable in the same directory as the Stan program and to write the generated C++ code in a temporary directory. To save the C++ code to a non-temporary location use -$save_hpp_file().

    +$save_hpp_file(dir).

    - - -

    Usage

    - - -
    $compile(
    -  quiet = TRUE,
    -  dir = NULL,
    -  pedantic = FALSE,
    -  include_paths = NULL,
    -  cpp_options = list(),
    -  stanc_options = list(),
    -  force_recompile = FALSE
    -)
    -$exe_file()
    -$hpp_file()
    -$save_hpp_file(dir = NULL)
    -
    +
    compile(
    +  quiet = TRUE,
    +  dir = NULL,
    +  pedantic = FALSE,
    +  include_paths = NULL,
    +  cpp_options = list(),
    +  stanc_options = list(),
    +  force_recompile = FALSE,
    +  threads = FALSE
    +)

    Arguments

    - - - -

    Leaving all arguments at their defaults should be fine for most users, but -optional arguments are provided to enable features in CmdStan (and the Stan -Math library). See the CmdStan manual for more details.

      -
    • quiet: (logical) Should the verbose output from CmdStan during + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      quiet

      (logical) Should the verbose output from CmdStan during compilation be suppressed? The default is TRUE, but if you encounter an error we recommend trying again with quiet=FALSE to see more of the -output.

      -
    • dir: (string) The path to the directory in which to store the CmdStan +output.

    • dir

      (string) The path to the directory in which to store the CmdStan executable (or .hpp file if using $save_hpp_file()). The default is the -same location as the Stan program.

      -
    • pedantic: (logical) Should pedantic mode be turned on? The default is +same location as the Stan program.

    • pedantic

      (logical) Should pedantic mode be turned on? The default is FALSE. Pedantic mode attempts to warn you about potential issues in your Stan program beyond syntax errors. For details see the Pedantic mode chapter in the Stan Reference Manual. Note: to do a pedantic check for a model that is already compiled use the -$check_syntax() method instead.

      -
    • include_paths: (character vector) Paths to directories where Stan +$check_syntax() method instead.

    • include_paths

      (character vector) Paths to directories where Stan should look for files specified in #include directives in the Stan -program.

      -
    • cpp_options: (list) Any makefile options to be used when compiling the +program.

    • cpp_options

      (list) Any makefile options to be used when compiling the model (STAN_THREADS, STAN_MPI, STAN_OPENCL, etc.). Anything you would -otherwise write in the make/local file.

      -
    • stanc_options: (list) Any Stan-to-C++ transpiler options to be used +otherwise write in the make/local file.

    • stanc_options

      (list) Any Stan-to-C++ transpiler options to be used when compiling the model. See the Examples section below as well as the stanc chapter of the CmdStan Guide for more details on available options: -https://mc-stan.org/docs/cmdstan-guide/stanc.html.

      -
    • force_recompile: (logical) Should the model be recompiled even if was -not modified since last compiled. The default is FALSE.

    • - +https://mc-stan.org/docs/cmdstan-guide/stanc.html.

      force_recompile

      (logical) Should the model be recompiled even if it was +not modified since last compiled. The default is FALSE.

      threads

      Deprecated and will be removed in a future release. Please +turn on threading via cpp_options = list(stan_threads = TRUE) instead.

      Value

      The $compile() method is called for its side effect of creating the executable and adding its path to the CmdStanModel object, but it also returns the CmdStanModel object invisibly.

      -

      The $exe_file(), $hpp_file(), and $save_hpp_file() methods all return -file paths.

      +

      After compilation, the $exe_file(), $hpp_file(), and $save_hpp_file() +methods can be used and return file paths.

      See also

      @@ -287,7 +302,7 @@

      Examp mod <- cmdstan_model(file, compile = FALSE) mod$compile()
      #> Model executable is up to date!
      mod$exe_file()
      #> [1] "/Users/jgabry/.cmdstanr/cmdstan-2.25.0/examples/bernoulli/bernoulli"
      # turn on threading support (for using functions that support within-chain parallelization) -mod$compile(force_recompile = TRUE, cpp_options = list(stan_threads = TRUE))
      #> Compiling Stan program...
      #> - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ |
      mod$exe_file()
      #> [1] "/Users/jgabry/.cmdstanr/cmdstan-2.25.0/examples/bernoulli/bernoulli_threads"
      +mod$compile(force_recompile = TRUE, cpp_options = list(stan_threads = TRUE))
      #> Compiling Stan program...
      mod$exe_file()
      #> [1] "/Users/jgabry/.cmdstanr/cmdstan-2.25.0/examples/bernoulli/bernoulli_threads"
      # turn on pedantic mode (new in Stan v2.24) file_pedantic <- write_stan_file(" parameters { @@ -297,7 +312,7 @@

      Examp sigma ~ exponential(1); } ") -mod <- cmdstan_model(file_pedantic, pedantic = TRUE)

      #> Compiling Stan program...
      #> - \ | / - \
      #> Warning at '/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpCe3KhV/model-1424c4676c7fd.stan', line 6, column 2 to column 7:
      #> Parameter sigma is given a exponential distribution, which has strictly
      #> positive support, but sigma was not constrained to be strictly positive.
      #> | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ |
      +mod <- cmdstan_model(file_pedantic, pedantic = TRUE)
      #> Compiling Stan program...
      #> Warning at '/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/model-2ed2582a7eb0.stan', line 6, column 2 to column 7:
      #> Parameter sigma is given a exponential distribution, which has strictly
      #> positive support, but sigma was not constrained to be strictly positive.
      # }

    diff --git a/docs/reference/model-method-generate-quantities.html b/docs/reference/model-method-generate-quantities.html index 39cf47232..197bd1660 100644 --- a/docs/reference/model-method-generate-quantities.html +++ b/docs/reference/model-method-generate-quantities.html @@ -82,7 +82,7 @@ cmdstanr - 0.2.0 + 0.3.0 @@ -183,39 +183,93 @@

    Run Stan's standalone generated quantities method

    based on previously fitted parameters.

    - - -

    Usage

    - - -
    $generate_quantities(
    -  fitted_params,
    -  data = NULL,
    -  seed = NULL,
    -  output_dir = NULL,
    -  sig_figs = NULL,
    -  parallel_chains = getOption("mc.cores", 1),
    -  threads_per_chain = NULL
    -)
    -
    +
    generate_quantities(
    +  fitted_params,
    +  data = NULL,
    +  seed = NULL,
    +  output_dir = NULL,
    +  sig_figs = NULL,
    +  parallel_chains = getOption("mc.cores", 1),
    +  threads_per_chain = NULL
    +)

    Arguments

    - - - -
      -
    • fitted_params: (multiple options) The parameter draws to use. One of the following:

        -
      • A CmdStanMCMC fitted model object.

      • -
      • A character vector of paths to CmdStan CSV output files containing -parameter draws.

      • -
    • -
    • data, seed, output_dir, parallel_chains, threads_per_chain: -Same as for the $sample() method.

    • -
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    fitted_params

    (multiple options) The parameter draws to use. One of +the following:

    data

    (multiple options) The data to use for the variables specified in +the data block of the Stan program. One of the following:

      +
    • A named list of R objects (like for RStan). Internally this list is then +written to JSON for CmdStan using write_stan_json().

    • +
    • A path to a data file compatible with CmdStan (JSON or R dump). See the +appendices in the CmdStan manual for details on using these formats.

    • +
    • NULL or an empty list if the Stan program has no data block.

    • +
    seed

    (positive integer) A seed for the (P)RNG to pass to CmdStan.

    output_dir

    (string) A path to a directory where CmdStan should write +its output CSV files. For interactive use this can typically be left at +NULL (temporary directory) since CmdStanR makes the CmdStan output +(posterior draws and diagnostics) available in R via methods of the fitted +model objects. The behavior of output_dir is as follows:

      +
    • If NULL (the default), then the CSV files are written to a temporary +directory and only saved permanently if the user calls one of the $save_* +methods of the fitted model object (e.g., +$save_output_files()). These temporary +files are removed when the fitted model object is +garbage collected (manually or automatically).

    • +
    • If a path, then the files are created in output_dir with names +corresponding to the defaults used by $save_output_files().

    • +
    sig_figs

    (positive integer) The number of significant figures used +when storing the output values. By default, CmdStan represents the output +values with 6 significant figures. The upper limit for sig_figs is 18. +Increasing this value will result in larger output CSV files and thus an +increased usage of disk space.

    parallel_chains

    (positive integer) The maximum number of MCMC chains +to run in parallel. If parallel_chains is not specified then the default +is to look for the option "mc.cores", which can be set for an entire R +session by options(mc.cores=value). If the "mc.cores" option has not +been set then the default is 1.

    threads_per_chain

    (positive integer) If the model was +compiled with threading support, the number of +threads to use in parallelized sections within an MCMC chain (e.g., when +using the Stan functions reduce_sum() or map_rect()). This is in +contrast with parallel_chains, which specifies the number of chains to +run in parallel. The actual number of CPU cores used is +parallel_chains*threads_per_chain. For an example of using threading see +the Stan case study Reduce Sum: A Minimal Example.

    Value

    -

    The $generate_quantities() method returns a CmdStanGQ object.

    +

    A CmdStanGQ object.

    See also

    @@ -259,7 +314,7 @@

    Examp #> #> All 4 chains finished successfully. #> Mean chain execution time: 0.0 seconds. -#> Total execution time: 3.9 seconds.
    +#> Total execution time: 0.6 seconds.
    # stan program for standalone generated quantities # (could keep model block, but not necessary so removing it) gq_program <- write_stan_file( @@ -284,7 +339,7 @@

    Examp #> #> All 4 chains finished successfully. #> Mean chain execution time: 0.0 seconds. -#> Total execution time: 3.8 seconds.

    str(fit_gq$draws())
    #> 'draws_array' int [1:1000, 1:4, 1:10] 0 0 0 0 1 0 1 0 0 0 ... +#> Total execution time: 0.5 seconds.
    str(fit_gq$draws())
    #> 'draws_array' int [1:1000, 1:4, 1:10] 0 0 0 0 1 0 1 0 0 0 ... #> - attr(*, "dimnames")=List of 3 #> ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... #> ..$ chain : chr [1:4] "1" "2" "3" "4" diff --git a/docs/reference/model-method-optimize-1.png b/docs/reference/model-method-optimize-1.png index b248a3f89..08beb3c62 100644 Binary files a/docs/reference/model-method-optimize-1.png and b/docs/reference/model-method-optimize-1.png differ diff --git a/docs/reference/model-method-optimize-2.png b/docs/reference/model-method-optimize-2.png index 35db9ff48..df8262149 100644 Binary files a/docs/reference/model-method-optimize-2.png and b/docs/reference/model-method-optimize-2.png differ diff --git a/docs/reference/model-method-optimize.html b/docs/reference/model-method-optimize.html index 4c9f3a5ee..3e60e821c 100644 --- a/docs/reference/model-method-optimize.html +++ b/docs/reference/model-method-optimize.html @@ -49,7 +49,11 @@ +estimate. +Any argument left as NULL will default to the default value used by the +installed version of CmdStan. See the +CmdStan User’s Guide +for more details." /> @@ -82,7 +86,7 @@ cmdstanr - 0.2.1 + 0.3.0
    @@ -181,122 +185,179 @@

    Run Stan's optimization algorithms

    The $optimize() method of a CmdStanModel object runs Stan's optimizer to obtain a posterior mode (penalized maximum likelihood) estimate.

    +

    Any argument left as NULL will default to the default value used by the +installed version of CmdStan. See the +CmdStan User’s Guide +for more details.

    - - -

    Details

    - -

    CmdStan can find the posterior mode (assuming there is one). If the -posterior is not convex, there is no guarantee Stan will be able to find -the global mode as opposed to a local optimum of log probability. For -optimization, the mode is calculated without the Jacobian adjustment for -constrained variables, which shifts the mode due to the change of -variables. Thus modes correspond to modes of the model as written.

    -

    -- CmdStan Interface User's Guide

    -

    Usage

    - - -
    $optimize(
    -  data = NULL,
    -  seed = NULL,
    -  refresh = NULL,
    -  init = NULL,
    -  save_latent_dynamics = FALSE,
    -  output_dir = NULL,
    -  threads = NULL,
    -  algorithm = NULL,
    -  init_alpha = NULL,
    -  iter = NULL,
    -  sig_figs = NULL
    -)
    -
    - -

    Arguments shared by all fitting methods

    - -

    The following arguments can -be specified for any of the fitting methods (sample, optimize, -variational). Arguments left at NULL default to the default used by the -installed version of CmdStan.

      -
    • data: (multiple options) The data to use. One of the following:

        -
      • A named list of R objects (like for RStan). Internally this list is -then written to JSON for CmdStan using write_stan_json().

      • -
      • A path to a data file compatible with CmdStan (JSON or R dump). See -the appendices in the CmdStan manual for details on using these formats.

      • -
    • -
    • seed: (positive integer) A seed for the (P)RNG to pass to CmdStan.

    • -
    • refresh: (non-negative integer) The number of iterations between -printed screen updates. If refresh = 0, only error messages will be printed.

    • -
    • init: (multiple options) The initialization method for the parameters block:

        -
      • A real number x>0 initializes randomly between [-x,x] (on the -unconstrained parameter space);

      • -
      • 0 initializes to 0;

      • -
      • A character vector of paths (one per chain) to JSON or Rdump files. See +

        optimize(
        +  data = NULL,
        +  seed = NULL,
        +  refresh = NULL,
        +  init = NULL,
        +  save_latent_dynamics = FALSE,
        +  output_dir = NULL,
        +  sig_figs = NULL,
        +  threads = NULL,
        +  algorithm = NULL,
        +  init_alpha = NULL,
        +  iter = NULL,
        +  tol_obj = NULL,
        +  tol_rel_obj = NULL,
        +  tol_grad = NULL,
        +  tol_rel_grad = NULL,
        +  tol_param = NULL,
        +  history_size = NULL
        +)
        + +

        Arguments

        + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
        data

        (multiple options) The data to use for the variables specified in +the data block of the Stan program. One of the following:

          +
        • A named list of R objects (like for RStan). Internally this list is then +written to JSON for CmdStan using write_stan_json().

        • +
        • A path to a data file compatible with CmdStan (JSON or R dump). See the +appendices in the CmdStan manual for details on using these formats.

        • +
        • NULL or an empty list if the Stan program has no data block.

        • +
        seed

        (positive integer) A seed for the (P)RNG to pass to CmdStan.

        refresh

        (non-negative integer) The number of iterations between +printed screen updates. If refresh = 0, only error messages will be +printed.

        init

        (multiple options) The initialization method to use for the +variables declared in the parameters block of the Stan program:

          +
        • A real number x>0. This initializes all parameters randomly between +[-x,x] (on the unconstrained parameter space);

        • +
        • The number 0. This initializes all parameters to 0;

        • +
        • A character vector of paths (one per chain) to JSON or Rdump files +containing initial values for all or some parameters. See write_stan_json() to write R objects to JSON files compatible with CmdStan.

        • -
        • A list of lists. For MCMC the list should contain a sublist for each -chain. For optimization and variational inference there should be just one -sublist. The sublists should have named elements corresponding to the -parameters for which you are specifying initial values. See Examples.

        • +
        • A list of lists containing initial values for all or some parameters. For +MCMC the list should contain a sublist for each chain. For optimization and +variational inference there should be just one sublist. The sublists should +have named elements corresponding to the parameters for which you are +specifying initial values. See Examples.

        • A function that returns a single list with names corresponding to the -parameters for which you are specifying initial values. The function -can take no arguments or a single argument chain_id. For MCMC, if the -function has argument chain_id it will be supplied with the chain id -(from 1 to number of chains) when called to generate the initial -values. See Examples.

        • -
        -
      • save_latent_dynamics: (logical) Should auxiliary diagnostic information +parameters for which you are specifying initial values. The function can +take no arguments or a single argument chain_id. For MCMC, if the function +has argument chain_id it will be supplied with the chain id (from 1 to +number of chains) when called to generate the initial values. See +Examples.

      • +
        save_latent_dynamics

        (logical) Should auxiliary diagnostic information about the latent dynamics be written to temporary diagnostic CSV files? This argument replaces CmdStan's diagnostic_file argument and the content written to CSV is controlled by the user's CmdStan installation and not -CmdStanR (and for some algorithms no content may be written). The default -is save_latent_dynamics=FALSE, which is appropriate for almost every use case -(all diagnostics recommended for users to check are always saved, e.g., -divergences for HMC). To save the temporary files created when -save_latent_dynamics=TRUE see the -$save_latent_dynamics_files() method.

        -
      • output_dir: (string) A path to a directory where CmdStan should write +CmdStanR (for some algorithms no content may be written). The default +is FALSE, which is appropriate for almost every use case. To save the +temporary files created when save_latent_dynamics=TRUE see the +$save_latent_dynamics_files() +method.

      • output_dir

        (string) A path to a directory where CmdStan should write its output CSV files. For interactive use this can typically be left at -NULL (temporary directory) since CmdStanR makes the CmdStan output (e.g., -posterior draws and diagnostics) available in R via methods of the fitted +NULL (temporary directory) since CmdStanR makes the CmdStan output +(posterior draws and diagnostics) available in R via methods of the fitted model objects. The behavior of output_dir is as follows:

        • If NULL (the default), then the CSV files are written to a temporary -directory and only saved permanently if the user calls one of the -$save_* methods of the fitted model object (e.g., +directory and only saved permanently if the user calls one of the $save_* +methods of the fitted model object (e.g., $save_output_files()). These temporary -files are removed when the fitted model object is garbage collected.

        • +files are removed when the fitted model object is +garbage collected (manually or automatically).

        • If a path, then the files are created in output_dir with names -corresponding the defaults used by $save_output_files() (and similar -methods like $save_latent_dynamics_files()).

        • -
        -
      • sig_figs: (positive integer) The number of significant figures used -for the output values. By default, CmdStan represent the output values with -6 significant figures. The upper limit for sig_figs is 18. Increasing -this value can cause an increased usage of disk space due to larger -output CSV files.

      • - - -

        Arguments unique to the optimize method

        - -

        In addition to the -arguments above, the $optimize() method also has its own set of -arguments. These arguments are described briefly here and in greater detail -in the CmdStan manual. Arguments left at NULL default to the default used -by the installed version of CmdStan.

          -
        • threads: (positive integer) If the model was +corresponding to the defaults used by $save_output_files().

        • +
        sig_figs

        (positive integer) The number of significant figures used +when storing the output values. By default, CmdStan represents the output +values with 6 significant figures. The upper limit for sig_figs is 18. +Increasing this value will result in larger output CSV files and thus an +increased usage of disk space.

        threads

        (positive integer) If the model was compiled with threading support, the number of threads to use in parallelized sections (e.g., when -using the Stan functions reduce_sum() or map_rect()).

        -
      • algorithm: (string) The optimization algorithm. One of "lbfgs", -"bfgs", or "newton".

      • -
      • iter: (positive integer) The number of iterations.

      • -
      • init_alpha: (nonnegative real) The line search step size for first -iteration. Not applicable if algorithm="newton".

      • - +using the Stan functions reduce_sum() or map_rect()).

        algorithm

        (string) The optimization algorithm. One of "lbfgs", +"bfgs", or "newton". The control parameters below are only available +for "lbfgs" and "bfgs". For their default values and more details see +the CmdStan User's Guide. The default values can also be obtained by +running cmdstanr_example(method="optimize")$metadata().

        init_alpha

        (positive real) The initial step size parameter.

        iter

        (positive integer) The maximum number of iterations.

        tol_obj

        (positive real) Convergence tolerance on changes in objective function value.

        tol_rel_obj

        (positive real) Convergence tolerance on relative changes in objective function value.

        tol_grad

        (positive real) Convergence tolerance on the norm of the gradient.

        tol_rel_grad

        (positive real) Convergence tolerance on the relative norm of the gradient.

        tol_param

        (positive real) Convergence tolerance on changes in parameter value.

        history_size

        (positive integer) The size of the history used when +approximating the Hessian. Only available for L-BFGS.

        +

        Details

        + +

        CmdStan can find the posterior mode (assuming there is one). If the +posterior is not convex, there is no guarantee Stan will be able to find +the global mode as opposed to a local optimum of log probability. For +optimization, the mode is calculated without the Jacobian adjustment for +constrained variables, which shifts the mode due to the change of +variables. Thus modes correspond to modes of the model as written.

        +

        -- CmdStan User's Guide

        Value

        -

        The $optimize() method returns a CmdStanMLE object.

        +

        A CmdStanMLE object.

        See also

        @@ -401,7 +463,7 @@

        Examp #> #> Both chains finished successfully. #> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.2 seconds.
        +#> Total execution time: 0.1 seconds.
        # Use 'posterior' package for summaries fit_mcmc$summary()
        #> # A tibble: 2 x 10 #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail @@ -450,7 +512,7 @@

        Examp # Plot posterior using bayesplot (ggplot2) mcmc_hist(fit_mcmc$draws("theta"))

        #> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
        # Call CmdStan's diagnose and stansummary utilities -fit_mcmc$cmdstan_diagnose()
        #> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp4wNiLP/bernoulli-202011301605-1-0ca04a.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp4wNiLP/bernoulli-202011301605-2-0ca04a.csv +fit_mcmc$cmdstan_diagnose()
        #> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171339-1-29cea2.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171339-2-29cea2.csv #> #> Checking sampler transitions treedepth. #> Treedepth satisfactory for all transitions. @@ -465,24 +527,24 @@

        Examp #> #> Split R-hat values satisfactory all parameters. #> -#> Processing complete, no problems detected.

        fit_mcmc$cmdstan_summary()
        #> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp4wNiLP/bernoulli-202011301605-1-0ca04a.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp4wNiLP/bernoulli-202011301605-2-0ca04a.csv +#> Processing complete, no problems detected.
        fit_mcmc$cmdstan_summary()
        #> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171339-1-29cea2.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171339-2-29cea2.csv #> Inference for Stan model: bernoulli_model #> 2 chains: each with iter=(1000,1000); warmup=(0,0); thin=(1,1); 2000 iterations saved. #> -#> Warmup took (0.0070, 0.0070) seconds, 0.014 seconds total -#> Sampling took (0.014, 0.013) seconds, 0.027 seconds total +#> Warmup took (0.0080, 0.0080) seconds, 0.016 seconds total +#> Sampling took (0.022, 0.020) seconds, 0.042 seconds total #> #> Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat #> -#> lp__ -7.3 3.1e-02 0.74 -8.8 -7.0 -6.8 586 21709 1.0 -#> accept_stat__ 0.92 5.0e-03 0.14 0.61 0.97 1.0 7.3e+02 2.7e+04 1.0e+00 -#> stepsize__ 1.0 9.0e-02 0.090 0.93 1.1 1.1 1.0e+00 3.7e+01 2.6e+13 -#> treedepth__ 1.4 1.2e-02 0.52 1.0 1.0 2.0 1.9e+03 6.9e+04 1.0e+00 -#> n_leapfrog__ 2.6 4.0e-01 1.5 1.0 3.0 7.0 1.4e+01 5.2e+02 1.0e+00 +#> lp__ -7.3 3.1e-02 0.74 -8.8 -7.0 -6.8 586 13956 1.0 +#> accept_stat__ 0.92 5.0e-03 0.14 0.61 0.97 1.0 7.3e+02 1.7e+04 1.0e+00 +#> stepsize__ 1.0 9.0e-02 0.090 0.93 1.1 1.1 1.0e+00 2.4e+01 2.6e+13 +#> treedepth__ 1.4 1.2e-02 0.52 1.0 1.0 2.0 1.9e+03 4.4e+04 1.0e+00 +#> n_leapfrog__ 2.6 4.0e-01 1.5 1.0 3.0 7.0 1.4e+01 3.3e+02 1.0e+00 #> divergent__ 0.00 nan 0.00 0.00 0.00 0.00 nan nan nan -#> energy__ 7.8 4.0e-02 1.0 6.8 7.4 10.0 6.9e+02 2.6e+04 1.0e+00 +#> energy__ 7.8 4.0e-02 1.0 6.8 7.4 10.0 6.9e+02 1.6e+04 1.0e+00 #> -#> theta 0.25 4.5e-03 0.12 0.081 0.23 0.49 755 27971 1.00 +#> theta 0.25 4.5e-03 0.12 0.081 0.23 0.49 755 17981 1.00 #> #> Samples were drawn using hmc with nuts. #> For each parameter, N_Eff is a crude measure of effective sample size, @@ -515,8 +577,8 @@

        Examp #> This procedure has not been thoroughly tested and may be unstable #> or buggy. The interface is subject to change. #> ------------------------------------------------------------ -#> Gradient evaluation took 7e-06 seconds -#> 1000 transitions using 10 leapfrog steps per transition would take 0.07 seconds. +#> Gradient evaluation took 1.7e-05 seconds +#> 1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds. #> Adjust your expectations accordingly! #> Begin eta adaptation. #> Iteration: 1 / 250 [ 0%] (Adaptation) @@ -556,7 +618,7 @@

        Examp #> #> Both chains finished successfully. #> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.3 seconds.

        fit_mcmc_w_init_fun_2 <- mod$sample( +#> Total execution time: 0.2 seconds.
        fit_mcmc_w_init_fun_2 <- mod$sample( data = stan_data, seed = 123, chains = 2, diff --git a/docs/reference/model-method-sample.html b/docs/reference/model-method-sample.html index d33b28fa9..aaeab89f2 100644 --- a/docs/reference/model-method-sample.html +++ b/docs/reference/model-method-sample.html @@ -50,7 +50,11 @@ +some data. +Any argument left as NULL will default to the default value used by the +installed version of CmdStan. See the +CmdStan User’s Guide +for more details." /> @@ -83,7 +87,7 @@ cmdstanr - 0.2.0 + 0.3.0
        @@ -183,210 +187,294 @@

        Run Stan's MCMC algorithms

        default MCMC algorithm in CmdStan (algorithm=hmc engine=nuts), to produce a set of draws from the posterior distribution of a model conditioned on some data.

        +

        Any argument left as NULL will default to the default value used by the +installed version of CmdStan. See the +CmdStan User’s Guide +for more details.

        - - -

        Usage

        - - -
        $sample(
        -  data = NULL,
        -  seed = NULL,
        -  refresh = NULL,
        -  init = NULL,
        -  save_latent_dynamics = FALSE,
        -  output_dir = NULL,
        -  chains = 4,
        -  parallel_chains = getOption("mc.cores", 1),
        -  chain_ids = seq_len(chains),
        -  threads_per_chain = NULL,
        -  iter_warmup = NULL,
        -  iter_sampling = NULL,
        -  save_warmup = FALSE,
        -  thin = NULL,
        -  max_treedepth = NULL,
        -  adapt_engaged = TRUE,
        -  adapt_delta = NULL,
        -  step_size = NULL,
        -  metric = NULL,
        -  metric_file = NULL,
        -  inv_metric = NULL,
        -  init_buffer = NULL,
        -  term_buffer = NULL,
        -  window = NULL,
        -  fixed_param = FALSE,
        -  sig_figs = NULL,
        -  validate_csv = TRUE,
        -  show_messages = TRUE
        -)
        -
        - -

        Arguments shared by all fitting methods

        - -

        The following arguments can -be specified for any of the fitting methods (sample, optimize, -variational). Arguments left at NULL default to the default used by the -installed version of CmdStan.

          -
        • data: (multiple options) The data to use. One of the following:

            -
          • A named list of R objects (like for RStan). Internally this list is -then written to JSON for CmdStan using write_stan_json().

          • -
          • A path to a data file compatible with CmdStan (JSON or R dump). See -the appendices in the CmdStan manual for details on using these formats.

          • -
        • -
        • seed: (positive integer) A seed for the (P)RNG to pass to CmdStan.

        • -
        • refresh: (non-negative integer) The number of iterations between -printed screen updates. If refresh = 0, only error messages will be printed.

        • -
        • init: (multiple options) The initialization method for the parameters block:

            -
          • A real number x>0 initializes randomly between [-x,x] (on the -unconstrained parameter space);

          • -
          • 0 initializes to 0;

          • -
          • A character vector of paths (one per chain) to JSON or Rdump files. See +

            sample(
            +  data = NULL,
            +  seed = NULL,
            +  refresh = NULL,
            +  init = NULL,
            +  save_latent_dynamics = FALSE,
            +  output_dir = NULL,
            +  sig_figs = NULL,
            +  chains = 4,
            +  parallel_chains = getOption("mc.cores", 1),
            +  chain_ids = seq_len(chains),
            +  threads_per_chain = NULL,
            +  iter_warmup = NULL,
            +  iter_sampling = NULL,
            +  save_warmup = FALSE,
            +  thin = NULL,
            +  max_treedepth = NULL,
            +  adapt_engaged = TRUE,
            +  adapt_delta = NULL,
            +  step_size = NULL,
            +  metric = NULL,
            +  metric_file = NULL,
            +  inv_metric = NULL,
            +  init_buffer = NULL,
            +  term_buffer = NULL,
            +  window = NULL,
            +  fixed_param = FALSE,
            +  validate_csv = TRUE,
            +  show_messages = TRUE,
            +  cores = NULL,
            +  num_cores = NULL,
            +  num_chains = NULL,
            +  num_warmup = NULL,
            +  num_samples = NULL,
            +  save_extra_diagnostics = NULL,
            +  max_depth = NULL,
            +  stepsize = NULL
            +)
            + +

            Arguments

            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            data

            (multiple options) The data to use for the variables specified in +the data block of the Stan program. One of the following:

              +
            • A named list of R objects (like for RStan). Internally this list is then +written to JSON for CmdStan using write_stan_json().

            • +
            • A path to a data file compatible with CmdStan (JSON or R dump). See the +appendices in the CmdStan manual for details on using these formats.

            • +
            • NULL or an empty list if the Stan program has no data block.

            • +
            seed

            (positive integer) A seed for the (P)RNG to pass to CmdStan.

            refresh

            (non-negative integer) The number of iterations between +printed screen updates. If refresh = 0, only error messages will be +printed.

            init

            (multiple options) The initialization method to use for the +variables declared in the parameters block of the Stan program:

              +
            • A real number x>0. This initializes all parameters randomly between +[-x,x] (on the unconstrained parameter space);

            • +
            • The number 0. This initializes all parameters to 0;

            • +
            • A character vector of paths (one per chain) to JSON or Rdump files +containing initial values for all or some parameters. See write_stan_json() to write R objects to JSON files compatible with CmdStan.

            • -
            • A list of lists. For MCMC the list should contain a sublist for each -chain. For optimization and variational inference there should be just one -sublist. The sublists should have named elements corresponding to the -parameters for which you are specifying initial values. See Examples.

            • +
            • A list of lists containing initial values for all or some parameters. For +MCMC the list should contain a sublist for each chain. For optimization and +variational inference there should be just one sublist. The sublists should +have named elements corresponding to the parameters for which you are +specifying initial values. See Examples.

            • A function that returns a single list with names corresponding to the -parameters for which you are specifying initial values. The function -can take no arguments or a single argument chain_id. For MCMC, if the -function has argument chain_id it will be supplied with the chain id -(from 1 to number of chains) when called to generate the initial -values. See Examples.

            • -
            -
          • save_latent_dynamics: (logical) Should auxiliary diagnostic information +parameters for which you are specifying initial values. The function can +take no arguments or a single argument chain_id. For MCMC, if the function +has argument chain_id it will be supplied with the chain id (from 1 to +number of chains) when called to generate the initial values. See +Examples.

          • +
            save_latent_dynamics

            (logical) Should auxiliary diagnostic information about the latent dynamics be written to temporary diagnostic CSV files? This argument replaces CmdStan's diagnostic_file argument and the content written to CSV is controlled by the user's CmdStan installation and not -CmdStanR (and for some algorithms no content may be written). The default -is save_latent_dynamics=FALSE, which is appropriate for almost every use case -(all diagnostics recommended for users to check are always saved, e.g., -divergences for HMC). To save the temporary files created when -save_latent_dynamics=TRUE see the -$save_latent_dynamics_files() method.

            -
          • output_dir: (string) A path to a directory where CmdStan should write +CmdStanR (for some algorithms no content may be written). The default +is FALSE, which is appropriate for almost every use case. To save the +temporary files created when save_latent_dynamics=TRUE see the +$save_latent_dynamics_files() +method.

          • output_dir

            (string) A path to a directory where CmdStan should write its output CSV files. For interactive use this can typically be left at -NULL (temporary directory) since CmdStanR makes the CmdStan output (e.g., -posterior draws and diagnostics) available in R via methods of the fitted +NULL (temporary directory) since CmdStanR makes the CmdStan output +(posterior draws and diagnostics) available in R via methods of the fitted model objects. The behavior of output_dir is as follows:

            • If NULL (the default), then the CSV files are written to a temporary -directory and only saved permanently if the user calls one of the -$save_* methods of the fitted model object (e.g., +directory and only saved permanently if the user calls one of the $save_* +methods of the fitted model object (e.g., $save_output_files()). These temporary -files are removed when the fitted model object is garbage collected.

            • +files are removed when the fitted model object is +garbage collected (manually or automatically).

            • If a path, then the files are created in output_dir with names -corresponding the defaults used by $save_output_files() (and similar -methods like $save_latent_dynamics_files()).

            • -
            -
          • sig_figs: (positive integer) The number of significant figures used -for the output values. By default, CmdStan represent the output values with -6 significant figures. The upper limit for sig_figs is 18. Increasing -this value can cause an increased usage of disk space due to larger -output CSV files.

          • - - -

            Arguments unique to the sample method

            - -

            In addition to the -arguments above, the $sample() method also has its own set of arguments.

            -

            The following three arguments are offered by CmdStanR but do not correspond -to arguments in CmdStan:

              -
            • chains: (positive integer) The number of Markov chains to run. The -default is 4.

            • -
            • parallel_chains: (positive integer) The maximum number of MCMC chains +corresponding to the defaults used by $save_output_files().

            • +
            sig_figs

            (positive integer) The number of significant figures used +when storing the output values. By default, CmdStan represents the output +values with 6 significant figures. The upper limit for sig_figs is 18. +Increasing this value will result in larger output CSV files and thus an +increased usage of disk space.

            chains

            (positive integer) The number of Markov chains to run. The +default is 4.

            parallel_chains

            (positive integer) The maximum number of MCMC chains to run in parallel. If parallel_chains is not specified then the default is to look for the option "mc.cores", which can be set for an entire R session by options(mc.cores=value). If the "mc.cores" option has not -been set then the default is 1.

            -
          • chain_ids: (vector) A vector of chain IDs. Must contain chains unique +been set then the default is 1.

          • chain_ids

            (vector) A vector of chain IDs. Must contain chains unique positive integers. If not set, the default chain IDs are used (integers -starting from 1).

            -
          • threads_per_chain: (positive integer) If the model was +starting from 1).

          • threads_per_chain

            (positive integer) If the model was compiled with threading support, the number of threads to use in parallelized sections within an MCMC chain (e.g., when using the Stan functions reduce_sum() or map_rect()). This is in contrast with parallel_chains, which specifies the number of chains to run in parallel. The actual number of CPU cores used is parallel_chains*threads_per_chain. For an example of using threading see -the Stan case study Reduce Sum: A Minimal Example.

            -
          • show_messages: (logical) When TRUE (the default), prints all -informational messages, for example rejection of the current proposal. -Disable if you wish silence these messages, but this is not recommended -unless you are very sure that the model is correct up to numerical error. -If the messages are silenced then the $output() method of the resulting -fit object can be used to display all the silenced messages.

          • -
          • validate_csv: (logical) When TRUE (the default), validate the -sampling results in the csv files. Disable if you wish to manually read in -the sampling results and validate them yourself, for example using -read_cmdstan_csv().

          • - - -

            The rest of the arguments correspond to arguments offered by CmdStan, -although some names are slightly different. They are described briefly here -and in greater detail in the CmdStan manual. Arguments left at NULL -default to the default used by the installed version of CmdStan.

              -
            • iter_sampling: (positive integer) The number of post-warmup iterations to -run per chain.

            • -
            • iter_warmup: (positive integer) The number of warmup iterations to run -per chain.

            • -
            • save_warmup: (logical) Should warmup iterations be saved? The default +the Stan case study Reduce Sum: A Minimal Example.

            iter_warmup

            (positive integer) The number of warmup iterations to run +per chain. Note: in the CmdStan User's Guide this is referred to as +num_warmup.

            iter_sampling

            (positive integer) The number of post-warmup iterations +to run per chain. Note: in the CmdStan User's Guide this is referred to as +num_samples.

            save_warmup

            (logical) Should warmup iterations be saved? The default is FALSE. If save_warmup=TRUE then you can use $draws(inc_warmup=TRUE) to include warmup when -accessing the draws.

            -
          • thin: (positive integer) The period between saved samples. This should -be left at its default (no thinning) unless memory is a problem.

          • -
          • max_treedepth: (positive integer) The maximum allowed tree depth for the -NUTS engine. See the Tree Depth section of the CmdStan manual for more -details.

          • -
          • adapt_engaged: (logical) Do warmup adaptation? The default is TRUE. +accessing the draws.

          • thin

            (positive integer) The period between saved samples. This should +typically be left at its default (no thinning) unless memory is a problem.

            max_treedepth

            (positive integer) The maximum allowed tree depth for +the NUTS engine. See the Tree Depth section of the CmdStan User's Guide +for more details.

            adapt_engaged

            (logical) Do warmup adaptation? The default is TRUE. If a precomputed inverse metric is specified via the inv_metric argument (or metric_file) then, if adapt_engaged=TRUE, Stan will use the provided inverse metric just as an initial guess during adaptation. To turn off adaptation when using a precomputed inverse metric set -adapt_engaged=FALSE.

            -
          • adapt_delta: (real in (0,1)) The adaptation target acceptance -statistic.

          • -
          • step_size: (positive real) The initial step size for the discrete +adapt_engaged=FALSE.

          • adapt_delta

            (real in (0,1)) The adaptation target acceptance +statistic.

            step_size

            (positive real) The initial step size for the discrete approximation to continuous Hamiltonian dynamics. This is further tuned -during warmup.

            -
          • metric: (character) One of "diag_e", "dense_e", or "unit_e", +during warmup.

          • metric

            (character) One of "diag_e", "dense_e", or "unit_e", specifying the geometry of the base manifold. See the Euclidean Metric -section of the CmdStan documentation for more details. To specify a -precomputed (inverse) metric, see the inv_metric argument below.

            -
          • metric_file: (character) A character vector containing paths to JSON or +section of the CmdStan User's Guide for more details. To specify a +precomputed (inverse) metric, see the inv_metric argument below.

          • metric_file

            (character) A character vector containing paths to JSON or Rdump files (one per chain) compatible with CmdStan that contain precomputed inverse metrics. The metric_file argument is inherited from CmdStan but is confusing in that the entry in JSON or Rdump file(s) must be named inv_metric, referring to the inverse metric. We recommend instead using CmdStanR's inv_metric argument (see below) to specify an inverse -metric directly using a vector or matrix from your R session.

            -
          • inv_metric: (vector, matrix) A vector (if metric='diag_e') or a -matrix (if metric='dense_e') for initializing the inverse metric, which +metric directly using a vector or matrix from your R session.

          • inv_metric

            (vector, matrix) A vector (if metric='diag_e') or a +matrix (if metric='dense_e') for initializing the inverse metric. This can be used as an alternative to the metric_file argument. A vector is interpreted as a diagonal metric. The inverse metric is usually set to an estimate of the posterior covariance. See the adapt_engaged argument -above for details on (and control over) how specifying a precomputed -inverse metric interacts with adaptation.

            -
          • init_buffer: (nonnegative integer) Width of initial fast timestep -adaptation interval during warmup.

          • -
          • term_buffer: (nonnegative integer) Width of final fast timestep -adaptation interval during warmup.

          • -
          • window: (nonnegative integer) Initial width of slow timestep/metric -adaptation interval.

          • -
          • fixed_param: (logical) When TRUE, call CmdStan with argument +above for details about (and control over) how specifying a precomputed +inverse metric interacts with adaptation.

          • init_buffer

            (nonnegative integer) Width of initial fast timestep +adaptation interval during warmup.

            term_buffer

            (nonnegative integer) Width of final fast timestep +adaptation interval during warmup.

            window

            (nonnegative integer) Initial width of slow timestep/metric +adaptation interval.

            fixed_param

            (logical) When TRUE, call CmdStan with argument "algorithm=fixed_param". The default is FALSE. The fixed parameter sampler generates a new sample without changing the current state of the Markov chain; only generated quantities may change. This can be useful when, for example, trying to generate pseudo-data using the generated quantities block. If the parameters block is empty then using fixed_param=TRUE is mandatory. When fixed_param=TRUE the chains and -parallel_chains arguments will be set to 1.

            - +parallel_chains arguments will be set to 1.

            validate_csv

            (logical) When TRUE (the default), validate the +sampling results in the csv files. Disable if you wish to manually read in +the sampling results and validate them yourself, for example using +read_cmdstan_csv().

            show_messages

            (logical) When TRUE (the default), prints all +informational messages, for example rejection of the current proposal. +Disable if you wish to silence these messages, but this is not recommended +unless you are very sure that the model is correct up to numerical error. +If the messages are silenced then the $output() method of the resulting +fit object can be used to display all the silenced messages.

            cores, num_cores, num_chains, num_warmup, num_samples, save_extra_diagnostics, max_depth, stepsize

            Deprecated and will be removed in a future release.

            Value

            -

            The $sample() method returns a CmdStanMCMC object.

            +

            A CmdStanMCMC object.

            See also

            Examples

            @@ -491,7 +580,7 @@

            Examp #> #> Both chains finished successfully. #> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.1 seconds.
            +#> Total execution time: 0.2 seconds.
            # Use 'posterior' package for summaries fit_mcmc$summary()
            #> # A tibble: 2 x 10 #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail @@ -540,10 +629,7 @@

            Examp # Plot posterior using bayesplot (ggplot2) mcmc_hist(fit_mcmc$draws("theta"))

            #> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
            # Call CmdStan's diagnose and stansummary utilities -fit_mcmc$cmdstan_diagnose()
            #> Running bin/diagnose \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121419-1-771cfa.csv \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121419-2-771cfa.csv -#> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121419-1-771cfa.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121419-2-771cfa.csv +fit_mcmc$cmdstan_diagnose()
            #> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171339-1-95e531.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171339-2-95e531.csv #> #> Checking sampler transitions treedepth. #> Treedepth satisfactory for all transitions. @@ -558,27 +644,24 @@

            Examp #> #> Split R-hat values satisfactory all parameters. #> -#> Processing complete, no problems detected.

            fit_mcmc$cmdstan_summary()
            #> Running bin/stansummary \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121419-1-771cfa.csv \ -#> /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121419-2-771cfa.csv -#> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121419-1-771cfa.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpL7jcf4/bernoulli-202011121419-2-771cfa.csv +#> Processing complete, no problems detected.
            fit_mcmc$cmdstan_summary()
            #> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171339-1-95e531.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171339-2-95e531.csv #> Inference for Stan model: bernoulli_model #> 2 chains: each with iter=(1000,1000); warmup=(0,0); thin=(1,1); 2000 iterations saved. #> -#> Warmup took (0.0070, 0.0070) seconds, 0.014 seconds total -#> Sampling took (0.018, 0.018) seconds, 0.036 seconds total +#> Warmup took (0.010, 0.0070) seconds, 0.017 seconds total +#> Sampling took (0.018, 0.017) seconds, 0.035 seconds total #> #> Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat #> -#> lp__ -7.3 3.1e-02 0.74 -8.8 -7.0 -6.8 586 16282 1.0 -#> accept_stat__ 0.92 5.0e-03 0.14 0.61 0.97 1.0 7.3e+02 2.0e+04 1.0e+00 -#> stepsize__ 1.0 9.0e-02 0.090 0.93 1.1 1.1 1.0e+00 2.8e+01 2.6e+13 -#> treedepth__ 1.4 1.2e-02 0.52 1.0 1.0 2.0 1.9e+03 5.2e+04 1.0e+00 -#> n_leapfrog__ 2.6 4.0e-01 1.5 1.0 3.0 7.0 1.4e+01 3.9e+02 1.0e+00 +#> lp__ -7.3 3.1e-02 0.74 -8.8 -7.0 -6.8 586 16747 1.0 +#> accept_stat__ 0.92 5.0e-03 0.14 0.61 0.97 1.0 7.3e+02 2.1e+04 1.0e+00 +#> stepsize__ 1.0 9.0e-02 0.090 0.93 1.1 1.1 1.0e+00 2.9e+01 2.6e+13 +#> treedepth__ 1.4 1.2e-02 0.52 1.0 1.0 2.0 1.9e+03 5.3e+04 1.0e+00 +#> n_leapfrog__ 2.6 4.0e-01 1.5 1.0 3.0 7.0 1.4e+01 4.0e+02 1.0e+00 #> divergent__ 0.00 nan 0.00 0.00 0.00 0.00 nan nan nan -#> energy__ 7.8 4.0e-02 1.0 6.8 7.4 10.0 6.9e+02 1.9e+04 1.0e+00 +#> energy__ 7.8 4.0e-02 1.0 6.8 7.4 10.0 6.9e+02 2.0e+04 1.0e+00 #> -#> theta 0.25 4.5e-03 0.12 0.081 0.23 0.49 755 20978 1.00 +#> theta 0.25 4.5e-03 0.12 0.081 0.23 0.49 755 21577 1.00 #> #> Samples were drawn using hmc with nuts. #> For each parameter, N_Eff is a crude measure of effective sample size, @@ -611,8 +694,8 @@

            Examp #> This procedure has not been thoroughly tested and may be unstable #> or buggy. The interface is subject to change. #> ------------------------------------------------------------ -#> Gradient evaluation took 7e-06 seconds -#> 1000 transitions using 10 leapfrog steps per transition would take 0.07 seconds. +#> Gradient evaluation took 1.1e-05 seconds +#> 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds. #> Adjust your expectations accordingly! #> Begin eta adaptation. #> Iteration: 1 / 250 [ 0%] (Adaptation) diff --git a/docs/reference/model-method-sample_mpi.html b/docs/reference/model-method-sample_mpi.html new file mode 100644 index 000000000..269e5056d --- /dev/null +++ b/docs/reference/model-method-sample_mpi.html @@ -0,0 +1,551 @@ + + + + + + + + +Run Stan's MCMC algorithms with MPI — model-method-sample_mpi • cmdstanr + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            +
            + + + + +
            + +
            +
            + + +
            +

            The $sample_mpi() method of a CmdStanModel object is +identical to the $sample() method but with support for +MPI. The target audience for MPI is +users with large computer clusters. For other users, the +$sample() method provides both parallelization of +chains and threading support for within-chain parallelization.

            +

            In order to use MPI with Stan, an MPI implementation must be +installed. For Unix systems the most commonly used implementations are +MPICH and OpenMPI. The implementations provide an MPI C++ compiler wrapper +(for example mpicxx), which is required to compile the model.

            +

            An example of compiling with MPI:

            mpi_options = list(STAN_MPI=TRUE, CXX="mpicxx", TBB_CXX_TYPE="gcc")
            +mod = cmdstan_model("model.stan", cpp_options = mpi_options)
            + +

            The C++ options that must be supplied to the +compile call are:

              +
            • STAN_MPI: Enables the use of MPI with Stan if TRUE.

            • +
            • CXX: The name of the MPI C++ compiler wrapper. Typically "mpicxx".

            • +
            • TBB_CXX_TYPE: The C++ compiler the MPI wrapper wraps. Typically "gcc" +on Linux and "clang" on macOS.

            • +
            + +

            In the call to the $sample_mpi() method it is also possible to provide +the name of the MPI launcher (mpi_cmd, defaulting to "mpiexec") and any +other MPI launch arguments (mpi_args). In most cases, it is enough to +only define the number of processes. To use n_procs processes specify +mpi_args = list("n" = n_procs).

            +
            + +
            sample_mpi(
            +  data = NULL,
            +  mpi_cmd = "mpiexec",
            +  mpi_args = NULL,
            +  seed = NULL,
            +  refresh = NULL,
            +  init = NULL,
            +  save_latent_dynamics = FALSE,
            +  output_dir = NULL,
            +  chains = 1,
            +  chain_ids = seq_len(chains),
            +  iter_warmup = NULL,
            +  iter_sampling = NULL,
            +  save_warmup = FALSE,
            +  thin = NULL,
            +  max_treedepth = NULL,
            +  adapt_engaged = TRUE,
            +  adapt_delta = NULL,
            +  step_size = NULL,
            +  metric = NULL,
            +  metric_file = NULL,
            +  inv_metric = NULL,
            +  init_buffer = NULL,
            +  term_buffer = NULL,
            +  window = NULL,
            +  fixed_param = FALSE,
            +  sig_figs = NULL,
            +  validate_csv = TRUE,
            +  show_messages = TRUE
            +)
            + +

            Arguments

            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            data

            (multiple options) The data to use for the variables specified in +the data block of the Stan program. One of the following:

              +
            • A named list of R objects (like for RStan). Internally this list is then +written to JSON for CmdStan using write_stan_json().

            • +
            • A path to a data file compatible with CmdStan (JSON or R dump). See the +appendices in the CmdStan manual for details on using these formats.

            • +
            • NULL or an empty list if the Stan program has no data block.

            • +
            mpi_cmd

            (character vector) The MPI launcher used for launching MPI +processes. The default launcher is "mpiexec".

            mpi_args

            (list) A list of arguments to use when launching MPI +processes. For example, mpi_args = list("n" = 4) launches the executable +as mpiexec -n 4 model_executable, followed by CmdStan arguments for the +model executable.

            seed

            (positive integer) A seed for the (P)RNG to pass to CmdStan.

            refresh

            (non-negative integer) The number of iterations between +printed screen updates. If refresh = 0, only error messages will be +printed.

            init

            (multiple options) The initialization method to use for the +variables declared in the parameters block of the Stan program:

              +
            • A real number x>0. This initializes all parameters randomly between +[-x,x] (on the unconstrained parameter space);

            • +
            • The number 0. This initializes all parameters to 0;

            • +
            • A character vector of paths (one per chain) to JSON or Rdump files +containing initial values for all or some parameters. See +write_stan_json() to write R objects to JSON files compatible with +CmdStan.

            • +
            • A list of lists containing initial values for all or some parameters. For +MCMC the list should contain a sublist for each chain. For optimization and +variational inference there should be just one sublist. The sublists should +have named elements corresponding to the parameters for which you are +specifying initial values. See Examples.

            • +
            • A function that returns a single list with names corresponding to the +parameters for which you are specifying initial values. The function can +take no arguments or a single argument chain_id. For MCMC, if the function +has argument chain_id it will be supplied with the chain id (from 1 to +number of chains) when called to generate the initial values. See +Examples.

            • +
            save_latent_dynamics

            (logical) Should auxiliary diagnostic information +about the latent dynamics be written to temporary diagnostic CSV files? +This argument replaces CmdStan's diagnostic_file argument and the content +written to CSV is controlled by the user's CmdStan installation and not +CmdStanR (for some algorithms no content may be written). The default +is FALSE, which is appropriate for almost every use case. To save the +temporary files created when save_latent_dynamics=TRUE see the +$save_latent_dynamics_files() +method.

            output_dir

            (string) A path to a directory where CmdStan should write +its output CSV files. For interactive use this can typically be left at +NULL (temporary directory) since CmdStanR makes the CmdStan output +(posterior draws and diagnostics) available in R via methods of the fitted +model objects. The behavior of output_dir is as follows:

              +
            • If NULL (the default), then the CSV files are written to a temporary +directory and only saved permanently if the user calls one of the $save_* +methods of the fitted model object (e.g., +$save_output_files()). These temporary +files are removed when the fitted model object is +garbage collected (manually or automatically).

            • +
            • If a path, then the files are created in output_dir with names +corresponding to the defaults used by $save_output_files().

            • +
            chains

            (positive integer) The number of Markov chains to run. The +default is 1.

            chain_ids

            (vector) A vector of chain IDs. Must contain chains unique +positive integers. If not set, the default chain IDs are used (integers +starting from 1).

            iter_warmup

            (positive integer) The number of warmup iterations to run +per chain. Note: in the CmdStan User's Guide this is referred to as +num_warmup.

            iter_sampling

            (positive integer) The number of post-warmup iterations +to run per chain. Note: in the CmdStan User's Guide this is referred to as +num_samples.

            save_warmup

            (logical) Should warmup iterations be saved? The default +is FALSE. If save_warmup=TRUE then you can use +$draws(inc_warmup=TRUE) to include warmup when +accessing the draws.

            thin

            (positive integer) The period between saved samples. This should +typically be left at its default (no thinning) unless memory is a problem.

            max_treedepth

            (positive integer) The maximum allowed tree depth for +the NUTS engine. See the Tree Depth section of the CmdStan User's Guide +for more details.

            adapt_engaged

            (logical) Do warmup adaptation? The default is TRUE. +If a precomputed inverse metric is specified via the inv_metric argument +(or metric_file) then, if adapt_engaged=TRUE, Stan will use the +provided inverse metric just as an initial guess during adaptation. To turn +off adaptation when using a precomputed inverse metric set +adapt_engaged=FALSE.

            adapt_delta

            (real in (0,1)) The adaptation target acceptance +statistic.

            step_size

            (positive real) The initial step size for the discrete +approximation to continuous Hamiltonian dynamics. This is further tuned +during warmup.

            metric

            (character) One of "diag_e", "dense_e", or "unit_e", +specifying the geometry of the base manifold. See the Euclidean Metric +section of the CmdStan User's Guide for more details. To specify a +precomputed (inverse) metric, see the inv_metric argument below.

            metric_file

            (character) A character vector containing paths to JSON or +Rdump files (one per chain) compatible with CmdStan that contain +precomputed inverse metrics. The metric_file argument is inherited from +CmdStan but is confusing in that the entry in JSON or Rdump file(s) must be +named inv_metric, referring to the inverse metric. We recommend instead +using CmdStanR's inv_metric argument (see below) to specify an inverse +metric directly using a vector or matrix from your R session.

            inv_metric

            (vector, matrix) A vector (if metric='diag_e') or a +matrix (if metric='dense_e') for initializing the inverse metric. This +can be used as an alternative to the metric_file argument. A vector is +interpreted as a diagonal metric. The inverse metric is usually set to an +estimate of the posterior covariance. See the adapt_engaged argument +above for details about (and control over) how specifying a precomputed +inverse metric interacts with adaptation.

            init_buffer

            (nonnegative integer) Width of initial fast timestep +adaptation interval during warmup.

            term_buffer

            (nonnegative integer) Width of final fast timestep +adaptation interval during warmup.

            window

            (nonnegative integer) Initial width of slow timestep/metric +adaptation interval.

            fixed_param

            (logical) When TRUE, call CmdStan with argument +"algorithm=fixed_param". The default is FALSE. The fixed parameter +sampler generates a new sample without changing the current state of the +Markov chain; only generated quantities may change. This can be useful +when, for example, trying to generate pseudo-data using the generated +quantities block. If the parameters block is empty then using +fixed_param=TRUE is mandatory. When fixed_param=TRUE the chains and +parallel_chains arguments will be set to 1.

            sig_figs

            (positive integer) The number of significant figures used +when storing the output values. By default, CmdStan represents the output +values with 6 significant figures. The upper limit for sig_figs is 18. +Increasing this value will result in larger output CSV files and thus an +increased usage of disk space.

            validate_csv

            (logical) When TRUE (the default), validate the +sampling results in the csv files. Disable if you wish to manually read in +the sampling results and validate them yourself, for example using +read_cmdstan_csv().

            show_messages

            (logical) When TRUE (the default), prints all +informational messages, for example rejection of the current proposal. +Disable if you wish to silence these messages, but this is not recommended +unless you are very sure that the model is correct up to numerical error. +If the messages are silenced then the $output() method of the resulting +fit object can be used to display all the silenced messages.

            + +

            Value

            + +

            A CmdStanMCMC object.

            +

            See also

            + +

            The CmdStanR website +(mc-stan.org/cmdstanr) for online +documentation and tutorials.

            +

            The Stan and CmdStan documentation:

            + +

            The Stan Math Library's MPI documentation +(mc-stan.org/math/mpi) for more +details on MPI support in Stan.

            +

            Other CmdStanModel methods: +model-method-check_syntax, +model-method-compile, +model-method-generate-quantities, +model-method-optimize, +model-method-sample, +model-method-variational

            + +

            Examples

            +
            # \dontrun{ +# mpi_options <- list(STAN_MPI=TRUE, CXX="mpicxx", TBB_CXX_TYPE="gcc") +# mod <- cmdstan_model("model.stan", cpp_options = mpi_options) +# fit <- mod$sample_mpi(..., mpi_args = list("n" = 4)) +# } + +
            +
            + +
            + + +
            + + +
            +

            Site built with pkgdown 1.5.1.

            +
            + +
            +
            + + + + + + + + diff --git a/docs/reference/model-method-variational-1.png b/docs/reference/model-method-variational-1.png index b248a3f89..08beb3c62 100644 Binary files a/docs/reference/model-method-variational-1.png and b/docs/reference/model-method-variational-1.png differ diff --git a/docs/reference/model-method-variational-2.png b/docs/reference/model-method-variational-2.png index 35db9ff48..df8262149 100644 Binary files a/docs/reference/model-method-variational-2.png and b/docs/reference/model-method-variational-2.png differ diff --git a/docs/reference/model-method-variational.html b/docs/reference/model-method-variational.html index b07357fc5..634c27d28 100644 --- a/docs/reference/model-method-variational.html +++ b/docs/reference/model-method-variational.html @@ -48,7 +48,11 @@ +Stan's variational Bayes (ADVI) algorithms. +Any argument left as NULL will default to the default value used by the +installed version of CmdStan. See the +CmdStan User’s Guide +for more details." /> @@ -81,7 +85,7 @@ cmdstanr - 0.2.1 + 0.3.0

            @@ -179,140 +183,186 @@

            Run Stan's variational approximation algorithms

            The $variational() method of a CmdStanModel object runs Stan's variational Bayes (ADVI) algorithms.

            +

            Any argument left as NULL will default to the default value used by the +installed version of CmdStan. See the +CmdStan User’s Guide +for more details.

            - - -

            Details

            - -

            CmdStan can fit a variational approximation to the posterior. The -approximation is a Gaussian in the unconstrained variable space. Stan -implements two variational algorithms. The algorithm="meanfield" option -uses a fully factorized Gaussian for the approximation. The -algorithm="fullrank" option uses a Gaussian with a full-rank covariance -matrix for the approximation.

            -

            -- CmdStan Interface User's Guide

            -

            Usage

            - - -
            $variational(
            -  data = NULL,
            -  seed = NULL,
            -  refresh = NULL,
            -  init = NULL,
            -  save_latent_dynamics = FALSE,
            -  output_dir = NULL,
            -  threads = NULL,
            -  algorithm = NULL,
            -  iter = NULL,
            -  grad_samples = NULL,
            -  elbo_samples = NULL,
            -  eta = NULL,
            -  adapt_engaged = NULL,
            -  adapt_iter = NULL,
            -  tol_rel_obj = NULL,
            -  eval_elbo = NULL,
            -  output_samples = NULL,
            -  sig_figs = NULL
            -)
            -
            - -

            Arguments shared by all fitting methods

            - -

            The following arguments can -be specified for any of the fitting methods (sample, optimize, -variational). Arguments left at NULL default to the default used by the -installed version of CmdStan.

              -
            • data: (multiple options) The data to use. One of the following:

                -
              • A named list of R objects (like for RStan). Internally this list is -then written to JSON for CmdStan using write_stan_json().

              • -
              • A path to a data file compatible with CmdStan (JSON or R dump). See -the appendices in the CmdStan manual for details on using these formats.

              • -
            • -
            • seed: (positive integer) A seed for the (P)RNG to pass to CmdStan.

            • -
            • refresh: (non-negative integer) The number of iterations between -printed screen updates. If refresh = 0, only error messages will be printed.

            • -
            • init: (multiple options) The initialization method for the parameters block:

                -
              • A real number x>0 initializes randomly between [-x,x] (on the -unconstrained parameter space);

              • -
              • 0 initializes to 0;

              • -
              • A character vector of paths (one per chain) to JSON or Rdump files. See +

                variational(
                +  data = NULL,
                +  seed = NULL,
                +  refresh = NULL,
                +  init = NULL,
                +  save_latent_dynamics = FALSE,
                +  output_dir = NULL,
                +  sig_figs = NULL,
                +  threads = NULL,
                +  algorithm = NULL,
                +  iter = NULL,
                +  grad_samples = NULL,
                +  elbo_samples = NULL,
                +  eta = NULL,
                +  adapt_engaged = NULL,
                +  adapt_iter = NULL,
                +  tol_rel_obj = NULL,
                +  eval_elbo = NULL,
                +  output_samples = NULL
                +)
                + +

                Arguments

                + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                data

                (multiple options) The data to use for the variables specified in +the data block of the Stan program. One of the following:

                  +
                • A named list of R objects (like for RStan). Internally this list is then +written to JSON for CmdStan using write_stan_json().

                • +
                • A path to a data file compatible with CmdStan (JSON or R dump). See the +appendices in the CmdStan manual for details on using these formats.

                • +
                • NULL or an empty list if the Stan program has no data block.

                • +
                seed

                (positive integer) A seed for the (P)RNG to pass to CmdStan.

                refresh

                (non-negative integer) The number of iterations between +printed screen updates. If refresh = 0, only error messages will be +printed.

                init

                (multiple options) The initialization method to use for the +variables declared in the parameters block of the Stan program:

                  +
                • A real number x>0. This initializes all parameters randomly between +[-x,x] (on the unconstrained parameter space);

                • +
                • The number 0. This initializes all parameters to 0;

                • +
                • A character vector of paths (one per chain) to JSON or Rdump files +containing initial values for all or some parameters. See write_stan_json() to write R objects to JSON files compatible with CmdStan.

                • -
                • A list of lists. For MCMC the list should contain a sublist for each -chain. For optimization and variational inference there should be just one -sublist. The sublists should have named elements corresponding to the -parameters for which you are specifying initial values. See Examples.

                • +
                • A list of lists containing initial values for all or some parameters. For +MCMC the list should contain a sublist for each chain. For optimization and +variational inference there should be just one sublist. The sublists should +have named elements corresponding to the parameters for which you are +specifying initial values. See Examples.

                • A function that returns a single list with names corresponding to the -parameters for which you are specifying initial values. The function -can take no arguments or a single argument chain_id. For MCMC, if the -function has argument chain_id it will be supplied with the chain id -(from 1 to number of chains) when called to generate the initial -values. See Examples.

                • -
                -
              • save_latent_dynamics: (logical) Should auxiliary diagnostic information +parameters for which you are specifying initial values. The function can +take no arguments or a single argument chain_id. For MCMC, if the function +has argument chain_id it will be supplied with the chain id (from 1 to +number of chains) when called to generate the initial values. See +Examples.

              • +
                save_latent_dynamics

                (logical) Should auxiliary diagnostic information about the latent dynamics be written to temporary diagnostic CSV files? This argument replaces CmdStan's diagnostic_file argument and the content written to CSV is controlled by the user's CmdStan installation and not -CmdStanR (and for some algorithms no content may be written). The default -is save_latent_dynamics=FALSE, which is appropriate for almost every use case -(all diagnostics recommended for users to check are always saved, e.g., -divergences for HMC). To save the temporary files created when -save_latent_dynamics=TRUE see the -$save_latent_dynamics_files() method.

                -
              • output_dir: (string) A path to a directory where CmdStan should write +CmdStanR (for some algorithms no content may be written). The default +is FALSE, which is appropriate for almost every use case. To save the +temporary files created when save_latent_dynamics=TRUE see the +$save_latent_dynamics_files() +method.

              • output_dir

                (string) A path to a directory where CmdStan should write its output CSV files. For interactive use this can typically be left at -NULL (temporary directory) since CmdStanR makes the CmdStan output (e.g., -posterior draws and diagnostics) available in R via methods of the fitted +NULL (temporary directory) since CmdStanR makes the CmdStan output +(posterior draws and diagnostics) available in R via methods of the fitted model objects. The behavior of output_dir is as follows:

                • If NULL (the default), then the CSV files are written to a temporary -directory and only saved permanently if the user calls one of the -$save_* methods of the fitted model object (e.g., +directory and only saved permanently if the user calls one of the $save_* +methods of the fitted model object (e.g., $save_output_files()). These temporary -files are removed when the fitted model object is garbage collected.

                • +files are removed when the fitted model object is +garbage collected (manually or automatically).

                • If a path, then the files are created in output_dir with names -corresponding the defaults used by $save_output_files() (and similar -methods like $save_latent_dynamics_files()).

                • -
                -
              • sig_figs: (positive integer) The number of significant figures used -for the output values. By default, CmdStan represent the output values with -6 significant figures. The upper limit for sig_figs is 18. Increasing -this value can cause an increased usage of disk space due to larger -output CSV files.

              • - - -

                Arguments unique to the variational method

                - -

                In addition to the -arguments above, the $variational() method also has its own set of -arguments. These arguments are described briefly here and in greater detail -in the CmdStan manual. Arguments left at NULL default to the default used -by the installed version of CmdStan.

                  -
                • threads: (positive integer) If the model was +corresponding to the defaults used by $save_output_files().

                • +
                sig_figs

                (positive integer) The number of significant figures used +when storing the output values. By default, CmdStan represents the output +values with 6 significant figures. The upper limit for sig_figs is 18. +Increasing this value will result in larger output CSV files and thus an +increased usage of disk space.

                threads

                (positive integer) If the model was compiled with threading support, the number of -threads to use in parallelized sections (e.g., when -using the Stan functions reduce_sum() or map_rect()).

                -
              • algorithm: (string) The algorithm. Either "meanfield" or "fullrank".

              • -
              • iter: (positive integer) The maximum number of iterations.

              • -
              • grad_samples: (positive integer) The number of samples for Monte Carlo -estimate of gradients.

              • -
              • elbo_samples: (positive integer) The number of samples for Monte Carlo -estimate of ELBO (objective function).

              • -
              • eta: (positive real) The step size weighting parameter for adaptive -step size sequence.

              • -
              • adapt_engaged: (logical) Do warmup adaptation?

              • -
              • adapt_iter: (positive integer) The maximum number of adaptation -iterations.

              • -
              • tol_rel_obj: (positive real) Convergence tolerance on the relative norm -of the objective.

              • -
              • eval_elbo: (positive integer) Evaluate ELBO every Nth iteration.

              • -
              • output_samples: (positive integer) Number of posterior samples to draw -and save.

              • - +threads to use in parallelized sections (e.g., when using the Stan +functions reduce_sum() or map_rect()).

                algorithm

                (string) The algorithm. Either "meanfield" or +"fullrank".

                iter

                (positive integer) The maximum number of iterations.

                grad_samples

                (positive integer) The number of samples for Monte Carlo +estimate of gradients.

                elbo_samples

                (positive integer) The number of samples for Monte Carlo +estimate of ELBO (objective function).

                eta

                (positive real) The step size weighting parameter for adaptive +step size sequence.

                adapt_engaged

                (logical) Do warmup adaptation?

                adapt_iter

                (positive integer) The maximum number of adaptation +iterations.

                tol_rel_obj

                (positive real) Convergence tolerance on the relative norm +of the objective.

                eval_elbo

                (positive integer) Evaluate ELBO every Nth iteration.

                output_samples

                (positive integer) Number of approximate posterior +samples to draw and save.

                + +

                Details

                +

                CmdStan can fit a variational approximation to the posterior. The +approximation is a Gaussian in the unconstrained variable space. Stan +implements two variational algorithms. The algorithm="meanfield" option +uses a fully factorized Gaussian for the approximation. The +algorithm="fullrank" option uses a Gaussian with a full-rank covariance +matrix for the approximation.

                +

                -- CmdStan Interface User's Guide

                Value

                -

                The $variational() method returns a CmdStanVB object.

                +

                A CmdStanVB object.

                See also

                Examples

                @@ -466,7 +517,7 @@

                Examp # Plot posterior using bayesplot (ggplot2) mcmc_hist(fit_mcmc$draws("theta"))
                #> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
                # Call CmdStan's diagnose and stansummary utilities -fit_mcmc$cmdstan_diagnose()
                #> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp4wNiLP/bernoulli-202011301605-1-8c61c7.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp4wNiLP/bernoulli-202011301605-2-8c61c7.csv +fit_mcmc$cmdstan_diagnose()
                #> Processing csv files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171339-1-065f92.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171339-2-065f92.csv #> #> Checking sampler transitions treedepth. #> Treedepth satisfactory for all transitions. @@ -481,24 +532,24 @@

                Examp #> #> Split R-hat values satisfactory all parameters. #> -#> Processing complete, no problems detected.

                fit_mcmc$cmdstan_summary()
                #> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp4wNiLP/bernoulli-202011301605-1-8c61c7.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/Rtmp4wNiLP/bernoulli-202011301605-2-8c61c7.csv +#> Processing complete, no problems detected.
                fit_mcmc$cmdstan_summary()
                #> Input files: /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171339-1-065f92.csv, /var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T/RtmpGraDrG/bernoulli-202012171339-2-065f92.csv #> Inference for Stan model: bernoulli_model #> 2 chains: each with iter=(1000,1000); warmup=(0,0); thin=(1,1); 2000 iterations saved. #> -#> Warmup took (0.0060, 0.0060) seconds, 0.012 seconds total -#> Sampling took (0.016, 0.016) seconds, 0.032 seconds total +#> Warmup took (0.0090, 0.0080) seconds, 0.017 seconds total +#> Sampling took (0.020, 0.016) seconds, 0.036 seconds total #> #> Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat #> -#> lp__ -7.3 3.1e-02 0.74 -8.8 -7.0 -6.8 586 18317 1.0 -#> accept_stat__ 0.92 5.0e-03 0.14 0.61 0.97 1.0 7.3e+02 2.3e+04 1.0e+00 -#> stepsize__ 1.0 9.0e-02 0.090 0.93 1.1 1.1 1.0e+00 3.1e+01 2.6e+13 -#> treedepth__ 1.4 1.2e-02 0.52 1.0 1.0 2.0 1.9e+03 5.8e+04 1.0e+00 -#> n_leapfrog__ 2.6 4.0e-01 1.5 1.0 3.0 7.0 1.4e+01 4.4e+02 1.0e+00 +#> lp__ -7.3 3.1e-02 0.74 -8.8 -7.0 -6.8 586 16282 1.0 +#> accept_stat__ 0.92 5.0e-03 0.14 0.61 0.97 1.0 7.3e+02 2.0e+04 1.0e+00 +#> stepsize__ 1.0 9.0e-02 0.090 0.93 1.1 1.1 1.0e+00 2.8e+01 2.6e+13 +#> treedepth__ 1.4 1.2e-02 0.52 1.0 1.0 2.0 1.9e+03 5.2e+04 1.0e+00 +#> n_leapfrog__ 2.6 4.0e-01 1.5 1.0 3.0 7.0 1.4e+01 3.9e+02 1.0e+00 #> divergent__ 0.00 nan 0.00 0.00 0.00 0.00 nan nan nan -#> energy__ 7.8 4.0e-02 1.0 6.8 7.4 10.0 6.9e+02 2.2e+04 1.0e+00 +#> energy__ 7.8 4.0e-02 1.0 6.8 7.4 10.0 6.9e+02 1.9e+04 1.0e+00 #> -#> theta 0.25 4.5e-03 0.12 0.081 0.23 0.49 755 23600 1.00 +#> theta 0.25 4.5e-03 0.12 0.081 0.23 0.49 755 20978 1.00 #> #> Samples were drawn using hmc with nuts. #> For each parameter, N_Eff is a crude measure of effective sample size, @@ -518,7 +569,7 @@

                Examp #> 6 -5.00402 0.000103557 2.55661e-07 1 1 9 #> Optimization terminated normally: #> Convergence detected: relative gradient magnitude is below tolerance -#> Finished in 0.4 seconds.

                +#> Finished in 0.1 seconds.
                fit_optim$summary()
                #> # A tibble: 2 x 2 #> variable estimate #> <chr> <dbl> @@ -531,8 +582,8 @@

                Examp #> This procedure has not been thoroughly tested and may be unstable #> or buggy. The interface is subject to change. #> ------------------------------------------------------------ -#> Gradient evaluation took 1.6e-05 seconds -#> 1000 transitions using 10 leapfrog steps per transition would take 0.16 seconds. +#> Gradient evaluation took 2.3e-05 seconds +#> 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds. #> Adjust your expectations accordingly! #> Begin eta adaptation. #> Iteration: 1 / 250 [ 0%] (Adaptation) @@ -572,7 +623,7 @@

                Examp #> #> Both chains finished successfully. #> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.4 seconds.

                fit_mcmc_w_init_fun_2 <- mod$sample( +#> Total execution time: 0.2 seconds.
                fit_mcmc_w_init_fun_2 <- mod$sample( data = stan_data, seed = 123, chains = 2, diff --git a/docs/reference/read_cmdstan_csv.html b/docs/reference/read_cmdstan_csv.html index 330d278bd..4ea3b387c 100644 --- a/docs/reference/read_cmdstan_csv.html +++ b/docs/reference/read_cmdstan_csv.html @@ -82,7 +82,7 @@ cmdstanr - 0.2.0 + 0.3.0
                @@ -297,13 +297,13 @@

                Examp #> ..$ stepsize_jitter : num 0 #> ..$ id : num [1:4] 1 2 3 4 #> ..$ init : num [1:4] 2 2 2 2 -#> ..$ seed : num [1:4] 4.84e+08 1.59e+09 1.16e+09 2.05e+09 +#> ..$ seed : num [1:4] 2.28e+08 9.36e+08 5.51e+07 7.81e+08 #> ..$ refresh : num 100 #> ..$ sig_figs : num -1 #> ..$ sampler_diagnostics : chr [1:6] "accept_stat__" "stepsize__" "treedepth__" "n_leapfrog__" ... #> ..$ model_params : chr [1:8] "lp__" "alpha_scalar" "theta_vector[1]" "theta_vector[2]" ... -#> ..$ step_size_adaptation: num [1:4] 0.777 0.695 0.741 0.679 -#> ..$ model_name : chr "file4754539a2780_model" +#> ..$ step_size_adaptation: num [1:4] 0.738 0.663 0.784 0.726 +#> ..$ model_name : chr "file2ed23068f793_model" #> ..$ adapt_engaged : num 1 #> ..$ adapt_delta : num 0.8 #> ..$ max_treedepth : num 10 @@ -317,31 +317,31 @@

                Examp #> .. ..$ tau_matrix : num [1:2] 2 2 #> ..$ stan_variables : chr [1:4] "lp__" "alpha_scalar" "theta_vector" "tau_matrix" #> $ inv_metric :List of 4 -#> ..$ 1: num [1:7] 1.002 1.121 1.034 0.974 1.033 ... -#> ..$ 2: num [1:7] 1.051 1.135 0.982 0.987 0.863 ... -#> ..$ 3: num [1:7] 0.923 1.001 1.002 1.086 0.976 ... -#> ..$ 4: num [1:7] 1.156 0.933 1.237 0.981 1.071 ... +#> ..$ 1: num [1:7] 0.924 1.057 0.981 0.958 1.153 ... +#> ..$ 2: num [1:7] 1.128 0.872 1.114 0.84 0.93 ... +#> ..$ 3: num [1:7] 0.823 1.038 1.009 0.976 1.071 ... +#> ..$ 4: num [1:7] 1.004 0.989 0.964 0.852 0.951 ... #> $ step_size :List of 4 -#> ..$ 1: num 0.777 -#> ..$ 2: num 0.695 -#> ..$ 3: num 0.741 -#> ..$ 4: num 0.679 -#> $ warmup_draws : 'draws_array' num [1:1000, 1:4, 1:8] -3.74 -3.74 -2.67 -5.78 -5.25 ... +#> ..$ 1: num 0.738 +#> ..$ 2: num 0.663 +#> ..$ 3: num 0.784 +#> ..$ 4: num 0.726 +#> $ warmup_draws : 'draws_array' num [1:1000, 1:4, 1:8] -2.37 -2.37 -2.37 -4.28 -2.85 ... #> ..- attr(*, "dimnames")=List of 3 #> .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... #> .. ..$ chain : chr [1:4] "1" "2" "3" "4" #> .. ..$ variable : chr [1:8] "lp__" "alpha_scalar" "theta_vector[1]" "theta_vector[2]" ... -#> $ post_warmup_draws : 'draws_array' num [1:1000, 1:4, 1:8] -4.65 -2.79 -3.38 -4.66 -3.47 ... +#> $ post_warmup_draws : 'draws_array' num [1:1000, 1:4, 1:8] -2.25 -1.48 -2.59 -2.24 -3.21 ... #> ..- attr(*, "dimnames")=List of 3 #> .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... #> .. ..$ chain : chr [1:4] "1" "2" "3" "4" #> .. ..$ variable : chr [1:8] "lp__" "alpha_scalar" "theta_vector[1]" "theta_vector[2]" ... -#> $ warmup_sampler_diagnostics : 'draws_array' num [1:1000, 1:4, 1:6] 0.543 0 0.949 0.638 1 ... +#> $ warmup_sampler_diagnostics : 'draws_array' num [1:1000, 1:4, 1:6] 9.99e-01 0.00 2.07e-10 9.81e-01 9.97e-01 ... #> ..- attr(*, "dimnames")=List of 3 #> .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... #> .. ..$ chain : chr [1:4] "1" "2" "3" "4" #> .. 
..$ variable : chr [1:6] "accept_stat__" "stepsize__" "treedepth__" "n_leapfrog__" ... -#> $ post_warmup_sampler_diagnostics: 'draws_array' num [1:1000, 1:4, 1:6] 1 0.999 0.945 0.936 1 ... +#> $ post_warmup_sampler_diagnostics: 'draws_array' num [1:1000, 1:4, 1:6] 0.88 0.988 0.915 0.852 0.832 ... #> ..- attr(*, "dimnames")=List of 3 #> .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... #> .. ..$ chain : chr [1:4] "1" "2" "3" "4" diff --git a/docs/reference/read_sample_csv.html b/docs/reference/read_sample_csv.html index f6dc532ea..87c8dca0f 100644 --- a/docs/reference/read_sample_csv.html +++ b/docs/reference/read_sample_csv.html @@ -80,7 +80,7 @@ cmdstanr - 0.2.0 + 0.3.0 diff --git a/docs/reference/register_knitr_engine.html b/docs/reference/register_knitr_engine.html index b30369740..0755e3870 100644 --- a/docs/reference/register_knitr_engine.html +++ b/docs/reference/register_knitr_engine.html @@ -83,7 +83,7 @@ cmdstanr - 0.2.0 + 0.3.0 @@ -192,8 +192,8 @@

                Arg override -

                Override knitr's built-in, RStan-based engine for stan. -See below for details.

                +

                Override knitr's built-in, RStan-based engine for Stan? The +default is TRUE. See Details.

                @@ -222,8 +222,8 @@

                R diff --git a/docs/reference/set_cmdstan_path.html b/docs/reference/set_cmdstan_path.html index 437f97bf2..3c03bb2f5 100644 --- a/docs/reference/set_cmdstan_path.html +++ b/docs/reference/set_cmdstan_path.html @@ -84,7 +84,7 @@ cmdstanr - 0.2.0 + 0.3.0 diff --git a/docs/reference/stan_threads.html b/docs/reference/stan_threads.html index 66e4c6389..8d261fc58 100644 --- a/docs/reference/stan_threads.html +++ b/docs/reference/stan_threads.html @@ -80,7 +80,7 @@ cmdstanr - 0.2.0 + 0.3.0 diff --git a/docs/reference/write_stan_file.html b/docs/reference/write_stan_file.html index b1e87be6f..59f1537a5 100644 --- a/docs/reference/write_stan_file.html +++ b/docs/reference/write_stan_file.html @@ -81,7 +81,7 @@ cmdstanr - 0.2.0 + 0.3.0 @@ -224,7 +224,7 @@

                Examp " f <- write_stan_file(stan_program) -print(f)
                #> [1] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T//RtmpL7jcf4/file47547a5c526c.stan"
                +print(f)
                #> [1] "/var/folders/h6/14xy_35x4wd2tz542dn0qhtc0000gn/T//RtmpGraDrG/file2ed2636bec12.stan"
                lines <- readLines(f) print(lines)
                #> [1] "" "data {" #> [3] " int<lower=0> N;" " int<lower=0,upper=1> y[N];" diff --git a/docs/reference/write_stan_json.html b/docs/reference/write_stan_json.html index fa84ffc31..bc8d8ae27 100644 --- a/docs/reference/write_stan_json.html +++ b/docs/reference/write_stan_json.html @@ -80,7 +80,7 @@ cmdstanr - 0.2.0 + 0.3.0
                @@ -211,13 +211,13 @@

                Examp #> "N": 5, #> "K": 2, #> "x": [ -#> [-0.0251647992420593, 1.73453781176118], -#> [-0.135033612306385, 0.401795768293151], -#> [-0.303830615746938, 2.34606323819581], -#> [-0.887980057704966, -1.23152953489455], -#> [0.334674559108924, -0.333769547399135] +#> [1.58069945043383, -2.37080962953567], +#> [0.940040246321862, -1.25325860935484], +#> [1.13143023201082, -0.174847951632965], +#> [-0.213695900693846, 0.108511592901088], +#> [0.95783890475, 0.516920454312649] #> ], -#> "y": [9, 15, 11, 10, 13], +#> "y": [12, 8, 8, 8, 8], #> "z": [1, 0] #> }

    diff --git a/docs/reference/write_stan_tempfile.html b/docs/reference/write_stan_tempfile.html index 4d3cdd2e3..ed7231775 100644 --- a/docs/reference/write_stan_tempfile.html +++ b/docs/reference/write_stan_tempfile.html @@ -80,7 +80,7 @@ cmdstanr - 0.2.0 + 0.3.0 diff --git a/docs/sitemap.xml b/docs/sitemap.xml index ce66adb1d..bd2781ea9 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -48,6 +48,9 @@ https://mc-stan.org/cmdstanr/reference/fit-method-inv_metric.html + + https://mc-stan.org/cmdstanr/reference/fit-method-loo.html + https://mc-stan.org/cmdstanr/reference/fit-method-lp.html @@ -57,6 +60,9 @@ https://mc-stan.org/cmdstanr/reference/fit-method-mle.html + + https://mc-stan.org/cmdstanr/reference/fit-method-num_chains.html + https://mc-stan.org/cmdstanr/reference/fit-method-output.html @@ -96,6 +102,9 @@ https://mc-stan.org/cmdstanr/reference/model-method-sample.html + + https://mc-stan.org/cmdstanr/reference/model-method-sample_mpi.html + https://mc-stan.org/cmdstanr/reference/model-method-variational.html diff --git a/man-roxygen/model-common-args.R b/man-roxygen/model-common-args.R index 24e90d3b1..d789ffd9f 100644 --- a/man-roxygen/model-common-args.R +++ b/man-roxygen/model-common-args.R @@ -1,58 +1,65 @@ -#' @section Arguments shared by all fitting methods: The following arguments can -#' be specified for any of the fitting methods (`sample`, `optimize`, -#' `variational`). Arguments left at `NULL` default to the default used by the -#' installed version of CmdStan. -#' * `data`: (multiple options) The data to use. One of the following: -#' - A named list of \R objects (like for RStan). Internally this list is -#' then written to JSON for CmdStan using [write_stan_json()]. -#' - A path to a data file compatible with CmdStan (JSON or \R dump). See -#' the appendices in the CmdStan manual for details on using these formats. -#' * `seed`: (positive integer) A seed for the (P)RNG to pass to CmdStan. 
-#' * `refresh`: (non-negative integer) The number of iterations between -#' printed screen updates. If `refresh = 0`, only error messages will be printed. -#' * `init`: (multiple options) The initialization method for the parameters block: -#' - A real number `x>0` initializes randomly between `[-x,x]` (on the -#' *unconstrained* parameter space); -#' - `0` initializes to `0`; -#' - A character vector of paths (one per chain) to JSON or Rdump files. See -#' [write_stan_json()] to write \R objects to JSON files compatible with -#' CmdStan. -#' - A list of lists. For MCMC the list should contain a sublist for each -#' chain. For optimization and variational inference there should be just one -#' sublist. The sublists should have named elements corresponding to the -#' parameters for which you are specifying initial values. See **Examples**. -#' - A function that returns a single list with names corresponding to the -#' parameters for which you are specifying initial values. The function -#' can take no arguments or a single argument `chain_id`. For MCMC, if the -#' function has argument `chain_id` it will be supplied with the chain id -#' (from 1 to number of chains) when called to generate the initial -#' values. See **Examples**. -#' * `save_latent_dynamics`: (logical) Should auxiliary diagnostic information +#' @param data (multiple options) The data to use for the variables specified in +#' the `data` block of the Stan program. One of the following: +#' * A named list of \R objects (like for RStan). Internally this list is then +#' written to JSON for CmdStan using [write_stan_json()]. +#' * A path to a data file compatible with CmdStan (JSON or \R dump). See the +#' appendices in the CmdStan manual for details on using these formats. +#' * `NULL` or an empty list if the Stan program has no `data` block. +#' +#' @param seed (positive integer) A seed for the (P)RNG to pass to CmdStan. 
+#' +#' @param refresh (non-negative integer) The number of iterations between +#' printed screen updates. If `refresh = 0`, only error messages will be +#' printed. +#' +#' @param init (multiple options) The initialization method to use for the +#' variables declared in the `parameters` block of the Stan program: +#' * A real number `x>0`. This initializes _all_ parameters randomly between +#' `[-x,x]` (on the _unconstrained_ parameter space); +#' * The number `0`. This initializes _all_ parameters to `0`; +#' * A character vector of paths (one per chain) to JSON or Rdump files +#' containing initial values for all or some parameters. See +#' [write_stan_json()] to write \R objects to JSON files compatible with +#' CmdStan. +#' * A list of lists containing initial values for all or some parameters. For +#' MCMC the list should contain a sublist for each chain. For optimization and +#' variational inference there should be just one sublist. The sublists should +#' have named elements corresponding to the parameters for which you are +#' specifying initial values. See **Examples**. +#' * A function that returns a single list with names corresponding to the +#' parameters for which you are specifying initial values. The function can +#' take no arguments or a single argument `chain_id`. For MCMC, if the function +#' has argument `chain_id` it will be supplied with the chain id (from 1 to +#' number of chains) when called to generate the initial values. See +#' **Examples**. +#' +#' @param save_latent_dynamics (logical) Should auxiliary diagnostic information #' about the latent dynamics be written to temporary diagnostic CSV files? #' This argument replaces CmdStan's `diagnostic_file` argument and the content #' written to CSV is controlled by the user's CmdStan installation and not -#' CmdStanR (and for some algorithms no content may be written). 
The default -#' is `save_latent_dynamics=FALSE`, which is appropriate for almost every use case -#' (all diagnostics recommended for users to check are _always_ saved, e.g., -#' divergences for HMC). To save the temporary files created when -#' `save_latent_dynamics=TRUE` see the -#' [`$save_latent_dynamics_files()`][fit-method-save_latent_dynamics_files] method. -#' * `output_dir`: (string) A path to a directory where CmdStan should write +#' CmdStanR (for some algorithms no content may be written). The default +#' is `FALSE`, which is appropriate for almost every use case. To save the +#' temporary files created when `save_latent_dynamics=TRUE` see the +#' [`$save_latent_dynamics_files()`][fit-method-save_latent_dynamics_files] +#' method. +#' +#' @param output_dir (string) A path to a directory where CmdStan should write #' its output CSV files. For interactive use this can typically be left at -#' `NULL` (temporary directory) since CmdStanR makes the CmdStan output (e.g., -#' posterior draws and diagnostics) available in \R via methods of the fitted +#' `NULL` (temporary directory) since CmdStanR makes the CmdStan output +#' (posterior draws and diagnostics) available in \R via methods of the fitted #' model objects. The behavior of `output_dir` is as follows: -#' - If `NULL` (the default), then the CSV files are written to a temporary -#' directory and only saved permanently if the user calls one of the -#' `$save_*` methods of the fitted model object (e.g., -#' [`$save_output_files()`][fit-method-save_output_files]). These temporary -#' files are removed when the fitted model object is garbage collected. -#' - If a path, then the files are created in `output_dir` with names -#' corresponding the defaults used by `$save_output_files()` (and similar -#' methods like `$save_latent_dynamics_files()`). -#' * `sig_figs`: (positive integer) The number of significant figures used -#' for the output values. 
By default, CmdStan represent the output values with -#' 6 significant figures. The upper limit for `sig_figs` is 18. Increasing -#' this value can cause an increased usage of disk space due to larger -#' output CSV files. +#' * If `NULL` (the default), then the CSV files are written to a temporary +#' directory and only saved permanently if the user calls one of the `$save_*` +#' methods of the fitted model object (e.g., +#' [`$save_output_files()`][fit-method-save_output_files]). These temporary +#' files are removed when the fitted model object is +#' [garbage collected][base::gc] (manually or automatically). +#' * If a path, then the files are created in `output_dir` with names +#' corresponding to the defaults used by `$save_output_files()`. +#' +#' @param sig_figs (positive integer) The number of significant figures used +#' when storing the output values. By default, CmdStan represents the output +#' values with 6 significant figures. The upper limit for `sig_figs` is 18. +#' Increasing this value will result in larger output CSV files and thus an +#' increased usage of disk space. #' diff --git a/man-roxygen/model-sample-args.R b/man-roxygen/model-sample-args.R new file mode 100644 index 000000000..1d8ccb502 --- /dev/null +++ b/man-roxygen/model-sample-args.R @@ -0,0 +1,93 @@ +#' @param chains (positive integer) The number of Markov chains to run. The +#' default is 4. +#' @param parallel_chains (positive integer) The _maximum_ number of MCMC chains +#' to run in parallel. If `parallel_chains` is not specified then the default +#' is to look for the option `"mc.cores"`, which can be set for an entire \R +#' session by `options(mc.cores=value)`. If the `"mc.cores"` option has not +#' been set then the default is `1`. +#' @param chain_ids (vector) A vector of chain IDs. Must contain `chains` unique +#' positive integers. If not set, the default chain IDs are used (integers +#' starting from `1`).
+#' @param threads_per_chain (positive integer) If the model was +#' [compiled][model-method-compile] with threading support, the number of +#' threads to use in parallelized sections _within_ an MCMC chain (e.g., when +#' using the Stan functions `reduce_sum()` or `map_rect()`). This is in +#' contrast with `parallel_chains`, which specifies the number of chains to +#' run in parallel. The actual number of CPU cores used is +#' `parallel_chains*threads_per_chain`. For an example of using threading see +#' the Stan case study [Reduce Sum: A Minimal +#' Example](https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html). +#' +#' @param show_messages (logical) When `TRUE` (the default), prints all +#' informational messages, for example rejection of the current proposal. +#' Disable if you wish to silence these messages, but this is not recommended +#' unless you are very sure that the model is correct up to numerical error. +#' If the messages are silenced then the `$output()` method of the resulting +#' fit object can be used to display all the silenced messages. +#' +#' @param validate_csv (logical) When `TRUE` (the default), validate the +#' sampling results in the csv files. Disable if you wish to manually read in +#' the sampling results and validate them yourself, for example using +#' [read_cmdstan_csv()]. +#' +#' @param iter_sampling (positive integer) The number of post-warmup iterations +#' to run per chain. Note: in the CmdStan User's Guide this is referred to as +#' `num_samples`. +#' @param iter_warmup (positive integer) The number of warmup iterations to run +#' per chain. Note: in the CmdStan User's Guide this is referred to as +#' `num_warmup`. +#' @param save_warmup (logical) Should warmup iterations be saved? The default +#' is `FALSE`. If `save_warmup=TRUE` then you can use +#' [$draws(inc_warmup=TRUE)][fit-method-draws] to include warmup when +#' accessing the draws.
+#' @param thin (positive integer) The period between saved samples. This should +#' typically be left at its default (no thinning) unless memory is a problem. +#' +#' @param max_treedepth (positive integer) The maximum allowed tree depth for +#' the NUTS engine. See the _Tree Depth_ section of the CmdStan User's Guide +#' for more details. +#' @param adapt_engaged (logical) Do warmup adaptation? The default is `TRUE`. +#' If a precomputed inverse metric is specified via the `inv_metric` argument +#' (or `metric_file`) then, if `adapt_engaged=TRUE`, Stan will use the +#' provided inverse metric just as an initial guess during adaptation. To turn +#' off adaptation when using a precomputed inverse metric set +#' `adapt_engaged=FALSE`. +#' @param adapt_delta (real in `(0,1)`) The adaptation target acceptance +#' statistic. +#' @param step_size (positive real) The _initial_ step size for the discrete +#' approximation to continuous Hamiltonian dynamics. This is further tuned +#' during warmup. +#' @param metric (character) One of `"diag_e"`, `"dense_e"`, or `"unit_e"`, +#' specifying the geometry of the base manifold. See the _Euclidean Metric_ +#' section of the CmdStan User's Guide for more details. To specify a +#' precomputed (inverse) metric, see the `inv_metric` argument below. +#' @param metric_file (character) A character vector containing paths to JSON or +#' Rdump files (one per chain) compatible with CmdStan that contain +#' precomputed inverse metrics. The `metric_file` argument is inherited from +#' CmdStan but is confusing in that the entry in JSON or Rdump file(s) must be +#' named `inv_metric`, referring to the _inverse_ metric. We recommend instead +#' using CmdStanR's `inv_metric` argument (see below) to specify an inverse +#' metric directly using a vector or matrix from your \R session. +#' @param inv_metric (vector, matrix) A vector (if `metric='diag_e'`) or a +#' matrix (if `metric='dense_e'`) for initializing the inverse metric. 
This +#' can be used as an alternative to the `metric_file` argument. A vector is +#' interpreted as a diagonal metric. The inverse metric is usually set to an +#' estimate of the posterior covariance. See the `adapt_engaged` argument +#' above for details about (and control over) how specifying a precomputed +#' inverse metric interacts with adaptation. +#' @param init_buffer (nonnegative integer) Width of initial fast timestep +#' adaptation interval during warmup. +#' @param term_buffer (nonnegative integer) Width of final fast timestep +#' adaptation interval during warmup. +#' @param window (nonnegative integer) Initial width of slow timestep/metric +#' adaptation interval. +#' +#' @param fixed_param (logical) When `TRUE`, call CmdStan with argument +#' `"algorithm=fixed_param"`. The default is `FALSE`. The fixed parameter +#' sampler generates a new sample without changing the current state of the +#' Markov chain; only generated quantities may change. This can be useful +#' when, for example, trying to generate pseudo-data using the generated +#' quantities block. If the parameters block is empty then using +#' `fixed_param=TRUE` is mandatory. When `fixed_param=TRUE` the `chains` and +#' `parallel_chains` arguments will be set to `1`. +#' diff --git a/man/CmdStanMCMC.Rd b/man/CmdStanMCMC.Rd index 4acf624a8..ce855b5cd 100644 --- a/man/CmdStanMCMC.Rd +++ b/man/CmdStanMCMC.Rd @@ -20,6 +20,7 @@ methods, all of which have their own (linked) documentation pages. \code{\link[=fit-method-inv_metric]{$inv_metric()}} \tab Return the inverse metric for each chain. \cr \code{\link[=fit-method-init]{$init()}} \tab Return user-specified initial values. \cr \code{\link[=fit-method-metadata]{$metadata()}} \tab Return a list of metadata gathered from the CmdStan CSV files. \cr + \code{\link[=fit-method-num_chains]{$num_chains()}} \tab Returns the number of MCMC chains. \cr } } @@ -46,8 +47,8 @@ methods, all of which have their own (linked) documentation pages. 
\subsection{Report run times, console output, return codes}{\tabular{ll}{ \strong{Method} \tab \strong{Description} \cr - \code{\link[=fit-method-time]{$time()}} \tab Report total and chain-specific run times. \cr \code{\link[=fit-method-output]{$output()}} \tab Return the stdout and stderr of all chains or pretty print the output for a single chain. \cr + \code{\link[=fit-method-time]{$time()}} \tab Report total and chain-specific run times. \cr \code{\link[=fit-method-return_codes]{$return_codes()}} \tab Return the return codes from the CmdStan runs. \cr } diff --git a/man/fit-method-cmdstan_summary.Rd b/man/fit-method-cmdstan_summary.Rd index 7939cd9cf..f715b5d24 100644 --- a/man/fit-method-cmdstan_summary.Rd +++ b/man/fit-method-cmdstan_summary.Rd @@ -2,10 +2,19 @@ % Please edit documentation in R/fit.R \name{fit-method-cmdstan_summary} \alias{fit-method-cmdstan_summary} -\alias{fit-method-cmdstan_diagnose} \alias{cmdstan_summary} +\alias{fit-method-cmdstan_diagnose} \alias{cmdstan_diagnose} -\title{Run CmdStan's \code{stansummary} and \code{diagnose}} +\title{Run CmdStan's \code{stansummary} and \code{diagnose} utilities} +\usage{ +cmdstan_summary(flags = NULL) + +cmdstan_diagnose() +} +\arguments{ +\item{flags}{An optional character vector of flags (e.g. +\code{flags = c("--sig_figs=1")}).} +} \description{ Run CmdStan's \code{stansummary} and \code{diagnose} utilities. These are documented in the CmdStan Guide: @@ -13,19 +22,15 @@ documented in the CmdStan Guide: \item https://mc-stan.org/docs/cmdstan-guide/stansummary.html \item https://mc-stan.org/docs/cmdstan-guide/diagnose.html } -} -\note{ -Although these methods also work for models fit using the + +Although these methods can be used for models fit using the \code{\link[=model-method-variational]{$variational()}} method, much of the output is -only relevant for models fit using the \code{\link[=model-method-sample]{$sample()}} -method. 
-} -\section{Usage}{ -\preformatted{$cmdstan_summary() -$cmdstan_diagnose() -} -} +currently only relevant for models fit using the +\code{\link[=model-method-sample]{$sample()}} method. +See the \link[=fit-method-summary]{$summary()} method for computing similar summaries in +R rather than calling CmdStan's utilities. +} \examples{ \dontrun{ fit <- cmdstanr_example("logistic") @@ -35,5 +40,5 @@ fit$cmdstan_summary() } \seealso{ -\code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}} +\code{\link{CmdStanMCMC}}, \link{fit-method-summary} } diff --git a/man/fit-method-draws.Rd b/man/fit-method-draws.Rd index 8207d992b..add8ebed6 100644 --- a/man/fit-method-draws.Rd +++ b/man/fit-method-draws.Rd @@ -4,35 +4,17 @@ \alias{fit-method-draws} \alias{draws} \title{Extract posterior draws} -\description{ -Extract posterior draws after MCMC or approximate posterior -draws after variational approximation using formats provided by the -\pkg{posterior} package. - -The variables include the \code{parameters}, \verb{transformed parameters}, and -\verb{generated quantities} from the Stan program as well as \code{lp__}, the total -log probability (\code{target}) accumulated in the \code{model} block. -} -\section{Usage}{ -\preformatted{$draws(variables = NULL, inc_warmup = FALSE, ...) +\usage{ +draws(variables = NULL, inc_warmup = FALSE) } -} - -\section{Arguments}{ +\arguments{ +\item{variables}{(character vector) The variables to read in. If \code{NULL} (the +default) then all variables are included.} -\itemize{ -\item \code{variables}: (character vector) The variables (parameters and generated -quantities) to read in. If \code{NULL} (the default) then the draws of all -variables are included. -\item \code{inc_warmup}: (logical) For MCMC only, should warmup draws be included? -Defaults to \code{FALSE}. -\item \code{...}: Arguments passed on to -\code{\link[posterior:draws_array]{posterior::as_draws_array()}}. 
+\item{inc_warmup}{(logical) Should warmup draws be included? Defaults to +\code{FALSE}. Ignored except when used with \link{CmdStanMCMC} objects.} } -} - -\section{Value}{ - +\value{ \itemize{ \item For \link[=model-method-sample]{MCMC}, a 3-D \code{\link[posterior:draws_array]{draws_array}} object (iteration x chain x @@ -51,7 +33,15 @@ are \emph{not} actually draws, just point estimates stored in the \code{draws_ma format. See \code{\link[=fit-method-mle]{$mle()}} to extract them as a numeric vector. } } +\description{ +Extract posterior draws after MCMC or approximate posterior +draws after variational approximation using formats provided by the +\pkg{posterior} package. +The variables include the \code{parameters}, \verb{transformed parameters}, and +\verb{generated quantities} from the Stan program as well as \code{lp__}, the total +log probability (\code{target}) accumulated in the \code{model} block. +} \examples{ \dontrun{ library(posterior) diff --git a/man/fit-method-init.Rd b/man/fit-method-init.Rd index 55c6357cb..f8f20e25f 100644 --- a/man/fit-method-init.Rd +++ b/man/fit-method-init.Rd @@ -3,24 +3,21 @@ \name{fit-method-init} \alias{fit-method-init} \alias{init} -\title{Extract initial values} +\title{Extract user-specified initial values} +\usage{ +init() +} +\value{ +A list of lists. See \strong{Examples}. +} \description{ Return user-specified initial values. If the user provided initial values files or \R objects (list of lists or function) via the \code{init} argument when fitting the model then these are returned (always in -the list of lists format). Initial values generated by CmdStan are not -returned. -} -\section{Usage}{ -\preformatted{$init() -} +the list of lists format). Currently it is not possible to extract initial +values generated automatically by CmdStan, although CmdStan may support +this in the future. } - -\section{Value}{ - -A list of lists. See \strong{Examples}. 
-} - \examples{ \dontrun{ init_fun <- function() list(alpha = rnorm(1), beta = rnorm(3)) diff --git a/man/fit-method-inv_metric.Rd b/man/fit-method-inv_metric.Rd index e31ae8be5..247365318 100644 --- a/man/fit-method-inv_metric.Rd +++ b/man/fit-method-inv_metric.Rd @@ -3,31 +3,22 @@ \name{fit-method-inv_metric} \alias{fit-method-inv_metric} \alias{inv_metric} -\title{Extract inverse metric (mass matrix)} -\description{ -Return a list containing the inverse metric (mass matrix) for -each chain. -} -\section{Usage}{ -\preformatted{$inv_metric(matrix = TRUE) -} -} - -\section{Arguments}{ - -\itemize{ -\item \code{matrix}: (logical) If a diagonal metric was used, setting \code{matrix = FALSE} -returns a list containing just the diagonals of the matrices instead of the -full matrices. Setting \code{matrix = FALSE} has no effect for dense metrics. +\title{Extract inverse metric (mass matrix) after MCMC} +\usage{ +inv_metric(matrix = TRUE) } +\arguments{ +\item{matrix}{(logical) If a diagonal metric was used, setting \code{matrix = FALSE} returns a list containing just the diagonals of the matrices instead +of the full matrices. Setting \code{matrix = FALSE} has no effect for dense +metrics.} } - -\section{Value}{ - +\value{ A list of length equal to the number of MCMC chains. See the \code{matrix} argument for details. } - +\description{ +Extract the inverse metric (mass matrix) for each MCMC chain. +} \examples{ \dontrun{ fit <- cmdstanr_example("logistic") diff --git a/man/fit-method-loo.Rd b/man/fit-method-loo.Rd index 4d667ef6b..38ee10316 100644 --- a/man/fit-method-loo.Rd +++ b/man/fit-method-loo.Rd @@ -4,6 +4,9 @@ \alias{fit-method-loo} \alias{loo} \title{Leave-one-out cross-validation (LOO-CV)} +\usage{ +loo(variables = "log_lik", r_eff = TRUE, ...) +} \arguments{ \item{variables}{(character vector) The name(s) of the variable(s) in the Stan program containing the pointwise log-likelihood. 
The default is to @@ -23,6 +26,9 @@ argument to \code{\link[loo:loo]{loo::loo.array()}}. \item{...}{Other arguments (e.g., \code{cores}, \code{save_psis}, etc.) passed to \code{\link[loo:loo]{loo::loo.array()}}.} } +\value{ +The object returned by \code{\link[loo:loo]{loo::loo.array()}}. +} \description{ The \verb{$loo()} method computes approximate LOO-CV using the \pkg{loo} package. This is a simple wrapper around \code{\link[loo:loo]{loo::loo.array()}} @@ -30,21 +36,13 @@ provided for convenience and requires computing the pointwise log-likelihood in your Stan program. See the \pkg{loo} package \href{https://mc-stan.org/loo/articles/}{vignettes} for details. } -\section{Usage}{ -\preformatted{$loo(variables = "log_lik", r_eff = NULL, ...) -} -} - -\section{Value}{ - The object returned by \code{\link[loo:loo]{loo::loo.array()}}. -} - \examples{ \dontrun{ # the "logistic" example model has "log_lik" in generated quantities fit <- cmdstanr_example("logistic") -fit$loo(cores = 2) +loo_result <- fit$loo(cores = 2) +print(loo_result) } } diff --git a/man/fit-method-lp.Rd b/man/fit-method-lp.Rd index 5202c1f37..93d549bea 100644 --- a/man/fit-method-lp.Rd +++ b/man/fit-method-lp.Rd @@ -5,6 +5,16 @@ \alias{lp} \alias{lp_approx} \title{Extract log probability (target)} +\usage{ +lp() + +lp_approx() +} +\value{ +A numeric vector with length equal to the number of (post-warmup) +draws for MCMC and variational inference, and length equal to \code{1} for +optimization. +} \description{ The \verb{$lp()} method extracts \code{lp__}, the total log probability (\code{target}) accumulated in the \code{model} block of the Stan program. For @@ -15,12 +25,6 @@ See the \href{https://mc-stan.org/docs/2_23/reference-manual/sampling-statements section of the Stan Reference Manual for details on when normalizing constants are dropped from log probability calculations. 
} -\section{Usage}{ -\preformatted{$lp() -$lp_approx() -} -} - \section{Details}{ \code{lp__} is the unnormalized log density on Stan's \href{https://mc-stan.org/docs/2_23/reference-manual/variable-transforms-chapter.html}{unconstrained space}. @@ -34,12 +38,6 @@ for performing the checks described in Yao et al. (2018) and implemented in the \pkg{loo} package. } -\section{Value}{ - -A numeric vector with length equal to the number of (post-warmup) draws for -MCMC and variational inference, and length equal to \code{1} for optimization. -} - \examples{ \dontrun{ fit_mcmc <- cmdstanr_example("logistic") diff --git a/man/fit-method-metadata.Rd b/man/fit-method-metadata.Rd index d0e149781..755bdf6b1 100644 --- a/man/fit-method-metadata.Rd +++ b/man/fit-method-metadata.Rd @@ -4,16 +4,14 @@ \alias{fit-method-metadata} \alias{metadata} \title{Extract metadata from CmdStan CSV files} +\usage{ +metadata() +} \description{ The \verb{$metadata()} method returns a list of information gathered from the CSV output files, including the CmdStan configuration used when fitting the model. See \strong{Examples} and \code{\link[=read_cmdstan_csv]{read_cmdstan_csv()}}. } -\section{Usage}{ -\preformatted{$metadata() -} -} - \examples{ \dontrun{ fit_mcmc <- cmdstanr_example("logistic", method = "sample") diff --git a/man/fit-method-mle.Rd b/man/fit-method-mle.Rd index 338bd92d8..8809a1044 100644 --- a/man/fit-method-mle.Rd +++ b/man/fit-method-mle.Rd @@ -4,38 +4,29 @@ \alias{fit-method-mle} \alias{mle} \title{Extract (penalized) maximum likelihood estimate after optimization} +\usage{ +mle(variables = NULL) +} +\arguments{ +\item{variables}{(character vector) The variables (parameters, transformed +parameters, and generated quantities) to include. If NULL (the default) +then all variables are included.} +} +\value{ +A numeric vector. See \strong{Examples}. +} \description{ The \verb{$mle()} method is only available for \code{\link{CmdStanMLE}} objects. 
It returns the penalized maximum likelihood estimate (posterior mode) as a -numeric vector with one element per variable. The returned vector does not +numeric vector with one element per variable. The returned vector does \emph{not} include \code{lp__}, the total log probability (\code{target}) accumulated in the \code{model} block of the Stan program, which is available via the \code{\link[=fit-method-lp]{$lp()}} method and also included in the \code{\link[=fit-method-draws]{$draws()}} method. } -\section{Usage}{ -\preformatted{$mle(variables = NULL) -} -} - -\section{Arguments}{ - -\itemize{ -\item \code{variables}: (character vector) The variables (parameters and generated -quantities) to include. If NULL (the default) then all variables are -included. -} -} - -\section{Value}{ - -A numeric vector. See \strong{Examples}. -} - \examples{ \dontrun{ fit <- cmdstanr_example("logistic", method = "optimize") -fit$mle() fit$mle("alpha") fit$mle("beta") fit$mle("beta[2]") diff --git a/man/fit-method-num_chains.Rd b/man/fit-method-num_chains.Rd new file mode 100644 index 000000000..854f5e127 --- /dev/null +++ b/man/fit-method-num_chains.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fit.R +\name{fit-method-num_chains} +\alias{fit-method-num_chains} +\alias{num_chains} +\title{Extract number of chains after MCMC} +\usage{ +num_chains() +} +\value{ +An integer. +} +\description{ +The \verb{$num_chains()} method returns the number of MCMC chains. 
+} +\examples{ +\dontrun{ +fit_mcmc <- cmdstanr_example(chains = 2) +fit_mcmc$num_chains() +} + +} +\seealso{ +\code{\link{CmdStanMCMC}} +} diff --git a/man/fit-method-output.Rd b/man/fit-method-output.Rd index 272b11e69..7b71a0543 100644 --- a/man/fit-method-output.Rd +++ b/man/fit-method-output.Rd @@ -4,26 +4,22 @@ \alias{fit-method-output} \alias{output} \title{Access console output} -\description{ -For MCMC the \verb{$output()} method returns the stdout and stderr of -all chains as a list of character vectors. If the \code{id} argument is specified -it pretty prints the console output for a single chain. - -For optimization and variational inference \verb{$output()} just pretty prints the -console output. +\usage{ +output(id = NULL) } -\section{Usage}{ -\preformatted{$output(id = NULL) +\arguments{ +\item{id}{(integer) The chain id. Ignored if the model was not fit using +MCMC.} } -} - -\section{Arguments}{ +\description{ +For MCMC, the \verb{$output()} method returns the stdout and stderr +of all chains as a list of character vectors if \code{id=NULL}. If the \code{id} +argument is specified it instead pretty prints the console output for a +single chain. -\itemize{ -\item \code{id}: (integer) For MCMC only, the chain id. -} +For optimization and variational inference \verb{$output()} just pretty prints +the console output. } - \examples{ \dontrun{ fit_mcmc <- cmdstanr_example("logistic", method = "sample") diff --git a/man/fit-method-return_codes.Rd b/man/fit-method-return_codes.Rd index 760aafa63..33da090d2 100644 --- a/man/fit-method-return_codes.Rd +++ b/man/fit-method-return_codes.Rd @@ -4,15 +4,17 @@ \alias{fit-method-return_codes} \alias{return_codes} \title{Extract return codes from CmdStan} +\usage{ +return_codes() +} +\value{ +An integer vector of return codes with length equal to the number of +CmdStan runs (number of chains for MCMC and one otherwise). 
+} \description{ The \verb{$return_codes()} method returns a vector of return codes from the CmdStan run(s). A return code of 0 indicates a successful run. } -\section{Usage}{ -\preformatted{$return_codes() -} -} - \examples{ \dontrun{ # example with return codes all zero diff --git a/man/fit-method-sampler_diagnostics.Rd b/man/fit-method-sampler_diagnostics.Rd index 7d9b7034d..43db1c7e3 100644 --- a/man/fit-method-sampler_diagnostics.Rd +++ b/man/fit-method-sampler_diagnostics.Rd @@ -3,33 +3,23 @@ \name{fit-method-sampler_diagnostics} \alias{fit-method-sampler_diagnostics} \alias{sampler_diagnostics} -\title{Extract sampler diagnostics} -\description{ -Extract the values of sampler diagnostics for each iteration and -chain of MCMC. -} -\section{Usage}{ -\preformatted{$sampler_diagnostics(inc_warmup = FALSE, ...) -} -} - -\section{Arguments}{ - -\itemize{ -\item \code{inc_warmup}: (logical) Should warmup draws be included? Defaults to \code{FALSE}. -\item \code{...}: Arguments passed on to -\code{\link[posterior:draws_array]{posterior::as_draws_array()}}. +\title{Extract sampler diagnostics after MCMC} +\usage{ +sampler_diagnostics(inc_warmup = FALSE) } +\arguments{ +\item{inc_warmup}{(logical) Should warmup draws be included? Defaults to \code{FALSE}.} } - -\section{Value}{ - +\value{ A 3-D \code{\link[posterior:draws_array]{draws_array}} object (iteration x chain x variable). The variables for Stan's default MCMC algorithm are \code{"accept_stat__"}, \code{"stepsize__"}, \code{"treedepth__"}, \code{"n_leapfrog__"}, \code{"divergent__"}, \code{"energy__"}. } - +\description{ +Extract the values of sampler diagnostics for each iteration and +chain of MCMC. 
+} \examples{ \dontrun{ fit <- cmdstanr_example("logistic") diff --git a/man/fit-method-save_object.Rd b/man/fit-method-save_object.Rd index 2f37dd4ed..4a019bd91 100644 --- a/man/fit-method-save_object.Rd +++ b/man/fit-method-save_object.Rd @@ -4,6 +4,14 @@ \alias{fit-method-save_object} \alias{save_object} \title{Save fitted model object to a file} +\usage{ +save_object(file, ...) +} +\arguments{ +\item{file}{(string) Path where the file should be saved.} + +\item{...}{Other arguments to pass to \code{\link[base:readRDS]{base::saveRDS()}} besides \code{object} and \code{file}.} +} \description{ This method is a wrapper around \code{\link[base:readRDS]{base::saveRDS()}} that ensures that all posterior draws and diagnostics are saved when saving a fitted @@ -11,19 +19,6 @@ model object. Because the contents of the CmdStan output CSV files are only read into R lazily (i.e., as needed), the \verb{$save_object()} method is the safest way to guarantee that everything has been read in before saving. } -\section{Usage}{ -\preformatted{$save_object(file, ...) -} -} - -\section{Arguments}{ - -\itemize{ -\item \code{file}: (string) Path where the file should be saved. -\item \code{...}: Other arguments to pass to \code{\link[base:readRDS]{base::saveRDS()}} besides \code{object} and \code{file}. 
-} -} - \examples{ \dontrun{ fit <- cmdstanr_example("logistic") diff --git a/man/fit-method-save_output_files.Rd b/man/fit-method-save_output_files.Rd index 30e180a43..f6502e6a8 100644 --- a/man/fit-method-save_output_files.Rd +++ b/man/fit-method-save_output_files.Rd @@ -2,18 +2,59 @@ % Please edit documentation in R/fit.R \name{fit-method-save_output_files} \alias{fit-method-save_output_files} +\alias{save_output_files} \alias{fit-method-save_data_file} \alias{fit-method-save_latent_dynamics_files} \alias{fit-method-output_files} \alias{fit-method-data_file} \alias{fit-method-latent_dynamics_files} -\alias{save_output_files} \alias{save_data_file} \alias{save_latent_dynamics_files} \alias{output_files} \alias{data_file} \alias{latent_dynamics_files} \title{Save output and data files} +\usage{ +save_output_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) + +save_latent_dynamics_files( + dir = ".", + basename = NULL, + timestamp = TRUE, + random = TRUE +) + +save_data_file(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) + +output_files(include_failed = FALSE) + +latent_dynamics_files(include_failed = FALSE) + +data_file() +} +\arguments{ +\item{dir}{(string) Path to directory where the files should be saved.} + +\item{basename}{(string) Base filename to use. See \strong{Details}.} + +\item{timestamp}{(logical) Should a timestamp be added to the file name(s)? +Defaults to \code{TRUE}. See \strong{Details}.} + +\item{random}{(logical) Should random alphanumeric characters be added to the +end of the file name(s)? Defaults to \code{TRUE}. See \strong{Details}.} + +\item{include_failed}{Should CmdStan runs that failed also be included? The +default is \code{FALSE}.} +} +\value{ +The \verb{$save_*} methods print a message with the new file paths and (invisibly) +return a character vector of the new paths (or \code{NA} for any that couldn't be +copied). 
They also have the side effect of setting the internal paths in the +fitted model object to the new paths. + +The methods \emph{without} the \code{save_} prefix return character vectors of file +paths without moving any files. +} \description{ All fitted model objects have methods for saving (moving to a specified location) the files created by CmdStanR to hold CmdStan output @@ -25,29 +66,6 @@ updated to point to the new file locations.} The versions without the \code{save_} prefix (e.g., \verb{$output_files()}) return the current file paths without moving any files. } -\section{Usage}{ -\preformatted{$save_output_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) -$save_latent_dynamics_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) -$save_data_file(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) - -$output_files() -$latent_dynamics_files() -$data_file() -} -} - -\section{Arguments}{ - -\itemize{ -\item \code{dir}: (string) Path to directory where the files should be saved. -\item \code{basename}: (string) Base filename to use. See \strong{Details}. -\item \code{timestamp}: (logical) Should a timestamp be added to the file name(s)? -Defaults to \code{TRUE}. See \strong{Details}. -\item \code{random}: (logical) Should random alphanumeric characters be added to the -end of the file name(s)? Defaults to \code{TRUE}. See \strong{Details}. -} -} - \section{Details}{ For \verb{$save_output_files()} the files moved to \code{dir} will have names of @@ -67,17 +85,20 @@ For \verb{$save_data_file()} no \code{id} is included in the file name because e with multiple MCMC chains the data file is the same. } -\section{Value}{ +\examples{ +\dontrun{ +fit <- cmdstanr_example() +fit$output_files() +fit$data_file() -The \verb{$save_*} methods print a message with the new file paths and (invisibly) -return a character vector of the new paths (or \code{NA} for any that couldn't be -copied). 
They also have the side effect of setting the internal paths in the -fitted model object to the new paths. - -The methods \emph{without} the \code{save_} prefix return character vectors of file -paths without moving any files. +# just using tempdir for the example +my_dir <- tempdir() +fit$save_output_files(dir = my_dir, basename = "banana") +fit$save_output_files(dir = my_dir, basename = "tomato", timestamp = FALSE) +fit$save_output_files(dir = my_dir, basename = "lettuce", timestamp = FALSE, random = FALSE) } +} \seealso{ \code{\link{CmdStanMCMC}}, \code{\link{CmdStanMLE}}, \code{\link{CmdStanVB}}, \code{\link{CmdStanGQ}} } diff --git a/man/fit-method-summary.Rd b/man/fit-method-summary.Rd index 58c42f4a6..b42112432 100644 --- a/man/fit-method-summary.Rd +++ b/man/fit-method-summary.Rd @@ -3,51 +3,40 @@ \name{fit-method-summary} \alias{fit-method-summary} \alias{summary} +\alias{fit-method-print} \alias{print.CmdStanMCMC} \alias{print.CmdStanMLE} \alias{print.CmdStanVB} -\title{Compute a summary table of MCMC estimates and diagnostics} -\description{ -The \verb{$summary()} method runs -\code{\link[posterior:draws_summary]{summarise_draws()}} from the \pkg{posterior} -package and returns the output. For MCMC only post-warmup draws are included -in the summary. - -The \verb{$print()} method prints the same summary stats but removes the extra -formatting used for printing tibbles and returns the fitted model object -itself. The \verb{$print()} method may also be faster than \verb{$summary()} because -it is designed to only compute the summary statistics for the variables -that will actually fit in the printed output (see argument \code{max_rows}) -whereas \verb{$summary()} will compute them for all of the specified variables -in order to be able to return them to the user. -} -\section{Usage}{ -\preformatted{$summary(variables = NULL, ...) 
-$print(variables = NULL, ..., digits = 2, max_rows = 10) -} +\title{Compute a summary table of estimates and diagnostics} +\usage{ +summary(variables = NULL, ...) } +\arguments{ +\item{variables}{(character vector) The variables to include.} -\section{Arguments}{ - -\itemize{ -\item \code{variables}: (character vector) The variables to include. -\item \code{...}: Optional arguments to pass to -\code{\link[posterior:draws_summary]{posterior::summarise_draws()}}. -\item \code{digits}: (integer) For \code{print} only, the number of digits to use for -rounding. -\item \code{max_rows}: (integer) For \code{print} only, the maximum number of rows to print. +\item{...}{Optional arguments to pass to \code{\link[posterior:draws_summary]{posterior::summarise_draws()}}.} } -} - -\section{Value}{ - -The \verb{$summary()} method returns the tibble created by +\value{ +The \verb{$summary()} method returns the tibble data frame created by \code{\link[posterior:draws_summary]{posterior::summarise_draws()}}. The \verb{$print()} method returns the fitted model object itself (invisibly), which is the standard behavior for print methods in \R. } +\description{ +The \verb{$summary()} method runs +\code{\link[posterior:draws_summary]{summarise_draws()}} from the \pkg{posterior} +package and returns the output. For MCMC, only post-warmup draws are +included in the summary. +There is also a \verb{$print()} method that prints the same summary stats but +removes the extra formatting used for printing tibbles and returns the +fitted model object itself. The \verb{$print()} method may also be faster than +\verb{$summary()} because it is designed to only compute the summary statistics +for the variables that will actually fit in the printed output whereas +\verb{$summary()} will compute them for all of the specified variables in order +to be able to return them to the user. See \strong{Examples}. 
+} \examples{ \dontrun{ fit <- cmdstanr_example("logistic") diff --git a/man/fit-method-time.Rd b/man/fit-method-time.Rd index 8d67d0b64..11bdbe648 100644 --- a/man/fit-method-time.Rd +++ b/man/fit-method-time.Rd @@ -4,18 +4,10 @@ \alias{fit-method-time} \alias{time} \title{Report timing of CmdStan runs} -\description{ -Report the run time in seconds. For MCMC additional information -is provided about the run times of individual chains and the warmup and -sampling phases. +\usage{ +time() } -\section{Usage}{ -\preformatted{$time() -} -} - -\section{Value}{ - +\value{ A list with elements \itemize{ \item \code{total}: (scalar) the total run time. @@ -24,7 +16,11 @@ chains. The data frame has columns \code{"chain_id"}, \code{"warmup"}, \code{"sa and \code{"total"}. } } - +\description{ +Report the run time in seconds. For MCMC additional information +is provided about the run times of individual chains and the warmup and +sampling phases. +} \examples{ \dontrun{ fit_mcmc <- cmdstanr_example("logistic", method = "sample") diff --git a/man/model-method-check_syntax.Rd b/man/model-method-check_syntax.Rd index 38b20345f..ec49b206a 100644 --- a/man/model-method-check_syntax.Rd +++ b/man/model-method-check_syntax.Rd @@ -4,41 +4,38 @@ \alias{model-method-check_syntax} \alias{check_syntax} \title{Check syntax of a Stan program} -\description{ -The \verb{$check_syntax()} method of a \code{\link{CmdStanModel}} object -checks the Stan program for syntax errors and returns \code{TRUE} (invisibly) if -parsing succeeds. If invalid syntax in found an error is thrown. -} -\section{Usage}{ -\preformatted{$check_syntax( +\usage{ +check_syntax( pedantic = FALSE, include_paths = NULL, stanc_options = list(), quiet = FALSE ) } -} - -\section{Arguments}{ - -\itemize{ -\item \code{pedantic}: (logical) Should pedantic mode be turned on? The default is +\arguments{ +\item{pedantic}{(logical) Should pedantic mode be turned on? The default is \code{FALSE}. 
Pedantic mode attempts to warn you about potential issues in your Stan program beyond syntax errors. For details see the \href{https://mc-stan.org/docs/reference-manual/pedantic-mode.html}{\emph{Pedantic mode} chapter} in -the Stan Reference Manual. -\item \code{include_paths}: (character vector) Paths to directories where Stan +the Stan Reference Manual.} + +\item{include_paths}{(character vector) Paths to directories where Stan should look for files specified in \verb{#include} directives in the Stan -program. -\item \code{stanc_options}: (list) Any other Stan-to-C++ transpiler options to be +program.} + +\item{stanc_options}{(list) Any other Stan-to-C++ transpiler options to be used when compiling the model. See the documentation for the -\code{\link[=model-method-compile]{$compile()}} method for details. -\item \code{quiet}: (logical) Should informational messages be suppressed? The +\code{\link[=model-method-compile]{$compile()}} method for details.} + +\item{quiet}{(logical) Should informational messages be suppressed? The default is \code{FALSE}, which will print a message if the Stan program is valid or the compiler error message if there are syntax errors. If \code{TRUE}, only -the error message will be printed. +the error message will be printed.} } +\description{ +The \verb{$check_syntax()} method of a \code{\link{CmdStanModel}} object +checks the Stan program for syntax errors and returns \code{TRUE} (invisibly) if +parsing succeeds. If invalid syntax is found an error is thrown. } - \section{Value}{ The \verb{$check_syntax()} method returns \code{TRUE} (invisibly) if the model is valid. 
diff --git a/man/model-method-compile.Rd b/man/model-method-compile.Rd index c4a63aebb..e1e91b3e1 100644 --- a/man/model-method-compile.Rd +++ b/man/model-method-compile.Rd @@ -4,78 +4,76 @@ \alias{model-method-compile} \alias{compile} \title{Compile a Stan program} -\description{ -The \verb{$compile()} method of a \code{\link{CmdStanModel}} object translates -the Stan program to C++ and creates a compiled executable. In most cases -the user does not need to explicitly call the \verb{$compile()} method as -compilation will occur when calling \code{\link[=cmdstan_model]{cmdstan_model()}}. However it is -possible to set \code{compile=FALSE} in the call to \code{cmdstan_model()} and -subsequently call the \verb{$compile()} method directly. - -After compilation, the paths to the executable and the \code{.hpp} file -containing the generated C++ code are available via the \verb{$exe_file()} and -\verb{$hpp_file()} methods. The default is to create the executable in the same -directory as the Stan program and to write the generated C++ code in a -temporary directory. To save the C++ code to a non-temporary location use -\verb{$save_hpp_file()}. -} -\section{Usage}{ -\preformatted{$compile( +\usage{ +compile( quiet = TRUE, dir = NULL, pedantic = FALSE, include_paths = NULL, cpp_options = list(), stanc_options = list(), - force_recompile = FALSE + force_recompile = FALSE, + threads = FALSE ) -$exe_file() -$hpp_file() -$save_hpp_file(dir = NULL) -} } - -\section{Arguments}{ - -Leaving all arguments at their defaults should be fine for most users, but -optional arguments are provided to enable features in CmdStan (and the Stan -Math library). See the CmdStan manual for more details. -\itemize{ -\item \code{quiet}: (logical) Should the verbose output from CmdStan during +\arguments{ +\item{quiet}{(logical) Should the verbose output from CmdStan during compilation be suppressed? 
The default is \code{TRUE}, but if you encounter an error we recommend trying again with \code{quiet=FALSE} to see more of the -output. -\item \code{dir}: (string) The path to the directory in which to store the CmdStan +output.} + +\item{dir}{(string) The path to the directory in which to store the CmdStan executable (or \code{.hpp} file if using \verb{$save_hpp_file()}). The default is the -same location as the Stan program. -\item \code{pedantic}: (logical) Should pedantic mode be turned on? The default is +same location as the Stan program.} + +\item{pedantic}{(logical) Should pedantic mode be turned on? The default is \code{FALSE}. Pedantic mode attempts to warn you about potential issues in your Stan program beyond syntax errors. For details see the \href{https://mc-stan.org/docs/reference-manual/pedantic-mode.html}{\emph{Pedantic mode} chapter} in the Stan Reference Manual. \strong{Note:} to do a pedantic check for a model that is already compiled use the -\code{\link[=model-method-check_syntax]{$check_syntax()}} method instead. -\item \code{include_paths}: (character vector) Paths to directories where Stan +\code{\link[=model-method-check_syntax]{$check_syntax()}} method instead.} + +\item{include_paths}{(character vector) Paths to directories where Stan should look for files specified in \verb{#include} directives in the Stan -program. -\item \code{cpp_options}: (list) Any makefile options to be used when compiling the +program.} + +\item{cpp_options}{(list) Any makefile options to be used when compiling the model (\code{STAN_THREADS}, \code{STAN_MPI}, \code{STAN_OPENCL}, etc.). Anything you would -otherwise write in the \code{make/local} file. -\item \code{stanc_options}: (list) Any Stan-to-C++ transpiler options to be used +otherwise write in the \code{make/local} file.} + +\item{stanc_options}{(list) Any Stan-to-C++ transpiler options to be used when compiling the model. 
See the \strong{Examples} section below as well as the \code{stanc} chapter of the CmdStan Guide for more details on available options: -https://mc-stan.org/docs/cmdstan-guide/stanc.html. -\item \code{force_recompile}: (logical) Should the model be recompiled even if was -not modified since last compiled. The default is \code{FALSE}. -} +https://mc-stan.org/docs/cmdstan-guide/stanc.html.} + +\item{force_recompile}{(logical) Should the model be recompiled even if it was +not modified since last compiled. The default is \code{FALSE}.} + +\item{threads}{Deprecated and will be removed in a future release. Please +turn on threading via \code{cpp_options = list(stan_threads = TRUE)} instead.} } +\description{ +The \verb{$compile()} method of a \code{\link{CmdStanModel}} object translates +the Stan program to C++ and creates a compiled executable. In most cases +the user does not need to explicitly call the \verb{$compile()} method as +compilation will occur when calling \code{\link[=cmdstan_model]{cmdstan_model()}}. However it is +possible to set \code{compile=FALSE} in the call to \code{cmdstan_model()} and +subsequently call the \verb{$compile()} method directly. +After compilation, the paths to the executable and the \code{.hpp} file +containing the generated C++ code are available via the \verb{$exe_file()} and +\verb{$hpp_file()} methods. The default is to create the executable in the same +directory as the Stan program and to write the generated C++ code in a +temporary directory. To save the C++ code to a non-temporary location use +\verb{$save_hpp_file(dir)}. +} \section{Value}{ The \verb{$compile()} method is called for its side effect of creating the executable and adding its path to the \code{\link{CmdStanModel}} object, but it also returns the \code{\link{CmdStanModel}} object invisibly. -The \verb{$exe_file()}, \verb{$hpp_file()}, and \verb{$save_hpp_file()} methods all return -file paths.
+After compilation, the \verb{$exe_file()}, \verb{$hpp_file()}, and \verb{$save_hpp_file()} +methods can be used and return file paths. } \examples{ diff --git a/man/model-method-generate-quantities.Rd b/man/model-method-generate-quantities.Rd index 1fb59523c..d9a73d356 100644 --- a/man/model-method-generate-quantities.Rd +++ b/man/model-method-generate-quantities.Rd @@ -4,13 +4,8 @@ \alias{model-method-generate-quantities} \alias{generate_quantities} \title{Run Stan's standalone generated quantities method} -\description{ -The \verb{$generate_quantities()} method of a \code{\link{CmdStanModel}} object -runs Stan's standalone generated quantities to obtain generated quantities -based on previously fitted parameters. -} -\section{Usage}{ -\preformatted{$generate_quantities( +\usage{ +generate_quantities( fitted_params, data = NULL, seed = NULL, @@ -20,25 +15,72 @@ based on previously fitted parameters. threads_per_chain = NULL ) } -} - -\section{Arguments}{ - -\itemize{ -\item \code{fitted_params}: (multiple options) The parameter draws to use. One of the following: +\arguments{ +\item{fitted_params}{(multiple options) The parameter draws to use. One of +the following: \itemize{ \item A \link{CmdStanMCMC} or \link{CmdStanVB} fitted model object. -\item A \link[posterior:draws_array]{posterior::draws_array} (for MCMC) or \link[posterior:draws_matrix]{posterior::draws_matrix} (for VB) -object returned by CmdStanR's \code{\link[=fit-method-draws]{$draws()}} method. +\item A \link[posterior:draws_array]{posterior::draws_array} (for MCMC) or \link[posterior:draws_matrix]{posterior::draws_matrix} (for +VB) object returned by CmdStanR's \code{\link[=fit-method-draws]{$draws()}} method. \item A character vector of paths to CmdStan CSV output files. +}} + +\item{data}{(multiple options) The data to use for the variables specified in +the \code{data} block of the Stan program. One of the following: +\itemize{ +\item A named list of \R objects (like for RStan). 
Internally this list is then +written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. +\item A path to a data file compatible with CmdStan (JSON or \R dump). See the +appendices in the CmdStan manual for details on using these formats. +\item \code{NULL} or an empty list if the Stan program has no \code{data} block. +}} + +\item{seed}{(positive integer) A seed for the (P)RNG to pass to CmdStan.} + +\item{output_dir}{(string) A path to a directory where CmdStan should write +its output CSV files. For interactive use this can typically be left at +\code{NULL} (temporary directory) since CmdStanR makes the CmdStan output +(posterior draws and diagnostics) available in \R via methods of the fitted +model objects. The behavior of \code{output_dir} is as follows: +\itemize{ +\item If \code{NULL} (the default), then the CSV files are written to a temporary +directory and only saved permanently if the user calls one of the \verb{$save_*} +methods of the fitted model object (e.g., +\code{\link[=fit-method-save_output_files]{$save_output_files()}}). These temporary +files are removed when the fitted model object is +\link[base:gc]{garbage collected} (manually or automatically). +\item If a path, then the files are created in \code{output_dir} with names +corresponding to the defaults used by \verb{$save_output_files()}. +}} + +\item{sig_figs}{(positive integer) The number of significant figures used +when storing the output values. By default, CmdStan represents the output +values with 6 significant figures. The upper limit for \code{sig_figs} is 18. +Increasing this value will result in larger output CSV files and thus an +increased usage of disk space.} + +\item{parallel_chains}{(positive integer) The \emph{maximum} number of MCMC chains +to run in parallel. If \code{parallel_chains} is not specified then the default +is to look for the option \code{"mc.cores"}, which can be set for an entire \R +session by \code{options(mc.cores=value)}.
If the \code{"mc.cores"} option has not +been set then the default is \code{1}.} + +\item{threads_per_chain}{(positive integer) If the model was +\link[=model-method-compile]{compiled} with threading support, the number of +threads to use in parallelized sections \emph{within} an MCMC chain (e.g., when +using the Stan functions \code{reduce_sum()} or \code{map_rect()}). This is in +contrast with \code{parallel_chains}, which specifies the number of chains to +run in parallel. The actual number of CPU cores used is +\code{parallel_chains*threads_per_chain}. For an example of using threading see +the Stan case study \href{https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html}{Reduce Sum: A Minimal Example}.} } -\item \code{data}, \code{seed}, \code{output_dir}, \code{sig_figs}, \code{parallel_chains}, \code{threads_per_chain}: -Same as for the \code{\link[=model-method-sample]{$sample()}} method. -} +\description{ +The \verb{$generate_quantities()} method of a \code{\link{CmdStanModel}} object +runs Stan's standalone generated quantities to obtain generated quantities +based on previously fitted parameters. } - \section{Value}{ - The \verb{$generate_quantities()} method returns a \code{\link{CmdStanGQ}} object. + A \code{\link{CmdStanGQ}} object. } \examples{ diff --git a/man/model-method-optimize.Rd b/man/model-method-optimize.Rd index dbd82dd13..9377ef666 100644 --- a/man/model-method-optimize.Rd +++ b/man/model-method-optimize.Rd @@ -4,23 +4,8 @@ \alias{model-method-optimize} \alias{optimize} \title{Run Stan's optimization algorithms} -\description{ -The \verb{$optimize()} method of a \code{\link{CmdStanModel}} object runs -Stan's optimizer to obtain a posterior mode (penalized maximum likelihood) -estimate. -} -\details{ -CmdStan can find the posterior mode (assuming there is one). If the -posterior is not convex, there is no guarantee Stan will be able to find -the global mode as opposed to a local optimum of log probability.
For -optimization, the mode is calculated without the Jacobian adjustment for -constrained variables, which shifts the mode due to the change of -variables. Thus modes correspond to modes of the model as written. - --- \href{https://mc-stan.org/docs/cmdstan-guide/}{\emph{CmdStan User's Guide}} -} -\section{Usage}{ -\preformatted{$optimize( +\usage{ +optimize( data = NULL, seed = NULL, refresh = NULL, @@ -40,108 +25,128 @@ variables. Thus modes correspond to modes of the model as written. history_size = NULL ) } -} - -\section{Arguments shared by all fitting methods}{ - The following arguments can -be specified for any of the fitting methods (\code{sample}, \code{optimize}, -\code{variational}). Arguments left at \code{NULL} default to the default used by the -installed version of CmdStan. -\itemize{ -\item \code{data}: (multiple options) The data to use. One of the following: +\arguments{ +\item{data}{(multiple options) The data to use for the variables specified in +the \code{data} block of the Stan program. One of the following: \itemize{ -\item A named list of \R objects (like for RStan). Internally this list is -then written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. -\item A path to a data file compatible with CmdStan (JSON or \R dump). See -the appendices in the CmdStan manual for details on using these formats. -} -\item \code{seed}: (positive integer) A seed for the (P)RNG to pass to CmdStan. -\item \code{refresh}: (non-negative integer) The number of iterations between -printed screen updates. If \code{refresh = 0}, only error messages will be printed. -\item \code{init}: (multiple options) The initialization method for the parameters block: +\item A named list of \R objects (like for RStan). Internally this list is then +written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. +\item A path to a data file compatible with CmdStan (JSON or \R dump). 
See the +appendices in the CmdStan manual for details on using these formats. +\item \code{NULL} or an empty list if the Stan program has no \code{data} block. +}} + +\item{seed}{(positive integer) A seed for the (P)RNG to pass to CmdStan.} + +\item{refresh}{(non-negative integer) The number of iterations between +printed screen updates. If \code{refresh = 0}, only error messages will be +printed.} + +\item{init}{(multiple options) The initialization method to use for the +variables declared in the \code{parameters} block of the Stan program: \itemize{ -\item A real number \code{x>0} initializes randomly between \verb{[-x,x]} (on the -\emph{unconstrained} parameter space); -\item \code{0} initializes to \code{0}; -\item A character vector of paths (one per chain) to JSON or Rdump files. See +\item A real number \code{x>0}. This initializes \emph{all} parameters randomly between +\verb{[-x,x]} (on the \emph{unconstrained} parameter space); +\item The number \code{0}. This initializes \emph{all} parameters to \code{0}; +\item A character vector of paths (one per chain) to JSON or Rdump files +containing initial values for all or some parameters. See \code{\link[=write_stan_json]{write_stan_json()}} to write \R objects to JSON files compatible with CmdStan. -\item A list of lists. For MCMC the list should contain a sublist for each -chain. For optimization and variational inference there should be just one -sublist. The sublists should have named elements corresponding to the -parameters for which you are specifying initial values. See \strong{Examples}. +\item A list of lists containing initial values for all or some parameters. For +MCMC the list should contain a sublist for each chain. For optimization and +variational inference there should be just one sublist. The sublists should +have named elements corresponding to the parameters for which you are +specifying initial values. See \strong{Examples}. 
\item A function that returns a single list with names corresponding to the -parameters for which you are specifying initial values. The function -can take no arguments or a single argument \code{chain_id}. For MCMC, if the -function has argument \code{chain_id} it will be supplied with the chain id -(from 1 to number of chains) when called to generate the initial -values. See \strong{Examples}. -} -\item \code{save_latent_dynamics}: (logical) Should auxiliary diagnostic information +parameters for which you are specifying initial values. The function can +take no arguments or a single argument \code{chain_id}. For MCMC, if the function +has argument \code{chain_id} it will be supplied with the chain id (from 1 to +number of chains) when called to generate the initial values. See +\strong{Examples}. +}} + +\item{save_latent_dynamics}{(logical) Should auxiliary diagnostic information about the latent dynamics be written to temporary diagnostic CSV files? This argument replaces CmdStan's \code{diagnostic_file} argument and the content written to CSV is controlled by the user's CmdStan installation and not -CmdStanR (and for some algorithms no content may be written). The default -is \code{save_latent_dynamics=FALSE}, which is appropriate for almost every use case -(all diagnostics recommended for users to check are \emph{always} saved, e.g., -divergences for HMC). To save the temporary files created when -\code{save_latent_dynamics=TRUE} see the -\code{\link[=fit-method-save_latent_dynamics_files]{$save_latent_dynamics_files()}} method. -\item \code{output_dir}: (string) A path to a directory where CmdStan should write +CmdStanR (for some algorithms no content may be written). The default +is \code{FALSE}, which is appropriate for almost every use case. 
To save the +temporary files created when \code{save_latent_dynamics=TRUE} see the +\code{\link[=fit-method-save_latent_dynamics_files]{$save_latent_dynamics_files()}} +method.} + +\item{output_dir}{(string) A path to a directory where CmdStan should write its output CSV files. For interactive use this can typically be left at -\code{NULL} (temporary directory) since CmdStanR makes the CmdStan output (e.g., -posterior draws and diagnostics) available in \R via methods of the fitted +\code{NULL} (temporary directory) since CmdStanR makes the CmdStan output +(posterior draws and diagnostics) available in \R via methods of the fitted model objects. The behavior of \code{output_dir} is as follows: \itemize{ \item If \code{NULL} (the default), then the CSV files are written to a temporary -directory and only saved permanently if the user calls one of the -\verb{$save_*} methods of the fitted model object (e.g., +directory and only saved permanently if the user calls one of the \verb{$save_*} +methods of the fitted model object (e.g., \code{\link[=fit-method-save_output_files]{$save_output_files()}}). These temporary -files are removed when the fitted model object is garbage collected. +files are removed when the fitted model object is +\link[base:gc]{garbage collected} (manually or automatically). \item If a path, then the files are created in \code{output_dir} with names -corresponding the defaults used by \verb{$save_output_files()} (and similar -methods like \verb{$save_latent_dynamics_files()}). -} -\item \code{sig_figs}: (positive integer) The number of significant figures used -for the output values. By default, CmdStan represent the output values with -6 significant figures. The upper limit for \code{sig_figs} is 18. Increasing -this value can cause an increased usage of disk space due to larger -output CSV files. -} -} +corresponding to the defaults used by \verb{$save_output_files()}. 
+}} -\section{Arguments unique to the \code{optimize} method}{ - In addition to the -arguments above, the \verb{$optimize()} method also has its own set of -arguments. These arguments are described briefly here and in greater detail -in the CmdStan manual. Arguments left at \code{NULL} default to the default used -by the installed version of CmdStan. -The latest \href{https://mc-stan.org/docs/cmdstan-guide/}{CmdStan User’s Guide} -will have the defaults for the latest version of CmdStan. -\itemize{ -\item \code{threads}: (positive integer) If the model was +\item{sig_figs}{(positive integer) The number of significant figures used +when storing the output values. By default, CmdStan represents the output +values with 6 significant figures. The upper limit for \code{sig_figs} is 18. +Increasing this value will result in larger output CSV files and thus an +increased usage of disk space.} + +\item{threads}{(positive integer) If the model was \link[=model-method-compile]{compiled} with threading support, the number of threads to use in parallelized sections (e.g., when -using the Stan functions \code{reduce_sum()} or \code{map_rect()}). -\item \code{iter}: (positive integer) The maximum number of iterations. -\item \code{algorithm}: (string) The optimization algorithm. One of \code{"lbfgs"}, +using the Stan functions \code{reduce_sum()} or \code{map_rect()}).} + +\item{algorithm}{(string) The optimization algorithm. One of \code{"lbfgs"}, \code{"bfgs"}, or \code{"newton"}. The control parameters below are only available for \code{"lbfgs"} and \verb{"bfgs}. For their default values and more details see the CmdStan User's Guide. The default values can also be obtained by -running \code{cmdstanr_example(method="optimize")$metadata()}. -\item \code{init_alpha}: (positive real) The initial step size parameter. -\item \code{tol_obj}: (positive real) Convergence tolerance on changes in objective function value.
-\item \code{tol_rel_obj}: (positive real) Convergence tolerance on relative changes in objective function value. -\item \code{tol_grad}: (positive real) Convergence tolerance on the norm of the gradient. -\item \code{tol_rel_grad}: (positive real) Convergence tolerance on the relative norm of the gradient. -\item \code{tol_param}: (positive real) Convergence tolerance on changes in parameter value. -\item \code{history_size}: (positive integer) The size of the history used when -approximating the Hessian. Only available for L-BFGS. +running \code{cmdstanr_example(method="optimize")$metadata()}.} + +\item{init_alpha}{(positive real) The initial step size parameter.} + +\item{iter}{(positive integer) The maximum number of iterations.} + +\item{tol_obj}{(positive real) Convergence tolerance on changes in objective function value.} + +\item{tol_rel_obj}{(positive real) Convergence tolerance on relative changes in objective function value.} + +\item{tol_grad}{(positive real) Convergence tolerance on the norm of the gradient.} + +\item{tol_rel_grad}{(positive real) Convergence tolerance on the relative norm of the gradient.} + +\item{tol_param}{(positive real) Convergence tolerance on changes in parameter value.} + +\item{history_size}{(positive integer) The size of the history used when +approximating the Hessian. Only available for L-BFGS.} } +\description{ +The \verb{$optimize()} method of a \code{\link{CmdStanModel}} object runs +Stan's optimizer to obtain a posterior mode (penalized maximum likelihood) +estimate. + +Any argument left as \code{NULL} will default to the default value used by the +installed version of CmdStan. See the +\href{https://mc-stan.org/docs/cmdstan-guide/}{CmdStan User’s Guide} +for more details. } +\details{ +CmdStan can find the posterior mode (assuming there is one). If the +posterior is not convex, there is no guarantee Stan will be able to find +the global mode as opposed to a local optimum of log probability. 
For +optimization, the mode is calculated without the Jacobian adjustment for +constrained variables, which shifts the mode due to the change of +variables. Thus modes correspond to modes of the model as written. +-- \href{https://mc-stan.org/docs/cmdstan-guide/}{\emph{CmdStan User's Guide}} +} \section{Value}{ - The \verb{$optimize()} method returns a \code{\link{CmdStanMLE}} object. + A \code{\link{CmdStanMLE}} object. } \examples{ diff --git a/man/model-method-sample.Rd b/man/model-method-sample.Rd index 29b836de8..6b66da06c 100644 --- a/man/model-method-sample.Rd +++ b/man/model-method-sample.Rd @@ -4,14 +4,8 @@ \alias{model-method-sample} \alias{sample} \title{Run Stan's MCMC algorithms} -\description{ -The \verb{$sample()} method of a \code{\link{CmdStanModel}} object runs the -default MCMC algorithm in CmdStan (\verb{algorithm=hmc engine=nuts}), to produce -a set of draws from the posterior distribution of a model conditioned on -some data. -} -\section{Usage}{ -\preformatted{$sample( +\usage{ +sample( data = NULL, seed = NULL, refresh = NULL, @@ -39,184 +33,211 @@ some data. window = NULL, fixed_param = FALSE, validate_csv = TRUE, - show_messages = TRUE + show_messages = TRUE, + cores = NULL, + num_cores = NULL, + num_chains = NULL, + num_warmup = NULL, + num_samples = NULL, + save_extra_diagnostics = NULL, + max_depth = NULL, + stepsize = NULL ) } -} - -\section{Arguments shared by all fitting methods}{ - The following arguments can -be specified for any of the fitting methods (\code{sample}, \code{optimize}, -\code{variational}). Arguments left at \code{NULL} default to the default used by the -installed version of CmdStan. +\arguments{ +\item{data}{(multiple options) The data to use for the variables specified in +the \code{data} block of the Stan program. One of the following: \itemize{ -\item \code{data}: (multiple options) The data to use. One of the following: -\itemize{ -\item A named list of \R objects (like for RStan). 
Internally this list is -then written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. -\item A path to a data file compatible with CmdStan (JSON or \R dump). See -the appendices in the CmdStan manual for details on using these formats. -} -\item \code{seed}: (positive integer) A seed for the (P)RNG to pass to CmdStan. -\item \code{refresh}: (non-negative integer) The number of iterations between -printed screen updates. If \code{refresh = 0}, only error messages will be printed. -\item \code{init}: (multiple options) The initialization method for the parameters block: +\item A named list of \R objects (like for RStan). Internally this list is then +written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. +\item A path to a data file compatible with CmdStan (JSON or \R dump). See the +appendices in the CmdStan manual for details on using these formats. +\item \code{NULL} or an empty list if the Stan program has no \code{data} block. +}} + +\item{seed}{(positive integer) A seed for the (P)RNG to pass to CmdStan.} + +\item{refresh}{(non-negative integer) The number of iterations between +printed screen updates. If \code{refresh = 0}, only error messages will be +printed.} + +\item{init}{(multiple options) The initialization method to use for the +variables declared in the \code{parameters} block of the Stan program: \itemize{ -\item A real number \code{x>0} initializes randomly between \verb{[-x,x]} (on the -\emph{unconstrained} parameter space); -\item \code{0} initializes to \code{0}; -\item A character vector of paths (one per chain) to JSON or Rdump files. See +\item A real number \code{x>0}. This initializes \emph{all} parameters randomly between +\verb{[-x,x]} (on the \emph{unconstrained} parameter space); +\item The number \code{0}. 
This initializes \emph{all} parameters to \code{0}; +\item A character vector of paths (one per chain) to JSON or Rdump files +containing initial values for all or some parameters. See \code{\link[=write_stan_json]{write_stan_json()}} to write \R objects to JSON files compatible with CmdStan. -\item A list of lists. For MCMC the list should contain a sublist for each -chain. For optimization and variational inference there should be just one -sublist. The sublists should have named elements corresponding to the -parameters for which you are specifying initial values. See \strong{Examples}. +\item A list of lists containing initial values for all or some parameters. For +MCMC the list should contain a sublist for each chain. For optimization and +variational inference there should be just one sublist. The sublists should +have named elements corresponding to the parameters for which you are +specifying initial values. See \strong{Examples}. \item A function that returns a single list with names corresponding to the -parameters for which you are specifying initial values. The function -can take no arguments or a single argument \code{chain_id}. For MCMC, if the -function has argument \code{chain_id} it will be supplied with the chain id -(from 1 to number of chains) when called to generate the initial -values. See \strong{Examples}. -} -\item \code{save_latent_dynamics}: (logical) Should auxiliary diagnostic information +parameters for which you are specifying initial values. The function can +take no arguments or a single argument \code{chain_id}. For MCMC, if the function +has argument \code{chain_id} it will be supplied with the chain id (from 1 to +number of chains) when called to generate the initial values. See +\strong{Examples}. +}} + +\item{save_latent_dynamics}{(logical) Should auxiliary diagnostic information about the latent dynamics be written to temporary diagnostic CSV files? 
This argument replaces CmdStan's \code{diagnostic_file} argument and the content written to CSV is controlled by the user's CmdStan installation and not -CmdStanR (and for some algorithms no content may be written). The default -is \code{save_latent_dynamics=FALSE}, which is appropriate for almost every use case -(all diagnostics recommended for users to check are \emph{always} saved, e.g., -divergences for HMC). To save the temporary files created when -\code{save_latent_dynamics=TRUE} see the -\code{\link[=fit-method-save_latent_dynamics_files]{$save_latent_dynamics_files()}} method. -\item \code{output_dir}: (string) A path to a directory where CmdStan should write +CmdStanR (for some algorithms no content may be written). The default +is \code{FALSE}, which is appropriate for almost every use case. To save the +temporary files created when \code{save_latent_dynamics=TRUE} see the +\code{\link[=fit-method-save_latent_dynamics_files]{$save_latent_dynamics_files()}} +method.} + +\item{output_dir}{(string) A path to a directory where CmdStan should write its output CSV files. For interactive use this can typically be left at -\code{NULL} (temporary directory) since CmdStanR makes the CmdStan output (e.g., -posterior draws and diagnostics) available in \R via methods of the fitted +\code{NULL} (temporary directory) since CmdStanR makes the CmdStan output +(posterior draws and diagnostics) available in \R via methods of the fitted model objects. The behavior of \code{output_dir} is as follows: \itemize{ \item If \code{NULL} (the default), then the CSV files are written to a temporary -directory and only saved permanently if the user calls one of the -\verb{$save_*} methods of the fitted model object (e.g., +directory and only saved permanently if the user calls one of the \verb{$save_*} +methods of the fitted model object (e.g., \code{\link[=fit-method-save_output_files]{$save_output_files()}}). 
These temporary -files are removed when the fitted model object is garbage collected. +files are removed when the fitted model object is +\link[base:gc]{garbage collected} (manually or automatically). \item If a path, then the files are created in \code{output_dir} with names -corresponding the defaults used by \verb{$save_output_files()} (and similar -methods like \verb{$save_latent_dynamics_files()}). -} -\item \code{sig_figs}: (positive integer) The number of significant figures used -for the output values. By default, CmdStan represent the output values with -6 significant figures. The upper limit for \code{sig_figs} is 18. Increasing -this value can cause an increased usage of disk space due to larger -output CSV files. -} -} +corresponding to the defaults used by \verb{$save_output_files()}. +}} -\section{Arguments unique to the \code{sample} method}{ - In addition to the -arguments above, the \verb{$sample()} method also has its own set of arguments. +\item{sig_figs}{(positive integer) The number of significant figures used +when storing the output values. By default, CmdStan represents the output +values with 6 significant figures. The upper limit for \code{sig_figs} is 18. +Increasing this value will result in larger output CSV files and thus an +increased usage of disk space.} -The following three arguments are offered by CmdStanR but do not correspond -to arguments in CmdStan: -\itemize{ -\item \code{chains}: (positive integer) The number of Markov chains to run. The -default is 4. -\item \code{parallel_chains}: (positive integer) The \emph{maximum} number of MCMC chains +\item{chains}{(positive integer) The number of Markov chains to run. The +default is 4.} + +\item{parallel_chains}{(positive integer) The \emph{maximum} number of MCMC chains to run in parallel. If \code{parallel_chains} is not specified then the default is to look for the option \code{"mc.cores"}, which can be set for an entire \R session by \code{options(mc.cores=value)}.
If the \code{"mc.cores"} option has not -been set then the default is \code{1}. -\item \code{chain_ids}: (vector) A vector of chain IDs. Must contain \code{chains} unique +been set then the default is \code{1}.} + +\item{chain_ids}{(vector) A vector of chain IDs. Must contain \code{chains} unique positive integers. If not set, the default chain IDs are used (integers -starting from \code{1}). -\item \code{threads_per_chain}: (positive integer) If the model was +starting from \code{1}).} + +\item{threads_per_chain}{(positive integer) If the model was \link[=model-method-compile]{compiled} with threading support, the number of threads to use in parallelized sections \emph{within} an MCMC chain (e.g., when using the Stan functions \code{reduce_sum()} or \code{map_rect()}). This is in contrast with \code{parallel_chains}, which specifies the number of chains to run in parallel. The actual number of CPU cores used use is \code{parallel_chains*threads_per_chain}. For an example of using threading see -the Stan case study \href{https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html}{Reduce Sum: A Minimal Example}. -\item \code{show_messages}: (logical) When \code{TRUE} (the default), prints all -informational messages, for example rejection of the current proposal. -Disable if you wish silence these messages, but this is not recommended -unless you are very sure that the model is correct up to numerical error. -If the messages are silenced then the \verb{$output()} method of the resulting -fit object can be used to display all the silenced messages. -\item \code{validate_csv}: (logical) When \code{TRUE} (the default), validate the -sampling results in the csv files. Disable if you wish to manually read in -the sampling results and validate them yourself, for example using -\code{\link[=read_cmdstan_csv]{read_cmdstan_csv()}}. 
-} +the Stan case study \href{https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html}{Reduce Sum: A Minimal Example}.} -The rest of the arguments correspond to arguments offered by CmdStan, -although some names are slightly different. They are described briefly here -and in greater detail in the CmdStan manual. Arguments left at \code{NULL} -default to the default used by the installed version of CmdStan. -The latest \href{https://mc-stan.org/docs/cmdstan-guide/}{CmdStan User’s Guide} -will have the default values for the latest version of CmdStan. -\itemize{ -\item \code{iter_sampling}: (positive integer) The number of post-warmup iterations to -run per chain. -\item \code{iter_warmup}: (positive integer) The number of warmup iterations to run -per chain. -\item \code{save_warmup}: (logical) Should warmup iterations be saved? The default +\item{iter_warmup}{(positive integer) The number of warmup iterations to run +per chain. Note: in the CmdStan User's Guide this is referred to as +\code{num_warmup}.} + +\item{iter_sampling}{(positive integer) The number of post-warmup iterations +to run per chain. Note: in the CmdStan User's Guide this is referred to as +\code{num_samples}.} + +\item{save_warmup}{(logical) Should warmup iterations be saved? The default is \code{FALSE}. If \code{save_warmup=TRUE} then you can use \link[=fit-method-draws]{$draws(inc_warmup=TRUE)} to include warmup when -accessing the draws. -\item \code{thin}: (positive integer) The period between saved samples. This should -be left at its default (no thinning) unless memory is a problem. -\item \code{max_treedepth}: (positive integer) The maximum allowed tree depth for the -NUTS engine. See the \emph{Tree Depth} section of the CmdStan manual for more -details. -\item \code{adapt_engaged}: (logical) Do warmup adaptation? The default is \code{TRUE}. +accessing the draws.} + +\item{thin}{(positive integer) The period between saved samples. 
This should +typically be left at its default (no thinning) unless memory is a problem.} + +\item{max_treedepth}{(positive integer) The maximum allowed tree depth for +the NUTS engine. See the \emph{Tree Depth} section of the CmdStan User's Guide +for more details.} + +\item{adapt_engaged}{(logical) Do warmup adaptation? The default is \code{TRUE}. If a precomputed inverse metric is specified via the \code{inv_metric} argument (or \code{metric_file}) then, if \code{adapt_engaged=TRUE}, Stan will use the provided inverse metric just as an initial guess during adaptation. To turn off adaptation when using a precomputed inverse metric set -\code{adapt_engaged=FALSE}. -\item \code{adapt_delta}: (real in \verb{(0,1)}) The adaptation target acceptance -statistic. -\item \code{step_size}: (positive real) The \emph{initial} step size for the discrete +\code{adapt_engaged=FALSE}.} + +\item{adapt_delta}{(real in \verb{(0,1)}) The adaptation target acceptance +statistic.} + +\item{step_size}{(positive real) The \emph{initial} step size for the discrete approximation to continuous Hamiltonian dynamics. This is further tuned -during warmup. -\item \code{metric}: (character) One of \code{"diag_e"}, \code{"dense_e"}, or \code{"unit_e"}, +during warmup.} + +\item{metric}{(character) One of \code{"diag_e"}, \code{"dense_e"}, or \code{"unit_e"}, specifying the geometry of the base manifold. See the \emph{Euclidean Metric} -section of the CmdStan documentation for more details. To specify a -precomputed (inverse) metric, see the \code{inv_metric} argument below. -\item \code{metric_file}: (character) A character vector containing paths to JSON or +section of the CmdStan User's Guide for more details. To specify a +precomputed (inverse) metric, see the \code{inv_metric} argument below.} + +\item{metric_file}{(character) A character vector containing paths to JSON or Rdump files (one per chain) compatible with CmdStan that contain precomputed inverse metrics. 
The \code{metric_file} argument is inherited from CmdStan but is confusing in that the entry in JSON or Rdump file(s) must be named \code{inv_metric}, referring to the \emph{inverse} metric. We recommend instead using CmdStanR's \code{inv_metric} argument (see below) to specify an inverse -metric directly using a vector or matrix from your \R session. -\item \code{inv_metric}: (vector, matrix) A vector (if \code{metric='diag_e'}) or a -matrix (if \code{metric='dense_e'}) for initializing the inverse metric, which +metric directly using a vector or matrix from your \R session.} + +\item{inv_metric}{(vector, matrix) A vector (if \code{metric='diag_e'}) or a +matrix (if \code{metric='dense_e'}) for initializing the inverse metric. This can be used as an alternative to the \code{metric_file} argument. A vector is interpreted as a diagonal metric. The inverse metric is usually set to an estimate of the posterior covariance. See the \code{adapt_engaged} argument -above for details on (and control over) how specifying a precomputed -inverse metric interacts with adaptation. -\item \code{init_buffer}: (nonnegative integer) Width of initial fast timestep -adaptation interval during warmup. -\item \code{term_buffer}: (nonnegative integer) Width of final fast timestep -adaptation interval during warmup. -\item \code{window}: (nonnegative integer) Initial width of slow timestep/metric -adaptation interval. 
-\item \code{fixed_param}: (logical) When \code{TRUE}, call CmdStan with argument +above for details about (and control over) how specifying a precomputed +inverse metric interacts with adaptation.} + +\item{init_buffer}{(nonnegative integer) Width of initial fast timestep +adaptation interval during warmup.} + +\item{term_buffer}{(nonnegative integer) Width of final fast timestep +adaptation interval during warmup.} + +\item{window}{(nonnegative integer) Initial width of slow timestep/metric +adaptation interval.} + +\item{fixed_param}{(logical) When \code{TRUE}, call CmdStan with argument \code{"algorithm=fixed_param"}. The default is \code{FALSE}. The fixed parameter sampler generates a new sample without changing the current state of the Markov chain; only generated quantities may change. This can be useful when, for example, trying to generate pseudo-data using the generated quantities block. If the parameters block is empty then using \code{fixed_param=TRUE} is mandatory. When \code{fixed_param=TRUE} the \code{chains} and -\code{parallel_chains} arguments will be set to \code{1}. -} +\code{parallel_chains} arguments will be set to \code{1}.} + +\item{validate_csv}{(logical) When \code{TRUE} (the default), validate the +sampling results in the csv files. Disable if you wish to manually read in +the sampling results and validate them yourself, for example using +\code{\link[=read_cmdstan_csv]{read_cmdstan_csv()}}.} + +\item{show_messages}{(logical) When \code{TRUE} (the default), prints all +informational messages, for example rejection of the current proposal. +Disable if you wish to silence these messages, but this is not recommended +unless you are very sure that the model is correct up to numerical error. 
+If the messages are silenced then the \verb{$output()} method of the resulting +fit object can be used to display all the silenced messages.} + +\item{cores, num_cores, num_chains, num_warmup, num_samples, save_extra_diagnostics, max_depth, stepsize}{Deprecated and will be removed in a future release.} } +\description{ +The \verb{$sample()} method of a \code{\link{CmdStanModel}} object runs the +default MCMC algorithm in CmdStan (\verb{algorithm=hmc engine=nuts}), to produce +a set of draws from the posterior distribution of a model conditioned on +some data. +Any argument left as \code{NULL} will default to the default value used by the +installed version of CmdStan. See the +\href{https://mc-stan.org/docs/cmdstan-guide/}{CmdStan User’s Guide} +for more details. +} \section{Value}{ - The \verb{$sample()} method returns a \code{\link{CmdStanMCMC}} object. + A \code{\link{CmdStanMCMC}} object. } \examples{ diff --git a/man/model-method-sample_mpi.Rd b/man/model-method-sample_mpi.Rd index 87c26044e..da9e5d7b8 100644 --- a/man/model-method-sample_mpi.Rd +++ b/man/model-method-sample_mpi.Rd @@ -4,40 +4,8 @@ \alias{model-method-sample_mpi} \alias{sample_mpi} \title{Run Stan's MCMC algorithms with MPI} -\description{ -The \verb{$sample_mpi()} method of a \code{\link{CmdStanModel}} object is -identical to the \verb{$sample()} method but with support for -\href{https://mc-stan.org/math/mpi.html}{MPI}. The target audience for MPI are -those with large computer clusters. For other users, the -\code{\link[=model-method-sample]{$sample()}} method provides both parallelization of -chains and threading support for within-chain parallelization. -} -\details{ -In order to use MPI with Stan, an MPI implementation must be -installed. For Unix systems the most commonly used implementations are -MPICH and OpenMPI. The implementations provide an MPI C++ compiler wrapper -(for example mpicxx), which is required to compile the model. 
- -An example of compiling with MPI:\preformatted{mpi_options <- list(STAN_MPI=TRUE, CXX="mpicxx", TBB_CXX_TYPE="gcc") -mod <- cmdstan_model("model.stan", cpp_options = mpi_options) -} - -The C++ options that must be supplied to the -\link[=model-method-compile]{compile} call are: -\itemize{ -\item \code{STAN_MPI}: Enables the use of MPI with Stan if \code{TRUE}. -\item \code{CXX}: The name of the MPI C++ compiler wrapper. Typically \code{"mpicxx"}. -\item \code{TBB_CXX_TYPE}: The C++ compiler the MPI wrapper wraps. Typically \code{"gcc"} -on Linux and \code{"clang"} on macOS. -} - -In the call to the \verb{$sample_mpi()} method we can also provide the name of -the MPI launcher (\code{mpi_cmd}, defaulting to \code{"mpiexec"}) and any other -MPI launch arguments. In most cases, it is enough to only define the number -of processes with \code{mpi_args = list("n" = 4)}. -} -\section{Usage}{ -\preformatted{$sample_mpi( +\usage{ +sample_mpi( data = NULL, mpi_cmd = "mpiexec", mpi_args = NULL, @@ -46,9 +14,7 @@ of processes with \code{mpi_args = list("n" = 4)}. init = NULL, save_latent_dynamics = FALSE, output_dir = NULL, - sig_figs = NULL, - chains = 4, - parallel_chains = getOption("mc.cores", 1), + chains = 1, chain_ids = seq_len(chains), iter_warmup = NULL, iter_sampling = NULL, @@ -65,30 +31,217 @@ of processes with \code{mpi_args = list("n" = 4)}. term_buffer = NULL, window = NULL, fixed_param = FALSE, + sig_figs = NULL, validate_csv = TRUE, show_messages = TRUE ) } -} +\arguments{ +\item{data}{(multiple options) The data to use for the variables specified in +the \code{data} block of the Stan program. One of the following: +\itemize{ +\item A named list of \R objects (like for RStan). Internally this list is then +written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. +\item A path to a data file compatible with CmdStan (JSON or \R dump). See the +appendices in the CmdStan manual for details on using these formats. 
+\item \code{NULL} or an empty list if the Stan program has no \code{data} block. +}} + +\item{mpi_cmd}{(character vector) The MPI launcher used for launching MPI +processes. The default launcher is \code{"mpiexec"}.} -\section{Arguments unique to the \code{sample_mpi} method}{ +\item{mpi_args}{(list) A list of arguments to use when launching MPI +processes. For example, \code{mpi_args = list("n" = 4)} launches the executable +as \verb{mpiexec -n 4 model_executable}, followed by CmdStan arguments for the +model executable.} +\item{seed}{(positive integer) A seed for the (P)RNG to pass to CmdStan.} + +\item{refresh}{(non-negative integer) The number of iterations between +printed screen updates. If \code{refresh = 0}, only error messages will be +printed.} + +\item{init}{(multiple options) The initialization method to use for the +variables declared in the \code{parameters} block of the Stan program: +\itemize{ +\item A real number \code{x>0}. This initializes \emph{all} parameters randomly between +\verb{[-x,x]} (on the \emph{unconstrained} parameter space); +\item The number \code{0}. This initializes \emph{all} parameters to \code{0}; +\item A character vector of paths (one per chain) to JSON or Rdump files +containing initial values for all or some parameters. See +\code{\link[=write_stan_json]{write_stan_json()}} to write \R objects to JSON files compatible with +CmdStan. +\item A list of lists containing initial values for all or some parameters. For +MCMC the list should contain a sublist for each chain. For optimization and +variational inference there should be just one sublist. The sublists should +have named elements corresponding to the parameters for which you are +specifying initial values. See \strong{Examples}. +\item A function that returns a single list with names corresponding to the +parameters for which you are specifying initial values. The function can +take no arguments or a single argument \code{chain_id}. 
For MCMC, if the function +has argument \code{chain_id} it will be supplied with the chain id (from 1 to +number of chains) when called to generate the initial values. See +\strong{Examples}. +}} + +\item{save_latent_dynamics}{(logical) Should auxiliary diagnostic information +about the latent dynamics be written to temporary diagnostic CSV files? +This argument replaces CmdStan's \code{diagnostic_file} argument and the content +written to CSV is controlled by the user's CmdStan installation and not +CmdStanR (for some algorithms no content may be written). The default +is \code{FALSE}, which is appropriate for almost every use case. To save the +temporary files created when \code{save_latent_dynamics=TRUE} see the +\code{\link[=fit-method-save_latent_dynamics_files]{$save_latent_dynamics_files()}} +method.} + +\item{output_dir}{(string) A path to a directory where CmdStan should write +its output CSV files. For interactive use this can typically be left at +\code{NULL} (temporary directory) since CmdStanR makes the CmdStan output +(posterior draws and diagnostics) available in \R via methods of the fitted +model objects. The behavior of \code{output_dir} is as follows: \itemize{ -\item \code{mpi_cmd}: (character vector) The MPI launcher used for launching MPI processes. -The default launcher is \code{"mpiexec"}. -\item \code{mpi_args}: (list) A list of arguments to use when launching MPI processes. -For example, \code{mpi_args = list("n" = 4)} launches the executable as -\verb{mpiexec -n 4 model_executable}, followed by CmdStan arguments -for the model executable. +\item If \code{NULL} (the default), then the CSV files are written to a temporary +directory and only saved permanently if the user calls one of the \verb{$save_*} +methods of the fitted model object (e.g., +\code{\link[=fit-method-save_output_files]{$save_output_files()}}). These temporary +files are removed when the fitted model object is +\link[base:gc]{garbage collected} (manually or automatically). 
+\item If a path, then the files are created in \code{output_dir} with names +corresponding to the defaults used by \verb{$save_output_files()}. +}} + +\item{chains}{(positive integer) The number of Markov chains to run. The +default is 1.} + +\item{chain_ids}{(vector) A vector of chain IDs. Must contain \code{chains} unique +positive integers. If not set, the default chain IDs are used (integers +starting from \code{1}).} + +\item{iter_warmup}{(positive integer) The number of warmup iterations to run +per chain. Note: in the CmdStan User's Guide this is referred to as +\code{num_warmup}.} + +\item{iter_sampling}{(positive integer) The number of post-warmup iterations +to run per chain. Note: in the CmdStan User's Guide this is referred to as +\code{num_samples}.} + +\item{save_warmup}{(logical) Should warmup iterations be saved? The default +is \code{FALSE}. If \code{save_warmup=TRUE} then you can use +\link[=fit-method-draws]{$draws(inc_warmup=TRUE)} to include warmup when +accessing the draws.} + +\item{thin}{(positive integer) The period between saved samples. This should +typically be left at its default (no thinning) unless memory is a problem.} + +\item{max_treedepth}{(positive integer) The maximum allowed tree depth for +the NUTS engine. See the \emph{Tree Depth} section of the CmdStan User's Guide +for more details.} + +\item{adapt_engaged}{(logical) Do warmup adaptation? The default is \code{TRUE}. +If a precomputed inverse metric is specified via the \code{inv_metric} argument +(or \code{metric_file}) then, if \code{adapt_engaged=TRUE}, Stan will use the +provided inverse metric just as an initial guess during adaptation. To turn +off adaptation when using a precomputed inverse metric set +\code{adapt_engaged=FALSE}.} + +\item{adapt_delta}{(real in \verb{(0,1)}) The adaptation target acceptance +statistic.} + +\item{step_size}{(positive real) The \emph{initial} step size for the discrete +approximation to continuous Hamiltonian dynamics. 
This is further tuned +during warmup.} + +\item{metric}{(character) One of \code{"diag_e"}, \code{"dense_e"}, or \code{"unit_e"}, +specifying the geometry of the base manifold. See the \emph{Euclidean Metric} +section of the CmdStan User's Guide for more details. To specify a +precomputed (inverse) metric, see the \code{inv_metric} argument below.} + +\item{metric_file}{(character) A character vector containing paths to JSON or +Rdump files (one per chain) compatible with CmdStan that contain +precomputed inverse metrics. The \code{metric_file} argument is inherited from +CmdStan but is confusing in that the entry in JSON or Rdump file(s) must be +named \code{inv_metric}, referring to the \emph{inverse} metric. We recommend instead +using CmdStanR's \code{inv_metric} argument (see below) to specify an inverse +metric directly using a vector or matrix from your \R session.} + +\item{inv_metric}{(vector, matrix) A vector (if \code{metric='diag_e'}) or a +matrix (if \code{metric='dense_e'}) for initializing the inverse metric. This +can be used as an alternative to the \code{metric_file} argument. A vector is +interpreted as a diagonal metric. The inverse metric is usually set to an +estimate of the posterior covariance. See the \code{adapt_engaged} argument +above for details about (and control over) how specifying a precomputed +inverse metric interacts with adaptation.} + +\item{init_buffer}{(nonnegative integer) Width of initial fast timestep +adaptation interval during warmup.} + +\item{term_buffer}{(nonnegative integer) Width of final fast timestep +adaptation interval during warmup.} + +\item{window}{(nonnegative integer) Initial width of slow timestep/metric +adaptation interval.} + +\item{fixed_param}{(logical) When \code{TRUE}, call CmdStan with argument +\code{"algorithm=fixed_param"}. The default is \code{FALSE}. 
The fixed parameter +sampler generates a new sample without changing the current state of the +Markov chain; only generated quantities may change. This can be useful +when, for example, trying to generate pseudo-data using the generated +quantities block. If the parameters block is empty then using +\code{fixed_param=TRUE} is mandatory. When \code{fixed_param=TRUE} the \code{chains} and +\code{parallel_chains} arguments will be set to \code{1}.} + +\item{sig_figs}{(positive integer) The number of significant figures used +when storing the output values. By default, CmdStan represents the output +values with 6 significant figures. The upper limit for \code{sig_figs} is 18. +Increasing this value will result in larger output CSV files and thus an +increased usage of disk space.} + +\item{validate_csv}{(logical) When \code{TRUE} (the default), validate the +sampling results in the csv files. Disable if you wish to manually read in +the sampling results and validate them yourself, for example using +\code{\link[=read_cmdstan_csv]{read_cmdstan_csv()}}.} + +\item{show_messages}{(logical) When \code{TRUE} (the default), prints all +informational messages, for example rejection of the current proposal. +Disable if you wish to silence these messages, but this is not recommended +unless you are very sure that the model is correct up to numerical error. +If the messages are silenced then the \verb{$output()} method of the resulting +fit object can be used to display all the silenced messages.} +} +\description{ +The \verb{$sample_mpi()} method of a \code{\link{CmdStanModel}} object is +identical to the \verb{$sample()} method but with support for +\href{https://mc-stan.org/math/mpi.html}{MPI}. The target audience for MPI is +those with large computer clusters. For other users, the +\code{\link[=model-method-sample]{$sample()}} method provides both parallelization of +chains and threading support for within-chain parallelization. 
+ +In order to use MPI with Stan, an MPI implementation must be +installed. For Unix systems the most commonly used implementations are +MPICH and OpenMPI. The implementations provide an MPI C++ compiler wrapper +(for example mpicxx), which is required to compile the model. + +An example of compiling with MPI:\preformatted{mpi_options = list(STAN_MPI=TRUE, CXX="mpicxx", TBB_CXX_TYPE="gcc") +mod = cmdstan_model("model.stan", cpp_options = mpi_options) } -All other arguments are the same as for \code{\link[=model-method-sample]{$sample()}} -except \verb{$sample_mpi()} does not have arguments \code{threads_per_chain} or -\code{parallel_chains}. +The C++ options that must be supplied to the +\link[=model-method-compile]{compile} call are: +\itemize{ +\item \code{STAN_MPI}: Enables the use of MPI with Stan if \code{TRUE}. +\item \code{CXX}: The name of the MPI C++ compiler wrapper. Typically \code{"mpicxx"}. +\item \code{TBB_CXX_TYPE}: The C++ compiler the MPI wrapper wraps. Typically \code{"gcc"} +on Linux and \code{"clang"} on macOS. } +In the call to the \verb{$sample_mpi()} method it is also possible to provide +the name of the MPI launcher (\code{mpi_cmd}, defaulting to \code{"mpiexec"}) and any +other MPI launch arguments (\code{mpi_args}). In most cases, it is enough to +only define the number of processes. To use \code{n_procs} processes specify +\code{mpi_args = list("n" = n_procs)}. +} \section{Value}{ - The \verb{$sample_mpi()} method returns a \code{\link{CmdStanMCMC}} object. + A \code{\link{CmdStanMCMC}} object. 
} \examples{ diff --git a/man/model-method-variational.Rd b/man/model-method-variational.Rd index 1a565d78d..a45cbd11e 100644 --- a/man/model-method-variational.Rd +++ b/man/model-method-variational.Rd @@ -4,22 +4,8 @@ \alias{model-method-variational} \alias{variational} \title{Run Stan's variational approximation algorithms} -\description{ -The \verb{$variational()} method of a \code{\link{CmdStanModel}} object runs -Stan's variational Bayes (ADVI) algorithms. -} -\details{ -CmdStan can fit a variational approximation to the posterior. The -approximation is a Gaussian in the unconstrained variable space. Stan -implements two variational algorithms. The \code{algorithm="meanfield"} option -uses a fully factorized Gaussian for the approximation. The -\code{algorithm="fullrank"} option uses a Gaussian with a full-rank covariance -matrix for the approximation. - --- \href{https://github.com/stan-dev/cmdstan/releases/latest}{\emph{CmdStan Interface User's Guide}} -} -\section{Usage}{ -\preformatted{$variational( +\usage{ +variational( data = NULL, seed = NULL, refresh = NULL, @@ -40,108 +26,131 @@ matrix for the approximation. output_samples = NULL ) } -} - -\section{Arguments shared by all fitting methods}{ - The following arguments can -be specified for any of the fitting methods (\code{sample}, \code{optimize}, -\code{variational}). Arguments left at \code{NULL} default to the default used by the -installed version of CmdStan. -\itemize{ -\item \code{data}: (multiple options) The data to use. One of the following: +\arguments{ +\item{data}{(multiple options) The data to use for the variables specified in +the \code{data} block of the Stan program. One of the following: \itemize{ -\item A named list of \R objects (like for RStan). Internally this list is -then written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. -\item A path to a data file compatible with CmdStan (JSON or \R dump). 
See -the appendices in the CmdStan manual for details on using these formats. -} -\item \code{seed}: (positive integer) A seed for the (P)RNG to pass to CmdStan. -\item \code{refresh}: (non-negative integer) The number of iterations between -printed screen updates. If \code{refresh = 0}, only error messages will be printed. -\item \code{init}: (multiple options) The initialization method for the parameters block: +\item A named list of \R objects (like for RStan). Internally this list is then +written to JSON for CmdStan using \code{\link[=write_stan_json]{write_stan_json()}}. +\item A path to a data file compatible with CmdStan (JSON or \R dump). See the +appendices in the CmdStan manual for details on using these formats. +\item \code{NULL} or an empty list if the Stan program has no \code{data} block. +}} + +\item{seed}{(positive integer) A seed for the (P)RNG to pass to CmdStan.} + +\item{refresh}{(non-negative integer) The number of iterations between +printed screen updates. If \code{refresh = 0}, only error messages will be +printed.} + +\item{init}{(multiple options) The initialization method to use for the +variables declared in the \code{parameters} block of the Stan program: \itemize{ -\item A real number \code{x>0} initializes randomly between \verb{[-x,x]} (on the -\emph{unconstrained} parameter space); -\item \code{0} initializes to \code{0}; -\item A character vector of paths (one per chain) to JSON or Rdump files. See +\item A real number \code{x>0}. This initializes \emph{all} parameters randomly between +\verb{[-x,x]} (on the \emph{unconstrained} parameter space); +\item The number \code{0}. This initializes \emph{all} parameters to \code{0}; +\item A character vector of paths (one per chain) to JSON or Rdump files +containing initial values for all or some parameters. See \code{\link[=write_stan_json]{write_stan_json()}} to write \R objects to JSON files compatible with CmdStan. -\item A list of lists. 
For MCMC the list should contain a sublist for each -chain. For optimization and variational inference there should be just one -sublist. The sublists should have named elements corresponding to the -parameters for which you are specifying initial values. See \strong{Examples}. +\item A list of lists containing initial values for all or some parameters. For +MCMC the list should contain a sublist for each chain. For optimization and +variational inference there should be just one sublist. The sublists should +have named elements corresponding to the parameters for which you are +specifying initial values. See \strong{Examples}. \item A function that returns a single list with names corresponding to the -parameters for which you are specifying initial values. The function -can take no arguments or a single argument \code{chain_id}. For MCMC, if the -function has argument \code{chain_id} it will be supplied with the chain id -(from 1 to number of chains) when called to generate the initial -values. See \strong{Examples}. -} -\item \code{save_latent_dynamics}: (logical) Should auxiliary diagnostic information +parameters for which you are specifying initial values. The function can +take no arguments or a single argument \code{chain_id}. For MCMC, if the function +has argument \code{chain_id} it will be supplied with the chain id (from 1 to +number of chains) when called to generate the initial values. See +\strong{Examples}. +}} + +\item{save_latent_dynamics}{(logical) Should auxiliary diagnostic information about the latent dynamics be written to temporary diagnostic CSV files? This argument replaces CmdStan's \code{diagnostic_file} argument and the content written to CSV is controlled by the user's CmdStan installation and not -CmdStanR (and for some algorithms no content may be written). 
The default -is \code{save_latent_dynamics=FALSE}, which is appropriate for almost every use case -(all diagnostics recommended for users to check are \emph{always} saved, e.g., -divergences for HMC). To save the temporary files created when -\code{save_latent_dynamics=TRUE} see the -\code{\link[=fit-method-save_latent_dynamics_files]{$save_latent_dynamics_files()}} method. -\item \code{output_dir}: (string) A path to a directory where CmdStan should write +CmdStanR (for some algorithms no content may be written). The default +is \code{FALSE}, which is appropriate for almost every use case. To save the +temporary files created when \code{save_latent_dynamics=TRUE} see the +\code{\link[=fit-method-save_latent_dynamics_files]{$save_latent_dynamics_files()}} +method.} + +\item{output_dir}{(string) A path to a directory where CmdStan should write its output CSV files. For interactive use this can typically be left at -\code{NULL} (temporary directory) since CmdStanR makes the CmdStan output (e.g., -posterior draws and diagnostics) available in \R via methods of the fitted +\code{NULL} (temporary directory) since CmdStanR makes the CmdStan output +(posterior draws and diagnostics) available in \R via methods of the fitted model objects. The behavior of \code{output_dir} is as follows: \itemize{ \item If \code{NULL} (the default), then the CSV files are written to a temporary -directory and only saved permanently if the user calls one of the -\verb{$save_*} methods of the fitted model object (e.g., +directory and only saved permanently if the user calls one of the \verb{$save_*} +methods of the fitted model object (e.g., \code{\link[=fit-method-save_output_files]{$save_output_files()}}). These temporary -files are removed when the fitted model object is garbage collected. +files are removed when the fitted model object is +\link[base:gc]{garbage collected} (manually or automatically). 
+\item If a path, then the files are created in \code{output_dir} with names -corresponding the defaults used by \verb{$save_output_files()} (and similar -methods like \verb{$save_latent_dynamics_files()}). -} -\item \code{sig_figs}: (positive integer) The number of significant figures used -for the output values. By default, CmdStan represent the output values with -6 significant figures. The upper limit for \code{sig_figs} is 18. Increasing -this value can cause an increased usage of disk space due to larger -output CSV files. -} -} +corresponding to the defaults used by \verb{$save_output_files()}. +}} -\section{Arguments unique to the \code{variational} method}{ - In addition to the -arguments above, the \verb{$variational()} method also has its own set of -arguments. These arguments are described briefly here and in greater detail -in the CmdStan manual. Arguments left at \code{NULL} default to the default used -by the installed version of CmdStan. -\itemize{ -\item \code{threads}: (positive integer) If the model was +\item{sig_figs}{(positive integer) The number of significant figures used +when storing the output values. By default, CmdStan represents the output +values with 6 significant figures. The upper limit for \code{sig_figs} is 18. +Increasing this value will result in larger output CSV files and thus an +increased usage of disk space.} + +\item{threads}{(positive integer) If the model was \link[=model-method-compile]{compiled} with threading support, the number of -threads to use in parallelized sections (e.g., when -using the Stan functions \code{reduce_sum()} or \code{map_rect()}). -\item \code{algorithm}: (string) The algorithm. Either \code{"meanfield"} or \code{"fullrank"}. -\item \code{iter}: (positive integer) The \emph{maximum} number of iterations. -\item \code{grad_samples}: (positive integer) The number of samples for Monte Carlo -estimate of gradients. 
-\item \code{elbo_samples}: (positive integer) The number of samples for Monte Carlo -estimate of ELBO (objective function). -\item \code{eta}: (positive real) The step size weighting parameter for adaptive -step size sequence. -\item \code{adapt_engaged}: (logical) Do warmup adaptation? -\item \code{adapt_iter}: (positive integer) The \emph{maximum} number of adaptation -iterations. -\item \code{tol_rel_obj}: (positive real) Convergence tolerance on the relative norm -of the objective. -\item \code{eval_elbo}: (positive integer) Evaluate ELBO every Nth iteration. -\item \verb{output_samples:} (positive integer) Number of posterior samples to draw -and save. +threads to use in parallelized sections (e.g., when using the Stan +functions \code{reduce_sum()} or \code{map_rect()}).} + +\item{algorithm}{(string) The algorithm. Either \code{"meanfield"} or +\code{"fullrank"}.} + +\item{iter}{(positive integer) The \emph{maximum} number of iterations.} + +\item{grad_samples}{(positive integer) The number of samples for Monte Carlo +estimate of gradients.} + +\item{elbo_samples}{(positive integer) The number of samples for Monte Carlo +estimate of ELBO (objective function).} + +\item{eta}{(positive real) The step size weighting parameter for adaptive +step size sequence.} + +\item{adapt_engaged}{(logical) Do warmup adaptation?} + +\item{adapt_iter}{(positive integer) The \emph{maximum} number of adaptation +iterations.} + +\item{tol_rel_obj}{(positive real) Convergence tolerance on the relative norm +of the objective.} + +\item{eval_elbo}{(positive integer) Evaluate ELBO every Nth iteration.} + +\item{output_samples}{(positive integer) Number of approximate posterior +samples to draw and save.} } +\description{ +The \verb{$variational()} method of a \code{\link{CmdStanModel}} object runs +Stan's variational Bayes (ADVI) algorithms. + +Any argument left as \code{NULL} will default to the default value used by the +installed version of CmdStan. 
See the +\href{https://mc-stan.org/docs/cmdstan-guide/}{CmdStan User’s Guide} +for more details. } +\details{ +CmdStan can fit a variational approximation to the posterior. The +approximation is a Gaussian in the unconstrained variable space. Stan +implements two variational algorithms. The \code{algorithm="meanfield"} option +uses a fully factorized Gaussian for the approximation. The +\code{algorithm="fullrank"} option uses a Gaussian with a full-rank covariance +matrix for the approximation. +-- \href{https://github.com/stan-dev/cmdstan/releases/latest}{\emph{CmdStan Interface User's Guide}} +} \section{Value}{ - The \verb{$variational()} method returns a \code{\link{CmdStanVB}} object. + A \code{\link{CmdStanVB}} object. } \examples{ diff --git a/man/register_knitr_engine.Rd b/man/register_knitr_engine.Rd index 8783b6042..bec937cc9 100644 --- a/man/register_knitr_engine.Rd +++ b/man/register_knitr_engine.Rd @@ -7,8 +7,8 @@ register_knitr_engine(override = TRUE) } \arguments{ -\item{override}{Override knitr's built-in, RStan-based engine for \code{stan}. -See below for details.} +\item{override}{Override knitr's built-in, RStan-based engine for Stan? The +default is \code{TRUE}. See \strong{Details}.} } \description{ Registers CmdStanR's knitr engine \code{\link[=eng_cmdstan]{eng_cmdstan()}} for processing Stan chunks. @@ -42,7 +42,7 @@ specify \code{engine = "cmdstan"} in the chunk options after registering the } \references{ \itemize{ -\item \href{https://bookdown.org/yihui/rmarkdown-cookbook/custom-engine.html}{Register a custom language engine} -\item \href{https://bookdown.org/yihui/rmarkdown/language-engines.html#stan}{Stan language engine} +\item \href{https://bookdown.org/yihui/rmarkdown-cookbook/custom-engine.html}{Register a custom language engine for knitr} +\item \href{https://bookdown.org/yihui/rmarkdown/language-engines.html#stan}{knitr's built-in Stan language engine} } }