diff --git a/DESCRIPTION b/DESCRIPTION index d5ed0fa50..e0dfc741b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: cmdstanr Title: R Interface to 'CmdStan' -Version: 0.2.2 -Date: 2020-12-03 +Version: 0.3.0 +Date: 2020-12-17 Authors@R: c(person(given = "Jonah", family = "Gabry", role = c("aut", "cre"), email = "jsg2201@columbia.edu"), @@ -26,11 +26,11 @@ Roxygen: list(markdown = TRUE, r6 = FALSE) SystemRequirements: CmdStan (https://mc-stan.org/users/interfaces/cmdstan) Imports: checkmate, + data.table, jsonlite (>= 1.2.0), posterior (>= 0.1.3), processx (>= 3.4.5), - R6 (>= 2.4.0), - data.table + R6 (>= 2.4.0) Suggests: bayesplot, knitr, diff --git a/NEWS.md b/NEWS.md index 74d1142a0..8d7cc822c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# Items for next tagged release +# cmdstanr 0.3.0 ### Bug fixes @@ -29,6 +29,10 @@ objects. (#390) * The `$optimize()` method now supports all of CmdStan's tolerance-related arguments for (L)BFGS. (#398) +* The documentation for the R6 methods now uses `@param`, which allows package +developers to import the CmdStanR documentation using roxygen2's +`@inheritParams`. (#408) + # cmdstanr 0.2.2 ### Bug fixes diff --git a/R/args.R b/R/args.R index aa03b1068..9f39bebe8 100644 --- a/R/args.R +++ b/R/args.R @@ -829,7 +829,7 @@ validate_seed <- function(seed, num_procs) { #' @return An integer vector of length `num_procs`. 
maybe_generate_seed <- function(seed, num_procs) { if (is.null(seed)) { - seed <- sample(.Machine$integer.max, num_procs) + seed <- base::sample(.Machine$integer.max, num_procs) } else if (length(seed) == 1 && num_procs > 1) { seed <- as.integer(seed) seed <- c(seed, seed + 1:(num_procs -1)) diff --git a/R/cmdstanr-package.R b/R/cmdstanr-package.R index a65a42c49..37822b29c 100644 --- a/R/cmdstanr-package.R +++ b/R/cmdstanr-package.R @@ -52,4 +52,4 @@ #' NULL -if (getRversion() >= "2.15.1") utils::globalVariables(c("self", "private")) +if (getRversion() >= "2.15.1") utils::globalVariables(c("self", "private", "super")) diff --git a/R/fit.R b/R/fit.R index 815e21e88..5720de1d9 100644 --- a/R/fit.R +++ b/R/fit.R @@ -11,86 +11,9 @@ CmdStanFit <- R6::R6Class( self$runset <- runset invisible(self) }, - - save_object = function(file, ...) { - self$draws() - try(self$sampler_diagnostics(), silent = TRUE) - try(self$init(), silent = TRUE) - saveRDS(self, file = file, ...) - invisible(self) - }, - num_procs = function() { self$runset$num_procs() }, - - time = function() { - self$runset$time() - }, - - draws = function(variables = NULL) { - if (!length(self$output_files(include_failed = FALSE))) { - stop("Fitting failed. Unable to retrieve the draws.", call. = FALSE) - } - # CmdStanMCMC has its own implementation, this is used for VB and MLE - if (is.null(private$draws_)) { - private$read_csv_() - } - posterior::subset_draws(private$draws_, variable = variables) - }, - - lp = function() { - lp__ <- self$draws(variables = "lp__") - lp__ <- posterior::as_draws_matrix(lp__) # if mcmc this combines all chains, otherwise does nothing - as.numeric(lp__) - }, - - metadata = function() { - if (!length(self$output_files(include_failed = FALSE))) { - stop("Fitting failed. Unable to retrieve the metadata.", call. 
= FALSE) - } - if (is.null(private$metadata_)) { - private$read_csv_() - } - private$metadata_ - }, - - init = function() { - if (is.null(private$init_)) { - init_paths <- self$metadata()$init - if (!is.character(init_paths) || any(!file.exists(init_paths))) { - stop("Can't find initial values files.", call. = FALSE) - } - private$init_ <- lapply(init_paths, jsonlite::read_json, simplifyVector = TRUE) - } - private$init_ - }, - - summary = function(variables = NULL, ...) { - draws <- self$draws(variables) - if (self$runset$method() == "sample") { - summary <- posterior::summarise_draws(draws, ...) - } else { - if (!length(list(...))) { - # if user didn't supply any args use default summary measures, - # which don't include MCMC-specific things - summary <- posterior::summarise_draws( - draws, - posterior::default_summary_measures() - ) - } else { - # otherwise use whatever the user specified via ... - summary <- posterior::summarise_draws(draws, ...) - } - } - if (self$runset$method() == "optimize") { - summary <- summary[, c("variable", "mean")] - colnames(summary) <- c("variable", "estimate") - } - summary - }, - - # print summary table without using tibbles print = function(variables = NULL, ..., digits = 2, max_rows = 10) { if (!length(self$output_files(include_failed = FALSE))) { stop("Fitting failed. Unable to print.", call. = FALSE) @@ -124,54 +47,12 @@ CmdStanFit <- R6::R6Class( opts <- options(max.print = prod(dim(out))) on.exit(options(max.print = opts$max.print), add = TRUE) - print(out, row.names=FALSE) + base::print(out, row.names=FALSE) if (max_rows < total_rows) { cat("\n # showing", max_rows, "of", total_rows, "rows (change via 'max_rows' argument)\n") } invisible(self) - }, - - cmdstan_summary = function(...) { - self$runset$run_cmdstan_tool("stansummary", ...) - }, - cmdstan_diagnose = function(...) { - self$runset$run_cmdstan_tool("diagnose", ...) 
- }, - output = function(id = NULL) { - # non-MCMC fit is obtained with one process only - # so fit$output() prints the output of that process - cat(paste(self$runset$procs$proc_output(1), collapse="\n")) - }, - output_files = function(include_failed = FALSE) { - self$runset$output_files(include_failed) - }, - latent_dynamics_files = function(include_failed = FALSE) { - self$runset$latent_dynamics_files(include_failed) - }, - data_file = function() { - self$runset$data_file() - }, - save_output_files = function(dir = ".", - basename = NULL, - timestamp = TRUE, - random = TRUE) { - self$runset$save_output_files(dir, basename, timestamp, random) - }, - save_latent_dynamics_files = function(dir = ".", - basename = NULL, - timestamp = TRUE, - random = TRUE) { - self$runset$save_latent_dynamics_files(dir, basename, timestamp, random) - }, - save_data_file = function(dir = ".", - basename = NULL, - timestamp = TRUE, - random = TRUE) { - self$runset$save_data_file(dir, basename, timestamp, random) - }, - return_codes = function() { - self$runset$procs$return_codes() } ), private = list( @@ -181,9 +62,6 @@ CmdStanFit <- R6::R6Class( ) ) - -# Document methods ---------------------------------------------------------- - #' Save fitted model object to a file #' #' @name fit-method-save_object @@ -194,14 +72,8 @@ CmdStanFit <- R6::R6Class( #' read into R lazily (i.e., as needed), the `$save_object()` method is the #' safest way to guarantee that everything has been read in before saving. #' -#' @section Usage: -#' ``` -#' $save_object(file, ...) -#' ``` -#' -#' @section Arguments: -#' * `file`: (string) Path where the file should be saved. -#' * `...`: Other arguments to pass to [base::saveRDS()] besides `object` and `file`. +#' @param file (string) Path where the file should be saved. +#' @param ... Other arguments to pass to [base::saveRDS()] besides `object` and `file`. 
#' #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`] #' @@ -217,7 +89,14 @@ CmdStanFit <- R6::R6Class( #' fit$summary() #' } #' -NULL +save_object <- function(file, ...) { + self$draws() + try(self$sampler_diagnostics(), silent = TRUE) + try(self$init(), silent = TRUE) + saveRDS(self, file = file, ...) + invisible(self) +} +CmdStanFit$set("public", name = "save_object", value = save_object) #' Extract posterior draws #' @@ -231,20 +110,12 @@ NULL #' `generated quantities` from the Stan program as well as `lp__`, the total #' log probability (`target`) accumulated in the `model` block. #' -#' @section Usage: -#' ``` -#' $draws(variables = NULL, inc_warmup = FALSE, ...) -#' ``` -#' @section Arguments: -#' * `variables`: (character vector) The variables (parameters and generated -#' quantities) to read in. If `NULL` (the default) then the draws of all -#' variables are included. -#' * `inc_warmup`: (logical) For MCMC only, should warmup draws be included? -#' Defaults to `FALSE`. -#' * `...`: Arguments passed on to -#' [`posterior::as_draws_array()`][posterior::draws_array]. -#' -#' @section Value: +#' @param variables (character vector) The variables to read in. If `NULL` (the +#' default) then all variables are included. +#' @param inc_warmup (logical) Should warmup draws be included? Defaults to +#' `FALSE`. Ignored except when used with [CmdStanMCMC] objects. +#' +#' @return #' * For [MCMC][model-method-sample], a 3-D #' [`draws_array`][posterior::draws_array] object (iteration x chain x #' variable). @@ -291,95 +162,35 @@ NULL #' mcmc_scatter(fit$draws(c("beta[1]", "beta[2]")), alpha = 0.3) #' } #' -NULL - -#' Extract sampler diagnostics -#' -#' @name fit-method-sampler_diagnostics -#' @aliases sampler_diagnostics -#' @description Extract the values of sampler diagnostics for each iteration and -#' chain of MCMC. -#' -#' @section Usage: -#' ``` -#' $sampler_diagnostics(inc_warmup = FALSE, ...) 
-#' ``` -#' @section Arguments: -#' * `inc_warmup`: (logical) Should warmup draws be included? Defaults to `FALSE`. -#' * `...`: Arguments passed on to -#' [`posterior::as_draws_array()`][posterior::draws_array]. -#' -#' @section Value: -#' A 3-D [`draws_array`][posterior::draws_array] object (iteration x chain x -#' variable). The variables for Stan's default MCMC algorithm are -#' `"accept_stat__"`, `"stepsize__"`, `"treedepth__"`, `"n_leapfrog__"`, -#' `"divergent__"`, `"energy__"`. -#' -#' @seealso [`CmdStanMCMC`] -#' -#' @examples -#' \dontrun{ -#' fit <- cmdstanr_example("logistic") -#' sampler_diagnostics <- fit$sampler_diagnostics() -#' str(sampler_diagnostics) -#' -#' library(posterior) -#' as_draws_df(sampler_diagnostics) -#' } -#' -NULL - -#' Extract inverse metric (mass matrix) -#' -#' @name fit-method-inv_metric -#' @aliases inv_metric -#' @description Return a list containing the inverse metric (mass matrix) for -#' each chain. -#' -#' @section Usage: -#' ``` -#' $inv_metric(matrix = TRUE) -#' ``` -#' @section Arguments: -#' * `matrix`: (logical) If a diagonal metric was used, setting `matrix = FALSE` -#' returns a list containing just the diagonals of the matrices instead of the -#' full matrices. Setting `matrix = FALSE` has no effect for dense metrics. -#' -#' @section Value: -#' A list of length equal to the number of MCMC chains. See the `matrix` -#' argument for details. -#' -#' @seealso [`CmdStanMCMC`] -#' -#' @examples -#' \dontrun{ -#' fit <- cmdstanr_example("logistic") -#' fit$inv_metric() -#' fit$inv_metric(matrix=FALSE) -#' -#' fit <- cmdstanr_example("logistic", metric = "dense_e") -#' fit$inv_metric() -#' } -#' -NULL +draws <- function(variables = NULL, inc_warmup = FALSE) { + # CmdStanMCMC and CmdStanGQ have separate implementations, + # this is used for CmdStanVB and CmdStanMLE + if (!length(self$output_files(include_failed = FALSE))) { + stop("Fitting failed. Unable to retrieve the draws.", call. 
= FALSE) + } + if (inc_warmup) { + warning("'inc_warmup' is ignored except when used with CmdStanMCMC objects.", + call. = FALSE) + } + if (is.null(private$draws_)) { + private$read_csv_() + } + posterior::subset_draws(private$draws_, variable = variables) +} +CmdStanFit$set("public", name = "draws", value = draws) -#' Extract initial values +#' Extract user-specified initial values #' #' @name fit-method-init #' @aliases init #' @description Return user-specified initial values. If the user provided #' initial values files or \R objects (list of lists or function) via the #' `init` argument when fitting the model then these are returned (always in -#' the list of lists format). Initial values generated by CmdStan are not -#' returned. -#' -#' @section Usage: -#' ``` -#' $init() -#' ``` +#' the list of lists format). Currently it is not possible to extract initial +#' values generated automatically by CmdStan, although CmdStan may support +#' this in the future. #' -#' @section Value: -#' A list of lists. See **Examples**. +#' @return A list of lists. See **Examples**. #' #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`] #' @@ -400,7 +211,17 @@ NULL #' str(fit$init()) #' } #' -NULL +init <- function() { + if (is.null(private$init_)) { + init_paths <- self$metadata()$init + if (!is.character(init_paths) || any(!file.exists(init_paths))) { + stop("Can't find initial values files.", call. = FALSE) + } + private$init_ <- lapply(init_paths, jsonlite::read_json, simplifyVector = TRUE) + } + private$init_ +} +CmdStanFit$set("public", name = "init", value = init) #' Extract log probability (target) #' @@ -416,12 +237,6 @@ NULL #' section of the Stan Reference Manual for details on when normalizing #' constants are dropped from log probability calculations. 
#' -#' @section Usage: -#' ``` -#' $lp() -#' $lp_approx() -#' ``` -#' #' @section Details: #' `lp__` is the unnormalized log density on Stan's [unconstrained #' space](https://mc-stan.org/docs/2_23/reference-manual/variable-transforms-chapter.html). @@ -434,9 +249,9 @@ NULL #' for performing the checks described in Yao et al. (2018) and implemented in #' the \pkg{loo} package. #' -#' @section Value: -#' A numeric vector with length equal to the number of (post-warmup) draws for -#' MCMC and variational inference, and length equal to `1` for optimization. +#' @return A numeric vector with length equal to the number of (post-warmup) +#' draws for MCMC and variational inference, and length equal to `1` for +#' optimization. #' #' @references #' Yao, Y., Vehtari, A., Simpson, D., and Gelman, A. (2018). Yes, but did it @@ -457,77 +272,36 @@ NULL #' plot(fit_vb$lp(), fit_vb$lp_approx()) #' } #' -NULL +lp <- function() { + lp__ <- self$draws(variables = "lp__") + lp__ <- posterior::as_draws_matrix(lp__) # if mcmc this combines all chains, otherwise does nothing + as.numeric(lp__) +} +CmdStanFit$set("public", name = "lp", value = lp) -#' Extract (penalized) maximum likelihood estimate after optimization -#' -#' @name fit-method-mle -#' @aliases mle -#' @description The `$mle()` method is only available for [`CmdStanMLE`] objects. -#' It returns the penalized maximum likelihood estimate (posterior mode) as a -#' numeric vector with one element per variable. The returned vector does not -#' include `lp__`, the total log probability (`target`) accumulated in the -#' `model` block of the Stan program, which is available via the -#' [`$lp()`][fit-method-lp] method and also included in the -#' [`$draws()`][fit-method-draws] method. -#' -#' @section Usage: -#' ``` -#' $mle(variables = NULL) -#' ``` -#' @section Arguments: -#' * `variables`: (character vector) The variables (parameters and generated -#' quantities) to include. 
If NULL (the default) then all variables are -#' included. -#' -#' @section Value: -#' A numeric vector. See **Examples**. -#' -#' @seealso [`CmdStanMLE`] -#' -#' @examples -#' \dontrun{ -#' fit <- cmdstanr_example("logistic", method = "optimize") -#' fit$mle() -#' fit$mle("alpha") -#' fit$mle("beta") -#' fit$mle("beta[2]") -#' } -#' -NULL -#' Compute a summary table of MCMC estimates and diagnostics +#' Compute a summary table of estimates and diagnostics #' #' @name fit-method-summary -#' @aliases summary print.CmdStanMCMC print.CmdStanMLE print.CmdStanVB +#' @aliases summary fit-method-print print.CmdStanMCMC print.CmdStanMLE print.CmdStanVB #' @description The `$summary()` method runs #' [`summarise_draws()`][posterior::draws_summary] from the \pkg{posterior} -#' package and returns the output. For MCMC only post-warmup draws are included -#' in the summary. -#' -#' The `$print()` method prints the same summary stats but removes the extra -#' formatting used for printing tibbles and returns the fitted model object -#' itself. The `$print()` method may also be faster than `$summary()` because -#' it is designed to only compute the summary statistics for the variables -#' that will actually fit in the printed output (see argument `max_rows`) -#' whereas `$summary()` will compute them for all of the specified variables -#' in order to be able to return them to the user. -#' -#' @section Usage: -#' ``` -#' $summary(variables = NULL, ...) -#' $print(variables = NULL, ..., digits = 2, max_rows = 10) -#' ``` -#' @section Arguments: -#' * `variables`: (character vector) The variables to include. -#' * `...`: Optional arguments to pass to -#' [`posterior::summarise_draws()`][posterior::draws_summary]. -#' * `digits`: (integer) For `print` only, the number of digits to use for -#' rounding. -#' * `max_rows`: (integer) For `print` only, the maximum number of rows to print. 
-#' -#' @section Value: -#' The `$summary()` method returns the tibble created by +#' package and returns the output. For MCMC, only post-warmup draws are +#' included in the summary. +#' +#' There is also a `$print()` method that prints the same summary stats but +#' removes the extra formatting used for printing tibbles and returns the +#' fitted model object itself. The `$print()` method may also be faster than +#' `$summary()` because it is designed to only compute the summary statistics +#' for the variables that will actually fit in the printed output whereas +#' `$summary()` will compute them for all of the specified variables in order +#' to be able to return them to the user. See **Examples**. +#' +#' @param variables (character vector) The variables to include. +#' @param ... Optional arguments to pass to [`posterior::summarise_draws()`][posterior::draws_summary]. +#' +#' @return +#' The `$summary()` method returns the tibble data frame created by #' [`posterior::summarise_draws()`][posterior::draws_summary]. #' #' The `$print()` method returns the fitted model object itself (invisibly), @@ -554,31 +328,52 @@ NULL #' fit$summary("beta", prob_gt_0 = ~ mean(. > 0)) #' } #' -NULL - +summary <- function(variables = NULL, ...) { + draws <- self$draws(variables) + if (self$runset$method() == "sample") { + summary <- posterior::summarise_draws(draws, ...) + } else { + if (!length(list(...))) { + # if user didn't supply any args use default summary measures, + # which don't include MCMC-specific things + summary <- posterior::summarise_draws( + draws, + posterior::default_summary_measures() + ) + } else { + # otherwise use whatever the user specified via ... + summary <- posterior::summarise_draws(draws, ...) 
+ } + } + if (self$runset$method() == "optimize") { + summary <- summary[, c("variable", "mean")] + colnames(summary) <- c("variable", "estimate") + } + summary +} +CmdStanFit$set("public", name = "summary", value = summary) -#' Run CmdStan's `stansummary` and `diagnose` -#' -#' @description -#' Run CmdStan's `stansummary` and `diagnose` utilities. These are -#' documented in the CmdStan Guide: -#' * https://mc-stan.org/docs/cmdstan-guide/stansummary.html -#' * https://mc-stan.org/docs/cmdstan-guide/diagnose.html +#' Run CmdStan's `stansummary` and `diagnose` utilities #' #' @name fit-method-cmdstan_summary #' @aliases fit-method-cmdstan_diagnose cmdstan_summary cmdstan_diagnose -#' @note Although these methods also work for models fit using the +#' @description Run CmdStan's `stansummary` and `diagnose` utilities. These are +#' documented in the CmdStan Guide: +#' * https://mc-stan.org/docs/cmdstan-guide/stansummary.html +#' * https://mc-stan.org/docs/cmdstan-guide/diagnose.html +#' +#' Although these methods can be used for models fit using the #' [`$variational()`][model-method-variational] method, much of the output is -#' only relevant for models fit using the [`$sample()`][model-method-sample] -#' method. +#' currently only relevant for models fit using the +#' [`$sample()`][model-method-sample] method. #' -#' @section Usage: -#' ``` -#' $cmdstan_summary() -#' $cmdstan_diagnose() -#' ``` +#' See [`$summary()`][fit-method-summary] for computing similar summaries in +#' R rather than calling CmdStan's utilities. #' -#' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`] +#' @param flags An optional character vector of flags (e.g. +#' `flags = c("--sig_figs=1")`). 
+#' +#' @seealso [`CmdStanMCMC`], [fit-method-summary] #' #' @examples #' \dontrun{ @@ -587,7 +382,16 @@ NULL #' fit$cmdstan_summary() #' } #' -NULL +cmdstan_summary <- function(flags = NULL) { + self$runset$run_cmdstan_tool("stansummary", flags = flags) +} +CmdStanFit$set("public", name = "cmdstan_summary", value = cmdstan_summary) + +#' @rdname fit-method-cmdstan_summary +cmdstan_diagnose <- function() { + self$runset$run_cmdstan_tool("diagnose") +} +CmdStanFit$set("public", name = "cmdstan_diagnose", value = cmdstan_diagnose) #' Save output and data files #' @@ -607,23 +411,11 @@ NULL #' The versions without the `save_` prefix (e.g., `$output_files()`) return #' the current file paths without moving any files. #' -#' @section Usage: -#' ``` -#' $save_output_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) -#' $save_latent_dynamics_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) -#' $save_data_file(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) -#' -#' $output_files() -#' $latent_dynamics_files() -#' $data_file() -#' ``` -#' -#' @section Arguments: -#' * `dir`: (string) Path to directory where the files should be saved. -#' * `basename`: (string) Base filename to use. See __Details__. -#' * `timestamp`: (logical) Should a timestamp be added to the file name(s)? +#' @param dir (string) Path to directory where the files should be saved. +#' @param basename (string) Base filename to use. See __Details__. +#' @param timestamp (logical) Should a timestamp be added to the file name(s)? #' Defaults to `TRUE`. See __Details__. -#' * `random`: (logical) Should random alphanumeric characters be added to the +#' @param random (logical) Should random alphanumeric characters be added to the #' end of the file name(s)? Defaults to `TRUE`. See __Details__. 
#' #' @section Details: @@ -641,7 +433,7 @@ NULL #' For `$save_data_file()` no `id` is included in the file name because even #' with multiple MCMC chains the data file is the same. #' -#' @section Value: +#' @return #' The `$save_*` methods print a message with the new file paths and (invisibly) #' return a character vector of the new paths (or `NA` for any that couldn't be #' copied). They also have the side effect of setting the internal paths in the @@ -652,7 +444,65 @@ NULL #' #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`] #' -NULL +#' @examples +#' \dontrun{ +#' fit <- cmdstanr_example() +#' fit$output_files() +#' fit$data_file() +#' +#' # just using tempdir for the example +#' my_dir <- tempdir() +#' fit$save_output_files(dir = my_dir, basename = "banana") +#' fit$save_output_files(dir = my_dir, basename = "tomato", timestamp = FALSE) +#' fit$save_output_files(dir = my_dir, basename = "lettuce", timestamp = FALSE, random = FALSE) +#' } +#' +save_output_files <- function(dir = ".", + basename = NULL, + timestamp = TRUE, + random = TRUE) { + self$runset$save_output_files(dir, basename, timestamp, random) +} +CmdStanFit$set("public", name = "save_output_files", value = save_output_files) + +#' @rdname fit-method-save_output_files +save_latent_dynamics_files <- function(dir = ".", + basename = NULL, + timestamp = TRUE, + random = TRUE) { + self$runset$save_latent_dynamics_files(dir, basename, timestamp, random) +} +CmdStanFit$set("public", name = "save_latent_dynamics_files", value = save_latent_dynamics_files) + +#' @rdname fit-method-save_output_files +save_data_file <- function(dir = ".", + basename = NULL, + timestamp = TRUE, + random = TRUE) { + self$runset$save_data_file(dir, basename, timestamp, random) +} +CmdStanFit$set("public", name = "save_data_file", value = save_data_file) + + +#' @rdname fit-method-save_output_files +#' @param include_failed Should CmdStan runs that failed also be included? 
The +#' default is `FALSE`. +output_files <- function(include_failed = FALSE) { + self$runset$output_files(include_failed) +} +CmdStanFit$set("public", name = "output_files", value = output_files) + +#' @rdname fit-method-save_output_files +latent_dynamics_files <- function(include_failed = FALSE) { + self$runset$latent_dynamics_files(include_failed) +} +CmdStanFit$set("public", name = "latent_dynamics_files", value = latent_dynamics_files) + +#' @rdname fit-method-save_output_files +data_file <- function() { + self$runset$data_file() +} +CmdStanFit$set("public", name = "data_file", value = data_file) #' Report timing of CmdStan runs #' @@ -662,12 +512,7 @@ NULL #' is provided about the run times of individual chains and the warmup and #' sampling phases. #' -#' @section Usage: -#' ``` -#' $time() -#' ``` -#' -#' @section Value: +#' @return #' A list with elements #' * `total`: (scalar) the total run time. #' * `chains`: (data frame) for MCMC only, timing info for the individual @@ -688,26 +533,25 @@ NULL #' fit_vb$time() #' } #' -NULL +time <- function() { + self$runset$time() +} +CmdStanFit$set("public", name = "time", value = time) #' Access console output #' #' @name fit-method-output #' @aliases output -#' @description For MCMC the `$output()` method returns the stdout and stderr of -#' all chains as a list of character vectors. If the `id` argument is specified -#' it pretty prints the console output for a single chain. +#' @description For MCMC, the `$output()` method returns the stdout and stderr +#' of all chains as a list of character vectors if `id=NULL`. If the `id` +#' argument is specified it instead pretty prints the console output for a +#' single chain. #' -#' For optimization and variational inference `$output()` just pretty prints the -#' console output. +#' For optimization and variational inference `$output()` just pretty prints +#' the console output. 
#' -#' @section Usage: -#' ``` -#' $output(id = NULL) -#' ``` -#' -#' @section Arguments: -#' * `id`: (integer) For MCMC only, the chain id. +#' @param id (integer) The chain id. Ignored if the model was not fit using +#' MCMC. #' #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`] #' @@ -725,7 +569,12 @@ NULL #' fit_vb$output() #' } #' -NULL +output <- function(id = NULL) { + # MCMC has separate implementation but doc is shared + # Non-MCMC fit is obtained with one process only so id is ignored + cat(paste(self$runset$procs$proc_output(1), collapse="\n")) +} +CmdStanFit$set("public", name = "output", value = output) #' Extract metadata from CmdStan CSV files #' @@ -735,11 +584,6 @@ NULL #' from the CSV output files, including the CmdStan configuration used when #' fitting the model. See **Examples** and [read_cmdstan_csv()]. #' -#' @section Usage: -#' ``` -#' $metadata() -#' ``` -#' #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`] #' #' @examples @@ -754,7 +598,16 @@ NULL #' str(fit_vb$metadata()) #' } #' -NULL +metadata <- function() { + if (!length(self$output_files(include_failed = FALSE))) { + stop("Fitting failed. Unable to retrieve the metadata.", call. = FALSE) + } + if (is.null(private$metadata_)) { + private$read_csv_() + } + private$metadata_ +} +CmdStanFit$set("public", name = "metadata", value = metadata) #' Extract return codes from CmdStan #' @@ -762,11 +615,8 @@ NULL #' @aliases return_codes #' @description The `$return_codes()` method returns a vector of return codes #' from the CmdStan run(s). A return code of 0 indicates a successful run. -#' -#' @section Usage: -#' ``` -#' $return_codes() -#' ``` +#' @return An integer vector of return codes with length equal to the number of +#' CmdStan runs (number of chains for MCMC and one otherwise). 
#' #' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`] #' @@ -781,56 +631,13 @@ NULL #' fit_opt$return_codes() # should be non-zero #' } #' -NULL - -#' Leave-one-out cross-validation (LOO-CV) -#' -#' @name fit-method-loo -#' @aliases loo -#' @description The `$loo()` method computes approximate LOO-CV using the -#' \pkg{loo} package. This is a simple wrapper around [loo::loo.array()] -#' provided for convenience and requires computing the pointwise -#' log-likelihood in your Stan program. See the \pkg{loo} package -#' [vignettes](https://mc-stan.org/loo/articles/) for details. -#' -#' @section Usage: -#' ``` -#' $loo(variables = "log_lik", r_eff = NULL, ...) -#' ``` -#' -#' @param variables (character vector) The name(s) of the variable(s) in the -#' Stan program containing the pointwise log-likelihood. The default is to -#' look for `"log_lik"`. This argument is passed to the -#' [`$draws()][fit-method-draws] method. -#' @param r_eff There are several options: -#' * `TRUE` (the default) will automatically call [loo::relative_eff.array()] -#' to compute the `r_eff` argument to pass to [loo::loo.array()]. -#' * `FALSE` or `NULL` will avoid computing `r_eff` (which can sometimes be slow) -#' but will result in a warning from the \pkg{loo} package. -#' * If `r_eff` is anything else, that object will be passed as the `r_eff` -#' argument to [loo::loo.array()]. -#' @param ... Other arguments (e.g., `cores`, `save_psis`, etc.) passed to -#' [loo::loo.array()]. -#' -#' @section Value: The object returned by [loo::loo.array()]. -#' -#' @seealso The \pkg{loo} package website with -#' [documentation](https://mc-stan.org/loo/reference/index.html) and -#' [vignettes](https://mc-stan.org/loo/articles/). 
-#' -#' @examples -#' -#' \dontrun{ -#' # the "logistic" example model has "log_lik" in generated quantities -#' fit <- cmdstanr_example("logistic") -#' fit$loo(cores = 2) -#' } -#' -NULL +return_codes <- function() { + self$runset$procs$return_codes() +} +CmdStanFit$set("public", name = "return_codes", value = return_codes) # CmdStanMCMC ------------------------------------------------------------- - #' CmdStanMCMC objects #' #' @name CmdStanMCMC @@ -855,6 +662,7 @@ NULL #' [`$inv_metric()`][fit-method-inv_metric] | Return the inverse metric for each chain. | #' [`$init()`][fit-method-init] | Return user-specified initial values. | #' [`$metadata()`][fit-method-metadata] | Return a list of metadata gathered from the CmdStan CSV files. | +#' [`$num_chains()`][fit-method-num_chains] | Returns the number of MCMC chains. | #' #' ## Summarize inferences and diagnostics #' @@ -878,8 +686,8 @@ NULL #' #' |**Method**|**Description**| #' |:----------|:---------------| -#' [`$time()`][fit-method-time] | Report total and chain-specific run times. | #' [`$output()`][fit-method-output] | Return the stdout and stderr of all chains or pretty print the output for a single chain. | +#' [`$time()`][fit-method-time] | Report total and chain-specific run times. | #' [`$return_codes()`][fit-method-return_codes] | Return the return codes from the CmdStan runs. 
| #' NULL @@ -888,6 +696,7 @@ CmdStanMCMC <- R6::R6Class( classname = "CmdStanMCMC", inherit = CmdStanFit, public = list( + # override the CmdStanFit initialize method initialize = function(runset) { super$initialize(runset) if (!length(self$output_files())) { @@ -910,11 +719,7 @@ CmdStanMCMC <- R6::R6Class( } } }, - - num_chains = function() { - super$num_procs() - }, - + # override the CmdStanFit output method output = function(id = NULL) { if (is.null(id)) { self$runset$procs$proc_output() @@ -923,6 +728,7 @@ CmdStanMCMC <- R6::R6Class( } }, + # override the CmdStanFit draws method draws = function(variables = NULL, inc_warmup = FALSE) { if (!length(self$output_files(include_failed = FALSE))) { stop("No chains finished successfully. Unable to retrieve the draws.", call. = FALSE) @@ -955,67 +761,7 @@ CmdStanMCMC <- R6::R6Class( } else { private$draws_[,,variables] } - }, - - sampler_diagnostics = function(inc_warmup = FALSE) { - if (!length(self$output_files(include_failed = FALSE))) { - stop("No chains finished successfully. Unable to retrieve the sampler diagnostics.", call. = FALSE) - } - to_read <- remaining_columns_to_read( - requested = NULL, - currently_read = dimnames(private$sampler_diagnostics_)$variable, - all = private$metadata_$sampler_diagnostics - ) - if (is.null(to_read) || any(nzchar(to_read))) { - private$read_csv_(variables = "", sampler_diagnostics = NULL) - } - if (inc_warmup) { - if (!private$metadata_$save_warmup) { - stop("Warmup sampler diagnostics were requested from a fit object without them! ", - "Please rerun the model with save_warmup = TRUE.", call. = FALSE) - } - posterior::bind_draws( - private$warmup_sampler_diagnostics_, - private$sampler_diagnostics_, - along="iteration" - ) - } else { - private$sampler_diagnostics_ - } - }, - - # returns list of inverse metrics - inv_metric = function(matrix = TRUE) { - if (!length(self$output_files(include_failed = FALSE))) { - stop("No chains finished successfully. 
Unable to retrieve the inverse metrics.", call. = FALSE) - } - if (is.null(private$inv_metric_)) { - private$read_csv_(variables = "", sampler_diagnostics = "") - } - out <- private$inv_metric_ - if (matrix && !is.matrix(out[[1]])) { - # convert each vector to a diagonal matrix - out <- lapply(out, diag) - } - out - }, - # approximate loo-cv using the loo package - loo = function(variables = "log_lik", r_eff = TRUE, ...) { - if (!requireNamespace("loo", quietly = TRUE)) { - stop("Please install the loo package to use this method.", call. = FALSE) - } - LLarray <- self$draws(variables) - if (is.logical(r_eff)) { - if (isTRUE(r_eff)) { - r_eff_cores <- list(...)[["cores"]] %||% getOption("mc.cores", 1) - r_eff <- loo::relative_eff(exp(LLarray), cores = r_eff_cores) - } else { - r_eff <- NULL - } - } - loo::loo.array(LLarray, r_eff = r_eff, ...) } - ), private = list( # also inherits draws_ and metadata_ from CmdStanFit @@ -1075,9 +821,181 @@ CmdStanMCMC <- R6::R6Class( ) ) +#' Leave-one-out cross-validation (LOO-CV) +#' +#' @name fit-method-loo +#' @aliases loo +#' @description The `$loo()` method computes approximate LOO-CV using the +#' \pkg{loo} package. This is a simple wrapper around [loo::loo.array()] +#' provided for convenience and requires computing the pointwise +#' log-likelihood in your Stan program. See the \pkg{loo} package +#' [vignettes](https://mc-stan.org/loo/articles/) for details. +#' +#' @param variables (character vector) The name(s) of the variable(s) in the +#' Stan program containing the pointwise log-likelihood. The default is to +#' look for `"log_lik"`. This argument is passed to the +#' [`$draws()`][fit-method-draws] method. +#' @param r_eff There are several options: +#' * `TRUE` (the default) will automatically call [loo::relative_eff.array()] +#' to compute the `r_eff` argument to pass to [loo::loo.array()].
+#' * `FALSE` or `NULL` will avoid computing `r_eff` (which can sometimes be slow) +#' but will result in a warning from the \pkg{loo} package. +#' * If `r_eff` is anything else, that object will be passed as the `r_eff` +#' argument to [loo::loo.array()]. +#' @param ... Other arguments (e.g., `cores`, `save_psis`, etc.) passed to +#' [loo::loo.array()]. +#' +#' @return The object returned by [loo::loo.array()]. +#' +#' @seealso The \pkg{loo} package website with +#' [documentation](https://mc-stan.org/loo/reference/index.html) and +#' [vignettes](https://mc-stan.org/loo/articles/). +#' +#' @examples +#' +#' \dontrun{ +#' # the "logistic" example model has "log_lik" in generated quantities +#' fit <- cmdstanr_example("logistic") +#' loo_result <- fit$loo(cores = 2) +#' print(loo_result) +#' } +#' +loo <- function(variables = "log_lik", r_eff = TRUE, ...) { + if (!requireNamespace("loo", quietly = TRUE)) { + stop("Please install the loo package to use this method.", call. = FALSE) + } + LLarray <- self$draws(variables) + if (is.logical(r_eff)) { + if (isTRUE(r_eff)) { + r_eff_cores <- list(...)[["cores"]] %||% getOption("mc.cores", 1) + r_eff <- loo::relative_eff(exp(LLarray), cores = r_eff_cores) + } else { + r_eff <- NULL + } + } + loo::loo.array(LLarray, r_eff = r_eff, ...) +} +CmdStanMCMC$set("public", name = "loo", value = loo) + +#' Extract sampler diagnostics after MCMC +#' +#' @name fit-method-sampler_diagnostics +#' @aliases sampler_diagnostics +#' @description Extract the values of sampler diagnostics for each iteration and +#' chain of MCMC. +#' +#' @param inc_warmup (logical) Should warmup draws be included? Defaults to `FALSE`. +#' +#' @return +#' A 3-D [`draws_array`][posterior::draws_array] object (iteration x chain x +#' variable). The variables for Stan's default MCMC algorithm are +#' `"accept_stat__"`, `"stepsize__"`, `"treedepth__"`, `"n_leapfrog__"`, +#' `"divergent__"`, `"energy__"`. 
+#' +#' @seealso [`CmdStanMCMC`] +#' +#' @examples +#' \dontrun{ +#' fit <- cmdstanr_example("logistic") +#' sampler_diagnostics <- fit$sampler_diagnostics() +#' str(sampler_diagnostics) +#' +#' library(posterior) +#' as_draws_df(sampler_diagnostics) +#' } +#' +sampler_diagnostics <- function(inc_warmup = FALSE) { + if (!length(self$output_files(include_failed = FALSE))) { + stop("No chains finished successfully. Unable to retrieve the sampler diagnostics.", call. = FALSE) + } + to_read <- remaining_columns_to_read( + requested = NULL, + currently_read = dimnames(private$sampler_diagnostics_)$variable, + all = private$metadata_$sampler_diagnostics + ) + if (is.null(to_read) || any(nzchar(to_read))) { + private$read_csv_(variables = "", sampler_diagnostics = NULL) + } + if (inc_warmup) { + if (!private$metadata_$save_warmup) { + stop("Warmup sampler diagnostics were requested from a fit object without them! ", + "Please rerun the model with save_warmup = TRUE.", call. = FALSE) + } + posterior::bind_draws( + private$warmup_sampler_diagnostics_, + private$sampler_diagnostics_, + along="iteration" + ) + } else { + private$sampler_diagnostics_ + } +} +CmdStanMCMC$set("public", name = "sampler_diagnostics", value = sampler_diagnostics) + +#' Extract inverse metric (mass matrix) after MCMC +#' +#' @name fit-method-inv_metric +#' @aliases inv_metric +#' @description Extract the inverse metric (mass matrix) for each MCMC chain. +#' +#' @param matrix (logical) If a diagonal metric was used, setting `matrix = +#' FALSE` returns a list containing just the diagonals of the matrices instead +#' of the full matrices. Setting `matrix = FALSE` has no effect for dense +#' metrics. +#' +#' @return A list of length equal to the number of MCMC chains. See the `matrix` +#' argument for details. 
+#' +#' @seealso [`CmdStanMCMC`] +#' +#' @examples +#' \dontrun{ +#' fit <- cmdstanr_example("logistic") +#' fit$inv_metric() +#' fit$inv_metric(matrix=FALSE) +#' +#' fit <- cmdstanr_example("logistic", metric = "dense_e") +#' fit$inv_metric() +#' } +#' +inv_metric <- function(matrix = TRUE) { + if (!length(self$output_files(include_failed = FALSE))) { + stop("No chains finished successfully. Unable to retrieve the inverse metrics.", call. = FALSE) + } + if (is.null(private$inv_metric_)) { + private$read_csv_(variables = "", sampler_diagnostics = "") + } + out <- private$inv_metric_ + if (matrix && !is.matrix(out[[1]])) { + # convert each vector to a diagonal matrix + out <- lapply(out, diag) + } + out +} +CmdStanMCMC$set("public", name = "inv_metric", value = inv_metric) + +#' Extract number of chains after MCMC +#' +#' @name fit-method-num_chains +#' @aliases num_chains +#' @description The `$num_chains()` method returns the number of MCMC chains. +#' @return An integer. +#' +#' @seealso [`CmdStanMCMC`] +#' +#' @examples +#' \dontrun{ +#' fit_mcmc <- cmdstanr_example(chains = 2) +#' fit_mcmc$num_chains() +#' } +#' +num_chains = function() { + super$num_procs() +} +CmdStanMCMC$set("public", name = "num_chains", value = num_chains) -# CmdStanMLE ------------------------------------------------------------- +# CmdStanMLE ------------------------------------------------------------- #' CmdStanMLE objects #' #' @name CmdStanMLE @@ -1127,14 +1045,7 @@ NULL CmdStanMLE <- R6::R6Class( classname = "CmdStanMLE", inherit = CmdStanFit, - public = list( - mle = function(variables = NULL) { - x <- self$draws(variables) - x <- x[, colnames(x) != "lp__"] - estimate <- setNames(as.numeric(x), nm = posterior::variables(x)) - estimate - } - ), + public = list(), private = list( # inherits draws_ and metadata_ slots from CmdStanFit read_csv_ = function() { @@ -1149,8 +1060,43 @@ CmdStanMLE <- R6::R6Class( ) ) -# CmdStanVB 
--------------------------------------------------------------- +#' Extract (penalized) maximum likelihood estimate after optimization +#' +#' @name fit-method-mle +#' @aliases mle +#' @description The `$mle()` method is only available for [`CmdStanMLE`] objects. +#' It returns the penalized maximum likelihood estimate (posterior mode) as a +#' numeric vector with one element per variable. The returned vector does *not* +#' include `lp__`, the total log probability (`target`) accumulated in the +#' `model` block of the Stan program, which is available via the +#' [`$lp()`][fit-method-lp] method and also included in the +#' [`$draws()`][fit-method-draws] method. +#' +#' @param variables (character vector) The variables (parameters, transformed +#' parameters, and generated quantities) to include. If NULL (the default) +#' then all variables are included. +#' +#' @return A numeric vector. See **Examples**. +#' +#' @seealso [`CmdStanMLE`] +#' +#' @examples +#' \dontrun{ +#' fit <- cmdstanr_example("logistic", method = "optimize") +#' fit$mle("alpha") +#' fit$mle("beta") +#' fit$mle("beta[2]") +#' } +#' +mle <- function(variables = NULL) { + x <- self$draws(variables) + x <- x[, colnames(x) != "lp__"] + stats::setNames(as.numeric(x), posterior::variables(x)) +} +CmdStanMLE$set("public", name = "mle", value = mle) + +# CmdStanVB --------------------------------------------------------------- #' CmdStanVB objects #' #' @name CmdStanVB @@ -1203,11 +1149,7 @@ NULL CmdStanVB <- R6::R6Class( classname = "CmdStanVB", inherit = CmdStanFit, - public = list( - lp_approx = function() { - as.numeric(self$draws()[, "lp_approx__"]) - } - ), + public = list(), private = list( # inherits draws_ and metadata_ slots from CmdStanFit read_csv_ = function() { @@ -1222,8 +1164,14 @@ CmdStanVB <- R6::R6Class( ) ) -# CmdStanGQ --------------------------------------------------------------- +#' @rdname fit-method-lp +lp_approx <- function() { + as.numeric(self$draws()[, "lp_approx__"]) +} 
+CmdStanVB$set("public", name = "lp_approx", value = lp_approx) + +# CmdStanGQ --------------------------------------------------------------- #' CmdStanGQ objects #' #' @name CmdStanGQ @@ -1278,10 +1226,15 @@ CmdStanGQ <- R6::R6Class( num_chains = function() { super$num_procs() }, - draws = function(variables = NULL) { + # override CmdStanFit draws method + draws = function(variables = NULL, inc_warmup = FALSE) { if (!length(self$output_files(include_failed = FALSE))) { stop("Generating quantities for all MCMC chains failed. Unable to retrieve the generated quantities.", call. = FALSE) } + if (inc_warmup) { + warning("'inc_warmup' is ignored except when used with CmdStanMCMC objects.", + call. = FALSE) + } to_read <- remaining_columns_to_read( requested = variables, currently_read = dimnames(private$draws_)$variable, @@ -1302,6 +1255,7 @@ CmdStanGQ <- R6::R6Class( } private$draws_[,,variables] }, + # override CmdStanFit output method output = function(id = NULL) { if (is.null(id)) { self$runset$procs$proc_output() diff --git a/R/knitr.R b/R/knitr.R index 21c4fc55e..f6a60e044 100644 --- a/R/knitr.R +++ b/R/knitr.R @@ -5,8 +5,11 @@ #' [R Markdown CmdStan Engine](https://mc-stan.org/cmdstanr/articles/r-markdown.html) #' for a demonstration. #' -#' @param override Override knitr's built-in, RStan-based engine for `stan`. -#' See below for details. +#' @export +#' +#' @param override Override knitr's built-in, RStan-based engine for Stan? The +#' default is `TRUE`. See **Details**. +#' #' @details #' If `override = TRUE` (default), this registers CmdStanR's knitr engine as the #' engine for `stan` chunks, replacing knitr's built-in, RStan-based engine. If @@ -30,10 +33,11 @@ #' If you would like to keep `stan` chunks as `stan` chunks, it is possible to #' specify `engine = "cmdstan"` in the chunk options after registering the #' `cmdstan` engine with `override = FALSE`. 
+#' #' @references -#' - [Register a custom language engine](https://bookdown.org/yihui/rmarkdown-cookbook/custom-engine.html) -#' - [Stan language engine](https://bookdown.org/yihui/rmarkdown/language-engines.html#stan) -#' @export +#' * [Register a custom language engine for knitr](https://bookdown.org/yihui/rmarkdown-cookbook/custom-engine.html) +#' * [knitr's built-in Stan language engine](https://bookdown.org/yihui/rmarkdown/language-engines.html#stan) +#' register_knitr_engine <- function(override = TRUE) { if (!requireNamespace("knitr", quietly = TRUE)) { stop("Please install the knitr package.", call. = FALSE) diff --git a/R/model.R b/R/model.R index f08fcd9a4..1ccc0a54f 100644 --- a/R/model.R +++ b/R/model.R @@ -276,61 +276,43 @@ CmdStanModel <- R6::R6Class( #' `$hpp_file()` methods. The default is to create the executable in the same #' directory as the Stan program and to write the generated C++ code in a #' temporary directory. To save the C++ code to a non-temporary location use -#' `$save_hpp_file()`. +#' `$save_hpp_file(dir)`. #' -#' @section Usage: -#' ``` -#' $compile( -#' quiet = TRUE, -#' dir = NULL, -#' pedantic = FALSE, -#' include_paths = NULL, -#' cpp_options = list(), -#' stanc_options = list(), -#' force_recompile = FALSE -#' ) -#' $exe_file() -#' $hpp_file() -#' $save_hpp_file(dir = NULL) -#' ``` -#' -#' @section Arguments: -#' Leaving all arguments at their defaults should be fine for most users, but -#' optional arguments are provided to enable features in CmdStan (and the Stan -#' Math library). See the CmdStan manual for more details. -#' * `quiet`: (logical) Should the verbose output from CmdStan during +#' @param quiet (logical) Should the verbose output from CmdStan during #' compilation be suppressed? The default is `TRUE`, but if you encounter an #' error we recommend trying again with `quiet=FALSE` to see more of the #' output. 
-#' * `dir`: (string) The path to the directory in which to store the CmdStan +#' @param dir (string) The path to the directory in which to store the CmdStan #' executable (or `.hpp` file if using `$save_hpp_file()`). The default is the #' same location as the Stan program. -#' * `pedantic`: (logical) Should pedantic mode be turned on? The default is +#' @param pedantic (logical) Should pedantic mode be turned on? The default is #' `FALSE`. Pedantic mode attempts to warn you about potential issues in your #' Stan program beyond syntax errors. For details see the [*Pedantic mode* #' chapter](https://mc-stan.org/docs/reference-manual/pedantic-mode.html) in #' the Stan Reference Manual. **Note:** to do a pedantic check for a model #' that is already compiled use the #' [`$check_syntax()`][model-method-check_syntax] method instead. -#' * `include_paths`: (character vector) Paths to directories where Stan +#' @param include_paths (character vector) Paths to directories where Stan #' should look for files specified in `#include` directives in the Stan #' program. -#' * `cpp_options`: (list) Any makefile options to be used when compiling the +#' @param cpp_options (list) Any makefile options to be used when compiling the #' model (`STAN_THREADS`, `STAN_MPI`, `STAN_OPENCL`, etc.). Anything you would #' otherwise write in the `make/local` file. -#' * `stanc_options`: (list) Any Stan-to-C++ transpiler options to be used +#' @param stanc_options (list) Any Stan-to-C++ transpiler options to be used #' when compiling the model. See the **Examples** section below as well as the #' `stanc` chapter of the CmdStan Guide for more details on available options: #' https://mc-stan.org/docs/cmdstan-guide/stanc.html. -#' * `force_recompile`: (logical) Should the model be recompiled even if was +#' @param force_recompile (logical) Should the model be recompiled even if it was #' not modified since last compiled. The default is `FALSE`.
+#' @param threads Deprecated and will be removed in a future release. Please +#' turn on threading via `cpp_options = list(stan_threads = TRUE)` instead. #' #' @section Value: The `$compile()` method is called for its side effect of #' creating the executable and adding its path to the [`CmdStanModel`] object, #' but it also returns the [`CmdStanModel`] object invisibly. #' -#' The `$exe_file()`, `$hpp_file()`, and `$save_hpp_file()` methods all return -#' file paths. +#' After compilation, the `$exe_file()`, `$hpp_file()`, and `$save_hpp_file()` +#' methods can be used and return file paths. #' #' @template seealso-docs #' @@ -361,17 +343,15 @@ CmdStanModel <- R6::R6Class( #' #' } #' -NULL - -compile_method <- function(quiet = TRUE, - dir = NULL, - pedantic = FALSE, - include_paths = NULL, - cpp_options = list(), - stanc_options = list(), - force_recompile = FALSE, - #deprecated - threads = FALSE) { +compile <- function(quiet = TRUE, + dir = NULL, + pedantic = FALSE, + include_paths = NULL, + cpp_options = list(), + stanc_options = list(), + force_recompile = FALSE, + #deprecated + threads = FALSE) { if (length(cpp_options) == 0 && !is.null(private$precompile_cpp_options_)) { cpp_options <- private$precompile_cpp_options_ } @@ -531,7 +511,7 @@ compile_method <- function(quiet = TRUE, private$precompile_include_paths_ <- NULL invisible(self) } -CmdStanModel$set("public", name = "compile", value = compile_method) +CmdStanModel$set("public", name = "compile", value = compile) #' Check syntax of a Stan program #' @@ -543,29 +523,18 @@ CmdStanModel$set("public", name = "compile", value = compile_method) #' checks the Stan program for syntax errors and returns `TRUE` (invisibly) if #' parsing succeeds. If invalid syntax in found an error is thrown. 
#' -#' @section Usage: -#' ``` -#' $check_syntax( -#' pedantic = FALSE, -#' include_paths = NULL, -#' stanc_options = list(), -#' quiet = FALSE -#' ) -#' ``` -#' -#' @section Arguments: -#' * `pedantic`: (logical) Should pedantic mode be turned on? The default is +#' @param pedantic (logical) Should pedantic mode be turned on? The default is #' `FALSE`. Pedantic mode attempts to warn you about potential issues in your #' Stan program beyond syntax errors. For details see the [*Pedantic mode* #' chapter](https://mc-stan.org/docs/reference-manual/pedantic-mode.html) in #' the Stan Reference Manual. -#' * `include_paths`: (character vector) Paths to directories where Stan +#' @param include_paths (character vector) Paths to directories where Stan #' should look for files specified in `#include` directives in the Stan #' program. -#' * `stanc_options`: (list) Any other Stan-to-C++ transpiler options to be +#' @param stanc_options (list) Any other Stan-to-C++ transpiler options to be #' used when compiling the model. See the documentation for the #' [`$compile()`][model-method-compile] method for details. -#' * `quiet`: (logical) Should informational messages be suppressed? The +#' @param quiet (logical) Should informational messages be suppressed? The #' default is `FALSE`, which will print a message if the Stan program is valid #' or the compiler error message if there are syntax errors. If `TRUE`, only #' the error message will be printed. 
@@ -600,12 +569,10 @@ CmdStanModel$set("public", name = "compile", value = compile_method) #' mod$check_syntax(pedantic = TRUE) #' } #' -NULL - -check_syntax_method <- function(pedantic = FALSE, - include_paths = NULL, - stanc_options = list(), - quiet = FALSE) { +check_syntax <- function(pedantic = FALSE, + include_paths = NULL, + stanc_options = list(), + quiet = FALSE) { if (length(stanc_options) == 0 && !is.null(private$precompile_stanc_options_)) { stanc_options <- private$precompile_stanc_options_ } @@ -674,7 +641,7 @@ check_syntax_method <- function(pedantic = FALSE, } invisible(TRUE) } -CmdStanModel$set("public", name = "check_syntax", value = check_syntax_method) +CmdStanModel$set("public", name = "check_syntax", value = check_syntax) #' Run Stan's MCMC algorithms #' @@ -687,192 +654,58 @@ CmdStanModel$set("public", name = "check_syntax", value = check_syntax_method) #' a set of draws from the posterior distribution of a model conditioned on #' some data. #' -#' @section Usage: -#' ``` -#' $sample( -#' data = NULL, -#' seed = NULL, -#' refresh = NULL, -#' init = NULL, -#' save_latent_dynamics = FALSE, -#' output_dir = NULL, -#' sig_figs = NULL, -#' chains = 4, -#' parallel_chains = getOption("mc.cores", 1), -#' chain_ids = seq_len(chains), -#' threads_per_chain = NULL, -#' iter_warmup = NULL, -#' iter_sampling = NULL, -#' save_warmup = FALSE, -#' thin = NULL, -#' max_treedepth = NULL, -#' adapt_engaged = TRUE, -#' adapt_delta = NULL, -#' step_size = NULL, -#' metric = NULL, -#' metric_file = NULL, -#' inv_metric = NULL, -#' init_buffer = NULL, -#' term_buffer = NULL, -#' window = NULL, -#' fixed_param = FALSE, -#' validate_csv = TRUE, -#' show_messages = TRUE -#' ) -#' ``` +#' Any argument left as `NULL` will default to the default value used by the +#' installed version of CmdStan. See the +#' [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/) +#' for more details. 
#' #' @template model-common-args -#' @section Arguments unique to the `sample` method: In addition to the -#' arguments above, the `$sample()` method also has its own set of arguments. -#' -#' The following three arguments are offered by CmdStanR but do not correspond -#' to arguments in CmdStan: -#' -#' * `chains`: (positive integer) The number of Markov chains to run. The -#' default is 4. -#' -#' * `parallel_chains`: (positive integer) The _maximum_ number of MCMC chains -#' to run in parallel. If `parallel_chains` is not specified then the default -#' is to look for the option `"mc.cores"`, which can be set for an entire \R -#' session by `options(mc.cores=value)`. If the `"mc.cores"` option has not -#' been set then the default is `1`. +#' @template model-sample-args +#' @param cores,num_cores,num_chains,num_warmup,num_samples,save_extra_diagnostics,max_depth,stepsize +#' Deprecated and will be removed in a future release. #' -#' * `chain_ids`: (vector) A vector of chain IDs. Must contain `chains` unique -#' positive integers. If not set, the default chain IDs are used (integers -#' starting from `1`). -#' -#' * `threads_per_chain`: (positive integer) If the model was -#' [compiled][model-method-compile] with threading support, the number of -#' threads to use in parallelized sections _within_ an MCMC chain (e.g., when -#' using the Stan functions `reduce_sum()` or `map_rect()`). This is in -#' contrast with `parallel_chains`, which specifies the number of chains to -#' run in parallel. The actual number of CPU cores used use is -#' `parallel_chains*threads_per_chain`. For an example of using threading see -#' the Stan case study [Reduce Sum: A Minimal -#' Example](https://mc-stan.org/users/documentation/case-studies/reduce_sum_tutorial.html). -#' -#' * `show_messages`: (logical) When `TRUE` (the default), prints all -#' informational messages, for example rejection of the current proposal. 
-#' Disable if you wish silence these messages, but this is not recommended -#' unless you are very sure that the model is correct up to numerical error. -#' If the messages are silenced then the `$output()` method of the resulting -#' fit object can be used to display all the silenced messages. -#' -#' * `validate_csv`: (logical) When `TRUE` (the default), validate the -#' sampling results in the csv files. Disable if you wish to manually read in -#' the sampling results and validate them yourself, for example using -#' [read_cmdstan_csv()]. -#' -#' -#' The rest of the arguments correspond to arguments offered by CmdStan, -#' although some names are slightly different. They are described briefly here -#' and in greater detail in the CmdStan manual. Arguments left at `NULL` -#' default to the default used by the installed version of CmdStan. -#' The latest [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/) -#' will have the default values for the latest version of CmdStan. -#' -#' * `iter_sampling`: (positive integer) The number of post-warmup iterations to -#' run per chain. -#' * `iter_warmup`: (positive integer) The number of warmup iterations to run -#' per chain. -#' * `save_warmup`: (logical) Should warmup iterations be saved? The default -#' is `FALSE`. If `save_warmup=TRUE` then you can use -#' [$draws(inc_warmup=TRUE)][fit-method-draws] to include warmup when -#' accessing the draws. -#' * `thin`: (positive integer) The period between saved samples. This should -#' be left at its default (no thinning) unless memory is a problem. -#' * `max_treedepth`: (positive integer) The maximum allowed tree depth for the -#' NUTS engine. See the _Tree Depth_ section of the CmdStan manual for more -#' details. -#' * `adapt_engaged`: (logical) Do warmup adaptation? The default is `TRUE`. 
-#' If a precomputed inverse metric is specified via the `inv_metric` argument -#' (or `metric_file`) then, if `adapt_engaged=TRUE`, Stan will use the -#' provided inverse metric just as an initial guess during adaptation. To turn -#' off adaptation when using a precomputed inverse metric set -#' `adapt_engaged=FALSE`. -#' * `adapt_delta`: (real in `(0,1)`) The adaptation target acceptance -#' statistic. -#' * `step_size`: (positive real) The _initial_ step size for the discrete -#' approximation to continuous Hamiltonian dynamics. This is further tuned -#' during warmup. -#' * `metric`: (character) One of `"diag_e"`, `"dense_e"`, or `"unit_e"`, -#' specifying the geometry of the base manifold. See the _Euclidean Metric_ -#' section of the CmdStan documentation for more details. To specify a -#' precomputed (inverse) metric, see the `inv_metric` argument below. -#' * `metric_file`: (character) A character vector containing paths to JSON or -#' Rdump files (one per chain) compatible with CmdStan that contain -#' precomputed inverse metrics. The `metric_file` argument is inherited from -#' CmdStan but is confusing in that the entry in JSON or Rdump file(s) must be -#' named `inv_metric`, referring to the _inverse_ metric. We recommend instead -#' using CmdStanR's `inv_metric` argument (see below) to specify an inverse -#' metric directly using a vector or matrix from your \R session. -#' * `inv_metric`: (vector, matrix) A vector (if `metric='diag_e'`) or a -#' matrix (if `metric='dense_e'`) for initializing the inverse metric, which -#' can be used as an alternative to the `metric_file` argument. A vector is -#' interpreted as a diagonal metric. The inverse metric is usually set to an -#' estimate of the posterior covariance. See the `adapt_engaged` argument -#' above for details on (and control over) how specifying a precomputed -#' inverse metric interacts with adaptation. 
-#' * `init_buffer`: (nonnegative integer) Width of initial fast timestep -#' adaptation interval during warmup. -#' * `term_buffer`: (nonnegative integer) Width of final fast timestep -#' adaptation interval during warmup. -#' * `window`: (nonnegative integer) Initial width of slow timestep/metric -#' adaptation interval. -#' * `fixed_param`: (logical) When `TRUE`, call CmdStan with argument -#' `"algorithm=fixed_param"`. The default is `FALSE`. The fixed parameter -#' sampler generates a new sample without changing the current state of the -#' Markov chain; only generated quantities may change. This can be useful -#' when, for example, trying to generate pseudo-data using the generated -#' quantities block. If the parameters block is empty then using -#' `fixed_param=TRUE` is mandatory. When `fixed_param=TRUE` the `chains` and -#' `parallel_chains` arguments will be set to `1`. -#' -#' @section Value: The `$sample()` method returns a [`CmdStanMCMC`] object. +#' @section Value: A [`CmdStanMCMC`] object. 
#' #' @template seealso-docs #' @inherit cmdstan_model examples #' -NULL - -sample_method <- function(data = NULL, - seed = NULL, - refresh = NULL, - init = NULL, - save_latent_dynamics = FALSE, - output_dir = NULL, - sig_figs = NULL, - chains = 4, - parallel_chains = getOption("mc.cores", 1), - chain_ids = seq_len(chains), - threads_per_chain = NULL, - iter_warmup = NULL, - iter_sampling = NULL, - save_warmup = FALSE, - thin = NULL, - max_treedepth = NULL, - adapt_engaged = TRUE, - adapt_delta = NULL, - step_size = NULL, - metric = NULL, - metric_file = NULL, - inv_metric = NULL, - init_buffer = NULL, - term_buffer = NULL, - window = NULL, - fixed_param = FALSE, - validate_csv = TRUE, - show_messages = TRUE, - # deprecated - cores = NULL, - num_cores = NULL, - num_chains = NULL, - num_warmup = NULL, - num_samples = NULL, - save_extra_diagnostics = NULL, - max_depth = NULL, - stepsize = NULL) { - +sample <- function(data = NULL, + seed = NULL, + refresh = NULL, + init = NULL, + save_latent_dynamics = FALSE, + output_dir = NULL, + sig_figs = NULL, + chains = 4, + parallel_chains = getOption("mc.cores", 1), + chain_ids = seq_len(chains), + threads_per_chain = NULL, + iter_warmup = NULL, + iter_sampling = NULL, + save_warmup = FALSE, + thin = NULL, + max_treedepth = NULL, + adapt_engaged = TRUE, + adapt_delta = NULL, + step_size = NULL, + metric = NULL, + metric_file = NULL, + inv_metric = NULL, + init_buffer = NULL, + term_buffer = NULL, + window = NULL, + fixed_param = FALSE, + validate_csv = TRUE, + show_messages = TRUE, + # deprecated + cores = NULL, + num_cores = NULL, + num_chains = NULL, + num_warmup = NULL, + num_samples = NULL, + save_extra_diagnostics = NULL, + max_depth = NULL, + stepsize = NULL) { # temporary deprecation warnings if (!is.null(cores)) { warning("'cores' is deprecated. 
Please use 'parallel_chains' instead.") @@ -972,7 +805,7 @@ sample_method <- function(data = NULL, runset$run_cmdstan() CmdStanMCMC$new(runset) } -CmdStanModel$set("public", name = "sample", value = sample_method) +CmdStanModel$set("public", name = "sample", value = sample) #' Run Stan's MCMC algorithms with MPI #' @@ -987,15 +820,15 @@ CmdStanModel$set("public", name = "sample", value = sample_method) #' [`$sample()`][model-method-sample] method provides both parallelization of #' chains and threading support for within-chain parallelization. #' -#' @details In order to use MPI with Stan, an MPI implementation must be +#' In order to use MPI with Stan, an MPI implementation must be #' installed. For Unix systems the most commonly used implementations are #' MPICH and OpenMPI. The implementations provide an MPI C++ compiler wrapper #' (for example mpicxx), which is required to compile the model. #' #' An example of compiling with MPI: #' ``` -#' mpi_options <- list(STAN_MPI=TRUE, CXX="mpicxx", TBB_CXX_TYPE="gcc") -#' mod <- cmdstan_model("model.stan", cpp_options = mpi_options) +#' mpi_options = list(STAN_MPI=TRUE, CXX="mpicxx", TBB_CXX_TYPE="gcc") +#' mod = cmdstan_model("model.stan", cpp_options = mpi_options) #' ``` #' The C++ options that must be supplied to the #' [compile][model-method-compile] call are: @@ -1004,59 +837,21 @@ CmdStanModel$set("public", name = "sample", value = sample_method) #' - `TBB_CXX_TYPE`: The C++ compiler the MPI wrapper wraps. Typically `"gcc"` #' on Linux and `"clang"` on macOS. #' -#' In the call to the `$sample_mpi()` method we can also provide the name of -#' the MPI launcher (`mpi_cmd`, defaulting to `"mpiexec"`) and any other -#' MPI launch arguments. In most cases, it is enough to only define the number -#' of processes with `mpi_args = list("n" = 4)`. 
-#' -#' @section Usage: -#' ``` -#' $sample_mpi( -#' data = NULL, -#' mpi_cmd = "mpiexec", -#' mpi_args = NULL, -#' seed = NULL, -#' refresh = NULL, -#' init = NULL, -#' save_latent_dynamics = FALSE, -#' output_dir = NULL, -#' sig_figs = NULL, -#' chains = 4, -#' parallel_chains = getOption("mc.cores", 1), -#' chain_ids = seq_len(chains), -#' iter_warmup = NULL, -#' iter_sampling = NULL, -#' save_warmup = FALSE, -#' thin = NULL, -#' max_treedepth = NULL, -#' adapt_engaged = TRUE, -#' adapt_delta = NULL, -#' step_size = NULL, -#' metric = NULL, -#' metric_file = NULL, -#' inv_metric = NULL, -#' init_buffer = NULL, -#' term_buffer = NULL, -#' window = NULL, -#' fixed_param = FALSE, -#' validate_csv = TRUE, -#' show_messages = TRUE -#' ) -#' ``` -#' -#' @section Arguments unique to the `sample_mpi` method: -#' * `mpi_cmd`: (character vector) The MPI launcher used for launching MPI processes. -#' The default launcher is `"mpiexec"`. -#' * `mpi_args`: (list) A list of arguments to use when launching MPI processes. -#' For example, `mpi_args = list("n" = 4)` launches the executable as -#' `mpiexec -n 4 model_executable`, followed by CmdStan arguments -#' for the model executable. +#' In the call to the `$sample_mpi()` method it is also possible to provide +#' the name of the MPI launcher (`mpi_cmd`, defaulting to `"mpiexec"`) and any +#' other MPI launch arguments (`mpi_args`). In most cases, it is enough to +#' only define the number of processes. To use `n_procs` processes specify +#' `mpi_args = list("n" = n_procs)`. #' -#' All other arguments are the same as for [`$sample()`][model-method-sample] -#' except `$sample_mpi()` does not have arguments `threads_per_chain` or -#' `parallel_chains`. +#' @inheritParams model-method-sample +#' @param mpi_cmd (character vector) The MPI launcher used for launching MPI +#' processes. The default launcher is `"mpiexec"`. +#' @param mpi_args (list) A list of arguments to use when launching MPI +#' processes. 
For example, `mpi_args = list("n" = 4)` launches the executable +#' as `mpiexec -n 4 model_executable`, followed by CmdStan arguments for the +#' model executable. #' -#' @section Value: The `$sample_mpi()` method returns a [`CmdStanMCMC`] object. +#' @section Value: A [`CmdStanMCMC`] object. #' #' @template seealso-docs #' @seealso The Stan Math Library's MPI documentation @@ -1070,36 +865,34 @@ CmdStanModel$set("public", name = "sample", value = sample_method) #' # fit <- mod$sample_mpi(..., mpi_args = list("n" = 4)) #' } #' -NULL - -sample_mpi_method <- function(data = NULL, - mpi_cmd = "mpiexec", - mpi_args = NULL, - seed = NULL, - refresh = NULL, - init = NULL, - save_latent_dynamics = FALSE, - output_dir = NULL, - chains = 1, - chain_ids = seq_len(chains), - iter_warmup = NULL, - iter_sampling = NULL, - save_warmup = FALSE, - thin = NULL, - max_treedepth = NULL, - adapt_engaged = TRUE, - adapt_delta = NULL, - step_size = NULL, - metric = NULL, - metric_file = NULL, - inv_metric = NULL, - init_buffer = NULL, - term_buffer = NULL, - window = NULL, - fixed_param = FALSE, - sig_figs = NULL, - validate_csv = TRUE, - show_messages = TRUE) { +sample_mpi <- function(data = NULL, + mpi_cmd = "mpiexec", + mpi_args = NULL, + seed = NULL, + refresh = NULL, + init = NULL, + save_latent_dynamics = FALSE, + output_dir = NULL, + chains = 1, + chain_ids = seq_len(chains), + iter_warmup = NULL, + iter_sampling = NULL, + save_warmup = FALSE, + thin = NULL, + max_treedepth = NULL, + adapt_engaged = TRUE, + adapt_delta = NULL, + step_size = NULL, + metric = NULL, + metric_file = NULL, + inv_metric = NULL, + init_buffer = NULL, + term_buffer = NULL, + window = NULL, + fixed_param = FALSE, + sig_figs = NULL, + validate_csv = TRUE, + show_messages = TRUE) { if (fixed_param) { chains <- 1 save_warmup <- FALSE @@ -1147,7 +940,7 @@ sample_mpi_method <- function(data = NULL, runset$run_cmdstan_mpi(mpi_cmd, mpi_args) CmdStanMCMC$new(runset) } -CmdStanModel$set("public", name = 
"sample_mpi", value = sample_mpi_method) +CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi) #' Run Stan's optimization algorithms #' @@ -1159,6 +952,11 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi_method) #' Stan's optimizer to obtain a posterior mode (penalized maximum likelihood) #' estimate. #' +#' Any argument left as `NULL` will default to the default value used by the +#' installed version of CmdStan. See the +#' [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/) +#' for more details. +#' #' @details CmdStan can find the posterior mode (assuming there is one). If the #' posterior is not convex, there is no guarantee Stan will be able to find #' the global mode as opposed to a local optimum of log probability. For @@ -1168,81 +966,48 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi_method) #' #' -- [*CmdStan User's Guide*](https://mc-stan.org/docs/cmdstan-guide/) #' -#' @section Usage: -#' ``` -#' $optimize( -#' data = NULL, -#' seed = NULL, -#' refresh = NULL, -#' init = NULL, -#' save_latent_dynamics = FALSE, -#' output_dir = NULL, -#' sig_figs = NULL, -#' threads = NULL, -#' algorithm = NULL, -#' init_alpha = NULL, -#' iter = NULL, -#' tol_obj = NULL, -#' tol_rel_obj = NULL, -#' tol_grad = NULL, -#' tol_rel_grad = NULL, -#' tol_param = NULL, -#' history_size = NULL -#' ) -#' ``` -#' #' @template model-common-args -#' @section Arguments unique to the `optimize` method: In addition to the -#' arguments above, the `$optimize()` method also has its own set of -#' arguments. These arguments are described briefly here and in greater detail -#' in the CmdStan manual. Arguments left at `NULL` default to the default used -#' by the installed version of CmdStan. -#' The latest [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/) -#' will have the defaults for the latest version of CmdStan. 
-#' -#' * `threads`: (positive integer) If the model was +#' @param threads (positive integer) If the model was #' [compiled][model-method-compile] with threading support, the number of #' threads to use in parallelized sections (e.g., when #' using the Stan functions `reduce_sum()` or `map_rect()`). -#' * `iter`: (positive integer) The maximum number of iterations. -#' * `algorithm`: (string) The optimization algorithm. One of `"lbfgs"`, +#' @param iter (positive integer) The maximum number of iterations. +#' @param algorithm (string) The optimization algorithm. One of `"lbfgs"`, #' `"bfgs"`, or `"newton"`. The control parameters below are only available #' for `"lbfgs"` and `"bfgs`. For their default values and more details see #' the CmdStan User's Guide. The default values can also be obtained by #' running `cmdstanr_example(method="optimize")$metadata()`. -#' * `init_alpha`: (positive real) The initial step size parameter. -#' * `tol_obj`: (positive real) Convergence tolerance on changes in objective function value. -#' * `tol_rel_obj`: (positive real) Convergence tolerance on relative changes in objective function value. -#' * `tol_grad`: (positive real) Convergence tolerance on the norm of the gradient. -#' * `tol_rel_grad`: (positive real) Convergence tolerance on the relative norm of the gradient. -#' * `tol_param`: (positive real) Convergence tolerance on changes in parameter value. -#' * `history_size`: (positive integer) The size of the history used when +#' @param init_alpha (positive real) The initial step size parameter. +#' @param tol_obj (positive real) Convergence tolerance on changes in objective function value. +#' @param tol_rel_obj (positive real) Convergence tolerance on relative changes in objective function value. +#' @param tol_grad (positive real) Convergence tolerance on the norm of the gradient. +#' @param tol_rel_grad (positive real) Convergence tolerance on the relative norm of the gradient. 
+#' @param tol_param (positive real) Convergence tolerance on changes in parameter value. +#' @param history_size (positive integer) The size of the history used when #' approximating the Hessian. Only available for L-BFGS. #' -#' @section Value: The `$optimize()` method returns a [`CmdStanMLE`] object. +#' @section Value: A [`CmdStanMLE`] object. #' #' @template seealso-docs #' @inherit cmdstan_model examples #' -NULL - -optimize_method <- function(data = NULL, - seed = NULL, - refresh = NULL, - init = NULL, - save_latent_dynamics = FALSE, - output_dir = NULL, - sig_figs = NULL, - threads = NULL, - algorithm = NULL, - init_alpha = NULL, - iter = NULL, - tol_obj = NULL, - tol_rel_obj = NULL, - tol_grad = NULL, - tol_rel_grad = NULL, - tol_param = NULL, - history_size = NULL) { +optimize <- function(data = NULL, + seed = NULL, + refresh = NULL, + init = NULL, + save_latent_dynamics = FALSE, + output_dir = NULL, + sig_figs = NULL, + threads = NULL, + algorithm = NULL, + init_alpha = NULL, + iter = NULL, + tol_obj = NULL, + tol_rel_obj = NULL, + tol_grad = NULL, + tol_rel_grad = NULL, + tol_param = NULL, + history_size = NULL) { checkmate::assert_integerish(threads, lower = 1, len = 1, null.ok = TRUE) if (is.null(self$cpp_options()[["stan_threads"]])) { if (!is.null(threads)) { @@ -1292,7 +1057,7 @@ optimize_method <- function(data = NULL, runset$run_cmdstan() CmdStanMLE$new(runset) } -CmdStanModel$set("public", name = "optimize", value = optimize_method) +CmdStanModel$set("public", name = "optimize", value = optimize) #' Run Stan's variational approximation algorithms @@ -1304,6 +1069,11 @@ CmdStanModel$set("public", name = "optimize", value = optimize_method) #' @description The `$variational()` method of a [`CmdStanModel`] object runs #' Stan's variational Bayes (ADVI) algorithms. #' +#' Any argument left as `NULL` will default to the default value used by the +#' installed version of CmdStan. 
See the +#' [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/) +#' for more details. +#' #' @details CmdStan can fit a variational approximation to the posterior. The #' approximation is a Gaussian in the unconstrained variable space. Stan #' implements two variational algorithms. The `algorithm="meanfield"` option @@ -1313,84 +1083,52 @@ CmdStanModel$set("public", name = "optimize", value = optimize_method) #' #' -- [*CmdStan Interface User's Guide*](https://github.com/stan-dev/cmdstan/releases/latest) #' -#' @section Usage: -#' ``` -#' $variational( -#' data = NULL, -#' seed = NULL, -#' refresh = NULL, -#' init = NULL, -#' save_latent_dynamics = FALSE, -#' output_dir = NULL, -#' sig_figs = NULL, -#' threads = NULL, -#' algorithm = NULL, -#' iter = NULL, -#' grad_samples = NULL, -#' elbo_samples = NULL, -#' eta = NULL, -#' adapt_engaged = NULL, -#' adapt_iter = NULL, -#' tol_rel_obj = NULL, -#' eval_elbo = NULL, -#' output_samples = NULL -#' ) -#' ``` -#' #' @template model-common-args -#' @section Arguments unique to the `variational` method: In addition to the -#' arguments above, the `$variational()` method also has its own set of -#' arguments. These arguments are described briefly here and in greater detail -#' in the CmdStan manual. Arguments left at `NULL` default to the default used -#' by the installed version of CmdStan. -#' -#' * `threads`: (positive integer) If the model was +#' @param threads (positive integer) If the model was #' [compiled][model-method-compile] with threading support, the number of -#' threads to use in parallelized sections (e.g., when -#' using the Stan functions `reduce_sum()` or `map_rect()`). -#' * `algorithm`: (string) The algorithm. Either `"meanfield"` or `"fullrank"`. -#' * `iter`: (positive integer) The _maximum_ number of iterations. 
-#' * `grad_samples`: (positive integer) The number of samples for Monte Carlo +#' threads to use in parallelized sections (e.g., when using the Stan +#' functions `reduce_sum()` or `map_rect()`). +#' @param algorithm (string) The algorithm. Either `"meanfield"` or +#' `"fullrank"`. +#' @param iter (positive integer) The _maximum_ number of iterations. +#' @param grad_samples (positive integer) The number of samples for Monte Carlo #' estimate of gradients. -#' * `elbo_samples`: (positive integer) The number of samples for Monte Carlo +#' @param elbo_samples (positive integer) The number of samples for Monte Carlo #' estimate of ELBO (objective function). -#' * `eta`: (positive real) The step size weighting parameter for adaptive +#' @param eta (positive real) The step size weighting parameter for adaptive #' step size sequence. -#' * `adapt_engaged`: (logical) Do warmup adaptation? -#' * `adapt_iter`: (positive integer) The _maximum_ number of adaptation +#' @param adapt_engaged (logical) Do warmup adaptation? +#' @param adapt_iter (positive integer) The _maximum_ number of adaptation #' iterations. -#' * `tol_rel_obj`: (positive real) Convergence tolerance on the relative norm +#' @param tol_rel_obj (positive real) Convergence tolerance on the relative norm #' of the objective. -#' * `eval_elbo`: (positive integer) Evaluate ELBO every Nth iteration. -#' * `output_samples:` (positive integer) Number of posterior samples to draw -#' and save. +#' @param eval_elbo (positive integer) Evaluate ELBO every Nth iteration. +#' @param output_samples (positive integer) Number of approximate posterior +#' samples to draw and save. #' -#' -#' @section Value: The `$variational()` method returns a [`CmdStanVB`] object. +#' @section Value: A [`CmdStanVB`] object. 
#' #' @template seealso-docs #' @inherit cmdstan_model examples #' -NULL - -variational_method <- function(data = NULL, - seed = NULL, - refresh = NULL, - init = NULL, - save_latent_dynamics = FALSE, - output_dir = NULL, - sig_figs = NULL, - threads = NULL, - algorithm = NULL, - iter = NULL, - grad_samples = NULL, - elbo_samples = NULL, - eta = NULL, - adapt_engaged = NULL, - adapt_iter = NULL, - tol_rel_obj = NULL, - eval_elbo = NULL, - output_samples = NULL) { +variational <- function(data = NULL, + seed = NULL, + refresh = NULL, + init = NULL, + save_latent_dynamics = FALSE, + output_dir = NULL, + sig_figs = NULL, + threads = NULL, + algorithm = NULL, + iter = NULL, + grad_samples = NULL, + elbo_samples = NULL, + eta = NULL, + adapt_engaged = NULL, + adapt_iter = NULL, + tol_rel_obj = NULL, + eval_elbo = NULL, + output_samples = NULL) { checkmate::assert_integerish(threads, lower = 1, len = 1, null.ok = TRUE) if (is.null(self$cpp_options()[["stan_threads"]])) { if (!is.null(threads)) { @@ -1441,7 +1179,7 @@ variational_method <- function(data = NULL, runset$run_cmdstan() CmdStanVB$new(runset) } -CmdStanModel$set("public", name = "variational", value = variational_method) +CmdStanModel$set("public", name = "variational", value = variational) #' Run Stan's standalone generated quantities method #' @@ -1453,29 +1191,15 @@ CmdStanModel$set("public", name = "variational", value = variational_method) #' runs Stan's standalone generated quantities to obtain generated quantities #' based on previously fitted parameters. #' -#' @section Usage: -#' ``` -#' $generate_quantities( -#' fitted_params, -#' data = NULL, -#' seed = NULL, -#' output_dir = NULL, -#' sig_figs = NULL, -#' parallel_chains = getOption("mc.cores", 1), -#' threads_per_chain = NULL -#' ) -#' ``` +#' @inheritParams model-method-sample +#' @param fitted_params (multiple options) The parameter draws to use. One of +#' the following: +#' * A [CmdStanMCMC] or [CmdStanVB] fitted model object. 
+#' * A [posterior::draws_array] (for MCMC) or [posterior::draws_matrix] (for +#' VB) object returned by CmdStanR's [`$draws()`][fit-method-draws] method. +#' * A character vector of paths to CmdStan CSV output files. #' -#' @section Arguments: -#' * `fitted_params`: (multiple options) The parameter draws to use. One of the following: -#' - A [CmdStanMCMC] or [CmdStanVB] fitted model object. -#' - A [posterior::draws_array] (for MCMC) or [posterior::draws_matrix] (for VB) -#' object returned by CmdStanR's [`$draws()`][fit-method-draws] method. -#' - A character vector of paths to CmdStan CSV output files. -#' * `data`, `seed`, `output_dir`, `sig_figs`, `parallel_chains`, `threads_per_chain`: -#' Same as for the [`$sample()`][model-method-sample] method. -#' -#' @section Value: The `$generate_quantities()` method returns a [`CmdStanGQ`] object. +#' @section Value: A [`CmdStanGQ`] object. #' #' @template seealso-docs #' @@ -1522,15 +1246,13 @@ CmdStanModel$set("public", name = "variational", value = variational_method) #' as_draws_df(fit_gq$draws()) #' } #' -NULL - -generate_quantities_method <- function(fitted_params, - data = NULL, - seed = NULL, - output_dir = NULL, - sig_figs = NULL, - parallel_chains = getOption("mc.cores", 1), - threads_per_chain = NULL) { +generate_quantities <- function(fitted_params, + data = NULL, + seed = NULL, + output_dir = NULL, + sig_figs = NULL, + parallel_chains = getOption("mc.cores", 1), + threads_per_chain = NULL) { checkmate::assert_integerish(parallel_chains, lower = 1, null.ok = TRUE) checkmate::assert_integerish(threads_per_chain, lower = 1, len = 1, null.ok = TRUE) if (is.null(self$cpp_options()[["stan_threads"]])) { @@ -1572,4 +1294,4 @@ generate_quantities_method <- function(fitted_params, runset$run_cmdstan() CmdStanGQ$new(runset) } -CmdStanModel$set("public", name = "generate_quantities", value = generate_quantities_method) +CmdStanModel$set("public", name = "generate_quantities", value = generate_quantities) diff --git 
a/docs/404.html b/docs/404.html index 43b2a3f6a..f82b192b0 100644 --- a/docs/404.html +++ b/docs/404.html @@ -79,7 +79,7 @@
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index b96e169fb..11f3941a8 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -79,7 +79,7 @@ diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 81d4b0e51..105fe6888 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -79,7 +79,7 @@ diff --git a/docs/articles/cmdstanr-internals.html b/docs/articles/cmdstanr-internals.html index c03ca23e8..8ef4ae7e3 100644 --- a/docs/articles/cmdstanr-internals.html +++ b/docs/articles/cmdstanr-internals.html @@ -38,7 +38,7 @@ @@ -229,14 +229,14 @@