diff --git a/R/get_sundered_data.R b/R/get_sundered_data.R index 066565b62..808b742b7 100644 --- a/R/get_sundered_data.R +++ b/R/get_sundered_data.R @@ -268,6 +268,14 @@ get_sundered_data <- function( ) } + # Stop function if `tbl_checked` is not present + if (!"tbl_checked" %in% colnames(agent$validation_set)) { + stop( + "`agent` is missing `tbl_checked` information required for sundering. ", + "See `?interrogate`." + ) + } + # Get the row count of the input table row_count_input_tbl <- input_tbl %>% diff --git a/R/interrogate.R b/R/interrogate.R index 14a3825bf..37722420d 100644 --- a/R/interrogate.R +++ b/R/interrogate.R @@ -44,6 +44,15 @@ #' The default is `TRUE` and further options allow for fine control of how #' these rows are collected. #' +#' @param extract_tbl_checked *Collect validation results from each step* +#' +#' `scalar` // *default:* `TRUE` +#' +#' An option to collect processed data frames produced by executing the +#' validation steps. This information is necessary for some functions +#' (e.g., `get_sundered_data()`), but may grow to a large size. To opt out +#' of attaching this data to the agent, set this argument to `FALSE`. +#' #' @param get_first_n *Get the first n values* #' #' `scalar` // *default:* `NULL` (`optional`) @@ -143,6 +152,7 @@ interrogate <- function( agent, extract_failed = TRUE, + extract_tbl_checked = TRUE, get_first_n = NULL, sample_n = NULL, sample_frac = NULL, @@ -729,6 +739,11 @@ interrogate <- function( # all validation steps have been carried out class(agent) <- c("has_intel", "ptblank_agent") + # Drop $tbl_checked if `extract_tbl_checked = FALSE` + if (!extract_tbl_checked) { + agent$validation_set$tbl_checked <- NULL + } + # Add the ending time to the `agent` object agent$time_end <- Sys.time() diff --git a/man/interrogate.Rd b/man/interrogate.Rd index d7cc65fdd..1a16c3efe 100644 --- a/man/interrogate.Rd +++ b/man/interrogate.Rd @@ -7,6 +7,7 @@ interrogate( agent, extract_failed = TRUE, + extract_tbl_checked = TRUE, get_first_n = NULL, sample_n = NULL, sample_frac = NULL, @@ -31,6 +32,15 @@ An option to collect rows that didn't pass a particular validation step. The default is \code{TRUE} and further options allow for fine control of how these rows are collected.} +\item{extract_tbl_checked}{\emph{Collect validation results from each step} + +\verb{scalar} // \emph{default:} \code{TRUE} + +An option to collect processed data frames produced by executing the +validation steps. This information is necessary for some functions +(e.g., \code{get_sundered_data()}), but may grow to a large size. To opt out +of attaching this data to the agent, set this argument to \code{FALSE}.} + \item{get_first_n}{\emph{Get the first n values} \verb{scalar} // \emph{default:} \code{NULL} (\code{optional}) diff --git a/tests/testthat/test-sundering.R b/tests/testthat/test-sundering.R index be126edfb..e3a2d0850 100644 --- a/tests/testthat/test-sundering.R +++ b/tests/testthat/test-sundering.R @@ -548,3 +548,23 @@ test_that("an error occurs if using `get_sundered_data()` when agent has no inte get_sundered_data() ) }) + +test_that("an error occurs if using `get_sundered_data()` when agent is missing `$tbl_checked`", { + + # Expect an error if the agent performed an interrogation + # with `extract_tbl_checked = FALSE` + expect_error( + create_agent(tbl = small_table) %>% + col_vals_gt(vars(date_time), vars(date), na_pass = TRUE) %>% + col_vals_gt(vars(b), vars(g), na_pass = TRUE) %>% + rows_distinct(vars(d, e)) %>% + rows_distinct(vars(a, f)) %>% + col_vals_gt(vars(d), 100) %>% + col_vals_equal(vars(d), vars(d), na_pass = TRUE) %>% + col_vals_between(vars(c), left = vars(a), right = vars(d), na_pass = TRUE) %>% + interrogate(extract_tbl_checked = FALSE) %>% + get_sundered_data(), + "missing `tbl_checked`" + ) +}) +