From c978dd8236329b63b35e8e51a79faf026a334fc1 Mon Sep 17 00:00:00 2001 From: olivroy Date: Tue, 3 Dec 2024 17:02:56 -0500 Subject: [PATCH 1/4] Simplify dplyr code --- R/get_sundered_data.R | 10 +++++----- R/interrogate.R | 14 ++++++-------- R/scan_data.R | 25 ++++++++----------------- R/utils-profiling.R | 11 +++++------ scripts/generate_roadmap.R | 18 +++++++++--------- 5 files changed, 33 insertions(+), 45 deletions(-) diff --git a/R/get_sundered_data.R b/R/get_sundered_data.R index 230b5c81d..532810405 100644 --- a/R/get_sundered_data.R +++ b/R/get_sundered_data.R @@ -279,7 +279,7 @@ get_sundered_data <- function( # Get the row count of the input table row_count_input_tbl <- input_tbl %>% - dplyr::summarize(n = dplyr::n()) %>% + dplyr::count() %>% dplyr::pull(n) %>% as.numeric() @@ -289,15 +289,15 @@ get_sundered_data <- function( # - are `active` validation_set_prefiltered <- agent$validation_set %>% - dplyr::filter(eval_error == FALSE) %>% dplyr::filter( + !eval_error, assertion_type %in% base::setdiff( row_based_validation_fns_vec(), c("rows_distinct", "col_vals_make_set", "col_vals_make_subset") - ) - ) %>% - dplyr::filter(active == TRUE) + ), + active == TRUE + ) # Get a character vector of preconditions preconditions_vec <- diff --git a/R/interrogate.R b/R/interrogate.R index 181a1adc4..d2e6cf299 100644 --- a/R/interrogate.R +++ b/R/interrogate.R @@ -2414,9 +2414,7 @@ interrogate_distinct <- function( unduplicated <- table %>% dplyr::select({{ column_names }}) %>% - dplyr::group_by(!!!col_syms) %>% - dplyr::summarize(`pb_is_good_` = dplyr::n()) %>% - dplyr::ungroup() %>% + dplyr::count(!!!col_syms, name = "pb_is_good_") %>% dplyr::mutate(`pb_is_good_` = ifelse(`pb_is_good_` == 1, TRUE, FALSE)) %>% dplyr::filter(`pb_is_good_` == TRUE) @@ -3001,7 +2999,7 @@ add_reporting_data <- function( # Get total count of rows row_count <- tbl_checked %>% - dplyr::summarize(n = dplyr::n()) %>% + dplyr::count() %>% dplyr::pull(n) %>% as.numeric() @@ -3016,7 +3014,7 @@ add_reporting_data <- function( n_passed <- tbl_checked %>% dplyr::filter(pb_is_good_ == 1) %>% - dplyr::summarize(n = dplyr::n()) %>% + dplyr::count() %>% dplyr::pull(n) %>% as.numeric() @@ -3027,7 +3025,7 @@ add_reporting_data <- function( n_passed <- tbl_checked %>% dplyr::filter(pb_is_good_ == TRUE) %>% - dplyr::summarize(n = dplyr::n()) %>% + dplyr::count() %>% dplyr::pull(n) %>% as.numeric() } @@ -3043,7 +3041,7 @@ add_reporting_data <- function( n_failed <- tbl_checked %>% dplyr::filter(pb_is_good_ == 0) %>% - dplyr::summarize(n = dplyr::n()) %>% + dplyr::count() %>% dplyr::pull(n) %>% as.numeric() @@ -3054,7 +3052,7 @@ add_reporting_data <- function( n_failed <- tbl_checked %>% dplyr::filter(pb_is_good_ == FALSE) %>% - dplyr::summarize(n = dplyr::n()) %>% + dplyr::count() %>% dplyr::pull(n) %>% as.numeric() } diff --git a/R/scan_data.R b/R/scan_data.R index b55edc9b8..71ed7d889 100644 --- a/R/scan_data.R +++ b/R/scan_data.R @@ -338,16 +338,14 @@ probe_overview_stats <- function( r_col_types_tbl <- dplyr::tibble(r_col_types = r_col_types) %>% - dplyr::group_by(r_col_types) %>% - dplyr::summarize(count = dplyr::n()) %>% - dplyr::arrange(dplyr::desc(count)) %>% + dplyr::count(r_col_types, name = "count", sort = TRUE) %>% utils::head(6E8) data_overview_gt <- - gt::gt(data_overview_tbl) %>% + gt::gt(data_overview_tbl, locale = locale) %>% gt::fmt_markdown(columns = "label") %>% - gt::fmt_number(columns = "value", decimals = 0, locale = locale) %>% - gt::fmt_percent(columns = "pct", decimals = 2, locale = locale) %>% + gt::fmt_integer(columns = "value") %>% + gt::fmt_percent(columns = "pct", decimals = 2) %>% gt::cols_merge(columns = c("value", "pct"), pattern = "{1} ({2})") %>% gt::cols_align(align = "right", columns = "value") %>% gt::text_transform( @@ -684,8 +682,7 @@ get_descriptive_stats_gt <- function( descriptive_stats <- dplyr::tibble(mean = mean, variance = variance, sd = sd, cv = cv) - descriptive_stats <- - dplyr::summarize_all(descriptive_stats, ~ round(., 2)) + descriptive_stats <- round(descriptive_stats, 2) descriptive_stats <- as.list(descriptive_stats) } else { @@ -703,8 +700,7 @@ get_descriptive_stats_gt <- function( cv = ~ cv(.) ) ) - descriptive_stats <- - dplyr::summarize_all(descriptive_stats, ~ round(., 2)) + descriptive_stats <- round(descriptive_stats, 2) descriptive_stats <- as.list(descriptive_stats) } @@ -746,11 +742,8 @@ get_common_values_gt <- function( n_rows <- get_table_total_rows(data = data_column) - common_values_tbl <- dplyr::group_by_at(data_column, 1) - common_values_tbl <- dplyr::count(common_values_tbl) - common_values_tbl <- dplyr::arrange(common_values_tbl, dplyr::desc(n)) + common_values_tbl <- dplyr::count(data_column, dplyr::pick(1), sort = TRUE) common_values_tbl <- utils::head(common_values_tbl, 6E8) - common_values_tbl <- dplyr::ungroup(common_values_tbl) n_rows_common_values_tbl <- dplyr::pull(dplyr::count(common_values_tbl, name = "n", wt = n), n) @@ -885,9 +878,7 @@ get_top_bottom_slice <- function( data_column_freq <- data_column %>% - dplyr::group_by_at(1) %>% - dplyr::count() %>% - dplyr::ungroup() + dplyr::count(dplyr::pick(1)) name_1 <- rlang::sym(get_lsv("table_scan/tbl_lab_value")[[lang]]) name_2 <- rlang::sym(get_lsv("table_scan/tbl_lab_count")[[lang]]) diff --git a/R/utils-profiling.R b/R/utils-profiling.R index 3e1ce28df..18bd8b94a 100644 --- a/R/utils-profiling.R +++ b/R/utils-profiling.R @@ -50,8 +50,8 @@ get_table_total_missing_values <- function(data) { collected <- dplyr::collect( - dplyr::summarise_all( - data, ~ sum(ifelse(is.na(.), 1, 0), na.rm = TRUE) + dplyr::summarise( + data, dplyr::across(dplyr::everything(), function(x) sum(ifelse(is.na(x), 1, 0), na.rm = TRUE)) ) ) @@ -145,7 +145,7 @@ get_df_column_qtile_stats <- function(data_column) { ) ) %>% dplyr::mutate(range = max - min) %>% - dplyr::summarize_all(~ round(., 2)) %>% + round(2) %>% as.list() } @@ -294,7 +294,7 @@ get_dbi_column_qtile_stats <- function(data_column) { iqr = q_3 - q_1, range = max - min ) %>% - dplyr::summarize_all(~ round(., 2)) %>% + round(2) %>% as.list() } @@ -330,8 +330,7 @@ get_table_column_histogram <- function(data_column, lang, locale) { data_column %>% dplyr::mutate_all(.funs = nchar) %>% dplyr::rename(nchar = 1) %>% - dplyr::group_by(nchar) %>% - dplyr::summarize(n = dplyr::n()) %>% + dplyr::count(nchar) %>% dplyr::collect() %>% dplyr::filter(!is.na(nchar)) %>% dplyr::mutate_all(.funs = as.numeric) %>% diff --git a/scripts/generate_roadmap.R b/scripts/generate_roadmap.R index 8e309cbe4..91500896f 100644 --- a/scripts/generate_roadmap.R +++ b/scripts/generate_roadmap.R @@ -7,7 +7,7 @@ myrepo <- create_repo_ref("rstudio", "pointblank") issues <- get_issues(myrepo, state = "open") issues_df <- dplyr::as_tibble(parse_issues(issues)) -tbl <- +tbl <- issues_df %>% dplyr::filter(!is.na(milestone_title)) %>% dplyr::select(number, title, labels_name, milestone_title) %>% @@ -32,14 +32,14 @@ tbl <- dplyr::select( number, title, milestone_title, major, minor, patch, type, difficulty, effort, priority ) %>% - dplyr::mutate(major = gsub("v", "", major)) %>% - dplyr::mutate_at(.vars = vars(major, minor, patch), .funs = as.integer) %>% + dplyr::mutate(major = gsub("v", "", major)) %>% + dplyr::mutate(dplyr::across(c(major, minor, patch), .fns = as.integer)) %>% dplyr::mutate(difficulty = gsub(".*?([1-3]).*", "\\1", difficulty)) %>% dplyr::mutate(effort = gsub(".*?([1-3]).*", "\\1", effort)) %>% dplyr::mutate(priority = gsub(".*?([1-3]).*", "\\1", priority)) %>% dplyr::mutate(priority = ifelse(grepl("[^1-3]", priority), 4, priority)) %>% dplyr::mutate(type = gsub(".*?Type: (.*?)\\\"\\)", "\\1", type)) %>% - dplyr::mutate_at(.vars = vars(priority, difficulty, effort), .funs = as.numeric) %>% + dplyr::mutate(dplyr::across(c(priority, difficulty, effort), .fns = as.numeric)) %>% dplyr::arrange( major, minor, @@ -51,7 +51,7 @@ tbl <- dplyr::mutate(number = paste0("#", number)) %>% dplyr::select(-c(major, minor, patch)) -gt_tbl <- +gt_tbl <- tbl %>% gt( rowname_col = "number", @@ -109,13 +109,13 @@ gt_tbl <- locations = cells_body(columns = priority), fn = function(x) { ifelse(x == "4", "♨︎", x) - } + } ) %>% text_transform( locations = cells_body(columns = priority), fn = function(x) { ifelse(x == "4", "♨︎", x) - } + } ) %>% tab_style( style = "height: 50px", @@ -175,8 +175,8 @@ svg_object <- gsub("style>", ">", ., fixed = TRUE) %>% gsub("

", "

", ., fixed = TRUE) %>% gsub( - "; width: 0px\">", -"; width: 0px; + "; width: 0px\">", +"; width: 0px; @-webkit-keyframes AnimationName {0% {background-position:50% 0%} 50% {background-position:51% 100%} 100% {background-position:50% 0%}} @-moz-keyframes AnimationName {0% {background-position:50% 0%} 50% {background-position:51% 100%} 100% {background-position:50% 0%}} @-o-keyframes AnimationName {0% {background-position:50% 0%} 50% {background-position:51% 100%} 100% {background-position:50% 0%}} From dd0df1291d902ff27abeb6130d55c90ae3a249c8 Mon Sep 17 00:00:00 2001 From: olivroy Date: Tue, 3 Dec 2024 17:03:18 -0500 Subject: [PATCH 2/4] Test simplification --- tests/testthat/test-yaml_exec.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-yaml_exec.R b/tests/testthat/test-yaml_exec.R index c4438ea38..4bb8b3640 100644 --- a/tests/testthat/test-yaml_exec.R +++ b/tests/testthat/test-yaml_exec.R @@ -146,8 +146,8 @@ test_that("The `yaml_exec()` function effectively processes .yml files", { ) # Expect that neither the agent nor the informant were saved - expect_equal(length(fs::path_abs(fs::dir_ls(path = work_path, regexp = "agent.*?rds$"))), 0) - expect_equal(length(fs::path_abs(fs::dir_ls(path = work_path, regexp = "informant.*?rds$"))), 0) + expect_length(fs::path_abs(fs::dir_ls(path = work_path, regexp = "agent.*?rds$")), 0) + expect_length(fs::path_abs(fs::dir_ls(path = work_path, regexp = "informant.*?rds$")), 0) # Read just one of the YAML files (the agent) from the specified path, # write output to a path relative to the working directory; we will @@ -171,7 +171,7 @@ test_that("The `yaml_exec()` function effectively processes .yml files", { # not all units passed, let's check for that as well) and no # table to be present (would be in `agent_1$tbl`) expect_false(all_passed(agent_1)) - expect_equal(length(agent_1$extracts), 0) + expect_length(agent_1$extracts, 0) expect_null(agent_1$tbl) # Delete the written agent file From 09428dd75c418109f6c76e3c2f8d086b3feb2971 Mon Sep 17 00:00:00 2001 From: olivroy Date: Tue, 3 Dec 2024 17:10:08 -0500 Subject: [PATCH 3/4] fix lint --- R/utils-profiling.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/utils-profiling.R b/R/utils-profiling.R index 18bd8b94a..fc7e4c511 100644 --- a/R/utils-profiling.R +++ b/R/utils-profiling.R @@ -51,7 +51,10 @@ get_table_total_missing_values <- function(data) { collected <- dplyr::collect( dplyr::summarise( - data, dplyr::across(dplyr::everything(), function(x) sum(ifelse(is.na(x), 1, 0), na.rm = TRUE)) + data, dplyr::across( + dplyr::everything(), + function(x) sum(ifelse(is.na(x), 1, 0), na.rm = TRUE) + ) ) ) From 3ee55a436043cfab065202dbb7d6602e96baa61b Mon Sep 17 00:00:00 2001 From: olivroy <52606734+olivroy@users.noreply.github.com> Date: Wed, 4 Dec 2024 16:29:09 -0500 Subject: [PATCH 4/4] Update DESCRIPTION --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b26cfe845..5505108d4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,8 @@ Authors@R: c( person("Mauricio", "Vargas", , "mavargas11@uc.cl", c("aut"), comment = c(ORCID = "0000-0003-1017-7574")), person("June", "Choe", , "jchoe001@gmail.com", c("aut"), - comment = c(ORCID = "0000-0002-0701-921X")) + comment = c(ORCID = "0000-0002-0701-921X")), + person("Olivier", "Roy", role = c("ctb")) ) License: MIT + file LICENSE URL: https://rstudio.github.io/pointblank/, https://github.com/rstudio/pointblank