From 94dc0b884abc7d441e99296a41ca097a0dd9348c Mon Sep 17 00:00:00 2001 From: Saranjeet Kaur Date: Tue, 29 Oct 2024 23:27:08 +0000 Subject: [PATCH] add unit test for gen_data_weighted_spline --- R/data-process.R | 65 ++++++++++++++++++-------- inst/config/config.yml | 7 ++- man/gen_data_weighted_burden_spline.Rd | 4 +- tests/testthat/test-data-process.R | 29 ++++++++++++ 4 files changed, 82 insertions(+), 23 deletions(-) diff --git a/R/data-process.R b/R/data-process.R index 457fb75..a31cc44 100644 --- a/R/data-process.R +++ b/R/data-process.R @@ -292,10 +292,16 @@ gen_data_weighted_burden <- function(data_weighted) { #' #' This function performs data smoothing for burden of disease, when necessary. For instance, with only a few simulations, there can be positive values in difference in burden of disease. #' -#' @param data_weighted_burden A data frame containing weighted values for burden of disease. +#' @param data_weighted_burden_wide_collapse A data frame containing weighted values for burden of disease. #' @return A data frame with spline smoothing applied for burden of disease. #' @export -gen_data_weighted_burden_spline <- function(data_weighted_burden) { +gen_data_weighted_burden_spline <- function(data_weighted_burden_wide_collapse) { + config <- load_config("default") + config_file_path <- system.file("config", "config.yml", package = "healthgpsrvis") + burden_spline <- config::get(value = "burden_spline", + file = config_file_path, + use_parent = FALSE) + ## This function is data smoothing ## It is applied manually now in India project due to abnormal positive values in diff_daly or cumdiff_daly @@ -303,38 +309,57 @@ gen_data_weighted_burden_spline <- function(data_weighted_burden) { ## Notes for India project: Delete years 27,30,32-33 for ps3-low; Delete years 2028 for ps4-low - data_weighted_burden_mean <- data_weighted_burden |> - dplyr::filter(data_weighted_burden$cumdiff_daly_mean <= 0) + data_weighted_burden_mean <- data_weighted_burden_wide_collapse |> + dplyr::filter(!!rlang::sym(burden_spline[[1]]$burden_mean) <= 0) - data_weighted_burden_min <- data_weighted_burden |> - dplyr::filter(data_weighted_burden$cumdiff_daly_min <= 0) + data_weighted_burden_min <- data_weighted_burden_wide_collapse |> + dplyr::filter(!!rlang::sym(burden_spline[[2]]$burden_min) <= 0) ## Notes for India project: Delete years 29, 31 for ps2-high; Delete 37-38 for ps3-low; Delete 33-34 for ps4-middle; Delete 36-38 for ps4-low - data_weighted_burden_max <- data_weighted_burden |> - dplyr::filter(data_weighted_burden$cumdiff_daly_max <= 0) + data_weighted_burden_max <- data_weighted_burden_wide_collapse |> + dplyr::filter(!!rlang::sym(burden_spline[[3]]$burden_max) <= 0) ## New data frame - data_weighted_burden_spline <- data.frame(time = seq(min(data_weighted_burden$time), - max(data_weighted_burden$time), + data_weighted_burden_spline <- data.frame(time = seq( + min(data_weighted_burden_wide_collapse[config$group]), + max(data_weighted_burden_wide_collapse[config$group]), length.out = 34 )) ## Fit spline and predict - spline_fit <- splines::interpSpline(data_weighted_burden_mean$time, data_weighted_burden_mean$cumdiff_daly_mean) - data_weighted_burden_spline$cumdiff_daly_mean <- stats::predict(spline_fit, data_weighted_burden_spline$time)$y - - spline_fit_min <- splines::interpSpline(data_weighted_burden_min$time, data_weighted_burden_min$cumdiff_daly_min) - data_weighted_burden_spline$cumdiff_daly_min <- stats::predict(spline_fit_min, data_weighted_burden_spline$time)$y + spline_fit_mean <- splines::interpSpline( + as.numeric(unlist(data_weighted_burden_mean[config$group])), + as.numeric(unlist(data_weighted_burden_mean[burden_spline[[1]]$burden_mean]))) + data_weighted_burden_spline[burden_spline[[1]]$burden_mean] <- stats::predict( + spline_fit_mean, + as.numeric(unlist(data_weighted_burden_spline[config$group])))$y + + spline_fit_min <- splines::interpSpline( + as.numeric(unlist(data_weighted_burden_min[config$group])), + as.numeric(unlist(data_weighted_burden_min[burden_spline[[2]]$burden_min]))) + data_weighted_burden_spline[burden_spline[[2]]$burden_min] <- stats::predict( + spline_fit_min, + as.numeric(unlist(data_weighted_burden_spline[config$group])))$y ## Use smooth.spline for ps4-low - spline_fit_max <- splines::interpSpline(data_weighted_burden_max$time, data_weighted_burden_max$cumdiff_daly_max) - data_weighted_burden_spline$cumdiff_daly_max <- stats::predict(spline_fit_max, data_weighted_burden_spline$time)$y + spline_fit_max <- splines::interpSpline( + as.numeric(unlist(data_weighted_burden_max[config$group])), + as.numeric(unlist(data_weighted_burden_max[burden_spline[[3]]$burden_max]))) + data_weighted_burden_spline[burden_spline[[3]]$burden_max] <- stats::predict( + spline_fit_max, + as.numeric(unlist(data_weighted_burden_spline[config$group])))$y ## Keep 0 values in the first two years, before policy implementation - data_weighted_burden_spline$cumdiff_daly_mean <- ifelse(data_weighted_burden_spline$time < 2024, 0, data_weighted_burden_spline$cumdiff_daly_mean) - data_weighted_burden_spline$cumdiff_daly_min <- ifelse(data_weighted_burden_spline$time < 2024, 0, data_weighted_burden_spline$cumdiff_daly_min) - data_weighted_burden_spline$cumdiff_daly_max <- ifelse(data_weighted_burden_spline$time < 2024, 0, data_weighted_burden_spline$cumdiff_daly_max) + group <- config$group + burden_spline <- unlist(config$burden_spline) + + for (burden_sp in burden_spline) { + data_weighted_burden_spline[[burden_sp]] <- ifelse( + data_weighted_burden_spline[[group]] < 2024, + 0, + data_weighted_burden_spline[[burden_sp]]) + } return(data_weighted_burden_spline) } diff --git a/inst/config/config.yml b/inst/config/config.yml index ac536ec..06578f0 100644 --- a/inst/config/config.yml +++ b/inst/config/config.yml @@ -53,7 +53,7 @@ default: - obesity group: - time # Also used in gen_data_weighted_burden - summary_columns_rf: # Variables that are used to calculate the summary statistics + summary_columns_rf: # Risk factor variables to calculate summary statistics, such as mean, min and max - diff_sodium - diff_ei - diff_bmi @@ -105,6 +105,11 @@ default: - cumdiff_daly - cumdiff_yll - cumdiff_yld + # For the gen_data_weighted_burden_spline() function + burden_spline: + - burden_mean: cumdiff_daly_mean + - burden_min: cumdiff_daly_min + - burden_max: cumdiff_daly_max stroke: - intracerebralhemorrhage - ischemicstroke diff --git a/man/gen_data_weighted_burden_spline.Rd b/man/gen_data_weighted_burden_spline.Rd index c3dbe7a..ab11976 100644 --- a/man/gen_data_weighted_burden_spline.Rd +++ b/man/gen_data_weighted_burden_spline.Rd @@ -4,10 +4,10 @@ \alias{gen_data_weighted_burden_spline} \title{Perform data smoothing} \usage{ -gen_data_weighted_burden_spline(data_weighted_burden) +gen_data_weighted_burden_spline(data_weighted_burden_wide_collapse) } \arguments{ -\item{data_weighted_burden}{A data frame containing weighted values for burden of disease.} +\item{data_weighted_burden_wide_collapse}{A data frame containing weighted values for burden of disease.} } \value{ A data frame with spline smoothing applied for burden of disease. diff --git a/tests/testthat/test-data-process.R b/tests/testthat/test-data-process.R index 2b5c6dc..5747aff 100644 --- a/tests/testthat/test-data-process.R +++ b/tests/testthat/test-data-process.R @@ -144,3 +144,32 @@ test_that("Columns in the generated dataframe", { ) ) }) + +# Testing gen_data_weighted_burden_spline() function +test_that("Columns in the generated dataframe", { + # Get the path to the .rds file + filepath <- testthat::test_path("testdata", "data_ps3_reformulation") + + # Read the .rds file + data <- readRDS(filepath) + + # Generate the weighted data + data_weighted <- gen_data_weighted(data) + + # Generate the weighted data for the risk factors (using 'bd' instead of + # 'burden' to keep lintr happy) + data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted) + + # Generate a data frame with spline smoothing applied for burden of disease + data_weighted_burden_spline <- gen_data_weighted_burden_spline( + data_weighted_bd_wide_collapse) + + # Check if the data has the expected number of columns + expect_equal(ncol(data_weighted_burden_spline), 4) + + # Check if the data has the expected column names + expect_equal( + colnames(data_weighted_burden_spline), + c("time", "cumdiff_daly_mean", "cumdiff_daly_min", "cumdiff_daly_max") + ) +})