Skip to content

Commit

Permalink
add unit test for gen_data_weighted_spline
Browse files Browse the repository at this point in the history
  • Loading branch information
SaranjeetKaur committed Oct 29, 2024
1 parent 5e1b570 commit 94dc0b8
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 23 deletions.
65 changes: 45 additions & 20 deletions R/data-process.R
Original file line number Diff line number Diff line change
Expand Up @@ -292,49 +292,74 @@ gen_data_weighted_burden <- function(data_weighted) {
#'
#' This function performs data smoothing for burden of disease, when necessary. For instance, with only a few simulations, there can be positive values in difference in burden of disease.
#'
#' @param data_weighted_burden A data frame containing weighted values for burden of disease.
#' @param data_weighted_burden_wide_collapse A data frame containing weighted values for burden of disease.
#' @return A data frame with spline smoothing applied for burden of disease.
#' @export
gen_data_weighted_burden_spline <- function(data_weighted_burden) {
gen_data_weighted_burden_spline <- function(data_weighted_burden_wide_collapse) {
config <- load_config("default")
config_file_path <- system.file("config", "config.yml", package = "healthgpsrvis")
burden_spline <- config::get(value = "burden_spline",
file = config_file_path,
use_parent = FALSE)

## This function is data smoothing
## It is applied manually now in India project due to abnormal positive values in diff_daly or cumdiff_daly

## Only keep those 0 or negative values

## Notes for India project: Delete years 27,30,32-33 for ps3-low; Delete years 2028 for ps4-low

data_weighted_burden_mean <- data_weighted_burden |>
dplyr::filter(data_weighted_burden$cumdiff_daly_mean <= 0)
data_weighted_burden_mean <- data_weighted_burden_wide_collapse |>
dplyr::filter(!!rlang::sym(burden_spline[[1]]$burden_mean) <= 0)

data_weighted_burden_min <- data_weighted_burden |>
dplyr::filter(data_weighted_burden$cumdiff_daly_min <= 0)
data_weighted_burden_min <- data_weighted_burden_wide_collapse |>
dplyr::filter(!!rlang::sym(burden_spline[[2]]$burden_min) <= 0)

## Notes for India project: Delete years 29, 31 for ps2-high; Delete 37-38 for ps3-low; Delete 33-34 for ps4-middle; Delete 36-38 for ps4-low

data_weighted_burden_max <- data_weighted_burden |>
dplyr::filter(data_weighted_burden$cumdiff_daly_max <= 0)
data_weighted_burden_max <- data_weighted_burden_wide_collapse |>
dplyr::filter(!!rlang::sym(burden_spline[[3]]$burden_max) <= 0)

## New data frame
data_weighted_burden_spline <- data.frame(time = seq(min(data_weighted_burden$time),
max(data_weighted_burden$time),
data_weighted_burden_spline <- data.frame(time = seq(
min(data_weighted_burden_wide_collapse[config$group]),
max(data_weighted_burden_wide_collapse[config$group]),
length.out = 34
))

## Fit spline and predict
spline_fit <- splines::interpSpline(data_weighted_burden_mean$time, data_weighted_burden_mean$cumdiff_daly_mean)
data_weighted_burden_spline$cumdiff_daly_mean <- stats::predict(spline_fit, data_weighted_burden_spline$time)$y

spline_fit_min <- splines::interpSpline(data_weighted_burden_min$time, data_weighted_burden_min$cumdiff_daly_min)
data_weighted_burden_spline$cumdiff_daly_min <- stats::predict(spline_fit_min, data_weighted_burden_spline$time)$y
spline_fit_mean <- splines::interpSpline(
as.numeric(unlist(data_weighted_burden_mean[config$group])),
as.numeric(unlist(data_weighted_burden_mean[burden_spline[[1]]$burden_mean])))
data_weighted_burden_spline[burden_spline[[1]]$burden_mean] <- stats::predict(
spline_fit_mean,
as.numeric(unlist(data_weighted_burden_spline[config$group])))$y

spline_fit_min <- splines::interpSpline(
as.numeric(unlist(data_weighted_burden_min[config$group])),
as.numeric(unlist(data_weighted_burden_min[burden_spline[[2]]$burden_min])))
data_weighted_burden_spline[burden_spline[[2]]$burden_min] <- stats::predict(
spline_fit_min,
as.numeric(unlist(data_weighted_burden_spline[config$group])))$y

## Use smooth.spline for ps4-low
spline_fit_max <- splines::interpSpline(data_weighted_burden_max$time, data_weighted_burden_max$cumdiff_daly_max)
data_weighted_burden_spline$cumdiff_daly_max <- stats::predict(spline_fit_max, data_weighted_burden_spline$time)$y
spline_fit_max <- splines::interpSpline(
as.numeric(unlist(data_weighted_burden_max[config$group])),
as.numeric(unlist(data_weighted_burden_max[burden_spline[[3]]$burden_max])))
data_weighted_burden_spline[burden_spline[[3]]$burden_max] <- stats::predict(
spline_fit_max,
as.numeric(unlist(data_weighted_burden_spline[config$group])))$y

## Keep 0 values in the first two years, before policy implementation
data_weighted_burden_spline$cumdiff_daly_mean <- ifelse(data_weighted_burden_spline$time < 2024, 0, data_weighted_burden_spline$cumdiff_daly_mean)
data_weighted_burden_spline$cumdiff_daly_min <- ifelse(data_weighted_burden_spline$time < 2024, 0, data_weighted_burden_spline$cumdiff_daly_min)
data_weighted_burden_spline$cumdiff_daly_max <- ifelse(data_weighted_burden_spline$time < 2024, 0, data_weighted_burden_spline$cumdiff_daly_max)
group <- config$group
burden_spline <- unlist(config$burden_spline)

for (burden_sp in burden_spline) {
data_weighted_burden_spline[[burden_sp]] <- ifelse(
data_weighted_burden_spline[[group]] < 2024,
0,
data_weighted_burden_spline[[burden_sp]])
}

return(data_weighted_burden_spline)
}
Expand Down
7 changes: 6 additions & 1 deletion inst/config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ default:
- obesity
group:
- time # Also used in gen_data_weighted_burden
summary_columns_rf: # Variables that are used to calculate the summary statistics
summary_columns_rf: # Risk factor variables to calculate summary statistics, such as mean, min and max
- diff_sodium
- diff_ei
- diff_bmi
Expand Down Expand Up @@ -105,6 +105,11 @@ default:
- cumdiff_daly
- cumdiff_yll
- cumdiff_yld
# For the gen_data_weighted_burden_spline() function
burden_spline:
- burden_mean: cumdiff_daly_mean
- burden_min: cumdiff_daly_min
- burden_max: cumdiff_daly_max
stroke:
- intracerebralhemorrhage
- ischemicstroke
Expand Down
4 changes: 2 additions & 2 deletions man/gen_data_weighted_burden_spline.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions tests/testthat/test-data-process.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,32 @@ test_that("Columns in the generated dataframe", {
)
)
})

# Testing gen_data_weighted_burden_spline() function
test_that("Columns in the generated dataframe", {
# Get the path to the .rds file
filepath <- testthat::test_path("testdata", "data_ps3_reformulation")

# Read the .rds file
data <- readRDS(filepath)

# Generate the weighted data
data_weighted <- gen_data_weighted(data)

# Generate the weighted data for the risk factors (using 'bd' instead of
# 'burden' to keep lintr happy)
data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted)

# Generate a data frame with spline smoothing applied for burden of disease
data_weighted_burden_spline <- gen_data_weighted_burden_spline(
data_weighted_bd_wide_collapse)

# Check if the data has the expected number of columns
expect_equal(ncol(data_weighted_burden_spline), 4)

# Check if the data has the expected column names
expect_equal(
colnames(data_weighted_burden_spline),
c("time", "cumdiff_daly_mean", "cumdiff_daly_min", "cumdiff_daly_max")
)
})

0 comments on commit 94dc0b8

Please sign in to comment.