diff --git a/.Rprofile b/.Rprofile index e721723..81b960f 100644 --- a/.Rprofile +++ b/.Rprofile @@ -1,2 +1 @@ source("renv/activate.R") -styler.save_after_styling <- TRUE diff --git a/R/data-process.R b/R/data-process.R index e46a7af..a1bd03a 100644 --- a/R/data-process.R +++ b/R/data-process.R @@ -53,12 +53,13 @@ NULL #' This function calculates weighted mean values for various metrics over years. #' #' @param data A data frame containing values for various metrics. +#' @param configname The name of the configuration file to use (e.g., "default", "development", "production", "testing"). #' @return A data frame with weighted values for various metrics over years. #' @export -gen_data_weighted <- function(data) { +gen_data_weighted <- function(data, configname = "default") { print("Loading the config file...") - config <- load_config("default") + config <- load_config(configname) print("Processing the data...") colnames(data) <- gsub("^mean_", "", colnames(data)) # Clean the column names ## by removing 'mean_' @@ -124,12 +125,13 @@ gen_data_weighted <- function(data) { #' #' @param data_weighted A data frame containing weighted mean values for various #' metrics. +#' @param configname The name of the configuration file to use (e.g., "default", "development", "production", "testing"). #' @return A data frame with differences between intervention and baseline #' values for risk factors. #' @export -gen_data_weighted_rf <- function(data_weighted) { +gen_data_weighted_rf <- function(data_weighted, configname = "default") { print("Loading the config file...") - config <- load_config("default") + config <- load_config(configname) print("Processing the data...") data_weighted_rf <- dplyr::select( data_weighted, @@ -187,12 +189,13 @@ gen_data_weighted_rf <- function(data_weighted) { #' #' @param data_weighted A data frame containing weighted mean values for various #' metrics. 
+#' @param configname The name of the configuration file to use (e.g., "default", "development", "production", "testing"). #' @return A data frame with differences between intervention and baseline #' values for incidences. #' @export -gen_data_weighted_ds_diff <- function(data_weighted) { +gen_data_weighted_ds_diff <- function(data_weighted, configname = "default") { print("Loading the config file...") - config <- load_config("default") + config <- load_config(configname) print("Processing the data...") data_weighted_ds <- dplyr::select( data_weighted, @@ -237,12 +240,13 @@ gen_data_weighted_ds_diff <- function(data_weighted) { #' #' @param data_weighted A data frame containing weighted mean values for various #' metrics. +#' @param configname The name of the configuration file to use (e.g., "default", "development", "production", "testing"). #' @return A data frame with differences between intervention and baseline #' values for incidences. #' @export -gen_data_weighted_ds_cumdiff <- function(data_weighted) { +gen_data_weighted_ds_cumdiff <- function(data_weighted, configname = "default") { print("Loading the config file...") - config <- load_config("default") + config <- load_config(configname) print("Processing the data...") data_weighted_ds <- dplyr::select( data_weighted, @@ -320,12 +324,13 @@ gen_data_weighted_ds_cumdiff <- function(data_weighted) { #' #' @param data_weighted A data frame containing weighted mean values for various #' metrics. +#' @param configname The name of the configuration file to use (e.g., "default", "development", "production", "testing"). #' @return A data frame with differences between intervention and baseline #' values for burden of disease. 
#' @export -gen_data_weighted_burden <- function(data_weighted) { +gen_data_weighted_burden <- function(data_weighted, configname = "default") { print("Loading the config file...") - config <- load_config("default") + config <- load_config(configname) print("Processing the data...") data_weighted_burden <- dplyr::select( data_weighted, @@ -399,11 +404,12 @@ gen_data_weighted_burden <- function(data_weighted) { #' #' @param data_weighted_bd_wide_collapse A data frame containing weighted #' values for burden of disease. +#' @param configname The name of the configuration file to use (e.g., "default", "development", "production", "testing"). #' @return A data frame with spline smoothing applied for burden of disease. #' @export -gen_data_weighted_bd_spline <- function(data_weighted_bd_wide_collapse) { +gen_data_weighted_bd_spline <- function(data_weighted_bd_wide_collapse, configname = "default") { print("Loading the config file...") - config <- load_config("default") + config <- load_config(configname) print("Processing the data...") config_file_path <- system.file("config", "config.yml", @@ -492,9 +498,10 @@ gen_data_weighted_bd_spline <- function(data_weighted_bd_wide_collapse) { #' This function calculates life expectancy for various age and groups. #' #' @param data_mean A data frame containing mean values for various metrics. +#' @param configname The name of the configuration file to use (e.g., "default", "development", "production", "testing"). #' @return A data frame with life expectancy. 
#' @export -gen_data_le <- function(data_mean) { +gen_data_le <- function(data_mean, configname = "default") { data_le <- data_mean[, c( data_mean$source, data_mean$time, diff --git a/README.Rmd b/README.Rmd index 1030522..b8fab76 100644 --- a/README.Rmd +++ b/README.Rmd @@ -47,30 +47,36 @@ library(healthgpsrvis) # Get the path to the .rds file included in the testdata folder filepath <- testthat::test_path("testdata", "data_ps3_reformulation") -#filepath <- "path/to/data.rds" # Get the path to the .rds file included in any other local folder +#filepath <- "path/to/data.rds" # Get the path to the .rds file included in any +#other local folder # Read the .rds file data <- readRDS(filepath) # Generate the weighted data -data_weighted <- gen_data_weighted(data) +data_weighted <- gen_data_weighted(data, configname = "default") + +# If you want to use a customised configuration, you will have to change the configname to "production". Then make sure you are editing to use the values (under the `production` field) that you need in the `config.yml` file which is located in the `inst/config` folder. 
# Generate the weighted data for the risk factors -data_weighted_rf_wide_collapse <- gen_data_weighted_rf(data_weighted) +data_weighted_rf_wide_collapse <- gen_data_weighted_rf(data_weighted, + configname = "default") # View structure of the weighted data for the risk factors str(data_weighted_rf_wide_collapse) ``` -To plot a risk factor (say, "bmi") for the weighted data, you can use the following code: +To plot a risk factor (say, "bmi") for the weighted data, you can use the +following code: ```{r riskfactor_plot} # Plot the risk factor "bmi" riskfactors("bmi", data_weighted) ``` -To plot the difference in the risk factor (say, "bmi") for the weighted data, you can use the following code: +To plot the difference in the risk factor (say, "bmi") for the weighted data, +you can use the following code: ```{r riskfactors_diff_plot} # Plot of difference in the risk factor "bmi" @@ -81,17 +87,20 @@ riskfactors_diff("bmi", scale_y_continuous_labels = c(-0.148, -0.074, 0)) ``` -To plot the incidence difference for, say, "stroke", you can use the following code: +To plot the incidence difference for, say, "stroke", you can use the following +code: ```{r inc_diff_plot} -data_weighted_ds_wide_diff <- gen_data_weighted_ds_diff(data_weighted) +data_weighted_ds_wide_diff <- gen_data_weighted_ds_diff(data_weighted, + configname = "default") inc_diff("stroke", data_weighted_ds_wide_diff) ``` To plot the cumulative incidence difference for, say, "diabetes", you can use the following code: ```{r inc_cum_plot} -data_weighted_ds_wide_collapse <- gen_data_weighted_ds_cumdiff(data_weighted) +data_weighted_ds_wide_collapse <- gen_data_weighted_ds_cumdiff(data_weighted, + configname = "default") inc_cum("diabetes", data_weighted_ds_wide_collapse, scale_y_continuous_limits = c(-4424000, 0), @@ -103,7 +112,8 @@ inc_cum("diabetes", To plot burden of disease for, say, "yld", you can use the following code: ```{r burden_disease_plot} -data_weighted_bd_wide_collapse <- 
gen_data_weighted_burden(data_weighted) +data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted, + configname = "default") burden_disease("yld", data_weighted_bd_wide_collapse) ``` diff --git a/README.md b/README.md index f6e197f..7eec8d8 100644 --- a/README.md +++ b/README.md @@ -38,20 +38,24 @@ library(healthgpsrvis) # Get the path to the .rds file included in the testdata folder filepath <- testthat::test_path("testdata", "data_ps3_reformulation") -#filepath <- "path/to/data.rds" # Get the path to the .rds file included in any other local folder +#filepath <- "path/to/data.rds" # Get the path to the .rds file included in any +#other local folder # Read the .rds file data <- readRDS(filepath) # Generate the weighted data -data_weighted <- gen_data_weighted(data) +data_weighted <- gen_data_weighted(data, configname = "default") #> [1] "Loading the config file..." #> [1] "Processing the data..." #> [1] "Data processing complete." +# If you want to use a customised configuration, you will have to change the configname to "production". Then make sure you are editing to use the values (under the `production` field) that you need in the `config.yml` file which is located in the `inst/config` folder. + # Generate the weighted data for the risk factors -data_weighted_rf_wide_collapse <- gen_data_weighted_rf(data_weighted) +data_weighted_rf_wide_collapse <- gen_data_weighted_rf(data_weighted, + configname = "default") #> [1] "Loading the config file..." #> [1] "Processing the data..." #> [1] "Data processing complete." @@ -106,7 +110,8 @@ To plot the incidence difference for, say, “stroke”, you can use the following code: ``` r -data_weighted_ds_wide_diff <- gen_data_weighted_ds_diff(data_weighted) +data_weighted_ds_wide_diff <- gen_data_weighted_ds_diff(data_weighted, + configname = "default") #> [1] "Loading the config file..." #> [1] "Processing the data..." #> [1] "Data processing complete." 
@@ -121,7 +126,8 @@ To plot the cumulative incidence difference for, say, “diabetes”, you can use the following code: ``` r -data_weighted_ds_wide_collapse <- gen_data_weighted_ds_cumdiff(data_weighted) +data_weighted_ds_wide_collapse <- gen_data_weighted_ds_cumdiff(data_weighted, + configname = "default") #> [1] "Loading the config file..." #> [1] "Processing the data..." #> [1] "Data processing complete." @@ -143,7 +149,8 @@ To plot burden of disease for, say, “yld”, you can use the following code: ``` r -data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted) +data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted, + configname = "default") #> [1] "Loading the config file..." #> [1] "Processing the data..." #> [1] "Data processing complete." diff --git a/inst/config/config.yml b/inst/config/config.yml index a5e9a78..a2cd3b6 100644 --- a/inst/config/config.yml +++ b/inst/config/config.yml @@ -114,3 +114,121 @@ default: - intracerebralhemorrhage - ischemicstroke - subarachnoidhemorrhage + + +production: +# For the gen_data_weighted() function + grouping_vars: # Variables that are used to group the data + - source + - time + - simID + weight: # Population size of each group is used as weight + - count + weighted_vars: # List of risk factors to calculate population weighted mean + - income + - sector + - sodium + - carbohydrate + - fat + - protein + - energyintake + - physicalactivity + - bmi + - height + - weight + - over_weight + - obese_weight + - disability_weight + - deaths + - migrations + prevalence_disease: # List of disease to calculate the total prevalence number in the population + - prevalence_ischemicheartdisease # ihd at places + - prevalence_diabetes # at some places used db, check + - prevalence_stroke + - prevalence_asthma + - prevalence_chronickidneydisease # ckd at places + incidence_disease: # List of disease to calculate the total incidence number in the population + - incidence_ischemicheartdisease # ihd at 
places + - incidence_diabetes # at some places used db, check + - incidence_stroke + - incidence_asthma + - incidence_chronickidneydisease # ckd at places + # For the gen_data_weighted_rf() function + names_from: # Also used in gen_data_weighted_burden + - source + id_cols: # Also used in gen_data_weighted_burden + - time + - simID + weighted_rf: + - weighted_sodium + - weighted_energyintake + - weighted_bmi + - weighted_obesity + rf: + - sodium + - energyintake + - bmi + - obesity + group: + - time # Also used in gen_data_weighted_burden + summary_columns_rf: # Risk factor variables to calculate summary statistics, such as mean, min and max + - diff_sodium + - diff_ei + - diff_bmi + - diff_obesity + # For the gen_data_weighted_ds_diff() and gen_data_weighted_ds_cumdiff() functions + weighted_ds: + - totalcase_ihd + - totalcase_diabetes + - totalcase_stroke + - totalcase_asthma + - totalcase_ckd + disease: # List of diseases to calculate the total prevalence number or incidence number in the population + - ihd + - diabetes # at some places used db, check + - stroke + - asthma + - ckd + group_ds: # Also used in gen_data_weighted_burden + - simID + summary_columns_ds: # Variables that are used to calculate the summary statistics + - diff_inc_ihd + - diff_inc_db + - diff_inc_stroke + - diff_inc_asthma + - diff_inc_ckd + summary_columns_ds_cum: # Variables that are used to calculate the summary statistics + - cumdiff_inc_ihd + - cumdiff_inc_db + - cumdiff_inc_stroke + - cumdiff_inc_asthma + - cumdiff_inc_ckd + # For the gen_data_weighted_burden() function + weighted_burden: + - total_yll + - total_yld + - total_daly + burden: # List of disease burden measures + - yll + - yld + - daly + summary_columns_burden: # Variables that are used to calculate the summary statistics + - diff_daly + - diff_yll + - diff_yld + summary_columns_burden_cum: # Variables that are used to calculate the summary statistics + - diff_daly + - diff_yll + - diff_yld + - cumdiff_daly + - 
cumdiff_yll + - cumdiff_yld + # For the gen_data_weighted_bd_spline() function + burden_spline: + - burden_mean: cumdiff_daly_mean + - burden_min: cumdiff_daly_min + - burden_max: cumdiff_daly_max + stroke: + - intracerebralhemorrhage + - ischemicstroke + - subarachnoidhemorrhage diff --git a/man/gen_data_le.Rd b/man/gen_data_le.Rd index d7b850d..8253b5f 100644 --- a/man/gen_data_le.Rd +++ b/man/gen_data_le.Rd @@ -4,10 +4,12 @@ \alias{gen_data_le} \title{Calculate Life Expectancy} \usage{ -gen_data_le(data_mean) +gen_data_le(data_mean, configname = "default") } \arguments{ \item{data_mean}{A data frame containing mean values for various metrics.} + +\item{configname}{The name of the configuration file to use (e.g., "default", "development", "production", "testing").} } \value{ A data frame with life expectancy. diff --git a/man/gen_data_weighted.Rd b/man/gen_data_weighted.Rd index bbf0027..4beb13c 100644 --- a/man/gen_data_weighted.Rd +++ b/man/gen_data_weighted.Rd @@ -4,10 +4,12 @@ \alias{gen_data_weighted} \title{Generate Weighted Mean Values Over Years} \usage{ -gen_data_weighted(data) +gen_data_weighted(data, configname = "default") } \arguments{ \item{data}{A data frame containing values for various metrics.} + +\item{configname}{The name of the configuration file to use (e.g., "default", "development", "production", "testing").} } \value{ A data frame with weighted values for various metrics over years. 
diff --git a/man/gen_data_weighted_bd_spline.Rd b/man/gen_data_weighted_bd_spline.Rd index 754e106..057a2b2 100644 --- a/man/gen_data_weighted_bd_spline.Rd +++ b/man/gen_data_weighted_bd_spline.Rd @@ -4,11 +4,16 @@ \alias{gen_data_weighted_bd_spline} \title{Perform data smoothing} \usage{ -gen_data_weighted_bd_spline(data_weighted_bd_wide_collapse) +gen_data_weighted_bd_spline( + data_weighted_bd_wide_collapse, + configname = "default" +) } \arguments{ \item{data_weighted_bd_wide_collapse}{A data frame containing weighted values for burden of disease.} + +\item{configname}{The name of the configuration file to use (e.g., "default", "development", "production", "testing").} } \value{ A data frame with spline smoothing applied for burden of disease. diff --git a/man/gen_data_weighted_burden.Rd b/man/gen_data_weighted_burden.Rd index b2c6e30..63cfd8f 100644 --- a/man/gen_data_weighted_burden.Rd +++ b/man/gen_data_weighted_burden.Rd @@ -4,11 +4,13 @@ \alias{gen_data_weighted_burden} \title{Calculate Differences for Burden of Disease} \usage{ -gen_data_weighted_burden(data_weighted) +gen_data_weighted_burden(data_weighted, configname = "default") } \arguments{ \item{data_weighted}{A data frame containing weighted mean values for various metrics.} + +\item{configname}{The name of the configuration file to use (e.g., "default", "development", "production", "testing").} } \value{ A data frame with differences between intervention and baseline diff --git a/man/gen_data_weighted_ds_cumdiff.Rd b/man/gen_data_weighted_ds_cumdiff.Rd index 4e3e2f4..5ec8e49 100644 --- a/man/gen_data_weighted_ds_cumdiff.Rd +++ b/man/gen_data_weighted_ds_cumdiff.Rd @@ -4,11 +4,13 @@ \alias{gen_data_weighted_ds_cumdiff} \title{Calculate Cumulative Differences for Incidences} \usage{ -gen_data_weighted_ds_cumdiff(data_weighted) +gen_data_weighted_ds_cumdiff(data_weighted, configname = "default") } \arguments{ \item{data_weighted}{A data frame containing weighted mean values for various metrics.} + 
+\item{configname}{The name of the configuration file to use (e.g., "default", "development", "production", "testing").} } \value{ A data frame with differences between intervention and baseline diff --git a/man/gen_data_weighted_ds_diff.Rd b/man/gen_data_weighted_ds_diff.Rd index 0b1bc23..52422e9 100644 --- a/man/gen_data_weighted_ds_diff.Rd +++ b/man/gen_data_weighted_ds_diff.Rd @@ -4,11 +4,13 @@ \alias{gen_data_weighted_ds_diff} \title{Calculate Differences for Incidences} \usage{ -gen_data_weighted_ds_diff(data_weighted) +gen_data_weighted_ds_diff(data_weighted, configname = "default") } \arguments{ \item{data_weighted}{A data frame containing weighted mean values for various metrics.} + +\item{configname}{The name of the configuration file to use (e.g., "default", "development", "production", "testing").} } \value{ A data frame with differences between intervention and baseline diff --git a/man/gen_data_weighted_rf.Rd b/man/gen_data_weighted_rf.Rd index 16b9f21..ac6c229 100644 --- a/man/gen_data_weighted_rf.Rd +++ b/man/gen_data_weighted_rf.Rd @@ -4,11 +4,13 @@ \alias{gen_data_weighted_rf} \title{Calculate Differences for Risk Factors} \usage{ -gen_data_weighted_rf(data_weighted) +gen_data_weighted_rf(data_weighted, configname = "default") } \arguments{ \item{data_weighted}{A data frame containing weighted mean values for various metrics.} + +\item{configname}{The name of the configuration file to use (e.g., "default", "development", "production", "testing").} } \value{ A data frame with differences between intervention and baseline diff --git a/tests/testthat/test-data-process.R b/tests/testthat/test-data-process.R index f5f8978..21dee41 100644 --- a/tests/testthat/test-data-process.R +++ b/tests/testthat/test-data-process.R @@ -19,7 +19,7 @@ test_that("Columns in the generated dataframe", { data <- readRDS(filepath) # Generate the weighted data - data_weighted <- gen_data_weighted(data) + data_weighted <- gen_data_weighted(data, configname = "default") # 
Check if the data has the expected number of columns expect_equal(ncol(data_weighted), 37) @@ -62,10 +62,11 @@ test_that("Columns in the generated dataframe", { data <- readRDS(filepath) # Generate the weighted data - data_weighted <- gen_data_weighted(data) + data_weighted <- gen_data_weighted(data, configname = "default") # Generate the weighted data for the risk factors - data_weighted_rf_wide_collapse <- gen_data_weighted_rf(data_weighted) + data_weighted_rf_wide_collapse <- gen_data_weighted_rf(data_weighted, + configname = "default") # Check if the data has the expected number of columns expect_equal(ncol(data_weighted_rf_wide_collapse), 13) @@ -91,10 +92,11 @@ test_that("Columns in the generated dataframe", { data <- readRDS(filepath) # Generate the weighted data - data_weighted <- gen_data_weighted(data) + data_weighted <- gen_data_weighted(data, configname = "default") # Generate the weighted data for the risk factors - data_weighted_ds_wide_diff <- gen_data_weighted_ds_diff(data_weighted) + data_weighted_ds_wide_diff <- gen_data_weighted_ds_diff(data_weighted, + configname = "default") # Check if the data has the expected number of columns expect_equal(ncol(data_weighted_ds_wide_diff), 17) @@ -122,10 +124,11 @@ test_that("Columns in the generated dataframe", { data <- readRDS(filepath) # Generate the weighted data - data_weighted <- gen_data_weighted(data) + data_weighted <- gen_data_weighted(data, configname = "default") # Generate the weighted data for the risk factors - data_weighted_ds_wide_collapse <- gen_data_weighted_ds_cumdiff(data_weighted) + data_weighted_ds_wide_collapse <- gen_data_weighted_ds_cumdiff(data_weighted, + configname = "default") # Check if the data has the expected number of columns expect_equal(ncol(data_weighted_ds_wide_collapse), 16) @@ -153,11 +156,12 @@ test_that("Columns in the generated dataframe", { data <- readRDS(filepath) # Generate the weighted data - data_weighted <- gen_data_weighted(data) + data_weighted <- 
gen_data_weighted(data, configname = "default") # Generate the weighted data for the risk factors (using 'bd' instead of # 'burden' to keep lintr happy) - data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted) + data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted, + configname = "default") # Check if the data has the expected number of columns expect_equal(ncol(data_weighted_bd_wide_collapse), 19) @@ -185,11 +189,12 @@ test_that("Columns in the generated dataframe", { data <- readRDS(filepath) # Generate the weighted data - data_weighted <- gen_data_weighted(data) + data_weighted <- gen_data_weighted(data, configname = "default") # Generate the weighted data for the risk factors (using 'bd' instead of # 'burden' to keep lintr happy) - data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted) + data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted, + configname = "default") # Generate a data frame with spline smoothing applied for burden of disease data_weighted_burden_spline <- gen_data_weighted_bd_spline(