Merge pull request #116 from imperialCHEPI/config_update

Customise config.yml with an argument to adjust the config being used
imperialCHEPI · Dec 18, 2024 · 595f66a · 595f66a
2 parents 37718da + dba705e
commit 595f66a
Show file tree

Hide file tree

Showing 13 changed files with 210 additions and 47 deletions.
diff --git a/.Rprofile b/.Rprofile
@@ -1,2 +1 @@
 source("renv/activate.R")
-styler.save_after_styling <- TRUE
diff --git a/R/data-process.R b/R/data-process.R
@@ -53,12 +53,13 @@ NULL
 #' This function calculates weighted mean values for various metrics over years.
 #'
 #' @param data A data frame containing values for various metrics.
+#' @param configname The name of the configuration section in `config.yml` to use (e.g., "default", "development", "production", "testing").
 #' @return A data frame with weighted values for various metrics over years.
 #' @export
-gen_data_weighted <- function(data) {
+gen_data_weighted <- function(data, configname = "default") {
 
   print("Loading the config file...")
-  config <- load_config("default")
+  config <- load_config(configname)
   print("Processing the data...")
   colnames(data) <- gsub("^mean_", "", colnames(data)) # Clean the column names
   ## by removing 'mean_'
@@ -124,12 +125,13 @@ gen_data_weighted <- function(data) {
 #'
 #' @param data_weighted A data frame containing weighted mean values for various
 #'  metrics.
+#' @param configname The name of the configuration section in `config.yml` to use (e.g., "default", "development", "production", "testing").
 #' @return A data frame with differences between intervention and baseline
 #' values for risk factors.
 #' @export
-gen_data_weighted_rf <- function(data_weighted) {
+gen_data_weighted_rf <- function(data_weighted, configname = "default") {
   print("Loading the config file...")
-  config <- load_config("default")
+  config <- load_config(configname)
   print("Processing the data...")
   data_weighted_rf <- dplyr::select(
     data_weighted,
@@ -187,12 +189,13 @@ gen_data_weighted_rf <- function(data_weighted) {
 #'
 #' @param data_weighted A data frame containing weighted mean values for various
 #'  metrics.
+#' @param configname The name of the configuration section in `config.yml` to use (e.g., "default", "development", "production", "testing").
 #' @return A data frame with differences between intervention and baseline
 #' values for incidences.
 #' @export
-gen_data_weighted_ds_diff <- function(data_weighted) {
+gen_data_weighted_ds_diff <- function(data_weighted, configname = "default") {
   print("Loading the config file...")
-  config <- load_config("default")
+  config <- load_config(configname)
   print("Processing the data...")
   data_weighted_ds <- dplyr::select(
     data_weighted,
@@ -237,12 +240,13 @@ gen_data_weighted_ds_diff <- function(data_weighted) {
 #'
 #' @param data_weighted A data frame containing weighted mean values for various
 #'  metrics.
+#' @param configname The name of the configuration section in `config.yml` to use (e.g., "default", "development", "production", "testing").
 #' @return A data frame with differences between intervention and baseline
 #' values for incidences.
 #' @export
-gen_data_weighted_ds_cumdiff <- function(data_weighted) {
+gen_data_weighted_ds_cumdiff <- function(data_weighted, configname = "default") {
   print("Loading the config file...")
-  config <- load_config("default")
+  config <- load_config(configname)
   print("Processing the data...")
   data_weighted_ds <- dplyr::select(
     data_weighted,
@@ -320,12 +324,13 @@ gen_data_weighted_ds_cumdiff <- function(data_weighted) {
 #'
 #' @param data_weighted A data frame containing weighted mean values for various
 #'  metrics.
+#' @param configname The name of the configuration section in `config.yml` to use (e.g., "default", "development", "production", "testing").
 #' @return A data frame with differences between intervention and baseline
 #' values for burden of disease.
 #' @export
-gen_data_weighted_burden <- function(data_weighted) {
+gen_data_weighted_burden <- function(data_weighted, configname = "default") {
   print("Loading the config file...")
-  config <- load_config("default")
+  config <- load_config(configname)
   print("Processing the data...")
   data_weighted_burden <- dplyr::select(
     data_weighted,
@@ -399,11 +404,12 @@ gen_data_weighted_burden <- function(data_weighted) {
 #'
 #' @param data_weighted_bd_wide_collapse A data frame containing weighted
 #' values for burden of disease.
+#' @param configname The name of the configuration section in `config.yml` to use (e.g., "default", "development", "production", "testing").
 #' @return A data frame with spline smoothing applied for burden of disease.
 #' @export
-gen_data_weighted_bd_spline <- function(data_weighted_bd_wide_collapse) {
+gen_data_weighted_bd_spline <- function(data_weighted_bd_wide_collapse, configname = "default") {
   print("Loading the config file...")
-  config <- load_config("default")
+  config <- load_config(configname)
   print("Processing the data...")
   config_file_path <- system.file("config",
                                   "config.yml",
@@ -492,9 +498,10 @@ gen_data_weighted_bd_spline <- function(data_weighted_bd_wide_collapse) {
 #' This function calculates life expectancy for various ages and groups.
 #'
 #' @param data_mean A data frame containing mean values for various metrics.
+#' @param configname The name of the configuration section in `config.yml` to use (e.g., "default", "development", "production", "testing").
 #' @return A data frame with life expectancy.
 #' @export
-gen_data_le <- function(data_mean) {
+gen_data_le <- function(data_mean, configname = "default") {
   data_le <- data_mean[, c(
     data_mean$source,
     data_mean$time,

diff --git a/README.Rmd b/README.Rmd
@@ -47,30 +47,36 @@ library(healthgpsrvis)
 
 # Get the path to the .rds file included in the testdata folder
 filepath <- testthat::test_path("testdata", "data_ps3_reformulation")
-#filepath <- "path/to/data.rds" # Get the path to the .rds file included in any other local folder
+#filepath <- "path/to/data.rds" # Get the path to the .rds file included in any 
+#other local folder
 
 
 # Read the .rds file
 data <- readRDS(filepath)
 
 # Generate the weighted data
-data_weighted <- gen_data_weighted(data)
+data_weighted <- gen_data_weighted(data, configname = "default")
+
+# To use a customised configuration, set `configname` to "production" and edit the values you need under the `production` field of the `config.yml` file, which is located in the `inst/config` folder.
 
 # Generate the weighted data for the risk factors
-data_weighted_rf_wide_collapse <- gen_data_weighted_rf(data_weighted)
+data_weighted_rf_wide_collapse <- gen_data_weighted_rf(data_weighted, 
+                                                       configname = "default")
 
 # View structure of the weighted data for the risk factors
 str(data_weighted_rf_wide_collapse)
 ```
 
-To plot a risk factor (say, "bmi") for the weighted data, you can use the following code:
+To plot a risk factor (say, "bmi") for the weighted data, you can use the 
+following code:
 
 ```{r riskfactor_plot}
 # Plot the risk factor "bmi"
 riskfactors("bmi", data_weighted)
 ```
 
-To plot the difference in the risk factor (say, "bmi") for the weighted data, you can use the following code:
+To plot the difference in the risk factor (say, "bmi") for the weighted data, 
+you can use the following code:
 
 ```{r riskfactors_diff_plot}
 # Plot of difference in the risk factor "bmi"
@@ -81,17 +87,20 @@ riskfactors_diff("bmi",
                  scale_y_continuous_labels = c(-0.148, -0.074, 0))
 ```
 
-To plot the incidence difference for, say, "stroke", you can use the following code:
+To plot the incidence difference for, say, "stroke", you can use the following 
+code:
 
 ```{r inc_diff_plot}
-data_weighted_ds_wide_diff <- gen_data_weighted_ds_diff(data_weighted)
+data_weighted_ds_wide_diff <- gen_data_weighted_ds_diff(data_weighted, 
+                                                        configname = "default")
 inc_diff("stroke", data_weighted_ds_wide_diff)
 ```
 
 To plot the cumulative incidence difference for, say, "diabetes", you can use the following code:
 
 ```{r inc_cum_plot}
-data_weighted_ds_wide_collapse <- gen_data_weighted_ds_cumdiff(data_weighted)
+data_weighted_ds_wide_collapse <- gen_data_weighted_ds_cumdiff(data_weighted,
+                                                               configname = "default")
 inc_cum("diabetes",
     data_weighted_ds_wide_collapse,
     scale_y_continuous_limits = c(-4424000, 0),
@@ -103,7 +112,8 @@ inc_cum("diabetes",
 To plot burden of disease for, say, "yld", you can use the following code:
 
 ```{r burden_disease_plot}
-data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted)
+data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted,
+                                                           configname = "default")
 burden_disease("yld", data_weighted_bd_wide_collapse)
 ```
 

diff --git a/README.md b/README.md
@@ -38,20 +38,24 @@ library(healthgpsrvis)
 
 # Get the path to the .rds file included in the testdata folder
 filepath <- testthat::test_path("testdata", "data_ps3_reformulation")
-#filepath <- "path/to/data.rds" # Get the path to the .rds file included in any other local folder
+#filepath <- "path/to/data.rds" # Get the path to the .rds file included in any 
+#other local folder
 
 
 # Read the .rds file
 data <- readRDS(filepath)
 
 # Generate the weighted data
-data_weighted <- gen_data_weighted(data)
+data_weighted <- gen_data_weighted(data, configname = "default")
 #> [1] "Loading the config file..."
 #> [1] "Processing the data..."
 #> [1] "Data processing complete."
 
+# To use a customised configuration, set `configname` to "production" and edit the values you need under the `production` field of the `config.yml` file, which is located in the `inst/config` folder.
+
 # Generate the weighted data for the risk factors
-data_weighted_rf_wide_collapse <- gen_data_weighted_rf(data_weighted)
+data_weighted_rf_wide_collapse <- gen_data_weighted_rf(data_weighted, 
+                                                       configname = "default")
 #> [1] "Loading the config file..."
 #> [1] "Processing the data..."
 #> [1] "Data processing complete."
@@ -106,7 +110,8 @@ To plot the incidence difference for, say, “stroke”, you can use the
 following code:
 
 ``` r
-data_weighted_ds_wide_diff <- gen_data_weighted_ds_diff(data_weighted)
+data_weighted_ds_wide_diff <- gen_data_weighted_ds_diff(data_weighted, 
+                                                        configname = "default")
 #> [1] "Loading the config file..."
 #> [1] "Processing the data..."
 #> [1] "Data processing complete."
@@ -121,7 +126,8 @@ To plot the cumulative incidence difference for, say, “diabetes”, you
 can use the following code:
 
 ``` r
-data_weighted_ds_wide_collapse <- gen_data_weighted_ds_cumdiff(data_weighted)
+data_weighted_ds_wide_collapse <- gen_data_weighted_ds_cumdiff(data_weighted,
+                                                               configname = "default")
 #> [1] "Loading the config file..."
 #> [1] "Processing the data..."
 #> [1] "Data processing complete."
@@ -143,7 +149,8 @@ To plot burden of disease for, say, “yld”, you can use the following
 code:
 
 ``` r
-data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted)
+data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted,
+                                                           configname = "default")
 #> [1] "Loading the config file..."
 #> [1] "Processing the data..."
 #> [1] "Data processing complete."

diff --git a/inst/config/config.yml b/inst/config/config.yml
@@ -114,3 +114,121 @@ default:
     - intracerebralhemorrhage
     - ischemicstroke
     - subarachnoidhemorrhage
+
+
+production:
+# For the gen_data_weighted() function
+  grouping_vars:  # Variables that are used to group the data
+    - source
+    - time
+    - simID
+  weight: # Population size of each group is used as weight
+    - count
+  weighted_vars:  # List of risk factors to calculate population weighted mean
+    - income
+    - sector
+    - sodium
+    - carbohydrate
+    - fat
+    - protein
+    - energyintake
+    - physicalactivity
+    - bmi
+    - height
+    - weight
+    - over_weight
+    - obese_weight
+    - disability_weight
+    - deaths
+    - migrations
+  prevalence_disease:  # List of diseases for which to calculate the total prevalence number in the population
+    - prevalence_ischemicheartdisease # ihd at places
+    - prevalence_diabetes # at some places used db, check
+    - prevalence_stroke
+    - prevalence_asthma
+    - prevalence_chronickidneydisease # ckd at places
+  incidence_disease:  # List of diseases for which to calculate the total incidence number in the population
+    - incidence_ischemicheartdisease # ihd at places
+    - incidence_diabetes # at some places used db, check
+    - incidence_stroke
+    - incidence_asthma
+    - incidence_chronickidneydisease # ckd at places
+  # For the gen_data_weighted_rf() function
+  names_from: # Also used in gen_data_weighted_burden
+    - source
+  id_cols: # Also used in gen_data_weighted_burden
+    - time
+    - simID
+  weighted_rf:
+    - weighted_sodium
+    - weighted_energyintake
+    - weighted_bmi
+    - weighted_obesity
+  rf:
+    - sodium
+    - energyintake
+    - bmi
+    - obesity
+  group:
+    - time # Also used in gen_data_weighted_burden
+  summary_columns_rf:  # Risk factor variables to calculate summary statistics, such as mean, min and max
+    - diff_sodium
+    - diff_ei
+    - diff_bmi
+    - diff_obesity
+  # For the gen_data_weighted_ds_diff() and gen_data_weighted_ds_cumdiff() functions
+  weighted_ds:
+    - totalcase_ihd
+    - totalcase_diabetes
+    - totalcase_stroke
+    - totalcase_asthma
+    - totalcase_ckd
+  disease:  # List of diseases for which to calculate the total prevalence number or incidence number in the population
+    - ihd
+    - diabetes # at some places used db, check
+    - stroke
+    - asthma
+    - ckd
+  group_ds:  # Also used in gen_data_weighted_burden
+    - simID
+  summary_columns_ds:  # Variables that are used to calculate the summary statistics
+    - diff_inc_ihd
+    - diff_inc_db
+    - diff_inc_stroke
+    - diff_inc_asthma
+    - diff_inc_ckd
+  summary_columns_ds_cum:  # Variables that are used to calculate the summary statistics
+    - cumdiff_inc_ihd
+    - cumdiff_inc_db
+    - cumdiff_inc_stroke
+    - cumdiff_inc_asthma
+    - cumdiff_inc_ckd
+  # For the gen_data_weighted_burden() function
+  weighted_burden:
+    - total_yll
+    - total_yld
+    - total_daly
+  burden:  # List of disease burden measures
+    - yll
+    - yld
+    - daly
+  summary_columns_burden:  # Variables that are used to calculate the summary statistics
+    - diff_daly
+    - diff_yll
+    - diff_yld
+  summary_columns_burden_cum:  # Variables that are used to calculate the summary statistics
+    - diff_daly
+    - diff_yll
+    - diff_yld
+    - cumdiff_daly
+    - cumdiff_yll
+    - cumdiff_yld
+  # For the gen_data_weighted_bd_spline() function
+  burden_spline:
+    - burden_mean: cumdiff_daly_mean
+    - burden_min: cumdiff_daly_min
+    - burden_max: cumdiff_daly_max
+  stroke:
+    - intracerebralhemorrhage
+    - ischemicstroke
+    - subarachnoidhemorrhage
diff --git a/man/gen_data_le.Rd b/man/gen_data_le.Rd
diff --git a/man/gen_data_weighted.Rd b/man/gen_data_weighted.Rd
Original file line number	Diff line number	Diff line change
		@@ -1,2 +1 @@
		source("renv/activate.R")
		styler.save_after_styling <- TRUE