Skip to content

Commit

Permalink
Merge pull request #116 from imperialCHEPI/config_update
Browse files Browse the repository at this point in the history
Customise config.yml with an argument to adjust the config being used
  • Loading branch information
SaranjeetKaur authored Dec 18, 2024
2 parents 37718da + dba705e commit 595f66a
Show file tree
Hide file tree
Showing 13 changed files with 210 additions and 47 deletions.
1 change: 0 additions & 1 deletion .Rprofile
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
source("renv/activate.R")
styler.save_after_styling <- TRUE
33 changes: 20 additions & 13 deletions R/data-process.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,13 @@ NULL
#' This function calculates weighted mean values for various metrics over years.
#'
#' @param data A data frame containing values for various metrics.
#' @param configname The name of the configuration to use, i.e. a top-level field of the `config.yml` file (e.g., "default", "development", "production", "testing").
#' @return A data frame with weighted values for various metrics over years.
#' @export
gen_data_weighted <- function(data) {
gen_data_weighted <- function(data, configname = "default") {

print("Loading the config file...")
config <- load_config("default")
config <- load_config(configname)
print("Processing the data...")
colnames(data) <- gsub("^mean_", "", colnames(data)) # Clean the column names
## by removing 'mean_'
Expand Down Expand Up @@ -124,12 +125,13 @@ gen_data_weighted <- function(data) {
#'
#' @param data_weighted A data frame containing weighted mean values for various
#' metrics.
#' @param configname The name of the configuration to use, i.e. a top-level field of the `config.yml` file (e.g., "default", "development", "production", "testing").
#' @return A data frame with differences between intervention and baseline
#' values for risk factors.
#' @export
gen_data_weighted_rf <- function(data_weighted) {
gen_data_weighted_rf <- function(data_weighted, configname = "default") {
print("Loading the config file...")
config <- load_config("default")
config <- load_config(configname)
print("Processing the data...")
data_weighted_rf <- dplyr::select(
data_weighted,
Expand Down Expand Up @@ -187,12 +189,13 @@ gen_data_weighted_rf <- function(data_weighted) {
#'
#' @param data_weighted A data frame containing weighted mean values for various
#' metrics.
#' @param configname The name of the configuration to use, i.e. a top-level field of the `config.yml` file (e.g., "default", "development", "production", "testing").
#' @return A data frame with differences between intervention and baseline
#' values for incidences.
#' @export
gen_data_weighted_ds_diff <- function(data_weighted) {
gen_data_weighted_ds_diff <- function(data_weighted, configname = "default") {
print("Loading the config file...")
config <- load_config("default")
config <- load_config(configname)
print("Processing the data...")
data_weighted_ds <- dplyr::select(
data_weighted,
Expand Down Expand Up @@ -237,12 +240,13 @@ gen_data_weighted_ds_diff <- function(data_weighted) {
#'
#' @param data_weighted A data frame containing weighted mean values for various
#' metrics.
#' @param configname The name of the configuration to use, i.e. a top-level field of the `config.yml` file (e.g., "default", "development", "production", "testing").
#' @return A data frame with differences between intervention and baseline
#' values for incidences.
#' @export
gen_data_weighted_ds_cumdiff <- function(data_weighted) {
gen_data_weighted_ds_cumdiff <- function(data_weighted, configname = "default") {
print("Loading the config file...")
config <- load_config("default")
config <- load_config(configname)
print("Processing the data...")
data_weighted_ds <- dplyr::select(
data_weighted,
Expand Down Expand Up @@ -320,12 +324,13 @@ gen_data_weighted_ds_cumdiff <- function(data_weighted) {
#'
#' @param data_weighted A data frame containing weighted mean values for various
#' metrics.
#' @param configname The name of the configuration to use, i.e. a top-level field of the `config.yml` file (e.g., "default", "development", "production", "testing").
#' @return A data frame with differences between intervention and baseline
#' values for burden of disease.
#' @export
gen_data_weighted_burden <- function(data_weighted) {
gen_data_weighted_burden <- function(data_weighted, configname = "default") {
print("Loading the config file...")
config <- load_config("default")
config <- load_config(configname)
print("Processing the data...")
data_weighted_burden <- dplyr::select(
data_weighted,
Expand Down Expand Up @@ -399,11 +404,12 @@ gen_data_weighted_burden <- function(data_weighted) {
#'
#' @param data_weighted_bd_wide_collapse A data frame containing weighted
#' values for burden of disease.
#' @param configname The name of the configuration to use, i.e. a top-level field of the `config.yml` file (e.g., "default", "development", "production", "testing").
#' @return A data frame with spline smoothing applied for burden of disease.
#' @export
gen_data_weighted_bd_spline <- function(data_weighted_bd_wide_collapse) {
gen_data_weighted_bd_spline <- function(data_weighted_bd_wide_collapse, configname = "default") {
print("Loading the config file...")
config <- load_config("default")
config <- load_config(configname)
print("Processing the data...")
config_file_path <- system.file("config",
"config.yml",
Expand Down Expand Up @@ -492,9 +498,10 @@ gen_data_weighted_bd_spline <- function(data_weighted_bd_wide_collapse) {
#' This function calculates life expectancy for various age and groups.
#'
#' @param data_mean A data frame containing mean values for various metrics.
#' @param configname The name of the configuration to use, i.e. a top-level field of the `config.yml` file (e.g., "default", "development", "production", "testing").
#' @return A data frame with life expectancy.
#' @export
gen_data_le <- function(data_mean) {
gen_data_le <- function(data_mean, configname = "default") {
data_le <- data_mean[, c(
data_mean$source,
data_mean$time,
Expand Down
28 changes: 19 additions & 9 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -47,30 +47,36 @@ library(healthgpsrvis)
# Get the path to the .rds file included in the testdata folder
filepath <- testthat::test_path("testdata", "data_ps3_reformulation")
#filepath <- "path/to/data.rds" # Get the path to the .rds file included in any other local folder
#filepath <- "path/to/data.rds" # Get the path to the .rds file included in any
#other local folder
# Read the .rds file
data <- readRDS(filepath)
# Generate the weighted data
data_weighted <- gen_data_weighted(data)
data_weighted <- gen_data_weighted(data, configname = "default")
# To use a customised configuration, set `configname` to "production" and edit the values you need under the `production` field of the `config.yml` file, which is located in the `inst/config` folder.
# Generate the weighted data for the risk factors
data_weighted_rf_wide_collapse <- gen_data_weighted_rf(data_weighted)
data_weighted_rf_wide_collapse <- gen_data_weighted_rf(data_weighted,
configname = "default")
# View structure of the weighted data for the risk factors
str(data_weighted_rf_wide_collapse)
```

To plot a risk factor (say, "bmi") for the weighted data, you can use the following code:
To plot a risk factor (say, "bmi") for the weighted data, you can use the
following code:

```{r riskfactor_plot}
# Plot the risk factor "bmi"
riskfactors("bmi", data_weighted)
```

To plot the difference in the risk factor (say, "bmi") for the weighted data, you can use the following code:
To plot the difference in the risk factor (say, "bmi") for the weighted data,
you can use the following code:

```{r riskfactors_diff_plot}
# Plot of difference in the risk factor "bmi"
Expand All @@ -81,17 +87,20 @@ riskfactors_diff("bmi",
scale_y_continuous_labels = c(-0.148, -0.074, 0))
```

To plot the incidence difference for, say, "stroke", you can use the following code:
To plot the incidence difference for, say, "stroke", you can use the following
code:

```{r inc_diff_plot}
data_weighted_ds_wide_diff <- gen_data_weighted_ds_diff(data_weighted)
data_weighted_ds_wide_diff <- gen_data_weighted_ds_diff(data_weighted,
configname = "default")
inc_diff("stroke", data_weighted_ds_wide_diff)
```

To plot the cumulative incidence difference for, say, "diabetes", you can use the following code:

```{r inc_cum_plot}
data_weighted_ds_wide_collapse <- gen_data_weighted_ds_cumdiff(data_weighted)
data_weighted_ds_wide_collapse <- gen_data_weighted_ds_cumdiff(data_weighted,
configname = "default")
inc_cum("diabetes",
data_weighted_ds_wide_collapse,
scale_y_continuous_limits = c(-4424000, 0),
Expand All @@ -103,7 +112,8 @@ inc_cum("diabetes",
To plot burden of disease for, say, "yld", you can use the following code:

```{r burden_disease_plot}
data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted)
data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted,
configname = "default")
burden_disease("yld", data_weighted_bd_wide_collapse)
```

Expand Down
19 changes: 13 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,24 @@ library(healthgpsrvis)

# Get the path to the .rds file included in the testdata folder
filepath <- testthat::test_path("testdata", "data_ps3_reformulation")
#filepath <- "path/to/data.rds" # Get the path to the .rds file included in any other local folder
#filepath <- "path/to/data.rds" # Get the path to the .rds file included in any
#other local folder


# Read the .rds file
data <- readRDS(filepath)

# Generate the weighted data
data_weighted <- gen_data_weighted(data)
data_weighted <- gen_data_weighted(data, configname = "default")
#> [1] "Loading the config file..."
#> [1] "Processing the data..."
#> [1] "Data processing complete."

# To use a customised configuration, set `configname` to "production" and edit the values you need under the `production` field of the `config.yml` file, which is located in the `inst/config` folder.

# Generate the weighted data for the risk factors
data_weighted_rf_wide_collapse <- gen_data_weighted_rf(data_weighted)
data_weighted_rf_wide_collapse <- gen_data_weighted_rf(data_weighted,
configname = "default")
#> [1] "Loading the config file..."
#> [1] "Processing the data..."
#> [1] "Data processing complete."
Expand Down Expand Up @@ -106,7 +110,8 @@ To plot the incidence difference for, say, “stroke”, you can use the
following code:

``` r
data_weighted_ds_wide_diff <- gen_data_weighted_ds_diff(data_weighted)
data_weighted_ds_wide_diff <- gen_data_weighted_ds_diff(data_weighted,
configname = "default")
#> [1] "Loading the config file..."
#> [1] "Processing the data..."
#> [1] "Data processing complete."
Expand All @@ -121,7 +126,8 @@ To plot the cumulative incidence difference for, say, “diabetes”, you
can use the following code:

``` r
data_weighted_ds_wide_collapse <- gen_data_weighted_ds_cumdiff(data_weighted)
data_weighted_ds_wide_collapse <- gen_data_weighted_ds_cumdiff(data_weighted,
configname = "default")
#> [1] "Loading the config file..."
#> [1] "Processing the data..."
#> [1] "Data processing complete."
Expand All @@ -143,7 +149,8 @@ To plot burden of disease for, say, “yld”, you can use the following
code:

``` r
data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted)
data_weighted_bd_wide_collapse <- gen_data_weighted_burden(data_weighted,
configname = "default")
#> [1] "Loading the config file..."
#> [1] "Processing the data..."
#> [1] "Data processing complete."
Expand Down
118 changes: 118 additions & 0 deletions inst/config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,121 @@ default:
- intracerebralhemorrhage
- ischemicstroke
- subarachnoidhemorrhage


production:
# For the gen_data_weighted() function
grouping_vars: # Variables that are used to group the data
- source
- time
- simID
weight: # Population size of each group is used as weight
- count
weighted_vars: # List of risk factors to calculate population weighted mean
- income
- sector
- sodium
- carbohydrate
- fat
- protein
- energyintake
- physicalactivity
- bmi
- height
- weight
- over_weight
- obese_weight
- disability_weight
- deaths
- migrations
prevalence_disease: # List of diseases for which to calculate the total prevalence number in the population
- prevalence_ischemicheartdisease # abbreviated as ihd in some places
- prevalence_diabetes # abbreviated as db in some places, check
- prevalence_stroke
- prevalence_asthma
- prevalence_chronickidneydisease # abbreviated as ckd in some places
incidence_disease: # List of diseases for which to calculate the total incidence number in the population
- incidence_ischemicheartdisease # abbreviated as ihd in some places
- incidence_diabetes # abbreviated as db in some places, check
- incidence_stroke
- incidence_asthma
- incidence_chronickidneydisease # abbreviated as ckd in some places
# For the gen_data_weighted_rf() function
names_from: # Also used in gen_data_weighted_burden
- source
id_cols: # Also used in gen_data_weighted_burden
- time
- simID
weighted_rf:
- weighted_sodium
- weighted_energyintake
- weighted_bmi
- weighted_obesity
rf:
- sodium
- energyintake
- bmi
- obesity
group:
- time # Also used in gen_data_weighted_burden
summary_columns_rf: # Risk factor variables to calculate summary statistics, such as mean, min and max
- diff_sodium
- diff_ei
- diff_bmi
- diff_obesity
# For the gen_data_weighted_ds_diff() and gen_data_weighted_ds_cumdiff() functions
weighted_ds:
- totalcase_ihd
- totalcase_diabetes
- totalcase_stroke
- totalcase_asthma
- totalcase_ckd
disease: # List of diseases for which to calculate the total prevalence number or incidence number in the population
- ihd
- diabetes # at some places used db, check
- stroke
- asthma
- ckd
group_ds: # Also used in gen_data_weighted_burden
- simID
summary_columns_ds: # Variables that are used to calculate the summary statistics
- diff_inc_ihd
- diff_inc_db
- diff_inc_stroke
- diff_inc_asthma
- diff_inc_ckd
summary_columns_ds_cum: # Variables that are used to calculate the summary statistics
- cumdiff_inc_ihd
- cumdiff_inc_db
- cumdiff_inc_stroke
- cumdiff_inc_asthma
- cumdiff_inc_ckd
# For the gen_data_weighted_burden() function
weighted_burden:
- total_yll
- total_yld
- total_daly
burden: # List of disease burden measures
- yll
- yld
- daly
summary_columns_burden: # Variables that are used to calculate the summary statistics
- diff_daly
- diff_yll
- diff_yld
summary_columns_burden_cum: # Variables that are used to calculate the summary statistics
- diff_daly
- diff_yll
- diff_yld
- cumdiff_daly
- cumdiff_yll
- cumdiff_yld
# For the gen_data_weighted_bd_spline() function
burden_spline:
- burden_mean: cumdiff_daly_mean
- burden_min: cumdiff_daly_min
- burden_max: cumdiff_daly_max
stroke:
- intracerebralhemorrhage
- ischemicstroke
- subarachnoidhemorrhage
4 changes: 3 additions & 1 deletion man/gen_data_le.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/gen_data_weighted.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 595f66a

Please sign in to comment.