-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtemplate_alcohol.R
99 lines (68 loc) · 3.73 KB
/
template_alcohol.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Extracts dates for alcohol consumption code occurrences in GP records
# Merges with index dates
# Defines alcohol status at index dates according to our algorithm, described here: https://github.com/Exeter-Diabetes/CPRD-Codelists#alcohol-consumption
############################################################################################
# Setup
library(tidyverse)
library(aurum)
rm(list=ls())
cprd = CPRDData$new(cprdEnv = "test-remote",cprdConf = "~/.aurum.yaml")
codesets = cprd$codesets()
codes = codesets$getAllCodeSetVersion(v = "31/10/2021")
cohort_prefix <- ""
# e.g. "mm" for treatment response (MASTERMIND) cohort
analysis = cprd$analysis(cohort_prefix)
############################################################################################
# Pull out all raw code instances and cache with 'all_patid' prefix
analysis = cprd$analysis("all_patid")
raw_alcohol_medcodes <- cprd$tables$observation %>%
inner_join(codes$alcohol, by="medcodeid") %>%
analysis$cached("raw_alcohol_medcodes", indexes=c("patid", "obsdate"))
############################################################################################
# Clean: remove if before DOB or after lcd/deregistration/death, and re-cache
clean_alcohol_medcodes <- raw_alcohol_medcodes %>%
inner_join(cprd$tables$validDateLookup, by="patid") %>%
filter(obsdate>=min_dob & obsdate<=gp_ons_end_date) %>%
select(patid, date=obsdate, alcohol_cat) %>%
distinct() %>%
analysis$cached("clean_alcohol_medcodes", indexes=c("patid", "date", "alcohol_cat"))
############################################################################################
# Find alcohol status according to algorithm at index dates
## Get index dates
analysis = cprd$analysis(cohort_prefix)
index_dates <- index_dates %>% analysis$cached("index_dates")
## Join with alcohol codes on patid and retain codes before index date or up to 7 days after
pre_index_date_alcohol_codes <- index_dates %>%
inner_join(clean_alcohol_medcodes, by="patid") %>%
filter(datediff(date, index_date)<=7) %>%
analysis$cached("pre_index_date_alcohol_merge", indexes=c("patid", "index_date", "alcohol_cat"))
## Find if ever previously a 'harmful' drinker (category 3)
harmful_drinker_ever <- pre_index_date_alcohol_codes %>%
filter(alcohol_cat=="AlcoholConsumptionLevel3") %>%
distinct(patid, index_date) %>%
mutate(harmful_drinker_ever=1L)
## Find most recent code
### If different categories on same day, use highest
most_recent_code <- pre_index_date_alcohol_codes %>%
mutate(alcohol_cat_numeric = ifelse(alcohol_cat=="AlcoholConsumptionLevel0", 0L,
ifelse(alcohol_cat=="AlcoholConsumptionLevel1", 1L,
ifelse(alcohol_cat=="AlcoholConsumptionLevel2", 2L,
ifelse(alcohol_cat=="AlcoholConsumptionLevel3", 3L, NA))))) %>%
group_by(patid, index_date) %>%
filter(date==max(date, na.rm=TRUE)) %>%
filter(alcohol_cat_numeric==max(alcohol_cat_numeric, na.rm=TRUE)) %>%
ungroup() %>%
analysis$cached("alcohol_interim_1", indexes=c("patid", "index_date"))
## Pull together
alcohol_cat <- index_dates %>%
left_join(harmful_drinker_ever, by=c("patid", "index_date")) %>%
left_join(most_recent_code, by=c("patid", "index_date")) %>%
mutate(alcohol_cat_numeric=ifelse(!is.na(harmful_drinker_ever) & harmful_drinker_ever==1, 3L, alcohol_cat_numeric),
alcohol_cat=case_when(
alcohol_cat_numeric==0 ~ "None",
alcohol_cat_numeric==1 ~ "Within limits",
alcohol_cat_numeric==2 ~ "Excess",
alcohol_cat_numeric==3 ~ "Harmful"
)) %>%
select(patid, index_date, alcohol_cat) %>%
analysis$cached("alcohol", indexes=c("patid", "index_date"))