-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path3.workflows-solutions.Rmd
98 lines (67 loc) · 2.68 KB
/
3.workflows-solutions.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
---
title: "Workflow solutions"
author: "Mark Dunning"
date: '`r format(Sys.time(), "Last modified: %d %b %Y")`'
output: html_document
---
******
Take the steps used to clean the patients dataset and calculate BMI (see below for the code)
- Re-write in the piping framework
******
```{r message = FALSE}
library(dplyr)
library(stringr)
## Step-by-step (non-piped) template for cleaning the patients data.
## Read the tab-delimited file and convert it to a tibble.
## as_tibble() replaces the deprecated tbl_df().
patients <- read.delim("patient-data.txt")
patients <- as_tibble(patients)
## Sex: strip surrounding whitespace, then store as a factor.
patients_clean <- mutate(patients, Sex = factor(str_trim(Sex)))
## Height / Weight: drop the "cm" / "kg" text and convert to numeric.
## Columns are referenced by bare name so mutate() always reads them from
## the table it is modifying.
patients_clean <- mutate(
  patients_clean,
  Height = as.numeric(str_replace_all(Height, pattern = "cm", ""))
)
patients_clean <- mutate(
  patients_clean,
  Weight = as.numeric(str_replace_all(Weight, "kg", ""))
)
## BMI = Weight / (Height / 100)^2; Overweight flags BMI above 25.
patients_clean <- mutate(
  patients_clean,
  BMI = (Weight / (Height / 100)^2),
  Overweight = BMI > 25
)
## Smokes: rewrite "Yes"/"No" as "TRUE"/"FALSE", then convert to logical.
patients_clean <- mutate(
  patients_clean,
  Smokes = str_replace_all(Smokes, "Yes", "TRUE")
)
patients_clean <- mutate(
  patients_clean,
  Smokes = as.logical(str_replace_all(Smokes, "No", "FALSE"))
)
```
```{r}
## Re-write the above template using 'pipes'
## Each step feeds its result to the next, so no intermediate assignments
## are needed. as_tibble() replaces the deprecated tbl_df().
patients_clean <- read.delim("patient-data.txt") %>%
  as_tibble() %>%
  ## Sex: strip surrounding whitespace, then store as a factor.
  mutate(Sex = factor(str_trim(Sex))) %>%
  ## Height / Weight: drop the "cm" / "kg" text and convert to numeric.
  mutate(Height = as.numeric(str_replace_all(Height, pattern = "cm", ""))) %>%
  mutate(Weight = as.numeric(str_replace_all(Weight, "kg", ""))) %>%
  ## BMI = Weight / (Height / 100)^2; Overweight flags BMI above 25.
  mutate(BMI = (Weight / (Height / 100)^2), Overweight = BMI > 25) %>%
  ## Smokes: rewrite "Yes"/"No" as "TRUE"/"FALSE", then convert to logical.
  mutate(Smokes = str_replace_all(Smokes, "Yes", "TRUE")) %>%
  mutate(Smokes = as.logical(str_replace_all(Smokes, "No", "FALSE")))
## Print the cleaned table.
patients_clean
```
******
Use `filter` to print the following subsets of the dataset
- Choose the Female patients from New York or New Jersey
```{r}
## Three alternative ways to select female patients from New York or
## New Jersey.
## 1. Explicit comparisons joined with `|`.
filter(patients_clean, Sex == "Female", State == "New York" | State == "New Jersey")
## 2. Set membership with %in%.
filter(patients_clean, Sex == "Female", State %in% c("New York", "New Jersey"))
## 3. Pattern matching. The pattern is anchored to the two full state names;
##    a bare "New" would also match any other state containing "New".
filter(patients_clean, Sex == "Female", grepl("^New (York|Jersey)$", State))
```
- Choose the overweight smokers that are still alive
```{r}
## Keep rows where Overweight and Smokes are TRUE and Died is not TRUE.
patients_clean %>%
  filter(Overweight & Smokes & !Died)
```
- Choose the patients who own a Pet that is not a dog
```{r}
## Keep patients with a recorded pet that is not a dog: drop missing values,
## the "no pet" spellings ("None", "NONE", "NULL") and the dog spellings
## ("DOG", "Dog"). %in% never returns NA, so missing values are excluded
## with an explicit is.na() check.
patients_clean %>%
  filter(
    !is.na(Pet),
    !(Pet %in% c("None", "NONE", "NULL", "DOG", "Dog"))
  )
```
- Patients born in June
```{r message = FALSE}
## Option 1: compare characters 6-7 of Birth with "06"
## (presumably Birth is formatted year-month-day -- confirm against the data).
patients_clean %>%
  filter(substr(Birth, 6, 7) == "06")
## Option 2: parse Birth as a year-month-day date with lubridate and test
## the month number.
library(lubridate)
patients_clean %>%
  filter(month(ymd(Birth)) == 6)
```
- Patients with a Number > 100 (the Number is the numeric part of the patient `ID`)
```{r}
## The patient number is taken from characters 7-9 of ID and converted to
## numeric. ID is referenced as a column of the table being filtered rather
## than through the separate `patients` data frame, so the condition always
## lines up row-for-row with patients_clean.
filter(patients_clean, as.numeric(substr(ID, 7, 9)) > 100)
```
- Patients that entered the study on 2016-05-31
```{r}
## tidyr provides fill().
library(tidyr)
## Treat empty Date.Entered.Study entries as missing, fill each missing entry
## with the closest non-missing value above it, then keep the rows dated
## 2016-05-31.
patients_clean %>%
  mutate(
    Date.Entered.Study = as.character(Date.Entered.Study),
    Date.Entered.Study = ifelse(Date.Entered.Study == "", NA, Date.Entered.Study)
  ) %>%
  fill(Date.Entered.Study, .direction = "down") %>%
  filter(Date.Entered.Study == "2016-05-31")
```