forked from microbiome/course_2022_oulu
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path07-3-ex-sol-ADHD.Rmd
92 lines (83 loc) · 3.19 KB
/
07-3-ex-sol-ADHD.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
---
output:
html_document: default
pdf_document: default
---
* **Importing and processing data**
```{r, message=FALSE, warning=FALSE}
# Defining file paths
biom_file_path <- "data/Aggregated_humanization2.biom"
sample_meta_file_path <- "data/Mapping_file_ADHD_aggregated.csv"
tree_file_path <- "data/Data_humanization_phylo_aggregation.tre"
library(mia)
# Imports the data
se <- loadFromBiom(biom_file_path)
names(rowData(se)) <- c("Kingdom", "Phylum", "Class", "Order",
"Family", "Genus")
# Goes through the whole DataFrame. Removes '.*[kpcofg]__' from strings, where [kpcofg]
# is any character from listed ones, and .* any character.
rowdata_modified <- BiocParallel::bplapply(rowData(se),
FUN = stringr::str_remove,
pattern = '.*[kpcofg]__')
# Genus level has additional '\"', so let's delete that also
rowdata_modified <- BiocParallel::bplapply(rowdata_modified,
FUN = stringr::str_remove,
pattern = '\"')
# rowdata_modified is a list, so it is converted back to DataFrame format.
rowdata_modified <- DataFrame(rowdata_modified)
# And then assigned back to the SE object
rowData(se) <- rowdata_modified
# We use this to check what type of data it is
# read.table(sample_meta_file_path)
# It seems like a comma separated file and it does not include headers
# Let us read it and then convert from data.frame to DataFrame
# (required for our purposes)
sample_meta <- DataFrame(read.table(sample_meta_file_path, sep = ",", header = FALSE))
# Add sample names to rownames
rownames(sample_meta) <- sample_meta[,1]
# Delete column that included sample names
sample_meta[,1] <- NULL
# We can add headers
colnames(sample_meta) <- c("patient_status", "cohort", "patient_status_vs_cohort", "sample_name")
# Then it can be added to colData
colData(se) <- sample_meta
# Convert to tse format
tse <- as(se, "TreeSummarizedExperiment")
# Reads the tree file
tree <- ape::read.tree(tree_file_path)
# Add tree to rowTree
rowTree(tse) <- tree
```
* Estimate alpha diversity for each sample and draw a histogram. Tip:
estimateDiversity
``` {r}
# Indices chosen for diversity
indices <- c("shannon","coverage", "faith", "fisher",
"gini_simpson", "inverse_simpson" )
# Defining column names where to store the results
names <- as.vector(sapply(indices, paste0, "_index"))
# Calculating the diversities
tse <- estimateDiversity(tse, index = indices, name = names)
# Plotting the histograms
library(ggplot2)
for (i in 1:6) {
hist_plot <- ggplot(as.data.frame(colData(tse)),
aes_string(x = names[i])) +
geom_histogram(bins = 20, fill = "gray", color = "black") +
labs(x = names[i], y = "Sample frequency")
print(hist_plot)
}
```
* Compare the results between two or more
alpha diversity indices (visually and/or statistically).
``` {r, message=FALSE}
# Comparing results with a cross-plot
for (i in 2:6) {
cross_plot <- ggplot(as.data.frame(colData(tse)),
aes_string(x=names[1],y=names[i]))+
geom_point()+
geom_smooth(method = "lm")+
labs(x = names[1], y = names[i])
print(cross_plot)
}
```