-
Notifications
You must be signed in to change notification settings - Fork 10
1. 16s Microbiome Data Analysis
This document is a guide to do the downstream analysis of 16s Microbiome data analysis. Following packages are used to do the data analysis. Please follow the documentation of each package for more details.
Following R libraries are needed to complete this Pipeline and make all the libraries are properly installed.
library('phyloseq')
library("DESeq2")
library("ggplot2")
library('gridExtra')
library('msa')
library('scales')
library("RColorBrewer")
library('plyr')
library("colorspace")
require('gdata')
library('ClassDiscovery')
library('reshape2')
library('metagenomeSeq')
library('biomformat')
library('gss')
Now loading "biom" file to Phyloseq library
biom_file<-"otu_table_mc10_w_tax.biom"
mapping_file<-"161108_batch_mod_corrected.txt"
treefilename<-"rep_set.tre"
Now loading "biom" file to Phyloseq library
biom_otu_tax<-import_biom(biom_file, treefilename=treefilename)
import_merged_biom<-import_qiime_sample_data(mapping_file)
mapping_file<-subset_samples(import_merged_biom, !IncubationDate==0) #Remove 0 day
merged_mapping_biom<-merge_phyloseq(biom_otu_tax,mapping_file)
colnames(tax_table(merged_mapping_biom))<-c("kingdom", "Phylum","Class", "Order", "Family", "Genus", "species")
merged_mapping_biom_edit<-merged_mapping_biom
tax_table( merged_mapping_biom_edit)<-gsub("k__([[:alnum:]])","\\1",tax_table( merged_mapping_biom_edit))
tax_table(merged_mapping_biom_edit)<-gsub("p__([[:alnum:]])","\\1",tax_table(merged_mapping_biom_edit))
tax_table(merged_mapping_biom_edit)<-gsub("c__([[:alnum:]])","\\1",tax_table(merged_mapping_biom_edit))
tax_table(merged_mapping_biom_edit)<-gsub("o__([[:alnum:]])","\\1",tax_table(merged_mapping_biom_edit))
tax_table(merged_mapping_biom_edit)<-gsub("f__([[:alnum:]])","\\1",tax_table(merged_mapping_biom_edit))
tax_table(merged_mapping_biom_edit)<-gsub("g__([[:alnum:]])","\\1",tax_table(merged_mapping_biom_edit))
tax_table(merged_mapping_biom_edit)<-gsub("s__([[:alnum:]])","\\1",tax_table(merged_mapping_biom_edit))
tax_table(merged_mapping_biom_edit)<-gsub("p__(\\[)","\\1",tax_table(merged_mapping_biom_edit))
tax_table(merged_mapping_biom_edit)<-gsub("c__(\\[)","\\1",tax_table(merged_mapping_biom_edit))
tax_table(merged_mapping_biom_edit)<-gsub("o__(\\[)","\\1",tax_table(merged_mapping_biom_edit))
tax_table(merged_mapping_biom_edit)<-gsub("f__(\\[)","\\1",tax_table(merged_mapping_biom_edit))
tax_table(merged_mapping_biom_edit)<-gsub("g__(\\[)","\\1",tax_table(merged_mapping_biom_edit))
tax_table(merged_mapping_biom_edit)<-gsub("s__(\\[)","\\1",tax_table(merged_mapping_biom_edit))
merged_mapping_biom<-merged_mapping_biom_edit
phyloseq provides useful tools for filtering, subsetting, and agglomerating taxa – a task that is often appropriate or even necessary for effective analysis of microbiome count data. In this subsection, we graphically explore the prevalence of taxa in the example dataset and demonstrate how this can be used as a filtering criterion. One of the reasons to filter in this way is to avoid spending much time analyzing taxa that were only rarely seen.
prevelance_th<-2
source('prevalencefiltering.R')
merged_mapping_biom_flt_unidentified_Phylum<-prevalence_filter(merged_mapping_biom,prevelance_th)
ggplot(merged_mapping_biom_flt_unidentified_Phylum$prevdf1, aes(TotalAbundance, Prevalence, color = Phylum)) +
geom_hline(yintercept = merged_mapping_biom_flt_unidentified_Phylum$prev_th, alpha = 0.5, linetype = 2) +
geom_point(size = 2, alpha = 0.7) +
scale_y_log10() + scale_x_log10() +xlab("Total~Abundance") +facet_wrap(~ Phylum)
Please read following articles before your pcik your normalization method and the cut-off thresholds.
If you are using "TSS" method to normalize your data and using filtering method describes here,please fill the following section.
normalization_method<-"TSS"
source('normalization_tss.R')
merged_mapping_biom_norm_tss<-normalization_filter_tss(merged_mapping_biom_flt_unidentified_Phylum$flt_b,normalization_method)
samplesize<-500
merged_mapping_biom_norm_raref<-rarefy_even_depth(merged_mapping_biom, sample.size = samplesize, replace=F)
```r
merged_mapping_biom_norm_raref.ord<-ordinate(merged_mapping_biom_norm_raref, "NMDS", "bray", trymax=1000)
plot_ordination(merged_mapping_biom_norm_raref, merged_mapping_biom_norm_raref.ord, type="taxa", color="Class", title='Class')+theme(axis.text.x = element_text(angle = 90, hjust = 0.5,vjust=0.5, size=8), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background =element_rect(fill='black', colour='black'), plot.background=element_rect(fill="gray80"))
plot_ordination(merged_mapping_biom_norm_raref, merged_mapping_biom_norm_raref.ord, type="sample", color="Treatment", title='Sample', label="Description")+theme(axis.text.x = element_text(angle = 90, hjust = 0.5,vjust=0.5, size=8), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background =element_rect(fill='black', colour='black'), plot.background=element_rect(fill="gray80")) +geom_point(size = 3)
merged_mapping_biom_norm_raref_OR<-subset_samples(merged_mapping_biom_norm_raref,!Description %in% c('RE22','RE23', 'RE32'))
merged_mapping_biom_norm_raref.ord_OR<-ordinate(merged_mapping_biom_norm_raref_OR, "NMDS", "bray", trymax=1000)
plot_ordination(merged_mapping_biom_norm_raref_OR, merged_mapping_biom_norm_raref.ord_OR, type="taxa", color="Class", title='Class')+theme(axis.text.x = element_text(angle = 90, hjust = 0.5,vjust=0.5, size=8), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background =element_rect(fill='black', colour='black'), plot.background=element_rect(fill="gray80"))
plot_ordination(merged_mapping_biom_norm_raref_OR,merged_mapping_biom_norm_raref.ord_OR, type="sample", color="Treatment", title='Sample')+theme(axis.text.x = element_text(angle = 90, hjust = 0.5,vjust=0.5, size=8), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background =element_rect(fill='black', colour='black'), plot.background=element_rect(fill="gray80")) +geom_point(size = 3)
ggsave(file="plot_ordination_without_Outliers.png", width=15, height=10)
If you are using "CSS" method to normalize your data and using filtering method describes here,please fill the following section.
Here we using TSS normalised data to plot.
Transform to relative abundance. Save as new object.
source('plotabundancevioline_g.R')
biom_file_aband_graph<-merged_mapping_biom_norm_tss
colname_fltr<-'Treatment'
level<-"Family"
setColor<-function(n){
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
col=sample(col_vector, n)
return(col)
}
target_Age<-c("CO","RE", "CR", "HW")
m_merged_mapping_biom_flt_unidentified_Phylum_CE <- psmelt(biom_file_aband_graph)
m_merged_mapping_biom_flt_unidentified_Phylum_CE_NAF<-NAToUnknown(m_merged_mapping_biom_flt_unidentified_Phylum_CE, unknown='NOTA')
class_sort_order_tss<-unique(as.vector(as.data.frame(m_merged_mapping_biom_flt_unidentified_Phylum_CE_NAF))[['Family']])
isUnknown<-"Unknown" %in% class_sort_order_tss
if(isUnknown ){
class_sort_order_tss$"Unknown"<-NULL
lastUnknows<-c(levels(withoutUnknows), "Unknown")
}else{
lastUnknows<-class_sort_order_tss
}
m_merged_mapping_biom_flt_unidentified_Phylum_CE<-subset(m_merged_mapping_biom_flt_unidentified_Phylum_CE_NAF, Abundance > 0)
m_merged_mapping_biom_flt_unidentified_Phylum_CE$Treatment<-reorder.factor(m_merged_mapping_biom_flt_unidentified_Phylum_CE$Treatment, new.order=target_Age)
m_merged_mapping_biom_flt_unidentified_Phylum_CE$Family<-reorder.factor(m_merged_mapping_biom_flt_unidentified_Phylum_CE$Family, new.order=class_sort_order_tss)
plot_abundance_violin(m_merged_mapping_biom_flt_unidentified_Phylum_CE,"Relative Abundances", level, level)
source('plotabundancebar_g.R')
n<-length(class_sort_order_tss)
col<-setColor(n)
plot_abundance_bar(m_merged_mapping_biom_flt_unidentified_Phylum_CE,"Relative Abundances", level, level, col)
Now loading "biom" file to metagenomicseq library
merged_mapping_biom<-subset_samples(merged_mapping_biom,!Description %in% c('RE22','RE23', 'RE32'))
phenotypeData<-AnnotatedDataFrame(as.data.frame(sample_data(merged_mapping_biom)))
TAXdata<-AnnotatedDataFrame(as.data.frame(tax_table(merged_mapping_biom)))
OTUdata<-as(otu_table(merged_mapping_biom), "matrix")
obj<-newMRexperiment(OTUdata, phenoData = phenotypeData, featureData = TAXdata)
normalization_method<-"CSS"
present<-1
depth<-1000
source('normalization_css.R')
obj_normalized_flr<-normalization_filter_css(obj,present,depth,normalization_method)
Transform to relative abundance.
source('plotabundancevioline_g.R')
obj_file_aband_graph<-obj_normalized_flr
level<-"Family"
target_Age<-c("CO","RE", "CR", "HW")
#Oreder your x-axis with a X factor (Age)
aggTax_obj_file_aband_graph<-as.data.frame(aggTax(obj_file_aband_graph,lvl=level, out="matrix", norm=T))
pdata_obj_file_aband_graph<- as.data.frame(pData(obj_file_aband_graph)[,c(1,5,6)])
df_aggTax_obj_file_aband_graph<-as.data.frame(t(aggTax_obj_file_aband_graph))
or_df_aggTax_obj_file_aband_graph<-df_aggTax_obj_file_aband_graph[order(rownames(df_aggTax_obj_file_aband_graph)),]
or_df_aggTax_obj_file_aband_graph_merged<-cbind(or_df_aggTax_obj_file_aband_graph, pdata_obj_file_aband_graph[order(rownames(pdata_obj_file_aband_graph)),])
m_or_df_aggTax_obj_file_aband_graph_merged<-melt(or_df_aggTax_obj_file_aband_graph_merged, id=c("X.SampleID", "IncubationDate", "Treatment"), variable.name=level, value.name = "Abundance" )
m_or_df_aggTax_obj_file_aband_graph_merged<- subset(m_or_df_aggTax_obj_file_aband_graph_merged, Abundance > 0)
m_or_df_aggTax_obj_file_aband_graph_merged$Age<-reorder.factor(m_or_df_aggTax_obj_file_aband_graph_merged$Treatment, new.order=target_Age)
plot_abundance_violin(m_or_df_aggTax_obj_file_aband_graph_merged,"Relative Abundances", level, level)
source('plotabundancebar_g.R')
n<-length(unique(m_or_df_aggTax_obj_file_aband_graph_merged$Family))
col<-setColor(n)
plot_abundance_bar(m_or_df_aggTax_obj_file_aband_graph_merged,"Relative Abundances", level, level, col)
The hypothesis for the implemented presence-absence test is that the proportion/odds of a given feature present is higher/lower among one group of individuals compared to another, and we want to test whether any difference in the proportions observed is significant.
mrEx_obj_pr_ab<-obj_file_aband_graph
factor_test<-pData(mrEx_obj_pr_ab)$Treatment
res_pr_ab<-fitPA(mrEx_obj_pr_ab,cl=factor_test)
t_rank_bind_res_pr_ab<-cbind(res_pr_ab,fData(mrEx_obj_pr_ab))
head(t_rank_bind_res_pr_ab[order(t_rank_bind_res_pr_ab$adjPvalues),])
tail(t_rank_bind_res_pr_ab[order(t_rank_bind_res_pr_ab$adjPvalues),])
To find features absent from any number of classes the function uniqueFeatures provides a table of the feature ids, the number of positive features and reads for each group. Thresholding for the number of positive samples or reads required are options.
mrEx_obj_cor_t<-obj_file_aband_graph
factor_test<-pData(mrEx_obj_pr_ab)[["Treatment"]]
uniqueFeatures(mrEx_obj_cor_t, factor_test, nsamples = 10, nreads = 100)
treatment<-pData(obj_file_aband_graph)$Treatment
heatmapColColors <-brewer.pal(12, "Set3")[as.integer(factor(treatment))]
heatmapCols <-colorRampPalette(brewer.pal(9, "RdBu"))(50)
plotMRheatmap(obj = obj_file_aband_graph, n = 200, cexRow = 0.4, cexCol = 0.4, trace = "none", col = heatmapCols, ColSideColors = heatmapColColors)
plotCorr(obj = obj_file_aband_graph, n = 200, cexRow = 0.25, cexCol = 0.25, trace = "none", dendrogram = "none", col = heatmapCols)
In here all the "NOTA"s and "f_" are summarized as "Unassigned". Input is manually created.
m_merged_mapping_biom_flt_Abun_graph_df_family <-read.delim("m_merged_mapping_biom_flt_Abun_graph_df_family_01_17_17_mod.txt", header = T)
source('plotabundancebar_g.R')
plot_abundance_bar(m_merged_mapping_biom_flt_Abun_graph_df_family,"Relative Abundances", level, level, col)
plot_abundance_violin(m_merged_mapping_biom_flt_Abun_graph_df_family,"Relative Abundances", level, level)
class_sort_order_tss<-unique(as.vector(as.data.frame(m_merged_mapping_biom_flt_Abun_graph_df_family))[['Family']])
s1<-m_merged_mapping_biom_flt_Abun_graph_df_family[m_merged_mapping_biom_flt_Abun_graph_df_family$Abundance>0,]
target_Age<-c("CO","RE", "CR", "HW")
s1$Treatment<-reorder.factor(s1$Treatment, new.order=target_Age)
s1$Family<-reorder.factor(s1$Family, new.order=class_sort_order_tss)
plot_abundance_violin(s1,"Relative Abundances", level, level)
source('plotabundancebar_g.R')
n2<-length(class_sort_order_tss)
n2<-49
col<-setColor(n2)
plot_abundance_bar(s1,"Relative Abundances", level, level, col)