Skip to content

Commit

Permalink
improved taxonomy parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
jbisanz committed Oct 19, 2018
1 parent ba443df commit 337067a
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 1 deletion.
7 changes: 6 additions & 1 deletion R/qza_to_phyloseq.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,12 @@ qza_to_phyloseq<-function(features,tree,taxonomy,metadata, tmp){

 if(!missing(taxonomy)){
 taxonomy<-read_qza(taxonomy, tmp=tmp)$data
-taxt<-suppressWarnings(do.call(rbind, strsplit(as.character(taxonomy$Taxon),"\\; ")))
+taxt<-strsplit(as.character(taxonomy$Taxon),"\\; ")
+taxt<-lapply(taxt, function(x){length(x)=7;return(x)})
+taxt<-do.call(rbind, taxt)
+taxt<-apply(taxt,2, function(x) replace(x, grepl("^[kpcofgs]__$", x), "Not_Assigned"))
 rownames(taxt)<-taxonomy$Feature.ID
+colnames(taxt)<-c("Kingdom","Phylum","Class","Order","Family","Genus","Species")
 argstring<-paste(argstring, "tax_table(taxt),")
 }

Expand All @@ -56,3 +60,4 @@ qza_to_phyloseq<-function(features,tree,taxonomy,metadata, tmp){

return(physeq)
}

36 changes: 36 additions & 0 deletions inst/sample-metadata.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#SampleID BarcodeSequence LinkerPrimerSequence BodySite Year Month Day Subject ReportedAntibioticUsage DaysSinceExperimentStart Description
#q2:types categorical categorical categorical numeric numeric numeric categorical categorical numeric categorical
L1S8 AGCTGACTAGTC GTGCCAGCMGCCGCGGTAA gut 2008 10 28 subject-1 Yes 0 subject-1.gut.2008-10-28
L1S57 ACACACTATGGC GTGCCAGCMGCCGCGGTAA gut 2009 1 20 subject-1 No 84 subject-1.gut.2009-1-20
L1S76 ACTACGTGTGGT GTGCCAGCMGCCGCGGTAA gut 2009 2 17 subject-1 No 112 subject-1.gut.2009-2-17
L1S105 AGTGCGATGCGT GTGCCAGCMGCCGCGGTAA gut 2009 3 17 subject-1 No 140 subject-1.gut.2009-3-17
L2S155 ACGATGCGACCA GTGCCAGCMGCCGCGGTAA left palm 2009 1 20 subject-1 No 84 subject-1.left-palm.2009-1-20
L2S175 AGCTATCCACGA GTGCCAGCMGCCGCGGTAA left palm 2009 2 17 subject-1 No 112 subject-1.left-palm.2009-2-17
L2S204 ATGCAGCTCAGT GTGCCAGCMGCCGCGGTAA left palm 2009 3 17 subject-1 No 140 subject-1.left-palm.2009-3-17
L2S222 CACGTGACATGT GTGCCAGCMGCCGCGGTAA left palm 2009 4 14 subject-1 No 168 subject-1.left-palm.2009-4-14
L3S242 ACAGTTGCGCGA GTGCCAGCMGCCGCGGTAA right palm 2008 10 28 subject-1 Yes 0 subject-1.right-palm.2008-10-28
L3S294 CACGACAGGCTA GTGCCAGCMGCCGCGGTAA right palm 2009 1 20 subject-1 No 84 subject-1.right-palm.2009-1-20
L3S313 AGTGTCACGGTG GTGCCAGCMGCCGCGGTAA right palm 2009 2 17 subject-1 No 112 subject-1.right-palm.2009-2-17
L3S341 CAAGTGAGAGAG GTGCCAGCMGCCGCGGTAA right palm 2009 3 17 subject-1 No 140 subject-1.right-palm.2009-3-17
L3S360 CATCGTATCAAC GTGCCAGCMGCCGCGGTAA right palm 2009 4 14 subject-1 No 168 subject-1.right-palm.2009-4-14
L5S104 CAGTGTCAGGAC GTGCCAGCMGCCGCGGTAA tongue 2008 10 28 subject-1 Yes 0 subject-1.tongue.2008-10-28
L5S155 ATCTTAGACTGC GTGCCAGCMGCCGCGGTAA tongue 2009 1 20 subject-1 No 84 subject-1.tongue.2009-1-20
L5S174 CAGACATTGCGT GTGCCAGCMGCCGCGGTAA tongue 2009 2 17 subject-1 No 112 subject-1.tongue.2009-2-17
L5S203 CGATGCACCAGA GTGCCAGCMGCCGCGGTAA tongue 2009 3 17 subject-1 No 140 subject-1.tongue.2009-3-17
L5S222 CTAGAGACTCTT GTGCCAGCMGCCGCGGTAA tongue 2009 4 14 subject-1 No 168 subject-1.tongue.2009-4-14
L1S140 ATGGCAGCTCTA GTGCCAGCMGCCGCGGTAA gut 2008 10 28 subject-2 Yes 0 subject-2.gut.2008-10-28
L1S208 CTGAGATACGCG GTGCCAGCMGCCGCGGTAA gut 2009 1 20 subject-2 No 84 subject-2.gut.2009-1-20
L1S257 CCGACTGAGATG GTGCCAGCMGCCGCGGTAA gut 2009 3 17 subject-2 No 140 subject-2.gut.2009-3-17
L1S281 CCTCTCGTGATC GTGCCAGCMGCCGCGGTAA gut 2009 4 14 subject-2 No 168 subject-2.gut.2009-4-14
L2S240 CATATCGCAGTT GTGCCAGCMGCCGCGGTAA left palm 2008 10 28 subject-2 Yes 0 subject-2.left-palm.2008-10-28
L2S309 CGTGCATTATCA GTGCCAGCMGCCGCGGTAA left palm 2009 1 20 subject-2 No 84 subject-2.left-palm.2009-1-20
L2S357 CTAACGCAGTCA GTGCCAGCMGCCGCGGTAA left palm 2009 3 17 subject-2 No 140 subject-2.left-palm.2009-3-17
L2S382 CTCAATGACTCA GTGCCAGCMGCCGCGGTAA left palm 2009 4 14 subject-2 No 168 subject-2.left-palm.2009-4-14
L3S378 ATCGATCTGTGG GTGCCAGCMGCCGCGGTAA right palm 2008 10 28 subject-2 Yes 0 subject-2.right-palm.2008-10-28
L4S63 CTCGTGGAGTAG GTGCCAGCMGCCGCGGTAA right palm 2009 1 20 subject-2 No 84 subject-2.right-palm.2009-1-20
L4S112 GCGTTACACACA GTGCCAGCMGCCGCGGTAA right palm 2009 3 17 subject-2 No 140 subject-2.right-palm.2009-3-17
L4S137 GAACTGTATCTC GTGCCAGCMGCCGCGGTAA right palm 2009 4 14 subject-2 No 168 subject-2.right-palm.2009-4-14
L5S240 CTGGACTCATAG GTGCCAGCMGCCGCGGTAA tongue 2008 10 28 subject-2 Yes 0 subject-2.tongue.2008-10-28
L6S20 GAGGCTCATCAT GTGCCAGCMGCCGCGGTAA tongue 2009 1 20 subject-2 No 84 subject-2.tongue.2009-1-20
L6S68 GATACGTCCTGA GTGCCAGCMGCCGCGGTAA tongue 2009 3 17 subject-2 No 140 subject-2.tongue.2009-3-17
L6S93 GATTAGCACTCT GTGCCAGCMGCCGCGGTAA tongue 2009 4 14 subject-2 No 168 subject-2.tongue.2009-4-14

0 comments on commit 337067a

Please sign in to comment.