Skip to content

Commit

Permalink
Update for compatibility with Microbiome 0.9 (breaking) (#99)
Browse files Browse the repository at this point in the history
* add ReTest

* bump version

* fix indexing breaks

* fix lack of inline tests
  • Loading branch information
kescobo authored Feb 20, 2022
1 parent aad6cb4 commit b7cb366
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 115 deletions.
6 changes: 4 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,22 @@ keywords = ["microbiology", "microbiome", "biology", "metagenomics"]
license = "MIT"
desc = "Convenience functions for working with the bioBakery"
authors = ["kescobo <kevbonham@gmail.com>", "annelle-abatoni <aa1@wellesley.edu>", "anikaluo <al8@wellesley.edu>", "Vanja Klepac-Ceraj <vklepacc@wellesley.edu>"]
version = "0.5.6"
version = "0.6"

[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
Microbiome = "3bd8f0ae-a0f2-5238-a5af-e1b399a4940c"
ReTest = "e0db7c4e-2690-44b9-bad6-7687da720f89"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
CSV = "0.8, 0.9, 0.10"
Conda = "1.5"
Microbiome = "0.8, 0.9"
Microbiome = "0.9"
Reexport = "0.2, 1"
ReTest = "0.3"
Tables = "1.2.1"
julia = "1.6"
1 change: 1 addition & 0 deletions src/BiobakeryUtils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ using CSV
using Tables
using SparseArrays
using Conda
using ReTest

include("utils.jl")
include("metaphlan.jl")
Expand Down
117 changes: 117 additions & 0 deletions test/BiobakeryUtilsTests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# ReTest-based test suite for BiobakeryUtils.
#
# The testsets below are declared inside a module so that a runner can
# `include` this file and then call `BiobakeryUtilsTests.runtests()`.
# The statements outside of any `@testset` (dependency installation and the
# PATH update) are executed when the module is loaded.
module BiobakeryUtilsTests

using Random
using ReTest
using BiobakeryUtils
using BiobakeryUtils.Conda
using SparseArrays
using DelimitedFiles
using CSV

# Directories holding the fixture files, resolved relative to this file so the
# suite does not depend on the current working directory.
const METAPHLAN_FILES = joinpath(@__DIR__, "files/metaphlan")
const HUMANN_FILES = joinpath(@__DIR__, "files/humann")

# Install the command line tools on first use, then expose the Conda
# environment's bin directory on PATH so the executables can be found.
isdir(Conda.bin_dir(:BiobakeryUtils)) || BiobakeryUtils.install_deps()
# PATH entries are separated by ';' on Windows and ':' everywhere else.
ENV["PATH"] = ENV["PATH"] * (Sys.iswindows() ? ";" : ":") * Conda.bin_dir(:BiobakeryUtils)

@testset "CLI" begin
    @testset "Utilities" begin
        cmd = ["thing", "foo_bar"]
        cmd2 = copy(cmd)

        # By default, underscores in option names are passed through unchanged.
        BiobakeryUtils.add_cli_kwargs!(cmd, Dict(:some_thing => "foo", :bool => true))
        @test all(cmd .== ["thing", "foo_bar", "--some_thing", "foo", "--bool"])

        # With `optunderscores=false`, underscores in option names become dashes.
        BiobakeryUtils.add_cli_kwargs!(
            cmd2, Dict(:some_thing => "foo", :bool => true); optunderscores=false
        )
        @test all(cmd2 .== ["thing", "foo_bar", "--some-thing", "foo", "--bool"])
    end

    @testset "Metaphlan" begin
        @test BiobakeryUtils.check_for_install("metaphlan") |> isnothing
        @test BiobakeryUtils.check_for_install("merge_metaphlan_tables.py") |> isnothing

        @test metaphlan("", ""; help=true).exitcode == 0

        profiles = filter(
            f -> contains(f, "_profile.tsv"), readdir(METAPHLAN_FILES; join=true)
        )
        merged = joinpath(METAPHLAN_FILES, "merged_abundance_table.tsv")
        @test metaphlan_merge(profiles, merged).exitcode == 0
    end

    @testset "Humann" begin
        @test BiobakeryUtils.check_for_install("humann") |> isnothing
        @test BiobakeryUtils.check_for_install("humann_rename_table") |> isnothing
        @test BiobakeryUtils.check_for_install("humann_renorm_table") |> isnothing
        @test BiobakeryUtils.check_for_install("humann_join_tables") |> isnothing
        @test humann("", ""; help=true).exitcode == 0
    end
end

@testset "Metaphlan" begin
    nares_file = joinpath(METAPHLAN_FILES, "SRS014464-Anterior_nares_profile.tsv")
    stool_file = joinpath(METAPHLAN_FILES, "SRS014459-Stool_profile.tsv")
    # NOTE(review): this path is also the output path given to `metaphlan_merge`
    # in the "CLI" testset above -- confirm whether the file is checked in or
    # whether this testset relies on the CLI testset having run first.
    merged_file = joinpath(METAPHLAN_FILES, "merged_abundance_table.tsv")

    # Single-sample profiles: all levels, then filtered by integer / symbol level.
    profile_1 = metaphlan_profile(nares_file; sample="SRS014464")
    @test profile_1["k__Bacteria", "SRS014464"] == 100.0
    @test profile_1["o__Pseudomonadales", "SRS014464"] == 97.28734
    @test size(profile_1) == (13, 1)
    profile_2 = metaphlan_profile(stool_file, 3)
    @test size(profile_2) == (2, 1)
    @test profile_2["p__Firmicutes", "SRS014459-Stool_profile"] == 68.90167
    profile_3 = metaphlan_profile(nares_file, :phylum)
    @test size(profile_3) == (2, 1)
    @test profile_3["p__Proteobacteria", 1] == 97.28734

    # Profiles read from a single merged table.
    merge_profile_1 = metaphlan_profiles(merged_file; samplestart=3)
    @test size(merge_profile_1) == (62, 6)
    @test merge_profile_1["g__Moraxella", 5] == 97.28734
    merge_profile_2 = metaphlan_profiles(merged_file, :family; samplestart=3)
    @test size(merge_profile_2) == (13, 6)
    @test merge_profile_2["f__Micrococcaceae", "SRS014464-Anterior_nares"] == 0.0
    merge_profile_3 = metaphlan_profiles(merged_file, 7; samplestart=3)
    @test size(merge_profile_3) == (16, 6)
    @test merge_profile_3["s__Haemophilus_haemolyticus", 3] == 1.35528
    # Not wrapped in `@test`: only checks that writing the profile does not throw.
    CSV.write(joinpath(METAPHLAN_FILES, "merged_abundance_table2.csv"), merge_profile_1)

    # Profiles assembled from several single-sample files.
    profiles = filter(
        f -> contains(f, "_profile.tsv"), readdir(METAPHLAN_FILES; join=true)
    )
    sample_ids = ["sample$i" for i in eachindex(profiles)]
    # A `samples` vector of a different length than `profiles` is rejected.
    @test_throws ArgumentError metaphlan_profiles(profiles; samples = ["sample1"])
    multi_profile_1 = metaphlan_profiles(profiles; samples=sample_ids)
    @test abundances(multi_profile_1) == abundances(metaphlan_profiles(profiles))
    @test size(multi_profile_1) == (62, 6)
    @test multi_profile_1["p__Firmicutes", "sample1"] == 68.90167
    multi_profile_2 = metaphlan_profiles(profiles, 3; samples=sample_ids)
    @test abundances(multi_profile_2) == abundances(metaphlan_profiles(profiles, :class))
    @test size(multi_profile_2) == (6, 6)
    @test multi_profile_2["c__Bacteroidia", "sample1"] == 31.09833

    # Parsing of pipe-delimited taxonomy strings.
    taxstring = "k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanobrevibacter|s__Methanobrevibacter_smithii"
    taxa = parsetaxa(taxstring)
    @test length(taxa) == 7
    @test parsetaxon(taxstring, 1) == Taxon("Archaea", :kingdom)
    @test parsetaxon(taxstring, :family) == Taxon("Methanobacteriaceae", :family)
    @test parsetaxon(taxstring) == Taxon("Methanobrevibacter_smithii", :species)
    @test_throws ArgumentError parsetaxon(taxstring, 8)

    @test parsetaxon("k__Archaea|p__Euryarchaeota|c__Methanobacteria", 2) == Taxon("Euryarchaeota", :phylum)
    @test parsetaxon("k__Archaea|p__Euryarchaeota|c__Methanobacteria") == Taxon("Methanobacteria", :class)
end

@testset "HUMAnN" begin
    single_1 = joinpath(HUMANN_FILES, "single_1.tsv")
    single_2 = joinpath(HUMANN_FILES, "single_2.tsv")
    joined = joinpath(HUMANN_FILES, "joined.tsv")

    p1 = humann_profile(single_1)
    p2 = humann_profile(single_2)
    @test p1 isa CommunityProfile
    @test size(p1) == (560, 1)
    # The sample name defaults to the file name without its extension, and can
    # be overridden with either a string or a `MicrobiomeSample`.
    @test samplenames(p1) == ["single_1"]
    @test samplenames(humann_profile(single_1; sample = "sample1")) == ["sample1"]
    @test samplenames(humann_profile(single_1; sample = MicrobiomeSample("sample1"))) == ["sample1"]

    @test all(!hastaxon, features(p1)) # unstratified
    @test all(f -> !occursin('|', name(f)), features(p1))

    # A joined table must agree with joining the single-sample profiles.
    pj = humann_profiles(joined)
    @test size(pj) == (560, 2)
    @test isempty(setdiff(features(pj), features(commjoin(p1, p2))))
    @test samplenames(pj) == samplenames(commjoin(p1, p2))

    # The stratified table has extra features, but no new feature names.
    pj_strat = humann_profiles(joined; stratified = true)
    @test size(pj_strat) == (1358, 2)
    @test !isempty(setdiff(features(pj_strat), features(pj)))
    @test isempty(setdiff(featurenames(pj_strat), featurenames(pj)))
    @test isempty(setdiff(features(filter(!hastaxon, pj_strat)), features(pj)))
    # Not wrapped in `@test`: only checks that writing the profile does not throw.
    CSV.write(joinpath(HUMANN_FILES, "joined_roundtrip.tsv"), pj_strat; delim='\t')
end

end # module
1 change: 1 addition & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
Microbiome = "3bd8f0ae-a0f2-5238-a5af-e1b399a4940c"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ReTest = "e0db7c4e-2690-44b9-bad6-7687da720f89"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
118 changes: 5 additions & 113 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,114 +1,6 @@
using Random
using Test
using BiobakeryUtils
using BiobakeryUtils.Conda
using SparseArrays
using DelimitedFiles
using CSV

isdir(Conda.bin_dir(:BiobakeryUtils)) || BiobakeryUtils.install_deps()
ENV["PATH"] = ENV["PATH"] * ":" * Conda.bin_dir(:BiobakeryUtils)

@testset "CLI" begin
@testset "Utilities" begin
cmd = ["thing", "foo_bar"]
cmd2 = copy(cmd)

BiobakeryUtils.add_cli_kwargs!(cmd, Dict(:some_thing=> "foo", :bool=> true))
@test all(cmd .== ["thing", "foo_bar", "--some_thing", "foo", "--bool"])
BiobakeryUtils.add_cli_kwargs!(cmd2, Dict(:some_thing=> "foo", :bool=> true); optunderscores=false)
@test all(cmd2 .== ["thing", "foo_bar", "--some-thing", "foo", "--bool"])
end

@testset "Metaphlan" begin
@test BiobakeryUtils.check_for_install("metaphlan") |> isnothing
@test BiobakeryUtils.check_for_install("merge_metaphlan_tables.py") |> isnothing

@test metaphlan("", ""; help=true).exitcode == 0

profiles = filter(f-> contains(f, "_profile.tsv"), readdir("files/metaphlan", join=true))
@test metaphlan_merge(profiles, "files/metaphlan/merged_abundance_table.tsv").exitcode == 0
end

@testset "Humann" begin
@test BiobakeryUtils.check_for_install("humann") |> isnothing
@test BiobakeryUtils.check_for_install("humann_rename_table") |> isnothing
@test BiobakeryUtils.check_for_install("humann_renorm_table") |> isnothing
@test BiobakeryUtils.check_for_install("humann_join_tables") |> isnothing
@test BiobakeryUtils.check_for_install("humann") |> isnothing
@test humann("", ""; help=true).exitcode == 0

end
end

@testset "Metaphlan" begin
profile_1 = metaphlan_profile("files/metaphlan/SRS014464-Anterior_nares_profile.tsv"; sample="SRS014464")
@test first(abundances(profile_1["Bacteria", "SRS014464"])) == 100.0
@test first(abundances(profile_1["Pseudomonadales", "SRS014464"])) == 97.28734
@test size(profile_1) == (13, 1)
profile_2 = metaphlan_profile("files/metaphlan/SRS014459-Stool_profile.tsv", 3)
@test size(profile_2) == (2, 1)
@test first(abundances(profile_2["Firmicutes", "SRS014459-Stool_profile"])) == 68.90167
profile_3 = metaphlan_profile("files/metaphlan/SRS014464-Anterior_nares_profile.tsv", :phylum)
@test size(profile_3) == (2, 1)
@test first(abundances(profile_3["Proteobacteria", 1])) == 97.28734

merge_profile_1 = metaphlan_profiles("files/metaphlan/merged_abundance_table.tsv"; samplestart=3)
@test size(merge_profile_1) == (62, 6)
@test first(abundances(merge_profile_1["Moraxella", 5])) == 97.28734
merge_profile_2 = metaphlan_profiles("files/metaphlan/merged_abundance_table.tsv", :family; samplestart=3)
@test size(merge_profile_2) == (13, 6)
@test first(abundances(merge_profile_2["Micrococcaceae", "SRS014464-Anterior_nares"])) == 0.0
merge_profile_3 = metaphlan_profiles("files/metaphlan/merged_abundance_table.tsv", 7; samplestart=3)
@test size(merge_profile_3) == (16, 6)
@test first(abundances(merge_profile_3["Haemophilus_haemolyticus", 3])) == 1.35528
CSV.write("files/metaphlan/merged_abundance_table2.csv", merge_profile_1)

profiles = filter(f-> contains(f, "_profile.tsv"), readdir("files/metaphlan", join=true))
@test_throws ArgumentError metaphlan_profiles(profiles; samples = ["sample1"])
multi_profile_1 = metaphlan_profiles(profiles; samples=["sample$i" for i in 1:length(profiles)])
@test abundances(multi_profile_1) == abundances(metaphlan_profiles(profiles))
@test size(multi_profile_1) == (62, 6)
@test first(abundances(multi_profile_1["Firmicutes", "sample1"])) == 68.90167
multi_profile_2 = metaphlan_profiles(profiles, 3; samples=["sample$i" for i in 1:length(profiles)])
@test abundances(multi_profile_2) == abundances(metaphlan_profiles(profiles, :class))
@test size(multi_profile_2) == (6,6)
@test first(abundances(multi_profile_2["Bacteroidia", "sample1"])) == 31.09833

taxstring = "k__Archaea|p__Euryarchaeota|c__Methanobacteria|o__Methanobacteriales|f__Methanobacteriaceae|g__Methanobrevibacter|s__Methanobrevibacter_smithii"
taxa = parsetaxa(taxstring)
@test length(taxa) == 7
@test parsetaxon(taxstring, 1) == Taxon("Archaea", :kingdom)
@test parsetaxon(taxstring, :family) == Taxon("Methanobacteriaceae", :family)
@test parsetaxon(taxstring) == Taxon("Methanobrevibacter_smithii", :species)
@test_throws ArgumentError parsetaxon(taxstring, 8)

@test parsetaxon("k__Archaea|p__Euryarchaeota|c__Methanobacteria", 2) == Taxon("Euryarchaeota", :phylum)
@test parsetaxon("k__Archaea|p__Euryarchaeota|c__Methanobacteria") == Taxon("Methanobacteria", :class)
end

@testset "HUMAnN" begin
p1 = humann_profile("files/humann/single_1.tsv")
p2 = humann_profile("files/humann/single_2.tsv")
@test p1 isa CommunityProfile
@test size(p1) == (560, 1)
@test samplenames(p1) == ["single_1"]
@test samplenames(humann_profile("files/humann/single_1.tsv"; sample = "sample1")) == ["sample1"]
@test samplenames(humann_profile("files/humann/single_1.tsv"; sample = MicrobiomeSample("sample1"))) == ["sample1"]

@test all(f-> !hastaxon(f), features(p1)) # unstratified
@test all(f-> !occursin('|', name(f)), features(p1))

pj = humann_profiles("files/humann/joined.tsv")
@test size(pj) == (560, 2)
@test isempty(setdiff(features(pj), features(commjoin(p1, p2))))
@test samplenames(pj) == samplenames(commjoin(p1, p2))

pj_strat = humann_profiles("files/humann/joined.tsv"; stratified = true)
@test size(pj_strat) == (1358, 2)
@test !isempty(setdiff(features(pj_strat), features(pj)))
@test isempty(setdiff(featurenames(pj_strat), featurenames(pj)))
@test isempty(setdiff(features(filter(!hastaxon, pj_strat)), features(pj)))
CSV.write("files/humann/joined_roundtrip.tsv", pj_strat; delim='\t')
end
# Load the test module defined in BiobakeryUtilsTests.jl, then run its testsets.
include("BiobakeryUtilsTests.jl")
BiobakeryUtilsTests.runtests()

# Uncomment the two lines below if there are ever inline tests:
# using BiobakeryUtils
# BiobakeryUtils.runtests()

2 comments on commit b7cb366

@kescobo
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/55058

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.6.0 -m "<description of version>" b7cb366131d5e6259f9de661586fde81bf89b22c
git push origin v0.6.0

Please sign in to comment.