From b87875a33dc7c3960f4b193b4c33b4c2b59604b5 Mon Sep 17 00:00:00 2001
From: Philipp Resl
Date: Fri, 1 Apr 2022 15:32:36 +0200
Subject: [PATCH] mltree: Separate iqtree from preparing iqtree input. This is
 now done in a new rule prepare_iqtree

---
Review notes (this section sits between the "---" marker and the first
"diff --git" line and is not part of the commit message or the diff):
 * prepare_iqtree: removed the stray "concat.nex" word that followed the
   redirection in `echo "end;" >> {output.nexus} concat.nex`. The shell passed
   it to echo as a second argument, so the partition file ended with
   "end; concat.nex" instead of "end;".
 * prepare_iqtree: `mkdir` became `mkdir -p` so that an already existing algn
   directory does not abort the shell block.
 * Both changes edit a line in place; hunk line counts and the diffstat below
   are unchanged.
 * This copy of the patch had lost its line breaks and indentation. Both were
   reconstructed: tabs are assumed for rules/tree.smk and two spaces for the
   YAML templates - TODO confirm against the repository, or apply with
   `git apply --ignore-whitespace`.

 data/cluster-config-SGE.yaml.template    |  2 +
 data/cluster-config-SLURM.yaml.template  |  2 +
 data/cluster-config-TORQUE.yaml.template |  2 +
 rules/tree.smk                           | 68 ++++++++++++++----------
 4 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/data/cluster-config-SGE.yaml.template b/data/cluster-config-SGE.yaml.template
index e796b91..df650d3 100644
--- a/data/cluster-config-SGE.yaml.template
+++ b/data/cluster-config-SGE.yaml.template
@@ -87,6 +87,8 @@ modeltest:
 raxmlng:
   N: raxmlng
   h_vmem: 8G
+prepare_iqtree:
+  N: prepiqt
 iqtree:
   N: iqtree
   h_vmem: 16G
diff --git a/data/cluster-config-SLURM.yaml.template b/data/cluster-config-SLURM.yaml.template
index ede6801..767071e 100644
--- a/data/cluster-config-SLURM.yaml.template
+++ b/data/cluster-config-SLURM.yaml.template
@@ -99,6 +99,8 @@ modeltest:
 raxmlng:
   job-name: raxmlng
   mem: 94G
+prepare_iqtree:
+  job-name: prepiqt
 iqtree:
   job-name: iqtree
   mem: 94G
diff --git a/data/cluster-config-TORQUE.yaml.template b/data/cluster-config-TORQUE.yaml.template
index 6235a53..c3ee2e9 100644
--- a/data/cluster-config-TORQUE.yaml.template
+++ b/data/cluster-config-TORQUE.yaml.template
@@ -88,6 +88,8 @@ modeltest:
 raxmlng:
   N: raxmlng
   h_vmem: 8gb
+prepare_iqtree:
+  N: prepiqt
 iqtree:
   N: iqtree
   mem: 94gb
diff --git a/rules/tree.smk b/rules/tree.smk
index d31ca89..e5baec5 100644
--- a/rules/tree.smk
+++ b/rules/tree.smk
@@ -69,11 +69,46 @@ rule raxmlng:
 		echo -e $statistics_string > {params.wd}/{output.statistics}
 		touch {params.wd}/{output.checkpoint}
 		"""
-rule iqtree:
+
+rule prepare_iqtree:
 	input:
-#		"results/statistics/filter-{aligner}-{alitrim}/alignment_filter_information_{alitrim}_{aligner}.txt"
-#		"results/checkpoints/modes/modeltest.done"
 		"results/checkpoints/modeltest/aggregate_best_models_{aligner}_{alitrim}.done"
+	output:
+		algn = directory("results/phylogeny-{bootstrap}/iqtree/{aligner}-{alitrim}/algn"),
+		nexus = "results/phylogeny-{bootstrap}/iqtree/{aligner}-{alitrim}/concat.nex"
+	singularity:
+		containers["iqtree"]
+	params:
+		wd = os.getcwd(),
+		models = "results/modeltest/best_models_{aligner}_{alitrim}.txt",
+		genes = get_input_genes
+	shell:
+		"""
+		mkdir -p {output.algn}
+		echo "$(date) - prepare_iqtree {wildcards.aligner}-{wildcards.alitrim}: Will use bootstrap cutoff ({wildcards.bootstrap}) before creating concatenated alignment" >> {params.wd}/results/statistics/runlog.txt
+		for gene in $(echo "{params.genes}")
+		do
+			cp {params.wd}/results/alignments/filtered/{wildcards.aligner}-{wildcards.alitrim}/"$gene"_aligned_trimmed.fas {output.algn}/
+		done
+
+		echo "Will create NEXUS partition file with model information now." >> {params.wd}/results/statistics/runlog.txt
+		echo "#nexus" > {output.nexus}
+		echo "begin sets;" >> {output.nexus}
+		i=1
+		charpart=""
+		for gene in $(echo "{params.genes}")
+		do
+			cat {params.wd}/{params.models} | grep $gene | awk -v i=$i '{{print "charset part"i" = algn/"$1"_aligned_trimmed.fas:*;"}}' >> {output.nexus}
+			charpart=$charpart$(cat {params.wd}/{params.models} | grep $gene | awk -v i=$i '{{printf($2":part"i", ")}}' | sed 's/\\(.*\\), /\\1, /')
+			i=$((++i))
+		done
+		echo "charpartition mine = "$charpart";" >> {output.nexus}
+		echo "end;" >> {output.nexus}
+		echo "$(date) - nexus file for iqtree written." >> {params.wd}/results/statistics/runlog.txt
+		"""
+rule iqtree:
+	input:
+		rules.prepare_iqtree.output
 	output:
 		checkpoint = "results/checkpoints/iqtree_{aligner}_{alitrim}_{bootstrap}.done",
 		statistics = "results/statistics/mltree/mltree_iqtree_{aligner}_{alitrim}_{bootstrap}_statistics.txt"
@@ -95,36 +130,11 @@ rule iqtree:
 		config["iqtree"]["threads"]
 	shell:
 		"""
-		rm -rf results/phylogeny-{wildcards.bootstrap}/iqtree/{wildcards.aligner}-{wildcards.alitrim}/algn
-		if [[ ! -d results/phylogeny-{wildcards.bootstrap}/iqtree/{wildcards.aligner}-{wildcards.alitrim} ]]; then mkdir -p results/phylogeny-{wildcards.bootstrap}/iqtree/{wildcards.aligner}-{wildcards.alitrim}; fi
 		cd results/phylogeny-{wildcards.bootstrap}/iqtree/{wildcards.aligner}-{wildcards.alitrim}/
-		mkdir algn
-		echo "$(date) - iqtree {wildcards.aligner}-{wildcards.alitrim}: Will use bootstrap cutoff ({wildcards.bootstrap}) before creating concatenated alignment" >> {params.wd}/results/statistics/runlog.txt
-		for gene in $(echo "{params.genes}")
-		do
-			cp {params.wd}/results/alignments/filtered/{wildcards.aligner}-{wildcards.alitrim}/"$gene"_aligned_trimmed.fas algn
-		done
-		# here we decide how iqtree should be run. In case modeltesting was run before, this will not be repeated here.
-		echo "Will create NEXUS partition file with model information now."
-		echo "#nexus" > concat.nex
-		echo "begin sets;" >> concat.nex
-		i=1
-		charpart=""
-		for gene in $(echo "{params.genes}")
-		do
-			cat {params.wd}/{params.models} | grep $gene | awk -v i=$i '{{print "charset part"i" = algn/"$1"_aligned_trimmed.fas:*;"}}' >> concat.nex
-			charpart=$charpart$(cat {params.wd}/{params.models} | grep $gene | awk -v i=$i '{{printf($2":part"i", ")}}' | sed 's/\\(.*\\), /\\1, /')
-			i=$((++i))
-		done
-		echo "charpartition mine = "$charpart";" >> concat.nex
-		echo "end;" >> concat.nex
-		echo "$(date) - nexus file for iqtree written." >> {params.wd}/results/statistics/runlog.txt
 		iqtree -p concat.nex --prefix concat -bb {params.bb} -nt {threads} $(if [[ "{params.seed}" != "None" ]]; then echo "-seed {params.seed}"; fi) {params.additional_params}
-		#rm -r algn
 		statistics_string="iqtree\t{wildcards.aligner}\t{wildcards.alitrim}\t{params.bb}\t{wildcards.bootstrap}\t$(ls algn | wc -l)\t$(cat concat.contree)"
 		echo -e $statistics_string > {params.wd}/{output.statistics}
-		cd {params.wd}
-		touch {output.checkpoint}
+		touch {params.wd}/{output.checkpoint}
 		"""
 
 if "phylobayes" in config["tree"]["method"]: