diff --git a/README.md b/README.md
index f9cbea7..a5ec410 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,7 @@ so alternatively install `mamba` and use that (snakemake has beta support for it
     conda install -c conda-forge mamba
     mamba create -c conda-forge -c bioconda -n signal snakemake pandas conda mamba
     conda activate signal
+    # mamba activate signal is equivalent
 
 Additional software dependencies are managed directly by `snakemake` using conda environment files:
 
@@ -79,31 +80,27 @@ Additional software dependencies are managed directly by `snakemake` using conda
 
 ## SIGNAL Help Screen:
 
-Using the provided `signal.py` script, the majority of SIGNAL functions can be accessed easily.
+Using the provided `signalexe.py` script, the majority of SIGNAL functions can be accessed easily.
 
 To display the help screen:
 
 ```
-python signal.py -h
+python signalexe.py -h
 
-Output:
-usage: signal.py [-h] [-c CONFIGFILE] [-d DIRECTORY] [--cores CORES] [--config-only] [--remove-freebayes] [--add-breseq]
-                 [-neg NEG_PREFIX] [--dependencies] [-ri] [--unlock] [-F] [-n] [--verbose] [-v]
-                 [all ...] [postprocess ...] [ncov_tools ...]
+usage: signalexe.py [-h] [-c CONFIGFILE] [-d DIRECTORY] [--cores CORES] [--config-only] [--remove-freebayes] [--add-breseq] [-neg NEG_PREFIX] [--dependencies] [--data DATA] [-ri] [-ii] [--unlock]
+                    [-F] [-n] [--quiet] [--verbose] [-v]
+                    [all ...] [postprocess ...] [ncov_tools ...] [install ...]
 
-SARS-CoV-2 Illumina GeNome Assembly Line (SIGNAL) aims to take Illumina short-read sequences and perform consensus assembly +
-variant calling for ongoing surveillance and research efforts towards the emergent coronavirus: Severe Acute Respiratory Syndrome
-Coronavirus 2 (SARS-CoV-2).
+SARS-CoV-2 Illumina GeNome Assembly Line (SIGNAL) aims to take Illumina short-read sequences and perform consensus assembly + variant calling for ongoing surveillance and research efforts towards
+the emergent coronavirus: Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2).
 
 positional arguments:
-  all                  Run SIGNAL with all associated assembly rules. Does not include postprocessing '--configfile' or '--
-                       directory' required. The latter will automatically generate a configuration file and sample table. If
-                       both provided, then '--configfile' will take priority
-  postprocess          Run SIGNAL postprocessing on completed SIGNAL run. '--configfile' is required but will be generated if '
-                       --directory' is provided
-  ncov_tools           Generate configuration file and filesystem setup required and then execute ncov-tools quality control
-                       assessment. Requires 'ncov-tools' submodule! '--configfile' is required but will be generated if '--
-                       directory' is provided
+  all                  Run SIGNAL with all associated assembly rules. Does not include postprocessing. '--configfile' or '--directory' required. The latter will automatically generate a
+                       configuration file and sample table. If both provided, then '--configfile' will take priority
+  postprocess          Run SIGNAL postprocessing on completed SIGNAL run. '--configfile' is required but will be generated if '--directory' is provided
+  ncov_tools           Generate configuration file and filesystem setup required and then execute ncov-tools quality control assessment. Requires 'ncov-tools' submodule! '--configfile' is required
+                       but will be generated if '--directory' is provided
+  install              Install individual rule environments and ensure SIGNAL is functional. The only operable parameter will be '--data'. Will override other operations!
 
 optional arguments:
   -h, --help            show this help message and exit
@@ -113,49 +110,57 @@ optional arguments:
                         Path to directory containing reads. Will be used to generate sample table and configuration file
   --cores CORES         Number of cores. Default = 1
   --config-only         Generate sample table and configuration file (i.e., config.yaml) and exit. '--directory' required
-  --remove-freebayes    Configuration file generator parameter. Set flag to DISABLE freebayes variant calling (improves overall
-                        speed)
-  --add-breseq          Configuration file generator parameter. Set flag to ENABLE optional breseq step (will take more time for
-                        analysis to complete)
+  --remove-freebayes    Configuration file generator parameter. Set flag to DISABLE freebayes variant calling (improves overall speed)
+  --add-breseq          Configuration file generator parameter. Set flag to ENABLE optional breseq step (will take more time for analysis to complete)
   -neg NEG_PREFIX, --neg-prefix NEG_PREFIX
-                        Configuration file generator parameter. Comma-separated list of negative sontrol sample name(s) or
-                        prefix(es). For example, 'Blank' will cover Blank1, Blank2, etc. Recommended if running ncov-tools. Will
-                        be left empty, if not provided
-  --dependencies        Download data dependencies (under a created 'data' directory) required for SIGNAL analysis and exit.
-                        Note: Will override other flags! (~10 GB storage required)
+                        Configuration file generator parameter. Comma-separated list of negative control sample name(s) or prefix(es). For example, 'Blank' will cover Blank1, Blank2, etc.
+                        Recommended if running ncov-tools. Will be left empty, if not provided
+  --dependencies        Download data dependencies (under a created 'data' directory) required for SIGNAL analysis and exit. Note: Will override other parameters! (~10 GB storage required)
+  --data DATA           SIGNAL install and data dependencies parameter. Set location for data dependencies. If '--dependencies' is run, a folder will be created in the specified directory. If '--
+                        config-only' or '--directory' is used, the value will be applied to the configuration file. (Upcoming feature): When used with 'SIGNAL install', any tests run will use the
+                        dependencies located at this directory. Default = 'data'
   -ri, --rerun-incomplete
                         Snakemake parameter. Re-run any incomplete samples from a previously failed run
+  -ii, --ignore-incomplete
+                        Snakemake parameter. Do not check for incomplete output files
   --unlock              Snakemake parameter. Remove a lock on the working directory after a failed run
   -F, --forceall        Snakemake parameter. Force the re-run of all rules regardless of prior output
   -n, --dry-run         Snakemake parameter. Do not execute anything and only display what would be done
+  --quiet               Snakemake parameter. Do not output any progress or rule information. If used with '--dry-run', it will just display a summary of the DAG of jobs
   --verbose             Snakemake parameter. Display snakemake debugging output
   -v, --version         Display version number
 ```
 
 ## Summary:
 
-`signal.py` simplies the execution of all functions of SIGNAL. At its simplest, SIGNAL can be run with one line, provided only the directory of sequencing reads.
+`signalexe.py` simplifies the execution of all functions of SIGNAL. At its simplest, SIGNAL can be run with one line, provided only the directory of sequencing reads.
 
 ```
-# Download dependances (only needs to be run once; ~10GB of storage required)
-python signal.py --dependencies
+# Download dependencies (only needs to be run once; ~10GB of storage required)
+# --data flag allows you to rename and relocate dependencies directory
+python signalexe.py --data data --dependencies
 
-# Generate configuration file and sample table (--neg_prefix can be used to note negative controls)
-python signal.py --config-only --directory /path/to/reads
+# Generate configuration file and sample table
+# --neg_prefix can be used to note negative controls
+# --data can be used to specify location of data dependencies
+python signalexe.py --config-only --directory /path/to/reads
 
 # Execute pipeline (step-by-step; --cores defaults to 1 if not provided)
-python signal.py --configfile config.yaml --cores NCORES aLL
-python signal.py --configfile config.yaml --cores NCORES postprocess
-python signal.py --configfile config.yaml --cores NCORES ncov_tools
+# --data can be used to specify location of data dependencies
+python signalexe.py --configfile config.yaml --cores NCORES all
+python signalexe.py --configfile config.yaml --cores NCORES postprocess
+python signalexe.py --configfile config.yaml --cores NCORES ncov_tools
 
 # ALTERNATIVE
 # Execute pipeline (one line)
-python signal.py --configfile config.yaml --cores NCORES all postprocess ncov_tools
+# --data can be used to specify location of data dependencies
+python signalexe.py --configfile config.yaml --cores NCORES all postprocess ncov_tools
 
 # ALTERNATIVE
 # Execute pipeline (one line; no prior configuration file or sample table steps)
 # --directory can be used in place of --configfile to automatically generate a configuration file
-python signal.py --directory /path/to/reads --cores NCORES all postprocess ncov_tools
+# --data can be used to specify location of data dependencies
+python signalexe.py --directory /path/to/reads --cores NCORES all postprocess ncov_tools
 ```
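Since the wrapper passes `--dry-run` and `--quiet` straight through to snakemake, the planned jobs can be previewed before committing to a full run. A small sketch (the configuration file name is illustrative):

```
# list the jobs SIGNAL would run, without executing anything
python signalexe.py --configfile config.yaml --cores 4 --dry-run all

# same, but condensed to a summary of the DAG of jobs
python signalexe.py --configfile config.yaml --cores 4 --dry-run --quiet all
```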
 
 Each of the steps in SIGNAL can be run **manually** by accessing the individual scripts or running snakemake.
 
@@ -187,8 +192,9 @@ The pipeline requires:
 - kraken2 viral database
 - Human GRCh38 reference fasta (for composite human-viral BWA index)
 
-	python signal.py --dependencies
+	python signalexe.py --dependencies
 
 	# defaults to a directory called `data` in repository root
+	# --data can be used to rename and relocate the resultant directory
 
 OR
 
@@ -197,14 +203,24 @@ The pipeline requires:
 
 **Note: Downloading the database files requires ~10GB of storage, with up to ~35GB required for all temporary downloads!**
 
+### 1.5. Prepare per-rule conda environments (optional but recommended):
+
+SIGNAL uses controlled conda environments for individual steps in the workflow. These are generally produced upon first execution of SIGNAL with input data; however, an option to install the per-rule environments ahead of time is available through the `signalexe.py` script.
+
+	python signalexe.py install
+
+	# Will install per-rule environments
+	# Later versions of SIGNAL will include a testing module with curated data to verify functionality
+
 ### 2. Generate configuration file:
 
-You can use the `--config-only` flag to generate both `config.yaml` and `sample_table.csv` (see step 4). The directory provided will be used to auto-generate a name for the run.
+You can use the `--config-only` flag to generate both `config.yaml` and `sample_table.csv`. The directory provided will be used to auto-generate a name for the run.
 
 ```
-python signal.py --config-only --directory /path/to/reads
+python signalexe.py --config-only --directory /path/to/reads
 # Outputs: 'reads_config.yaml' and 'reads_sample_table.csv'
+# --data can be used to specify the location of data dependencies
 ```
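For orientation, the auto-generated sample table is a simple CSV mapping each sample name to its paired reads. The sketch below assumes the column layout of `example_sample_table.csv`; sample names and paths are hypothetical:

```
sample,r1_path,r2_path
sampleA,/path/to/reads/sampleA_R1.fastq.gz,/path/to/reads/sampleA_R2.fastq.gz
Blank1,/path/to/reads/Blank1_R1.fastq.gz,/path/to/reads/Blank1_R2.fastq.gz
```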
 
 You can also create the configuration file by modifying the `example_config.yaml` to suit your system.
 
@@ -215,7 +231,7 @@ You can also create the configuration file by modifying the `example_config.yaml` to suit your system.
 
 See the example table `example_sample_table.csv` for an idea of how to organise this table.
 
-**Using the `--config-only` flag, both configuration file and sample table will be generated (see above in step 3) from a given directory path to reads.**
+**Using the `--config-only` flag, both configuration file and sample table will be generated (see above in step 2) from a given directory path to reads.**
 
 Alternatively, you can attempt to use `generate_sample_table.sh` to circumvent manual creation of the table.
 
@@ -248,7 +264,7 @@ bash scripts/generate_sample_table.sh -d /path/to/more/reads -e sample_table.csv
 
 ### 4. Execute pipeline:
 
-For the main `signal.py` script, positional arguments inform the rules of the pipeline to execute with flags supplementing input parameters.
+For the main `signalexe.py` script, positional arguments specify which rules of the pipeline to execute, with flags supplementing input parameters.
 
-The main rules of the pipeline are as followed:
+The main rules of the pipeline are as follows:
 
@@ -258,7 +274,7 @@ The main rules of the pipeline are as followed:
 
 The generated configuration file from the above steps can be used as input. To run the general pipeline:
 
-`python signal.py --configfile config.yaml --cores 4 all`
+`python signalexe.py --configfile config.yaml --cores 4 all`
 
 is equivalent to running
 
@@ -266,9 +282,9 @@ is equivalent to running
 
-You can run the snakemake command as written above, but note that if the `--conda-prefix` is not set as this (i.e., `$PWD/.snakemake/conda`), then all envs will be reinstalled for each time you change the `results_dir` in the `config.yaml`.
+You can run the snakemake command as written above, but note that if the `--conda-prefix` is not set as this (i.e., `$PWD/.snakemake/conda`), then all envs will be reinstalled each time you change the `results_dir` in the `config.yaml`.
 
-Alternatively, you can skip the above configuration and sample table generation steps by simply providing the directory of reads to the main script:
+Alternatively, you can skip the above configuration and sample table generation steps by simply providing the directory of reads to the main script (see step 2):
 
-`python signal.py --directory /path/to/reads --cores 4 all`
+`python signalexe.py --directory /path/to/reads --cores 4 all`
 
-A configuartion file and sample table will automatically be generated prior to running SIGNAL `all`.
+A configuration file and sample table will automatically be generated prior to running SIGNAL `all`.
 
@@ -278,7 +294,7 @@ FreeBayes variant calling and BreSeq mutational analysis are technically optional
 
 As with the general pipeline, the generated configuration file from the above steps can be used as input. To run `postprocess` which summarizes the SIGNAL results:
 
-`python signal.py --configfile config.yaml --cores 1 postprocess`
+`python signalexe.py --configfile config.yaml --cores 1 postprocess`
 
 is equivalent to running
 
@@ -306,7 +322,7 @@ Related: because pipeline stages can fail, we run (and recommend running if using snakemake directly)
 
 Additionally, SIGNAL can prepare output and execute @jts' [ncov-tools](https://github.com/jts/ncov-tools) to generate phylogenies and alternative summaries.
 
-`python signal.py --configfile config.yaml --cores 1 ncov_tools`
+`python signalexe.py --configfile config.yaml --cores 1 ncov_tools`
 
 is equivalent to running
 
@@ -318,17 +334,19 @@ SIGNAL will then execute ncov-tools and the **output will be found within the SIGNAL results directory.**
 
 ### Multiple operations:
 
-Using `signal.py` positional arguments, you can specify SIGNAL to perform multiple rules in succession.
+Using `signalexe.py` positional arguments, you can specify SIGNAL to perform multiple rules in succession.
 
-`python signal.py --configfile config.yaml --cores NCORES all postprocess ncov_tools`
+`python signalexe.py --configfile config.yaml --cores NCORES all postprocess ncov_tools`
 
 In the above command, SIGNAL `all`, `postprocess`, and `ncov_tools` will run using the provided configuration file as input, which links to a sample table.
 
 **Note: Regardless of order for positional arguments, or placement of other parameter flags, SIGNAL will always run in the set order priority: `all` > `postprocess` > `ncov_tools`!**
 
+**Note: If `install` is provided as input, it will override all other positional arguments!**
+
 If no configuration file or sample table was generated for a run, you can provide `--directory` with the path to sequencing reads and SIGNAL will auto-generate both required inputs prior to running any rules.
 
-`python signal.py --directory /path/to/reads --cores NCORES all postprocess ncov_tools`
+`python signalexe.py --directory /path/to/reads --cores NCORES all postprocess ncov_tools`
 
 Overall, this simplifies executing SIGNAL to one line!
 
@@ -359,6 +377,44 @@ Then execute the pipeline:
 
 - To generate summaries of BreSeq among many samples, see [how to summarize BreSeq results using gdtools](resources/dev_scripts/summaries/README.md)
 
+### Convenient extraction script:
+
+SIGNAL produces several output files and directories on its own alongside the output for ncov-tools. Select files from the output can be copied or transferred for easier parsing using a provided convenience bash script:
+
+```
+bash scripts/get_signal_results.sh
+
+Usage:
+bash get_signal_results.sh -s <SIGNAL_results_dir> -d <destination_dir> [-m] [-c]
+
+This script aims to copy (rsync by default, or cp) or move (mv) select output from SIGNAL 'all', 'postprocess', and 'ncov_tools'.
+
+The following files will be transferred over to the specified destination directory (if found):
+SIGNAL 'all' & 'postprocess':
+-> signal-results/<sample>/<sample>_sample.txt
+-> signal-results/<sample>/core/<sample>.consensus.fa
+-> signal-results/<sample>/core/<sample>_ivar_variants.tsv
+-> signal-results/<sample>/freebayes/<sample>.consensus.fasta
+-> signal-results/<sample>/freebayes/<sample>.variants.norm.vcf
+
+SIGNAL 'ncov_tools':
+-> ncov-tools-results/qc_annotation/<sample>.ann.vcf
+-> ncov-tools-results/qc_reports/<run>_ambiguous_position_report.tsv
+-> ncov-tools-results/qc_reports/<run>_mixture_report.tsv
+-> ncov-tools-results/qc_reports/<run>_ncov_watch_variants.tsv
+-> ncov-tools-results/qc_reports/<run>_negative_control_report.tsv
+-> ncov-tools-results/qc_reports/<run>_summary_qc.tsv
+
+Flags:
+    -s  :  SIGNAL results directory
+    -d  :  Directory where summary will be outputted
+    -m  :  Invoke 'mv' move command instead of 'rsync' copying of results. Optional
+    -c  :  Invoke 'cp' copy command instead of 'rsync' copying of results. Optional
+
+```
+
+The script uses `rsync` to produce accurate copies of select output files, organized into `signal-results` and `ncov-tools-results` within a provided destination directory (which must already exist). If `-c` is provided, `cp` will be used instead of `rsync` to produce copies. Similarly, if `-m` is provided, `mv` will be used instead (**WARNING: Any interruption during `mv` could result in data loss.**)
+
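As a usage sketch (the run and destination names here are hypothetical, and the destination directory must already exist):

```
# copy (rsync) select outputs of a completed run into a summary location
bash scripts/get_signal_results.sh -s results_dir/my_run -d ~/signal_summaries

# move instead of copy (faster, but see the data-loss warning above)
bash scripts/get_signal_results.sh -s results_dir/my_run -d ~/signal_summaries -m
```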
 ## Pipeline details:
 
 For a step-by-step walkthrough of the pipeline, see [pipeline/README.md](PIPELINE.md).
diff --git a/Snakefile b/Snakefile
index 76a4e5d..d807d96 100644
--- a/Snakefile
+++ b/Snakefile
@@ -127,7 +127,9 @@ rule clean_reads:
         expand('{sn}/mapped_clean_reads/{sn}_R{r}.fastq.gz', sn=sample_names, r=[1,2])
 
 rule consensus:
-    input: expand('{sn}/core/{sn}.consensus.fa', sn=sample_names)
+    input: expand('{sn}/core/{sn}.consensus.fa', sn=sample_names),
+           'all_genomes.fa',
+#           'failed_samples.log'
 
 rule ivar_variants:
     input: expand('{sn}/core/{sn}_ivar_variants.tsv', sn=sample_names)
@@ -136,14 +138,15 @@ rule breseq:
     input: expand('{sn}/breseq/output/index.html', sn=sample_names)
 
 rule freebayes:
-    input:
+    input:
+        'all_freebayes_genomes.fa',
+#        'failed_samples.log',
         expand('{sn}/freebayes/{sn}.consensus.fasta', sn=sample_names),
         expand('{sn}/freebayes/{sn}.variants.norm.vcf', sn=sample_names),
         'freebayes_lineage_assignments.tsv',
         expand('{sn}/freebayes/quast/{sn}_quast_report.html', sn=sample_names),
         expand('{sn}/freebayes/{sn}_consensus_compare.vcf', sn=sample_names)
 
-
 rule coverage:
     input: expand('{sn}/coverage/{sn}_depth.txt', sn=sample_names)
 
@@ -239,7 +242,8 @@ rule ncov_tools:
         negative_control_prefix = config['negative_control_prefix'],
         freebayes_run = config['run_freebayes'],
         pangolin = versions['pangolin'],
-        mode = pango_speed
+        mode = pango_speed,
+        failed = 'failed_samples.log'
     input:
         consensus = expand('{sn}/core/{sn}.consensus.fa', sn=sample_names),
         primertrimmed_bams = expand("{sn}/core/{sn}_viral_reference.mapping.primertrimmed.sorted.bam", sn=sample_names),
@@ -327,7 +331,7 @@ rule raw_reads_composite_reference_bwa_map:
     shell:
         '(bwa mem -t {threads} {params.composite_index} '
         '{input.raw_r1} {input.raw_r2} | '
-        '{params.script_path} -c {params.viral_contig_name} > {output}) 2> {log}'
+        "{params.script_path} -c {params.viral_contig_name} > {output}) 2> {log} || echo '' > {output}"
 
 rule get_host_removed_reads:
     threads: 2
@@ -375,7 +379,7 @@ rule run_trimgalore:
     shell:
         'trim_galore --quality {params.min_qual} --length {params.min_len} '
         ' -o {params.output_prefix} --cores {threads} --fastqc '
-        '--paired {input.raw_r1} {input.raw_r2} 2> {log} || touch {output}'
+        "--paired {input.raw_r1} {input.raw_r2} 2> {log} || (echo -e 'Total reads processed: 0\nReads written (passing filters): 0 (0.0%)\nTotal basepairs processed: 0 bp\nTotal written (filtered): 0 bp (0.0%)' >> {log}; touch {output})"
 
 rule run_filtering_of_residual_adapters:
     threads: 2
@@ -769,6 +773,32 @@ rule run_quast_freebayes:
         'quast {input} -r {params.genome} -g {params.fcoords} --output-dir {params.outdir} --threads {threads} >{log} && '
         'for f in {params.unlabelled_reports}; do mv $f ${{f/report/{params.sample_name}}}; done'
 
+rule collect_core_genomes:
+    output:
+        "all_genomes.fa"
+    input:
+        expand(['{sn}/core/{sn}.consensus.fa'], sn=sample_names)
+    shell:
+        """
+        cat {input} > {output}
+        sample=''
+        count=''
+        echo "Samples that failed to assemble:" > failed_samples.log
+        while read -r line;
+        do
+            if [[ $line =~ '>' ]]; then
+                sample=$(echo $line | cut -d'.' -f1 | cut -d'_' -f2)
+            else
+                count=$(echo $line | wc -c)
+                if [[ $count -eq 1 ]]; then
+                    echo $sample >> failed_samples.log
+                else
+                    continue
+                fi
+            fi
+        done < {output}
+        """
+
 rule run_lineage_assignment:
     threads: 4
     conda: 'conda_envs/assign_lineages.yaml'
@@ -777,7 +807,7 @@ rule run_lineage_assignment:
         nextclade_ver_out = 'input_nextclade_versions.txt',
         lin_out = 'lineage_assignments.tsv'
     input:
-        expand('{sn}/core/{sn}.consensus.fa', sn=sample_names)
+        'all_genomes.fa'
    params:
         pangolin_ver = versions['pangolin'],
         pangolearn_ver = versions['pangolearn'],
@@ -794,8 +824,53 @@ rule run_lineage_assignment:
     shell:
         "echo -e 'pangolin: {params.pangolin_ver}\nconstellations: {params.constellations_ver}\nscorpio: {params.scorpio_ver}\npangolearn: {params.pangolearn_ver}\npango-designation: {params.designation_ver}\npangolin-data: {params.data_ver}' > {output.pango_ver_out} && "
         "echo -e 'nextclade: {params.nextclade_ver}\nnextclade-dataset: {params.nextclade_data}\nnextclade-include-recomb: {params.nextclade_recomb}' > {output.nextclade_ver_out} && "
-        'cat {input} > all_genomes.fa && '
-        '{params.assignment_script_path} -i all_genomes.fa -t {threads} -o {output.lin_out} -p {output.pango_ver_out} -n {output.nextclade_ver_out} --mode {params.analysis_mode}'
+        '{params.assignment_script_path} -i {input} -t {threads} -o {output.lin_out} -p {output.pango_ver_out} -n {output.nextclade_ver_out} --mode {params.analysis_mode}'
+
+rule collect_freebayes_genomes:
+    output:
+        "all_freebayes_genomes.fa"
+    input:
+        expand('{sn}/freebayes/{sn}.consensus.fasta', sn=sample_names)
+    shell:
+        """
+        cat {input} > {output}
+        sample=''
+        seq=''
+        count=''
+        out=''
+        if [[ -f 'failed_samples.log' ]]; then
+            out='.failed_freebayes_samples.tmp'
+            cat failed_samples.log | sed 1,1d > $out
+            echo "Samples that failed to assemble:" > failed_samples.log
+        else
+            out='failed_samples.log'
+            echo "Samples that failed to assemble:" > $out
+        fi
+        while read -r line;
+        do
+            if [[ $line =~ '>' ]]; then
+                if [[ $(echo $seq | wc -c) -gt 1 ]]; then # a previous record has accumulated: evaluate it
+                    count=$(echo $seq | tr -d 'Nn' | wc -c) # bases remaining once Ns are removed (trailing newline counts as 1)
+                    if [[ $count -le 1 ]]; then # empty or all-N consensus = failed assembly
+                        echo $sample >> $out
+                    fi
+                fi
+                sample=$(echo $line | cut -d'>' -f2) # start new record
+                seq=''
+            else
+                seq+=$line # append seq
+            fi
+        done < {output}
+        # evaluate the final record of the file
+        if [[ $(echo $seq | wc -c) -gt 1 ]] && [[ $(echo $seq | tr -d 'Nn' | wc -c) -le 1 ]]; then
+            echo $sample >> $out
+        fi
+
+        if [[ ! $out == 'failed_samples.log' ]]; then
+            sort -b -d -f $out | uniq >> failed_samples.log
+            rm $out
+        fi
+        """
 
 rule run_lineage_assignment_freebayes:
     threads: 4
@@ -805,10 +880,9 @@ rule run_lineage_assignment_freebayes:
     input:
         p_vers = 'input_pangolin_versions.txt',
         n_vers = 'input_nextclade_versions.txt',
-        consensus = expand('{sn}/freebayes/{sn}.consensus.fasta', sn=sample_names)
+        consensus = 'all_freebayes_genomes.fa'
     params:
         analysis_mode = pango_speed,
         assignment_script_path = os.path.join(exec_dir, 'scripts', 'assign_lineages.py')
     shell:
-        'cat {input.consensus} > all_freebayes_genomes.fa && '
-        '{params.assignment_script_path} -i all_freebayes_genomes.fa -t {threads} -o {output} -p {input.p_vers} -n {input.n_vers} --mode {params.analysis_mode} --skip'
+        '{params.assignment_script_path} -i {input.consensus} -t {threads} -o {output} -p {input.p_vers} -n {input.n_vers} --mode {params.analysis_mode} --skip'
diff --git a/conda_envs/assign_lineages.yaml b/conda_envs/assign_lineages.yaml
index a5eecb8..fc4e8fd 100644
--- a/conda_envs/assign_lineages.yaml
+++ b/conda_envs/assign_lineages.yaml
@@ -11,7 +11,6 @@ dependencies:
   - python>=3.7
   - snakemake-minimal
   - gofasta
-  - nodejs
   - usher
   - pandas
   - pysam==0.16.0.1
diff --git a/scripts/assign_lineages.py b/scripts/assign_lineages.py
index 43f2bfd..0ba5d3c 100755
--- a/scripts/assign_lineages.py
+++ b/scripts/assign_lineages.py
@@ -8,6 +8,7 @@ import shutil
 import os, sys
 from datetime import datetime
+import json
 
 
 def check_file(path: str) -> Path:
@@ -136,27 +137,53 @@ def update_nextclade_dataset(vers, skip):
     # If specific tag requested, attempt to install, otherwise install latest
     accession = 'MN908947'
+    current_tag = None
+    if os.path.exists(os.path.join(output_dir, 'tag.json')):
+        j = open(os.path.join(output_dir, 'tag.json'))
+        data = json.load(j)
+        current_tag = data['tag']
+        j.close()
     if requested is not None:
+        # check existing database, if found
+        if requested == current_tag:
+            print(f"Nextclade dataset {requested} already installed! Skipping update!")
+        else:
+            try:
+                print(f"\nDownloading Nextclade {dataset} dataset tagged {requested} for reference {accession}!")
+                subprocess.run(f"nextclade dataset get "
+                               f"--name '{dataset}' "
+                               f"--reference '{accession}' "
+                               f"--tag {requested} "
+                               f"--output-dir '{output_dir}'", shell=True, check=True)
+            except subprocess.CalledProcessError:
+                print(f"\nDatabase not found! Please check whether {requested} tag exists! Downloading latest Nextclade {dataset} dataset for reference {accession}...")
+                try:
+                    subprocess.run(f"nextclade dataset get "
+                                   f"--name '{dataset}' "
+                                   f"--reference '{accession}' "
+                                   f"--output-dir '{output_dir}'", shell=True, check=True)
+                except subprocess.CalledProcessError:
+                    if current_tag is not None:
+                        print(f"Something went wrong updating the Nextclade dataset, using {current_tag} instead!")
+                        requested = current_tag
+                    else:
+                        print(f"Something went wrong updating the Nextclade dataset! No database could be found which may result in errors! Skipping update...")
+                        requested = "Unknown"
+    else:
         try:
-            print(f"\nDownloading Nextclade {dataset} dataset tagged {requested} for reference {accession}!")
+            print(f"\nDownloading latest Nextclade {dataset} dataset for reference {accession}!")
             subprocess.run(f"nextclade dataset get "
                            f"--name '{dataset}' "
                            f"--reference '{accession}' "
-                           f"--tag {requested} "
                            f"--output-dir '{output_dir}'", shell=True, check=True)
         except subprocess.CalledProcessError:
-            print(f"\nDatabase not found! Please check whether {requested} tag exists! Downloading latest Nextclade {dataset} dataset for reference {accession}...")
-            subprocess.run(f"nextclade dataset get "
-                           f"--name '{dataset}' "
-                           f"--reference '{accession}' "
-                           f"--output-dir '{output_dir}'", shell=True, check=True)
-    else:
-        print(f"\nDownloading latest Nextclade {dataset} dataset for reference {accession}!")
-        subprocess.run(f"nextclade dataset get "
-                       f"--name '{dataset}' "
-                       f"--reference '{accession}' "
-                       f"--output-dir '{output_dir}'", shell=True, check=True)
-
+            if current_tag is not None:
+                print(f"Something went wrong updating the Nextclade dataset, using {current_tag} instead!")
+                requested = current_tag
+            else:
+                print(f"Something went wrong updating the Nextclade dataset! No database could be found which may result in errors! Skipping update...")
+                requested = "Unknown"
+
     # Obtain final version information for output
     nextclade_version = subprocess.run(f"nextclade --version".split(), stdout=subprocess.PIPE).stdout.decode('utf-8').strip().lower()
     if nextclade_version.startswith("nextclade"):
diff --git a/scripts/get_data_dependencies.sh b/scripts/get_data_dependencies.sh
index c525294..43bcf68 100755
--- a/scripts/get_data_dependencies.sh
+++ b/scripts/get_data_dependencies.sh
@@ -14,34 +14,42 @@ accession="MN908947.3"
 
 HELP="""
 Flags:
-    -d  :  Directory to configure database within (~10GB)
-    -a  :  Accession to use as viral reference (default=MN908947.3)
+        -d  :  Directory to configure database within (~10GB)
+        -a  :  Accession to use as viral reference (default=MN908947.3)
 """
 
 while getopts ":d:a:" option; do
-    case "${option}" in
-        d) database_dir=$OPTARG;;
-        a) accession=$OPTARG;;
-    esac
+        case "${option}" in
+                d) database_dir=$OPTARG;;
+                a) accession=$OPTARG;;
+        esac
 done
 
 if [ $database_dir = 0 ] ; then
-    echo "You must specify a data directory to install data dependencies."
-    echo "$HELP"
-    exit 1
+        echo "You must specify a data directory to install data dependencies."
+        echo "$HELP"
+        exit 1
 fi
 
 echo -e "Warning: \n - final databases require ~10GB of storage\n - building databases temporarily requires a peak of ~35GB of storage and ~4GB of memory \n - script takes up to ~1.5 hours (system depending)"
 
 # make database dir and get abspath to it
-mkdir -p $database_dir
+if [ ! -d $database_dir ]; then mkdir -p $database_dir; fi
 database_dir=$(realpath $database_dir)
 
 # use curl to grab "simple data dependencies"
-curl -s "https://raw.githubusercontent.com/timflutre/trimmomatic/3694641a92d4dd9311267fed85b05c7a11141e7c/adapters/NexteraPE-PE.fa" > $database_dir/NexteraPE-PE.fa
-curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=${accession}&rettype=gb&retmode=txt" > $database_dir/$accession.gbk
-curl -s "https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?db=nuccore&report=gff3&id=${accession}" > $database_dir/$accession.gff3
-curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=${accession}&rettype=fasta&retmode=txt" > $database_dir/$accession.fasta
+if [ ! -f $database_dir/'NexteraPE-PE.fa' ]; then
+	curl -s "https://raw.githubusercontent.com/timflutre/trimmomatic/3694641a92d4dd9311267fed85b05c7a11141e7c/adapters/NexteraPE-PE.fa" > $database_dir/NexteraPE-PE.fa
+fi
+if [ ! -f $database_dir/${accession}.gbk ]; then
+	curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=${accession}&rettype=gb&retmode=txt" > $database_dir/$accession.gbk
+fi
+if [ ! -f $database_dir/${accession}.gff3 ]; then
+	curl -s "https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?db=nuccore&report=gff3&id=${accession}" > $database_dir/$accession.gff3
+fi
+if [ ! -f $database_dir/${accession}.fasta ]; then
+	curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=${accession}&rettype=fasta&retmode=txt" > $database_dir/$accession.fasta
+fi
 
 # install and activate env for kraken/bwa to build their databases/index
 CONDA_BASE=$($CONDA_EXE info --base)
@@ -51,19 +59,37 @@ conda activate data_dependencies
 
 # get the GRCh38 human genome
 # as per https://lh3.github.io/2017/11/13/which-human-reference-genome-to-use
-curl -s "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz" > $database_dir/GRC38_no_alt_analysis_set.fna.gz
-gunzip $database_dir/GRC38_no_alt_analysis_set.fna.gz
+if [ ! -f $database_dir/"GRC38_no_alt_analysis_set.fna" ]; then
+	curl -s "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz" > $database_dir/GRC38_no_alt_analysis_set.fna.gz
+	gunzip $database_dir/GRC38_no_alt_analysis_set.fna.gz
+fi
 
 # create composite reference of human and virus for competitive bwt mapping
 # based host removal
+if [ ! -f $database_dir/'composite_human_viral_reference.fna' ]; then
 cat $database_dir/GRC38_no_alt_analysis_set.fna $database_dir/$accession.fasta > $database_dir/composite_human_viral_reference.fna
-bwa index $database_dir/composite_human_viral_reference.fna
+fi
+for file in $database_dir/composite_human_viral_reference.fna.{amb,ann,bwt,pac,sa}; do
+	if [ ! -f $file ]; then
+		bwa index $database_dir/composite_human_viral_reference.fna
+		break
+	else
+		continue
+	fi
+done
 
 # get kraken2 viral db
 mkdir -p $database_dir/Kraken2/db
-curl -s "https://genome-idx.s3.amazonaws.com/kraken/k2_viral_20210517.tar.gz" > $database_dir/Kraken2/db/k2_viral_20210517.tar.gz
-cd $database_dir/Kraken2/db
-tar xvf k2_viral_20210517.tar.gz
+for file in $database_dir/Kraken2/db/{hash,opts,taxo}.k2d; do
+	if [ ! -f $file ]; then
+		curl -s "https://genome-idx.s3.amazonaws.com/kraken/k2_viral_20210517.tar.gz" > $database_dir/Kraken2/db/k2_viral_20210517.tar.gz
+		cd $database_dir/Kraken2/db
+		tar xvf k2_viral_20210517.tar.gz
+		break
+	else
+		continue
+	fi
+done
 
 # create blank fasta for 'phylo_include_seqs'
 touch $database_dir/blank.fasta
diff --git a/scripts/get_signal_results.sh b/scripts/get_signal_results.sh
new file mode 100755
index 0000000..0936c39
--- /dev/null
+++ b/scripts/get_signal_results.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+
+shopt -s extglob
+
+source=0
+destination=0
+move='false'
+copy='false'
+
+HELP="""
+Usage:
+bash get_signal_results.sh -s <SIGNAL_results_dir> -d <destination_dir> [-m] [-c]
+
+This script aims to copy (rsync by default, or cp) or move (mv) select output from SIGNAL 'all', 'postprocess', and 'ncov_tools'.
+
+The following files will be transferred over to the specified destination directory (if found):
+SIGNAL 'all' & 'postprocess':
+-> signal-results/<sample>/<sample>_sample.txt
+-> signal-results/<sample>/core/<sample>.consensus.fa
+-> signal-results/<sample>/core/<sample>_ivar_variants.tsv
+-> signal-results/<sample>/freebayes/<sample>.consensus.fasta
+-> signal-results/<sample>/freebayes/<sample>.variants.norm.vcf
+
+SIGNAL 'ncov_tools':
+-> ncov-tools-results/qc_annotation/<sample>.ann.vcf
+-> ncov-tools-results/qc_reports/<run>_ambiguous_position_report.tsv
+-> ncov-tools-results/qc_reports/<run>_mixture_report.tsv
+-> ncov-tools-results/qc_reports/<run>_ncov_watch_variants.tsv
+-> ncov-tools-results/qc_reports/<run>_negative_control_report.tsv
+-> ncov-tools-results/qc_reports/<run>_summary_qc.tsv
+
+Flags:
+    -s  :  SIGNAL results directory
+    -d  :  Directory where summary will be outputted
+    -m  :  Invoke 'mv' move command instead of 'rsync' copying of results. Optional
+    -c  :  Invoke 'cp' copy command instead of 'rsync' copying of results. Optional
+"""
+
+while getopts ":s:d:mc" option; do
+    case "${option}" in
+        s) source=$OPTARG;;
+        d) destination=$OPTARG;;
+        m) move='true';;
+        c) copy='true';;
+    esac
+done
+
+
+if [ $source = 0 ] || [ $destination = 0 ] ; then
+    echo "You must specify both source and destination locations."
+    echo "$HELP"
+    exit 1
+fi
+
+if [ ! -d $destination ]; then
+    echo "Invalid destination directory!"
+    exit 1
+fi
+
+if [ ! -f $source/summary.html ] && [ ! -f $source/summary.zip ]; then
+    echo "Invalid SIGNAL directory! Make sure you've run SIGNAL 'all' and 'postprocess'!"
+    exit 1
+else
+    run_name=$(basename $source)
+    final_dir=${destination}/${run_name}
+    mkdir -p $final_dir/signal-results
+fi
+
+if [ ${move} = true ] && [ ${copy} = true ]; then
+    echo -e "Only pick one of '-m' or '-c' depending on whether you wish to move or copy files, respectively"
+    exit 1
+elif [ ${move} = true ] && [ ${copy} = false ]; then
+    cmd='mv'
+elif [ ${move} = false ] && [ ${copy} = true ]; then
+    cmd='cp'
+else
+    cmd='rsync -avh'
+fi
+
+echo -e "We will use ${cmd} for your files!"
+
+### SIGNAL results_dir
+for file in $source/*; do
+    if [ -d $file ]; then # results_dir/sample
+        sample=$(basename $file) # sample name; this directory contains our files
+        sample_dest=${final_dir}/'signal-results'/${sample}
+        if [[ ! $sample == 'ncov-tools-results' ]]; then
+            mkdir -p $sample_dest
+        fi
+        for d in $file/*; do
+            name=$(basename $d)
+            if [ -d $d ] && [[ $name == 'core' ]]; then
+                mkdir -p $sample_dest/core
+                $cmd ${d}/${sample}.consensus.fa $sample_dest/core/${sample}.consensus.fa
+                $cmd ${d}/${sample}_ivar_variants.tsv $sample_dest/core/${sample}_ivar_variants.tsv
+            elif [ -d $d ] && [[ $name == 'freebayes' ]]; then
+                mkdir -p $sample_dest/freebayes
+                $cmd ${d}/${sample}.consensus.fasta $sample_dest/freebayes/${sample}.consensus.fasta
+                $cmd ${d}/${sample}.variants.norm.vcf $sample_dest/freebayes/${sample}.variants.norm.vcf
+            elif [ -f $d ] && [[ $name =~ '_sample.txt' ]]; then
+                $cmd ${d} $sample_dest/$(basename $d)
+            else
+                continue
+            fi
+        done
+    fi
+done
+
+echo "Files from SIGNAL transferred!"
+
+### NCOV-TOOLS
+if [ ! -d $source/ncov-tools-results ]; then
+    echo "No ncov-tools-results directory found!"
+else
+    ncov_dest=${final_dir}/ncov-tools-results
+    mkdir -p $ncov_dest/qc_{annotation,reports}
+    $cmd $source/ncov-tools-results/qc_reports/* $ncov_dest/qc_reports
+    $cmd $source/ncov-tools-results/qc_annotation/*.ann.vcf $ncov_dest/qc_annotation
+
+    echo "Files from ncov-tools transferred!"
+fi
\ No newline at end of file
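For reference, the layout this script produces under the destination directory should look roughly as follows (run and sample names are illustrative):

```
<destination>/<run_name>/
├── signal-results/
│   └── <sample>/
│       ├── <sample>_sample.txt
│       ├── core/        # <sample>.consensus.fa, <sample>_ivar_variants.tsv
│       └── freebayes/   # <sample>.consensus.fasta, <sample>.variants.norm.vcf
└── ncov-tools-results/
    ├── qc_annotation/
    └── qc_reports/
```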
diff --git a/scripts/ncov-tools.py b/scripts/ncov-tools.py
index 76e5201..16cea46 100755
--- a/scripts/ncov-tools.py
+++ b/scripts/ncov-tools.py
@@ -6,17 +6,21 @@ import fileinput
 import glob
 
 
-def link_ivar(root, replace=False):
+def link_ivar(root, neg, failed, replace=False):
     print("Linking iVar files to ncov-tools!")
     for variants in snakemake.input['variants']:
         sample = variants.split('/')[0]
+        if (sample in failed) and (sample not in neg):
+            continue
         ln_path = f"{root}/{sample}.variants.tsv"
         if (not os.path.exists(ln_path)) or (replace is True):
             os.link(variants, ln_path)
 
     for consensus in snakemake.input['consensus']:
         sample = consensus.split('/')[0]
+        if (sample in failed) and (sample not in neg):
+            continue
         ln_path = f"{root}/{sample}.consensus.fasta"
         if (not os.path.exists(ln_path)) or (replace is True):
             os.link(consensus, ln_path)
@@ -30,15 +34,17 @@ def link_ivar(root, replace=False):
 
 # take sample name from iVar results, redirect to where corresponding FreeBayes should be
 # if FreeBayes file cannot be found, break from loop, replace all with iVar
-def link_freebayes(root):
+def link_freebayes(root, neg, failed):
     print("Linking FreeBayes files to ncov-tools!")
     for variants in snakemake.input['variants']:
         sample = variants.split('/')[0]
+        if (sample in failed) and (sample not in neg):
+            continue
         expected_path = os.path.join(sample, 'freebayes', sample+'.variants.norm.vcf')
         if not os.path.exists(expected_path):
             print("Missing FreeBayes variant file! Switching to iVar input!")
-            link_ivar(root, True)
+            link_ivar(root, neg, failed, replace=True)
             break
         else:
             ln_path = f"{root}/{sample}.variants.vcf"
@@ -47,10 +53,12 @@ def link_freebayes(root):
 
     for consensus in snakemake.input['consensus']:
         sample = consensus.split('/')[0]
+        if (sample in failed) and (sample not in neg):
+            continue
         expected_path = os.path.join(sample, 'freebayes', sample+'.consensus.fasta')
         if not os.path.exists(expected_path):
             print("Missing FreeBayes variant file! Switching to iVar input!")
-            link_ivar(root, True)
+            link_ivar(root, neg, failed, replace=True)
             break
         else:
             ln_path = f"{root}/{sample}.consensus.fasta"
@@ -75,10 +83,26 @@ def set_up():
     try:
         assert pangolin == "3" or pangolin == "4" # directly supported versions
     except AssertionError:
-        import urllib.request as web
-        commit_url = web.urlopen(f"https://github.com/cov-lineages/pangolin/releases/latest").geturl()
-        pangolin = commit_url.split("/")[-1].split(".")[0].lower().strip("v")
+        # import urllib.request as web
+        # commit_url = web.urlopen(f"https://github.com/cov-lineages/pangolin/releases/latest").geturl()
+        # pangolin = commit_url.split("/")[-1].split(".")[0].lower().strip("v")
         # latest version (should ensure temporary compatibility)
+        installed_versions = subprocess.run(["pangolin", "--all-versions"],
+                                            check=True,
+                                            stdout=subprocess.PIPE)
+        installed_versions = installed_versions.stdout.decode('utf-8')
+        installed_ver_dict = {}
+        for dep_ver in map(str.strip, installed_versions.split('\n')):
+            # skip empty line at end
+            if len(dep_ver) == 0:
+                continue
+            try:
+                dependency, version = dep_ver.split(': ')
+            except ValueError:
+                continue
+            if dependency == 'pangolin':
+                pangolin = str(version).split(".",1)[0].strip('v')
+                break
 
 ### Create data directory within ncov-tools
     data_root = os.path.abspath(os.path.join(exec_dir, 'ncov-tools', "%s" %(result_dir)))
@@ -99,6 +123,13 @@ def set_up():
         neg_list = list(neg_samples)
     print("Negative control samples found include: %s" %(neg_list))
 
+### Pull failed samples (SIGNAL log file: failed_samples.log)
+    if os.path.exists(snakemake.params['failed']):
+        with open(snakemake.params['failed']) as fail:
+            failed_list = [i.strip() for i in fail.readlines()[1:]]
+    else:
+        failed_list = []
+    print("Failed samples found include: %s" %(failed_list))
 
 ### config.yaml parameters
     config = {'data_root': f"'{data_root}'",
@@ -126,27 +157,32 @@ def set_up():
     print("Linking alignment BAMs to ncov-tools!")
     for bam in snakemake.input['bams']:
         sample = bam.split('/')[0]
+        # if sample failed and not a negative, skip linking
+        if (sample in failed_list) and (sample not in neg_list):
+            continue
         ln_path = f"{data_root}/{sample}.bam"
-        if (not os.path.exists(ln_path)) or (replace is True):
+        if not os.path.exists(ln_path):
             os.link(bam, ln_path)
 
     for primer_trimmed_bam in snakemake.input['primertrimmed_bams']:
         sample = primer_trimmed_bam.split('/')[0]
+        if (sample in failed_list) and (sample not in neg_list):
+            continue
         ln_path = f"{data_root}/{sample}.mapped.primertrimmed.sorted.bam"
-        if (not os.path.exists(ln_path)) or (replace is True):
+        if not os.path.exists(ln_path):
             os.link(primer_trimmed_bam, ln_path)
 
     if snakemake.params['freebayes_run']:
-        link_freebayes(data_root)
+        link_freebayes(data_root, neg_list, failed_list)
         config['variants_pattern'] = "'{data_root}/{sample}.variants.vcf'"
     else:
-        link_ivar(data_root)
+        link_ivar(data_root, neg_list, failed_list, replace=False)
 
     with open(os.path.join(exec_dir, 'ncov-tools', 'config.yaml'), 'w') as fh:
         for key, value in config.items():
             fh.write(f"{key}: {value}\n")
 
-    return exec_dir, result_dir
+    return exec_dir, result_dir, data_root
 
 def run_all():
     os.system(f"snakemake -s workflow/Snakefile --cores {snakemake.threads} all")
@@ -185,12 +221,14 @@ def move(cwd, dest, prefix):
         print("Missing ncov-tools 'qc_analysis' directory")
 
 if __name__ == '__main__':
-    exec_dir, result_dir = set_up()
+    exec_dir, result_dir, data_root = set_up()
     run_script = os.path.join(exec_dir, 'scripts', 'run_ncov_tools.sh')
     #print("Don't forget to update the config.yaml file as needed prior to running ncov-tools.")
     print("Running ncov-tools using %s cores!" %(snakemake.threads))
     subprocess.run([run_script, '-c', str(snakemake.threads), '-s', str(result_dir)])
-    
+
+    # clean up
+    shutil.rmtree(data_root)
     #run_all()
     #move(exec_dir, result_root, result_dir)
diff --git a/scripts/run_ncov_tools.sh b/scripts/run_ncov_tools.sh
index 2a5f098..5a68aad 100755
--- a/scripts/run_ncov_tools.sh
+++ b/scripts/run_ncov_tools.sh
@@ -35,9 +35,9 @@ if [ $1 = 'help' ]; then
 fi
 
 if [ $SIGNAL = 0 ] ; then
-    echo "You must specify the name of the directory holding SIGNAL results."
-    echo "$HELP"
-    exit 1
+        echo "You must specify the name of the directory holding SIGNAL results."
+        echo "$HELP"
+        exit 1
 fi
 
 # Start point for executing from ncov-tools.py is SIGNAL results directory
@@ -48,9 +48,9 @@ RESULTS=$PWD
 cd ../ncov-tools
 
 # run ncov-tools
-snakemake -s workflow/Snakefile --cores ${CORES} all
+snakemake -k -s workflow/Snakefile --cores ${CORES} all
 
-# move ncovresults to SIGNAL results directory
+# move ncovresults to SIGNAL results directory and clean up
 mv ${SIGNAL}'_ncovresults' ${RESULTS}/ncov-tools-results
 
 # return success
diff --git a/scripts/signal_postprocess.py b/scripts/signal_postprocess.py
index a0a0aba..20fbcde 100755
--- a/scripts/signal_postprocess.py
+++ b/scripts/signal_postprocess.py
@@ -17,7 +17,7 @@ assert long_git_id.startswith('$Id: ')
 
 #short_git_id = long_git_id[5:12]
-short_git_id = "v1.5.9"
+short_git_id = "v1.6.0"
 
 # Suppresses matplotlib warning (https://github.com/jaleezyy/covid-19-signal/issues/59)
 # Creates a small memory leak, but it's nontrivial to fix, and won't be a practical concern!
diff --git a/signal.py b/signalexe.py
similarity index 64%
rename from signal.py
rename to signalexe.py
index 00764cd..1d10df6 100755
--- a/signal.py
+++ b/signalexe.py
@@ -1,23 +1,31 @@
 #!/usr/bin/env python
 
-# v1.5.0+
-# signal.py assumes Snakefile is in current working directory (i.e., SIGNAL root)
+# v1.6.0+
+# signalexe.py assumes Snakefile is in current working directory (i.e., SIGNAL root)
 
+import signal
 import argparse
 import subprocess, os, sys
 import re
 from pathlib import Path
+import platform
+
+# for compatibility between platforms
+if platform.system() != 'Linux':
+    signal.SIGHUP = 1
 
 def create_parser():
-    allowed = {'all': False, 'postprocess': False, 'ncov_tools': False}
+    allowed = {'install': False, 'all': False, 'postprocess': False, 'ncov_tools': False}
 
-    parser = argparse.ArgumentParser(prog='signal.py', description="SARS-CoV-2 Illumina GeNome Assembly Line (SIGNAL) aims to take Illumina short-read sequences and perform consensus assembly + variant calling for ongoing surveillance and research efforts towards the emergent coronavirus: Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2).")
-    parser.add_argument('all', nargs='*', help="Run SIGNAL with all associated assembly rules. Does not include postprocessing '--configfile' or '--directory' required. The latter will automatically generate a configuration file and sample table. If both provided, then '--configfile' will take priority")
+    parser = argparse.ArgumentParser(prog='signalexe.py', description="SARS-CoV-2 Illumina GeNome Assembly Line (SIGNAL) aims to take Illumina short-read sequences and perform consensus assembly + variant calling for ongoing surveillance and research efforts towards the emergent coronavirus: Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2).")
+    parser.add_argument('all', nargs='*', help="Run SIGNAL with all associated assembly rules. Does not include postprocessing. '--configfile' or '--directory' required. The latter will automatically generate a configuration file and sample table. If both provided, then '--configfile' will take priority")
     parser.add_argument('postprocess', nargs='*', help="Run SIGNAL postprocessing on completed SIGNAL run. '--configfile' is required but will be generated if '--directory' is provided")
     parser.add_argument('ncov_tools', nargs='*', help="Generate configuration file and filesystem setup required and then execute ncov-tools quality control assessment. Requires 'ncov-tools' submodule! '--configfile' is required but will be generated if '--directory' is provided")
+    parser.add_argument('install', nargs='*',
+                        help="Install individual rule environments and ensure SIGNAL is functional. The only operable parameter will be '--data'. Will override other operations!")
     parser.add_argument('-c', '--configfile', type=check_file, default=None, help="Configuration file (i.e., config.yaml) for SIGNAL analysis")
     parser.add_argument('-d', '--directory', type=check_directory, default=None,
@@ -26,18 +34,22 @@ def create_parser():
     parser.add_argument('--config-only', action='store_true', help="Generate sample table and configuration file (i.e., config.yaml) and exit. '--directory' required")
     parser.add_argument('--remove-freebayes', action='store_false', help="Configuration file generator parameter. Set flag to DISABLE freebayes variant calling (improves overall speed)")
     parser.add_argument('--add-breseq', action='store_true', help="Configuration file generator parameter. Set flag to ENABLE optional breseq step (will take more time for analysis to complete)")
-    parser.add_argument('-neg', '--neg-prefix', default=None, help="Configuration file generator parameter. Comma-separated list of negative sontrol sample name(s) or prefix(es). For example, 'Blank' will cover Blank1, Blank2, etc. Recommended if running ncov-tools. Will be left empty, if not provided")
-    parser.add_argument('--dependencies', action='store_true', help="Download data dependencies (under a created 'data' directory) required for SIGNAL analysis and exit. Note: Will override other flags! (~10 GB storage required)")
+    parser.add_argument('-neg', '--neg-prefix', default=None, help="Configuration file generator parameter. Comma-separated list of negative control sample name(s) or prefix(es). For example, 'Blank' will cover Blank1, Blank2, etc. Recommended if running ncov-tools. Will be left empty, if not provided")
+    parser.add_argument('--dependencies', action='store_true', help="Download data dependencies (under a created 'data' directory) required for SIGNAL analysis and exit. Note: Will override other parameters! (~10 GB storage required)")
+    parser.add_argument('--data', default='data', help="SIGNAL install and data dependencies parameter. Set location for data dependencies. If '--dependencies' is run, a folder will be created in the specified directory. If '--config-only' or '--directory' is used, the value will be applied to the configuration file. (Upcoming feature): When used with 'SIGNAL install', any tests run will use the dependencies located at this directory. Default = 'data'")
+    #parser.add_argument('--enable-test', action='store_true', help='SIGNAL install parameter. Add SIGNAL testing after environment installation using curated test data')
     parser.add_argument('-ri', '--rerun-incomplete', action='store_true', help="Snakemake parameter. Re-run any incomplete samples from a previously failed run")
+    parser.add_argument('-ii', '--ignore-incomplete', action='store_true', help='Snakemake parameter. Do not check for incomplete output files')
     parser.add_argument('--unlock', action='store_true', help="Snakemake parameter. Remove a lock on the working directory after a failed run")
     parser.add_argument('-F', '--forceall', action='store_true', help='Snakemake parameter. Force the re-run of all rules regardless of prior output')
     parser.add_argument('-n', '--dry-run', action='store_true', help='Snakemake parameter. Do not execute anything and only display what would be done')
+    parser.add_argument('--quiet', action='store_true', help="Snakemake parameter. Do not output any progress or rule information. If used with '--dry-run', it will just display a summary of the DAG of jobs")
     parser.add_argument('--verbose', action='store_true', help="Snakemake parameter. Display snakemake debugging output")
     parser.add_argument('-v', '--version', action='store_true', help="Display version number")
     args, unknown = parser.parse_known_args()
 
     provided = []
-    for opt in allowed: # ['all', 'postprocess', 'ncov_tools']
+    for opt in allowed: # ['install', 'all', 'postprocess', 'ncov_tools']
         if len(getattr(args, opt)) > 0:
             provided = provided + getattr(args, opt)
             getattr(args, opt).clear()
@@ -49,18 +61,16 @@ def create_parser():
             allowed[val.lower()] = True
         else:
             print(f"Ignoring unknown command: {val}")
-    
-    # Unknown
-    # for x in unknown:
-        # filter out unknown options (like -b or --b or alll)
-        # exit with error
-        # if x.startswith(('-', '--')):
-            # parser.error(f"unknown argument {x}")
-        # identify what belongs where
-        # getattr(result, 'provided').append(x)
-    
+
     return args, allowed
-    
+
+def check_frontend():
+    try:
+        subprocess.check_call(['mamba', 'list'], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
+        return 'mamba'
+    except (subprocess.CalledProcessError, FileNotFoundError): # mamba broken, or not installed at all
+        return 'conda'
+
 def check_directory(path: str) -> Path:
     """
     Check an input directory exists and is readable
@@ -109,8 +119,12 @@ def check_submodule(exec_dir):
         try:
             print("Updating ncov-tools!")
             subprocess.run(['git', 'submodule', 'update', '--init', '--recursive'])
+            return True
         except subprocess.CalledProcessError:
-            exit("Could not find nor update the required 'ncov-tools' directory! Manually download/update and try again!")
+            print("Could not find nor update the required 'ncov-tools' directory! Manually download/update and try again!")
+            sys.exit(1)
+    else:
+        return False
 
 def write_sample_table(sample_data, output_table):
     """
@@ -122,8 +136,7 @@ def write_sample_table(sample_data, output_table):
         for sample in sample_data:
             out_fh.write(",".join(sample) + '\n')
 
-def download_dependences():
-    dir_name = 'data'
+def download_dependencies(dir_name):
     script = os.path.join(script_path, 'scripts', 'get_data_dependencies.sh')
     subprocess.run(['bash', script, '-d', dir_name, '-a', 'MN908947.3'])
 
@@ -135,7 +148,7 @@ def download_dependences():
     out_table = project_name + "_sample_table.csv"
     subprocess.run(['bash', script, '-d', project_directory, '-n', out_table])
 
-def write_config_file(run_name, config_file, opt_tasks):
+def write_config_file(run_name, config_file, data_directory, opt_tasks):
     ### opt_tasks = [args.breseq, args.freebayes, [args.neg_prefix]] - latter only applies to SIGNAL v1.5.8 and earlier
     config = f"""# This file contains a high-level summary of pipeline configuration and inputs.
@@ -157,26 +170,26 @@ def write_config_file(run_name, config_file, opt_tasks):
 scheme_bed: 'resources/primer_schemes/artic_v3/nCoV-2019.bed'
 
 # Path from snakemake dir to bwa indexed human + viral reference genome
-composite_reference: 'data/composite_human_viral_reference.fna'
+composite_reference: "{data_directory}/composite_human_viral_reference.fna"
 
 # Used as bwa reference genome when removing host sequences.
 # Also used as 'ivar' reference genome in variant detection + consensus.
 # Used as -r,-g arguments to 'quast'
 # contig needed for hostremoval filtering script
 viral_reference_contig_name: 'MN908947.3'
-viral_reference_genome: 'data/MN908947.3.fasta'
-viral_reference_feature_coords: 'data/MN908947.3.gff3'
+viral_reference_genome: "{data_directory}/MN908947.3.fasta"
+viral_reference_feature_coords: "{data_directory}/MN908947.3.gff3"
 
 # breseq_reference must be defined if run_breseq == True
 run_breseq: {opt_tasks[0]}
 # Used as --reference argument to 'breseq'
-breseq_reference: 'data/MN908947.3.gbk'
+breseq_reference: "{data_directory}/MN908947.3.gbk"
 
 # run freebayes for variant and consensus calling (as well as ivar)
 run_freebayes: {opt_tasks[1]}
 
 # Used as --db argument to 'kraken2'
-kraken2_db: 'data/Kraken2/db'
+kraken2_db: "{data_directory}/Kraken2/db"
 
 # For Ivar's amplicon filter
 # https://github.com/andersen-lab/ivar/commit/7027563fd75581c78dabc6040ebffdee2b24abe6
@@ -226,7 +239,7 @@ def write_config_file(run_name, config_file, opt_tasks):
 amplicon_loc_bed: 'resources/primer_schemes/artic_v3/ncov-qc_V3.scheme.bed'
 
 # fasta of sequences to include with pangolin phylogeny
-phylo_include_seqs: "data/blank.fasta"
+phylo_include_seqs: "{data_directory}/blank.fasta"
 
 # List of negative control sample names or prefixes (i.e., ['Blank'] will cover Blank1, Blank2, etc.)
 negative_control_prefix: {opt_tasks[2]}"""
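To illustrate the effect of the templated paths above: generating a configuration with `--data /databases/signal-data` (a hypothetical location) should produce entries along these lines:

```
composite_reference: "/databases/signal-data/composite_human_viral_reference.fna"
viral_reference_genome: "/databases/signal-data/MN908947.3.fasta"
viral_reference_feature_coords: "/databases/signal-data/MN908947.3.gff3"
breseq_reference: "/databases/signal-data/MN908947.3.gbk"
kraken2_db: "/databases/signal-data/Kraken2/db"
phylo_include_seqs: "/databases/signal-data/blank.fasta"
```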
@@ -234,21 +247,47 @@ def write_config_file(run_name, config_file, opt_tasks):
     with open(config_file, 'w') as fh:
         fh.write(config)
 
+def install_signal(frontend, data='data'):
+    """
+    Install SIGNAL dependencies per rule and test using a sample dataset, if desired
+    """
+    dep_snakefile = os.path.join(script_path, 'resources', 'dependencies')
+    assert os.path.exists(dep_snakefile)
+    try:
+        subprocess.run(f"snakemake -s {dep_snakefile} --conda-frontend {frontend} --cores 1 --use-conda --conda-prefix=$PWD/.snakemake/conda", shell=True, check=True)
+    except subprocess.CalledProcessError:
+        print("Installation of environments failed!")
+        sys.exit(1)
+
+    ### TODO: Test SIGNAL with curated data
+    if os.path.exists(data):
+        pass
+
 if __name__ == '__main__':
     # note: add root_dir to determine the root directory of SIGNAL
     script_path = os.path.join(os.path.abspath(sys.argv[0]).rsplit("/",1)[0])
     args, allowed = create_parser()
-    version = 'v1.5.9'
+    version = 'v1.6.0'
     alt_options = []
 
     if args.version:
-        exit(f"{version}")
+        print(f"{version}")
+        sys.exit(0)
+
+    conda_frontend = check_frontend() # 'mamba' or 'conda'
+
+    if allowed['install']:
+        install_signal(conda_frontend, args.data)
+        print("Installation of environments completed successfully!")
+        sys.exit(0)
 
     if args.dependencies:
         print("Downloading necessary reference and dependency files!")
-        download_dependences()
-        exit("Download complete!")
-    
+        download_dependencies(args.data)
+        print("Complete!")
+        sys.exit(0)
+
     if args.configfile is None:
         assert args.directory is not None, "Please provide '--directory' to proceed! ('--configfile' if a configuration file already exists!)"
         run_name = args.directory.name
@@ -258,35 +297,53 @@ def write_config_file(run_name, config_file, opt_tasks):
             neg = [pre.replace(" ","") for pre in args.neg_prefix.split(",")]
         else:
             neg = [args.neg_prefix]
-        write_config_file(run_name, config_file, [args.add_breseq, args.remove_freebayes, neg])
+        write_config_file(run_name, config_file, args.data, [args.add_breseq, args.remove_freebayes, neg])
         if args.config_only:
-            exit("Configuration file and sample table generated!")
+            print("Configuration file and sample table generated!")
+            sys.exit(0)
     else:
         config_file = args.configfile
 
     if not any([allowed[x] for x in allowed]):
-        exit("No task specified! Please provide at least one of 'all', 'postprocess', or 'ncov_tools'! See 'signal.py -h' for details!")
+        print("No task specified! Please provide at least one of 'all', 'postprocess', or 'ncov_tools'! See 'signalexe.py -h' for details!")
+        sys.exit(1)
     else:
         if args.verbose: alt_options.append('--verbose')
+        if args.quiet: alt_options.append('--quiet')
         if args.unlock: alt_options.append('--unlock')
         if args.forceall: alt_options.append('--forceall')
         if args.dry_run: alt_options.append('--dry-run')
         if args.rerun_incomplete: alt_options.append('--rerun-incomplete')
+        if args.ignore_incomplete: alt_options.append('--ignore-incomplete')
         opt = " ".join(alt_options)
 
     for task in allowed:
-        if allowed[task] is True:
+        if (allowed[task] is True) and (task != 'install'):
             print(f"Running SIGNAL {task}!")
             try:
-                subprocess.run(f"snakemake --conda-frontend mamba --configfile {config_file} --cores={args.cores} --use-conda --conda-prefix=$PWD/.snakemake/conda {task} -kp {opt}", shell=True, check=True)
-            except subprocess.CalledProcessError: # likely missing mamba
+                subprocess.run(f"snakemake --conda-frontend {conda_frontend} --configfile {config_file} --cores={args.cores} --use-conda --conda-prefix=$PWD/.snakemake/conda {task} -kp {opt}", shell=True, check=True)
+            except subprocess.CalledProcessError:
                 if task == "ncov_tools":
-                    check_submodule(os.getcwd())
-                    if opt.split(" ")[-1] == '--rerun-incomplete': # remove redundant flag
-                        opt = " ".join(opt.split(" ")[:-1])
-                    try:
-                        print("Retrying...")
-                        subprocess.run(f"snakemake --conda-frontend conda --configfile {config_file} --cores={args.cores} --use-conda --conda-prefix=$PWD/.snakemake/conda {task} -kp --rerun-incomplete {opt}", shell=True, check=True)
-                    except subprocess.CalledProcessError:
-                        exit(f"Something went wrong running SIGNAL {task}! Check input and try again!")
+                    mod = check_submodule(os.getcwd())
+                    if mod:
+                        if opt.split(" ")[-1] == '--rerun-incomplete': # remove redundant flag
+                            opt = " ".join(opt.split(" ")[:-1])
+                        try:
+                            print("Retrying...ncov-tools!")
+                            subprocess.run(f"snakemake --conda-frontend {conda_frontend} --configfile {config_file} --cores={args.cores} --use-conda --conda-prefix=$PWD/.snakemake/conda {task} -kp --rerun-incomplete {opt}", shell=True, check=True)
+                        except subprocess.CalledProcessError:
+                            print(f"Some jobs failed while running SIGNAL {task}! Check snakemake logs and the ncov-tools directory for additional details!")
+                            continue
+                    else:
+                        print(f"Some jobs failed while running SIGNAL {task}! Check snakemake logs and the ncov-tools directory for additional details!")
+                        continue
+                elif task == 'all':
+                    print(f"Some jobs failed while running SIGNAL {task}! This does NOT necessarily mean your run was erroneous! Samples that failed assembly can be found in 'failed_samples.log'! If no such file exists or it is blank, check your inputs and logs and try again!")
+                    continue
+                elif task == 'postprocess':
+                    print(f"Some jobs failed while running SIGNAL {task}! Some output files may be missing! Check SIGNAL results and logs and try again!")
+                    continue
+                else:
+                    print(f"Some jobs failed while running SIGNAL {task}! Check SIGNAL inputs, logs, and results and try again!")
 
-    exit("SIGNAL completed successfully!")
+    print("SIGNAL run complete! Check corresponding snakemake logs for any details!")
+    sys.exit(0)
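Taken together, a typical first-time workflow with the renamed wrapper might look like the following sketch (read paths and core counts are illustrative):

```
# one-time setup: per-rule conda environments and ~10GB of reference data
python signalexe.py install
python signalexe.py --data data --dependencies

# preview, then run assembly, postprocessing, and ncov-tools QC in one line
python signalexe.py --directory /path/to/reads --cores 8 --dry-run all
python signalexe.py --directory /path/to/reads --cores 8 all postprocess ncov_tools
```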