Merge branch 'CW-1170' into 'dev'

CW-1170 Fix failed to plot

See merge request epi2melabs/workflows/wf-transcriptomes!78
epi2me-labs · Nov 16, 2022 · 14ee337
2 parents f852dde + 5477e72
commit 14ee337
Show file tree

Hide file tree

Showing 7 changed files with 29 additions and 27 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [unreleased]
+## [v0.1.6]
 ### Updated
 - Removed sanitize option
 - Reduce size of differential expression data.
@@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 - Fix JAFFAL terminating workflow when no fusions found.
 - Error if condition sheet and sample sheet don't match.
+- Failed to plot DE graphs when one of data sets is 0 length.
+
 
 ## [v0.1.5]
 ### Added

diff --git a/bin/de_plots.py b/bin/de_plots.py
@@ -205,9 +205,9 @@ def dexseq_section(dexseq_file, section, id_dic):
                 ],
         title="Average copy per million (CPM) vs Log-fold change (LFC)",
         colors=["red", "blue", "black"],
-        xlim=[0, 5],
         names=["Up", "Down", "NotSig"]
         )
+
     dexseq_plot.xaxis.axis_label = "A (log2 transformed mean exon read counts)"
     dexseq_plot.yaxis.axis_label = """
     M (log2 transformed differential abundance)
@@ -277,10 +277,9 @@ def dge_section(dge_file, section, ids_dic):
                 not_sig["logFC"],
                 ],
         title="Average copy per million (CPM) vs Log-fold change (LFC)",
-        colors=["blue", "red", "black"],
+        colors=["red", "blue", "black"],
         names=["Up", "Down", "NotSig"]
         )
-
     logcpm_vs_logfc.xaxis.axis_label = "Average log CPM"
     logcpm_vs_logfc.yaxis.axis_label = "Log-fold change"
     logcpm_caption = """### Results of the edgeR Analysis."""

diff --git a/bin/merge_count_tsvs.py b/bin/merge_count_tsvs.py
@@ -11,35 +11,35 @@
 parser = argparse.ArgumentParser(
     description="""Merge tab separated files on a given field using pandas.""")
 parser.add_argument(
-    '-j', metavar='join', type=str, help="Join type (outer).", default="outer")
+    '-j', metavar='join', help="Join type (outer).", default="outer")
 parser.add_argument(
-    '-f', metavar='field', type=str,
+    '-f', metavar='field',
     help="Join on this field (Reference).", default="Reference")
 parser.add_argument(
-    '-o', metavar='out_tsv', type=str,
+    '-o', metavar='out_tsv',
     help="Output tsv (merge_tsvs.tsv).", default="merge_tsvs.tsv")
 parser.add_argument(
     '-z', action="store_true",
     help="Fill NA values with zero.", default=False)
 parser.add_argument(
-    'tsvs', metavar='input_tsvs', nargs='*',
-    type=str, help="Input tab separated files.")
-parser.add_argument(
-    '-tpm', type=bool, nargs='*', default=False,
+    '-tpm', type=bool, default=False,
     help="TPM instead of counts")
-
+parser.add_argument(
+    '-tsvs', metavar='input_tsvs', nargs='*',
+    help="Input tab separated files.")
 
 if __name__ == '__main__':
-    args = parser.parse_args()
 
+    args = parser.parse_args()
     dfs = {x: pd.read_csv(x, sep="\t") for x in args.tsvs}
 
     ndfs = []
     for x, df in dfs.items():
         # Transform counts to integers:
-        df = df.rename(columns={'NumReads': 'Count', 'Name': 'Reference'})
         if args.tpm:
             df = df.rename(columns={'TPM': 'Count', 'Name': 'Reference'})
+        else:
+            df = df.rename(columns={'NumReads': 'Count', 'Name': 'Reference'})
         df.Count = np.array(df.Count, dtype=int)
         # Take only non-zero counts:
         df = df[df.Count > 0]

diff --git a/environment.yaml b/environment.yaml
@@ -7,7 +7,7 @@ channels:
 dependencies:
     - python==3.8.*
     - bokeh==2.4.3
-    - aplanat>=0.6.4
+    - aplanat>=0.6.15
     - epi2melabs
     - minimap2==2.24
     - samtools==1.14

diff --git a/nextflow.config b/nextflow.config
@@ -23,7 +23,7 @@ params {
     out_dir = "output"
     sample = null
     sample_sheet = null
-    wfversion = "v0.1.5"
+    wfversion = "v0.1.6"
     aws_image_prefix = null
     aws_queue = null
     report_name = "report"

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -363,7 +363,7 @@
         },
         "wfversion": {
             "type": "string",
-            "default": "v0.1.5",
+            "default": "v0.1.6",
             "hidden": true
         },
         "monochrome_logs": {

diff --git a/subworkflows/differential_expression.nf b/subworkflows/differential_expression.nf
@@ -2,16 +2,17 @@ process count_transcripts {
     // Count transcripts using Salmon.
     // library type is specified as forward stranded (-l SF) as it should have either been through pychopper or come from direct RNA reads.
     label "isoforms"
+    cpus params.threads
     input:
         tuple val(sample_id), path(bam)
         path ref_transcriptome
     output:
         path "*transcript_counts.tsv", emit: counts
         path "*seqkit.stats", emit: seqkit_stats
     """
-    salmon quant --noErrorModel -p $params.threads -t $ref_transcriptome -l SF -a $bam -o counts
-    mv counts/quant.sf "${sample_id}".transcript_counts.tsv
-    seqkit bam  "$bam" 2>  "${sample_id}".seqkit.stats
+    salmon quant --noErrorModel -p "${task.cpus}" -t "${ref_transcriptome}" -l SF -a "${bam}" -o counts
+    mv counts/quant.sf "${sample_id}.transcript_counts.tsv"
+    seqkit bam  "${bam}" 2>  "${sample_id}.seqkit.stats"
     """
 }
 
@@ -23,7 +24,7 @@ process mergeCounts {
     output:
         path "all_counts.tsv"
     """
-    merge_count_tsvs.py -z -o all_counts.tsv $counts
+    merge_count_tsvs.py -z -o all_counts.tsv -tsvs ${counts}
     """
 }
 
@@ -34,7 +35,7 @@ process mergeTPM {
     output:
         path "tpm_counts.tsv"
     """
-    merge_count_tsvs.py -z -o tpm_counts.tsv $counts -tpm 
+    merge_count_tsvs.py -o tpm_counts.tsv -z -tpm True -tsvs $counts 
     """
 }
 
@@ -101,7 +102,7 @@ process build_minimap_index_transcriptome{
         path "genome_index.mmi", emit: index
     script:
     """
-    minimap2 -t ${params.threads} ${params.minimap_index_opts} -I 1000G -d "genome_index.mmi" ${reference}
+    minimap2 -t "${task.cpus}" ${params.minimap_index_opts}  -I 1000G -d "genome_index.mmi" "${reference}"
   
     """
 }
@@ -123,10 +124,10 @@ process map_transcriptome{
     output:
        tuple val(sample_id), path("${sample_id}_reads_aln_sorted.bam"), emit: bam
     """
-    minimap2 -t ${params.threads} -ax splice -uf -p 1.0 $index $fastq_reads\
-    | samtools view -Sb > output.bam
-    samtools sort -@ ${params.threads} output.bam -o "${sample_id}"_reads_aln_sorted.bam
-    samtools index "${sample_id}"_reads_aln_sorted.bam
+    minimap2 -t ${task.cpus} -ax splice -uf -p 1.0 "${index}" "${fastq_reads}" \
+    | samtools view -Sb > "output.bam"
+    samtools sort -@ ${task.cpus} "output.bam" -o "${sample_id}_reads_aln_sorted.bam"
+    samtools index "${sample_id}_reads_aln_sorted.bam"
     """
 }