Skip to content

Commit

Permalink
Merge branch 'CW-1170' into 'dev'
Browse files Browse the repository at this point in the history
CW-1170 Fix failed to plot

See merge request epi2melabs/workflows/wf-transcriptomes!78
  • Loading branch information
sarahjeeeze committed Nov 16, 2022
2 parents f852dde + 5477e72 commit 14ee337
Show file tree
Hide file tree
Showing 7 changed files with 29 additions and 27 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [unreleased]
## [v0.1.6]
### Updated
- Removed sanitize option
- Reduce size of differential expression data.
Expand All @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Fix JAFFAL terminating workflow when no fusions found.
- Error if condition sheet and sample sheet don't match.
- Failed to plot DE graphs when one of the data sets has zero length.


## [v0.1.5]
### Added
Expand Down
5 changes: 2 additions & 3 deletions bin/de_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,9 @@ def dexseq_section(dexseq_file, section, id_dic):
],
title="Average copy per million (CPM) vs Log-fold change (LFC)",
colors=["red", "blue", "black"],
xlim=[0, 5],
names=["Up", "Down", "NotSig"]
)

dexseq_plot.xaxis.axis_label = "A (log2 transformed mean exon read counts)"
dexseq_plot.yaxis.axis_label = """
M (log2 transformed differential abundance)
Expand Down Expand Up @@ -277,10 +277,9 @@ def dge_section(dge_file, section, ids_dic):
not_sig["logFC"],
],
title="Average copy per million (CPM) vs Log-fold change (LFC)",
colors=["blue", "red", "black"],
colors=["red", "blue", "black"],
names=["Up", "Down", "NotSig"]
)

logcpm_vs_logfc.xaxis.axis_label = "Average log CPM"
logcpm_vs_logfc.yaxis.axis_label = "Log-fold change"
logcpm_caption = """### Results of the edgeR Analysis."""
Expand Down
20 changes: 10 additions & 10 deletions bin/merge_count_tsvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,35 +11,35 @@
parser = argparse.ArgumentParser(
description="""Merge tab separated files on a given field using pandas.""")
parser.add_argument(
'-j', metavar='join', type=str, help="Join type (outer).", default="outer")
'-j', metavar='join', help="Join type (outer).", default="outer")
parser.add_argument(
'-f', metavar='field', type=str,
'-f', metavar='field',
help="Join on this field (Reference).", default="Reference")
parser.add_argument(
'-o', metavar='out_tsv', type=str,
'-o', metavar='out_tsv',
help="Output tsv (merge_tsvs.tsv).", default="merge_tsvs.tsv")
parser.add_argument(
'-z', action="store_true",
help="Fill NA values with zero.", default=False)
parser.add_argument(
'tsvs', metavar='input_tsvs', nargs='*',
type=str, help="Input tab separated files.")
parser.add_argument(
'-tpm', type=bool, nargs='*', default=False,
'-tpm', type=bool, default=False,
help="TPM instead of counts")

parser.add_argument(
'-tsvs', metavar='input_tsvs', nargs='*',
help="Input tab separated files.")

if __name__ == '__main__':
args = parser.parse_args()

args = parser.parse_args()
dfs = {x: pd.read_csv(x, sep="\t") for x in args.tsvs}

ndfs = []
for x, df in dfs.items():
# Transform counts to integers:
df = df.rename(columns={'NumReads': 'Count', 'Name': 'Reference'})
if args.tpm:
df = df.rename(columns={'TPM': 'Count', 'Name': 'Reference'})
else:
df = df.rename(columns={'NumReads': 'Count', 'Name': 'Reference'})
df.Count = np.array(df.Count, dtype=int)
# Take only non-zero counts:
df = df[df.Count > 0]
Expand Down
2 changes: 1 addition & 1 deletion environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ channels:
dependencies:
- python==3.8.*
- bokeh==2.4.3
- aplanat>=0.6.4
- aplanat>=0.6.15
- epi2melabs
- minimap2==2.24
- samtools==1.14
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ params {
out_dir = "output"
sample = null
sample_sheet = null
wfversion = "v0.1.5"
wfversion = "v0.1.6"
aws_image_prefix = null
aws_queue = null
report_name = "report"
Expand Down
2 changes: 1 addition & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@
},
"wfversion": {
"type": "string",
"default": "v0.1.5",
"default": "v0.1.6",
"hidden": true
},
"monochrome_logs": {
Expand Down
21 changes: 11 additions & 10 deletions subworkflows/differential_expression.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@ process count_transcripts {
// Count transcripts using Salmon.
// Library type is specified as forward stranded (-l SF) because the reads should either have been through pychopper or come from direct RNA sequencing.
label "isoforms"
cpus params.threads
input:
tuple val(sample_id), path(bam)
path ref_transcriptome
output:
path "*transcript_counts.tsv", emit: counts
path "*seqkit.stats", emit: seqkit_stats
"""
salmon quant --noErrorModel -p $params.threads -t $ref_transcriptome -l SF -a $bam -o counts
mv counts/quant.sf "${sample_id}".transcript_counts.tsv
seqkit bam "$bam" 2> "${sample_id}".seqkit.stats
salmon quant --noErrorModel -p "${task.cpus}" -t "${ref_transcriptome}" -l SF -a "${bam}" -o counts
mv counts/quant.sf "${sample_id}.transcript_counts.tsv"
seqkit bam "${bam}" 2> "${sample_id}.seqkit.stats"
"""
}

Expand All @@ -23,7 +24,7 @@ process mergeCounts {
output:
path "all_counts.tsv"
"""
merge_count_tsvs.py -z -o all_counts.tsv $counts
merge_count_tsvs.py -z -o all_counts.tsv -tsvs ${counts}
"""
}

Expand All @@ -34,7 +35,7 @@ process mergeTPM {
output:
path "tpm_counts.tsv"
"""
merge_count_tsvs.py -z -o tpm_counts.tsv $counts -tpm
merge_count_tsvs.py -o tpm_counts.tsv -z -tpm True -tsvs $counts
"""
}

Expand Down Expand Up @@ -101,7 +102,7 @@ process build_minimap_index_transcriptome{
path "genome_index.mmi", emit: index
script:
"""
minimap2 -t ${params.threads} ${params.minimap_index_opts} -I 1000G -d "genome_index.mmi" ${reference}
minimap2 -t "${task.cpus}" ${params.minimap_index_opts} -I 1000G -d "genome_index.mmi" "${reference}"
"""
}
Expand All @@ -123,10 +124,10 @@ process map_transcriptome{
output:
tuple val(sample_id), path("${sample_id}_reads_aln_sorted.bam"), emit: bam
"""
minimap2 -t ${params.threads} -ax splice -uf -p 1.0 $index $fastq_reads\
| samtools view -Sb > output.bam
samtools sort -@ ${params.threads} output.bam -o "${sample_id}"_reads_aln_sorted.bam
samtools index "${sample_id}"_reads_aln_sorted.bam
minimap2 -t ${task.cpus} -ax splice -uf -p 1.0 "${index}" "${fastq_reads}" \
| samtools view -Sb > "output.bam"
samtools sort -@ ${task.cpus} "output.bam" -o "${sample_id}_reads_aln_sorted.bam"
samtools index "${sample_id}_reads_aln_sorted.bam"
"""
}

Expand Down

0 comments on commit 14ee337

Please sign in to comment.