diff --git a/docs/CHANGELOG.rst b/docs/CHANGELOG.rst index 07d973e58..267b006a0 100644 --- a/docs/CHANGELOG.rst +++ b/docs/CHANGELOG.rst @@ -20,6 +20,8 @@ Added Changed ------- +- Remove ``rnaseqc-qc`` 3' bias statistics from MultiQC report +- Remove ``rnaseqc-qc`` from RNA-seq workflows Fixed ----- diff --git a/resolwe_bio/processes/support_processors/multiqc.py b/resolwe_bio/processes/support_processors/multiqc.py index bfabce990..7925506b0 100644 --- a/resolwe_bio/processes/support_processors/multiqc.py +++ b/resolwe_bio/processes/support_processors/multiqc.py @@ -65,12 +65,6 @@ def create_summary_table(samples, species, build): json.dump(sample_summary_json, out_file) -def parse_rnaseqc_report(report): - """Parse RNA-SeQC QC report file.""" - df = pd.read_csv(report, sep="\t") - return dict(df.values) - - def parse_genebody_report(report): """Parse QoRTs gene body coverage metrics report file.""" df = pd.read_csv(report, sep="\t", compression="gzip") @@ -79,37 +73,6 @@ def parse_genebody_report(report): return dict -def create_coverage_table(sample_names, reports): - """Prepare coverage metrics table.""" - coverage_stats = [ - "Genes used in 3' bias", - "Mean 3' bias", - "Median 3' bias", - "3' bias Std", - "3' bias MAD_Std", - "3' Bias, 25th Percentile", - "3' Bias, 75th Percentile", - ] - - coverage_qc_json = { - "id": "coverage_qc", - "section_name": "RNA-SeQC Coverage Stats", - "plot_type": "table", - "file_format": "json", - "data": {}, - } - - for sample_name, report in zip(sample_names, reports): - report_data = parse_rnaseqc_report(report) - - coverage_qc_json["data"][sample_name] = { - k: report_data[k] for k in coverage_stats if k in report_data - } - - with open("rnaseqc_coverage_mqc.json", "w") as out_file: - json.dump(coverage_qc_json, out_file) - - def create_coverage_plot(sample_names, reports): """Prepare QoRTs gene body coverage plot.""" genebody_qc_json = { @@ -474,7 +437,7 @@ class MultiQC(Process): } category = "QC" data_name = "MultiQC report" - version = "1.23.0" + version = "1.24.0" class Input: """Input fields to process MultiQC.""" @@ -552,8 +515,6 @@ def run(self, inputs, outputs): unsupported_data = [] star_quantification_samples = [] star_quantification_reports = [] - rnaseqc_samples = [] - rnaseqc_reports = [] qorts_samples = [] qorts_reports = [] @@ -733,14 +694,9 @@ def run(self, inputs, outputs): elif d.process.type == "data:rnaseqc:qc:": name = os.path.basename(d.output.metrics.path) - rnaseqc_samples.append(sample_name) - rnaseqc_reports.append(d.output.metrics.path) create_symlink( src=d.output.metrics.path, dst=os.path.join(sample_dir, name) ) - create_coverage_table( - sample_names=rnaseqc_samples, reports=rnaseqc_reports - ) elif d.process.type == "data:expression:salmon:": # Symlink files/dirs without the parent directory to diff --git a/resolwe_bio/processes/workflows/bbduk_salmon_qc.py b/resolwe_bio/processes/workflows/bbduk_salmon_qc.py index 9eb902eda..e3c8329d4 100644 --- a/resolwe_bio/processes/workflows/bbduk_salmon_qc.py +++ b/resolwe_bio/processes/workflows/bbduk_salmon_qc.py @@ -42,7 +42,7 @@ class WorkflowBbdukSalmonQc(Process): entity = { "type": "sample", } - version = "4.3.1" + version = "4.4.0" process_type = "data:workflow:rnaseq:salmon" category = "Pipeline" @@ -449,22 +449,4 @@ def run(self, inputs, outputs): ] } - # RNA-SeQC tool is initiated only if annotation source is ENSEMBL - if inputs.annotation.output.source == "ENSEMBL": - input_rnaseqc = { - "alignment": alignment_qc, - "annotation": inputs.annotation, - "strand_detection_options": { - "stranded": "auto", - "cdna_index": inputs.salmon_index, - "n_reads": 5000000, - }, - } - rnaseqc = Data.create( - process=BioProcess.get_latest(slug="rnaseqc-qc"), - input=input_rnaseqc, - name=f"RNA-SeQC QC report ({inputs.reads.name})", - ) - input_multiqc["data"].append(rnaseqc) - Data.create(process=BioProcess.get_latest(slug="multiqc"), input=input_multiqc) diff --git a/resolwe_bio/processes/workflows/bbduk_star.py b/resolwe_bio/processes/workflows/bbduk_star.py index 3f9c7c138..1b95621d8 100644 --- a/resolwe_bio/processes/workflows/bbduk_star.py +++ b/resolwe_bio/processes/workflows/bbduk_star.py @@ -41,7 +41,7 @@ class WorkflowSTAR(Process): "expression-engine": "jinja", } data_name = "{{ reads|name|default('?') }}" - version = "1.4.0" + version = "1.5.0" entity = { "type": "sample", } @@ -731,24 +731,4 @@ def run(self, inputs, outputs): "advanced": {"dirs": True, "config": True}, } - # RNA-SeQC tool is initiated only if annotation source is ENSEMBL - if inputs.annotation.output.source == "ENSEMBL": - input_rnaseqc = { - "alignment": alignment_downsampled, - "annotation": inputs.annotation, - "strand_detection_options": {"stranded": inputs.assay_type}, - } - - if inputs.cdna_index: - input_rnaseqc["strand_detection_options"][ - "cdna_index" - ] = inputs.cdna_index - - rnaseqc = Data.create( - process=BioProcess.get_latest(slug="rnaseqc-qc"), - input=input_rnaseqc, - name=f"RNA-SeQC QC report ({inputs.reads.name})", - ) - input_multiqc["data"].append(rnaseqc) - Data.create(process=BioProcess.get_latest(slug="multiqc"), input=input_multiqc) diff --git a/resolwe_bio/processes/workflows/bbduk_star_featurecounts_qc.py b/resolwe_bio/processes/workflows/bbduk_star_featurecounts_qc.py index 635022261..1030800ce 100644 --- a/resolwe_bio/processes/workflows/bbduk_star_featurecounts_qc.py +++ b/resolwe_bio/processes/workflows/bbduk_star_featurecounts_qc.py @@ -46,7 +46,7 @@ class WorkflowBBDukStarFcQC(Process): entity = { "type": "sample", } - version = "6.2.0" + version = "6.3.0" process_type = "data:workflow:rnaseq:featurecounts:qc" category = "Pipeline" @@ -764,24 +764,4 @@ def run(self, inputs, outputs): "advanced": {"dirs": True, "config": True}, } - # RNA-SeQC tool is initiated only if annotation source is ENSEMBL - if inputs.annotation.output.source == "ENSEMBL": - input_rnaseqc = { - "alignment": alignment_downsampled, - "annotation": inputs.annotation, - "strand_detection_options": {"stranded": inputs.assay_type}, - } - - if inputs.cdna_index: - input_rnaseqc["strand_detection_options"][ - "cdna_index" - ] = inputs.cdna_index - - rnaseqc = Data.create( - process=BioProcess.get_latest(slug="rnaseqc-qc"), - input=input_rnaseqc, - name=f"RNA-SeQC QC report ({inputs.reads.name})", - ) - input_multiqc["data"].append(rnaseqc) - Data.create(process=BioProcess.get_latest(slug="multiqc"), input=input_multiqc) diff --git a/resolwe_bio/tests/processes/test_support_processors.py b/resolwe_bio/tests/processes/test_support_processors.py index 482ec22fd..5242e3b2c 100644 --- a/resolwe_bio/tests/processes/test_support_processors.py +++ b/resolwe_bio/tests/processes/test_support_processors.py @@ -491,14 +491,6 @@ def test_multiqc(self): }, ) - rnaseqc_report = self.run_process( - "rnaseqc-qc", - { - "alignment": star_alignment.id, - "annotation": annotation.id, - }, - ) - # BED file is not part of a sample entity. Test if MultiQC process # correctly skips this input data object bed = self.run_process( @@ -518,7 +510,6 @@ def test_multiqc(self): star_quantification.id, samtools_idxstats.id, qorts_report.id, - rnaseqc_report.id, bed.id, ], "advanced": {