diff --git a/CHANGELOG.md b/CHANGELOG.md index be802bf..32af769 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ ### Bug fixes - Ensure `renee build` creates necessary `config` directory during initialization. (#139, @kelly-sovacool) +- Run `rsem-generate-data-matrix` to create gene and isoform matrix files. (#149, @kelly-sovacool) - Fix bug in the driver script that caused the snakemake module not to be loaded on biowulf in some cases. (#154, @kelly-sovacool) ### Documentation updates diff --git a/workflow/Snakefile b/workflow/Snakefile index 92dd60a..9259569 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -139,12 +139,8 @@ if paired_end: expand(join(workpath,degall_dir,"{name}.RSEM.isoforms.results"),name=samples), join(workpath,degall_dir,"RSEM.genes.FPKM.all_samples.txt"), join(workpath,degall_dir,"RSEM.isoforms.FPKM.all_samples.txt"), - #join(workpath,degall_dir,"RawCountFile_RSEM_genes_filtered.txt"), - #join(workpath,star_dir,"sampletable.txt"), - - # PCA Reports - # expand(join(workpath,degall_dir,"PcaReport_{dtype}.html"),dtype=dtypes), - + join(workpath, degall_dir, "RSEM.genes.expected_counts.all_samples.matrix"), + join(workpath, degall_dir, "RSEM.isoforms.expected_counts.all_samples.matrix"), # MultiQC join(workpath,"Reports","multiqc_report.html"), @@ -202,11 +198,8 @@ elif not paired_end: expand(join(workpath,degall_dir,"{name}.RSEM.isoforms.results"),name=samples), join(workpath,degall_dir,"RSEM.genes.FPKM.all_samples.txt"), join(workpath,degall_dir,"RSEM.isoforms.FPKM.all_samples.txt"), - #join(workpath,degall_dir,"RawCountFile_RSEM_genes_filtered.txt"), - #join(workpath,star_dir,"sampletable.txt"), - - # PCA Report - # expand(join(workpath,degall_dir,"PcaReport_{dtype}.html"),dtype=dtypes), + join(workpath, degall_dir, "RSEM.genes.expected_counts.all_samples.matrix"), + join(workpath, degall_dir, "RSEM.isoforms.expected_counts.all_samples.matrix"), # MultiQC join(workpath,"Reports","multiqc_report.html"), diff --git 
a/workflow/rules/common.smk b/workflow/rules/common.smk
index a76f7c6..a159441 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -184,7 +184,7 @@ rule stats:
         """
 
 
-rule rsem_merge: # TODO is this redundant with `rsem-generate-data-matrix`? see https://github.com/CCBR/RENEE/issues/137
+rule rsem_merge:
     """Data processing step to merge the gene and isoform counts for each sample
     into count matrices.
     @Input:
@@ -213,6 +213,34 @@ rule rsem_merge: # TODO is this redundant with `rsem-generate-data-matrix`? see
         sed '1 s/^gene_id|GeneName/symbol/' > {output.reformatted}
         """
 
+rule rsem_data_matrix:
+    """Data processing step to combine the per-sample RSEM results into gene and
+    isoform expected-count matrices with `rsem-generate-data-matrix`.
+    @Input:
+        RSEM gene results file for each sample (gather)
+        RSEM isoform results file for each sample (gather)
+    @Output:
+        Gene expected-count matrix across all samples
+        Isoform expected-count matrix across all samples
+    """
+    input:
+        genes=expand(join(workpath,degall_dir,"{name}.RSEM.genes.results"), name=samples),
+        isoforms=expand(join(workpath,degall_dir,"{name}.RSEM.isoforms.results"), name=samples),
+    output:
+        genes=join(workpath, degall_dir, "RSEM.genes.expected_counts.all_samples.matrix"),
+        isoforms=join(workpath, degall_dir, "RSEM.isoforms.expected_counts.all_samples.matrix")
+    params:
+        rname='pl:rsem_data_matrix',
+    envmodules:
+        config['bin'][pfamily]['tool_versions']['RSEMVER'],
+        config['bin'][pfamily]['tool_versions']['PYTHONVER'],
+    container: config['images']['rsem']
+    shell:
+        """
+        rsem-generate-data-matrix {input.genes} > {output.genes}
+        rsem-generate-data-matrix {input.isoforms} > {output.isoforms}
+        """
+
 
 rule rseqc:
     """