fixed merge conflicts

bihealth · Mar 28, 2023 · 1de1206 · 1de1206
2 parents af45929 + 8a65d78
commit 1de1206
Show file tree

Hide file tree

Showing 21 changed files with 251 additions and 679 deletions.
diff --git a/BUGS.md b/BUGS.md
@@ -0,0 +1,6 @@
+ * For some reason, `library(tidyverse)` does not work correctly in the
+ `seasnap-de` environment. However, dplyr and the other core tidyverse
+ packages load without issue.
+
+ * cluster profile doesn't work, but only when called from the pipeline.
+ Running the script interactively works well.
diff --git a/DE_pipeline.snake b/DE_pipeline.snake
@@ -64,11 +64,14 @@ def get_inputs_all():
 	#functional annotation
 	inputs.append(pph.expand_path(step = "goseq", extension = "go.rds",   if_set = dict(goseq=True) ))
 	inputs.append(pph.expand_path(step = "goseq", extension = "kegg.rds", if_set = dict(goseq=True) ))
-	inputs.append(pph.file_path(step = "annotation", extension = "rds", contrast="all"))
+	#inputs.append(pph.file_path(step = "annotation", extension = "rds", contrast="all"))
 	inputs.append(pph.file_path(step = "export_raw_counts", extension = "xlsx", contrast = "all"))
 	inputs.append(pph.expand_path(step = "cluster_profiler", extension = "rds", if_set = dict(cluster_profiler=dict(run=True)) ))
-	inputs.append(pph.file_path(step = "tmod_dbs", extension = "rds", contrast="all"))
-	inputs.append(pph.file_path(step = "tmod_pca", extension = "rds", contrast="all", if_set=dict(tmod_pca=True)))
+	# tmod_pca is only requested when it is enabled under contrasts -> defaults -> tmod_pca in the config
+
+	if config["contrasts"]["defaults"]["tmod_pca"]:
+		inputs.append(pph.file_path(step = "tmod_pca", extension = "rds", contrast="all"))
+
 	inputs.append(pph.expand_path(step = "tmod", extension = "rds", if_set = dict(tmod=True)))
 
 	#time series

diff --git a/README.md b/README.md
@@ -46,21 +46,39 @@ Quick-Start
 After cloning this git repository:
 
 ```
-git clone git@cubi-gitlab.bihealth.org:CUBI/Pipelines/sea-snap.git
+git clone git@cubi-gitlab.bihealth.org:CUBI/Pipelines/seasnap-pipeline.git
 ```
 
 all required tools and packages can be installed via conda.
-Download and install them into a new environment called `sea_snap`:
+
+Currently there are two separate conda environments, one for the mapping
+pipeline and one for the DE pipeline.
+
+Download and install them into new environments called `seasnap-mapping`
+and `seasnap-de`:
 
 ```
-conda env create -f conda_env.yaml
+conda env create -f conda_env_mapping.yaml
+conda env create -f conda_env_DE.yaml
 ```
 
-The file `conda_env.yaml` is located in the main directory of the git repository.
+The files `conda_env_mapping.yaml` and `conda_env_DE.yaml` are located in the main directory of the git repository.
 Each time before using SeA-SnaP, activate the environment with:
 
 ```
-conda activate sea_snap
+conda activate seasnap-mapping
+```
+
+or 
+```
+conda activate seasnap-de
+```
+
+Finally, install the additional R packages by running the following commands (the script must be run inside the `seasnap-de` environment):
+
+```
+conda activate seasnap-de
+Rscript install_r_packages.R
 ```
 
 ### Running the pipeline
@@ -90,7 +108,6 @@ The next steps depend on, whether you want to run:
 
 - [**`The mapping pipeline`**](documentation/run_mapping.md)
 - [**`The DE pipeline`**](documentation/run_DE.md)
-- [**`The sc pipeline`**](documentation/run_sc.md)
 
 The results of an analysis can also be [`exported`](documentation/export.md) to a new folder structure, e.g. to upload them to SODAR.
 

diff --git a/TODO.md b/TODO.md
@@ -2,5 +2,8 @@ The roadmap ahead:
 
 1) merging Eric's branches and the ATAC_seq branch
 2) making sure sea-snap runs smoothly with newer versions of snakemake (it
-doesn't currently)
-3) making the step from drmaa to cluster profiles.
+doesn't currently) --> DONE for mapping pipeline if snakemake version = 7.19.1
+3) making the step from drmaa to cluster profiles --> mostly DONE for mapping pipeline 
+4) optimizing resource requirements in `mapping_pipeline.snake` (check `mem` vs `mem_per_cpu`) and adding them to `DE_pipeline.snake`
+5) allowing sample-specific indices for bwa / salmon / kallisto?
+6) adapting the `conda_env.yaml` files
diff --git a/cluster_config_drmaa.json b/cluster_config_drmaa.json
@@ -33,7 +33,7 @@
 
     "salmon":
     {
-        "h_vmem": "4000",
+         "h_vmem": "4000",
         "h_rt": "40:00:00",
         "pe": "8"
     },
@@ -108,19 +108,12 @@
         "pe": "4"
     },
 
-    "cellranger_count":
-    {
-        "h_vmem": "4000",
-        "h_rt": "40:00:00",
+    "cluster_profiler":
+    {   "h_vmem": "20000",
+        "h_rt": "10:00:00",
         "pe": "1"
-    },
-
-    "velocyto_run":
-    {
-        "h_vmem": "20000",
-        "h_rt": "40:00:00",
-        "pe": "8"
     }
+
 }
 
 
diff --git a/cluster_config_slurm.json b/cluster_config_slurm.json
@@ -3,7 +3,7 @@
     {
         "snake_opt": "-j 100 -k --restart-times 0 --max-jobs-per-second 5 --rerun-incomplete",
 
-        "run_command": "--profile \"cubi-v1\""
+        "run_command": "--profile \"cubi-dev\""
     },
 
     "__default__":
@@ -19,15 +19,15 @@
 
     "star":
     {
-        "mem": "20000M",
-        "time": "40:00:00",
+        "mem": "60000M",
+        "time": "8:00:00",
         "cpus-per-task": 8
     },
 
     "star_index":
     {
-        "mem": "20000M",
-        "time": "40:00:00",
+        "mem": "40000M",
+        "time": "8:00:00",
         "cpus-per-task": 8
     },
 
@@ -120,6 +120,35 @@
         "mem": "20000M",
         "time": "40:00:00",
         "cpus-per-task": 8
+    },
+    "preseq_lc_extrap":
+    {
+        "mem": "40000M",
+        "time": "40:00:00",
+        "cpus-per-task": 8
+    },
+    "preseq_c_curve":
+    {
+        "mem": "40000M",
+        "time": "40:00:00",
+        "cpus-per-task": 8
+    },
+    "tmod_dbs":
+    {
+        "mem": "16000M",
+        "time": "8:00:00",
+        "cpus-per-task": 8
+    },
+
+    "cluster_profiler":
+    {
+        "mem": "8000M",
+        "time": "4:00:00",
+        "cpus-per-task": 8
     }
+
+
 }
 
+
+
diff --git a/conda_env.yaml → conda_env_DE.yaml b/conda_env.yaml → conda_env_DE.yaml
@@ -1,43 +1,20 @@
-name: sea_snap
+name: seasnap-de
 channels:
-  - bioconda
   - conda-forge
+  - bioconda
   - defaults
 dependencies:
-  - python
-  - r
-
-  - bamtools
-  - bedtools
-  - fastqc
-  - macs2
-  - multiqc
-  - picard
-  - preseq
-  - qualimap
-  - rna-seqc
-  - rsa
-  - rsem
-  - rseqc
-  - salmon
-  - samtools
-  - sqlite
-  - star
-  - subread
-  - tpmcalculator
-  - trimadap
-  - vim
-  - rpy2
-
-  - bioconductor-deseq2
+  - python>=3.9
+  - snakemake=7.19
+  - r-base>=4.1
+  - bioconductor-deseq2=1.38
   - bioconductor-genomicfeatures
   - bioconductor-annotationdbi
   - bioconductor-org.hs.eg.db # annotation for human
   - bioconductor-org.Mm.eg.db # annotation for mouse
   - bioconductor-tximport
   - bioconductor-goseq
   - bioconductor-apeglm
-  - bioconductor-dupradar
   - bioconductor-biomart
   - bioconductor-enrichplot
   - bioconductor-clusterprofiler
@@ -48,7 +25,6 @@ dependencies:
   - bioconductor-rtracklayer
   - r-blob
   - r-cairo
-  - r-cellranger
   - r-cowplot
   - r-crayon
   - r-devtools
@@ -57,20 +33,26 @@ dependencies:
   - r-ggplotify
   - r-readr
   - r-readxl
+  - r-writexl
   - r-roxygen2
   - r-tidyr
   - r-tidyselect
   - r-testthat
+  - r-remotes
   - r-ashr
   - r-pheatmap
   - r-shiny
-  - r-msigdbr
+  - r-msigdbr>=7.4
   - r-dplyr
   - r-biocmanager
   - r-xml2
   - r-xopen
-
+  - r-tmod>=0.50.11
+# Required for the actual report
+  - r-dt
+  - r-purrr
+  - r-plotly
+  - r-pander
+  - bioconductor-vsn
+  - r-tidyverse
   - drmaa
-  - snakemake
-
-
diff --git a/conda_env_mapping.yaml b/conda_env_mapping.yaml
@@ -0,0 +1,28 @@
+name: seasnap-mapping
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - python>=3.9
+  - snakemake=7.19
+  - bamtools
+  - bedtools
+  - fastqc
+  - macs2
+  - multiqc
+  - picard
+  - preseq
+  - qualimap
+  - rna-seqc
+  - rsem
+  - salmon
+  - samtools=1.16
+  - sqlite
+  - star=2.7.3a
+  - subread=2.0
+  - trimadap
+  - drmaa
+  - bioconductor-dupradar
+  - rseqc
+  - bwa
diff --git a/defaults/sc_config_defaults.yaml b/defaults/sc_config_defaults.yaml