From 3f9ed908cd5e9468c373a53ce48a33f5bddc46f8 Mon Sep 17 00:00:00 2001 From: "Brian M. Schilder" <34280215+bschilder@users.noreply.github.com> Date: Tue, 19 Mar 2024 14:34:50 +0000 Subject: [PATCH] add explanation to CSL report --- README.Rmd | 1 + reports/CSL.Rmd | 9 ++++++++- reports/CSL.html | 21 ++++++++++++--------- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/README.Rmd b/README.Rmd index 47de88e..e14f38d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -31,4 +31,5 @@ Thesis project by Momoko Otani, supervised by Nathan Skene and Brian Schilder. ## [Differential outcomes](https://neurogenomics.github.io/RareDiseasePrioritisation/reports/differential_outcomes) ## [GPT annotations](https://neurogenomics.github.io/RareDiseasePrioritisation/reports/gpt_annotations) ## [Orphanet prevalence data](https://neurogenomics.github.io/RareDiseasePrioritisation/reports/orphanet_prevalence) +## [CSL Areas of Interest](https://neurogenomics.github.io/RareDiseasePrioritisation/reports/CSL) diff --git a/reports/CSL.Rmd b/reports/CSL.Rmd index d97380b..d195c24 100644 --- a/reports/CSL.Rmd +++ b/reports/CSL.Rmd @@ -25,6 +25,8 @@ knitr::opts_chunk$set(warning = FALSE, ### Import CSL areas of interest +CSL areas of interest mapped onto Human Phenotype Ontology (HPO) and/or other disease ontologies (OMIM/ORPH). + ```{r} csl <- data.table::fread(here::here("data/CSL_areas_of_interest.tsv")) csl <- csl[Group=="Early Stage Partnering"] @@ -91,6 +93,11 @@ targets <- all_targets[ | disease_id %in% disease_ids ) ] +MSTExplorer::create_dt(head(targets,100)) +``` + +Summarise results: +```{r, message=TRUE} message(paste0( length(intersect(all_targets$hpo_id,hpo_ids_extended)),"/", length(unique(hpo_ids_extended)), @@ -99,9 +106,9 @@ message(paste0( " CSL phenotypes (across ",length(unique(all_targets$disease_id))," diseases)", " covered in our prioritised targets." )) -MSTExplorer::create_dt(head(targets,100)) ``` + ### Get the top candidates per area of interest #### Per HPO ancestor diff --git a/reports/CSL.html b/reports/CSL.html index e50e5a5..8bb1587 100644 --- a/reports/CSL.html +++ b/reports/CSL.html @@ -76755,6 +76755,8 @@


Import data

Import CSL areas of interest

+

CSL areas of interest mapped onto Human Phenotype Ontology (HPO) +and/or other disease ontologies (OMIM/ORPH).

csl <- data.table::fread(here::here("data/CSL_areas_of_interest.tsv"))
 csl <- csl[Group=="Early Stage Partnering"]
 # extract IDs from the Entry column of csl data.table with the pattern "HP:", "OMIM:", "ORPHA:" and put into a new col
@@ -76814,17 +76816,19 @@ 

Prioritise targets

| disease_id %in% disease_ids ) ] -message(paste0( +MSTExplorer::create_dt(head(targets,100))
+
+ +

Summarise results:

+
message(paste0(
   length(intersect(all_targets$hpo_id,hpo_ids_extended)),"/",
         length(unique(hpo_ids_extended)),
         " (",round(length(intersect(hpo_ids_extended,all_targets$hpo_id))/
                   length(unique(hpo_ids_extended))*100,2),"%)",
         " CSL phenotypes (across ",length(unique(all_targets$disease_id))," diseases)",
   " covered in our prioritised targets."
-))
-MSTExplorer::create_dt(head(targets,100))
-
- +)) +
## 594/1860 (31.94%) CSL phenotypes (across 4850 diseases) covered in our prioritised targets.

Get the top candidates per area of interest

@@ -76848,10 +76852,9 @@

Per CSL area of interest

top_targets <- targets2[!is.na(Area), head(.SD,3), by=c("Area")] # drop list columns -save_path <- here::here("top_targets_CSL.tsv") -top_targets[,-names(top_targets)[sapply(top_targets,is.list)],with=FALSE] |> - data.table::fwrite(save_path, sep="\t") - +# save_path <- here::here("reports/top_targets_CSL.tsv") +# top_targets[,-names(top_targets)[sapply(top_targets,is.list)],with=FALSE] |> +# data.table::fwrite(save_path, sep="\t") # top_targets <- data.table::fread(here::here("top_targets_CSL.tsv")) MSTExplorer::create_dt(top_targets)