Skip to content

Commit

Permalink
Merge pull request #612 from monarch-initiative/big-phenio-update
Browse files Browse the repository at this point in the history
phenio, kghub-downloader, xenbase, biogrid updates
  • Loading branch information
kevinschaper authored Nov 28, 2024
2 parents a4894f4 + 28921b0 commit a4ace55
Show file tree
Hide file tree
Showing 8 changed files with 179 additions and 194 deletions.
319 changes: 152 additions & 167 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ biolink-model = "^4.2.0"
bmt = "^1.0.15"
cat-merge = "0.2.1"
closurizer = "0.7.0"
kghub-downloader = "^0.3.10"
kghub-downloader = "^0.4.1"
kgx = "^2.4.2"
koza = "^0.6.1"
linkml = "^1.7.8"
Expand Down
2 changes: 1 addition & 1 deletion scripts/after_download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ ${ZCAT} data/alliance/BGI_HUMAN.json.gz | jq -r '.data[] | "\(.basicGeneticEnti
sqlite3 -cmd ".mode tabs" -cmd ".headers on" data/dictybase/ddpheno.db "select subject as id, value as name from rdfs_label_statement where predicate = 'rdfs:label' and subject like 'DDPHENO:%'" > data/dictybase/ddpheno.tsv

# Unpack the phenio relation graph file
tar -xzf data/monarch/phenio-relation-graph.tar.gz -C data/monarch/
gunzip data/monarch/phenio-relation-graph.tsv.gz

awk '{ if ($2 == "rdfs:subClassOf" || $2 == "BFO:0000050" || $2 == "UPHENO:0000001") { print } }' data/monarch/phenio-relation-graph.tsv > data/monarch/phenio-relation-filtered.tsv

Expand Down
4 changes: 2 additions & 2 deletions scripts/load_solr.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ echo "Download the schema from monarch-py"

# retrieve the schema from the main branch on monarch-app

curl -O https://raw.githubusercontent.com/monarch-initiative/monarch-app/add-species-context-qualifier/backend/src/monarch_py/datamodels/model.yaml
curl -O https://raw.githubusercontent.com/monarch-initiative/monarch-app/add-species-context-qualifier/backend/src/monarch_py/datamodels/similarity.yaml
curl -O https://raw.githubusercontent.com/monarch-initiative/monarch-app/main/backend/src/monarch_py/datamodels/model.yaml
curl -O https://raw.githubusercontent.com/monarch-initiative/monarch-app/main/backend/src/monarch_py/datamodels/similarity.yaml

echo "Starting the server"
poetry run lsolr start-server
Expand Down
16 changes: 8 additions & 8 deletions src/monarch_ingest/download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,8 @@
local_name: data/monarch/kg-phenio.tar.gz
tag: phenio
-
url: https://github.com/monarch-initiative/phenio/releases/latest/download/phenio-relation-graph.tar.gz
local_name: data/monarch/phenio-relation-graph.tar.gz
url: https://github.com/monarch-initiative/phenio/releases/latest/download/phenio-relation-graph.tsv.gz
local_name: data/monarch/phenio-relation-graph.tsv.gz
tag: phenio
-
url: https://s3.amazonaws.com/bbop-sqlite/phenio.db.gz
Expand Down Expand Up @@ -446,11 +446,11 @@

### Xenbase
-
url: http://ftp.xenbase.org/pub/GenePageReports/XenbaseGenepageToGeneIdMapping.txt
url: https://download.xenbase.org/xenbase/GenePageReports/XenbaseGenepageToGeneIdMapping.txt
local_name: data/xenbase/XenbaseGenepageToGeneIdMapping.txt
tag: xenbase_publication_to_gene
-
url: http://ftp.xenbase.org/pub/GenePageReports/LiteratureMatchedGenesByPaper.txt
url: https://download.xenbase.org/xenbase/GenePageReports/LiteratureMatchedGenesByPaper.txt
local_name: data/xenbase/LiteratureMatchedGenesByPaper.txt
tag: xenbase_publication_to_gene
-
Expand All @@ -461,25 +461,25 @@
#### Xenopus orthologs

- # Homo sapiens (human)
url: http://ftp.xenbase.org/pub/GenePageReports/XenbaseGeneHumanOrthologMapping.txt
url: https://download.xenbase.org/xenbase/GenePageReports/XenbaseGeneHumanOrthologMapping.txt
local_name: data/xenbase/XenbaseGeneHumanOrthologMapping.txt
tag: xenbase_orthologs

- # Mus musculus (house mouse)
url: http://ftp.xenbase.org/pub/GenePageReports/XenbaseGeneMouseOrthologMapping.txt
url: https://download.xenbase.org/xenbase/GenePageReports/XenbaseGeneMouseOrthologMapping.txt
local_name: data/xenbase/XenbaseGeneMouseOrthologMapping.txt
tag: xenbase_orthologs

- # Danio rerio (Zebrafish)
url: http://ftp.xenbase.org/pub/GenePageReports/XenbaseGeneZebrafishOrthologMapping.txt
url: https://download.xenbase.org/xenbase/GenePageReports/XenbaseGeneZebrafishOrthologMapping.txt
local_name: data/xenbase/XenbaseGeneZebrafishOrthologMapping.txt
tag: xenbase_orthologs


# TODO: parse this oddball non-Entrez ortholog file separately?
#
- # Non-Entrez OMIM, MGI, and ZFIN genes
url: http://ftp.xenbase.org/pub/GenePageReports/XenbaseGeneNonEntrezOrthologMapping.txt
url: https://download.xenbase.org/xenbase/GenePageReports/XenbaseGeneNonEntrezOrthologMapping.txt
local_name: data/xenbase/XenbaseGeneNonEntrezOrthologMapping.txt
tag: xenbase_non_entrez_orthologs

Expand Down
20 changes: 10 additions & 10 deletions src/monarch_ingest/ingests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ alliance_phenotype:
alliance_disease_association:
url:
- 'https://github.com/monarch-initiative/alliance-disease-association-ingest/releases/latest/download/alliance_disease_edges.tsv'
biogrid:
url:
- 'https://github.com/monarch-initiative/biogrid-ingest/releases/latest/download/biogrid_gene_to_gene_edges.tsv'
clingen_variant:
url:
- 'https://github.com/monarch-initiative/clingen-ingest/releases/latest/download/clingen_variant_nodes.tsv'
Expand All @@ -23,14 +26,15 @@ hpoa:
- 'https://github.com/monarch-initiative/monarch-phenotype-profile-ingest/releases/latest/download/hpoa_gene_to_disease_edges.tsv'
- 'https://github.com/monarch-initiative/monarch-phenotype-profile-ingest/releases/latest/download/hpoa_gene_to_phenotype_edges.tsv'
maxo_annotation:
url: 'https://github.com/monarch-initiative/maxo-annotation-ingest/releases/latest/download/maxo_annotation_edges.tsv'
url:
- 'https://github.com/monarch-initiative/maxo-annotation-ingest/releases/latest/download/maxo_annotation_edges.tsv'
ncbi_gene:
url:
- 'https://github.com/monarch-initiative/ncbi-gene/releases/latest/download/ncbi_gene_9615_nodes.tsv'
- 'https://github.com/monarch-initiative/ncbi-gene/releases/latest/download/ncbi_gene_9913_nodes.tsv'
- 'https://github.com/monarch-initiative/ncbi-gene/releases/latest/download/ncbi_gene_9823_nodes.tsv'
- 'https://github.com/monarch-initiative/ncbi-gene/releases/latest/download/ncbi_gene_9031_nodes.tsv'
- 'https://github.com/monarch-initiative/ncbi-gene/releases/latest/download/ncbi_gene_227321_nodes.tsv'
- 'https://github.com/monarch-initiative/ncbi-gene/releases/latest/download/ncbi_gene_9615_nodes.tsv'
- 'https://github.com/monarch-initiative/ncbi-gene/releases/latest/download/ncbi_gene_9913_nodes.tsv'
- 'https://github.com/monarch-initiative/ncbi-gene/releases/latest/download/ncbi_gene_9823_nodes.tsv'
- 'https://github.com/monarch-initiative/ncbi-gene/releases/latest/download/ncbi_gene_9031_nodes.tsv'
- 'https://github.com/monarch-initiative/ncbi-gene/releases/latest/download/ncbi_gene_227321_nodes.tsv'
zfin_genotype_to_phenotype:
url:
- 'https://github.com/monarch-initiative/zfin-genotype-to-phenotype-ingest/releases/latest/download/zfin_genotype_to_phenotype_edges.tsv'
Expand All @@ -46,12 +50,8 @@ alliance_gene:
config: 'ingests/alliance/gene.yaml'
alliance_gene_to_expression:
config: 'ingests/alliance/gene_to_expression.yaml'
# alliance_publication:
# config: 'ingests/alliance/publication.yaml'
bgee_gene_to_expression:
config: 'ingests/bgee/gene_to_expression.yaml'
biogrid:
config: 'ingests/biogrid/biogrid.yaml'
ctd_chemical_to_disease:
config: 'ingests/ctd/chemical_to_disease.yaml'
dictybase_gene:
Expand Down
2 changes: 1 addition & 1 deletion src/monarch_ingest/ingests/pombase/gene_to_phenotype.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ columns:
- 'Parental strain'
- 'Strain name (background)'
- 'Genotype description'
- 'Gene name'
- 'Gene symbol'
- 'Allele name'
- 'Allele synonym'
- 'Allele type'
Expand Down
8 changes: 4 additions & 4 deletions src/monarch_ingest/qc_expect.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ edges:
min: 1870000
bgee_gene_to_expression_edges:
min: 430000
biogrid_edges:
min: 1400000
biogrid_gene_to_gene_edges:
min: 1340000
ctd_chemical_to_disease_edges:
min: 5000
dictybase_gene_to_phenotype_edges:
Expand All @@ -62,13 +62,13 @@ edges:
reactome_gene_to_pathway_edges:
min: 200000
string_protein_links_edges:
min: 1470000
min: 1420000
xenbase_gene_to_phenotype_edges:
min: 2000
alliance_phenotype_edges:
min: 650000
alliance_disease_edges:
min: 10000
min: 9900
zfin_genotype_to_phenotype_edges:
min: 125000
clinvar_variant_edges:
Expand Down

0 comments on commit a4ace55

Please sign in to comment.