Skip to content

Commit

Permalink
Merge pull request #562 from monarch-initiative/mapping
Browse files Browse the repository at this point in the history
Download mappings from data.monarchinitiative.org
  • Loading branch information
kevinschaper authored Jan 25, 2024
2 parents 9fd9880 + ded65ef commit 95afa84
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 59 deletions.
1 change: 1 addition & 0 deletions mkdocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ nav:
- Overview: 'Sources/index.md'
- Alliance: 'Sources/alliance.md'
- BGee: 'Sources/bgee.md'
- BioGrid: 'Sources/biogrid.md'
- CTD: 'Sources/ctd.md'
# - Dictybase: 'Sources/dictybase.md'
# - Flybase: 'Sources/flybase.md'
Expand Down
32 changes: 19 additions & 13 deletions src/monarch_ingest/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ def merge_files(
mappings = []
mappings.append("data/monarch/mondo.sssom.tsv")
mappings.append("data/monarch/gene_mappings.sssom.tsv")
mappings.append("data/monarch/chebi-mesh.biomappings.sssom.tsv")
mappings.append("data/monarch/mesh_chebi_biomappings.sssom.tsv")

logger.info("Merging knowledge graph...")

Expand All @@ -316,18 +316,22 @@ def apply_closure(
output_dir: str = OUTPUT_DIR,
):
output_file = f"{output_dir}/{name}-denormalized-edges.tsv"
add_closure(kg_archive=f"{output_dir}/{name}.tar.gz",
closure_file=closure_file,
output_file=output_file,
fields=['subject',
'object',
'qualifiers',
'frequency_qualifier',
'onset_qualifier',
'sex_qualifier',
'stage_qualifier'],
evidence_fields=['has_evidence', 'publications'],
grouping_fields=['subject', 'negated', 'predicate', 'object'])
add_closure(
kg_archive=f"{output_dir}/{name}.tar.gz",
closure_file=closure_file,
output_file=output_file,
fields=[
"subject",
"object",
"qualifiers",
"frequency_qualifier",
"onset_qualifier",
"sex_qualifier",
"stage_qualifier",
],
evidence_fields=["has_evidence", "publications"],
grouping_fields=["subject", "negated", "predicate", "object"],
)
sh.pigz(output_file, force=True)


Expand Down Expand Up @@ -397,9 +401,11 @@ def load_jsonl():
os.remove("output/monarch-kg_nodes.jsonl")
os.remove("output/monarch-kg_edges.jsonl")


def export_tsv():
    """Run the export step by delegating to ``export()`` with no arguments."""
    export()


def do_release(dir: str = OUTPUT_DIR, kghub: bool = False):
import datetime

Expand Down
15 changes: 10 additions & 5 deletions src/monarch_ingest/download.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -293,16 +293,22 @@
local_name: data/mgi/MRK_Reference.rpt
tag: mgi_publication_to_gene

### Monarch
### Mapping files
-
url: 'http://data.monarchinitiative.org/monarch-gene-mapping/latest/gene_mappings.sssom.tsv'
# url: 'http://storage.googleapis.com/data-public-monarchinitiative/monarch-gene-mapping/latest/gene_mappings.tsv'
url: 'https://data.monarchinitiative.org/mappings/latest/gene_mappings.sssom.tsv'
local_name: data/monarch/gene_mappings.sssom.tsv
tag: mapping
-
url: https://raw.githubusercontent.com/monarch-initiative/mondo/master/src/ontology/mappings/mondo.sssom.tsv
url: https://data.monarchinitiative.org/mappings/latest/mondo.sssom.tsv
local_name: data/monarch/mondo.sssom.tsv
tag: mapping

-
url: https://data.monarchinitiative.org/mappings/latest/mesh_chebi_biomappings.sssom.tsv
local_name: data/monarch/mesh_chebi_biomappings.sssom.tsv
tag: mapping

### Monarch
-
url: https://kg-hub.berkeleybop.io/kg-phenio/current/kg-phenio.tar.gz
local_name: data/monarch/kg-phenio.tar.gz
Expand All @@ -315,7 +321,6 @@
url: https://s3.amazonaws.com/bbop-sqlite/phenio.db.gz
local_name: data/monarch/phenio.db.gz
tag: phenio

# -
# url: https://ci.monarchinitiative.org/job/monarch-ontology-json-sri/lastSuccessfulBuild/artifact/build/monarch-ontology-final.json
# local_name: data/monarch/monarch.json
Expand Down
93 changes: 52 additions & 41 deletions src/monarch_ingest/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,20 @@

from kghub_downloader.download_utils import download_from_yaml
from monarch_ingest.cli_utils import (
apply_closure,
apply_closure,
do_release,
export_tsv,
load_jsonl,
load_sqlite,
load_solr,
merge_files,
transform_one,
transform_phenio,
transform_all,
load_sqlite,
load_solr,
merge_files,
transform_one,
transform_phenio,
transform_all,
)

import typer

typer_app = typer.Typer()

OUTPUT_DIR = "output"
Expand All @@ -24,28 +25,26 @@
def callback(version: Optional[bool] = typer.Option(None, "--version", is_eager=True)):
if version:
from monarch_ingest import __version__

typer.echo(f"monarch_ingest version: {__version__}")
raise typer.Exit()
raise typer.Exit()


@typer_app.command()
def download(
ingests: Optional[List[str]] = typer.Option(None, help="Which ingests to download data for"),
all: bool = typer.Option(False, help="Download all ingest datasets")
):
ingests: Optional[List[str]] = typer.Option(None, help="Which ingests to download data for"),
all: bool = typer.Option(False, help="Download all ingest datasets"),
):
"""Downloads data defined in download.yaml"""

if ingests:
download_from_yaml(
yaml_file='src/monarch_ingest/download.yaml',
output_dir='.',
yaml_file="src/monarch_ingest/download.yaml",
output_dir=".",
tags=ingests,
)
elif all:
download_from_yaml(
yaml_file='src/monarch_ingest/download.yaml',
output_dir='.'
)
download_from_yaml(yaml_file="src/monarch_ingest/download.yaml", output_dir=".")


@typer_app.command()
Expand All @@ -55,48 +54,56 @@ def transform(
ingest: str = typer.Option(None, "--ingest", "-i", help="Run a single ingest (see ingests.yaml for a list)"),
phenio: bool = typer.Option(False, help="Run the phenio transform"),
all: bool = typer.Option(False, "--all", "-a", help="Ingest all sources"),
force: bool = typer.Option(False, "--force", "-f", help="Force ingest, even if output exists (on by default for single ingests)"),
force: bool = typer.Option(
False, "--force", "-f", help="Force ingest, even if output exists (on by default for single ingests)"
),
rdf: bool = typer.Option(False, help="Output rdf files along with tsv"),
verbose: Optional[bool] = typer.Option(None, "--debug/--quiet", "-d/-q", help="Use --quiet to suppress log output, --debug for verbose, including Koza logs"),
verbose: Optional[bool] = typer.Option(
None,
"--debug/--quiet",
"-d/-q",
help="Use --quiet to suppress log output, --debug for verbose, including Koza logs",
),
log: bool = typer.Option(False, "--log", "-l", help="Write DEBUG level logs to ./logs/ for each ingest"),
row_limit: int = typer.Option(None, "--row-limit", "-n", help="Number of rows to process"),
# parallel: int = typer.Option(None, "--parallel", "-p", help="Utilize Dask to perform multiple ingests in parallel"),
):
"""Run Koza transformation on specified Monarch ingests"""

if phenio:
transform_phenio(
output_dir=output_dir,
force=force,
verbose=verbose
)
transform_phenio(output_dir=output_dir, force=force, verbose=verbose)
elif ingest:
transform_one(
ingest = ingest,
output_dir = output_dir,
row_limit = row_limit,
rdf = rdf,
force = True if force is None else force,
verbose = verbose,
log = log,
ingest=ingest,
output_dir=output_dir,
row_limit=row_limit,
rdf=rdf,
force=True if force is None else force,
verbose=verbose,
log=log,
)
elif all:
transform_all(
output_dir = output_dir,
row_limit = row_limit,
rdf = rdf,
force = force,
output_dir=output_dir,
row_limit=row_limit,
rdf=rdf,
force=force,
verbose=verbose,
log = log,
log=log,
)


@typer_app.command()
def merge(
input_dir: str = typer.Option(f"{OUTPUT_DIR}/transform_output", help="Directory with nodes and edges to be merged",),
input_dir: str = typer.Option(
f"{OUTPUT_DIR}/transform_output",
help="Directory with nodes and edges to be merged",
),
output_dir: str = typer.Option(f"{OUTPUT_DIR}", help="Directory to output data"),
verbose: Optional[bool] = typer.Option(None, "--debug/--quiet", "-d/-q", help="Use --quiet to suppress log output, --debug for verbose"),
):
verbose: Optional[bool] = typer.Option(
None, "--debug/--quiet", "-d/-q", help="Use --quiet to suppress log output, --debug for verbose"
),
):
"""Merge nodes and edges into kg"""
merge_files(input_dir=input_dir, output_dir=output_dir, verbose=verbose)

Expand All @@ -105,10 +112,12 @@ def merge(
def closure():
    """Apply closure to the KG archive and write the compressed denormalized edges file"""
    # Delegates to cli_utils.apply_closure with its default arguments; that
    # function writes "<output_dir>/<name>-denormalized-edges.tsv" and then
    # compresses it with pigz.
    apply_closure()


@typer_app.command()
def jsonl():
    """Run the JSON Lines load step (delegates to cli_utils.load_jsonl)"""
    load_jsonl()


@typer_app.command()
def sqlite():
    """Run the SQLite load step (delegates to cli_utils.load_sqlite)"""
    load_sqlite()
Expand All @@ -118,15 +127,17 @@ def sqlite():
def solr():
    """Run the Solr load step (delegates to cli_utils.load_solr)"""
    load_solr()


@typer_app.command()
def export():
    """Run the TSV export step (delegates to cli_utils.export_tsv)"""
    export_tsv()


@typer_app.command()
def release(
dir: str = typer.Option(f"{OUTPUT_DIR}", help="Directory with kg to be released"),
kghub: bool = typer.Option(False, help="Also release to kghub S3 bucket")
):
kghub: bool = typer.Option(False, help="Also release to kghub S3 bucket"),
):
"""Copy data to Monarch GCP data buckets"""
do_release(dir, kghub)

Expand Down

0 comments on commit 95afa84

Please sign in to comment.