Skip to content

Commit

Permalink
genotype and variant support (#740)
Browse files Browse the repository at this point in the history
Adds initial support for genotypes & variants and their associations to
the API & UI

---------

Co-authored-by: glass-ships <glass.ships@outlook.com>
  • Loading branch information
kevinschaper and glass-ships authored Jun 11, 2024
1 parent a632971 commit c106f34
Show file tree
Hide file tree
Showing 41 changed files with 98,973 additions and 102,322 deletions.
77 changes: 38 additions & 39 deletions backend/src/monarch_py/datamodels/category_enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
class EntityCategory(Enum):
"""Entity categories"""

SEQUENCE_VARIANT = "biolink:SequenceVariant"
GENE = "biolink:Gene"
GENOTYPE = "biolink:Genotype"
PHENOTYPIC_FEATURE = "biolink:PhenotypicFeature"
BIOLOGICAL_PROCESS_OR_ACTIVITY = "biolink:BiologicalProcessOrActivity"
GROSS_ANATOMICAL_STRUCTURE = "biolink:GrossAnatomicalStructure"
DISEASE = "biolink:Disease"
PATHWAY = "biolink:Pathway"
GROSS_ANATOMICAL_STRUCTURE = "biolink:GrossAnatomicalStructure"
CELL = "biolink:Cell"
PATHWAY = "biolink:Pathway"
NAMED_THING = "biolink:NamedThing"
ANATOMICAL_ENTITY = "biolink:AnatomicalEntity"
CELLULAR_COMPONENT = "biolink:CellularComponent"
Expand All @@ -20,77 +22,67 @@ class EntityCategory(Enum):
MOLECULAR_ACTIVITY = "biolink:MolecularActivity"
PROTEIN = "biolink:Protein"
CELLULAR_ORGANISM = "biolink:CellularOrganism"
PHENOTYPIC_QUALITY = "biolink:PhenotypicQuality"
VERTEBRATE = "biolink:Vertebrate"
VIRUS = "biolink:Virus"
BEHAVIORAL_FEATURE = "biolink:BehavioralFeature"
CHEMICAL_ENTITY = "biolink:ChemicalEntity"
LIFE_STAGE = "biolink:LifeStage"
PATHOLOGICAL_PROCESS = "biolink:PathologicalProcess"
CHEMICAL_ENTITY = "biolink:ChemicalEntity"
DRUG = "biolink:Drug"
ORGANISM_TAXON = "biolink:OrganismTaxon"
SEQUENCE_VARIANT = "biolink:SequenceVariant"
SMALL_MOLECULE = "biolink:SmallMolecule"
ORGANISM_TAXON = "biolink:OrganismTaxon"
INFORMATION_CONTENT_ENTITY = "biolink:InformationContentEntity"
NUCLEIC_ACID_ENTITY = "biolink:NucleicAcidEntity"
EVIDENCE_TYPE = "biolink:EvidenceType"
GEOGRAPHIC_EXPOSURE = "biolink:GeographicExposure"
RNAPRODUCT = "biolink:RNAProduct"
TRANSCRIPT = "biolink:Transcript"
FUNGUS = "biolink:Fungus"
PLANT = "biolink:Plant"
DATASET = "biolink:Dataset"
INVERTEBRATE = "biolink:Invertebrate"
POPULATION_OF_INDIVIDUAL_ORGANISMS = "biolink:PopulationOfIndividualOrganisms"
PROTEIN_FAMILY = "biolink:ProteinFamily"
PROCESSED_MATERIAL = "biolink:ProcessedMaterial"
ACTIVITY = "biolink:Activity"
AGENT = "biolink:Agent"
CHEMICAL_EXPOSURE = "biolink:ChemicalExposure"
CONFIDENCE_LEVEL = "biolink:ConfidenceLevel"
DATASET = "biolink:Dataset"
ENVIRONMENTAL_FEATURE = "biolink:EnvironmentalFeature"
EXON = "biolink:Exon"
GENETIC_INHERITANCE = "biolink:GeneticInheritance"
GENOME = "biolink:Genome"
GENOTYPE = "biolink:Genotype"
HAPLOTYPE = "biolink:Haplotype"
HUMAN = "biolink:Human"
INDIVIDUAL_ORGANISM = "biolink:IndividualOrganism"
INVERTEBRATE = "biolink:Invertebrate"
MAMMAL = "biolink:Mammal"
MATERIAL_SAMPLE = "biolink:MaterialSample"
MICRO_RNA = "biolink:MicroRNA"
PATENT = "biolink:Patent"
PROTEIN_DOMAIN = "biolink:ProteinDomain"
POPULATION_OF_INDIVIDUAL_ORGANISMS = "biolink:PopulationOfIndividualOrganisms"
PROTEIN_FAMILY = "biolink:ProteinFamily"
PUBLICATION = "biolink:Publication"
REGULATORY_REGION = "biolink:RegulatoryRegion"
STUDY = "biolink:Study"
TREATMENT = "biolink:Treatment"
WEB_PAGE = "biolink:WebPage"
ACCESSIBLE_DNA_REGION = "biolink:AccessibleDnaRegion"
ARTICLE = "biolink:Article"
ATTRIBUTE = "biolink:Attribute"
BACTERIUM = "biolink:Bacterium"
BIOLOGICAL_ENTITY = "biolink:BiologicalEntity"
BIOLOGICAL_SEX = "biolink:BiologicalSex"
CELL_LINE = "biolink:CellLine"
CHEMICAL_EXPOSURE = "biolink:ChemicalExposure"
CHEMICAL_MIXTURE = "biolink:ChemicalMixture"
CODING_SEQUENCE = "biolink:CodingSequence"
DATASET_DISTRIBUTION = "biolink:DatasetDistribution"
DIAGNOSTIC_AID = "biolink:DiagnosticAid"
DRUG_EXPOSURE = "biolink:DrugExposure"
ENVIRONMENTAL_PROCESS = "biolink:EnvironmentalProcess"
EVENT = "biolink:Event"
EXON = "biolink:Exon"
GENOME = "biolink:Genome"
GENOTYPIC_SEX = "biolink:GenotypicSex"
NONCODING_RNAPRODUCT = "biolink:NoncodingRNAProduct"
HUMAN = "biolink:Human"
INDIVIDUAL_ORGANISM = "biolink:IndividualOrganism"
MATERIAL_SAMPLE = "biolink:MaterialSample"
MICRO_RNA = "biolink:MicroRNA"
ORGANISMAL_ENTITY = "biolink:OrganismalEntity"
PATENT = "biolink:Patent"
PHENOTYPIC_SEX = "biolink:PhenotypicSex"
POLYPEPTIDE = "biolink:Polypeptide"
PROCEDURE = "biolink:Procedure"
PROCESSED_MATERIAL = "biolink:ProcessedMaterial"
PROTEIN_DOMAIN = "biolink:ProteinDomain"
REAGENT_TARGETED_GENE = "biolink:ReagentTargetedGene"
REGULATORY_REGION = "biolink:RegulatoryRegion"
SI_RNA = "biolink:SiRNA"
SNV = "biolink:Snv"
STUDY = "biolink:Study"
STUDY_VARIABLE = "biolink:StudyVariable"
TRANSCRIPTION_FACTOR_BINDING_SITE = "biolink:TranscriptionFactorBindingSite"
TREATMENT = "biolink:Treatment"
WEB_PAGE = "biolink:WebPage"
ZYGOSITY = "biolink:Zygosity"


Expand All @@ -99,6 +91,7 @@ class AssociationCategory(Enum):

PAIRWISE_GENE_TO_GENE_INTERACTION = "biolink:PairwiseGeneToGeneInteraction"
GENE_TO_EXPRESSION_SITE_ASSOCIATION = "biolink:GeneToExpressionSiteAssociation"
VARIANT_TO_GENE_ASSOCIATION = "biolink:VariantToGeneAssociation"
MACROMOLECULAR_MACHINE_TO_BIOLOGICAL_PROCESS_ASSOCIATION = (
"biolink:MacromolecularMachineToBiologicalProcessAssociation"
)
Expand All @@ -111,14 +104,19 @@ class AssociationCategory(Enum):
)
ASSOCIATION = "biolink:Association"
GENE_TO_GENE_HOMOLOGY_ASSOCIATION = "biolink:GeneToGeneHomologyAssociation"
GENOTYPE_TO_PHENOTYPIC_FEATURE_ASSOCIATION = "biolink:GenotypeToPhenotypicFeatureAssociation"
DISEASE_TO_PHENOTYPIC_FEATURE_ASSOCIATION = "biolink:DiseaseToPhenotypicFeatureAssociation"
GENE_TO_PATHWAY_ASSOCIATION = "biolink:GeneToPathwayAssociation"
DISEASE_OR_PHENOTYPIC_FEATURE_TO_LOCATION_ASSOCIATION = "biolink:DiseaseOrPhenotypicFeatureToLocationAssociation"
CHEMICAL_TO_PATHWAY_ASSOCIATION = "biolink:ChemicalToPathwayAssociation"
GENOTYPE_TO_DISEASE_ASSOCIATION = "biolink:GenotypeToDiseaseAssociation"
CORRELATED_GENE_TO_DISEASE_ASSOCIATION = "biolink:CorrelatedGeneToDiseaseAssociation"
DISEASE_OR_PHENOTYPIC_FEATURE_TO_GENETIC_INHERITANCE_ASSOCIATION = (
"biolink:DiseaseOrPhenotypicFeatureToGeneticInheritanceAssociation"
)
VARIANT_TO_DISEASE_ASSOCIATION = "biolink:VariantToDiseaseAssociation"
CAUSAL_GENE_TO_DISEASE_ASSOCIATION = "biolink:CausalGeneToDiseaseAssociation"
CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_ASSOCIATION = "biolink:ChemicalToDiseaseOrPhenotypicFeatureAssociation"


class AssociationPredicate(Enum):
Expand All @@ -127,32 +125,33 @@ class AssociationPredicate(Enum):
INTERACTS_WITH = "biolink:interacts_with"
EXPRESSED_IN = "biolink:expressed_in"
HAS_PHENOTYPE = "biolink:has_phenotype"
IS_SEQUENCE_VARIANT_OF = "biolink:is_sequence_variant_of"
ENABLES = "biolink:enables"
ACTIVELY_INVOLVED_IN = "biolink:actively_involved_in"
ORTHOLOGOUS_TO = "biolink:orthologous_to"
LOCATED_IN = "biolink:located_in"
SUBCLASS_OF = "biolink:subclass_of"
RELATED_TO = "biolink:related_to"
PARTICIPATES_IN = "biolink:participates_in"
ACTS_UPSTREAM_OF_OR_WITHIN = "biolink:acts_upstream_of_or_within"
RELATED_TO = "biolink:related_to"
ACTIVE_IN = "biolink:active_in"
PART_OF = "biolink:part_of"
MODEL_OF = "biolink:model_of"
CAUSES = "biolink:causes"
ACTS_UPSTREAM_OF = "biolink:acts_upstream_of"
HAS_MODE_OF_INHERITANCE = "biolink:has_mode_of_inheritance"
GENE_ASSOCIATED_WITH_CONDITION = "biolink:gene_associated_with_condition"
CONTRIBUTES_TO = "biolink:contributes_to"
CAUSES = "biolink:causes"
GENE_ASSOCIATED_WITH_CONDITION = "biolink:gene_associated_with_condition"
TREATS_OR_APPLIED_OR_STUDIED_TO_TREAT = "biolink:treats_or_applied_or_studied_to_treat"
COLOCALIZES_WITH = "biolink:colocalizes_with"
ACTS_UPSTREAM_OF_OR_WITHIN_POSITIVE_EFFECT = "biolink:acts_upstream_of_or_within_positive_effect"
ACTS_UPSTREAM_OF_POSITIVE_EFFECT = "biolink:acts_upstream_of_positive_effect"
ACTS_UPSTREAM_OF_OR_WITHIN_NEGATIVE_EFFECT = "biolink:acts_upstream_of_or_within_negative_effect"
ACTS_UPSTREAM_OF_OR_WITHIN_POSITIVE_EFFECT = "biolink:acts_upstream_of_or_within_positive_effect"
ACTS_UPSTREAM_OF_NEGATIVE_EFFECT = "biolink:acts_upstream_of_negative_effect"
ACTS_UPSTREAM_OF_OR_WITHIN_NEGATIVE_EFFECT = "biolink:acts_upstream_of_or_within_negative_effect"


class MappingPredicate(Enum):
"""Mapping predicates"""

EXACT_MATCH = "skos:exactMatch"
CLOSE_MATCH = "skos:closeMatch"
BROAD_MATCH = "skos:broadMatch"
NARROW_MATCH = "skos:narrowMatch"
12 changes: 12 additions & 0 deletions backend/src/monarch_py/utils/association_type_mappings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,15 @@
- subject_label: Correlated Gene
object_label: Correlated Disease
category: "biolink:CorrelatedGeneToDiseaseAssociation"
- subject_label: Variant to Gene
object_label: Variant to Gene
category: "biolink:VariantToGeneAssociation"
- subject_label: Variant to Disease
object_label: Variant to Disease
category: "biolink:VariantToDiseaseAssociation"
- subject_label: Genotype to Phenotype
object_label: Genotype to Phenotype
category: "biolink:GenotypeToPhenotypicFeatureAssociation"
- subject_label: Disease Model
object_label: Disease Model
category: "biolink:GenotypeToDiseaseAssociation"
6 changes: 2 additions & 4 deletions backend/src/monarch_py/utils/association_type_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,9 @@ def get_association_type_mapping_by_query_string(
Raises: ValueError if no match is found
"""

categories = parse_query_string_for_category(query_string)
category = parse_query_string_for_category(query_string)

matching_types = [
a_type for a_type in AssociationTypeMappings.get_mappings() if set(a_type.category) == set(categories)
]
matching_types = [a_type for a_type in AssociationTypeMappings.get_mappings() if a_type.category == category]

if len(matching_types) == 0:
raise ValueError(f"No matching association type found for query string: [{query_string}]")
Expand Down
2 changes: 1 addition & 1 deletion backend/src/monarch_py/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def set_log_level(log_level: str):

def get_links_for_field(field: List[str]) -> List[ExpandedCurie]:
# TODO: should be able to remove curie.replace("PMID", "PUBMED") since the converter should handle prefix synonyms
expanded_curies = [ExpandedCurie(id=curie, url=get_link_for_curie(curie)) for curie in field]
expanded_curies = [ExpandedCurie(id=curie, url=get_link_for_curie(curie)) for curie in field if ":" in curie]
return expanded_curies


Expand Down
4 changes: 3 additions & 1 deletion backend/tests/fixtures/association_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ def association_counts():
"items": [
{
"label": "Phenotype to Disease",
"count": 3932,
"count": 3941,
"category": "biolink:DiseaseToPhenotypicFeatureAssociation",
},
{"label": "Causal Gene", "count": 126, "category": "biolink:CausalGeneToDiseaseAssociation"},
{"label": "Correlated Gene", "count": 146, "category": "biolink:CorrelatedGeneToDiseaseAssociation"},
{"label": "Variant to Disease", "count": 1, "category": "biolink:VariantToDiseaseAssociation"},
{"label": "Disease Model", "count": 243, "category": "biolink:GenotypeToDiseaseAssociation"},
]
}
8 changes: 8 additions & 0 deletions backend/tests/fixtures/association_counts_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ def association_counts_query():
'(category:"biolink:MacromolecularMachineToBiologicalProcessAssociation") AND (subject:"MONDO:0020121" OR subject_closure:"MONDO:0020121")',
'(category:"biolink:CausalGeneToDiseaseAssociation") AND (subject:"MONDO:0020121" OR subject_closure:"MONDO:0020121")',
'(category:"biolink:CorrelatedGeneToDiseaseAssociation") AND (subject:"MONDO:0020121" OR subject_closure:"MONDO:0020121")',
'(category:"biolink:VariantToGeneAssociation") AND (subject:"MONDO:0020121" OR subject_closure:"MONDO:0020121")',
'(category:"biolink:VariantToDiseaseAssociation") AND (subject:"MONDO:0020121" OR subject_closure:"MONDO:0020121")',
'(category:"biolink:GenotypeToPhenotypicFeatureAssociation") AND (subject:"MONDO:0020121" OR subject_closure:"MONDO:0020121")',
'(category:"biolink:GenotypeToDiseaseAssociation") AND (subject:"MONDO:0020121" OR subject_closure:"MONDO:0020121")',
'(category:"biolink:DiseaseToPhenotypicFeatureAssociation") AND (object:"MONDO:0020121" OR object_closure:"MONDO:0020121")',
'(category:"biolink:GeneToPhenotypicFeatureAssociation") AND (object:"MONDO:0020121" OR object_closure:"MONDO:0020121")',
'(category:"biolink:PairwiseGeneToGeneInteraction") AND (object:"MONDO:0020121" OR object_closure:"MONDO:0020121")',
Expand All @@ -35,6 +39,10 @@ def association_counts_query():
'(category:"biolink:MacromolecularMachineToBiologicalProcessAssociation") AND (object:"MONDO:0020121" OR object_closure:"MONDO:0020121")',
'(category:"biolink:CausalGeneToDiseaseAssociation") AND (object:"MONDO:0020121" OR object_closure:"MONDO:0020121")',
'(category:"biolink:CorrelatedGeneToDiseaseAssociation") AND (object:"MONDO:0020121" OR object_closure:"MONDO:0020121")',
'(category:"biolink:VariantToGeneAssociation") AND (object:"MONDO:0020121" OR object_closure:"MONDO:0020121")',
'(category:"biolink:VariantToDiseaseAssociation") AND (object:"MONDO:0020121" OR object_closure:"MONDO:0020121")',
'(category:"biolink:GenotypeToPhenotypicFeatureAssociation") AND (object:"MONDO:0020121" OR object_closure:"MONDO:0020121")',
'(category:"biolink:GenotypeToDiseaseAssociation") AND (object:"MONDO:0020121" OR object_closure:"MONDO:0020121")',
],
"filter_queries": [
'subject:"MONDO:0020121" OR subject_closure:"MONDO:0020121" OR object:"MONDO:0020121" OR object_closure:"MONDO:0020121"'
Expand Down
Loading

0 comments on commit c106f34

Please sign in to comment.