Skip to content

Commit

Permalink
Boost selected results on empty searches (#642)
Browse files Browse the repository at this point in the history
Right now, when the UI automatically issues an empty search, results come
back in an order that is largely a side effect of ingest order. This PR
catches searches for `*:*` and selectively boosts a curated set of
disease, gene, phenotype and UBERON terms.
  • Loading branch information
kevinschaper authored Apr 19, 2024
1 parent 0761949 commit accf783
Show file tree
Hide file tree
Showing 20 changed files with 37,642 additions and 15,408 deletions.
32 changes: 28 additions & 4 deletions backend/src/monarch_py/implementations/solr/solr_query_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def build_search_query(
query.q = q
query.def_type = "edismax"
query.query_fields = entity_query_fields()
query.boost = entity_boost()
query.boost = entity_boost(empty_search=(q == "*:*"))
if category:
query.add_filter_query(" OR ".join(f'category:"{cat}"' for cat in category))
if in_taxon_label:
Expand All @@ -185,7 +185,7 @@ def build_autocomplete_query(
# match the query fields to start with
query.query_fields = entity_query_fields()
query.def_type = "edismax"
query.boost = entity_boost(prioritized_predicates=prioritized_predicates)
query.boost = entity_boost(prioritized_predicates=prioritized_predicates, empty_search=(q == "*:*"))
return query


Expand Down Expand Up @@ -231,15 +231,17 @@ def obsolete_unboost(multiplier=0.1):
return f'if(termfreq(deprecated,"true"),{multiplier},1)'


def entity_boost(prioritized_predicates: List[AssociationPredicate] = None) -> str:
def entity_boost(prioritized_predicates: List[AssociationPredicate] = None, empty_search: bool = False) -> str:
"""Shared boost function between search and autocomplete"""
phenotype_boost = category_boost("biolink:PhenotypicFeature", 1.1)
disease_boost = category_boost("biolink:PhenotypicFeature", 1.3)
disease_boost = category_boost("biolink:Disease", 1.3)
human_gene_boost = category_boost("biolink:Gene", 1.1, taxon="NCBITaxon:9606")

boosts = [phenotype_boost, disease_boost, human_gene_boost, obsolete_unboost()]
if prioritized_predicates:
boosts.append(entity_predicate_boost(prioritized_predicates, 2.0))
if empty_search:
boosts.append(blank_search_boost())
return f"product({','.join(boosts)})"


Expand All @@ -259,6 +261,28 @@ def category_boost(category: str, multiplier: float, taxon: Optional[str] = None
return f'if(termfreq(category,"{category}"),{multiplier},1)'


def blank_search_boost() -> str:
    """
    Boost specific nodes that we'd like to see as site examples to the top for empty searches

    Returns:
        A Solr function-query string of the form ``product(f1,f2,...)`` with one
        ``if(termfreq(id,"<node>"),<multiplier>,1)`` factor per example node.
    """
    example_nodes = [
        "MONDO:0007523",  # Ehlers-Danlos syndrome, hypermobility type
        "MONDO:0019391",  # Fanconi anemia
        "MONDO:0018954",  # Loeys-Dietz syndrome
        "MONDO:0011518",  # Wiedemann-Steiner syndrome
        "HP:0001166",  # Arachnodactyly
        "HP:0001631",  # Atrial septal defect
        "UBERON:0000948",  # heart
        "UBERON:0006585",  # vestibular organ
        "HGNC:4851",  # HTT
        "HGNC:3603",  # FBN1
    ]
    # List order is display priority: the first node is multiplied by
    # len(example_nodes) + 2 and each later node by one less, so the last
    # node is multiplied by 3. A document whose id is not listed takes the
    # else-branch (1) of every factor and keeps its score unchanged.
    top_multiplier = len(example_nodes) + 2
    factors = [
        f'if(termfreq(id,"{node}"),{top_multiplier - rank},1)'
        for rank, node in enumerate(example_nodes)
    ]
    return f"product({','.join(factors)})"


def entity_query_fields():
"""
Shared query field list between search and autocomplete,
Expand Down
2 changes: 1 addition & 1 deletion backend/tests/fixtures/association_counts_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
def association_counts_response():
return {
"responseHeader": {
"QTime": 3,
"QTime": 1,
"params": {
"facet.query": [
'(category:"biolink:DiseaseToPhenotypicFeatureAssociation") AND (subject:"MONDO:0020121" OR subject_closure:"MONDO:0020121")',
Expand Down
2 changes: 1 addition & 1 deletion backend/tests/fixtures/association_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
def association_response():
return {
"responseHeader": {
"QTime": 2,
"QTime": 0,
"params": {
"mm": "100%",
"q": "*:*",
Expand Down
Loading

0 comments on commit accf783

Please sign in to comment.