From 9e9e7ead895da41e9c68785132b56d07435d4ddd Mon Sep 17 00:00:00 2001 From: Kevin Schaper Date: Thu, 2 Nov 2023 16:02:49 -0700 Subject: [PATCH] Adds q.op=AND to Solr queries so that boolean syntax will work (#459) We need to specify q.op=AND along with mm=100% to allow for boolean query syntax, which is a little surprising, but explained in https://issues.apache.org/jira/browse/SOLR-8812 --- backend/src/monarch_py/datamodels/solr.py | 5 ++++- backend/tests/fixtures/association_counts_query.py | 1 + backend/tests/fixtures/association_counts_response.py | 3 ++- backend/tests/fixtures/association_query_direct.py | 1 + backend/tests/fixtures/association_query_indirect.py | 1 + backend/tests/fixtures/association_response.py | 3 ++- backend/tests/fixtures/association_table_response.py | 3 ++- backend/tests/fixtures/autocomplete_query.py | 1 + backend/tests/fixtures/autocomplete_response.py | 3 ++- backend/tests/fixtures/histopheno_query.py | 1 + backend/tests/fixtures/histopheno_response.py | 3 ++- backend/tests/fixtures/search_query.py | 1 + backend/tests/fixtures/search_response.py | 3 ++- 13 files changed, 22 insertions(+), 7 deletions(-) diff --git a/backend/src/monarch_py/datamodels/solr.py b/backend/src/monarch_py/datamodels/solr.py index 8e3426c6b..196b2ed1d 100644 --- a/backend/src/monarch_py/datamodels/solr.py +++ b/backend/src/monarch_py/datamodels/solr.py @@ -45,7 +45,8 @@ class SolrQuery(BaseModel): filter_queries: Optional[List[str]] = Field(default_factory=list) query_fields: str = None def_type: str = "edismax" - mm: str = "100%" # All tokens in the query must be found in the doc, equivalent to q.op="AND" + q_op: str = "AND" # See SOLR-8812, need this plus mm=100% to allow boolean operators in queries + mm: str = "100%" # All tokens in the query must be found in the doc boost: str = None sort: str = None @@ -83,6 +84,8 @@ def _solrize(self, value): return "qf" elif value == "def_type": return "defType" + elif value == "q_op": + return "q.op" elif value is True: return "true" elif value is False: diff --git a/backend/tests/fixtures/association_counts_query.py b/backend/tests/fixtures/association_counts_query.py index 23372a08b..be9f06dcc 100644 --- a/backend/tests/fixtures/association_counts_query.py +++ b/backend/tests/fixtures/association_counts_query.py @@ -40,6 +40,7 @@ def association_counts_query(): ], "query_fields": None, "def_type": "edismax", + "q_op": "AND", "mm": "100%", "boost": None, "sort": None, diff --git a/backend/tests/fixtures/association_counts_response.py b/backend/tests/fixtures/association_counts_response.py index 1faefaec8..b223b0016 100644 --- a/backend/tests/fixtures/association_counts_response.py +++ b/backend/tests/fixtures/association_counts_response.py @@ -5,7 +5,7 @@ def association_counts_response(): return { "responseHeader": { - "QTime": 4, + "QTime": 1, "params": { "facet.query": [ '(category:"biolink:DiseaseToPhenotypicFeatureAssociation") AND (subject:"MONDO:0020121" OR subject_closure:"MONDO:0020121")', @@ -38,6 +38,7 @@ def association_counts_response(): "defType": "edismax", "facet_min_count": "1", "start": "0", + "q.op": "AND", "fq": 'subject:"MONDO\\:0020121" OR subject_closure:"MONDO\\:0020121" OR object:"MONDO\\:0020121" OR object_closure:"MONDO\\:0020121"', "rows": "20", "facet": "true", diff --git a/backend/tests/fixtures/association_query_direct.py b/backend/tests/fixtures/association_query_direct.py index 8686f9b83..c41d974d1 100644 --- a/backend/tests/fixtures/association_query_direct.py +++ b/backend/tests/fixtures/association_query_direct.py @@ -21,6 +21,7 @@ def association_query_direct(): ], "query_fields": "subject subject_label predicate object object_label", "def_type": "edismax", + "q_op": "AND", "mm": "100%", "boost": None, "sort": None, diff --git a/backend/tests/fixtures/association_query_indirect.py b/backend/tests/fixtures/association_query_indirect.py index e53190106..bbeab8090 100644 --- a/backend/tests/fixtures/association_query_indirect.py +++ b/backend/tests/fixtures/association_query_indirect.py @@ -21,6 +21,7 @@ def association_query_indirect(): ], "query_fields": "subject subject_label predicate object object_label", "def_type": "edismax", + "q_op": "AND", "mm": "100%", "boost": None, "sort": None, diff --git a/backend/tests/fixtures/association_response.py b/backend/tests/fixtures/association_response.py index e36c7afcd..77bedbfd1 100644 --- a/backend/tests/fixtures/association_response.py +++ b/backend/tests/fixtures/association_response.py @@ -5,13 +5,14 @@ def association_response(): return { "responseHeader": { - "QTime": 1, + "QTime": 0, "params": { "mm": "100%", "q": "*:*", "defType": "edismax", "facet_min_count": "1", "start": "0", + "q.op": "AND", "fq": 'subject:"MONDO\\:0020121" OR subject_closure:"MONDO\\:0020121" OR object:"MONDO\\:0020121" OR object_closure:"MONDO\\:0020121"', "rows": "20", "facet": "true", diff --git a/backend/tests/fixtures/association_table_response.py b/backend/tests/fixtures/association_table_response.py index fe4da13e9..7e924a5ca 100644 --- a/backend/tests/fixtures/association_table_response.py +++ b/backend/tests/fixtures/association_table_response.py @@ -5,13 +5,14 @@ def association_table_response(): return { "responseHeader": { - "QTime": 1, + "QTime": 0, "params": { "mm": "100%", "q": "*:*", "defType": "edismax", "facet_min_count": "1", "start": "0", + "q.op": "AND", "fq": [ "category:biolink\\:DiseaseToPhenotypicFeatureAssociation", 'subject:"MONDO\\:0020121" OR subject_closure:"MONDO\\:0020121" OR object:"MONDO\\:0020121" OR object_closure:"MONDO\\:0020121"', diff --git a/backend/tests/fixtures/autocomplete_query.py b/backend/tests/fixtures/autocomplete_query.py index cae8e88a2..cd5f4949d 100644 --- a/backend/tests/fixtures/autocomplete_query.py +++ b/backend/tests/fixtures/autocomplete_query.py @@ -13,6 +13,7 @@ def autocomplete_query(): "filter_queries": [], "query_fields": "id^100 name^10 name_t^5 name_ac symbol^10 symbol_t^5 symbol_ac synonym synonym_t synonym_ac", "def_type": "edismax", + "q_op": "AND", "mm": "100%", "boost": 'product(if(termfreq(category,"biolink:Disease"),10.0,1),if(and(termfreq(in_taxon,"NCBITaxon:9606"),termfreq(category,"biolink:Gene")),5.0,1))', "sort": None, diff --git a/backend/tests/fixtures/autocomplete_response.py b/backend/tests/fixtures/autocomplete_response.py index abdee91f4..1779f34ce 100644 --- a/backend/tests/fixtures/autocomplete_response.py +++ b/backend/tests/fixtures/autocomplete_response.py @@ -5,7 +5,7 @@ def autocomplete_response(): return { "responseHeader": { - "QTime": 1, + "QTime": 0, "params": { "mm": "100%", "q": "fanc", @@ -13,6 +13,7 @@ def autocomplete_response(): "facet_min_count": "1", "qf": "id^100 name^10 name_t^5 name_ac symbol^10 symbol_t^5 symbol_ac synonym synonym_t synonym_ac", "start": "0", + "q.op": "AND", "boost": 'product(if(termfreq(category,"biolink:Disease"),10.0,1),if(and(termfreq(in_taxon,"NCBITaxon:9606"),termfreq(category,"biolink:Gene")),5.0,1))', "rows": "20", "facet": "true", diff --git a/backend/tests/fixtures/histopheno_query.py b/backend/tests/fixtures/histopheno_query.py index 22eba9528..8ed9793aa 100644 --- a/backend/tests/fixtures/histopheno_query.py +++ b/backend/tests/fixtures/histopheno_query.py @@ -34,6 +34,7 @@ def histopheno_query(): "filter_queries": ["subject_closure:MONDO\\:0020121"], "query_fields": None, "def_type": "edismax", + "q_op": "AND", "mm": "100%", "boost": None, "sort": None, diff --git a/backend/tests/fixtures/histopheno_response.py b/backend/tests/fixtures/histopheno_response.py index bfee7c9f2..44789afcf 100644 --- a/backend/tests/fixtures/histopheno_response.py +++ b/backend/tests/fixtures/histopheno_response.py @@ -5,7 +5,7 @@ def histopheno_response(): return { "responseHeader": { - "QTime": 6, + "QTime": 1, "params": { "facet.query": [ 'object_closure:"HP:0000924"', @@ -34,6 +34,7 @@ def histopheno_response(): "defType": "edismax", "facet_min_count": "1", "start": "0", + "q.op": "AND", "fq": "subject_closure:MONDO\\:0020121", "rows": "0", "facet": "true", diff --git a/backend/tests/fixtures/search_query.py b/backend/tests/fixtures/search_query.py index 7e069ad20..64dbc1f84 100644 --- a/backend/tests/fixtures/search_query.py +++ b/backend/tests/fixtures/search_query.py @@ -13,6 +13,7 @@ def search_query(): "filter_queries": ["name:*"], "query_fields": "id^100 name^10 name_t^5 name_ac symbol^10 symbol_t^5 symbol_ac synonym synonym_t synonym_ac", "def_type": "edismax", + "q_op": "AND", "mm": "100%", "boost": 'product(if(termfreq(category,"biolink:Disease"),10.0,1),if(and(termfreq(in_taxon,"NCBITaxon:9606"),termfreq(category,"biolink:Gene")),5.0,1))', "sort": None, diff --git a/backend/tests/fixtures/search_response.py b/backend/tests/fixtures/search_response.py index f1dab08ed..5fbe50b82 100644 --- a/backend/tests/fixtures/search_response.py +++ b/backend/tests/fixtures/search_response.py @@ -5,7 +5,7 @@ def search_response(): return { "responseHeader": { - "QTime": 3, + "QTime": 1, "params": { "mm": "100%", "q": "fanconi", @@ -13,6 +13,7 @@ def search_response(): "facet_min_count": "1", "qf": "id^100 name^10 name_t^5 name_ac symbol^10 symbol_t^5 symbol_ac synonym synonym_t synonym_ac", "start": "0", + "q.op": "AND", "boost": 'product(if(termfreq(category,"biolink:Disease"),10.0,1),if(and(termfreq(in_taxon,"NCBITaxon:9606"),termfreq(category,"biolink:Gene")),5.0,1))', "fq": "name:*", "rows": "20",