From bb431d047ef83b85e4b39ddf0c2dcb4785af26b5 Mon Sep 17 00:00:00 2001 From: Andrew Prudhomme Date: Tue, 7 Jan 2025 09:16:01 -0800 Subject: [PATCH] Add range query support for _ID and ATOM fields (#804) --- .../nrtsearch/server/field/AtomFieldDef.java | 36 ++- .../nrtsearch/server/field/IdFieldDef.java | 20 +- .../nrtsearch/server/field/AtomFieldTest.java | 284 +++++++++++++++--- .../nrtsearch/server/field/IdFieldTest.java | 103 +++++++ .../resources/field/registerFieldsAtom2.json | 52 ++++ 5 files changed, 446 insertions(+), 49 deletions(-) create mode 100644 src/test/resources/field/registerFieldsAtom2.json diff --git a/src/main/java/com/yelp/nrtsearch/server/field/AtomFieldDef.java b/src/main/java/com/yelp/nrtsearch/server/field/AtomFieldDef.java index 6c77f3875..10e8e5b31 100644 --- a/src/main/java/com/yelp/nrtsearch/server/field/AtomFieldDef.java +++ b/src/main/java/com/yelp/nrtsearch/server/field/AtomFieldDef.java @@ -17,19 +17,25 @@ import static com.yelp.nrtsearch.server.analysis.AnalyzerCreator.hasAnalyzer; +import com.yelp.nrtsearch.server.field.properties.RangeQueryable; import com.yelp.nrtsearch.server.field.properties.Sortable; import com.yelp.nrtsearch.server.grpc.Field; +import com.yelp.nrtsearch.server.grpc.RangeQuery; import com.yelp.nrtsearch.server.grpc.SortType; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedSetSortField; +import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.util.BytesRef; /** Field class for 'ATOM' field type. Uses {@link KeywordAnalyzer} for text analysis. */ -public class AtomFieldDef extends TextBaseFieldDef implements Sortable { +public class AtomFieldDef extends TextBaseFieldDef implements Sortable, RangeQueryable { private static final Analyzer keywordAnalyzer = new KeywordAnalyzer(); public AtomFieldDef( @@ -98,4 +104,32 @@ public SortField getSortField(SortType type) { } return sortField; } + + @Override + public Query getRangeQuery(RangeQuery rangeQuery) { + verifySearchableOrDocValues("Range query"); + BytesRef lowerTerm = + rangeQuery.getLower().isEmpty() ? null : new BytesRef(rangeQuery.getLower()); + BytesRef upperTerm = + rangeQuery.getUpper().isEmpty() ? null : new BytesRef(rangeQuery.getUpper()); + if (isSearchable()) { + return new TermRangeQuery( + getName(), + lowerTerm, + upperTerm, + !rangeQuery.getLowerExclusive(), + !rangeQuery.getUpperExclusive()); + } else if (hasDocValues() + && (docValuesType == DocValuesType.SORTED || docValuesType == DocValuesType.SORTED_SET)) { + return SortedSetDocValuesField.newSlowRangeQuery( + getName(), + lowerTerm, + upperTerm, + !rangeQuery.getLowerExclusive(), + !rangeQuery.getUpperExclusive()); + } else { + throw new IllegalStateException( + "Only SORTED or SORTED_SET doc values are supported for range queries: " + getName()); + } + } } diff --git a/src/main/java/com/yelp/nrtsearch/server/field/IdFieldDef.java b/src/main/java/com/yelp/nrtsearch/server/field/IdFieldDef.java index 473648282..f5a5fc4f0 100644 --- a/src/main/java/com/yelp/nrtsearch/server/field/IdFieldDef.java +++ b/src/main/java/com/yelp/nrtsearch/server/field/IdFieldDef.java @@ -16,8 +16,10 @@ package com.yelp.nrtsearch.server.field; import com.yelp.nrtsearch.server.doc.LoadedDocValues; +import com.yelp.nrtsearch.server.field.properties.RangeQueryable; import com.yelp.nrtsearch.server.field.properties.TermQueryable; import com.yelp.nrtsearch.server.grpc.Field; +import com.yelp.nrtsearch.server.grpc.RangeQuery; import com.yelp.nrtsearch.server.grpc.SearchResponse; import java.io.IOException; import java.util.List; @@ -33,10 +35,11 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.util.BytesRef; /** Field class for defining '_ID' fields which are used to update documents */ -public class IdFieldDef extends IndexableFieldDef implements TermQueryable { +public class IdFieldDef extends IndexableFieldDef implements TermQueryable, RangeQueryable { protected IdFieldDef( String name, Field requestField, FieldDefCreator.FieldDefCreatorContext context) { @@ -169,4 +172,19 @@ public Query getTermInSetQueryFromTextValues(List textValues) { List textTerms = textValues.stream().map(BytesRef::new).collect(Collectors.toList()); return new org.apache.lucene.search.TermInSetQuery(getName(), textTerms); } + + @Override + public Query getRangeQuery(RangeQuery rangeQuery) { + // _ID fields are always searchable + BytesRef lowerTerm = + rangeQuery.getLower().isEmpty() ? null : new BytesRef(rangeQuery.getLower()); + BytesRef upperTerm = + rangeQuery.getUpper().isEmpty() ? null : new BytesRef(rangeQuery.getUpper()); + return new TermRangeQuery( + getName(), + lowerTerm, + upperTerm, + !rangeQuery.getLowerExclusive(), + !rangeQuery.getUpperExclusive()); + } } diff --git a/src/test/java/com/yelp/nrtsearch/server/field/AtomFieldTest.java b/src/test/java/com/yelp/nrtsearch/server/field/AtomFieldTest.java index 1150cb8ab..4520e240c 100644 --- a/src/test/java/com/yelp/nrtsearch/server/field/AtomFieldTest.java +++ b/src/test/java/com/yelp/nrtsearch/server/field/AtomFieldTest.java @@ -17,12 +17,14 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import com.yelp.nrtsearch.server.ServerTestCase; import com.yelp.nrtsearch.server.grpc.AddDocumentRequest; import com.yelp.nrtsearch.server.grpc.AddDocumentRequest.MultiValuedField; import com.yelp.nrtsearch.server.grpc.FieldDefRequest; import com.yelp.nrtsearch.server.grpc.Query; +import com.yelp.nrtsearch.server.grpc.RangeQuery; import com.yelp.nrtsearch.server.grpc.SearchRequest; import com.yelp.nrtsearch.server.grpc.SearchResponse; import com.yelp.nrtsearch.server.grpc.SearchResponse.Hit; @@ -38,8 +40,8 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; import org.junit.ClassRule; import org.junit.Test; @@ -47,58 +49,109 @@ public class AtomFieldTest extends ServerTestCase { @ClassRule public static final GrpcCleanupRule grpcCleanup = new GrpcCleanupRule(); protected List getIndices() { - return Collections.singletonList(DEFAULT_TEST_INDEX); + return List.of(DEFAULT_TEST_INDEX, "test_index_2"); } protected FieldDefRequest getIndexDef(String name) throws IOException { - return getFieldsFromResourceFile("/field/registerFieldsAtom.json"); + if (DEFAULT_TEST_INDEX.equals(name)) { + return getFieldsFromResourceFile("/field/registerFieldsAtom.json"); + } else { + return getFieldsFromResourceFile("/field/registerFieldsAtom2.json"); + } } protected void initIndex(String name) throws Exception { - List docs = new ArrayList<>(); - MultiValuedField t1Value = MultiValuedField.newBuilder().addValue("term 1").build(); - MultiValuedField t2Value = MultiValuedField.newBuilder().addValue("term 2").build(); - MultiValuedField t3Value = MultiValuedField.newBuilder().addValue("term 3").build(); - MultiValuedField t12Values = - MultiValuedField.newBuilder().addValue("term 1").addValue("term 2").build(); - MultiValuedField t23Values = - MultiValuedField.newBuilder().addValue("term 2").addValue("term 3").build(); - MultiValuedField t31Values = - MultiValuedField.newBuilder().addValue("term 3").addValue("term 1").build(); - AddDocumentRequest request = - AddDocumentRequest.newBuilder() - .setIndexName(name) - .putFields("doc_id", MultiValuedField.newBuilder().addValue("1").build()) - .putFields("single", t1Value) - .putFields("multi_one", t2Value) - .putFields("multi_two", t31Values) - .putFields("single_stored", t3Value) - .putFields("multi_stored", t12Values) - .build(); - docs.add(request); - request = - AddDocumentRequest.newBuilder() - .setIndexName(name) - .putFields("doc_id", MultiValuedField.newBuilder().addValue("2").build()) - .putFields("single", t2Value) - .putFields("multi_one", t3Value) - .putFields("multi_two", t12Values) - .putFields("single_stored", t2Value) - .putFields("multi_stored", t31Values) - .build(); - docs.add(request); - request = - AddDocumentRequest.newBuilder() - .setIndexName(name) - .putFields("doc_id", MultiValuedField.newBuilder().addValue("3").build()) - .putFields("single", t3Value) - .putFields("multi_one", t1Value) - .putFields("multi_two", t23Values) - .putFields("single_stored", t1Value) - .putFields("multi_stored", t23Values) - .build(); - docs.add(request); - addDocuments(docs.stream()); + if (DEFAULT_TEST_INDEX.equals(name)) { + List docs = new ArrayList<>(); + MultiValuedField t1Value = MultiValuedField.newBuilder().addValue("term 1").build(); + MultiValuedField t2Value = MultiValuedField.newBuilder().addValue("term 2").build(); + MultiValuedField t3Value = MultiValuedField.newBuilder().addValue("term 3").build(); + MultiValuedField t12Values = + MultiValuedField.newBuilder().addValue("term 1").addValue("term 2").build(); + MultiValuedField t23Values = + MultiValuedField.newBuilder().addValue("term 2").addValue("term 3").build(); + MultiValuedField t31Values = + MultiValuedField.newBuilder().addValue("term 3").addValue("term 1").build(); + AddDocumentRequest request = + AddDocumentRequest.newBuilder() + .setIndexName(name) + .putFields("doc_id", MultiValuedField.newBuilder().addValue("1").build()) + .putFields("single", t1Value) + .putFields("multi_one", t2Value) + .putFields("multi_two", t31Values) + .putFields("single_stored", t3Value) + .putFields("multi_stored", t12Values) + .build(); + docs.add(request); + request = + AddDocumentRequest.newBuilder() + .setIndexName(name) + .putFields("doc_id", MultiValuedField.newBuilder().addValue("2").build()) + .putFields("single", t2Value) + .putFields("multi_one", t3Value) + .putFields("multi_two", t12Values) + .putFields("single_stored", t2Value) + .putFields("multi_stored", t31Values) + .build(); + docs.add(request); + request = + AddDocumentRequest.newBuilder() + .setIndexName(name) + .putFields("doc_id", MultiValuedField.newBuilder().addValue("3").build()) + .putFields("single", t3Value) + .putFields("multi_one", t1Value) + .putFields("multi_two", t23Values) + .putFields("single_stored", t1Value) + .putFields("multi_stored", t23Values) + .build(); + docs.add(request); + addDocuments(docs.stream()); + } else { + List docs = new ArrayList<>(); + AddDocumentRequest request = + AddDocumentRequest.newBuilder() + .setIndexName(name) + .putFields("doc_id", MultiValuedField.newBuilder().addValue("1").build()) + .putFields("range_field", MultiValuedField.newBuilder().addValue("a").build()) + .build(); + docs.add(request); + request = + AddDocumentRequest.newBuilder() + .setIndexName(name) + .putFields("doc_id", MultiValuedField.newBuilder().addValue("2").build()) + .putFields("range_field", MultiValuedField.newBuilder().addValue("b").build()) + .build(); + docs.add(request); + request = + AddDocumentRequest.newBuilder() + .setIndexName(name) + .putFields("doc_id", MultiValuedField.newBuilder().addValue("3").build()) + .putFields("range_field", MultiValuedField.newBuilder().addValue("c").build()) + .build(); + docs.add(request); + request = + AddDocumentRequest.newBuilder() + .setIndexName(name) + .putFields("doc_id", MultiValuedField.newBuilder().addValue("4").build()) + .putFields("range_field", MultiValuedField.newBuilder().addValue("d").build()) + .build(); + docs.add(request); + request = + AddDocumentRequest.newBuilder() + .setIndexName(name) + .putFields("doc_id", MultiValuedField.newBuilder().addValue("5").build()) + .putFields("range_field", MultiValuedField.newBuilder().addValue("e").build()) + .build(); + docs.add(request); + request = + AddDocumentRequest.newBuilder() + .setIndexName(name) + .putFields("doc_id", MultiValuedField.newBuilder().addValue("6").build()) + .putFields("range_field", MultiValuedField.newBuilder().addValue("f").build()) + .build(); + docs.add(request); + addDocuments(docs.stream()); + } } @Test @@ -348,6 +401,143 @@ public void testTermInSetQueryUnsetTerms() { queryInSetAndVerifyIds(termInSetQuery); } + @Test + public void testRangeQuery() { + rangeQuery("range_field"); + } + + @Test + public void testRangeQuery_docValues() { + rangeQuery("range_field.dv"); + } + + @Test + public void testRangeQuery_searchable() { + rangeQuery("range_field.search"); + } + + @Test + public void testRangeQuery_multiDocValues() { + rangeQuery("range_field.mv_dv"); + } + + @Test + public void testRangeQuery_multiSearchable() { + rangeQuery("range_field.mv_search"); + } + + @Test + public void testRangeQuery_unsupported() { + try { + rangeQuery("range_field.none"); + fail(); + } catch (StatusRuntimeException e) { + assertTrue( + e.getMessage() + .contains( + "Range query requires field to be searchable or have doc values: range_field.none")); + } + } + + @Test + public void testRangeQuery_binaryDocValues() { + try { + rangeQuery("range_field.binary_dv"); + fail(); + } catch (StatusRuntimeException e) { + assertTrue( + e.getMessage() + .contains( + "Only SORTED or SORTED_SET doc values are supported for range queries: range_field.binary_dv")); + } + } + + private void rangeQuery(String fieldName) { + // Both bounds defined + + // Both inclusive + RangeQuery rangeQuery = + RangeQuery.newBuilder().setField(fieldName).setLower("b").setUpper("e").build(); + assertRangeQuery(rangeQuery, "b", "c", "d", "e"); + + // Lower exclusive, upper inclusive + rangeQuery = + RangeQuery.newBuilder() + .setField(fieldName) + .setLower("b") + .setUpper("e") + .setLowerExclusive(true) + .build(); + assertRangeQuery(rangeQuery, "c", "d", "e"); + + // Lower inclusive, upper exclusive + rangeQuery = + RangeQuery.newBuilder() + .setField(fieldName) + .setLower("b") + .setUpper("e") + .setUpperExclusive(true) + .build(); + assertRangeQuery(rangeQuery, "b", "c", "d"); + + // Both exclusive + rangeQuery = + RangeQuery.newBuilder() + .setField(fieldName) + .setLower("b") + .setUpper("e") + .setLowerExclusive(true) + .setUpperExclusive(true) + .build(); + assertRangeQuery(rangeQuery, "c", "d"); + + // Only upper bound defined + + // Both inclusive + rangeQuery = RangeQuery.newBuilder().setField(fieldName).setUpper("d").build(); + assertRangeQuery(rangeQuery, "a", "b", "c", "d"); + + // Upper exclusive + rangeQuery = + RangeQuery.newBuilder().setField(fieldName).setUpper("d").setUpperExclusive(true).build(); + assertRangeQuery(rangeQuery, "a", "b", "c"); + + // Only lower bound defined + + // Both inclusive + rangeQuery = RangeQuery.newBuilder().setField(fieldName).setLower("b").build(); + assertRangeQuery(rangeQuery, "b", "c", "d", "e", "f"); + + // Lower exclusive, upper inclusive + rangeQuery = + RangeQuery.newBuilder().setField(fieldName).setLower("b").setLowerExclusive(true).build(); + assertRangeQuery(rangeQuery, "c", "d", "e", "f"); + } + + private void assertRangeQuery(RangeQuery rangeQuery, String... expectedValues) { + Query query = Query.newBuilder().setRangeQuery(rangeQuery).build(); + SearchResponse searchResponse = + getGrpcServer() + .getBlockingStub() + .search( + SearchRequest.newBuilder() + .setIndexName("test_index_2") + .setStartHit(0) + .setTopHits(10) + .setQuery(query) + .addRetrieveFields("range_field") + .build()); + assertEquals(expectedValues.length, searchResponse.getHitsCount()); + List actualValues = + searchResponse.getHitsList().stream() + .map( + hit -> + hit.getFieldsMap().get("range_field").getFieldValueList().get(0).getTextValue()) + .sorted() + .collect(Collectors.toList()); + assertEquals(Arrays.asList(expectedValues), actualValues); + } + private TermQuery getTermQuery(String field, String term) { return TermQuery.newBuilder().setField(field).setTextValue(term).build(); } diff --git a/src/test/java/com/yelp/nrtsearch/server/field/IdFieldTest.java b/src/test/java/com/yelp/nrtsearch/server/field/IdFieldTest.java index ca0a034cd..e53eda13f 100644 --- a/src/test/java/com/yelp/nrtsearch/server/field/IdFieldTest.java +++ b/src/test/java/com/yelp/nrtsearch/server/field/IdFieldTest.java @@ -22,7 +22,9 @@ import io.grpc.testing.GrpcCleanupRule; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; import org.junit.ClassRule; import org.junit.Test; @@ -52,6 +54,21 @@ public void initIndex(String name) throws Exception { .setIndexName(name) .putFields("id", AddDocumentRequest.MultiValuedField.newBuilder().addValue("3").build()) .build()); + requestList.add( + AddDocumentRequest.newBuilder() + .setIndexName(name) + .putFields("id", AddDocumentRequest.MultiValuedField.newBuilder().addValue("4").build()) + .build()); + requestList.add( + AddDocumentRequest.newBuilder() + .setIndexName(name) + .putFields("id", AddDocumentRequest.MultiValuedField.newBuilder().addValue("5").build()) + .build()); + requestList.add( + AddDocumentRequest.newBuilder() + .setIndexName(name) + .putFields("id", AddDocumentRequest.MultiValuedField.newBuilder().addValue("6").build()) + .build()); addDocuments(requestList.stream()); } @@ -102,4 +119,90 @@ public void testAlwaysSearchable() { assertEquals(1, hit.getFieldsOrThrow("id").getFieldValueCount()); assertEquals("2", hit.getFieldsOrThrow("id").getFieldValue(0).getTextValue()); } + + @Test + public void testRangeQuery() { + String fieldName = "id"; + // Both bounds defined + + // Both inclusive + RangeQuery rangeQuery = + RangeQuery.newBuilder().setField(fieldName).setLower("2").setUpper("5").build(); + assertRangeQuery(rangeQuery, "2", "3", "4", "5"); + + // Lower exclusive, upper inclusive + rangeQuery = + RangeQuery.newBuilder() + .setField(fieldName) + .setLower("2") + .setUpper("5") + .setLowerExclusive(true) + .build(); + assertRangeQuery(rangeQuery, "3", "4", "5"); + + // Lower inclusive, upper exclusive + rangeQuery = + RangeQuery.newBuilder() + .setField(fieldName) + .setLower("2") + .setUpper("5") + .setUpperExclusive(true) + .build(); + assertRangeQuery(rangeQuery, "2", "3", "4"); + + // Both exclusive + rangeQuery = + RangeQuery.newBuilder() + .setField(fieldName) + .setLower("2") + .setUpper("5") + .setLowerExclusive(true) + .setUpperExclusive(true) + .build(); + assertRangeQuery(rangeQuery, "3", "4"); + + // Only upper bound defined + + // Both inclusive + rangeQuery = RangeQuery.newBuilder().setField(fieldName).setUpper("4").build(); + assertRangeQuery(rangeQuery, "1", "2", "3", "4"); + + // Lower inclusive, upper exclusive + rangeQuery = + RangeQuery.newBuilder().setField(fieldName).setUpper("4").setUpperExclusive(true).build(); + assertRangeQuery(rangeQuery, "1", "2", "3"); + + // Only lower bound defined + + // Both inclusive + rangeQuery = RangeQuery.newBuilder().setField(fieldName).setLower("3").build(); + assertRangeQuery(rangeQuery, "3", "4", "5", "6"); + + // Lower exclusive, upper inclusive + rangeQuery = + RangeQuery.newBuilder().setField(fieldName).setLower("3").setLowerExclusive(true).build(); + assertRangeQuery(rangeQuery, "4", "5", "6"); + } + + private void assertRangeQuery(RangeQuery rangeQuery, String... expectedValues) { + Query query = Query.newBuilder().setRangeQuery(rangeQuery).build(); + SearchResponse searchResponse = + getGrpcServer() + .getBlockingStub() + .search( + SearchRequest.newBuilder() + .setIndexName(DEFAULT_TEST_INDEX) + .setStartHit(0) + .setTopHits(10) + .setQuery(query) + .addRetrieveFields("id") + .build()); + assertEquals(expectedValues.length, searchResponse.getHitsCount()); + List actualValues = + searchResponse.getHitsList().stream() + .map(hit -> hit.getFieldsMap().get("id").getFieldValueList().get(0).getTextValue()) + .sorted() + .collect(Collectors.toList()); + assertEquals(Arrays.asList(expectedValues), actualValues); + } } diff --git a/src/test/resources/field/registerFieldsAtom2.json b/src/test/resources/field/registerFieldsAtom2.json new file mode 100644 index 000000000..132ce8d94 --- /dev/null +++ b/src/test/resources/field/registerFieldsAtom2.json @@ -0,0 +1,52 @@ +{ + "indexName": "test_index_2", + "field": [ + { + "name": "doc_id", + "type": "ATOM", + "search": true, + "storeDocValues": true + }, + { + "name": "range_field", + "type": "ATOM", + "search": true, + "multiValued": true, + "storeDocValues": true, + "childFields": [ + { + "name": "dv", + "type": "ATOM", + "storeDocValues": true + }, + { + "name": "binary_dv", + "type": "ATOM", + "textDocValuesType": "TEXT_DOC_VALUES_TYPE_BINARY", + "storeDocValues": true + }, + { + "name": "search", + "type": "ATOM", + "search": true + }, + { + "name": "none", + "type": "ATOM" + }, + { + "name": "mv_dv", + "type": "ATOM", + "multiValued": true, + "storeDocValues": true + }, + { + "name": "mv_search", + "type": "ATOM", + "multiValued": true, + "search": true + } + ] + } + ] +} \ No newline at end of file