Skip to content

Commit

Permalink
Lucene 9.9 upgrade (#625)
Browse files Browse the repository at this point in the history
* Bump lucene version to 9.9.0

* Replaced all Lucene95 codecs with Lucene99

* Replaced rewrite(IndexReader) with rewrite(IndexSearcher)

* Added hasBlocks=False to SegmentInfo constructor

* Switched to Lucene99 and Completion99PostingsFormat

* Fixed VectorFieldDefTest

* Fixed explain test

* Updated postings format to Completion99 in ContextSuggestFieldDef
  • Loading branch information
sarthakn7 authored Mar 5, 2024
1 parent 4159cb5 commit 353f6bc
Show file tree
Hide file tree
Showing 16 changed files with 56 additions and 59 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def _artifactId = 'server'
//This is for https://github.com/gradle/gradle/issues/11308
System.setProperty("org.gradle.internal.publish.checksums.insecure", "True")

def luceneVersion = '9.8.0'
def luceneVersion = '9.9.0'
project.ext.slf4jVersion = '2.0.0-alpha1'
project.ext.grpcVersion = '1.46.0'
project.ext.lz4Version = '1.7.0'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene95.Lucene95Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;

/** Implements per-index {@link Codec}. */
public class ServerCodec extends Lucene95Codec {
public class ServerCodec extends Lucene99Codec {

public static final String DEFAULT_POSTINGS_FORMAT = "Lucene90";
public static final String DEFAULT_POSTINGS_FORMAT = "Lucene99";
public static final String DEFAULT_DOC_VALUES_FORMAT = "Lucene90";

private final IndexStateManager stateManager;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ public Optional<Analyzer> getSearchAnalyzer() {
return Optional.ofNullable(this.searchAnalyzer);
}

@Override
public String getPostingsFormat() {
return "Completion90";
return "Completion99";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnFloatVectorField;
Expand Down Expand Up @@ -77,12 +77,12 @@ private static KnnVectorsFormat createVectorsFormat(VectorIndexingOptions vector
int m =
vectorIndexingOptions.getHnswM() > 0
? vectorIndexingOptions.getHnswM()
: Lucene95HnswVectorsFormat.DEFAULT_MAX_CONN;
: Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
int efConstruction =
vectorIndexingOptions.getHnswEfConstruction() > 0
? vectorIndexingOptions.getHnswEfConstruction()
: Lucene95HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
return new Lucene95HnswVectorsFormat(m, efConstruction);
: Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
return new Lucene99HnswVectorsFormat(m, efConstruction);
}

/**
Expand Down Expand Up @@ -134,10 +134,10 @@ protected void validateRequest(Field requestField) {
}

if (requestField.getSearch()) {
if (requestField.getVectorDimensions() > Lucene95HnswVectorsFormat.DEFAULT_MAX_DIMENSIONS) {
if (requestField.getVectorDimensions() > Lucene99HnswVectorsFormat.DEFAULT_MAX_DIMENSIONS) {
throw new IllegalArgumentException(
"Vector dimension must be <= "
+ Lucene95HnswVectorsFormat.DEFAULT_MAX_DIMENSIONS
+ Lucene99HnswVectorsFormat.DEFAULT_MAX_DIMENSIONS
+ " for search");
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.TermQuery;
Expand Down Expand Up @@ -78,21 +79,20 @@ public BlendedTermQuery(Term[] terms, float[] boosts) {
}

@Override
public Query rewrite(IndexReader reader) throws IOException {
Query rewritten = super.rewrite(reader);
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
Query rewritten = super.rewrite(indexSearcher);
if (rewritten != this) {
return rewritten;
}
IndexReaderContext context = reader.getContext();
TermStates[] ctx = new TermStates[terms.length];
int[] docFreqs = new int[ctx.length];
for (int i = 0; i < terms.length; i++) {
ctx[i] = TermStates.build(context, terms[i], true);
ctx[i] = TermStates.build(indexSearcher, terms[i], true);
docFreqs[i] = ctx[i].docFreq();
}

final int maxDoc = reader.maxDoc();
blend(ctx, maxDoc, reader);
final int maxDoc = indexSearcher.getIndexReader().maxDoc();
blend(ctx, maxDoc, indexSearcher.getIndexReader());
return topLevelQuery(terms, ctx, docFreqs, maxDoc);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.Query;
Expand Down Expand Up @@ -193,8 +194,8 @@ public MatchPhrasePrefixQuery(
}

@Override
public Query rewrite(IndexReader reader) throws IOException {
Query rewritten = super.rewrite(reader);
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
Query rewritten = super.rewrite(indexSearcher);
if (rewritten != this) {
return rewritten;
}
Expand All @@ -211,7 +212,7 @@ public Query rewrite(IndexReader reader) throws IOException {
int position = positions.get(sizeMinus1);
Set<Term> terms = new HashSet<>();
for (Term term : suffixTerms) {
getPrefixTerms(terms, term, reader);
getPrefixTerms(terms, term, indexSearcher.getIndexReader());
if (terms.size() > maxExpansions) {
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

/**
Expand Down Expand Up @@ -133,32 +134,30 @@ public float getWeight() {

/**
* Method to rewrite queries with the given {@link IndexSearcher}. Final to force use of {@link
* #doRewrite(IndexReader, boolean, Query)}.
* #doRewrite(boolean, Query)}.
*
* @param reader index reader
* @param indexSearcher index searcher
* @return function object with any query rewriting done
* @throws IOException if an I/O error occurs while rewriting the filter query or the function
*/
public final FilterFunction rewrite(IndexReader reader) throws IOException {
public final FilterFunction rewrite(IndexSearcher indexSearcher) throws IOException {
Query rewrittenFilterQuery = null;
if (filterQuery != null) {
rewrittenFilterQuery = filterQuery.rewrite(reader);
rewrittenFilterQuery = filterQuery.rewrite(indexSearcher);
}
return doRewrite(reader, rewrittenFilterQuery != filterQuery, rewrittenFilterQuery);
return doRewrite(rewrittenFilterQuery != filterQuery, rewrittenFilterQuery);
}

/**
* Rewrite method for all child classes.
*
* @param reader index reader
* @param filterQueryRewritten if the filter query was modified by rewrite
* @param rewrittenFilterQuery final value of rewritten query, may be null if no filter
* @return fully rewritten filter function
* @throws IOException
*/
protected abstract FilterFunction doRewrite(
IndexReader reader, boolean filterQueryRewritten, Query rewrittenFilterQuery)
throws IOException;
boolean filterQueryRewritten, Query rewrittenFilterQuery) throws IOException;

@Override
public String toString() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
Expand Down Expand Up @@ -124,16 +123,16 @@ public MultiFunctionScoreQuery(
}

@Override
public Query rewrite(IndexReader reader) throws IOException {
Query rewritten = super.rewrite(reader);
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
Query rewritten = super.rewrite(indexSearcher);
if (rewritten != this) {
return rewritten;
}
Query rewrittenInner = innerQuery.rewrite(reader);
Query rewrittenInner = innerQuery.rewrite(indexSearcher);
boolean needsRewrite = rewrittenInner != innerQuery;
FilterFunction[] rewrittenFunctions = new FilterFunction[functions.length];
for (int i = 0; i < functions.length; ++i) {
rewrittenFunctions[i] = functions[i].rewrite(reader);
rewrittenFunctions[i] = functions[i].rewrite(indexSearcher);
needsRewrite |= (rewrittenFunctions[i] != functions[i]);
}
if (needsRewrite) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import com.yelp.nrtsearch.server.luceneserver.search.query.QueryUtils.SettableDoubleValues;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
Expand Down Expand Up @@ -54,9 +53,7 @@ public ScriptFilterFunction(
}

@Override
protected FilterFunction doRewrite(
IndexReader reader, boolean filterQueryRewritten, Query rewrittenFilterQuery)
throws IOException {
protected FilterFunction doRewrite(boolean filterQueryRewritten, Query rewrittenFilterQuery) {
if (filterQueryRewritten) {
return new ScriptFilterFunction(rewrittenFilterQuery, getWeight(), script, scriptSource);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package com.yelp.nrtsearch.server.luceneserver.search.query.multifunction;

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Query;
Expand All @@ -37,9 +36,7 @@ public WeightFilterFunction(Query filterQuery, float weight) {
}

@Override
protected FilterFunction doRewrite(
IndexReader reader, boolean filterQueryRewritten, Query rewrittenFilterQuery)
throws IOException {
protected FilterFunction doRewrite(boolean filterQueryRewritten, Query rewrittenFilterQuery) {
if (filterQueryRewritten) {
return new WeightFilterFunction(rewrittenFilterQuery, getWeight());
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene95.Lucene95Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
Expand All @@ -37,7 +37,7 @@
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.search.suggest.document.Completion90PostingsFormat;
import org.apache.lucene.search.suggest.document.Completion99PostingsFormat;
import org.apache.lucene.search.suggest.document.CompletionQuery;
import org.apache.lucene.search.suggest.document.ContextQuery;
import org.apache.lucene.search.suggest.document.ContextSuggestField;
Expand Down Expand Up @@ -249,8 +249,8 @@ protected IndexWriterConfig getIndexWriterConfig(
Analyzer indexAnalyzer, IndexWriterConfig.OpenMode mode) {
IndexWriterConfig iwc = super.getIndexWriterConfig(indexAnalyzer, mode);
Codec filterCodec =
new Lucene95Codec() {
final PostingsFormat fstPostingsFormat = new Completion90PostingsFormat();
new Lucene99Codec() {
final PostingsFormat fstPostingsFormat = new Completion99PostingsFormat();

@Override
public PostingsFormat getPostingsFormatForField(String field) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,32 +20,32 @@
import org.slf4j.LoggerFactory;

/**
* Copy of the lucene Completion90PostingsFormat, but allows the FST load mode to be configured.
* Copy of the lucene Completion99PostingsFormat, but allows the FST load mode to be configured.
* Since this codec is loaded by class name, it must have the same name as the original and be
* present earlier in the class path.
*/
public class Completion90PostingsFormat extends CompletionPostingsFormat {
private static final Logger logger = LoggerFactory.getLogger(Completion90PostingsFormat.class);
public class Completion99PostingsFormat extends CompletionPostingsFormat {
private static final Logger logger = LoggerFactory.getLogger(Completion99PostingsFormat.class);

/**
* Creates a {@link Completion90PostingsFormat} that will load the completion FST based on the
* Creates a {@link Completion99PostingsFormat} that will load the completion FST based on the
* value present in {@link CompletionPostingsFormatUtil}.
*/
public Completion90PostingsFormat() {
public Completion99PostingsFormat() {
this(CompletionPostingsFormatUtil.getCompletionCodecLoadMode());
}

/**
* Creates a {@link Completion90PostingsFormat} that will use the provided <code>fstLoadMode
* Creates a {@link Completion99PostingsFormat} that will use the provided <code>fstLoadMode
* </code> to determine if the completion FST should be loaded on or off heap.
*/
public Completion90PostingsFormat(FSTLoadMode fstLoadMode) {
super("Completion90", fstLoadMode);
logger.info("Created Completion90PostingsFormat with fstLoadMode: " + fstLoadMode);
public Completion99PostingsFormat(FSTLoadMode fstLoadMode) {
super("Completion99", fstLoadMode);
logger.info("Created Completion99PostingsFormat with fstLoadMode: " + fstLoadMode);
}

@Override
protected PostingsFormat delegatePostingsFormat() {
return PostingsFormat.forName("Lucene90");
return PostingsFormat.forName("Lucene99");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public class CompletionPostingsFormatUtil {
private CompletionPostingsFormatUtil() {}

/**
* Set the FST load mode used by the modified {@link Completion90PostingsFormat}. Must be set
* Set the FST load mode used by the modified {@link Completion99PostingsFormat}. Must be set
* before any index data is loaded.
*
* @param loadMode new FST load mode
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,9 @@ public void testExplain() {
var explain = hit.getExplain();
var expectedExplain =
String.format(
"%d.0 = weight(FunctionScoreQuery(int_field:[0 TO 100], scored by expr(int_score))), result of:\n"
"%d.0 = weight(FunctionScoreQuery(IndexOrDocValuesQuery("
+ "indexQuery=int_field:[0 TO 100], dvQuery=int_field:[0 TO 100]), "
+ "scored by expr(int_score))), result of:\n"
+ " %<d.0 = int_score, computed from:\n"
+ " %<d.0 = double(int_score)",
NUM_DOCS - i);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,7 @@ public void testVectorFormat_set_m() {
KnnVectorsFormat format = vectorFieldDef.getVectorsFormat();
assertNotNull(format);
assertEquals(
"Lucene95HnswVectorsFormat(name=Lucene95HnswVectorsFormat, maxConn=5, beamWidth=100)",
"Lucene99HnswVectorsFormat(name=Lucene99HnswVectorsFormat, maxConn=5, beamWidth=100, flatVectorFormat=Lucene99FlatVectorsFormat())",
format.toString());
}

Expand All @@ -718,7 +718,7 @@ public void testVectorFormat_set_ef_construction() {
KnnVectorsFormat format = vectorFieldDef.getVectorsFormat();
assertNotNull(format);
assertEquals(
"Lucene95HnswVectorsFormat(name=Lucene95HnswVectorsFormat, maxConn=16, beamWidth=50)",
"Lucene99HnswVectorsFormat(name=Lucene99HnswVectorsFormat, maxConn=16, beamWidth=50, flatVectorFormat=Lucene99FlatVectorsFormat())",
format.toString());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,7 @@ private SegmentInfos getInfos(List<Integer> sizes, List<Integer> deletions) {
String.valueOf(id),
sizes.get(i),
false,
false,
null,
Collections.emptyMap(),
new byte[StringHelper.ID_LENGTH],
Expand Down

0 comments on commit 353f6bc

Please sign in to comment.