Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Short Circuit And Filter Operator During Index Evaluation #14700

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
* The {@code BlockDocIdSet} contains the matching document ids returned by the {@link FilterBlock}.
*/
public interface BlockDocIdSet {

/**
* Returns an iterator of the matching document ids. The document ids returned from the iterator should be in
* ascending order.
Expand All @@ -48,6 +47,10 @@ public interface BlockDocIdSet {
*/
long getNumEntriesScannedInFilter();

default CardinalityEstimate getCardinalityEstimate() {
return CardinalityEstimate.UNKNOWN;
}

/**
* For scan-based FilterBlockDocIdSet, pre-scans the documents and returns a non-scan-based FilterBlockDocIdSet.
*/
Expand Down Expand Up @@ -78,4 +81,10 @@ default BlockDocIdSet toNonScanDocIdSet() {

return this;
}

enum CardinalityEstimate {
ankitsultana marked this conversation as resolved.
Show resolved Hide resolved
UNKNOWN,
MATCHES_ALL,
MATCHES_NONE;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,8 @@ public int advance(int targetDocId) {
_docIdIterator.advanceIfNeeded(targetDocId);
return next();
}

public boolean hasNext() {
return _docIdIterator.hasNext();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.pinot.core.common.BlockDocIdSet;
import org.apache.pinot.core.operator.dociditerators.AndDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.BitmapBasedDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.EmptyDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.RangelessBitmapDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.SortedDocIdIterator;
Expand Down Expand Up @@ -59,13 +60,20 @@ public final class AndDocIdSet implements BlockDocIdSet {
// Keep the scan based BlockDocIdSets to be accessed when collecting query execution stats
private final AtomicReference<List<BlockDocIdSet>> _scanBasedDocIdSets = new AtomicReference<>();
private final boolean _cardinalityBasedRankingForScan;
private final CardinalityEstimate _cardinalityEstimate;
private List<BlockDocIdSet> _docIdSets;
private volatile long _numEntriesScannedInFilter;

public AndDocIdSet(List<BlockDocIdSet> docIdSets, @Nullable Map<String, String> queryOptions) {
this(docIdSets, queryOptions, CardinalityEstimate.UNKNOWN);
}

public AndDocIdSet(List<BlockDocIdSet> docIdSets, @Nullable Map<String, String> queryOptions,
CardinalityEstimate cardinalityEstimate) {
_docIdSets = docIdSets;
_cardinalityBasedRankingForScan =
queryOptions != null && QueryOptionsUtils.isAndScanReorderingEnabled(queryOptions);
_cardinalityEstimate = cardinalityEstimate;
}

@Override
Expand Down Expand Up @@ -107,15 +115,17 @@ public BlockDocIdIterator iterator() {

// evaluate the bitmaps in the order of the lowest matching num docIds comes first, so that we minimize the number
// of containers (range) for comparison from the beginning, as will minimize the effort of bitmap AND application
bitmapBasedDocIdIterators.sort(Comparator.comparing(x -> x.getDocIds().getCardinality()));
if (_cardinalityEstimate != CardinalityEstimate.MATCHES_NONE) {
bitmapBasedDocIdIterators.sort(Comparator.comparing(x -> x.getDocIds().getCardinality()));
}

// Evaluate the scan based operator with the highest cardinality coming first, this potentially reduce the range of
// scanning from the beginning. Automatically place N/A cardinality column (negative infinity) to the back as we
// want to evaluate these unestimated predicates in the end.
// TODO: 1. remainingDocIdIterators currently doesn't report cardinality; therefore, it cannot be
// prioritized even if it provides high effective cardinality, one way to do this is to let AND/OR
// DocIdIterators bubble up cardinality for the sort to happen recursively for nested AND-OR predicates
if (_cardinalityBasedRankingForScan) {
if (_cardinalityBasedRankingForScan && _cardinalityEstimate != CardinalityEstimate.MATCHES_NONE) {
scanBasedDocIdIterators.sort(Comparator.comparing(x -> (-x.getEstimatedCardinality(true))));
}

Expand All @@ -124,7 +134,9 @@ public BlockDocIdIterator iterator() {
int numScanBasedDocIdIterators = scanBasedDocIdIterators.size();
int numRemainingDocIdIterators = remainingDocIdIterators.size();
int numIndexBasedDocIdIterators = numSortedDocIdIterators + numBitmapBasedDocIdIterators;
if ((numIndexBasedDocIdIterators > 0 && numScanBasedDocIdIterators > 0) || numIndexBasedDocIdIterators > 1) {
if (_cardinalityEstimate == CardinalityEstimate.MATCHES_NONE) {
return EmptyDocIdIterator.getInstance();
} else if ((numIndexBasedDocIdIterators > 0 && numScanBasedDocIdIterators > 0) || numIndexBasedDocIdIterators > 1) {
// When there are at least one index-base BlockDocIdIterator (SortedDocIdIterator or BitmapBasedDocIdIterator)
// and at least one ScanBasedDocIdIterator, or more than one index-based BlockDocIdIterator, merge them and
// construct a RangelessBitmapDocIdIterator from the merged document ids. If there is no remaining
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,16 @@


public class BitmapDocIdSet implements BlockDocIdSet {
private final ImmutableRoaringBitmap _bitmap;
private final BitmapDocIdIterator _iterator;

public BitmapDocIdSet(ImmutableRoaringBitmap docIds, int numDocs) {
_bitmap = docIds;
_iterator = new BitmapDocIdIterator(docIds, numDocs);
}

public BitmapDocIdSet(BitmapDocIdIterator iterator) {
_bitmap = null;
_iterator = iterator;
}

Expand All @@ -43,4 +46,13 @@ public BitmapDocIdIterator iterator() {
public long getNumEntriesScannedInFilter() {
return 0L;
}

@Override
public CardinalityEstimate getCardinalityEstimate() {
if (_bitmap == null) {
return CardinalityEstimate.UNKNOWN;
}
// If cardinality exceeds 0, return unknown, because we don't want to inspect each highLowContainer.
return _bitmap.cardinalityExceeds(0L) ? CardinalityEstimate.UNKNOWN : CardinalityEstimate.MATCHES_NONE;
ankitsultana marked this conversation as resolved.
Show resolved Hide resolved
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,9 @@ public EmptyDocIdIterator iterator() {
public long getNumEntriesScannedInFilter() {
return 0L;
}

@Override
public CardinalityEstimate getCardinalityEstimate() {
return CardinalityEstimate.MATCHES_NONE;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,9 @@ public MatchAllDocIdIterator iterator() {
public long getNumEntriesScannedInFilter() {
return 0L;
}

@Override
public CardinalityEstimate getCardinalityEstimate() {
return CardinalityEstimate.MATCHES_ALL;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,9 @@ public RangelessBitmapDocIdIterator iterator() {
public long getNumEntriesScannedInFilter() {
return 0L;
}

@Override
public CardinalityEstimate getCardinalityEstimate() {
return _iterator.hasNext() ? CardinalityEstimate.UNKNOWN : CardinalityEstimate.MATCHES_NONE;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,9 @@ public SortedDocIdIterator iterator() {
public long getNumEntriesScannedInFilter() {
return 0L;
}

@Override
public CardinalityEstimate getCardinalityEstimate() {
return _docIdRanges.isEmpty() ? CardinalityEstimate.MATCHES_NONE : CardinalityEstimate.UNKNOWN;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ protected BlockDocIdSet getTrues() {
Tracing.activeRecording().setNumChildren(_filterOperators.size());
List<BlockDocIdSet> blockDocIdSets = new ArrayList<>(_filterOperators.size());
for (BaseFilterOperator filterOperator : _filterOperators) {
blockDocIdSets.add(filterOperator.getTrues());
BlockDocIdSet blockDocIdSet = filterOperator.getTrues();
blockDocIdSets.add(blockDocIdSet);
if (blockDocIdSet.getCardinalityEstimate() == BlockDocIdSet.CardinalityEstimate.MATCHES_NONE) {
return new AndDocIdSet(blockDocIdSets, _queryOptions, BlockDocIdSet.CardinalityEstimate.MATCHES_NONE);
}
}
return new AndDocIdSet(blockDocIdSets, _queryOptions);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ public void queryStContainsWithMultipleFilters()
List<GenericRow> records = new ArrayList<>(1);
addRecord(records, -122.0007277, 37.5005785);
setUp(records);
// Test point is closed to border of a polygon but outside.
// Test point is close to border of a polygon but outside.
String query = "SELECT COUNT(*) FROM testTable WHERE ST_Contains(ST_GeomFromText('POLYGON ((\n"
+ " -122.0008564 37.5004316, \n"
+ " -121.9991291 37.5005168, \n"
Expand All @@ -401,7 +401,7 @@ public void queryStContainsWithMultipleFilters()

AggregationOperator aggregationOperator = getOperator(query);
AggregationResultsBlock resultsBlock = aggregationOperator.nextBlock();
QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(), 0, 2, 0, 1);
QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(), 0, 1, 0, 1);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is expected. Previously we would end up evaluating both the predicates, but now we only evaluate the first one, and since that doesn't match any docs, we skip the second predicate

List<Object> aggregationResult = resultsBlock.getResults();
Assert.assertNotNull(aggregationResult);
Assert.assertEquals((long) aggregationResult.get(0), 0);
Expand Down
Loading