Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Short Circuit And Filter Operator During Index Evaluation #14700

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@
import org.apache.pinot.core.operator.blocks.FilterBlock;
import org.apache.pinot.core.operator.dociditerators.AndDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.BitmapDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.EmptyDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.OrDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.RangelessBitmapDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator;
import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet;
import org.apache.pinot.core.operator.docidsets.EmptyDocIdSet;
import org.apache.pinot.core.operator.docidsets.RangelessBitmapDocIdSet;
import org.apache.pinot.segment.spi.Constants;
import org.roaringbitmap.RoaringBitmapWriter;
Expand All @@ -35,7 +37,6 @@
* The {@code BlockDocIdSet} contains the matching document ids returned by the {@link FilterBlock}.
*/
public interface BlockDocIdSet {

/**
* Returns an iterator of the matching document ids. The document ids returned from the iterator should be in
* ascending order.
Expand All @@ -48,12 +49,18 @@ public interface BlockDocIdSet {
*/
long getNumEntriesScannedInFilter();

default CardinalityEstimate getCardinalityEstimate() {
return CardinalityEstimate.UNKNOWN;
}

/**
* For scan-based FilterBlockDocIdSet, pre-scans the documents and returns a non-scan-based FilterBlockDocIdSet.
*/
default BlockDocIdSet toNonScanDocIdSet() {
BlockDocIdIterator docIdIterator = iterator();

if (docIdIterator instanceof EmptyDocIdIterator) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this change? This is prioritizing the always empty filter case, which should be extremely rare

return EmptyDocIdSet.getInstance();
}
// NOTE: AND and OR DocIdIterator might contain scan-based DocIdIterator
// TODO: This scan is not counted in the execution stats
if (docIdIterator instanceof ScanBasedDocIdIterator || docIdIterator instanceof AndDocIdIterator
Expand All @@ -78,4 +85,10 @@ default BlockDocIdSet toNonScanDocIdSet() {

return this;
}

enum CardinalityEstimate {
ankitsultana marked this conversation as resolved.
Show resolved Hide resolved
UNKNOWN,
MATCHES_ALL,
MATCHES_NONE;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,8 @@ public int advance(int targetDocId) {
_docIdIterator.advanceIfNeeded(targetDocId);
return next();
}

public boolean hasNext() {
return _docIdIterator.hasNext();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.pinot.core.common.BlockDocIdSet;
import org.apache.pinot.core.operator.dociditerators.AndDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.BitmapBasedDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.EmptyDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.RangelessBitmapDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator;
import org.apache.pinot.core.operator.dociditerators.SortedDocIdIterator;
Expand Down Expand Up @@ -59,13 +60,20 @@ public final class AndDocIdSet implements BlockDocIdSet {
// Keep the scan based BlockDocIdSets to be accessed when collecting query execution stats
private final AtomicReference<List<BlockDocIdSet>> _scanBasedDocIdSets = new AtomicReference<>();
private final boolean _cardinalityBasedRankingForScan;
private final CardinalityEstimate _cardinalityEstimate;
private List<BlockDocIdSet> _docIdSets;
private volatile long _numEntriesScannedInFilter;

public AndDocIdSet(List<BlockDocIdSet> docIdSets, @Nullable Map<String, String> queryOptions) {
this(docIdSets, queryOptions, CardinalityEstimate.UNKNOWN);
}

public AndDocIdSet(List<BlockDocIdSet> docIdSets, @Nullable Map<String, String> queryOptions,
CardinalityEstimate cardinalityEstimate) {
_docIdSets = docIdSets;
_cardinalityBasedRankingForScan =
queryOptions != null && QueryOptionsUtils.isAndScanReorderingEnabled(queryOptions);
_cardinalityEstimate = cardinalityEstimate;
}

@Override
Expand Down Expand Up @@ -104,6 +112,9 @@ public BlockDocIdIterator iterator() {
_docIdSets = null;
_numEntriesScannedInFilter = numEntriesScannedForNonScanBasedDocIdSets;
_scanBasedDocIdSets.set(scanBasedDocIdSets);
if (_cardinalityEstimate == CardinalityEstimate.MATCHES_NONE) {
return EmptyDocIdIterator.getInstance();
}

// evaluate the bitmaps in the order of the lowest matching num docIds comes first, so that we minimize the number
// of containers (range) for comparison from the beginning, as will minimize the effort of bitmap AND application
Expand Down Expand Up @@ -165,6 +176,9 @@ public BlockDocIdIterator iterator() {
}
}
for (ScanBasedDocIdIterator scanBasedDocIdIterator : scanBasedDocIdIterators) {
if (!docIds.cardinalityExceeds(0)) {
return EmptyDocIdIterator.getInstance();
}
docIds = scanBasedDocIdIterator.applyAnd(docIds);
}
RangelessBitmapDocIdIterator rangelessBitmapDocIdIterator = new RangelessBitmapDocIdIterator(docIds);
Expand Down Expand Up @@ -196,4 +210,9 @@ public long getNumEntriesScannedInFilter() {
}
return _numEntriesScannedInFilter + numEntriesScannedForScanBasedDocIdSets;
}

@Override
public CardinalityEstimate getCardinalityEstimate() {
return _cardinalityEstimate;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,16 @@


public class BitmapDocIdSet implements BlockDocIdSet {
private final ImmutableRoaringBitmap _bitmap;
private final BitmapDocIdIterator _iterator;

public BitmapDocIdSet(ImmutableRoaringBitmap docIds, int numDocs) {
_bitmap = docIds;
_iterator = new BitmapDocIdIterator(docIds, numDocs);
}

public BitmapDocIdSet(BitmapDocIdIterator iterator) {
_bitmap = null;
_iterator = iterator;
}

Expand All @@ -43,4 +46,13 @@ public BitmapDocIdIterator iterator() {
public long getNumEntriesScannedInFilter() {
return 0L;
}

@Override
public CardinalityEstimate getCardinalityEstimate() {
if (_bitmap == null) {
return CardinalityEstimate.UNKNOWN;
}
// If cardinality exceeds 0, return unknown, because we don't want to inspect each highLowContainer.
return _bitmap.cardinalityExceeds(0L) ? CardinalityEstimate.UNKNOWN : CardinalityEstimate.MATCHES_NONE;
ankitsultana marked this conversation as resolved.
Show resolved Hide resolved
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,9 @@ public EmptyDocIdIterator iterator() {
public long getNumEntriesScannedInFilter() {
return 0L;
}

@Override
public CardinalityEstimate getCardinalityEstimate() {
return CardinalityEstimate.MATCHES_NONE;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,9 @@ public MatchAllDocIdIterator iterator() {
public long getNumEntriesScannedInFilter() {
return 0L;
}

@Override
public CardinalityEstimate getCardinalityEstimate() {
return CardinalityEstimate.MATCHES_ALL;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,9 @@ public RangelessBitmapDocIdIterator iterator() {
public long getNumEntriesScannedInFilter() {
return 0L;
}

@Override
public CardinalityEstimate getCardinalityEstimate() {
return _iterator.hasNext() ? CardinalityEstimate.UNKNOWN : CardinalityEstimate.MATCHES_NONE;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,9 @@ public SortedDocIdIterator iterator() {
public long getNumEntriesScannedInFilter() {
return 0L;
}

@Override
public CardinalityEstimate getCardinalityEstimate() {
return _docIdRanges.isEmpty() ? CardinalityEstimate.MATCHES_NONE : CardinalityEstimate.UNKNOWN;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,12 @@ protected BlockDocIdSet getTrues() {
Tracing.activeRecording().setNumChildren(_filterOperators.size());
List<BlockDocIdSet> blockDocIdSets = new ArrayList<>(_filterOperators.size());
for (BaseFilterOperator filterOperator : _filterOperators) {
blockDocIdSets.add(filterOperator.getTrues());
BlockDocIdSet blockDocIdSet = filterOperator.getTrues();
blockDocIdSets.add(blockDocIdSet);
if (blockDocIdSet.getCardinalityEstimate() == BlockDocIdSet.CardinalityEstimate.MATCHES_NONE) {
// Return AndDocIdSet to ensure that getNumEntriesScannedInFilter is correctly reported.
return new AndDocIdSet(blockDocIdSets, _queryOptions, BlockDocIdSet.CardinalityEstimate.MATCHES_NONE);
}
}
return new AndDocIdSet(blockDocIdSets, _queryOptions);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.Map;
import org.apache.pinot.core.common.BlockDocIdSet;
import org.apache.pinot.core.operator.docidsets.AndDocIdSet;
import org.apache.pinot.core.operator.docidsets.EmptyDocIdSet;
import org.apache.pinot.spi.trace.Tracing;


Expand Down Expand Up @@ -62,6 +63,9 @@ public String toExplainString() {
protected BlockDocIdSet getTrues() {
Tracing.activeRecording().setNumChildren(2);
BlockDocIdSet mainFilterDocIdSet = _mainFilterOperator.nextBlock().getNonScanFilterBLockDocIdSet();
if (mainFilterDocIdSet.getCardinalityEstimate() == BlockDocIdSet.CardinalityEstimate.MATCHES_NONE) {
return EmptyDocIdSet.getInstance();
}
BlockDocIdSet subFilterDocIdSet = _subFilterOperator.nextBlock().getBlockDocIdSet();
return new AndDocIdSet(Arrays.asList(mainFilterDocIdSet, subFilterDocIdSet), _queryOptions);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ public void queryStContainsWithMultipleFilters()
List<GenericRow> records = new ArrayList<>(1);
addRecord(records, -122.0007277, 37.5005785);
setUp(records);
// Test point is closed to border of a polygon but outside.
// Test point is close to border of a polygon but outside.
String query = "SELECT COUNT(*) FROM testTable WHERE ST_Contains(ST_GeomFromText('POLYGON ((\n"
+ " -122.0008564 37.5004316, \n"
+ " -121.9991291 37.5005168, \n"
Expand All @@ -401,7 +401,7 @@ public void queryStContainsWithMultipleFilters()

AggregationOperator aggregationOperator = getOperator(query);
AggregationResultsBlock resultsBlock = aggregationOperator.nextBlock();
QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(), 0, 2, 0, 1);
QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(), 0, 1, 0, 1);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is expected. Previously we would end up evaluating both the predicates, but now we only evaluate the first one, and since that doesn't match any docs, we skip the second predicate

List<Object> aggregationResult = resultsBlock.getResults();
Assert.assertNotNull(aggregationResult);
Assert.assertEquals((long) aggregationResult.get(0), 0);
Expand Down
Loading