Skip to content

Commit

Permalink
ESQL: Add interfaces to distribute the post-analysis verification (#1…
Browse files Browse the repository at this point in the history
…19798) (#120048)

This adds a PostAnalysisVerificationAware interface that allows an expression,
plan or even command to perform post-analysis verifications "locally", vs.
having them centralized in the core verifier.

(cherry picked from commit ad264f7)
  • Loading branch information
bpintea authored Jan 13, 2025
1 parent e99dfac commit c17de01
Show file tree
Hide file tree
Showing 15 changed files with 869 additions and 700 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/119798.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 119798
summary: "Add a `PostAnalysisVerificationAware` interface, distribute verification"
area: ES|QL
type: enhancement
issues: []

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.capabilities;

import org.elasticsearch.xpack.esql.common.Failures;
import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction;
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;

import java.util.function.BiConsumer;

/**
* Interface implemented by expressions or plans that require validation after query plan analysis,
* when the indices and references have been resolved, but before the plan is transformed further by optimizations.
* <p>
* The interface is similar to {@link PostAnalysisVerificationAware}, but it is focused on the tree structure the implementer is part
* of, oftentimes covering semantic checks.
* </p>
*/
public interface PostAnalysisPlanVerificationAware {

/**
* Returns a consumer that performs the implementer's self-validation in the context of the tree structure the implementer is part
* of. This usually involves checking the type and configuration of the children or that of the parent.
* <p>
* Prefer collecting as many failures as is sensible over stopping at the first one: reporting them together lets the query author
* reach a correct query faster.
* </p>
* <p>
* Example: a {@link GroupingFunction} instance, which models a function to group documents to aggregate over, can only be used in
* the context of the STATS command, modeled by the {@link Aggregate} class. This is how this verification is performed:
* </p>
* <pre>
* {@code
* @Override
* public BiConsumer<LogicalPlan, Failures> postAnalysisPlanVerification() {
*     return (p, failures) -> {
*         if (p instanceof Aggregate == false) {
*             p.forEachExpression(
*                 GroupingFunction.class,
*                 gf -> failures.add(fail(gf, "cannot use grouping function [{}] outside of a STATS command", gf.sourceText()))
*             );
*         }
*     };
* }
* }
* </pre>
*
* @return a consumer that receives a {@link LogicalPlan} to check and a {@link Failures} accumulator for the failures found
*         during inspection.
*/
BiConsumer<LogicalPlan, Failures> postAnalysisPlanVerification();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.capabilities;

import org.elasticsearch.xpack.esql.common.Failures;
import org.elasticsearch.xpack.esql.plan.logical.Filter;

/**
* Interface implemented by expressions or plans that require validation after query plan analysis,
* when the indices and references have been resolved, but before the plan is transformed further by optimizations.
* <p>
* The interface is similar to {@link PostAnalysisPlanVerificationAware}, but it is focused on individual expressions or plans,
* typically covering syntactic checks.
* </p>
*/
public interface PostAnalysisVerificationAware {

/**
* Allows the implementer to validate itself. This usually involves checking its internal setup, which often means checking the
* parameters it received on construction: their data or syntactic type, class, their count, expressions' structure etc.
* The discovered failures are added to the given {@link Failures} object.
* <p>
* Prefer collecting as many failures as is sensible over stopping at the first one: reporting them together lets the query author
* reach a correct query faster.
* </p>
* <p>
* Example: the {@link Filter} class, which models the WHERE command, checks that the expression it filters on - {@code condition}
* - is of a Boolean or NULL type:
* </p>
* <pre>
* {@code
* @Override
* public void postAnalysisVerification(Failures failures) {
*     if (condition.dataType() != NULL && condition.dataType() != BOOLEAN) {
*         failures.add(fail(condition, "Condition expression needs to be boolean, found [{}]", condition.dataType()));
*     }
* }
* }
* </pre>
*
* @param failures the object to add failures to.
*/
void postAnalysisVerification(Failures failures);
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,32 @@
import org.elasticsearch.TransportVersions;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware;
import org.elasticsearch.xpack.esql.common.Failures;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
import org.elasticsearch.xpack.esql.core.expression.function.Function;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.util.CollectionUtils;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
import org.elasticsearch.xpack.esql.plan.logical.OrderBy;

import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.function.BiConsumer;

import static java.util.Arrays.asList;
import static java.util.Collections.emptyList;
import static org.elasticsearch.xpack.esql.common.Failure.fail;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT;

/**
* A type of {@code Function} that takes multiple values and extracts a single value out of them. For example, {@code AVG()}.
*/
public abstract class AggregateFunction extends Function {
public abstract class AggregateFunction extends Function implements PostAnalysisPlanVerificationAware {

private final Expression field;
private final List<? extends Expression> parameters;
Expand Down Expand Up @@ -127,4 +133,19 @@ public boolean equals(Object obj) {
}
return false;
}

/**
 * Supplies the plan-level check for aggregate functions used as sort keys.
 * <p>
 * The returned consumer only inspects {@link OrderBy} plans: it walks down each of the plan's order expressions and records a
 * failure for every {@link AggregateFunction} it encounters. All offenders are reported, not just the first one.
 * </p>
 *
 * @return a consumer that receives the plan to check and the accumulator the failures are added to.
 */
@Override
public BiConsumer<LogicalPlan, Failures> postAnalysisPlanVerification() {
    return (plan, failures) -> {
        // Plans other than OrderBy are not subject to this check.
        if (plan instanceof OrderBy orderBy) {
            orderBy.order().forEach(sortKey -> sortKey.forEachDown(Function.class, fn -> {
                if (fn instanceof AggregateFunction == false) {
                    return;
                }
                failures.add(fail(fn, "Aggregate functions are not allowed in SORT [{}]", fn.functionName()));
            }));
        }
    };
}
}
Loading

0 comments on commit c17de01

Please sign in to comment.