Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make semantic text part of the text family #119792

Merged
merged 9 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/119792.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 119792
summary: Make semantic text part of the text family
area: Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
import org.elasticsearch.index.mapper.PlaceHolderFieldMapper;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.query.MatchBoolPrefixQueryBuilder;
import org.elasticsearch.index.query.MatchPhrasePrefixQueryBuilder;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.query.ZeroTermsQueryOption;
import org.elasticsearch.lucene.analysis.miscellaneous.DisableGraphAttribute;
Expand All @@ -63,24 +66,26 @@ public enum Type implements Writeable {
/**
* The text is analyzed and terms are added to a boolean query.
*/
BOOLEAN(0),
BOOLEAN(0, org.elasticsearch.index.query.MatchQueryBuilder.NAME),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick: Can we clean up the imports a bit so we don't have to reference the fully qualified class name here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can't: MatchQueryParser already has an inner class called MatchQueryBuilder, so the simple name would clash.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe break it out into a variable then?

/**
* The text is analyzed and used as a phrase query.
*/
PHRASE(1),
PHRASE(1, MatchPhraseQueryBuilder.NAME),
/**
* The text is analyzed and used in a phrase query, with the last term acting as a prefix.
*/
PHRASE_PREFIX(2),
PHRASE_PREFIX(2, MatchPhrasePrefixQueryBuilder.NAME),
/**
* The text is analyzed, terms are added to a boolean query with the last term acting as a prefix.
*/
BOOLEAN_PREFIX(3);
BOOLEAN_PREFIX(3, MatchBoolPrefixQueryBuilder.NAME);

private final int ordinal;
private final String queryName;

Type(int ordinal) {
Type(int ordinal, String queryName) {
this.ordinal = ordinal;
this.queryName = queryName;
}

public static Type readFromStream(StreamInput in) throws IOException {
Expand All @@ -93,6 +98,10 @@ public static Type readFromStream(StreamInput in) throws IOException {
throw new ElasticsearchException("unknown serialized type [" + ord + "]");
}

/**
 * Returns the query name that was supplied to this type's constructor, e.g.
 * {@code MatchPhraseQueryBuilder.NAME} for {@code PHRASE}.
 *
 * @return the query name stored for this match type
 */
public String getQueryName() {
return queryName;
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeVInt(this.ordinal);
Expand Down Expand Up @@ -207,11 +216,23 @@ public Query parse(Type type, String fieldName, Object value) throws IOException
IllegalArgumentException iae;
if (fieldType instanceof PlaceHolderFieldMapper.PlaceHolderFieldType) {
iae = new IllegalArgumentException(
"Field [" + fieldType.name() + "] of type [" + fieldType.typeName() + "] in legacy index does not support match queries"
"Field ["
+ fieldType.name()
+ "] of type ["
+ fieldType.typeName()
+ "] in legacy index does not support "
+ type.getQueryName()
+ " queries"
);
} else {
iae = new IllegalArgumentException(
"Field [" + fieldType.name() + "] of type [" + fieldType.typeName() + "] does not support match queries"
"Field ["
+ fieldType.name()
+ "] of type ["
+ fieldType.typeName()
+ "] does not support "
+ type.getQueryName()
+ " queries"
Comment on lines -214 to +235
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed this error message because it is propagated through to Kibana when attempting to run a match_phrase query on a semantic_text field. Previously, the error message indicated that semantic_text fields do not support match queries, which was quite misleading.

);
}
if (lenient) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,8 @@ public final void test() throws Throwable {

protected void shouldSkipTest(String testName) throws IOException {
if (testCase.requiredCapabilities.contains("semantic_text_type")
|| testCase.requiredCapabilities.contains("semantic_text_aggregations")) {
|| testCase.requiredCapabilities.contains("semantic_text_aggregations")
|| testCase.requiredCapabilities.contains("semantic_text_field_caps")) {
assumeTrue("Inference test service needs to be supported for semantic_text", supportsInferenceTestService());
}
checkCapabilities(adminClient(), testFeatureService, testName, testCase);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,9 @@ private static Void convertUnsupported(String s) {
for (Type value : Type.values()) {
LOOKUP.put(value.name(), value);
}
// Types with a different field caps family type
LOOKUP.put("SEMANTIC_TEXT", TEXT);

// widen smaller types
LOOKUP.put("SHORT", INTEGER);
LOOKUP.put("BYTE", INTEGER);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -601,14 +601,15 @@ emp_no_bool:boolean
testMatchWithSemanticText
required_capability: match_function
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where match(semantic_text_field, "something")
| keep semantic_text_field
| sort semantic_text_field asc
;

semantic_text_field:semantic_text
semantic_text_field:text
all we have to decide is what to do with the time that is given to us
be excellent to each other
live long and prosper
Expand All @@ -617,32 +618,35 @@ live long and prosper
testMatchWithSemanticTextAndKeyword
required_capability: match_function
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where match(semantic_text_field, "something") AND match(host, "host1")
| keep semantic_text_field, host
;

semantic_text_field:semantic_text | host:keyword
live long and prosper | host1
semantic_text_field:text | host:keyword
live long and prosper | host1
;

testMatchWithSemanticTextMultiValueField
required_capability: match_function
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text metadata _id
| where match(st_multi_value, "something") AND match(host, "host1")
| keep _id, st_multi_value
;

_id: keyword | st_multi_value:semantic_text
_id: keyword | st_multi_value:text
1 | ["Hello there!", "This is a random value", "for testing purposes"]
;

testMatchWithSemanticTextWithEvalsAndOtherFunctionsAndStats
required_capability: match_function
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where qstr("description:some*")
Expand All @@ -659,12 +663,13 @@ testMatchWithSemanticTextAndKql
required_capability: match_function
required_capability: semantic_text_type
required_capability: kql_function
required_capability: semantic_text_field_caps

from semantic_text
| where kql("host:host1") AND match(semantic_text_field, "something")
| KEEP host, semantic_text_field
;

host:keyword | semantic_text_field:semantic_text
host:keyword | semantic_text_field:text
"host1" | live long and prosper
;
Original file line number Diff line number Diff line change
Expand Up @@ -611,14 +611,15 @@ emp_no_bool:boolean
testMatchWithSemanticText
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where semantic_text_field:"something"
| keep semantic_text_field
| sort semantic_text_field asc
;

semantic_text_field:semantic_text
semantic_text_field:text
all we have to decide is what to do with the time that is given to us
be excellent to each other
live long and prosper
Expand All @@ -627,32 +628,35 @@ live long and prosper
testMatchWithSemanticTextAndKeyword
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where semantic_text_field:"something" AND host:"host1"
| keep semantic_text_field, host
;

semantic_text_field:semantic_text | host:keyword
live long and prosper | host1
semantic_text_field:text | host:keyword
live long and prosper | host1
;

testMatchWithSemanticTextMultiValueField
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text metadata _id
| where st_multi_value:"something" AND match(host, "host1")
| keep _id, st_multi_value
;

_id: keyword | st_multi_value:semantic_text
_id: keyword | st_multi_value:text
1 | ["Hello there!", "This is a random value", "for testing purposes"]
;

testMatchWithSemanticTextWithEvalsAndOtherFunctionsAndStats
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where qstr("description:some*")
Expand All @@ -669,12 +673,13 @@ testMatchWithSemanticTextAndKql
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: kql_function
required_capability: semantic_text_field_caps

from semantic_text
| where kql("host:host1") AND semantic_text_field:"something"
| KEEP host, semantic_text_field
;

host:keyword | semantic_text_field:semantic_text
host:keyword | semantic_text_field:text
"host1" | live long and prosper
;
Loading