Skip to content

Commit

Permalink
[8.x] Make semantic text part of the text family (#119792) (#119962)
Browse files Browse the repository at this point in the history
Co-authored-by: Ioana Tagirta <ioana.tagirta@elastic.co>
  • Loading branch information
Mikep86 and ioanatia authored Jan 13, 2025
1 parent 6d6311f commit c8ba0f3
Show file tree
Hide file tree
Showing 14 changed files with 209 additions and 120 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/119792.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 119792
summary: Make semantic text part of the text family
area: Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
import org.elasticsearch.index.mapper.PlaceHolderFieldMapper;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.query.MatchBoolPrefixQueryBuilder;
import org.elasticsearch.index.query.MatchPhrasePrefixQueryBuilder;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.query.ZeroTermsQueryOption;
import org.elasticsearch.lucene.analysis.miscellaneous.DisableGraphAttribute;
Expand All @@ -62,24 +65,26 @@ public enum Type implements Writeable {
/**
* The text is analyzed and terms are added to a boolean query.
*/
BOOLEAN(0),
BOOLEAN(0, org.elasticsearch.index.query.MatchQueryBuilder.NAME),
/**
* The text is analyzed and used as a phrase query.
*/
PHRASE(1),
PHRASE(1, MatchPhraseQueryBuilder.NAME),
/**
* The text is analyzed and used in a phrase query, with the last term acting as a prefix.
*/
PHRASE_PREFIX(2),
PHRASE_PREFIX(2, MatchPhrasePrefixQueryBuilder.NAME),
/**
* The text is analyzed, terms are added to a boolean query with the last term acting as a prefix.
*/
BOOLEAN_PREFIX(3);
BOOLEAN_PREFIX(3, MatchBoolPrefixQueryBuilder.NAME);

private final int ordinal;
private final String queryName;

Type(int ordinal) {
Type(int ordinal, String queryName) {
this.ordinal = ordinal;
this.queryName = queryName;
}

public static Type readFromStream(StreamInput in) throws IOException {
Expand All @@ -92,6 +97,10 @@ public static Type readFromStream(StreamInput in) throws IOException {
throw new ElasticsearchException("unknown serialized type [" + ord + "]");
}

/**
 * Returns the query name associated with this match type, i.e. the string passed as the
 * second argument of the enum constant. The parser includes this value in its
 * "does not support ... queries" error messages.
 *
 * @return the query name stored for this type
 */
public String getQueryName() {
return queryName;
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeVInt(this.ordinal);
Expand Down Expand Up @@ -206,11 +215,23 @@ public Query parse(Type type, String fieldName, Object value) throws IOException
IllegalArgumentException iae;
if (fieldType instanceof PlaceHolderFieldMapper.PlaceHolderFieldType) {
iae = new IllegalArgumentException(
"Field [" + fieldType.name() + "] of type [" + fieldType.typeName() + "] in legacy index does not support match queries"
"Field ["
+ fieldType.name()
+ "] of type ["
+ fieldType.typeName()
+ "] in legacy index does not support "
+ type.getQueryName()
+ " queries"
);
} else {
iae = new IllegalArgumentException(
"Field [" + fieldType.name() + "] of type [" + fieldType.typeName() + "] does not support match queries"
"Field ["
+ fieldType.name()
+ "] of type ["
+ fieldType.typeName()
+ "] does not support "
+ type.getQueryName()
+ " queries"
);
}
if (lenient) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,8 @@ public final void test() throws Throwable {
}

protected void shouldSkipTest(String testName) throws IOException {
if (testCase.requiredCapabilities.contains("semantic_text_type")) {
if (testCase.requiredCapabilities.contains("semantic_text_type")
|| testCase.requiredCapabilities.contains("semantic_text_field_caps")) {
assumeTrue("Inference test service needs to be supported for semantic_text", supportsInferenceTestService());
}
checkCapabilities(adminClient(), testFeatureService, testName, testCase);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,9 @@ private static Void convertUnsupported(String s) {
for (Type value : Type.values()) {
LOOKUP.put(value.name(), value);
}
// Types with a different field caps family type
LOOKUP.put("SEMANTIC_TEXT", TEXT);

// widen smaller types
LOOKUP.put("SHORT", INTEGER);
LOOKUP.put("BYTE", INTEGER);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
_id:keyword,semantic_text_field:semantic_text,st_bool:semantic_text,st_cartesian_point:semantic_text,st_cartesian_shape:semantic_text,st_datetime:semantic_text,st_double:semantic_text,st_geopoint:semantic_text,st_geoshape:semantic_text,st_integer:semantic_text,st_ip:semantic_text,st_long:semantic_text,st_unsigned_long:semantic_text,st_version:semantic_text,st_multi_value:semantic_text,st_unicode:semantic_text,host:keyword,description:text,value:long,st_base64:semantic_text
1,live long and prosper,false,"POINT(4297.11 -1475.53)",,1953-09-02T00:00:00.000Z,5.20128E11,"POINT(42.97109630194 14.7552534413725)","POLYGON ((30 10\, 40 40\, 20 40\, 10 20\, 30 10))",23,1.1.1.1,2147483648,2147483648,1.2.3,["Hello there!", "This is a random value", "for testing purposes"],你吃饭了吗,"host1","some description1",1001,ZWxhc3RpYw==
2,all we have to decide is what to do with the time that is given to us,true,"POINT(7580.93 2272.77)",,2023-09-24T15:57:00.000Z,4541.11,"POINT(37.97109630194 21.7552534413725)","POLYGON ((30 10\, 40 40\, 20 40\, 10 20\, 30 10))",122,1.1.2.1,123,2147483648.2,9.0.0,["nice to meet you", "bye bye!"],["谢谢", "对不起我的中文不好"],"host2","some description2",1002,aGVsbG8=
3,be excellent to each other,,,,,,,,,,,,,,,"host3","some description3",1003,
_id:keyword,semantic_text_field:semantic_text,st_bool:semantic_text,st_cartesian_point:semantic_text,st_cartesian_shape:semantic_text,st_datetime:semantic_text,st_double:semantic_text,st_geopoint:semantic_text,st_geoshape:semantic_text,st_integer:semantic_text,st_ip:semantic_text,st_long:semantic_text,st_unsigned_long:semantic_text,st_version:semantic_text,st_multi_value:semantic_text,st_unicode:semantic_text,host:keyword,description:text,value:long,st_base64:semantic_text,st_logs:semantic_text
1,live long and prosper,false,"POINT(4297.11 -1475.53)",,1953-09-02T00:00:00.000Z,5.20128E11,"POINT(42.97109630194 14.7552534413725)","POLYGON ((30 10\, 40 40\, 20 40\, 10 20\, 30 10))",23,1.1.1.1,2147483648,2147483648,1.2.3,["Hello there!", "This is a random value", "for testing purposes"],你吃饭了吗,"host1","some description1",1001,ZWxhc3RpYw==,"2024-12-23T12:15:00.000Z 1.2.3.4 example@example.com 4553"
2,all we have to decide is what to do with the time that is given to us,true,"POINT(7580.93 2272.77)",,2023-09-24T15:57:00.000Z,4541.11,"POINT(37.97109630194 21.7552534413725)","POLYGON ((30 10\, 40 40\, 20 40\, 10 20\, 30 10))",122,1.1.2.1,123,2147483648.2,9.0.0,["nice to meet you", "bye bye!"],["谢谢", "对不起我的中文不好"],"host2","some description2",1002,aGVsbG8=,"2024-01-23T12:15:00.000Z 1.2.3.4 foo@example.com 42"
3,be excellent to each other,,,,,,,,,,,,,,,"host3","some description3",1003,,"2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42"
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@
"st_base64": {
"type": "semantic_text",
"inference_id": "test_sparse_inference"
},
"st_logs": {
"type": "semantic_text",
"inference_id": "test_sparse_inference"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -601,14 +601,15 @@ emp_no_bool:boolean
testMatchWithSemanticText
required_capability: match_function
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where match(semantic_text_field, "something")
| keep semantic_text_field
| sort semantic_text_field asc
;

semantic_text_field:semantic_text
semantic_text_field:text
all we have to decide is what to do with the time that is given to us
be excellent to each other
live long and prosper
Expand All @@ -617,32 +618,35 @@ live long and prosper
testMatchWithSemanticTextAndKeyword
required_capability: match_function
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where match(semantic_text_field, "something") AND match(host, "host1")
| keep semantic_text_field, host
;

semantic_text_field:semantic_text | host:keyword
live long and prosper | host1
semantic_text_field:text | host:keyword
live long and prosper | host1
;

testMatchWithSemanticTextMultiValueField
required_capability: match_function
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text metadata _id
| where match(st_multi_value, "something") AND match(host, "host1")
| keep _id, st_multi_value
;

_id: keyword | st_multi_value:semantic_text
_id: keyword | st_multi_value:text
1 | ["Hello there!", "This is a random value", "for testing purposes"]
;

testMatchWithSemanticTextWithEvalsAndOtherFunctionsAndStats
required_capability: match_function
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where qstr("description:some*")
Expand All @@ -659,12 +663,13 @@ testMatchWithSemanticTextAndKql
required_capability: match_function
required_capability: semantic_text_type
required_capability: kql_function
required_capability: semantic_text_field_caps

from semantic_text
| where kql("host:host1") AND match(semantic_text_field, "something")
| KEEP host, semantic_text_field
;

host:keyword | semantic_text_field:semantic_text
host:keyword | semantic_text_field:text
"host1" | live long and prosper
;
Original file line number Diff line number Diff line change
Expand Up @@ -611,14 +611,15 @@ emp_no_bool:boolean
testMatchWithSemanticText
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where semantic_text_field:"something"
| keep semantic_text_field
| sort semantic_text_field asc
;

semantic_text_field:semantic_text
semantic_text_field:text
all we have to decide is what to do with the time that is given to us
be excellent to each other
live long and prosper
Expand All @@ -627,32 +628,35 @@ live long and prosper
testMatchWithSemanticTextAndKeyword
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where semantic_text_field:"something" AND host:"host1"
| keep semantic_text_field, host
;

semantic_text_field:semantic_text | host:keyword
live long and prosper | host1
semantic_text_field:text | host:keyword
live long and prosper | host1
;

testMatchWithSemanticTextMultiValueField
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text metadata _id
| where st_multi_value:"something" AND match(host, "host1")
| keep _id, st_multi_value
;

_id: keyword | st_multi_value:semantic_text
_id: keyword | st_multi_value:text
1 | ["Hello there!", "This is a random value", "for testing purposes"]
;

testMatchWithSemanticTextWithEvalsAndOtherFunctionsAndStats
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: semantic_text_field_caps

from semantic_text
| where qstr("description:some*")
Expand All @@ -669,12 +673,13 @@ testMatchWithSemanticTextAndKql
required_capability: match_operator_colon
required_capability: semantic_text_type
required_capability: kql_function
required_capability: semantic_text_field_caps

from semantic_text
| where kql("host:host1") AND semantic_text_field:"something"
| KEEP host, semantic_text_field
;

host:keyword | semantic_text_field:semantic_text
host:keyword | semantic_text_field:text
"host1" | live long and prosper
;
Loading

0 comments on commit c8ba0f3

Please sign in to comment.