From ead0ecd4b88a4e2a700190f4cb6c8a7a8569cb58 Mon Sep 17 00:00:00 2001 From: Sam Partee Date: Tue, 31 Oct 2023 16:49:48 -0700 Subject: [PATCH] Small changes to set_value --- docs/user_guide/getting_started_01.ipynb | 24 ++- docs/user_guide/hybrid_queries_02.ipynb | 240 ++++++++++++++++++----- redisvl/query/filter.py | 82 +++----- redisvl/schema.py | 2 +- tests/unit/test_filter.py | 169 +++++++++++++++- 5 files changed, 401 insertions(+), 116 deletions(-) diff --git a/docs/user_guide/getting_started_01.ipynb b/docs/user_guide/getting_started_01.ipynb index 380ed9ed..246a378e 100644 --- a/docs/user_guide/getting_started_01.ipynb +++ b/docs/user_guide/getting_started_01.ipynb @@ -194,7 +194,15 @@ "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index already exists, overwriting.\n" + ] + } + ], "source": [ "from redisvl.index import SearchIndex\n", "\n", @@ -217,8 +225,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m15:47:29\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n", - "\u001b[32m15:47:29\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. user_index\n" + "\u001b[32m16:44:23\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n", + "\u001b[32m16:44:23\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. user_index\n" ] } ], @@ -290,7 +298,7 @@ { "data": { "text/plain": [ - "[b'user:mary', b'user:john', b'user:joe']" + "[b'user:john', b'user:mary', b'user:joe']" ] }, "execution_count": 8, @@ -349,7 +357,7 @@ } ], "source": [ - "index.client.hgetall(\"user:tyler\")" + "index.client.hgetall(\"user:tyler\", )" ] }, { @@ -725,9 +733,9 @@ "│ offsets_per_term_avg │ 0 │\n", "│ records_per_doc_avg │ 4 │\n", "│ sortable_values_size_mb │ 0 │\n", - "│ total_indexing_time │ 0.4 │\n", + "│ total_indexing_time │ 1.771 │\n", "│ total_inverted_index_blocks │ 7 │\n", - "│ vector_index_sz_mb │ 0.235603 │\n", + "│ vector_index_sz_mb │ 0.17852 │\n", "╰─────────────────────────────┴─────────────╯\n" ] } @@ -771,7 +779,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.8.13" }, "orig_nbformat": 4, "vscode": { diff --git a/docs/user_guide/hybrid_queries_02.ipynb b/docs/user_guide/hybrid_queries_02.ipynb index 327ed646..692e7461 100644 --- a/docs/user_guide/hybrid_queries_02.ipynb +++ b/docs/user_guide/hybrid_queries_02.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -70,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -88,15 +88,15 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m14:27:18\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n", - "\u001b[32m14:27:18\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. user_index\n" + "\u001b[32m16:44:04\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n", + "\u001b[32m16:44:04\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. user_index\n" ] } ], @@ -107,7 +107,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -136,13 +136,13 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
" + "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158809006214timhigh12dermatologist-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
" ], "text/plain": [ "" @@ -170,7 +170,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -196,13 +196,13 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" + "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158809006214timhigh12dermatologist-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" ], "text/plain": [ "" @@ -222,13 +222,13 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" + "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158809006214timhigh12dermatologist-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" ], "text/plain": [ "" @@ -259,13 +259,13 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" + "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158809006214timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" ], "text/plain": [ "" @@ -294,7 +294,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -321,7 +321,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -347,13 +347,13 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" + "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158809006214timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" ], "text/plain": [ "" @@ -382,7 +382,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -410,13 +410,13 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
" + "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158809006214timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
" ], "text/plain": [ "" @@ -436,7 +436,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -462,7 +462,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -488,7 +488,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -514,13 +514,13 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" + "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158809006214timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" ], "text/plain": [ "" @@ -549,7 +549,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -577,13 +577,13 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" + "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158809006214timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" ], "text/plain": [ "" @@ -603,13 +603,13 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
" + "
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158809006214timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.653301358223joemedium35dentist-122.0839,37.3861
" ], "text/plain": [ "" @@ -640,7 +640,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -683,13 +683,13 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158808946609timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
" + "
vector_distanceusercredit_scoreagejoboffice_location
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158809006214timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
" ], "text/plain": [ "" @@ -709,6 +709,138 @@ "result_print(index.query(v))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dynamic Combination\n", + "\n", + "There are often situations where you may or may not want to use a filter in a\n", + "given query. As shown above, filters will except the ``None`` type and revert\n", + "to a wildcard filter essentially returning all results.\n", + "\n", + "The same goes for filter combinations which enables rapid reuse of filters in\n", + "requests with different parameters as shown below. This removes the need for\n", + "a number of \"if-then\" conditionals to test for the empty case.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "#\n", + "def make_filter(age=None, credit=None, job=None):\n", + " flexible_filter = (\n", + " (Num(\"age\") > age) &\n", + " (Tag(\"credit_score\") == credit) &\n", + " (Text(\"job\") % job)\n", + " )\n", + " return flexible_filter\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# all parameters\n", + "combined = make_filter(age=18, credit=\"high\", job=\"engineer\")\n", + "v.set_filter(combined)\n", + "result_print(index.query(v))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# just age and credit_score\n", + "combined = make_filter(age=18, credit=\"high\")\n", + "v.set_filter(combined)\n", + "result_print(index.query(v))" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
vector_distanceusercredit_scoreagejoboffice_location
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# just age\n", + "combined = make_filter(age=18)\n", + "v.set_filter(combined)\n", + "result_print(index.query(v))" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
vector_distanceusercredit_scoreagejoboffice_location
0johnhigh18engineer-122.4194,37.7749
0derricklow14doctor-122.4194,37.7749
0.109129190445tylerhigh100engineer-122.0839,37.3861
0.158809006214timhigh12dermatologist-122.0839,37.3861
0.217882037163taimurlow15CEO-122.0839,37.3861
0.266666650772nancyhigh94doctor-122.4194,37.7749
0.653301358223joemedium35dentist-122.0839,37.3861
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# no filters\n", + "combined = make_filter()\n", + "v.set_filter(combined)\n", + "result_print(index.query(v))" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -720,7 +852,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -762,7 +894,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -796,13 +928,13 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
0derricklow14doctor
0.109129190445tylerhigh100engineer
0.158808946609timhigh12dermatologist
" + "
vector_distanceusercredit_scoreagejob
0johnhigh18engineer
0derricklow14doctor
0.109129190445tylerhigh100engineer
0.158809006214timhigh12dermatologist
" ], "text/plain": [ "" @@ -837,7 +969,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -868,7 +1000,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -907,13 +1039,13 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
vector_distanceageusercredit_scorejoboffice_location
0.109129190445100tylerhighengineer-122.0839,37.3861
0.26666665077294nancyhighdoctor-122.4194,37.7749
0.21788203716315taimurlowCEO-122.0839,37.3861
014derricklowdoctor-122.4194,37.7749
0.15880894660912timhighdermatologist-122.0839,37.3861
" + "
vector_distanceageusercredit_scorejoboffice_location
0.109129190445100tylerhighengineer-122.0839,37.3861
0.26666665077294nancyhighdoctor-122.4194,37.7749
0.65330135822335joemediumdentist-122.0839,37.3861
018johnhighengineer-122.4194,37.7749
0.21788203716315taimurlowCEO-122.0839,37.3861
014derricklowdoctor-122.4194,37.7749
0.15880900621412timhighdermatologist-122.0839,37.3861
" ], "text/plain": [ "" @@ -946,7 +1078,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 36, "metadata": {}, "outputs": [ { @@ -955,7 +1087,7 @@ "'@credit_score:{high}'" ] }, - "execution_count": 64, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -968,17 +1100,17 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'id': 'v1:0e5779d4f66646b1a13aa33ef16655f5', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\\x00\\x00\\x00?'}\n", - "{'id': 'v1:e1ab4400834347649f0779ff8dfc59cc', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\\x00\\x00\\x00?'}\n", - "{'id': 'v1:07dfdccdf7624a04b03ba41cf40ba39f', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\\x00\\x00\\x00?'}\n", - "{'id': 'v1:e56c1815d0a7493087e7977a7db07452', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\\x00\\x00\\x00?'}\n" + "{'id': 'v1:c9a810d0c22c4263987c6f2080d42e03', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\\x00\\x00\\x00?'}\n", + "{'id': 'v1:d7ce52f61a0c4e328dd342eddf5d24a8', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\\x00\\x00\\x00?'}\n", + "{'id': 'v1:b13055ae391e4d379d73c8ef1860d9a0', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\\x00\\x00\\x00?'}\n", + "{'id': 'v1:f7a033ba0bf8463baf77a3b8ef58e9f4', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\\x00\\x00\\x00?'}\n" ] } ], @@ -1001,7 +1133,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -1010,7 +1142,7 @@ "'((@credit_score:{high} @age:[18 +inf]) @age:[-inf 100])=>[KNN 10 @user_embedding $vector AS vector_distance] RETURN 6 user credit_score age job office_location vector_distance SORTBY vector_distance ASC DIALECT 2 LIMIT 0 10'" ] }, - "execution_count": 66, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1045,7 +1177,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.8.13" }, "orig_nbformat": 4, "vscode": { diff --git a/redisvl/query/filter.py b/redisvl/query/filter.py index 42e63111..e2d46834 100644 --- a/redisvl/query/filter.py +++ b/redisvl/query/filter.py @@ -1,6 +1,6 @@ from enum import Enum from functools import wraps -from typing import Any, Callable, Dict, List, Optional, Set, Union +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union from redisvl.utils.token_escaper import TokenEscaper @@ -35,7 +35,12 @@ def equals(self, other: "FilterField") -> bool: return False return (self._field == other._field) and (self._value == other._value) - def _set_value(self, val: Any, val_type: type, operator: FilterOperator): + def _set_value( + self, + val: Any, + val_type: Union[type, Tuple[type, ...]], + operator: FilterOperator, + ): # check that the operator is supported by this class if operator not in self.OPERATORS: raise ValueError( @@ -87,7 +92,7 @@ class Tag(FilterField): FilterOperator.NE: "(-@%s:{%s})", FilterOperator.IN: "@%s:{%s}", } - SUPPORTED_VAL_TYPES = (list, set) + SUPPORTED_VAL_TYPES = (list, set, tuple, str, type(None)) def __init__(self, field: str): """Create a Tag FilterField @@ -100,21 +105,19 @@ def __init__(self, field: str): def _set_tag_value( self, other: Union[List[str], Set[str], str], operator: FilterOperator ): - # handle case where other is a None/null value - if other is None: + if isinstance(other, (list, set, tuple)): + try: + # "if val" clause removes non-truthy values from list + other = [str(val) for val in other if val] + except ValueError: + raise ValueError("All tags within collection must be strings") + # above to catch the "" case + elif not other: other = [] - - # handle other edge case where None is a valid single instance - elif not isinstance(other, self.SUPPORTED_VAL_TYPES): - # TODO -- do we automatically cast this value to a string? + elif isinstance(other, str): other = [other] - # check to make sure each value is a string - if not all(isinstance(tag, str) for tag in other): - # TODO -- is this necessary? Can we cast values to strings? - raise ValueError("All tags must be strings") - - self._set_value(other, self.SUPPORTED_VAL_TYPES, operator) # type: ignore + self._set_value(other, self.SUPPORTED_VAL_TYPES, operator) @check_operator_misuse def __eq__(self, other: Union[List[str], str]) -> "FilterExpression": @@ -214,7 +217,7 @@ class Geo(FilterField): FilterOperator.EQ: "@%s:[%f %f %i %s]", FilterOperator.NE: "(-@%s:[%f %f %i %s])", } - SUPPORTED_VAL_TYPES = (GeoSpec,) + SUPPORTED_VAL_TYPES = (GeoSpec, type(None)) @check_operator_misuse def __eq__(self, other) -> "FilterExpression": @@ -247,10 +250,7 @@ def __ne__(self, other) -> "FilterExpression": def __str__(self) -> str: """Return the Redis Query syntax for a Geographic filter expression""" if not self._value: - raise ValueError( - f"Operator must be used before calling __str__. Operators are " - f"{self.OPERATORS.values()}" - ) + return "*" return self.OPERATOR_MAP[self._operator] % ( self._field, @@ -277,14 +277,7 @@ class Num(FilterField): FilterOperator.GE: "@%s:[%i +inf]", FilterOperator.LE: "@%s:[-inf %i]", } - SUPPORTED_VAL_TYPES = (int,) - - def _set_num_value(self, other: int, operator: FilterOperator): - if not isinstance(other, self.SUPPORTED_VAL_TYPES): - # TODO -- what about floats - raise TypeError("Numeric filter value must be an integer.") - # Additional checks, e.g., value range, can be placed here - self._set_value(other, self.SUPPORTED_VAL_TYPES, operator) # type: ignore + SUPPORTED_VAL_TYPES = (int, float, type(None)) def __eq__(self, other: int) -> "FilterExpression": """Create a Numeric equality filter expression @@ -296,7 +289,7 @@ def __eq__(self, other: int) -> "FilterExpression": >>> from redisvl.query.filter import Num >>> filter = Num("zipcode") == 90210 """ - self._set_num_value(other, FilterOperator.EQ) + self._set_value(other, self.SUPPORTED_VAL_TYPES, FilterOperator.EQ) return FilterExpression(str(self)) def __ne__(self, other: int) -> "FilterExpression": @@ -309,7 +302,7 @@ def __ne__(self, other: int) -> "FilterExpression": >>> from redisvl.query.filter import Num >>> filter = Num("zipcode") != 90210 """ - self._set_num_value(other, FilterOperator.NE) + self._set_value(other, self.SUPPORTED_VAL_TYPES, FilterOperator.NE) return FilterExpression(str(self)) def __gt__(self, other: int) -> "FilterExpression": @@ -322,7 +315,7 @@ def __gt__(self, other: int) -> "FilterExpression": >>> from redisvl.query.filter import Num >>> filter = Num("age") > 18 """ - self._set_num_value(other, FilterOperator.GT) + self._set_value(other, self.SUPPORTED_VAL_TYPES, FilterOperator.GT) return FilterExpression(str(self)) def __lt__(self, other: int) -> "FilterExpression": @@ -335,7 +328,7 @@ def __lt__(self, other: int) -> "FilterExpression": >>> from redisvl.query.filter import Num >>> filter = Num("age") < 18 """ - self._set_num_value(other, FilterOperator.LT) + self._set_value(other, self.SUPPORTED_VAL_TYPES, FilterOperator.LT) return FilterExpression(str(self)) def __ge__(self, other: int) -> "FilterExpression": @@ -348,7 +341,7 @@ def __ge__(self, other: int) -> "FilterExpression": >>> from redisvl.query.filter import Num >>> filter = Num("age") >= 18 """ - self._set_num_value(other, FilterOperator.GE) + self._set_value(other, self.SUPPORTED_VAL_TYPES, FilterOperator.GE) return FilterExpression(str(self)) def __le__(self, other: int) -> "FilterExpression": @@ -361,7 +354,7 @@ def __le__(self, other: int) -> "FilterExpression": >>> from redisvl.query.filter import Num >>> filter = Num("age") <= 18 """ - self._set_num_value(other, FilterOperator.LE) + self._set_value(other, self.SUPPORTED_VAL_TYPES, FilterOperator.LE) return FilterExpression(str(self)) def __str__(self) -> str: @@ -392,20 +385,7 @@ class Text(FilterField): FilterOperator.NE: '(-@%s:"%s")', FilterOperator.LIKE: "@%s:(%s)", } - SUPPORTED_VAL_TYPES = (str,) - - def _set_text_value(self, other: str, operator: FilterOperator): - # handle case where other is None/null - if other is None: - other = "" - - if not isinstance(other, str): - # TODO -- should we cast to string? - raise TypeError("Text filter value must be a string.") - - # Additional processing or validation can go here - # TODO -- is there any other escaping that should be done? - self._set_value(other, self.SUPPORTED_VAL_TYPES, operator) # type: ignore + SUPPORTED_VAL_TYPES = (str, type(None)) @check_operator_misuse def __eq__(self, other: str) -> "FilterExpression": @@ -419,7 +399,7 @@ def __eq__(self, other: str) -> "FilterExpression": >>> from redisvl.query.filter import Text >>> filter = Text("job") == "engineer" """ - self._set_text_value(other, FilterOperator.EQ) + self._set_value(other, self.SUPPORTED_VAL_TYPES, FilterOperator.EQ) return FilterExpression(str(self)) @check_operator_misuse @@ -435,7 +415,7 @@ def __ne__(self, other: str) -> "FilterExpression": >>> from redisvl.query.filter import Text >>> filter = Text("job") != "engineer" """ - self._set_text_value(other, FilterOperator.NE) + self._set_value(other, self.SUPPORTED_VAL_TYPES, FilterOperator.NE) return FilterExpression(str(self)) def __mod__(self, other: str) -> "FilterExpression": @@ -453,7 +433,7 @@ def __mod__(self, other: str) -> "FilterExpression": >>> filter = Text("job") % "engineer|doctor" # contains either term in field >>> filter = Text("job") % "engineer doctor" # contains both terms in field """ - self._set_text_value(other, FilterOperator.LIKE) + self._set_value(other, self.SUPPORTED_VAL_TYPES, FilterOperator.LIKE) return FilterExpression(str(self)) def __str__(self) -> str: diff --git a/redisvl/schema.py b/redisvl/schema.py index c3725fcc..77484f05 100644 --- a/redisvl/schema.py +++ b/redisvl/schema.py @@ -66,7 +66,7 @@ class BaseVectorField(BaseModel): distance_metric: str = Field(default="COSINE") initial_cap: Optional[int] = None - @validator("algorithm", "datatype", "distance_metric", pre=True, each_item=True) + @validator("algorithm", "datatype", "distance_metric", pre=True) def uppercase_strings(cls, v): return v.upper() diff --git a/tests/unit/test_filter.py b/tests/unit/test_filter.py index 1c66fadb..5fef85a5 100644 --- a/tests/unit/test_filter.py +++ b/tests/unit/test_filter.py @@ -53,6 +53,32 @@ def test_tag_filter_varied(operation, tags, expected): assert str(tf) == expected +def test_nullable(): + tag = Tag("tag_field") == None + assert str(tag) == "*" + + tag = Tag("tag_field") != None + assert str(tag) == "*" + + tag = Tag("tag_field") == [] + assert str(tag) == "*" + + tag = Tag("tag_field") != [] + assert str(tag) == "*" + + tag = Tag("tag_field") == "" + assert str(tag) == "*" + + tag = Tag("tag_field") != "" + assert str(tag) == "*" + + tag = Tag("tag_field") == [None] + assert str(tag) == "*" + + tag = Tag("tag_field") == [None, "tag"] + assert str(tag) == "@tag_field:{tag}" + + def test_numeric_filter(): nf = Num("numeric_field") == 5 assert str(nf) == "@numeric_field:[5 5]" @@ -72,8 +98,14 @@ def test_numeric_filter(): nf = Num("numeric_field") <= 5 assert str(nf) == "@numeric_field:[-inf 5]" - with pytest.raises(TypeError): - nf = Num("numeric_field") == None + nf = Num("numeric_field") <= None + assert str(nf) == "*" + + nf = Num("numeric_field") == None + assert str(nf) == "*" + + nf = Num("numeric_field") != None + assert str(nf) == "*" def test_text_filter(): @@ -104,6 +136,96 @@ def test_geo_filter(): assert str(geo_f) != "(-@geo_field:[1.000000 2.000000 3 m])" +@pytest.mark.parametrize( + "value, expected", + [ + (None, "*"), + ([], "*"), + ("", "*"), + ([None], "*"), + ([None, "tag"], "@tag_field:{tag}"), + ], + ids=[ + "none", + "empty_list", + "empty_string", + "list_with_none", + "list_with_none_and_tag", + ], +) +def test_nullable(value, expected): + tag = Tag("tag_field") + assert str(tag == value) == expected + + +@pytest.mark.parametrize( + "operation, value, expected", + [ + ("__eq__", 5, "@numeric_field:[5 5]"), + ("__ne__", 5, "(-@numeric_field:[5 5])"), + ("__gt__", 5, "@numeric_field:[(5 +inf]"), + ("__ge__", 5, "@numeric_field:[5 +inf]"), + ("__lt__", 5, "@numeric_field:[-inf (5]"), + ("__le__", 5, "@numeric_field:[-inf 5]"), + ("__le__", None, "*"), + ("__eq__", None, "*"), + ("__ne__", None, "*"), + ], + ids=["eq", "ne", "gt", "ge", "lt", "le", "le_none", "eq_none", "ne_none"], +) +def test_numeric_filter(operation, value, expected): + nf = Num("numeric_field") + assert str(getattr(nf, operation)(value)) == expected + + +@pytest.mark.parametrize( + "operation, value, expected", + [ + ("__eq__", "text", '@text_field:("text")'), + ("__ne__", "text", '(-@text_field:"text")'), + ("__eq__", "", "*"), + ("__ne__", "", "*"), + ("__eq__", None, "*"), + ("__ne__", None, "*"), + ("__mod__", "text", "@text_field:(text)"), + ("__mod__", "tex*", "@text_field:(tex*)"), + ("__mod__", "%text%", "@text_field:(%text%)"), + ("__mod__", "", "*"), + ("__mod__", None, "*"), + ], + ids=[ + "eq", + "ne", + "eq-empty", + "ne-empty", + "eq-none", + "ne-none", + "like", + "like_wildcard", + "like_full", + "like_empty", + "like_none", + ], +) +def test_text_filter(operation, value, expected): + txt_f = getattr(Text("text_field"), operation)(value) + assert str(txt_f) == expected + + +@pytest.mark.parametrize( + "operation, expected", + [ + ("__eq__", "@geo_field:[1.000000 2.000000 3 km]"), + ("__ne__", "(-@geo_field:[1.000000 2.000000 3 km])"), + ], + ids=["eq", "ne"], +) +def test_geo_filter(operation, expected): + geo_radius = GeoRadius(1.0, 2.0, 3, "km") + geo_f = Geo("geo_field") + assert str(getattr(geo_f, operation)(geo_radius)) == expected + + def test_filters_combination(): tf1 = Tag("tag_field") == ["tag1", "tag2"] tf2 = Tag("tag_field") == "tag3" @@ -117,3 +239,46 @@ def test_filters_combination(): assert str(tf1) == "*" assert str(tf1 & tf2) == str(tf2) assert str(tf1 | tf2) == str(tf2) + + # test combining filters with None values and empty strings + tf1 = Tag("tag_field") == None + tf2 = Tag("tag_field") == "" + assert str(tf1 & tf2) == "*" + + tf1 = Tag("tag_field") == None + tf2 = Tag("tag_field") == "tag" + assert str(tf1 & tf2) == str(tf2) + + tf1 = Tag("tag_field") == None + tf2 = Tag("tag_field") == ["tag1", "tag2"] + assert str(tf1 & tf2) == str(tf2) + + tf1 = Tag("tag_field") == None + tf2 = Tag("tag_field") != None + assert str(tf1 & tf2) == "*" + + tf1 = Tag("tag_field") == "" + tf2 = Tag("tag_field") == "tag" + tf3 = Tag("tag_field") == ["tag1", "tag2"] + assert str(tf1 & tf2 & tf3) == str(tf2 & tf3) + + # test none filters for Tag Num Text and Geo + tf1 = Tag("tag_field") == None + tf2 = Num("num_field") == None + tf3 = Text("text_field") == None + tf4 = Geo("geo_field") == None + assert str(tf1 & tf2 & tf3 & tf4) == "*" + + tf1 = Tag("tag_field") != None + tf2 = Num("num_field") != None + tf3 = Text("text_field") != None + tf4 = Geo("geo_field") != None + assert str(tf1 & tf2 & tf3 & tf4) == "*" + + # test combinations of real and None filters across tag + # text and geo filters + tf1 = Tag("tag_field") == "tag" + tf2 = Num("num_field") == None + tf3 = Text("text_field") == None + tf4 = Geo("geo_field") == GeoRadius(1.0, 2.0, 3, "km") + assert str(tf1 & tf2 & tf3 & tf4) == str(tf1 & tf4)