From 2ed95aacc5420ffcfef370eefb469e1192493f7e Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Fri, 9 Aug 2024 12:16:56 -0700 Subject: [PATCH] ensure UPDATEs and DELETEs work on vec0 tables with text primary keys, refs #77 --- sqlite-vec.c | 47 ++++++++++++++++++++++++++++++------------ tests/test-loadable.py | 38 +++++++++++++++++++++++++++------- 2 files changed, 64 insertions(+), 21 deletions(-) diff --git a/sqlite-vec.c b/sqlite-vec.c index addf191..c2dfee9 100644 --- a/sqlite-vec.c +++ b/sqlite-vec.c @@ -5998,12 +5998,22 @@ int vec0Update_Delete_DeleteRowids(vec0_vtab *p, i64 rowid) { return rc; } -int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite_int64 rowid) { +int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value * idValue) { vec0_vtab *p = (vec0_vtab *)pVTab; int rc; + i64 rowid; i64 chunk_id; i64 chunk_offset; + if(p->pkIsText) { + rc = vec0_rowid_from_id(p, idValue, &rowid); + if (rc != SQLITE_OK) { + return rc; + } + }else { + rowid = sqlite3_value_int64(idValue); + } + // 1. Find chunk position for given rowid // 2. Ensure that validity bit for position is 1, then set to 0 // 3. Zero out rowid in chunks.rowid @@ -6112,14 +6122,33 @@ int vec0Update_UpdateVectorColumn(vec0_vtab *p, i64 chunk_id, i64 chunk_offset, return SQLITE_OK; } -int vec0Update_UpdateOnRowid(sqlite3_vtab *pVTab, int argc, +int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv) { UNUSED_PARAMETER(argc); vec0_vtab *p = (vec0_vtab *)pVTab; int rc; i64 chunk_id; i64 chunk_offset; - i64 rowid = sqlite3_value_int64(argv[0]); + + i64 rowid; + if(p->pkIsText) { + const char * a = (const char *) sqlite3_value_text(argv[0]); + const char * b = (const char *) sqlite3_value_text(argv[1]); + // IMP: V08886_25725 + if( + (sqlite3_value_bytes(argv[0]) != sqlite3_value_bytes(argv[1])) + || strncmp(a, b, sqlite3_value_bytes(argv[0])) != 0 + ) { + vtab_set_error(pVTab, "UPDATEs on vec0 primary key values are not allowed."); + return SQLITE_ERROR; + } + rc = vec0_rowid_from_id(p, argv[0], &rowid); + if(rc != SQLITE_OK) { + return rc; + } + }else { + rowid = sqlite3_value_int64(argv[0]); + } // 1. get chunk_id and chunk_offset from _rowids rc = vec0_get_chunk_position(p, rowid, NULL, &chunk_id, &chunk_offset); @@ -6159,7 +6188,7 @@ static int vec0Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv, sqlite_int64 *pRowid) { // DELETE operation if (argc == 1 && sqlite3_value_type(argv[0]) != SQLITE_NULL) { - return vec0Update_Delete(pVTab, sqlite3_value_int64(argv[0])); + return vec0Update_Delete(pVTab, argv[0]); } // INSERT operation else if (argc > 1 && sqlite3_value_type(argv[0]) == SQLITE_NULL) { @@ -6167,15 +6196,7 @@ static int vec0Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv, } // UPDATE operation else if (argc > 1 && sqlite3_value_type(argv[0]) != SQLITE_NULL) { - if ((sqlite3_value_type(argv[0]) == SQLITE_INTEGER) && - (sqlite3_value_type(argv[1]) == SQLITE_INTEGER) && - (sqlite3_value_int64(argv[0]) == sqlite3_value_int64(argv[1]))) { - return vec0Update_UpdateOnRowid(pVTab, argc, argv); - } - - vtab_set_error(pVTab, - "UPDATE operation on rowids with vec0 is not supported."); - return SQLITE_ERROR; + return vec0Update_Update(pVTab, argc, argv); } else { vtab_set_error(pVTab, "Unrecognized xUpdate operation provided for vec0."); return SQLITE_ERROR; diff --git a/tests/test-loadable.py b/tests/test-loadable.py index 07bc9d8..22cd3ed 100644 --- a/tests/test-loadable.py +++ b/tests/test-loadable.py @@ -120,12 +120,11 @@ def spread_args(args): "vec0", "vec_each", "vec_npy_each", - #"vec_static_blob_entries", - #"vec_static_blobs", + # "vec_static_blob_entries", + # "vec_static_blobs", ] - def register_numpy(db, name: str, array): ptr = array.__array_interface__["data"][0] nvectors, dimensions = array.__array_interface__["shape"] @@ -264,16 +263,21 @@ def test_vec_static_blob_entries(): "v": "[0.300000,0.300000,0.300000,0.300000]", }, ] + + def test_limits(): db = connect(EXT_PATH) - with _raises("vec0 constructor error: Dimension on vector column too large, provided 8193, maximum 8192"): - db.execute("create virtual table v using vec0(a float[8193])") + with _raises( + "vec0 constructor error: Dimension on vector column too large, provided 8193, maximum 8192" + ): + db.execute("create virtual table v using vec0(a float[8193])") with _raises("vec0 constructor error: chunk_size too large"): - db.execute("create virtual table v using vec0(a float[4], chunk_size=8200)") - db.execute('create virtual table v using vec0(a float[1])') + db.execute("create virtual table v using vec0(a float[4], chunk_size=8200)") + db.execute("create virtual table v using vec0(a float[1])") with _raises("k value in knn query too large, provided 8193 and the limit is 4096"): - db.execute("select * from v where a match '[0.1]' and k = 8193") + db.execute("select * from v where a match '[0.1]' and k = 8193") + def test_funcs(): funcs = list( @@ -1509,6 +1513,24 @@ def test_vec0_text_pk(): }, ] + # test deletes on text primary keys + db.execute("delete from t where t_id = 't_1'") + assert execute_all(db, "select * from t") == [ + {"t_id": "t_2", "aaa": _f32([0.2]), "bbb": _f32([-0.2])}, + {"t_id": "t_3", "aaa": _f32([0.3]), "bbb": _f32([-0.3])}, + ] + + # test updates on text primary keys + db.execute("update t set aaa = '[999]' where t_id = 't_2'") + assert execute_all(db, "select * from t") == [ + {"t_id": "t_2", "aaa": _f32([999]), "bbb": _f32([-0.2])}, + {"t_id": "t_3", "aaa": _f32([0.3]), "bbb": _f32([-0.3])}, + ] + + # EVIDENCE-OF: V08886_25725 vec0 primary keys don't allow updates on PKs + with _raises("UPDATEs on vec0 primary key values are not allowed."): + db.execute("update t set t_id = 'xxx' where t_id = 't_2'") + def test_vec0_best_index(): db = connect(EXT_PATH)