From a3e99c4d0ee3d8855f5cc6199c9b43dfec3a531a Mon Sep 17 00:00:00 2001 From: Pieter Claerhout Date: Tue, 30 Jan 2024 09:30:15 +0100 Subject: [PATCH] Removed the custom ID option for embeddings when using PGVector --- vectorstores/pgvector/pgvector.go | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/vectorstores/pgvector/pgvector.go b/vectorstores/pgvector/pgvector.go index 6bc7d5101..495188746 100644 --- a/vectorstores/pgvector/pgvector.go +++ b/vectorstores/pgvector/pgvector.go @@ -161,7 +161,6 @@ func (s Store) createEmbeddingTableIfNotExists(ctx context.Context, tx pgx.Tx) e embedding vector%s, document varchar, cmetadata json, - custom_id varchar, "uuid" uuid NOT NULL, CONSTRAINT langchain_pg_embedding_collection_id_fkey FOREIGN KEY (collection_id) REFERENCES %s (uuid) ON DELETE CASCADE, @@ -169,10 +168,6 @@ func (s Store) createEmbeddingTableIfNotExists(ctx context.Context, tx pgx.Tx) e if _, err := tx.Exec(ctx, sql); err != nil { return err } - sql = fmt.Sprintf(`CREATE INDEX IF NOT EXISTS %s_custom_id ON %s (custom_id)`, s.embeddingTableName, s.embeddingTableName) - if _, err := tx.Exec(ctx, sql); err != nil { - return err - } sql = fmt.Sprintf(`CREATE INDEX IF NOT EXISTS %s_collection_id ON %s (collection_id)`, s.embeddingTableName, s.embeddingTableName) if _, err := tx.Exec(ctx, sql); err != nil { return err @@ -211,17 +206,16 @@ func (s Store) AddDocuments( if len(vectors) != len(docs) { return nil, ErrEmbedderWrongNumberVectors } - customID := uuid.New().String() b := &pgx.Batch{} - sql := fmt.Sprintf(`INSERT INTO %s (uuid, document, embedding, cmetadata, custom_id, collection_id) - VALUES($1, $2, $3, $4, $5, $6)`, s.embeddingTableName) + sql := fmt.Sprintf(`INSERT INTO %s (uuid, document, embedding, cmetadata, collection_id) + VALUES($1, $2, $3, $4, $5)`, s.embeddingTableName) ids := make([]string, len(docs)) for docIdx, doc := range docs { id := uuid.New().String() ids[docIdx] = id - b.Queue(sql, id, doc.PageContent, pgvector.NewVector(vectors[docIdx]), doc.Metadata, customID, s.collectionUUID) + b.Queue(sql, id, doc.PageContent, pgvector.NewVector(vectors[docIdx]), doc.Metadata, s.collectionUUID) } return ids, s.conn.SendBatch(ctx, b).Close() }