-
Notifications
You must be signed in to change notification settings - Fork 166
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
31 changed files
with
751 additions
and
97 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# pip install openai sqlite-vec | ||
|
||
from openai import OpenAI | ||
import sqlite3 | ||
import sqlite_vec | ||
import struct | ||
from typing import List | ||
|
||
|
||
def serialize(vector: List[float]) -> bytes:
    """Serialize a list of floats into a compact "raw bytes" format.

    Every element is packed with the ``struct`` format code ``f`` (a 4-byte
    float in the machine's native byte order), so the result is
    ``4 * len(vector)`` bytes long. An empty list produces ``b""``.

    Args:
        vector: The float values to pack, in order.

    Returns:
        The packed bytes, suitable for binding as a BLOB parameter.

    Raises:
        struct.error: If an element cannot be packed as a float.
    """
    # The repeat count in front of "f" packs the whole vector in one call.
    return struct.pack(f"{len(vector)}f", *vector)
|
||
|
||
# Sample corpus: each sentence is stored as a row and embedded for KNN search.
sentences = [
    "Capri-Sun is a brand of juice concentrate–based drinks manufactured by the German company Wild and regional licensees.",
    "George V was King of the United Kingdom and the British Dominions, and Emperor of India, from 6 May 1910 until his death in 1936.",
    "Alaqua Cox is a Native American (Menominee) actress.",
    "Shohei Ohtani is a Japanese professional baseball pitcher and designated hitter for the Los Angeles Dodgers of Major League Baseball.",
    "Tamarindo, also commonly known as agua de tamarindo, is a non-alcoholic beverage made of tamarind, sugar, and water.",
]

# One model name for both the stored sentences and the search query, so the
# two sets of vectors can never be produced by different models by accident.
EMBEDDING_MODEL = "text-embedding-3-small"

client = OpenAI()

# change ':memory:' to a filepath to persist data
db = sqlite3.connect(":memory:")
# Extension loading is switched on only long enough to load sqlite-vec.
db.enable_load_extension(True)
sqlite_vec.load(db)
db.enable_load_extension(False)

# Plain table holding the source text, keyed by an integer id.
db.execute(
    """
    CREATE TABLE sentences(
      id INTEGER PRIMARY KEY,
      sentence TEXT
    );
    """
)

# `with db:` commits on success and rolls back if an insert raises.
with db:
    for i, sentence in enumerate(sentences):
        db.execute("INSERT INTO sentences(id, sentence) VALUES(?, ?)", [i, sentence])

# vec0 virtual table holding one embedding per sentence, sharing the ids of
# the `sentences` table. The declared dimension must equal the length of the
# vectors inserted below.
db.execute(
    """
    CREATE VIRTUAL TABLE vec_sentences USING vec0(
      id INTEGER PRIMARY KEY,
      sentence_embedding FLOAT[1536]
    );
    """
)

with db:
    sentence_rows = db.execute("SELECT id, sentence FROM sentences").fetchall()
    # Embed every sentence in a single API request.
    response = client.embeddings.create(
        input=[row[1] for row in sentence_rows], model=EMBEDDING_MODEL
    )
    # Pair each stored row with its embedding by position; `row_id` is used
    # instead of `id` so the builtin is not shadowed at module level.
    for (row_id, _), embedding in zip(sentence_rows, response.data):
        db.execute(
            "INSERT INTO vec_sentences(id, sentence_embedding) VALUES(?, ?)",
            [row_id, serialize(embedding.embedding)],
        )

query = "fruity liquids"
query_embedding = (
    client.embeddings.create(input=query, model=EMBEDDING_MODEL)
    .data[0]
    .embedding
)

# KNN search: MATCH against the query vector with `k = 3` requests the three
# nearest rows; the join brings back the original sentence text.
results = db.execute(
    """
    SELECT
      vec_sentences.id,
      distance,
      sentence
    FROM vec_sentences
    LEFT JOIN sentences ON sentences.id = vec_sentences.id
    WHERE sentence_embedding MATCH ?
      AND k = 3
    ORDER BY distance
    """,
    [serialize(query_embedding)],
).fetchall()

for row in results:
    print(row)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
<script setup lang="ts"></script> | ||
|
||
<template> | ||
<div | ||
style=" | ||
background: var(--vp-c-default-3); | ||
padding: 0 12px; | ||
border-radius: 10px; | ||
" | ||
> | ||
<div> | ||
<div class="language-sqlite vp-adaptive-theme"> | ||
<pre | ||
class="shiki shiki-themes github-light github-dark vp-code" | ||
><code><span class="line"><span style="--shiki-light:#6A737D;--shiki-dark:#6A737D;">-- store 768-dimensional vectors in a vec0 virtual table</span></span> | ||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">create</span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;"> virtual</span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;"> table</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> vec_movies </span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">using</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> vec0(</span></span> | ||
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> synopsis_embedding </span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">float</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;">[768]</span></span> | ||
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;">);</span></span> | ||
<span class="line"></span> | ||
<span class="line"><span style="--shiki-light:#6A737D;--shiki-dark:#6A737D;">-- insert vectors into the table, as JSON or compact BLOBs</span></span> | ||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">insert into</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> vec_movies(rowid, synopsis_embedding)</span></span> | ||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;"> select</span></span> | ||
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> rowid,</span></span> | ||
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> embed(synopsis) </span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">as</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> synopsis_embedding</span></span> | ||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;"> from</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> movies;</span></span> | ||
<span class="line"></span> | ||
<span class="line"><span style="--shiki-light:#6A737D;--shiki-dark:#6A737D;">-- KNN search!</span></span> | ||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">select</span></span> | ||
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> rowid,</span></span> | ||
<span class="line"><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> distance</span></span> | ||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">from</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> vec_movies</span></span> | ||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">where</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> synopsis_embedding </span><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">match</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> embed(</span><span style="--shiki-light:#032F62;--shiki-dark:#9ECBFF;">'scary futuristic movies'</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;">)</span></span> | ||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">order by</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;"> distance</span></span> | ||
<span class="line"><span style="--shiki-light:#D73A49;--shiki-dark:#F97583;">limit</span><span style="--shiki-light:#005CC5;--shiki-dark:#79B8FF;"> 20</span><span style="--shiki-light:#24292E;--shiki-dark:#E1E4E8;">;</span></span></code></pre> | ||
</div> | ||
</div> | ||
</div> | ||
</template> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.