vectordb.py
from sentence_transformers import SentenceTransformer
import pinecone
# Initialize embedding model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
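# all-MiniLM-L6-v2 produces 384-dimensional embeddings, so the Pinecone index
# below must have been created with dimension=384.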
# Initialize vector database (Pinecone example)
pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp")
index = pinecone.Index("proprietary-data-index")
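# Note: pinecone.init() is the legacy pinecone-client (<3.0) interface; newer
# clients construct a pinecone.Pinecone(api_key=...) object instead. The index
# "proprietary-data-index" is assumed to already exist.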
# Step 1: Add proprietary data to the database
documents = [
    {"id": "doc1", "text": "Company revenue grew by 20% in 2023."},
    {"id": "doc2", "text": "The company was founded in 2010."}
]
for doc in documents:
    embedding = embedding_model.encode(doc["text"]).tolist()
    # Store the original text as metadata so it can be retrieved at query time;
    # without it, the metadata lookup in Step 3 would fail.
    index.upsert([(doc["id"], embedding, {"text": doc["text"]})])
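# For larger corpora, the (id, vector, metadata) tuples can be batched into a
# single upsert call rather than one call per document.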
# Step 2: User query
query = "What was the company’s growth in 2023?"
query_embedding = embedding_model.encode(query).tolist()
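# The query must be embedded with the same model as the documents so both
# vectors live in the same space and similarity scores are meaningful.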
# Step 3: Search for relevant context
search_results = index.query(vector=query_embedding, top_k=1, include_metadata=True)
context = search_results["matches"][0]["metadata"]["text"]
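# Note: the call below uses the legacy openai<1.0 SDK (openai.ChatCompletion);
# it reads the API key from the OPENAI_API_KEY environment variable unless
# openai.api_key is set explicitly.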
# Step 4: Pass the retrieved context and the user query to the LLM
import openai

response = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": context},
        {"role": "user", "content": query}
    ]
)
print(response["choices"][0]["message"]["content"])