-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserver.js
183 lines (151 loc) · 5.3 KB
/
server.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
const express = require('express');
const couchbase = require('couchbase');
const cors = require('cors');
const openai = require('openai');
const fs = require('fs');
const path = require('path');
require('dotenv').config();
// Express application: every route gets JSON body parsing and CORS handling.
const app = express();
const jsonBodyParser = express.json();
const corsMiddleware = cors();
app.use(jsonBodyParser);
app.use(corsMiddleware);

// Embeddings come from a local file only when USE_LOCAL_EMBEDDING is exactly 'true'.
const useLocalEmbedding = process.env.USE_LOCAL_EMBEDDING === 'true';

// The OpenAI client is constructed only when local embedding is not being used;
// otherwise it stays null.
let openaiclient = useLocalEmbedding
  ? null
  : new openai.OpenAI({ apiKey: process.env.OPENAI_API_KEY });
// Import the helper functions
const { generateQueryEmbedding, storeEmbedding } = require('./helpers');
let cluster;
// Connection attempt currently in flight (or already fulfilled). Sharing it lets
// concurrent callers of init() reuse a single couchbase.connect() call.
let clusterPromise = null;

/**
 * Lazily connects to Couchbase and returns the shared cluster handle.
 *
 * The first call starts the connection using COUCHBASE_URL, COUCHBASE_USERNAME
 * and COUCHBASE_PASSWORD from the environment. Calls made while that connection
 * is still pending wait on the same attempt instead of opening another one.
 * If the attempt fails, the cached attempt is discarded so a later call retries.
 *
 * @returns {Promise<Object>} The connected Couchbase cluster.
 * @throws Rejects with whatever error couchbase.connect() reports.
 */
async function init() {
  if (cluster) {
    return cluster;
  }

  if (!clusterPromise) {
    const attempt = (async () => {
      cluster = await couchbase.connect(process.env.COUCHBASE_URL, {
        username: process.env.COUCHBASE_USERNAME,
        password: process.env.COUCHBASE_PASSWORD,
        configProfile: "wanDevelopment",
      });
      return cluster;
    })();

    clusterPromise = attempt;

    // Forget a failed attempt so the next caller can try again. The guard keeps
    // a stale failure from clearing a newer attempt.
    attempt.catch(() => {
      if (clusterPromise === attempt) {
        clusterPromise = null;
      }
    });
  }

  return clusterPromise;
}
/**
 * Runs a vector search for the given query embedding and returns the hits.
 *
 * The search targets the 'vector-search-index' index in the '_default' scope
 * of the bucket named by COUCHBASE_BUCKET, matching against the
 * '_default.embedding' field with numCandidates set to 5.
 *
 * @param {Array} queryEmbedding - The embedding for the search query.
 * @returns {Promise<Array<{id: *, score: *}>>} One { id, score } entry per result row.
 */
async function getStoredEmbeddings(queryEmbedding) {
  const INDEX_NAME = 'vector-search-index';

  const connection = await init();
  const defaultScope = connection
    .bucket(process.env.COUCHBASE_BUCKET)
    .scope('_default');

  // Build the request inside-out: vector query -> vector search -> search request.
  const vectorQuery = couchbase.VectorQuery
    .create('_default.embedding', queryEmbedding)
    .numCandidates(5);
  const vectorSearch = couchbase.VectorSearch.fromVectorQuery(vectorQuery);
  const searchRequest = couchbase.SearchRequest.create(vectorSearch);

  const { rows } = await defaultScope.search(INDEX_NAME, searchRequest);

  // Keep only the fields downstream code needs.
  return rows.map(({ id, score }) => ({ id, score }));
}
/**
 * Loads the full documents for a list of search hits.
 *
 * Each id is fetched from the default collection of the bucket named by
 * COUCHBASE_BUCKET. When a document carries `_default.embedding`, that field
 * is deleted from the fetched content before it is returned. A lookup that
 * fails is logged and left out of the result rather than failing the batch.
 *
 * @param {Array} storedEmbeddings - The search result containing document IDs and scores.
 * @returns {Promise<Array<{content: *, score: *}>>} Documents that could be fetched, each with its score.
 */
async function fetchDocumentsByIds(storedEmbeddings) {
  const connection = await init();
  const documents = connection
    .bucket(process.env.COUCHBASE_BUCKET)
    .defaultCollection();

  // Start every lookup at once; each resolves to a result object or null.
  const lookups = storedEmbeddings.map(async ({ id, score }) => {
    try {
      const { content } = await documents.get(id);

      // Strip the embedding so it is not part of the returned content.
      const hasEmbedding = Boolean(
        content && content._default && content._default.embedding
      );
      if (hasEmbedding) {
        delete content._default.embedding;
      }

      return { content, score };
    } catch (err) {
      console.error(`Error fetching document with ID ${id}:`, err);
      return null;
    }
  });

  const settled = await Promise.all(lookups);
  return settled.filter((entry) => entry !== null);
}
/**
 * Reads the pre-computed query embedding from the local JSON file.
 *
 * The file is resolved relative to the current working directory and is
 * expected to hold the embedding at `data[0].embedding`.
 *
 * @returns {Promise<Array>} The embedding stored in the file.
 * @throws {Error} 'Local embedding file not found' when the file does not exist;
 *   a descriptive error when the file has no usable embedding; JSON or other
 *   read errors are propagated unchanged.
 */
async function loadLocalQueryEmbedding() {
  const filePath = path.resolve('./data/query_with_embedding/query_with_embedding.json');

  let fileContent;
  try {
    // Read asynchronously so the event loop is not blocked, and treat ENOENT
    // as "not found" instead of checking existence first (avoids a
    // check-then-read race).
    fileContent = await fs.promises.readFile(filePath, 'utf-8');
  } catch (err) {
    if (err && err.code === 'ENOENT') {
      throw new Error('Local embedding file not found');
    }
    throw err;
  }

  const fileData = JSON.parse(fileContent);
  const entries = fileData && fileData.data;
  const firstEntry = Array.isArray(entries) ? entries[0] : undefined;
  const embedding = firstEntry ? firstEntry.embedding : undefined;

  // Fail with a clear message rather than sending an undefined or empty vector
  // to the search.
  if (!Array.isArray(embedding) || embedding.length === 0) {
    throw new Error('Local embedding file does not contain a query embedding');
  }
  return embedding;
}

/**
 * Search blog posts using the query embedding or from local file.
 *
 * When `useLocalEmbedding` is true, `query` is ignored and the embedding is
 * loaded from the local file; otherwise it is produced by
 * generateQueryEmbedding(query). The embedding is then used to find matching
 * document ids, whose full documents are fetched and returned.
 *
 * @param {string} query - The search term.
 * @param {boolean} useLocalEmbedding - Whether to use a local embedding from file.
 * @returns {Promise<Array>} Search results.
 * @throws {Error} When the local embedding file is missing or malformed, or
 *   when embedding generation or the search itself fails.
 */
async function searchBlogPosts(query, useLocalEmbedding = false) {
  const queryEmbedding = useLocalEmbedding
    ? await loadLocalQueryEmbedding()
    : await generateQueryEmbedding(query);

  const storedEmbeddings = await getStoredEmbeddings(queryEmbedding);
  const documents = await fetchDocumentsByIds(storedEmbeddings);
  return documents;
}
// Route to handle search requests.
// Expects a JSON body of the form { q: string, useLocalEmbedding?: boolean }.
// Responds 400 when neither a search term nor the local-embedding flag is given,
// or when the search term is not a string; 500 when the search itself fails;
// otherwise the array of matching documents.
app.post('/search', async (req, res) => {
  // req.body can be undefined if the JSON parser did not populate it; fall back
  // to an empty object so the validation below answers with a 400 instead of
  // throwing outside the try block.
  const body = req.body || {};
  const searchTerm = body.q || '';
  const useLocalEmbedding = Boolean(body.useLocalEmbedding);

  if (!searchTerm && !useLocalEmbedding) {
    return res.status(400).json({ error: 'No search term or embedding provided' });
  }

  // The term is only used when an embedding has to be generated from it, so
  // reject non-string values up front rather than letting the helper fail.
  if (!useLocalEmbedding && typeof searchTerm !== 'string') {
    return res.status(400).json({ error: 'Search term must be a string' });
  }

  try {
    const searchResults = await searchBlogPosts(searchTerm, useLocalEmbedding);
    res.json(searchResults);
  } catch (err) {
    console.error('Error searching blog posts:', err);
    res.status(500).json({ error: 'Error searching blog posts' });
  }
});
// Route to embed and store markdown files.
// Expects a JSON body of the form { files: string[] }. Each path is resolved
// against the current working directory, read as UTF-8, and passed to
// storeEmbedding() together with the file's base name.
// Responds 400 for a missing, empty, or malformed list; 500 when any file
// cannot be read or stored; otherwise the array of storeEmbedding() results.
app.post('/embed', async (req, res) => {
  // req.body can be undefined if the JSON parser did not populate it.
  const body = req.body || {};
  const filePaths = body.files || [];

  if (!Array.isArray(filePaths) || filePaths.length === 0) {
    return res.status(400).json({ error: 'No files provided' });
  }

  // path.resolve() throws on non-strings; report that as a client error.
  const allPathsAreStrings = filePaths.every(
    (filePath) => typeof filePath === 'string' && filePath.length > 0
  );
  if (!allPathsAreStrings) {
    return res.status(400).json({ error: 'File paths must be non-empty strings' });
  }

  try {
    const results = await Promise.all(filePaths.map(async (filePath) => {
      // SECURITY NOTE(review): the path comes straight from the request body,
      // so a client can make the server read any file the process can access.
      // Consider restricting paths to a dedicated content directory.
      const fullPath = path.resolve(filePath);

      let content;
      try {
        // Asynchronous read keeps the event loop free and lets the files be
        // read concurrently.
        content = await fs.promises.readFile(fullPath, 'utf-8');
      } catch (err) {
        if (err && err.code === 'ENOENT') {
          throw new Error(`File ${filePath} does not exist`);
        }
        throw err;
      }

      return storeEmbedding(content, path.basename(filePath));
    }));
    res.json(results);
  } catch (err) {
    console.error('Error embedding and storing files:', err);
    res.status(500).json({ error: 'Error embedding and storing files' });
  }
});
// Start the server on PORT from the environment, falling back to 3000.
const PORT = process.env.PORT || 3000;

// Logs the port once the server is listening.
const logServerStart = () => {
  console.log(`Server running on port ${PORT}`);
};

const server = app.listen(PORT, logServerStart);

// Export both the Express app and the listening server.
module.exports = { app, server };