-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathseed.ts
51 lines (41 loc) · 1.1 KB
/
seed.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import fs from "fs";
import csv from "csv-parser";
import { Index } from "@upstash/vector";
/** Shape of one parsed CSV record; only the `text` column is consumed by this script. */
type Row = {
  text: string;
};
// The REST token is a credential: read it from the environment instead of
// committing it to source control. Fail fast with a clear message when it is
// missing so the script never runs against the index unauthenticated.
const upstashToken = process.env.UPSTASH_VECTOR_REST_TOKEN;
if (!upstashToken) {
  throw new Error(
    "UPSTASH_VECTOR_REST_TOKEN is not set; export it before running the seed script."
  );
}

/** Upstash Vector client that the seeding routine upserts into. */
const index = new Index({
  // `||` (not `??`) on purpose: an empty-string override is never a valid
  // endpoint, so fall back to the default URL in that case as well.
  url:
    process.env.UPSTASH_VECTOR_REST_URL ||
    "https://tops-koi-64471-us1-vector.upstash.io",
  token: upstashToken,
});
/**
 * Reads a comma-separated file and collects every parsed record.
 *
 * @param filePath - Path of the CSV file to read.
 * @returns A promise that resolves with all rows once the parser emits `end`.
 *   It rejects if either the file stream or the CSV parser emits an error
 *   (e.g. the file does not exist or cannot be read).
 */
const parseCSV = (filePath: string): Promise<Row[]> =>
  new Promise<Row[]>((resolve, reject) => {
    const rows: Row[] = [];
    const source = fs.createReadStream(filePath);

    // Shared failure path: release the file handle, then reject.
    // destroy() on an already-destroyed stream is a no-op.
    const fail = (err: Error): void => {
      source.destroy();
      reject(err);
    };

    source
      // pipe() does not forward errors downstream, so the file stream needs
      // its own listener; otherwise a missing file is an unhandled 'error'
      // event that crashes the process instead of rejecting this promise.
      .on("error", fail)
      .pipe(csv({ separator: "," }))
      .on("data", (row: Row) => {
        rows.push(row);
      })
      .on("error", fail)
      .on("end", () => {
        resolve(rows);
      });
  });
/** Number of rows sent to the index per upsert call. */
const STEP = 30;

/**
 * Loads `training_dataset.csv` and upserts its rows into the vector index in
 * batches of `STEP`, one batch at a time.
 *
 * Each record uses the row's zero-based position in the CSV as its id, and
 * carries the row text both as `data` and inside `metadata.text`.
 */
const seed = async () => {
  const rows = await parseCSV("training_dataset.csv");

  let offset = 0;
  while (offset < rows.length) {
    const base = offset;
    const batch = rows.slice(base, base + STEP);
    const records = batch.map((entry, position) => {
      return {
        data: entry.text,
        id: base + position,
        metadata: { text: entry.text },
      };
    });

    // Awaited inside the loop so batches are sent sequentially.
    await index.upsert(records);
    offset += STEP;
  }
};
// Entry point. The promise must not be left floating: report the failure and
// mark the process as failed instead of surfacing an unhandled rejection.
seed().catch((err: unknown) => {
  console.error("Seeding failed:", err);
  process.exitCode = 1;
});