Skip to content

Commit

Permalink
create cosine similarity method
Browse files Browse the repository at this point in the history
  • Loading branch information
Olasunkanmi Oyinlola authored and Olasunkanmi Oyinlola committed Jan 28, 2024
1 parent 00b47a8 commit e5b7003
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 7 deletions.
9 changes: 9 additions & 0 deletions api/database.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,13 @@ export class DataBase {
console.error(AppError.dbConnectionError, error);
}
}

/**
 * Closes the database connection by awaiting `this.client.end()`.
 *
 * On success a confirmation message is written to the console. If ending the
 * client fails, the error is logged together with
 * `AppError.dbDisconnectError` and is not rethrown.
 */
async disconnect() {
  try {
    await this.client.end();
  } catch (error) {
    // Best-effort shutdown: report the failure and stop here.
    console.error(AppError.dbDisconnectError, error);
    return;
  }
  console.log("Disconnected from the database.");
}
}
1 change: 1 addition & 0 deletions api/error.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ export const AppError = {
VectorCreationError: "Error creating pg vector extension",
CreateDocumentError: (document: string) => `Unable to create document, with query ${document}`,
dbConnectionError: "Error connecting to the database:",
dbDisconnectError: "Error disconnecting from the database:",
};
37 changes: 31 additions & 6 deletions api/query.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,35 @@
/**
 * SQL statements used to set up the `documents` vector store.
 *
 * Every statement is written to be safe to run repeatedly
 * (`IF NOT EXISTS` / `create or replace`).
 */
export const dbQuery = {
  /** Enables the `vector` extension. */
  CREATE_VECTOR: "CREATE EXTENSION IF NOT EXISTS vector;",

  /** Creates the `documents` table holding text content and a 1536-dimension embedding. */
  CREATE_TABLE: `
CREATE TABLE IF NOT EXISTS documents (
id bigserial PRIMARY KEY,
content text,
embedding vector(1536)
);
`,

  /**
   * Creates (or replaces) the `match_documents` SQL function.
   * NOTE: despite the key name, this statement defines a function, not a table.
   *
   * The function returns up to `match_count` rows whose similarity
   * (`1 - (embedding <=> query_embedding)`) is greater than `match_threshold`,
   * ordered by ascending `<=>` distance.
   */
  CREATE_MATCH_DOCUMENTS_TABLES: `
create or replace function match_documents (
query_embedding vector(1536),
match_threshold float,
match_count int
)
returns table (
id bigint,
content text,
similarity float
)
language sql stable
as $$
select
documents.id,
documents.content,
1 - (documents.embedding <=> query_embedding) as similarity
from documents
where documents.embedding <=> query_embedding < 1 - match_threshold
order by documents.embedding <=> query_embedding
limit match_count;
$$;
`,

  /**
   * Creates an ivfflat index over `documents.embedding` using cosine ops.
   *
   * The index is named explicitly and guarded with `IF NOT EXISTS` so that
   * re-running the setup does not add a duplicate index each time.
   * `documents_embedding_idx` is the name PostgreSQL would auto-generate for
   * the unnamed form, so an index created by the previous statement is reused.
   */
  CREATE_INDEX: `CREATE INDEX IF NOT EXISTS documents_embedding_idx ON documents USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);`,
};
1 change: 1 addition & 0 deletions api/services/document-service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/** Placeholder service class; it currently declares no members. */
export class DocumentService {}
34 changes: 33 additions & 1 deletion api/services/embed-service.ts
Original file line number Diff line number Diff line change
@@ -1 +1,33 @@
export class Embed {}
/** Service grouping embedding-related helpers. */
export class EmbeddingService {
  /** Stub: currently has an empty body and does nothing. */
  generateEmbeddings() {}

  /**
   * Calculates the cosine similarity between two vectors.
   *
   * The result is clamped to the closed interval [-1, 1]: floating-point
   * rounding in the dot product and square roots can otherwise yield values
   * such as `1.0000000000000002` for parallel vectors.
   *
   * @param vecA - The first vector.
   * @param vecB - The second vector.
   * @returns The cosine similarity in [-1, 1], or `null` when either vector
   *   has zero magnitude (including two empty vectors), where the similarity
   *   is undefined.
   * @throws Error if the lengths of the vectors are not equal.
   */
  cosineSimilarity(vecA: number[], vecB: number[]): number | null {
    // Validate before doing any work.
    if (vecA.length !== vecB.length) {
      throw new Error("Both vectors must be of the same length");
    }

    let dotProduct = 0;
    let sumSquaresA = 0;
    let sumSquaresB = 0;
    for (let i = 0; i < vecA.length; i++) {
      const a = vecA[i];
      const b = vecB[i];
      dotProduct += a * b;
      sumSquaresA += a * a;
      sumSquaresB += b * b;
    }

    const magnitudeA = Math.sqrt(sumSquaresA);
    const magnitudeB = Math.sqrt(sumSquaresB);

    // A zero-length direction has no angle to compare against.
    if (magnitudeA === 0 || magnitudeB === 0) {
      return null;
    }

    const similarity = dotProduct / (magnitudeA * magnitudeB);
    // Clamp away rounding overshoot; NaN (from NaN inputs) passes through unchanged.
    return Math.min(1, Math.max(-1, similarity));
  }
}

0 comments on commit e5b7003

Please sign in to comment.