Skip to content

Commit

Permalink
Update the create embeddings endpoint to upload PDF files
Browse files Browse the repository at this point in the history
  • Loading branch information
Olasunkanmi Oyinlola authored and Olasunkanmi Oyinlola committed Apr 23, 2024
1 parent 5893580 commit c2ba886
Show file tree
Hide file tree
Showing 10 changed files with 219 additions and 62 deletions.
22 changes: 11 additions & 11 deletions api/controllers/embed.controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,28 @@ import { CreateDocumentEmbeddingHandler } from "../handlers/create-document-embe
import { documentRequestSchema } from "../lib/validation-schemas";
import { generateErrorResponse } from "../utils/utils";
import { Result } from "../lib/result";
import multer from "multer";
export class EmbeddingController {
path = "/embed";
router = express.Router();
upload = multer();
constructor() {
this.initRoutes();
}

initRoutes() {
this.router.post(`${this.path}/documents`, this.createDocumentEmbeddings);
this.router.post(`${this.path}/documents`, this.upload.single("pdf"), this.createDocumentEmbeddings);
}

async createDocumentEmbeddings(
req: express.Request,
res: express.Response,
next: express.NextFunction,
) {
const embeddingHandler: CreateDocumentEmbeddingHandler =
new CreateDocumentEmbeddingHandler();
async createDocumentEmbeddings(req: express.Request, res: express.Response, next: express.NextFunction) {
if (!req.file) {
return res.json(Result.fail("No file uploaded", 400));
}
const file = req.file;
const { buffer } = file;
const embeddingHandler: CreateDocumentEmbeddingHandler = new CreateDocumentEmbeddingHandler(buffer);
try {
const { title, documentType, domain } = documentRequestSchema.parse(
req.body,
);
const { title, documentType, domain } = documentRequestSchema.parse(req.body);
const result = await embeddingHandler.handle({
title,
documentType,
Expand Down
19 changes: 6 additions & 13 deletions api/handlers/create-document-embed.handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,17 @@ import { ICreateEmbeddingRequestDTO } from "../repositories/dtos/dtos";
import { EmbeddingService } from "../services/embed.service";
import { getValue } from "../utils";

export class CreateDocumentEmbeddingHandler
implements IRequestHandler<ICreateEmbeddingRequestDTO, Result<boolean>>
{
export class CreateDocumentEmbeddingHandler implements IRequestHandler<ICreateEmbeddingRequestDTO, Result<boolean>> {
constructor(private readonly pdf: Buffer) {}
private readonly apiKey: string = getValue("API_KEY");
embeddingService: EmbeddingService = new EmbeddingService(this.apiKey);

async handle(request: ICreateEmbeddingRequestDTO): Promise<Result<boolean>> {
const embeddingService: EmbeddingService = new EmbeddingService(this.apiKey, this.pdf);
try {
const { title, documentType, domain } = request;
const result = await this.embeddingService.createDocumentsEmbeddings(
title,
documentType,
domain
);
const result = await embeddingService.createDocumentsEmbeddings(title, documentType, domain);
if (!result) {
throw new HttpException(
HTTP_RESPONSE_CODE.BAD_REQUEST,
"An error occured, could not create embeddings"
);
throw new HttpException(HTTP_RESPONSE_CODE.BAD_REQUEST, "An error occured, could not create embeddings");
}
return result;
} catch (error) {
Expand Down
2 changes: 1 addition & 1 deletion api/interfaces/document-service.interface.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
export interface IDocumentService {
convertPDFToText(pdfFilePath: string): Promise<string>;
convertPDFToText(file: Buffer): Promise<string>;
writeToFile(outputFilePath: string, text: string): void;
breakTextIntoChunks(text: string, partSize: number): string[];
adjustChunkToEndAtCharacter(chunk: string): string;
Expand Down
144 changes: 142 additions & 2 deletions api/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"@google/generative-ai": "^0.1.3",
"@prisma/client": "^5.11.0",
"@types/express": "^4.17.21",
"@types/multer": "^1.4.11",
"@types/pdf-parse": "^1.1.4",
"@types/pg": "^8.11.0",
"body-parser": "^1.20.2",
Expand All @@ -30,6 +31,7 @@
"dotenv": "^16.4.1",
"express": "^4.18.2",
"langchain": "^0.1.9",
"multer": "^1.4.5-lts.1",
"pdf-parse": "^1.1.1",
"pg": "^8.11.3",
"pgvector": "^0.1.7",
Expand Down
5 changes: 2 additions & 3 deletions api/services/document.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@ import pdf from "pdf-parse";
import { IDocumentService } from "../interfaces/document-service.interface";

export class DocumentService implements IDocumentService {
async convertPDFToText(pdfFilePath: string): Promise<string> {
async convertPDFToText(file: Buffer): Promise<string> {
try {
const dataBuffer = fs.readFileSync(pdfFilePath);
const data = await pdf(dataBuffer);
const data = await pdf(file);
const text = this.formatText(data.text);
return text;
} catch (error) {
Expand Down
10 changes: 6 additions & 4 deletions api/services/embed.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,10 @@ of information. For example, you could use this task type to embed articles, FAQ
or product manuals to create a searchable knowledge base for customer support or information retrieval systems.*/

export class EmbeddingService extends GenerativeAIService implements IEmbeddingService {
documentPath: string = getValue("PDF_ABSOLUTE_PATH");
constructor(apiKey: string) {
constructor(
apiKey: string,
private file: Buffer
) {
super(apiKey);
}
/**
Expand Down Expand Up @@ -194,10 +196,10 @@ export class EmbeddingService extends GenerativeAIService implements IEmbeddingS
async createContentEmbeddings(): Promise<{ text: string; embeddings?: number[] }[]> {
const documentService: IDocumentService = new DocumentService();
let text: string;
if (!this.documentPath.length) {
if (!this.file.length) {
throw new HttpException(HTTP_RESPONSE_CODE.BAD_REQUEST, "Could not read PDF file");
}
text = await documentService.convertPDFToText(this.documentPath);
text = await documentService.convertPDFToText(this.file);
const chunks: string[] = documentService.breakTextIntoChunks(text, 2000);

const contentEmbed = chunks.map(
Expand Down
Loading

0 comments on commit c2ba886

Please sign in to comment.