diff --git a/05-document-readers/document-readers-json-ollama/README.md b/05-document-readers/document-readers-json-ollama/README.md index 82204a1..eb271cf 100644 --- a/05-document-readers/document-readers-json-ollama/README.md +++ b/05-document-readers/document-readers-json-ollama/README.md @@ -1,25 +1,39 @@ # JSON Document Readers: Ollama -## Running the application +Reading and vectorizing JSON documents with LLMs via Ollama. + +## Running the application + +The application relies on Ollama for providing LLMs. You can either run Ollama locally on your laptop (macOS or Linux), or rely on the Testcontainers support in Spring Boot to spin up an Ollama service automatically. ### When using Ollama +First, make sure you have [Ollama](https://ollama.ai) installed on your laptop (macOS or Linux). +Then, use Ollama to run the _llama2_ large language model. + ```shell ollama run llama2 ``` +Finally, run the Spring Boot application. + ```shell ./gradlew bootRun ``` ### When using Docker/Podman +The application relies on the native Testcontainers support in Spring Boot to spin up an Ollama service with a _llama2_ model at startup time. + ```shell ./gradlew bootTestRun ``` ## Calling the application +You can now call the application that will use Ollama and llama2 to load JSON documents as embeddings and generate an answer to your questions based on those documents (RAG pattern). +This example uses [httpie](https://httpie.io) to send HTTP requests. + ```shell http --raw "What bike is good for city commuting?" 
:8080/ai/doc/chat ``` diff --git a/05-document-readers/document-readers-json-ollama/src/main/java/com/thomasvitale/ai/spring/ChatService.java b/05-document-readers/document-readers-json-ollama/src/main/java/com/thomasvitale/ai/spring/ChatService.java index a86b2b3..4dde576 100644 --- a/05-document-readers/document-readers-json-ollama/src/main/java/com/thomasvitale/ai/spring/ChatService.java +++ b/05-document-readers/document-readers-json-ollama/src/main/java/com/thomasvitale/ai/spring/ChatService.java @@ -28,10 +28,9 @@ class ChatService { AssistantMessage chatWithDocument(String message) { var systemPromptTemplate = new SystemPromptTemplate(""" You're assisting with questions about products in a bicycle catalog. - Use the information from the DOCUMENTS section to provide accurate answers. - If the answer involves referring to the price or the dimension of the bicycle, - include the bicycle name in the response. - If unsure, simply state that you don't know. + Use the information from the DOCUMENTS section and no prior knowledge. + If unsure or if the answer isn't found in the DOCUMENTS section, simply state + that you don't know the answer. DOCUMENTS: {documents} diff --git a/05-document-readers/document-readers-pdf-ollama/README.md b/05-document-readers/document-readers-pdf-ollama/README.md new file mode 100644 index 0000000..53fed4c --- /dev/null +++ b/05-document-readers/document-readers-pdf-ollama/README.md @@ -0,0 +1,43 @@ +# PDF Document Readers: Ollama + +Reading and vectorizing PDF documents with LLMs via Ollama. + +## Running the application + +The application relies on Ollama for providing LLMs. You can either run Ollama locally on your laptop (macOS or Linux), or rely on the Testcontainers support in Spring Boot to spin up an Ollama service automatically. + +### When using Ollama + +First, make sure you have [Ollama](https://ollama.ai) installed on your laptop (macOS or Linux). +Then, use Ollama to run the _llama2_ large language model. 
+ +```shell +ollama run llama2 +``` + +Finally, run the Spring Boot application. + +```shell +./gradlew bootRun +``` + +### When using Docker/Podman + +The application relies on the native Testcontainers support in Spring Boot to spin up an Ollama service with a _llama2_ model at startup time. + +```shell +./gradlew bootTestRun +``` + +## Calling the application + +You can now call the application that will use Ollama and llama2 to load PDF documents as embeddings and generate an answer to your questions based on those documents (RAG pattern). +This example uses [httpie](https://httpie.io) to send HTTP requests. + +```shell +http --raw "What is Iorek's biggest dream?" :8080/ai/doc/chat +``` + +```shell +http --raw "Who is Lucio?" :8080/ai/doc/chat +``` diff --git a/05-document-readers/document-readers-pdf-ollama/build.gradle b/05-document-readers/document-readers-pdf-ollama/build.gradle new file mode 100644 index 0000000..f2c529e --- /dev/null +++ b/05-document-readers/document-readers-pdf-ollama/build.gradle @@ -0,0 +1,35 @@ +plugins { + id 'java' + id 'org.springframework.boot' + id 'io.spring.dependency-management' +} + +group = 'com.thomasvitale' +version = '0.0.1-SNAPSHOT' + +java { + sourceCompatibility = '21' +} + +repositories { + mavenCentral() + maven { url 'https://repo.spring.io/milestone' } + maven { url 'https://repo.spring.io/snapshot' } +} + +dependencies { + implementation 'org.springframework.boot:spring-boot-starter-web' + + implementation "org.springframework.ai:spring-ai-ollama-spring-boot-starter:${springAiVersion}" + implementation "org.springframework.ai:spring-ai-pdf-document-reader:${springAiVersion}" + + testAndDevelopmentOnly 'org.springframework.boot:spring-boot-devtools' + + testImplementation 'org.springframework.boot:spring-boot-starter-test' + testImplementation 'org.springframework.boot:spring-boot-testcontainers' + testImplementation 'org.testcontainers:junit-jupiter' +} + +tasks.named('test') { + useJUnitPlatform() +} diff --git 
a/05-document-readers/document-readers-pdf-ollama/src/main/java/com/thomasvitale/ai/spring/ChatController.java b/05-document-readers/document-readers-pdf-ollama/src/main/java/com/thomasvitale/ai/spring/ChatController.java new file mode 100644 index 0000000..6bc35c3 --- /dev/null +++ b/05-document-readers/document-readers-pdf-ollama/src/main/java/com/thomasvitale/ai/spring/ChatController.java @@ -0,0 +1,21 @@ +package com.thomasvitale.ai.spring; + +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RestController; + +@RestController +class ChatController { + + private final ChatService chatService; + + ChatController(ChatService chatService) { + this.chatService = chatService; + } + + @PostMapping("/ai/doc/chat") + String chatWithDocument(@RequestBody String input) { + return chatService.chatWithDocument(input).getContent(); + } + +} diff --git a/05-document-readers/document-readers-pdf-ollama/src/main/java/com/thomasvitale/ai/spring/ChatService.java b/05-document-readers/document-readers-pdf-ollama/src/main/java/com/thomasvitale/ai/spring/ChatService.java new file mode 100644 index 0000000..6420e4b --- /dev/null +++ b/05-document-readers/document-readers-pdf-ollama/src/main/java/com/thomasvitale/ai/spring/ChatService.java @@ -0,0 +1,51 @@ +package com.thomasvitale.ai.spring; + +import org.springframework.ai.chat.ChatClient; +import org.springframework.ai.chat.messages.AssistantMessage; +import org.springframework.ai.chat.messages.UserMessage; +import org.springframework.ai.chat.prompt.Prompt; +import org.springframework.ai.chat.prompt.SystemPromptTemplate; +import org.springframework.ai.document.Document; +import org.springframework.ai.vectorstore.SearchRequest; +import org.springframework.ai.vectorstore.SimpleVectorStore; +import org.springframework.stereotype.Service; + +import java.util.List; +import java.util.Map; +import 
java.util.stream.Collectors; + +@Service +class ChatService { + + private final ChatClient chatClient; + private final SimpleVectorStore vectorStore; + + ChatService(ChatClient chatClient, SimpleVectorStore vectorStore) { + this.chatClient = chatClient; + this.vectorStore = vectorStore; + } + + AssistantMessage chatWithDocument(String message) { + var systemPromptTemplate = new SystemPromptTemplate(""" + Answer questions given the context information below (DOCUMENTS section) and no prior knowledge. + If the answer is not found in the DOCUMENTS section, simply state that you don't know the answer. + In the answer, include the source file name from which the context information is extracted from. + + DOCUMENTS: + {documents} + """); + + List<Document> similarDocuments = vectorStore.similaritySearch(SearchRequest.query(message).withTopK(2)); + String documents = similarDocuments.stream().map(Document::getContent).collect(Collectors.joining(System.lineSeparator())); + + Map<String, Object> model = Map.of("documents", documents); + var systemMessage = systemPromptTemplate.createMessage(model); + + var userMessage = new UserMessage(message); + var prompt = new Prompt(List.of(systemMessage, userMessage)); + + var chatResponse = chatClient.call(prompt); + return chatResponse.getResult().getOutput(); + } + +} diff --git a/05-document-readers/document-readers-pdf-ollama/src/main/java/com/thomasvitale/ai/spring/DocumentInitializer.java b/05-document-readers/document-readers-pdf-ollama/src/main/java/com/thomasvitale/ai/spring/DocumentInitializer.java new file mode 100644 index 0000000..c948744 --- /dev/null +++ b/05-document-readers/document-readers-pdf-ollama/src/main/java/com/thomasvitale/ai/spring/DocumentInitializer.java @@ -0,0 +1,57 @@ +package com.thomasvitale.ai.spring; + +import jakarta.annotation.PostConstruct; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.document.Document; +import org.springframework.ai.reader.ExtractedTextFormatter; +import 
org.springframework.ai.reader.pdf.PagePdfDocumentReader; +import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig; +import org.springframework.ai.vectorstore.SimpleVectorStore; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.core.io.Resource; +import org.springframework.stereotype.Component; + +import java.util.ArrayList; +import java.util.List; + +@Component +public class DocumentInitializer { + + private static final Logger log = LoggerFactory.getLogger(DocumentInitializer.class); + private final SimpleVectorStore simpleVectorStore; + + @Value("classpath:documents/story1.pdf") + Resource pdfFile1; + + @Value("classpath:documents/story2.pdf") + Resource pdfFile2; + + public DocumentInitializer(SimpleVectorStore simpleVectorStore) { + this.simpleVectorStore = simpleVectorStore; + } + + @PostConstruct + public void run() { + List<Document> documents = new ArrayList<>(); + + log.info("Loading PDF files as Documents"); + var pdfReader1 = new PagePdfDocumentReader(pdfFile1); + documents.addAll(pdfReader1.get()); + + log.info("Loading PDF files as Documents after reformatting"); + var pdfReader2 = new PagePdfDocumentReader(pdfFile2, PdfDocumentReaderConfig.builder() + .withPageExtractedTextFormatter(ExtractedTextFormatter.builder() + .withNumberOfTopPagesToSkipBeforeDelete(0) + .withNumberOfBottomTextLinesToDelete(1) + .withNumberOfTopTextLinesToDelete(1) + .build()) + .withPagesPerDocument(1) + .build()); + documents.addAll(pdfReader2.get()); + + log.info("Creating and storing Embeddings from Documents"); + simpleVectorStore.add(documents); + } + +} diff --git a/05-document-readers/document-readers-pdf-ollama/src/main/java/com/thomasvitale/ai/spring/DocumentReadersPdfOllamaApplication.java b/05-document-readers/document-readers-pdf-ollama/src/main/java/com/thomasvitale/ai/spring/DocumentReadersPdfOllamaApplication.java new file mode 100644 index 0000000..9673031 --- /dev/null +++ 
b/05-document-readers/document-readers-pdf-ollama/src/main/java/com/thomasvitale/ai/spring/DocumentReadersPdfOllamaApplication.java @@ -0,0 +1,21 @@ +package com.thomasvitale.ai.spring; + +import org.springframework.ai.embedding.EmbeddingClient; +import org.springframework.ai.vectorstore.SimpleVectorStore; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.context.annotation.Bean; + +@SpringBootApplication +public class DocumentReadersPdfOllamaApplication { + + @Bean + SimpleVectorStore documentWriter(EmbeddingClient embeddingClient) { + return new SimpleVectorStore(embeddingClient); + } + + public static void main(String[] args) { + SpringApplication.run(DocumentReadersPdfOllamaApplication.class, args); + } + +} diff --git a/05-document-readers/document-readers-pdf-ollama/src/main/resources/application.yml b/05-document-readers/document-readers-pdf-ollama/src/main/resources/application.yml new file mode 100644 index 0000000..318872b --- /dev/null +++ b/05-document-readers/document-readers-pdf-ollama/src/main/resources/application.yml @@ -0,0 +1,10 @@ +spring: + ai: + ollama: + chat: + model: llama2 + embedding: + model: llama2 + threads: + virtual: + enabled: true diff --git a/05-document-readers/document-readers-pdf-ollama/src/main/resources/documents/story1.pdf b/05-document-readers/document-readers-pdf-ollama/src/main/resources/documents/story1.pdf new file mode 100644 index 0000000..7445313 Binary files /dev/null and b/05-document-readers/document-readers-pdf-ollama/src/main/resources/documents/story1.pdf differ diff --git a/05-document-readers/document-readers-pdf-ollama/src/main/resources/documents/story2.pdf b/05-document-readers/document-readers-pdf-ollama/src/main/resources/documents/story2.pdf new file mode 100644 index 0000000..b11b2ce Binary files /dev/null and b/05-document-readers/document-readers-pdf-ollama/src/main/resources/documents/story2.pdf 
differ diff --git a/05-document-readers/document-readers-pdf-ollama/src/test/java/com/thomasvitale/ai/spring/DocumentReadersPdfOllamaApplicationTests.java b/05-document-readers/document-readers-pdf-ollama/src/test/java/com/thomasvitale/ai/spring/DocumentReadersPdfOllamaApplicationTests.java new file mode 100644 index 0000000..ff32149 --- /dev/null +++ b/05-document-readers/document-readers-pdf-ollama/src/test/java/com/thomasvitale/ai/spring/DocumentReadersPdfOllamaApplicationTests.java @@ -0,0 +1,17 @@ +package com.thomasvitale.ai.spring; + +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.annotation.Import; + +@SpringBootTest +@Import(TestDocumentReadersPdfOllamaApplication.class) +@Disabled // Only run locally for now +class DocumentReadersPdfOllamaApplicationTests { + + @Test + void contextLoads() { + } + +} diff --git a/05-document-readers/document-readers-pdf-ollama/src/test/java/com/thomasvitale/ai/spring/TestDocumentReadersPdfOllamaApplication.java b/05-document-readers/document-readers-pdf-ollama/src/test/java/com/thomasvitale/ai/spring/TestDocumentReadersPdfOllamaApplication.java new file mode 100644 index 0000000..5af9d6f --- /dev/null +++ b/05-document-readers/document-readers-pdf-ollama/src/test/java/com/thomasvitale/ai/spring/TestDocumentReadersPdfOllamaApplication.java @@ -0,0 +1,29 @@ +package com.thomasvitale.ai.spring; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.devtools.restart.RestartScope; +import org.springframework.boot.test.context.TestConfiguration; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Scope; +import org.springframework.test.context.DynamicPropertyRegistry; +import org.testcontainers.containers.GenericContainer; + +@TestConfiguration(proxyBeanMethods = false) +public class TestDocumentReadersPdfOllamaApplication { + 
+ + @Bean + @RestartScope + @Scope("singleton") // needed because of https://github.com/spring-projects/spring-boot/issues/35786 + GenericContainer<?> ollama(DynamicPropertyRegistry properties) { + var ollama = new GenericContainer<>("ghcr.io/thomasvitale/ollama-llama2") + .withExposedPorts(11434); + properties.add("spring.ai.ollama.base-url", + () -> "http://%s:%s".formatted(ollama.getHost(), ollama.getMappedPort(11434))); + return ollama; + } + + public static void main(String[] args) { + SpringApplication.from(DocumentReadersPdfOllamaApplication::main).with(TestDocumentReadersPdfOllamaApplication.class).run(args); + } + +} diff --git a/05-document-readers/document-readers-text-ollama/README.md b/05-document-readers/document-readers-text-ollama/README.md index 790432d..dfc2943 100644 --- a/05-document-readers/document-readers-text-ollama/README.md +++ b/05-document-readers/document-readers-text-ollama/README.md @@ -1,25 +1,39 @@ # Text Document Readers: Ollama -## Running the application +Reading and vectorizing text documents with LLMs via Ollama. + +## Running the application + +The application relies on Ollama for providing LLMs. You can either run Ollama locally on your laptop (macOS or Linux), or rely on the Testcontainers support in Spring Boot to spin up an Ollama service automatically. ### When using Ollama +First, make sure you have [Ollama](https://ollama.ai) installed on your laptop (macOS or Linux). +Then, use Ollama to run the _llama2_ large language model. + ```shell ollama run llama2 ``` +Finally, run the Spring Boot application. + ```shell ./gradlew bootRun ``` ### When using Docker/Podman +The application relies on the native Testcontainers support in Spring Boot to spin up an Ollama service with a _llama2_ model at startup time. 
+ ```shell ./gradlew bootTestRun ``` ## Calling the application +You can now call the application that will use Ollama and llama2 to load text documents as embeddings and generate an answer to your questions based on those documents (RAG pattern). +This example uses [httpie](https://httpie.io) to send HTTP requests. + ```shell http --raw "What is Iorek's biggest dream?" :8080/ai/doc/chat ``` diff --git a/05-document-readers/document-readers-text-ollama/src/main/java/com/thomasvitale/ai/spring/ChatService.java b/05-document-readers/document-readers-text-ollama/src/main/java/com/thomasvitale/ai/spring/ChatService.java index ae54894..6420e4b 100644 --- a/05-document-readers/document-readers-text-ollama/src/main/java/com/thomasvitale/ai/spring/ChatService.java +++ b/05-document-readers/document-readers-text-ollama/src/main/java/com/thomasvitale/ai/spring/ChatService.java @@ -2,8 +2,9 @@ import org.springframework.ai.chat.ChatClient; import org.springframework.ai.chat.messages.AssistantMessage; +import org.springframework.ai.chat.messages.UserMessage; import org.springframework.ai.chat.prompt.Prompt; -import org.springframework.ai.chat.prompt.PromptTemplate; +import org.springframework.ai.chat.prompt.SystemPromptTemplate; import org.springframework.ai.document.Document; import org.springframework.ai.vectorstore.SearchRequest; import org.springframework.ai.vectorstore.SimpleVectorStore; @@ -25,26 +26,23 @@ class ChatService { } AssistantMessage chatWithDocument(String message) { - var promptTemplate = new PromptTemplate(""" - Answer questions given the information below (DOCUMENTS section) and no prior knowledge. + var systemPromptTemplate = new SystemPromptTemplate(""" + Answer questions given the context information below (DOCUMENTS section) and no prior knowledge. + If the answer is not found in the DOCUMENTS section, simply state that you don't know the answer. In the answer, include the source file name from which the context information is extracted from. 
 DOCUMENTS: {documents} - - Given the context information and no prior knowledge, answer the question (QUESTION section). - - QUESTION: - {question} """); List similarDocuments = vectorStore.similaritySearch(SearchRequest.query(message).withTopK(2)); String documents = similarDocuments.stream().map(Document::getContent).collect(Collectors.joining(System.lineSeparator())); - Map model = Map.of("documents", documents, "question", message); - var userMessage = promptTemplate.createMessage(model); + Map model = Map.of("documents", documents); + var systemMessage = systemPromptTemplate.createMessage(model); - var prompt = new Prompt(userMessage); + var userMessage = new UserMessage(message); + var prompt = new Prompt(List.of(systemMessage, userMessage)); var chatResponse = chatClient.call(prompt); return chatResponse.getResult().getOutput(); diff --git a/README.md b/README.md index 88bcebf..1f3e8df 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,7 @@ Samples showing how to build Java applications powered by Generative AI and LLMs | Project | Description | |----------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------| | [document-readers-json-ollama](https://github.com/ThomasVitale/llm-apps-java-spring-ai/tree/main/05-document-readers/document-readers-json-ollama) | Reading and vectorizing JSON documents with LLMs via Ollama. | +| [document-readers-pdf-ollama](https://github.com/ThomasVitale/llm-apps-java-spring-ai/tree/main/05-document-readers/document-readers-pdf-ollama) | Reading and vectorizing PDF documents with LLMs via Ollama. | | [document-readers-text-ollama](https://github.com/ThomasVitale/llm-apps-java-spring-ai/tree/main/05-document-readers/document-readers-text-ollama) | Reading and vectorizing text documents with LLMs via Ollama. | ### 6. 
Document Transformers diff --git a/settings.gradle b/settings.gradle index c2afae7..34b8ac9 100644 --- a/settings.gradle +++ b/settings.gradle @@ -17,4 +17,5 @@ include '04-embedding-models:embedding-models-ollama' include '04-embedding-models:embedding-models-openai' include '05-document-readers:document-readers-json-ollama' +include '05-document-readers:document-readers-pdf-ollama' include '05-document-readers:document-readers-text-ollama'