Can I use a local LLM as the evaluator and provider? #47

Open
hijkzzz opened this issue Apr 25, 2024 · 6 comments
Comments

@hijkzzz

hijkzzz commented Apr 25, 2024

No description provided.

@RahulSinghalChicago

I found this repo today and will be attempting this as well.

@disperaller

+1

@66RING

66RING commented May 21, 2024

@hijkzzz @disperaller @RahulSinghalChicago hey guys, I have some findings. But I found the scores may be a bit too high. Do you have a better idea?

Based on the OpenAI provider, you can add your own provider like this (namely local_llama.py in this case):

--- ./providers/openai.py
+++ ./providers/local_llama.py
@@ -1,16 +1,19 @@
 import os
 from operator import itemgetter
 from typing import Optional
+import torch
 
-from openai import AsyncOpenAI
-from langchain_openai import ChatOpenAI  
+from langchain_openai import ChatOpenAI
 from langchain.prompts import PromptTemplate
+
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
 import tiktoken
 
 from .model import ModelProvider
 
 
-class OpenAI(ModelProvider):
+class LocalLlama(ModelProvider):
     """
     A wrapper class for interacting with OpenAI's API, providing methods to encode text, generate prompts,
     evaluate models, and create LangChain runnables for language model interactions.
@@ -25,7 +28,7 @@
                                       temperature = 0)
 
     def __init__(self,
-                 model_name: str = "gpt-3.5-turbo-0125",
+                 model_name: str = "meta/llama-2-7b-chat-hf",
                  model_kwargs: dict = DEFAULT_MODEL_KWARGS):
         """
         Initializes the OpenAI model provider with a specific model.
@@ -37,15 +40,18 @@
         Raises:
             ValueError: If NIAH_MODEL_API_KEY is not found in the environment.
         """
-        api_key = os.getenv('NIAH_MODEL_API_KEY')
-        if (not api_key):
-            raise ValueError("NIAH_MODEL_API_KEY must be in env.")
-
-        self.model_name = model_name
+        self.model_or_path = model_name
+        self.model_name = model_name.split("/")[-1]
         self.model_kwargs = model_kwargs
-        self.api_key = api_key
-        self.model = AsyncOpenAI(api_key=self.api_key)
-        self.tokenizer = tiktoken.encoding_for_model(self.model_name)
+
+        self.model = AutoModelForCausalLM.from_pretrained(
+                self.model_or_path,
+                device_map="auto",
+                torch_dtype=torch.bfloat16,
+                attn_implementation="flash_attention_2",
+            )
+
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_or_path)
     
     async def evaluate_model(self, prompt: str) -> str:
         """
@@ -57,12 +63,18 @@
         Returns:
             str: The content of the model's response to the prompt.
         """
-        response = await self.model.chat.completions.create(
-                model=self.model_name,
-                messages=prompt,
-                **self.model_kwargs
-            )
-        return response.choices[0].message.content
+        MAX_GEN_LENGTH = 128
+        tokenized_prompts = self.tokenizer(prompt, return_tensors="pt")
+        input_ids = tokenized_prompts.input_ids.cuda()
+
+        generation_output = self.model.generate(
+            input_ids,
+            max_new_tokens=MAX_GEN_LENGTH,
+            use_cache=True,
+            return_dict_in_generate=True)
+
+        output = self.tokenizer.decode(generation_output.sequences[:,input_ids.shape[1]:][0])
+        return output
     
     def generate_prompt(self, context: str, retrieval_question: str) -> str | list[dict[str, str]]:
         """
@@ -75,19 +87,16 @@
         Returns:
             list[dict[str, str]]: A list of dictionaries representing the structured prompt, including roles and content for system and user messages.
         """
-        return [{
-                "role": "system",
-                "content": "You are a helpful AI bot that answers questions for a user. Keep your response short and direct"
-            },
-            {
-                "role": "user",
-                "content": context
-            },
-            {
-                "role": "user",
-                "content": f"{retrieval_question} Don't give information outside the document or repeat your findings"
-            }]
-    
+        return f"""
+<s>[INST] <<SYS>>
+You are a helpful AI bot that answers questions for a user. Keep your response short and direct
+<</SYS>>
+{ context }
+
+{retrieval_question} Don't give information outside the document or repeat your findings
+[/INST]</s>
+"""
+
     def encode_text_to_tokens(self, text: str) -> list[int]:
         """
         Encodes a given text string to a sequence of tokens using the model's tokenizer.

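You will also need the new class exported from the providers package so the import in run.py works. Assuming the package re-exports each provider from needlehaystack/providers/__init__.py (the exact file layout is an assumption on my side), adding a line like this should be enough:

# needlehaystack/providers/__init__.py (assumed location of the re-exports)
from .local_llama import LocalLlama
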
Add an entry in run.py like this:

diff --git a/needlehaystack/run.py b/needlehaystack/run.py
index 8edbccb..f5b6783 100644
--- a/needlehaystack/run.py
+++ b/needlehaystack/run.py
@@ -6,7 +6,7 @@ from jsonargparse import CLI
 
 from . import LLMNeedleHaystackTester, LLMMultiNeedleHaystackTester
 from .evaluators import Evaluator, LangSmithEvaluator, OpenAIEvaluator
-from .providers import Anthropic, ModelProvider, OpenAI, Cohere
+from .providers import Anthropic, ModelProvider, OpenAI, Cohere, LocalLlama
 
 load_dotenv()
 
@@ -65,6 +65,8 @@ def get_model_to_test(args: CommandArgs) -> ModelProvider:
             return Anthropic(model_name=args.model_name)
         case "cohere":
             return Cohere(model_name=args.model_name)
+        case "local":
+            return LocalLlama(model_name=args.model_name)
         case _:
             raise ValueError(f"Invalid provider: {args.provider}")

@AnaRhisT94

> @hijkzzz @disperaller @RahulSinghalChicago hey guys, I have some findings. But I found the scores may be a bit too high. Do you have a better idea?
>
> Based on the OpenAI provider, you can add your own provider like this (namely local_llama.py in this case): […]

What about the evaluator.py? :)

@66RING

66RING commented Jul 30, 2024

@AnaRhisT94 Use the OpenAI evaluator.

But I recently found an offline version of NeedleInAHaystack, https://github.com/66RING/LLMTest_NeedleInAHaystack-Local, which is based on this repo.

@AnaRhisT94

AnaRhisT94 commented Jul 30, 2024

> @AnaRhisT94 Use the OpenAI evaluator.
>
> But I recently found an offline version of NeedleInAHaystack, https://github.com/66RING/LLMTest_NeedleInAHaystack-Local, which is based on this repo.

Thanks, I already found this local repo in your profile and I'm looking at it.
I'm not sure I can use the OpenAIEvaluator class as is.
I need to change the self.evaluator = ChatOpenAI.. to something else, but I'm still not sure what.

Anyways, I'm going to test the local version.
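
One direction I'm considering is to keep the OpenAIEvaluator but point that ChatOpenAI client at a local OpenAI-compatible server (e.g. vLLM or the llama.cpp server). Just a sketch, not the repo's exact evaluator code; the URL, port, and served model name are placeholders:

from langchain_openai import ChatOpenAI

# Sketch only: a ChatOpenAI client aimed at a local OpenAI-compatible endpoint.
# The server URL and served model name below are placeholders.
evaluator_chat = ChatOpenAI(
    model="meta-llama/Llama-2-7b-chat-hf",  # whatever the local server serves
    base_url="http://localhost:8000/v1",    # local OpenAI-compatible endpoint
    api_key="not-needed",                   # many local servers ignore the key
    temperature=0,
)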
