Code Refactoring #7

Closed · wants to merge 18 commits

Commits (18)
a7b96d7  Code seperation for each model providers (prabha-git, Jan 2, 2024)
8938e86  Code Seperation for each provider (prabha-git, Jan 2, 2024)
374e8bf  Merge pull request #1 from prabha-git/refactor_models_seperate_class (prabha-git, Jan 2, 2024)
b238046  updated the readme (prabha-git, Jan 2, 2024)
4501c01  Merge pull request #2 from prabha-git/refactor_models_seperate_class (prabha-git, Jan 2, 2024)
9b093f2  issuecomment-1877803162 - Updating the code based on feedback (prabha-git, Jan 9, 2024)
dc7b715  Merge pull request #3 from prabha-git/remove_hardcoding_substring_eval (prabha-git, Jan 9, 2024)
13ce6b0  Added a check to ensure substring validation words is in needle (prabha-git, Jan 10, 2024)
933eeff  Merge pull request #4 from prabha-git/remove_hardcoding_substring_eval (prabha-git, Jan 10, 2024)
e471807  instating evaluation model with gpt4 only evaluation_method is set to… (prabha-git, Jan 10, 2024)
aa20790  Directly included the prompt in AnthropicEvaluator (prabha-git, Feb 28, 2024)
c7b0dce  refactored the code to use pathlib instead of os.path (prabha-git, Feb 29, 2024)
59a82e7  Updated the Anthropic to 0.16.0 , Since API has changed (prabha-git, Feb 29, 2024)
b242a5c  updates save_results method to save json output in single line (prabha-git, Feb 29, 2024)
dc47cc9  remove f-string where is it not requried in print_ongoing_status (prabha-git, Feb 29, 2024)
9f49f2c  Added pre-commit with trailing-whitespace,end-of-file-fixer (prabha-git, Feb 29, 2024)
7e86887  Fix with end-of-file-fixer (prabha-git, Feb 29, 2024)
4a77764  added black hook to pre-commit (prabha-git, Feb 29, 2024)
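
Two of the commits above (c7b0dce, pathlib instead of os.path, and b242a5c, single-line JSON output from save_results) are only described by their messages; their diffs are not included in this excerpt. A minimal hypothetical sketch of the combined idea, with invented function and file names, might look like this:

# Hypothetical sketch only -- not the repository's actual save_results implementation.
import json
from pathlib import Path


def save_results(result: dict, results_dir: str = "results") -> Path:
    # pathlib replaces the older os.path.join / os.makedirs calls (commit c7b0dce)
    out_dir = Path(results_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file = out_dir / f"{result['model']}_len_{result['context_length']}.json"
    # json.dumps with no indent argument writes the whole document on a single line
    # (the behaviour described in commit b242a5c)
    out_file.write_text(json.dumps(result), encoding="utf-8")
    return out_file


if __name__ == "__main__":
    print(save_results({"model": "claude-2.1", "context_length": 1000, "score": 1}))
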
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,6 @@
repos:
  - repo: https://github.com/psf/black
    rev: 24.2.0
    hooks:
      - id: black

86 changes: 86 additions & 0 deletions AnthropicEvaluator.py
@@ -0,0 +1,86 @@
import os
import tiktoken
from LLMNeedleHaystackTester import LLMNeedleHaystackTester
from anthropic import AsyncAnthropic, Anthropic


class AnthropicEvaluator(LLMNeedleHaystackTester):
    def __init__(self, **kwargs):
        if "anthropic_api_key" not in kwargs and not os.getenv("ANTHROPIC_API_KEY"):
            raise ValueError(
                "Either anthropic_api_key must be supplied with init, or ANTHROPIC_API_KEY must be in env"
            )

        if "model_name" not in kwargs:
            raise ValueError("model_name must be supplied with init")
        elif "claude" not in kwargs["model_name"]:
            raise ValueError(
                "If the model provider is 'Anthropic', the model name must include 'claude'. "
                "See https://docs.anthropic.com/claude/reference/selecting-a-model for more details on Anthropic models"
            )

        if "evaluation_method" not in kwargs:
            print(
                "since evaluation method is not specified , default method substring_match will be used for evaluation"
            )
        elif kwargs["evaluation_method"] not in ("gpt4", "substring_match"):
            raise ValueError("evaluation_method must be 'substring_match' or 'gpt4'")
        elif (
            kwargs["evaluation_method"] == "gpt4"
            and "openai_api_key" not in kwargs
            and not os.getenv("OPENAI_API_KEY")
        ):
            raise ValueError(
                "if evaluation_method is gpt4 , openai_api_key must be supplied with init, or OPENAI_API_KEY must be in env"
            )
        else:
            self.openai_api_key = kwargs.get(
                "openai_api_key", os.getenv("OPENAI_API_KEY")
            )

        self.anthropic_api_key = kwargs.pop(
            "anthropic_api_key", os.getenv("ANTHROPIC_API_KEY")
        )
        self.model_name = kwargs["model_name"]
        self.model_to_test_description = kwargs.pop("model_name")
        self.model_to_test = AsyncAnthropic(api_key=self.anthropic_api_key)
        self.tokenizer = Anthropic().get_tokenizer()

        super().__init__(**kwargs)

    def get_encoding(self, context):
        return self.tokenizer.encode(context).ids

    def get_decoding(self, encoded_context):
        return self.tokenizer.decode(encoded_context)

    def get_prompt(self, context):
        return [
            {
                "role": "user",
                "content": f"{context}\n\n {self.retrieval_question} Don't give information outside the document or repeat your findings",
            },
            {
                "role": "assistant",
                "content": "Here is the most relevant sentence in the context:",
            },
        ]

    async def get_response_from_model(self, prompt):
        response = await self.model_to_test.messages.create(
            model=self.model_name,
            messages=prompt,
            system="You are a helpful AI bot that answers questions for a user. Keep your response short and direct",
            max_tokens=300,
            temperature=0,
        )
        return response.content[0].text


if __name__ == "__main__":
    # Tons of defaults set, check out the LLMNeedleHaystackTester's init for more info
    ht = AnthropicEvaluator(
        model_name="claude-2.1", evaluation_method="substring_match"
    )

    ht.start_test()
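
The diff above covers only AnthropicEvaluator.py; the substring_match scoring and the needle check added in commit 13ce6b0 live in LLMNeedleHaystackTester, whose diff is not shown in this excerpt. A rough sketch of that check, using invented helper names, could be:

# Hypothetical sketch of the substring_match idea from commit 13ce6b0 -- the real
# logic lives in LLMNeedleHaystackTester and may differ from this.
def validate_substring_words(validation_words: list[str], needle: str) -> None:
    # Every word used for scoring must actually occur in the needle text,
    # otherwise the test could never score a perfect retrieval.
    missing = [w for w in validation_words if w.lower() not in needle.lower()]
    if missing:
        raise ValueError(f"Substring validation words not found in needle: {missing}")


def substring_match_score(response: str, validation_words: list[str]) -> float:
    # Fraction of validation words present in the model's answer.
    hits = sum(w.lower() in response.lower() for w in validation_words)
    return hits / len(validation_words)
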
9 changes: 0 additions & 9 deletions Anthropic_prompt.txt

This file was deleted.
