Skip to content

Commit

Permalink
Implement tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
pavelkraleu committed Mar 23, 2024
1 parent b74c060 commit 290cb2b
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 7 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# CI workflow: on every push, build the project's Docker image and run the
# pytest suite inside a container started from that image.
name: Docker Build and Run

on:
  push:

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      # v4 replaces v3, which runs on the deprecated Node 16 action runtime.
      - uses: actions/checkout@v4
        with:
          # 0 fetches the full history instead of a shallow clone.
          fetch-depth: 0

      - name: Build Docker Image
        run: |
          docker build . -t needle-in-a-haystack

      - name: Run Docker Container
        # The image's entrypoint is overridden so the container runs pytest.
        # `--rm` removes the container once the run finishes.
        # For `docker run`, `-t` allocates a pseudo-TTY; it is not a tag flag.
        run: |
          docker run --rm --entrypoint pytest -t needle-in-a-haystack
7 changes: 1 addition & 6 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,4 @@ dmypy.json
# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/
15 changes: 15 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Image that installs the project's Python dependencies and runs main.py by default.
FROM python:3.12

# PYTHONPATH=/app puts the application directory on Python's import path.
# PYTHONUNBUFFERED=1 turns off stdout/stderr buffering so output shows up immediately.
# Written in key=value form; the space-separated `ENV key value` form is legacy.
ENV PYTHONPATH=/app \
    PYTHONUNBUFFERED=1

WORKDIR /app

# Copy only the dependency manifest first so the install layer below stays
# cached until requirements.txt itself changes.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the build context into /app.
COPY . .

# ENTRYPOINT is the interpreter and CMD is its default argument, so
# `docker run <image> other.py` runs a different script, and
# `--entrypoint` replaces the interpreter entirely.
ENTRYPOINT ["python"]
CMD ["main.py"]
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,5 @@ tqdm==4.66.1
typing-inspect==0.9.0
typing_extensions==4.8.0
urllib3==2.1.0
yarl==1.9.3
yarl==1.9.3
pytest==8.1.1
29 changes: 29 additions & 0 deletions tests/test_evaluators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from unittest.mock import patch, MagicMock, call, ANY

from needlehaystack.evaluators import OpenAIEvaluator

# Fixture values shared by the tests in this module.

# Question and reference answer passed to the OpenAIEvaluator constructor.
QUESTION_ASKED = "What is the color of the sky?"
QUESTION_ANSWER = "Sky is blue"
# Dummy key exported through the NIAH_EVALUATOR_API_KEY environment variable.
API_KEY = "abc"
# Score the mocked evaluator reports (as a string); the test expects it back as an int.
SCORE = 123
# Temperature and model name the test expects ChatOpenAI to be constructed with.
TEMPERATURE = 0
MODEL = "gpt-3.5-turbo-0125"


@patch('needlehaystack.evaluators.openai.ChatOpenAI')
@patch('needlehaystack.evaluators.openai.load_evaluator')
def test_openai(mock_load_evaluator, mock_chat_open_ai, monkeypatch):
    """Check how OpenAIEvaluator wires up its LLM and scorer, and what it returns.

    ChatOpenAI and load_evaluator are both patched, so no real API call is made.
    The test expects:
      * ChatOpenAI to be built with MODEL, TEMPERATURE and the key taken from
        the NIAH_EVALUATOR_API_KEY environment variable;
      * load_evaluator to be asked for a 'labeled_score_string' evaluator with
        OpenAIEvaluator.CRITERIA and some llm;
      * evaluate_response to return the mocked string score as the int SCORE.
    """
    monkeypatch.setenv('NIAH_EVALUATOR_API_KEY', API_KEY)

    # Stand-in for the object load_evaluator returns; it reports the score as a string.
    fake_scorer = MagicMock()
    fake_scorer.evaluate_strings.return_value = {'score': str(SCORE)}
    mock_load_evaluator.return_value = fake_scorer

    evaluator = OpenAIEvaluator(question_asked=QUESTION_ASKED, true_answer=QUESTION_ANSWER)
    result = evaluator.evaluate_response("Something")

    expected_llm_call = call(model=MODEL, temperature=TEMPERATURE, openai_api_key=API_KEY)
    expected_loader_call = call('labeled_score_string', criteria=OpenAIEvaluator.CRITERIA, llm=ANY)

    assert mock_chat_open_ai.call_args == expected_llm_call
    assert mock_load_evaluator.call_args == expected_loader_call

    assert result == SCORE

0 comments on commit 290cb2b

Please sign in to comment.