-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathllm_interface.py
93 lines (81 loc) · 3.44 KB
/
llm_interface.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# llm_interface.py
import logging
from typing import Generator
import ollama
import streamlit as st
import yaml
from deep_translator import GoogleTranslator # Use deep-translator
def load_config():
    """Load the application settings from ``config.yaml``.

    The file is opened relative to the current working directory and parsed
    with ``yaml.safe_load``.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document (the rest of
        this module indexes it like a mapping, e.g. ``config['llm_model']``).

    Raises:
        OSError: If ``config.yaml`` cannot be opened.
    """
    # Pin the encoding: without it, open() falls back to the platform's
    # locale encoding, which is not UTF-8 everywhere and can corrupt or
    # reject non-ASCII characters in the YAML file.
    with open('config.yaml', 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)
# Settings are read once, at import time; call_llm() looks up
# config['llm_model'] on every request.
config = load_config()
# System message sent with every chat request made by call_llm(). It tells the
# model to answer only from the supplied context and how to format the answer.
# This text is sent to the model verbatim, so edits here change model behavior.
system_prompt = """
You are an AI assistant that provides detailed answers based solely on the given context.
Instructions:
- Use **only** the information in the "Context" to answer the "Question".
- Do **not** include any external knowledge or assumptions.
- If the context doesn't contain sufficient information to answer the question, respond: "The context does not provide enough information to answer this question."
Formatting Guidelines:
- Use clear and concise language.
- Organize your answer into paragraphs for readability.
- Use bullet points or numbered lists to break down complex information when appropriate.
- Include headings or subheadings if relevant.
- Ensure proper grammar, punctuation, and spelling.
Remember: Base your entire response solely on the information provided in the context.
"""
def call_llm(context: str, prompt: str, language: str) -> Generator[str, None, None]:
    """Generate an answer to ``prompt`` from ``context`` and yield it as text.

    When ``language`` is ``'en'`` the model output is streamed and each
    chunk's text is yielded as it arrives. For any other language the whole
    answer is requested in a single call, passed through ``translate_text``
    and yielded as one string.

    Args:
        context: Reference text the model is instructed to answer from.
        prompt: The user's question.
        language: Target language code; ``'en'`` selects streaming without
            translation.

    Yields:
        Successive pieces of the answer (English), or the complete
        translated answer (any other language).

    Any exception is logged with its traceback and shown via ``st.error``
    rather than raised, so on failure the generator simply ends early.
    """
    try:
        # Both branches send the same conversation, so build it once.
        messages = [
            {
                "role": "system",
                "content": system_prompt,
            },
            {
                "role": "user",
                "content": f"Context: {context}\nQuestion: {prompt}",
            },
        ]

        if language == 'en':
            # English needs no translation, so stream chunks straight through.
            stream = ollama.chat(
                model=config['llm_model'],
                stream=True,
                messages=messages,
            )
            for chunk in stream:
                # Anything other than an explicit done=False ends the stream;
                # that chunk's content is not yielded.
                if chunk["done"] is not False:
                    break
                yield chunk["message"]["content"]
        else:
            # The answer is translated as a whole, so fetch it in one
            # response instead of streaming it.
            response = ollama.chat(
                model=config['llm_model'],
                stream=False,
                messages=messages,
            )
            yield translate_text(response["message"]["content"], language)
    except Exception as e:
        # logging.exception keeps the traceback, which a plain error() drops.
        logging.exception("An error occurred while generating the response: %s", e)
        st.error(f"An error occurred while generating the response: {e}")
def translate_text(text: str, dest_language: str) -> str:
    """Translate ``text`` into ``dest_language`` using deep-translator.

    The source language is auto-detected (``source='auto'``).

    Args:
        text: The text to translate.
        dest_language: Target language passed to ``GoogleTranslator``.

    Returns:
        The translated text. Empty or whitespace-only input is returned
        unchanged without calling the translator. If translation fails, the
        error is logged and shown via ``st.error`` and the original ``text``
        is returned.
    """
    # Nothing to translate: skip the translator call (and any error it
    # might raise for blank input) entirely.
    if not text or not text.strip():
        return text

    try:
        translator = GoogleTranslator(source='auto', target=dest_language)
        return translator.translate(text)
    except Exception as e:
        # logging.exception keeps the traceback, which a plain error() drops.
        logging.exception("An error occurred during translation: %s", e)
        st.error(f"An error occurred during translation: {e}")
        return text  # Best effort: fall back to the untranslated text