Support multiple messages
Signed-off-by: Hung-Han (Henry) Chen <chenhungh@gmail.com>
chenhunghan committed Nov 3, 2023
1 parent abf8830 commit 5a14a48
Showing 1 changed file with 48 additions and 33 deletions.
81 changes: 48 additions & 33 deletions main.py
@@ -31,9 +31,7 @@
 DEFAULT_MODEL_HG_REPO_ID = get_env(
     "DEFAULT_MODEL_HG_REPO_ID", "TheBloke/Llama-2-7B-Chat-GGML"
 )
-DEFAULT_MODEL_HG_REPO_REVISION = get_env(
-    "DEFAULT_MODEL_HG_REPO_REVISION", "main"
-)
+DEFAULT_MODEL_HG_REPO_REVISION = get_env("DEFAULT_MODEL_HG_REPO_REVISION", "main")
 DEFAULT_MODEL_FILE = get_env("DEFAULT_MODEL_FILE", "llama-2-7b-chat.ggmlv3.q4_0.bin")
 
 log.info("DEFAULT_MODEL_HG_REPO_ID: %s", DEFAULT_MODEL_HG_REPO_ID)
@@ -70,13 +68,17 @@ def set_loading_model(boolean: bool):
 
 app = FastAPI()
 
+
 # https://github.com/tiangolo/fastapi/issues/3361
 @app.exception_handler(RequestValidationError)
 async def validation_exception_handler(request: Request, exc: RequestValidationError):
     exc_str = f"{exc}".replace("\n", " ").replace("   ", " ")
     log.error("%s: %s", request, exc_str)
     content = {"status_code": 10422, "message": exc_str, "data": None}
-    return JSONResponse(content=content, status_code=status.HTTP_422_UNPROCESSABLE_ENTITY)
+    return JSONResponse(
+        content=content, status_code=status.HTTP_422_UNPROCESSABLE_ENTITY
+    )
 
+
 @app.on_event("startup")
 async def startup_event():
@@ -380,35 +382,48 @@ async def chat_completions(
     user_start = "GPT4 User: "
     user_end = "<|end_of_turn|>"
 
-    user_message = next(
-        (message for message in reversed(body.messages) if message.role == "user"), None
-    )
-    user_message_content = user_message.content if user_message else ""
-    assistant_message = next(
-        (message for message in reversed(body.messages) if message.role == "assistant"), None
-    )
-    assistant_message_content = (
-        f"{assistant_start}{assistant_message.content}{assistant_end}"
-        if assistant_message
-        else ""
-    )
-    system_message = next(
-        (message for message in body.messages if message.role == "system"), None
-    )
-    system_message_content = system_message.content if system_message else system
-    # avoid duplicate user start token in prompt if user message already includes it
-    if len(user_start) > 0 and user_start in user_message_content:
-        user_start = ""
-    # avoid duplicate user end token in prompt if user message already includes it
-    if len(user_end) > 0 and user_end in user_message_content:
-        user_end = ""
-    # avoid duplicate assistant start token in prompt if user message already includes it
-    if len(assistant_start) > 0 and assistant_start in user_message_content:
-        assistant_start = ""
-    # avoid duplicate system_start token in prompt if system_message_content already includes it
-    if len(system_start) > 0 and system_start in system_message_content:
-        system_start = ""
-    prompt = f"{system_start}{system_message_content}{system_end}{assistant_message_content}{user_start}{user_message_content}{user_end}{assistant_start}"
prompt = ""
for message in body.messages:
# Check for system message
if message.role == "system":
system_message_content = message.content if message else ""

# avoid duplicate system_start token in prompt if system_message_content already includes it
if len(system_start) > 0 and system_start in system_message_content:
system_start = ""
# avoid duplicate system_end token in prompt if system_message_content already includes it
if len(system_end) > 0 and system_end in system_message_content:
system_end = ""
prompt = f"{system_start}{system_message_content}{system_end}"
elif message.role == "user":
user_message_content = message.content if message else ""

# avoid duplicate user start token in prompt if user_message_content already includes it
if len(user_start) > 0 and user_start in user_message_content:
user_start = ""
# avoid duplicate user end token in prompt if user_message_content already includes it
if len(user_end) > 0 and user_end in user_message_content:
user_end = ""

prompt = f"{prompt}{user_start}{user_message_content}{user_end}"
elif message.role == "assistant":
assistant_message_content = message.content if message else ""

# avoid duplicate assistant start token in prompt if user message already includes it
if (
len(assistant_start) > 0
and assistant_start in assistant_message_content
):
assistant_start = ""
# avoid duplicate assistant start token in prompt if user message already includes it
if len(assistant_end) > 0 and assistant_end in assistant_message_content:
assistant_end = ""

prompt = (
f"{prompt}{assistant_start}{assistant_message_content}{assistant_end}"
)

prompt = f"{prompt}{assistant_start}"
     model_name = body.model
     llm = request.app.state.llm
     if body.stream is True:
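
For readers skimming the diff: the change replaces the old "last message of each role" lookup with a loop that serializes the entire body.messages history into one prompt string. Below is a minimal, self-contained sketch of that approach. The user-turn delimiters come from the visible hunk; the system and assistant delimiters are defined outside the hunk, so the values here are assumptions, and Message and build_prompt are illustrative names, not the module's API. One simplification: the committed loop blanks a delimiter for all later turns once any message contains it, while this sketch decides per message.

from dataclasses import dataclass
from typing import List


@dataclass
class Message:
    role: str  # "system" | "user" | "assistant"
    content: str


# User-turn tokens are taken from the diff; the assistant and system
# delimiters below are assumptions (their definitions sit above the hunk).
USER_START, USER_END = "GPT4 User: ", "<|end_of_turn|>"
ASSISTANT_START, ASSISTANT_END = "GPT4 Assistant: ", "<|end_of_turn|>"  # assumed
SYSTEM_START, SYSTEM_END = "", ""  # assumed


def build_prompt(messages: List[Message]) -> str:
    prompt = ""
    for message in messages:
        content = message.content or ""
        if message.role == "system":
            # A system turn restarts the prompt, mirroring the diff's
            # f-string that omits the {prompt} prefix.
            prompt = f"{SYSTEM_START}{content}{SYSTEM_END}"
        elif message.role == "user":
            # Skip a delimiter the client already embedded in the content.
            start = "" if USER_START in content else USER_START
            end = "" if USER_END in content else USER_END
            prompt = f"{prompt}{start}{content}{end}"
        elif message.role == "assistant":
            start = "" if ASSISTANT_START in content else ASSISTANT_START
            end = "" if ASSISTANT_END in content else ASSISTANT_END
            prompt = f"{prompt}{start}{content}{end}"
    # The trailing assistant delimiter cues the model to generate the next turn.
    return f"{prompt}{ASSISTANT_START}"


# Example: a short multi-turn history.
history = [
    Message("user", "Hello!"),
    Message("assistant", "Hi, how can I help?"),
    Message("user", "Tell me a joke."),
]
print(build_prompt(history))
# GPT4 User: Hello!<|end_of_turn|>GPT4 Assistant: Hi, how can I help?<|end_of_turn|>
# GPT4 User: Tell me a joke.<|end_of_turn|>GPT4 Assistant: 
# (the output is a single line; wrapped here for readability)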
