-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcleverbot.py
33 lines (28 loc) · 1.08 KB
/
cleverbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
"""
Loading the best model available.
"""
import lmntfy
import argparse
import asyncio
from pathlib import Path
from lmntfy.models.llm.engine import VllmEngine
def parse_args(argv=None) -> argparse.Namespace:
    """
    Parse the command-line options of this script.

    Args:
        argv: optional list of argument strings to parse. When None (the
            default) the process command line (``sys.argv[1:]``) is parsed,
            which is what existing callers rely on.

    Returns:
        The parsed namespace, with two attributes:
            models_folder (Path): folder containing all the models,
                ``../models`` unless ``--models_folder`` is given.
            debug (bool): True only when ``--debug`` is passed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--models_folder",
        default="../models",  # argparse runs string defaults through `type`, so this becomes a Path
        type=Path,
        help="path to the folder containing all the models",
    )
    parser.add_argument(
        "--debug",
        action="store_true",  # implies a default of False
        help="Print useful debug information (e.g., prompts)",
    )
    # argv=None makes argparse fall back to sys.argv[1:]
    return parser.parse_args(argv)
async def main():
    """
    Read the command-line options, load the language model, then hand
    control over to the command-line chat interface.
    """
    # command-line options (models folder and debug flag)
    options = parse_args()

    # model set-up
    # NOTE: we do not load a sentence embedder to maximize the GPU memory available
    print("Loading the model...")
    llm = lmntfy.models.llm.Llama3_70b(
        options.models_folder,
        device='cuda',
        engineType=VllmEngine,
    )

    # interactive session; the debug flag is forwarded as the chat's verbosity
    cli = lmntfy.user_interface.command_line
    cli.display_logo()
    await cli.basic_chat(llm, verbose=options.debug)
# Script entry point: run the async main() to completion on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())