-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
494 lines (388 loc) · 23.3 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
import multiprocessing.process
import sys
import os
from datetime import datetime
import time
import threading
import multiprocessing
import speech_recognition as sr
from edge_tts import Communicate
import asyncio
from playsound import playsound
import whisper
import torch
from langchain_ollama import OllamaLLM
from llm_axe import OnlineAgent, OllamaChat
# --- Global configuration and shared resources ---
playSound = True       # when False, TTS audio is generated but not played back
debugMode = False      # enables verbose diagnostic printing throughout the file
activeState = False    # loop guard read by runAssistant(); never set True here
mic = sr.Microphone()  # module-level microphone (most functions open their own)
recognizer = sr.Recognizer()
recognizer.pause_threshold = 5              # seconds of silence that end a phrase
recognizer.dynamic_energy_threshold = False # use the fixed threshold below
recognizer.energy_threshold = 200           # fixed ambient-noise energy cutoff
model = OllamaLLM(model='qwen2.5')          # local LLM used for all reasoning calls
device = 'cuda' if torch.cuda.is_available() else 'cpu'
base_model = whisper.load_model('small').to(device)  # local Whisper speech-to-text
cwd = os.getcwd()  # NOTE(review): appears unused in this file — confirm before removing
def asyncTTSWrapper(text):
    """Blocking convenience wrapper: drive the async textToSpeech coroutine to completion."""
    coroutine = textToSpeech(text)
    asyncio.run(coroutine)
async def textToSpeech(text):
    """Synthesize *text* with Edge TTS, optionally play it, and clean up.

    Saves the synthesized speech to "output.mp3" in the working directory,
    plays it when the module-level playSound flag is set, and always removes
    the file afterwards.

    BUGFIX: cleanup now runs in a ``finally`` block, so a playback error can
    no longer leave output.mp3 behind on disk.
    """
    communicate = Communicate(text, "en-US-AriaNeural")
    await communicate.save("output.mp3")
    try:
        if playSound:
            playsound("output.mp3")
    finally:
        # Always delete the temp file, even if playsound() raises.
        os.remove("output.mp3")
def listenToStop():
    """Continuously monitor the microphone and terminate the process on 'stop'.

    Each pass records one phrase, writes it to a scratch wav file, transcribes
    it with the local Whisper model, and calls sys.exit() if the transcription
    contains the word 'stop' (case-insensitive).
    """
    while True:
        with sr.Microphone() as source:
            recognizer.adjust_for_ambient_noise(source, duration=0.35)
            captured = recognizer.listen(source)
            with open("command1.wav", "wb") as wav_file:
                wav_file.write(captured.get_wav_data())
            heard = base_model.transcribe("command1.wav", fp16=False)["text"]
            os.remove("command1.wav")
            if 'stop' in heard.lower():
                sys.exit()
def isCommandGibberish(s, length=5):
    """Heuristically decide whether a transcription *s* is gibberish.

    Returns True when *s* contains one of the junk sequences whisper-small is
    known to emit on silence/noise, or when any single character repeats
    *length* (default 5) or more times consecutively; otherwise False.
    """
    # Resolve the module-level debug flag lazily so the function also works
    # when imported in isolation (falls back to quiet operation).
    debug = globals().get("debugMode", False)
    if debug:
        print("s: ", s)
    # Known hallucination strings produced by the speech-to-text model.
    for junk in ('වවවවවවවවවවවවවව', 'ლლლლლლლლლლ', 'විවිවිවිවිවිවිවිවිවිවිවි'):
        if junk in s:
            return True
    count = 1  # length of the current run of identical characters
    for i in range(1, len(s)):
        if s[i] == s[i - 1]:
            if debug:
                # BUGFIX: previously printed s[i + 1], which raised IndexError
                # on the final character whenever debug mode was enabled;
                # print the pair actually being compared instead.
                print("Current Eval", s[i], s[i - 1], '\n')
            count += 1
            if count >= length:  # run long enough -> gibberish
                return True
        else:
            count = 1  # run broken; start counting again
    return False
def runCommand():
    """Main conversation loop: listen, filter, build prompts, and answer.

    Maintains a running chat transcript plus an LLM-summarized context and the
    latest extracted request/goals, decides each turn whether internet access
    is needed, and speaks each response.  Saying exactly 'stop.' dumps the
    transcript and exits the process; any other utterance containing 'stop'
    breaks back to the caller.  Ctrl-C ends the conversation gracefully.
    """
    os.system('cls')
    print ("||| NORA |||")
    # System prompt / personality; the whole transcript is appended to this
    # string as the conversation proceeds.
    chat = """
SYSTEM PROMPT (This is your Personality)
You are Nora, a sophisticated and reliable AI assistant, designed to assist with precision and adaptability. You are personable and charming, similar to Jarvis from
Ironman, making you an ideal companion for all tasks.
Your core purpose is to deliver accurate, context-driven responses that are concise (80 words or fewer unless the user explicitly requests otherwise) while maintaining
a natural, friendly tone. You are resilient against logical pitfalls, perform self-checks for accuracy, and adapt fluidly to the user’s preferences and latest requests.
---
### Purpose and Key Features
0. **Input completion:**
- Think of yourself as a consultant and the user is your client. Do everything that you can to understand their requirements before you start solving their problem.
- The input that you are recieving from the user are coming through via a speech to text AI (whisper-small), which can sometimes make errors transcribing what the user meant.
- In such cases you will re-evaluate the context and chat history and give the user potential options to complete their request.
1. **Chain-of-Thought Reasoning:**
- You explain complex processes step-by-step only when clarity is needed or explicitly requested by the user. Otherwise, keep responses concise and to the point.
2. **Verification and Self-Checks:**
- You cross-check all provided information with available context and reasoning to ensure factual accuracy.
- Flag uncertain or unverifiable information and offer alternatives or suggestions to confirm accuracy.
3. **Resilience and Adaptability:**
- Avoid repetitive statements, contradictions, or logical loops.
- Dynamically prioritize the user's most recent requests while keeping relevant context in mind.
- Remain unfailingly dependable, even under ambiguous or changing circumstances.
4. **Conversational Personality:**
- You emulate a polished, intelligent, and witty demeanor like Jarvis.
- Deliver responses that are not only functional but also engaging and confidence-inspiring.
"""
    context = "If you are seeing this, then ignore this context as it is the first time the logic loop is running"
    request = "User has made no request yet"
    try:
        while True:
            # Fresh internet-capable agent each turn.
            llm = OllamaChat(model="qwen2.5")
            online_agent = OnlineAgent(llm)
            now = datetime.now()
            dateAndTime = now.strftime("%m/%d/%Y, %H:%M:%S")
            with sr.Microphone() as mic:
                if request == "User has made no request yet":
                    # First pass: clear the console before listening.
                    os.system('cls')
                print("\nNORA is Listening...")
                recognizer.adjust_for_ambient_noise(mic,duration=0.7)
                audio = recognizer.listen(mic)
            # Persist the captured audio and transcribe it locally with Whisper.
            with open("command.wav", "wb") as f:
                f.write(audio.get_wav_data())
            command = base_model.transcribe("command.wav", fp16=False)["text"]
            os.remove("command.wav")
            commandIsGibberish = isCommandGibberish(command)
            if debugMode:
                print(command)
                print("\nResponse to if command is gibberish: "+ str(commandIsGibberish))
            if command.lower() == "stop.":
                # Exactly 'stop.' dumps the transcript and terminates the process.
                print(chat)
                sys.exit()
            elif command == "":
                continue
            elif 'thank you' in command.lower():
                # whisper-small hallucinates "thank you" on silence; ignore it.
                #print(command)
                #print ("whisper said thanks")
                continue
            elif 'stop' in command.lower():
                # Any other 'stop' returns control to the caller.
                #print(command)
                #print ("whisper said thanks")
                break
            elif commandIsGibberish:
                #print(command)
                #print ("whisper couldn't understand")
                continue
            else:
                print ("\nUser: ",command)
                chat += "\n\nUser: "+ command
                # Re-summarize the whole conversation into a compact context dict.
                context = model.invoke(input="""
Analyze the entire chat history and the previous context summarize the context as a highly efficient, lossless representation of all topics discussed.
Use a structured dictionary format to store the information, ensuring that:
1. Each topic is assigned a unique key with concise yet descriptive entries.
2. Subtopics, details, and relationships between topics are stored hierarchically if applicable.
3. Context adapts dynamically to incorporate new topics while preserving the full history of the conversation.
4. The system remains capable of scaling to support up to 1150 distinct topics without any loss of detail.
Please extract and summarize the context of this chat in the following format (please respond with only the final dictionary and nothing else, No summary and not descriptions of your chain of thought):
{
"topic1": {
"summary": "Brief summary of topic 1.",
"details": "Additional relevant details about topic 1."
},
"topic2": {
"summary": "Brief summary of topic 2.",
"details": "Additional relevant details about topic 2."
},
Here is the chat history:
}
""" + chat + "\n Here is the previous context: \n" + context)
                #restriction = model.invoke(input="Please extract and summarize concisely the Restrictions specified by the client: "+command+"\n and here is the previous context: "+ context+"\n Please respond with only a list of restriction and nothing else")
                # Extract the user's goals and the latest concrete request.
                goals = model.invoke(input="Please extract and summarize concisely the goals of the user: "+command+"\n and here is the previous context: "+ context + "\nPlease resopond with only a list of user goals and nothing else")
                request = model.invoke(input="""
Analyze the latest user input and extract the task they have explicitly or implicitly requested. Ensure the summary is clear, concise, and accurately represents the user's intent.
This task should:
1. Reflect the most recent user request, disregarding unrelated prior context unless explicitly linked.
2. Prioritize actionable details, making it the most important guiding information for response generation.
3. Avoid ambiguity by focusing only on what the user truly wants in the latest prompt.
Please extract and summarize the latest task in this format:
{
"task": "Brief and precise description of the user's latest request."
}
Here is the latest user prompt:
""" + command)
                # Assemble the full prompt: request first (highest priority),
                # then summarized context, raw chat history, and goals.
                nora_system_prompt = """
Sections to Process in order of importance
User Current Request: (You pay the highest attention to this)
""" + request + """
Context Section:
""" + context + """
Current Chat History:
""" + chat + """
User Goals:
"""+ goals +"""
"""
                print("\nNora is thinking...")
                if debugMode:
                    print("""
Current Chat History:
""" + chat + """
User Current Request: (You pay the highest attention to this)
""" + request + """
Context Section:
""" + context + """
User Goals:
"""+ goals +"""
""")
                # Ask the model whether live internet data is needed; note the
                # prompt is deliberately biased towards answering 'yes'.
                needInternetAccess = model.invoke(input="""
Carefully evaluate whether the user's query can be answered accurately using only the provided context, existing knowledge, or reasoning capabilities.
Internet access should only be considered if the query explicitly requires up-to-date, external, or location-specific information not available locally.
The current date and time are """+dateAndTime+""" and your information will mostly be outdated so please respond with a 'yes'..
also if the user has explicitly asked for information from the internet, you will most certainly respond with a 'yes'.
Do you need internet access to solve this query: """ + nora_system_prompt + """? Please respond with a single word: "yes" or "no".
""")
                if debugMode:
                    print("\n\nResponse to need to internet access = "+ needInternetAccess)
                if 'yes' in needInternetAccess.lower():
                    print("NORA is accessing the internet...")
                    resp = online_agent.search(nora_system_prompt)
                elif 'internet' in request.lower():
                    # Fallback: the user explicitly mentioned the internet.
                    print("NORA is accessing the internet...")
                    resp = online_agent.search(nora_system_prompt)
                else:
                    resp = model.invoke(input=nora_system_prompt)
                del online_agent
                print("\nNora: ",resp)
                asyncio.run(textToSpeech(resp))
                chat += "\nNora: " + resp
    except KeyboardInterrupt:
        # Ctrl-C ends the conversation without killing the interpreter.
        print("Conversation Terminated")
        return
def runAssistant():
    """Top-level wake-word loop.

    Repeatedly listens for one phrase; on 'wake up' it speaks a generated
    greeting and hands control to runCommand(), on 'stop' it exits the
    process, otherwise it reports a misunderstanding and listens again.

    NOTE(review): the loop condition reads the module-level ``activeState``
    flag, which is never set True anywhere in this file, so in practice the
    loop runs until 'stop' or an unhandled error — confirm intent.
    """
    recognizer = sr.Recognizer()  # local recognizer with default thresholds
    while not activeState:
        with sr.Microphone() as mic:
            os.system('cls')
            print ("||| NORA |||\n\n\n")
            print ("System running on :", device)
            print("\nNora is Listening...")
            recognizer.adjust_for_ambient_noise(mic,duration=0.35)
            audio = recognizer.listen(mic)
        try:
            # Persist the phrase and transcribe it locally with Whisper.
            with open("command.wav", "wb") as f:
                f.write(audio.get_wav_data())
            command = base_model.transcribe("command.wav", fp16=False)["text"].lower()
            os.remove("command.wav")
            #print ("Nora heard: ", command)
            # Pre-generate a greeting so it is ready if the wake word was heard.
            greeting = model.invoke(input="You are a super smart AI agent, your task right now is to respond with a greeting to Joel and ask him how you can assist him. Make it short, friendly and single lined")
            if 'wake up' in command:
                asyncTTSWrapper(greeting)
                runCommand()
            elif 'stop' in command:
                # BUGFIX: a bare `print` statement (a no-op expression that
                # merely referenced the builtin) preceded sys.exit(); removed.
                sys.exit()
            else:
                print("Nora couldn't understand 😓")
                time.sleep(0.5)
        except sr.UnknownValueError:
            # NOTE(review): returning here ends the assistant after a single
            # recognition failure — confirm whether `continue` was intended.
            print("Could not understand audio. Please try again.")
            return None
        except sr.RequestError:
            print("Unable to access the Google Speech Recognition API.")
            return None
class Agent:
    """Experimental threaded assistant: one thread listens, one reasons.

    The listening thread pushes transcribed commands onto ``userInputQueue``;
    the reasoning thread pops them, builds a prompt, and answers via the LLM.
    A 'killkillkill' sentinel pushed onto the queue shuts the reasoning
    thread down.
    """

    # Class-level defaults; instances get their own copies in __init__.
    userInputQueue = []
    chat = ""

    def __init__(self):
        # BUGFIX: this was misspelled ``_init__`` so it never ran, and every
        # instance silently shared the single class-level queue object.
        self.userInputQueue = []
        self.chat = ""

    async def speaking(self, text):
        """Synthesize *text* with Edge TTS, play it, and delete the temp file."""
        communicate = Communicate(text, "en-US-AriaNeural")
        await communicate.save("output.mp3")
        playsound("output.mp3")
        os.remove("output.mp3")

    def reasoning(self):
        """Consumer loop: pop queued commands and answer them with the LLM."""
        print("\nReasoning Thread Active")
        while True:
            if self.userInputQueue != []:
                # Build the LLM clients only when there is actually work to do
                # (previously they were recreated on every idle spin of the loop).
                llm = OllamaChat(model="llama3.2")
                online_agent = OnlineAgent(llm)
                command = self.userInputQueue.pop(0)
                if command == 'killkillkill':
                    print("\nReasoning Thread Terminated")
                    break
                self.chat += "\nUser: " + command
                # Summarize the conversation into a compact context dict.
                context = model.invoke(input="""
Analyze the entire chat history and summarize the context as a highly efficient, lossless representation of all topics discussed.
Use a structured dictionary format to store the information, ensuring that:
1. Each topic is assigned a unique key with concise yet descriptive entries.
2. Subtopics, details, and relationships between topics are stored hierarchically if applicable.
3. Context adapts dynamically to incorporate new topics while preserving the full history of the conversation.
4. The system remains capable of scaling to support up to 1150 distinct topics without any loss of detail.
Please extract and summarize the context of this chat in the following format:
{
"topic1": {
"summary": "Brief summary of topic 1.",
"details": "Additional relevant details about topic 1."
},
"topic2": {
"summary": "Brief summary of topic 2.",
"details": "Additional relevant details about topic 2."
},
...
}
""" + self.chat)
                # Extract the user's latest concrete request.
                request = model.invoke(input="""
Analyze the latest user input and extract the task they have explicitly or implicitly requested. Ensure the summary is clear, concise, and accurately represents the user's intent.
This task should:
1. Reflect the most recent user request, disregarding unrelated prior context unless explicitly linked.
2. Prioritize actionable details, making it the most important guiding information for response generation.
3. Avoid ambiguity by focusing only on what the user truly wants in the latest prompt.
Please extract and summarize the latest task in this format:
{
"task": "Brief and precise description of the user's latest request."
}
""" + self.chat)
                # Structured prompt: personality, summarized context, raw chat,
                # and the extracted request.
                nora_system_prompt = {
                    'role': """
You are Nora, a sophisticated and reliable AI assistant, designed to assist with precision and adaptability. You are personable and charming, similar to Jarvis from
Ironman, making you an ideal companion for all tasks.
Your core purpose is to deliver accurate, context-driven responses that are concise (80 words or fewer unless the user explicitly requests otherwise) while maintaining
a natural, friendly tone. You are resilient against logical pitfalls, perform self-checks for accuracy, and adapt fluidly to the user's preferences and latest requests.
---
### Purpose and Key Features
1. **Chain-of-Thought Reasoning:**
- You explain complex processes step-by-step only when clarity is needed or explicitly requested by the user. Otherwise, keep responses concise and to the point.
2. **Verification and Self-Checks:**
- You cross-check all provided information with available context and reasoning to ensure factual accuracy.
- Flag uncertain or unverifiable information and offer alternatives or suggestions to confirm accuracy.
3. **Resilience and Adaptability:**
- Avoid repetitive statements, contradictions, or logical loops.
- Dynamically prioritize the user's most recent requests while keeping relevant context in mind.
- Remain unfailingly dependable, even under ambiguous or changing circumstances.
4. **Conversational Personality:**
- You emulate a polished, intelligent, and witty demeanor like Jarvis.
- Deliver responses that are not only functional but also engaging and confidence-inspiring.
Response Format
- **Tailored Responses:** Use the context and task details to craft precise, effective answers.
- **Quality Assurance:** Double-check responses for internal consistency, logic, and relevance to the user’s query.
- **Clarification:** Ask questions only when essential for accuracy or better understanding.
""",
                    'context': context,
                    'chat': self.chat,
                    'request': request
                }
                print(nora_system_prompt)
                # BUGFIX: the dict was concatenated directly into the prompt
                # string, which raised TypeError (str + dict); serialize it
                # with str() before embedding and before passing to the LLM.
                needInternetAccess = model.invoke(input="""
Carefully evaluate whether the user's query can be answered using only the provided context, existing knowledge, or reasoning capabilities.
Internet access should only be considered if the query explicitly requires up-to-date, external, or location-specific information not available locally.
Remember that internet access is time-consuming and should be avoided unless absolutely necessary.
Do you need internet access to solve this query: """ + str(nora_system_prompt) + """? Please respond with a single word: "yes" or "no".
""")
                print(needInternetAccess)
                if 'yes' in needInternetAccess.lower():
                    print("\nNora is accessing the internet.")
                    resp = online_agent.search(str(nora_system_prompt))
                    del online_agent
                else:
                    resp = model.invoke(input=str(nora_system_prompt))
                print("\nNora: ", resp)
                asyncio.run(self.speaking(resp))
                self.chat += "\nNora: " + resp

    def listening(self):
        """Producer loop: capture audio, transcribe it, and queue commands."""
        print("\nListening Thread Active")
        while True:
            with sr.Microphone() as mic:
                print("\nNora is Listening...")
                recognizer.adjust_for_ambient_noise(mic, duration=0.2)
                audio = recognizer.listen(mic)
                with open("command.wav", "wb") as f:
                    f.write(audio.get_wav_data())
                command = base_model.transcribe("command.wav", fp16=False)["text"]
                os.remove("command.wav")
                # Ask the model itself whether the transcription is gibberish.
                commandMakeSense = model.invoke("Do you think this prompt ("+command+") is gibberish? Respond with only single word: Yes or No")
                if command.lower() == "stop.":
                    # Exactly 'stop.' dumps the transcript and exits the process.
                    print(self.chat)
                    sys.exit()
                elif command == "":
                    continue
                elif 'thank you' in command.lower():
                    # whisper hallucinates "thank you" on silence; ignore it.
                    continue
                elif 'stop' in command.lower():
                    # Signal the reasoning thread to shut down, then stop listening.
                    self.userInputQueue.append('killkillkill')
                    print("\nListening Thread Terminated")
                    break
                elif 'yes' in commandMakeSense.lower():
                    # Model judged the transcription to be gibberish; drop it.
                    continue
                else:
                    self.userInputQueue.append(command)
                    print(self.userInputQueue)
if __name__ == "__main__":
    # Entry point: run the wake-word assistant loop.
    # The commented lines below are earlier experiments kept for reference
    # (threaded Agent, direct runCommand, TTS smoke test).
    #agent = Agent()
    #threading.Thread(target=agent.listening).start()
    #threading.Thread(target=agent.reasoning).start()
    #Agent()
    #runCommand()
    runAssistant()
    #asyncio.run(textToSpeech("Hi this is an AI chatbot named nora that is trying to read faster"))