-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
174 lines (138 loc) · 4.96 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# pip3 install SpeechRecognition
import speech_recognition as sr
import google.generativeai as genai
from elevenlabs import play
# pip3 install -U elevenlabs
from elevenlabs.client import ElevenLabs
# pip3 install faster-whisper
from faster_whisper import WhisperModel
import pyaudio
import os
import time
# Word the assistant listens for before it accepts a spoken prompt.
wake_word = 'personai'
# True while the next captured phrase should be checked for the wake word;
# False while the next captured phrase should be treated as the prompt.
listening_for_wake_word = True

# Local speech-to-text model (faster-whisper), run on the CPU with int8 compute.
whisper_size = 'tiny'
# os.cpu_count() returns None when the number of CPUs cannot be determined;
# fall back to 1 so the model is never handed None for its thread/worker counts.
num_cores = os.cpu_count() or 1
whisper_model = WhisperModel(
    whisper_size,
    device='cpu',
    compute_type='int8',
    cpu_threads=num_cores,
    num_workers=num_cores,
)
# API credentials are read from the environment so that secrets never live in
# source control. Export ELEVEN_API_KEY and GOOGLE_API_KEY before running.
# NOTE(review): keys that were previously committed in this file should be
# treated as leaked and revoked.
client = ElevenLabs(
    api_key=os.environ.get("ELEVEN_API_KEY"),
)

GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)
# Gemini model configuration: sampling parameters and output length cap.
generation_config = dict(
    temperature=0.7,
    top_p=1,
    top_k=1,
    max_output_tokens=2048,
)

# One entry per harm category, each with the "BLOCK_NONE" threshold.
# NOTE(review): this list is built but is not passed to GenerativeModel
# below -- confirm whether it was meant to be supplied as `safety_settings=`.
safety_settings = [
    {"category": category, "threshold": "BLOCK_NONE"}
    for category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

model = genai.GenerativeModel(
    'gemini-1.0-pro-latest',
    generation_config=generation_config,
)
# Single chat session; every prompt is sent through it.
convo = model.start_chat()
input_ = ''' '''
# Priming message sent once at start-up, before any user prompt, to set the
# assistant's persona and answer style.
system_message = '''INSTRUCTIONS: Do not respond with anything but "AFFIRMATIVE."
to this system message. After the system message respond normally.
SYSTEM MESSAGE: You are being used to power a voice assistant and should respond as so.
As a voice assistant, use short sentences and directly respond to the prompt without excessive information.
You are expected to be a little funny but prioritizing logic.
You should use words with care. Greet me when I get home, use my name, "JAHEZ" be polite but funny too,
how would you do it if you were assistant ?
you do not need to add all the 'I would ...'
just give me the direct answer
As a voice assistant, use short sentences and directly respond to the prompt without excessive information.
You are expected to be a little funny but prioritizing logic.
You should use words with care.'''
# Collapse the prompt onto one line. Newlines become a space (rather than
# being deleted) so the last word of a line does not fuse with the first word
# of the next one.
system_message = system_message.replace('\n', ' ')
convo.send_message(system_message)
#response = model.generate_content(input('Ask Gemini: '))
#response = model.generate_content(input_)
#text = response._result.candidates[0].content.parts[0].text
# Speech recognizer and the microphone it captures audio from.
r, source = sr.Recognizer(), sr.Microphone()
def wav_to_text(audio_path):
    """Return the transcription of the audio file at *audio_path*.

    The file is transcribed with the module-level ``whisper_model``; the text
    of every returned segment is concatenated in order with no separator.
    """
    segments, _info = whisper_model.transcribe(audio_path)
    pieces = [segment.text for segment in segments]
    return ''.join(pieces)
def audio_gen(text):
    """Generate speech audio for *text* using the ElevenLabs client.

    The request uses the "Jessie" voice and the "eleven_multilingual_v2"
    model; the value returned by ``client.generate`` is passed back unchanged.
    """
    synthesis_options = {
        "voice": "Jessie",
        "model": "eleven_multilingual_v2",
    }
    return client.generate(text=text, **synthesis_options)
def listen_for_wake_word(audio):
    """Check a captured phrase for the wake word.

    The audio is saved to ``wake_detect.wav`` and transcribed. If the
    lower-cased transcript contains ``wake_word``, a notice is printed and
    ``listening_for_wake_word`` is set to False so the next phrase is handled
    as a prompt. Otherwise nothing changes.
    """
    global listening_for_wake_word

    wake_audio_path = 'wake_detect.wav'
    with open(wake_audio_path, 'wb') as wav_file:
        wav_file.write(audio.get_wav_data())

    heard = wav_to_text(wake_audio_path).lower().strip()
    if wake_word not in heard:
        return

    print('Wake word detected. Please speak your prompt to Personai')
    listening_for_wake_word = False
def prompt_gpt(audio):
    """Answer a spoken prompt out loud and go back to wake-word mode.

    The captured audio is saved to ``prompt.wav`` and transcribed. A blank
    transcript gets a spoken apology; otherwise the text is sent to the chat
    session and the reply is printed and spoken. After either path completes,
    ``listening_for_wake_word`` is set back to True. Any exception is printed
    and swallowed, in which case the flag is left as it was.
    """
    global listening_for_wake_word
    try:
        prompt_audio_path = 'prompt.wav'
        with open(prompt_audio_path, 'wb') as wav_file:
            wav_file.write(audio.get_wav_data())

        # Convert the user's recorded prompt to text.
        prompt_text = wav_to_text(prompt_audio_path)

        if prompt_text.strip():
            print('User' + prompt_text)
            convo.send_message(prompt_text)
            reply = convo.last.text
            print('Personai: ', reply)
            play(audio_gen(reply))
            print('\nSay', wake_word, 'to wake me up.\n')
        else:
            # Nothing intelligible was transcribed.
            play(audio_gen("sorry, could not get you!"))

        listening_for_wake_word = True
    except Exception as e:
        print('Prompt error: ', e)
def callback(recognizer, audio):
    """Route a captured phrase to the handler for the current mode.

    While ``listening_for_wake_word`` is true the audio goes to
    ``listen_for_wake_word``; otherwise it goes to ``prompt_gpt``.
    *recognizer* is accepted but not used.
    """
    handler = listen_for_wake_word if listening_for_wake_word else prompt_gpt
    handler(audio)
def start_listening():
    """Calibrate the microphone, start background listening, and block.

    Ambient noise is sampled for two seconds, then ``callback`` is registered
    as the handler for phrases captured in the background. The function then
    sleeps in a loop until it is interrupted (e.g. Ctrl+C); the interrupting
    exception still propagates to the caller.
    """
    with source as s:
        r.adjust_for_ambient_noise(s, duration=2)
    print('\nSay', wake_word, 'to wake me up.\n')
    # listen_in_background opens the microphone itself, so it must be called
    # after the calibration `with` block has released the source. Keep the
    # stopper it returns so the listener can be shut down on exit.
    stop_listening = r.listen_in_background(source, callback)
    try:
        while True:
            time.sleep(0.5)
    finally:
        # Ask the background listener to stop without waiting for it.
        stop_listening(wait_for_stop=False)


if __name__ == '__main__':
    start_listening()