-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspeech_recognition.py
79 lines (71 loc) · 3 KB
/
speech_recognition.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from zamia.decode_mic import *
from multiprocessing import Queue
import os
from utils.logger import Logger
import os
import psutil
class SpeechEngine:
    """Turn loud segments of an audio stream into classified commands.

    Audio chunks are read from the stream returned by ``open_audio_stream``.
    While the recent loudness window contains a value above ``THRESHOLD`` the
    chunks are buffered; once the window falls quiet again the buffered audio
    is saved, transcribed by ``SpeechRecognizer``, classified by
    ``TextClassificationEngine`` and every truthy classification result is put
    on the queue supplied by the caller.

    ``SpeechRecognizer``, ``open_audio_stream``, ``deque``, ``math``,
    ``audioop`` and the tuning constants are not imported by name in this
    file; presumably they come from ``from zamia.decode_mic import *``.
    """

    def __init__(self, queue: Queue):
        """Build the recognizer and loggers and print the recognizer's memory cost.

        Args:
            queue: Queue that receives each truthy result of
                ``TextClassificationEngine.get_sentiment``.
        """
        process = psutil.Process(os.getpid())
        # The baseline has to be sampled *before* the recognizer is created;
        # sampling both values afterwards always reports a delta of ~0.
        baseline = process.memory_info().rss
        self.sr = SpeechRecognizer()
        usage = process.memory_info().rss - baseline
        # ``>> 20`` converts the byte count to whole MiB.
        print("[Memory Usage | Speech Recognition]", usage >> 20)
        self.__queue = queue
        self.__logger_speech = Logger("speech")
        self.__logger_text = Logger("text")

    def start_recognition(self):
        """Read audio until interrupted, processing every detected utterance.

        The loop runs until ``KeyboardInterrupt``; on interrupt both loggers
        are saved and closed. The stream and the object returned alongside it
        by ``open_audio_stream`` are always released, including when an
        unexpected exception escapes the loop.
        """
        # Imported lazily, as in the original code (presumably to defer the
        # cost of loading the classifier until recognition actually starts).
        from text_classification import TextClassificationEngine

        te = TextClassificationEngine()
        p, stream = open_audio_stream()
        print("[Speech] Listening...")
        audio2send = []
        # Recent loudness values; one loud value keeps the capture going.
        slid_win = deque(maxlen=int(SILENCE_LIMIT * REL) + 1)
        # Chunks seen just before the capture started, prepended to the audio.
        prev_audio = deque(maxlen=int(PREV_AUDIO * REL) + 1)
        started = False
        try:
            while True:
                cur_data = stream.read(CHUNK)
                # NOTE(review): a sample width of 4 bytes is assumed here;
                # confirm it matches the format used by open_audio_stream.
                # NOTE(review): audioop was removed from the standard library
                # in Python 3.13.
                slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
                if any(level > THRESHOLD for level in slid_win):
                    started = True
                    audio2send.append(cur_data)
                elif started:
                    # The window went quiet: finish the capture and deliver.
                    self._handle_utterance(list(prev_audio) + audio2send, p, te)
                    # Reset all capture state for the next utterance.
                    started = False
                    slid_win.clear()
                    prev_audio.clear()
                    audio2send = []
                else:
                    prev_audio.append(cur_data)
        except KeyboardInterrupt:
            self.__logger_text.save()
            self.__logger_text.close()
            self.__logger_speech.save()
            self.__logger_speech.close()
        finally:
            # Release the audio resources on every exit path, not only on
            # Ctrl+C.
            stream.stop_stream()
            stream.close()
            p.terminate()

    def _handle_utterance(self, frames, p, te):
        """Save, transcribe, log and classify one captured utterance.

        Args:
            frames: List of raw audio chunks making up the utterance.
            p: First object returned by ``open_audio_stream``; passed through
                to the recognizer's save helpers.
            te: Classifier exposing ``get_sentiment(text)``.
        """
        filename = self.sr.save_speech(frames, p)
        try:
            timestamp = self.__logger_speech.start()
            text = self.sr.recognize_speech()
            self.__logger_speech.checkpoint(text)
            self.sr.save_speech_log(frames, p, timestamp)
            self.__logger_text.start()
            sentiment = te.get_sentiment(text)
            if sentiment:
                self.__queue.put(sentiment)
                print("[Speech] Detected speech: %s [%s]" % (text, sentiment["operation"]))
            else:
                print("[Speech] Detected speech: %s [Invalid Command]" % text)
        finally:
            # Remove the temp file even when recognition or classification
            # fails. Comment this line out to keep the file for review.
            os.remove(filename)
if __name__ == "__main__":
    # Script entry point: run the engine stand-alone with its own queue and
    # keep listening until start_recognition() returns.
    command_queue = Queue()
    engine = SpeechEngine(command_queue)
    engine.start_recognition()