forked from erunpie/inline-measurement
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathstt.py
70 lines (61 loc) · 2.22 KB
/
stt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from speechkit import Session, RecognitionLongAudio
from speechkit.auth import generate_jwt
import time
import os
import json
import subprocess
from datetime import datetime
from dotenv import load_dotenv
load_dotenv()
# Yandex Cloud settings, read from the process environment. Each value is
# None when the corresponding variable is not set.
bucket_name = os.getenv("YANDEX_BUCKET_NAME")
service_account_id = os.getenv("YANDEX_SERVICE_ACC_ID")
key_id = os.getenv("YANDEX_KEY_ID")

# The private key is stored with literal "\n" two-character sequences; turn
# them back into real newlines and encode to bytes. When the variable is
# unset, keep None instead of crashing the import with an AttributeError on
# None.replace().
_raw_private_key = os.getenv("YANDEX_PRIVATE_KEY")
private_key = (
    _raw_private_key.replace('\\n', '\n').encode()
    if _raw_private_key is not None
    else None
)
class STT:
    """Speech-to-text helper around SpeechKit's ``RecognitionLongAudio``.

    An input audio file is converted with ffmpeg to mono 16-bit PCM WAV at
    ``self.sample_rate`` and then handed to the recognizer.
    """

    # Fallback values for constructor arguments that are omitted (or falsy).
    default_init = {
        "sample_rate": 16000,
    }

    def __init__(self,
                 sample_rate=None,
                 ) -> None:
        """Store the target sample rate.

        Args:
            sample_rate: Sample rate in Hz used for the ffmpeg conversion and
                reported to the recognizer. Any falsy value (``None``, ``0``)
                selects ``STT.default_init["sample_rate"]``.
        """
        self.sample_rate = sample_rate if sample_rate else STT.default_init["sample_rate"]

    @staticmethod
    def _space_before_capitals(text) -> str:
        """Insert a space before every uppercase character except the first.

        Returns an empty string for empty/``None`` input instead of raising
        ``IndexError``.
        """
        if not text:
            return ""
        return text[0] + "".join(
            f" {char}" if char.isupper() else char for char in text[1:]
        )

    def _convert_to_wav(self, audio_file_name, wav_path) -> None:
        """Run ffmpeg to write *audio_file_name* as mono PCM WAV to *wav_path*.

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero
                status.
        """
        # ffmpeg writes the WAV stream to its stdout ("-"), which is
        # redirected straight into the destination file.
        with open(wav_path, "wb") as wav_file:
            subprocess.run(
                ["ffmpeg",
                 "-loglevel", "quiet",
                 "-i", audio_file_name,
                 "-ar", str(self.sample_rate),
                 "-ac", "1",
                 "-acodec", "pcm_s16le",
                 "-f", "wav",
                 "-"
                 ],
                stdout=wav_file,
                check=True,
            )

    def audio_to_text(self, audio_file_name=None) -> str:
        """Recognize speech in an audio file and return the text.

        Args:
            audio_file_name: Path to the source audio file (any format ffmpeg
                can read). Required despite the ``None`` default.

        Returns:
            The recognized text with a space inserted before each uppercase
            character after the first; ``""`` when no text was recognized.

        Raises:
            TypeError: If ``audio_file_name`` is ``None``.
            subprocess.CalledProcessError: If the ffmpeg conversion fails.
        """
        if audio_file_name is None:
            # Fail before any network/auth work; os.path.basename(None) would
            # raise TypeError further down anyway.
            raise TypeError("audio_file_name must be a path, not None")

        jwt = generate_jwt(service_account_id, key_id, private_key)
        session = Session.from_jwt(jwt)
        recognize_long_audio = RecognitionLongAudio(session, service_account_id, bucket_name)

        # Temporary converted file; the path format is kept exactly as before.
        wav_path = "voice_cache/" + os.path.basename(audio_file_name) + "processing.wav"
        os.makedirs("voice_cache", exist_ok=True)
        try:
            self._convert_to_wav(audio_file_name, wav_path)
            recognize_long_audio.send_for_recognition(
                wav_path, audioEncoding='LINEAR16_PCM',
                # Must match the rate ffmpeg resampled to ("-ar" above).
                sampleRateHertz=str(self.sample_rate),
                audioChannelCount=1, rawResults=False, profanityFilter='false', literatureText='true',
            )
        finally:
            # Best-effort cleanup, also on failure; the file may not exist if
            # the conversion never started.
            try:
                os.remove(wav_path)
            except OSError:
                pass

        # Poll every 2 seconds until the recognizer reports a truthy result.
        # NOTE(review): there is no timeout; a job that never completes will
        # block forever.
        while True:
            time.sleep(2)
            if recognize_long_audio.get_recognition_results():
                break

        # NOTE(review): the return value was never used; the call is kept in
        # case the library relies on it being made -- confirm and drop.
        recognize_long_audio.get_data()
        text = recognize_long_audio.get_raw_text()

        # Presumably the raw text arrives as sentences joined without
        # separators, so capitals are used as boundaries -- TODO confirm.
        return self._space_before_capitals(text)