-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
89 lines (70 loc) · 3.38 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from ibm_watson import SpeechToTextV1
import json
from os.path import join, dirname
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
# Build the module-level Speech to Text client used by Main.seperation.
# NOTE(review): `Key` and `Url` are not defined in the visible source —
# presumably the IAM API key and the service endpoint URL; confirm where they
# are supposed to come from before running this module.
authenticator = IAMAuthenticator(Key)
speech_to_text = SpeechToTextV1(authenticator=authenticator)
speech_to_text.set_service_url(Url)
class MyRecognizeCallback(RecognizeCallback):
    """Recognition callback that prints every event it receives to stdout."""

    def __init__(self):
        super().__init__()

    def on_data(self, data):
        """Print the received payload as JSON indented by two spaces."""
        pretty = json.dumps(data, indent=2)
        print(pretty)

    def on_error(self, error):
        """Print the reported error."""
        message = 'Error received: {}'.format(error)
        print(message)

    def on_inactivity_timeout(self, error):
        """Print the inactivity-timeout notice."""
        message = 'Inactivity timeout: {}'.format(error)
        print(message)


# Single shared callback instance handed to the recognition request.
myRecognizeCallback = MyRecognizeCallback()
# Sample audio path (not referenced by the other code visible in this file).
# Raw string on purpose: in a normal literal '\a' is the BEL escape, which
# silently turned the path into 'E:<BEL>udio.wav'.
file_path_ = r'E:\audio.wav'
class Main():
    """Namespace for the speech-to-text speaker-separation routine."""

    @staticmethod
    def _first_alternatives(response):
        """Yield the first alternative of every result chunk that has one."""
        for chunk in response.get('results', []):
            candidates = chunk.get('alternatives')
            # Skip chunks with a missing or empty list instead of raising.
            if candidates:
                yield candidates[0]

    @staticmethod
    def seperation(file_path, ff):
        """Transcribe an audio file and tag every recognised word with its speaker.

        The file is sent to the module-level ``speech_to_text`` client using
        the ``en-US_BroadbandModel`` model with speaker labels and word
        timestamps enabled. The plain transcript (one line per result chunk)
        is printed to stdout.

        Args:
            file_path: Path of the audio file; it is opened in binary mode.
            ff: Value passed through as ``content_type`` (for example
                ``'audio/wav'`` or ``'audio/flac'``).

        Returns:
            A list of dicts. Each word whose ``from``/``to`` times equal those
            of a speaker label contributes one dict with the keys ``from``,
            ``to``, ``transcript``, ``speaker``, ``confidence`` and ``final``.
            After the words of each result chunk that carries timestamps, a
            ``{'newline': '\\n'}`` separator entry is appended.

            A response without ``results`` or ``speaker_labels`` is treated
            as having none, so the function returns an empty list instead of
            raising ``KeyError``.
        """
        with open(file_path, 'rb') as audio_file:
            # NOTE(review): RecognizeCallback comes from ibm_watson.websocket;
            # confirm that the HTTP `recognize` call of the installed SDK
            # version accepts (or ignores) `recognize_callback`.
            speech_recognition_results = speech_to_text.recognize(
                audio=audio_file,
                content_type=ff,
                recognize_callback=myRecognizeCallback,
                model='en-US_BroadbandModel',
                speaker_labels=True,
                timestamps=True,
            ).get_result()

        best_alternatives = list(
            Main._first_alternatives(speech_recognition_results)
        )

        # One transcript line per chunk that actually carries a transcript.
        transcript = ''.join(
            alternative['transcript'] + '\n'
            for alternative in best_alternatives
            if 'transcript' in alternative
        )
        print(transcript)

        # Index the speaker labels by their (from, to) span so that each word
        # is matched with a dict lookup rather than a scan over every label.
        # Lists keep the original label order should a span occur repeatedly.
        labels_by_span = {}
        for label in speech_recognition_results.get('speaker_labels', []):
            span = (label['from'], label['to'])
            labels_by_span.setdefault(span, []).append(label)

        final_output = []
        for alternative in best_alternatives:
            if 'timestamps' not in alternative:
                continue
            for stamp in alternative['timestamps']:
                # Each timestamp entry is indexed as [word, start, end].
                word, start, end = stamp[0], stamp[1], stamp[2]
                for label in labels_by_span.get((start, end), ()):
                    final_output.append({
                        'from': start,
                        'to': end,
                        'transcript': word,
                        'speaker': label['speaker'],
                        'confidence': label['confidence'],
                        'final': label['final'],
                    })
            # Marks the end of this chunk's words in the flat output list.
            final_output.append({'newline': '\n'})
        return final_output