-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtranscribe_to_srt.py
101 lines (85 loc) · 3.4 KB
/
transcribe_to_srt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from subsai import SubsAI
import os
import sys
import getopt
import glob
def getOpts(argv):
    """Parse the command-line arguments into the script's settings.

    Args:
        argv: Argument list without the program name (e.g. ``sys.argv[1:]``).

    Returns:
        A tuple ``(inputPath, outputPath, hfToken, verbose)``:
        the normalized input path/pattern (``""`` when ``-i`` is absent),
        the normalized output directory (``"out"`` when ``-o`` is absent),
        the token given with ``-t``/``--token`` or, failing that, the value
        of the ``HF_PYANNOTEAUDIO_KEY`` environment variable (``""`` when
        neither is set), and the verbose flag.

    Raises:
        SystemExit: with status 2 on an unrecognized option or a missing
            option argument; with the default status after ``-h`` has
            printed the help text.
    """
    # Set some output defaults
    inputPath = ""
    outputPath = "out"
    # Use the environment variable if available for pyannote.audio. The
    # fallback has to be a literal: no local token value exists yet here.
    hfToken = os.environ.get('HF_PYANNOTEAUDIO_KEY', "")
    verbose = False

    # Check command line args, and if available, override the defaults
    try:
        opts, _ = getopt.getopt(argv, "hi:o:t:v", ["input=", "output=", "token="])
    except getopt.GetoptError:
        print(r"transcribe.py [-i <inputPath>] [-o <outputPath>] [-t <hf_token>] [-v]")
        sys.exit(2)

    # Walk through each option and arg
    for opt, arg in opts:
        if opt == '-h':
            printHelp()
            sys.exit()
        elif opt in ("-i", "--input"):
            inputPath = os.path.normpath(arg)
        elif opt in ("-o", "--output"):
            outputPath = os.path.normpath(arg)
        elif opt in ("-t", "--token"):
            hfToken = arg
        elif opt == "-v":
            # Compare for equality: `opt in ("-v")` would be a substring
            # test against the string "-v", not a tuple membership test.
            verbose = True

    # Return final output, either defaults or overridden ones
    return inputPath, outputPath, hfToken, verbose
def printHelp():
    """Write the usage line and the option reference to standard output."""
    # One entry per output line; empty entries produce the blank separators.
    help_lines = (
        r"transcribe.py [-i <inputPath>] [-o <outputPath>] [-t <hf_token>] [-v]",
        r"",
        r"Transcribe all specified media files to .srt.",
        r"",
        r"Command-line options:",
        r"-i path",
        r"--input=path",
        r" Path to files to operate on. Wildcards can be applied. [default=''].",
        r"",
        r"-o path",
        r"--output=path",
        r" Path to output of files [default='out/'].",
        r"",
        r"-t token",
        r"--token=hf_token",
        r" Token for speaker diarization (pyannote.audio) [default=''].",
        r"",
        r"-v",
        r" Enable verbose output.",
    )
    for text in help_lines:
        print(text)
def main(argv):
    """Transcribe every file matching the input pattern and save it as .srt.

    Each matched file ``<name>.<ext>`` is written to
    ``<outputPath>/<name>.srt``.

    Args:
        argv: Command-line arguments without the program name; they are
            handed to ``getOpts`` unchanged.
    """
    # Get options
    inputPath, outputPath, hfToken, verbose = getOpts(argv)

    # Create the output directory if needed. makedirs also creates missing
    # parent directories, and exist_ok removes the check-then-create race.
    os.makedirs(outputPath, exist_ok=True)

    # Get list of all files matching inputPath (can use wild cards, e.g. "inputpath\*.mp4")
    files = glob.glob(inputPath)
    if not files:
        # Nothing to transcribe: report it and skip creating the model.
        print(f"No input files match '{inputPath}'.", file=sys.stderr)
        return

    # Settings shared by both configurations
    modelConfig = {'model_type': 'base',
                   'device': 'cuda',
                   'language': 'en'}
    if hfToken:
        # A token was supplied: additionally request speaker labels
        modelConfig.update({'speaker_labels': True,
                            'min_speakers': 1,
                            'HF_TOKEN': hfToken})

    # Create instance of the model
    subs_ai = SubsAI()
    model = subs_ai.create_model('m-bain/whisperX', modelConfig)

    # Execute operation on each file
    for file in files:
        outputFile = os.path.join(outputPath, f"{os.path.splitext(os.path.basename(file))[0]}.srt")

        # Transcribe the file
        if verbose:
            print(f"Transcribing: '{file}'... ", end="", flush=True)

        # Make the transcription
        subs = subs_ai.transcribe(file, model)

        # Save the transcription
        subs.save(outputFile)
        if verbose:
            print("Done")
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    cli_args = sys.argv[1:]  # everything after the program name
    main(cli_args)