This repository has been archived by the owner on Oct 22, 2023. It is now read-only.
generated from NetfluxESIR/python-app-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
111 lines (97 loc) · 2.96 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import sys
from pathlib import Path
from enum import Enum
from moviepy.editor import VideoFileClip
import typer
import whisper
from whisper.utils import WriteVTT
# Typer application object; commands are registered on it via @app.command().
app = typer.Typer()
class TaskType(str, Enum):
    """Tasks that can be selected with the task-type option.

    Members also inherit from ``str``, so each one compares equal to its
    plain string value.
    """

    # Print the detected spoken language of the input.
    language_detection = "language_detection"

    # Produce subtitles for the input.
    subtitle_generation = "subtitle_generation"
@app.command()
def run(
    input_file: Path = typer.Option(
        ...,
        "-i",
        "--input",
        exists=True,
        file_okay=True,
        dir_okay=False,
        writable=False,
        readable=True,
        resolve_path=True,
        allow_dash=False,
        help="Input file path",
    ),
    # Annotated with the enum (not ``str``) so Typer validates the choices and
    # ``case_sensitive=False`` actually takes effect.
    task_type: TaskType = typer.Option(
        TaskType.language_detection,
        "-t",
        "--task-type",
        help="Task type to run",
        case_sensitive=False,
        show_default=True,
    ),
    output_dir: Path = typer.Option(
        Path.cwd(),
        "-o",
        "--output-dir",
        exists=True,
        file_okay=False,
        dir_okay=True,
        writable=True,
        readable=True,
        resolve_path=True,
        allow_dash=False,
        help="Output directory path",
    ),
) -> None:
    """Detect the spoken language of a video or generate subtitles for it.

    The input is first converted to an mp3 stored next to the input file,
    then processed with the Whisper "medium" model according to the task type.
    """
    # Normalise to the enum so that direct callers may pass either a member or
    # its plain string value; anything else is rejected before any heavy work.
    try:
        task = TaskType(task_type)
    except ValueError as err:
        raise typer.BadParameter(
            f"Task type {task_type} not supported"
        ) from err

    # Extract the audio before loading the model so an unusable input fails
    # fast instead of after the model has been loaded.
    mp3_path = convert_to_mp3(input_file)
    model = whisper.load_model("medium")

    if task is TaskType.language_detection:
        language_detection(mp3_path, model)
    elif task is TaskType.subtitle_generation:
        subtitle_generation(mp3_path, model, output_dir)
def convert_to_mp3(input_file: Path) -> Path:
    """Extract the audio track of a video file into an mp3 file.

    The mp3 is written next to the input file, using the input's stem with
    an ``.mp3`` suffix.

    Args:
        input_file: Path of the video file to read.

    Returns:
        Path of the mp3 file that was written.

    Raises:
        typer.BadParameter: If the file cannot be opened as a video, has no
            audio track, or the audio cannot be written.
    """
    try:
        video = VideoFileClip(str(input_file.absolute()))
    except Exception as e:
        raise typer.BadParameter(
            f"Could not load {input_file} as a video file: {e}"
        ) from e

    mp3_path = input_file.parent / f"{input_file.stem}.mp3"
    try:
        audio = video.audio
        if audio is None:
            # Give a readable reason instead of an AttributeError on None.
            raise ValueError("no audio track found")
        audio.write_audiofile(
            str(mp3_path.absolute()),
            verbose=False,
            logger=None,
        )
    except Exception as e:
        raise typer.BadParameter(
            f"Could not convert {input_file} to mp3: {e}"
        ) from e
    finally:
        # Always release the clip's underlying readers, on success or failure.
        video.close()
    return mp3_path
def language_detection(input_file: Path, model: whisper.Whisper) -> None:
    """Print the most probable spoken language of an audio file.

    Args:
        input_file: Path of the audio file to analyse.
        model: Loaded Whisper model used for the detection.
    """
    audio_path = str(input_file.absolute())

    # Load the waveform and pad/trim it with whisper.pad_or_trim.
    samples = whisper.pad_or_trim(whisper.load_audio(audio_path))

    # Build the log-Mel spectrogram and move it to the model's device.
    spectrogram = whisper.log_mel_spectrogram(samples).to(model.device)

    # Only the per-language probabilities (second element) are needed.
    _, language_probs = model.detect_language(spectrogram)

    most_likely = max(language_probs, key=language_probs.get)
    print(most_likely)
def subtitle_generation(
    input_file: Path,
    model: whisper.Whisper,
    output_dir: Path,
) -> None:
    """Transcribe an audio file and write the result to standard output.

    Args:
        input_file: Path of the audio file to transcribe.
        model: Loaded Whisper model used for the transcription.
        output_dir: Directory handed to the ``WriteVTT`` writer.
    """
    audio_path = str(input_file.absolute())
    transcription = model.transcribe(audio_path)

    vtt_writer = WriteVTT(output_dir=str(output_dir.absolute()))
    # NOTE(review): the result is written to sys.stdout here; output_dir is
    # only passed to the writer's constructor. Confirm this is intended.
    vtt_writer.write_result(transcription, sys.stdout)
if __name__ == "__main__":
    # Start the Typer CLI only when this file is executed as a script.
    app()