# audio_processing_in_memory.py
import soundfile as sf
import torchaudio
import numpy as np
import configparser
import logging
from pydub import AudioSegment
import io
def load_resample_trim_audio(input_audio_file):
    """Load an audio file, trim and resample it, and return it as WAV bytes.

    Settings are read from the ``[Audio_Preprocess]`` section of
    ``config.ini`` (looked up relative to the current working directory):

    * ``start_sec`` / ``end_sec`` -- trim window in seconds. Either option
      may be omitted or left empty, in which case the audio is kept from
      its beginning / up to its end respectively.
    * ``SampleRate`` -- target frame rate in Hz (defaults to 16000).

    Args:
        input_audio_file: Audio source handed to ``AudioSegment.from_file``.

    Returns:
        The WAV-encoded audio as ``bytes``, or ``None`` if loading,
        trimming, resampling or exporting the audio fails.

    Raises:
        KeyError: If ``config.ini`` has no ``[Audio_Preprocess]`` section.
        ValueError: If a configured value is present but not numeric.
    """

    def _seconds_to_ms(raw_value):
        # A missing or empty option means "no bound on this side"; a None
        # slice bound leaves that side of the AudioSegment slice open.
        if raw_value is None or not raw_value.strip():
            return None
        return float(raw_value) * 1000

    # Load the config
    logging.info("Audio-Pre-Process: Loading config")
    config = configparser.ConfigParser()
    config.read('config.ini')
    audio_preprocess = config['Audio_Preprocess']
    start_ms = _seconds_to_ms(audio_preprocess.get('start_sec'))
    end_ms = _seconds_to_ms(audio_preprocess.get('end_sec'))
    sample_rate = audio_preprocess.getint('SampleRate', fallback=16000)

    # Load the audio file
    logging.info("Audio-Pre-Process: Loading audio file %s", input_audio_file)
    try:
        loaded_audio = AudioSegment.from_file(input_audio_file)
        # AudioSegment slices are expressed in milliseconds.
        trimmed_audio = loaded_audio[start_ms:end_ms]
        converted_audio = trimmed_audio.set_frame_rate(sample_rate)
        # Export as WAV into an in-memory buffer and grab the raw bytes, so
        # no temporary file is written to disk.
        with io.BytesIO() as buffer:
            converted_audio.export(buffer, format="wav")
            wav_bytes = buffer.getvalue()
    except Exception as e:
        # Best-effort contract: callers get None instead of an exception
        # when the audio cannot be processed.
        logging.error("PreProcess Error in loading audio file: %s", e)
        return None
    return wav_bytes
# Convert start and end times from seconds to samples
#if start_sec is not None and end_sec is not None:
# try:
# print(f"Audio-Pre-Process: Trimming audio to {start_sec} - {end_sec} seconds")
# start_sample = int(start_sec * sample_rate)
# end_sample = min(int(end_sec * sample_rate), len(audio)) # Ensure end_sample does not exceed audio length
# except Exception as e:
# print(f"Error in converting start and end times from seconds to samples: {e}")
# return None
# Example usage (start_sec, end_sec and SampleRate are read from config.ini)
#input_audio_file = 'path/to/your/audio.ogg'
#audio_for_stable_ts = load_resample_trim_audio(input_audio_file)
# Now, audio_for_stable_ts holds the desired segment as in-memory WAV bytes (or None on failure),
# ready to be used with stable-ts