This repository has been archived by the owner on Sep 22, 2024. It is now read-only.
forked from pablodz/Speech-enhancement
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprepare_data.py
75 lines (56 loc) · 3.51 KB
/
prepare_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os
import librosa
from data_tools import audio_files_to_numpy
from data_tools import blend_noise_randomly, numpy_audio_to_matrix_spectrogram
import numpy as np
def _write_wav(path, samples, sample_rate):
    """Write a 1-D audio array to `path` as a WAV file on any librosa version.

    `librosa.output.write_wav` is used when it exists. The `librosa.output`
    module was removed in librosa 0.8, so on newer releases the samples are
    written with `scipy.io.wavfile.write` instead.
    """
    try:
        legacy_writer = librosa.output.write_wav
    except AttributeError:
        # Function-scope import: scipy is only needed on the fallback path
        # and is installed alongside librosa.
        from scipy.io import wavfile

        # NOTE(review): the removed librosa function is understood to have
        # delegated to this same scipy call, which would keep the file
        # format unchanged -- confirm against the librosa 0.7 source.
        wavfile.write(path, sample_rate, samples)
    else:
        legacy_writer(path, samples, sample_rate)


def create_data(noise_dir, voice_dir, path_save_time_serie, path_save_sound, path_save_spectrogram, sample_rate,
                min_duration, frame_length, hop_length_frame, hop_length_frame_noise, nb_samples, n_fft,
                hop_length_fft):
    """Blend clean voices with noises and save the resulting dataset to disk.

    Audio found in `voice_dir` and `noise_dir` is loaded as frames, blended
    by `blend_noise_randomly`, and then written out as long WAV files (for
    listening QC), raw time series and spectrogram amplitude / phase arrays
    for the clean voice, the noise and the noisy voice.

    Args:
        noise_dir: Directory listed for noise files ('.DS_Store' is skipped).
        voice_dir: Directory listed for voice files ('.DS_Store' is skipped).
        path_save_time_serie: String prefix for the '*_timeserie' arrays.
        path_save_sound: String prefix for the '*_long.wav' files.
        path_save_spectrogram: String prefix for the '*_amp_db' and
            '*_pha_db' arrays.
        sample_rate: Passed to `audio_files_to_numpy` and used as the sample
            rate of the WAV files written here.
        min_duration: Passed to `audio_files_to_numpy` (presumably the
            minimum file duration to keep; confirm in `data_tools`).
        frame_length: Passed to the loader and the blender; together with
            `nb_samples` it fixes the length of the flattened QC audio.
        hop_length_frame: Frame hop passed to the loader for voice files.
        hop_length_frame_noise: Frame hop passed to the loader for noise
            files.
        nb_samples: Passed to `blend_noise_randomly`; the blended arrays must
            hold exactly `nb_samples * frame_length` values.
        n_fft: Passed to `numpy_audio_to_matrix_spectrogram`; the spectrogram
            dimension handed to it is `int(n_fft / 2) + 1`.
        hop_length_fft: Passed to `numpy_audio_to_matrix_spectrogram`.

    Returns:
        None. All results are written to disk.

    Raises:
        ValueError: If a blended array cannot be reshaped to
            `(1, nb_samples * frame_length)`.

    Note:
        The three `path_save_*` arguments are concatenated directly with the
        file names, so directory paths must end with a path separator.
        `np.save` appends the '.npy' extension itself.
    """
    # Drop the macOS folder-metadata entry so it is not treated as audio.
    list_noise_files = [name for name in os.listdir(noise_dir) if name != '.DS_Store']
    list_voice_files = [name for name in os.listdir(voice_dir) if name != '.DS_Store']

    # Extracting noise and voice from folder and convert to numpy
    noise = audio_files_to_numpy(noise_dir, list_noise_files, sample_rate,
                                 frame_length, hop_length_frame_noise, min_duration)
    voice = audio_files_to_numpy(voice_dir, list_voice_files,
                                 sample_rate, frame_length, hop_length_frame, min_duration)

    # Blend some clean voices with random selected noises (and a random level of noise)
    prod_voice, prod_noise, prod_noisy_voice = blend_noise_randomly(
        voice, noise, nb_samples, frame_length)

    # Save the long audio generated to disk for QC: each set of frames is
    # flattened into one continuous signal before being written.
    total_length = nb_samples * frame_length
    qc_outputs = (
        ('noisy_voice_long.wav', prod_noisy_voice),
        ('voice_long.wav', prod_voice),
        ('noise_long.wav', prod_noise),
    )
    for wav_name, frames in qc_outputs:
        flat_signal = frames.reshape(1, total_length)[0, :]
        _write_wav(path_save_sound + wav_name, flat_signal, sample_rate)

    # Squared spectrogram dimensions
    dim_square_spec = int(n_fft / 2) + 1

    signals = (
        ('voice', prod_voice),
        ('noise', prod_noise),
        ('noisy_voice', prod_noisy_voice),
    )

    # Create Amplitude and phase of the sounds
    spectra = []
    for label, frames in signals:
        amp_db, phase = numpy_audio_to_matrix_spectrogram(
            frames, dim_square_spec, n_fft, hop_length_fft)
        spectra.append((label, amp_db, phase))

    # Save to disk for Training / QC
    for label, frames in signals:
        np.save(path_save_time_serie + label + '_timeserie', frames)
    for label, amp_db, _ in spectra:
        np.save(path_save_spectrogram + label + '_amp_db', amp_db)
    for label, _, phase in spectra:
        np.save(path_save_spectrogram + label + '_pha_db', phase)