-
Notifications
You must be signed in to change notification settings - Fork 0
/
config_CWGAN_GP.yaml
144 lines (105 loc) · 3.04 KB
/
config_CWGAN_GP.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Root directories of the dataset (the 'dry' and 'wet' folders)
base_pth:
- '/home/tl/CVAE-GANS/egg drilling dataset/dry/'
- '/home/tl/CVAE-GANS/egg drilling dataset/wet/'
# Number of samples per training batch
batch_size: 128
# Optimizer beta coefficients (two values; presumably beta1 and beta2 of an
# Adam-style optimizer -- confirm). Not in use for RMSProp
betas: [0.5, 0.9]
# Folder arrangement of the dataset with their respective classes.
# Outer keys are the drilling-angle folders (sm, lg, v); each one lists its
# drilling-force folders (0, 1, 2, 3). Force names are quoted so they stay
# strings instead of being parsed as integers.
cls_pth:
  sm: ['0', '1', '2', '3']
  lg: ['0', '1', '2', '3']
  v: ['0', '1', '2', '3']
# Number of training epochs
epochs: 1000
# Train with mel spectrogram strips. Must always be true for the shallow CGAN
in_strips: false
# NOTE(review): presumably the number of strips each mel spectrogram is cut
# into when in_strips is enabled -- confirm against the training code
num_strips: 8
# lambda1 to lambda4 are constant multipliers (weights) for the loss functions
lambda1: 3
lambda2: 1
lambda3: 0.001
lambda4: 0.001
# The gradient-penalty coefficient for WGAN-GP
lambda_gp: 10
# Number of critic iterations for WGAN-GP
# (presumably per generator update -- confirm against the training loop)
n_critic: 5
# Dimensionality of the latent space / distribution
latent_dim: 64
# Learning rate of the optimizer
learning_rate: 5.0e-05
# Number of categories in each class
# (presumably 3 drilling angles and 4 drilling forces, in that order -- confirm)
num_class: [3, 4]
# Value used to normalize the audio waveform to a magnitude between -1 and 1
max_wav_value: 32768.0
# Desired sample rate (presumably in Hz)
sample_rate: 22050
# Size of the audio segment to be converted to a mel spectrogram
# (presumably in samples -- confirm)
segment_size: 32768
# Whether to split the audio according to the segment size
split: true
# FFT size for the STFT
n_fft: 1024
# For mel scaling of a magnitude spectrogram: n_fft / 2 + 1
n_stft: 513
# Window length for the STFT
win_length: 1024
# Hop length for the STFT; set it to get the desired number of time frames.
# Current mel spectrogram size: (128 x 128), affected by segment size, sample rate and audio duration
hop_length: 256
# Minimum frequency
f_min: 0
# Maximum frequency. Should be at most half of the sample rate
f_max: 11025
# Number of mel filter banks for the mel spectrogram
n_mels: 128
# Window function for STFT computations (Hann window by default)
window_fn: hann_window
# Power N to which the complex spectrogram is raised. Defaults to 2 for mel spectrogram computation
power: 2
# Whether to normalize the spectrogram / mel spectrogram
normalized: false
# Momentum for the Griffin-Lim numerical calculations
momentum: 0.99
# Number of Griffin-Lim iterations
n_iter: 32
# Randomly shuffle the data during the dataset and dataloader phase
shuffle: true
# Enable log range compression on the mel spectrogram
enable_compression: true
# Number of workers for data loading
num_workers: 2
# Index of the discriminator conv layer used for feature matching, 0 to 4
dis_fm_idx: 4
# Number of points (N) for the EWMA filter applied to the feature maps; previously 25
ewma_n: 10
# Whether to load from a checkpoint
load_from_checkpoint: false
# Whether to use the GPU (CUDA)
cuda: true
# Class condition for the drilling force, i.e. 0, 1, 2 or 3. Used for inference/generation
force_level: 3
# Class condition for the drilling angle, i.e. 0, 1 or 2. Used for inference
drill_angle: 0
# Number of fake mel spectrograms to generate
num_gen: 30
# Starting index for naming the generated files (to avoid overlap)
start_index: 0
# File path where generated mel spectrograms are saved as images; the format is always 'png'
save_path_for_generated: '/home/tl/CVAE-GANS/evaluations/for FAD/generated_CWGANGP/'