-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yaml
147 lines (107 loc) · 2.88 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# Root directory of the dataset. Quoted because the path contains spaces.
base_pth: '/home/tl/CVAE-GANS/egg drilling dataset/'

# Number of samples per training batch.
batch_size: 64

# Optimizer beta coefficients (two values; presumably beta1 and beta2 of an
# Adam-style optimizer - confirm against the training script).
# Per the original note, these are not in use for RMSProp.
betas: [0.5, 0.9]
# Folder arrangement of the dataset with their respective classes.
# Each key nested under cls_pth names a class folder; its list holds the
# category sub-folders for that class.
# Drilling angle: sm, lg, v; Drilling force: 0, 1, 2, 3
# Folder names are quoted so they load as strings rather than integers.
cls_pth:
  sm:
    - '0'
    - '1'
    - '2'
    - '3'
  # Disabled drilling-angle classes; uncomment to include them.
  # NOTE(review): presumably num_class needs one matching entry per enabled
  # class - verify against the data loader before enabling.
  # lg:
  #   - '0'
  #   - '1'
  #   - '2'
  #   - '3'
  # v:
  #   - '0'
  #   - '1'
  #   - '2'
  #   - '3'
# Selects the classifier.
classifier: convcls

# Selects the decoder / generator.
decoder: dcgan

# Selects the discriminator.
discriminator: dcgan_wgan_gp

# Selects the encoder.
encoder: convenc

# Number of training epochs.
epochs: 1000

# Whether to train with mel spectrogram strips.
# Always 'True' for the shallow CGAN.
in_strips: false
# NOTE(review): presumably the number of strips used when in_strips is
# enabled - confirm against the data pipeline.
num_strips: 8
# Constant multipliers for the loss functions (lambda1 through lambda4).
lambda1: 3
lambda2: 1
lambda3: 0.001
lambda4: 0.001

# Penalty coefficient for WGAN-GP.
lambda_gp: 10

# Weighting coefficient for the Wasserstein distances in CVAE-WGAN-GP.
sigma: 0.5

# Weighting coefficient for the intermediate-layer reconstruction loss
# in CVAE-WGAN-GP.
gamma: 15

# Number of critic iterations for WGAN-GP.
n_critic: 5

# Dimension of the latent space / distribution.
latent_dim: 64

# Learning rate of the optimizer.
learning_rate: 5.0e-05

# Number of categories in each class.
num_class: [4]
# Value used to normalize the audio waveform to a magnitude between -1 and 1.
max_wav_value: 32768.0

# Desired sample rate.
sample_rate: 22050

# FFT size for the STFT.
n_fft: 1024

# Used for mel scaling of a magnitude spectrogram.
# NOTE(review): YAML does not evaluate arithmetic, so this loads as the
# string '1024 / 2 + 1' (n_fft / 2 + 1 would be 513). Confirm that the
# consumer evaluates it; if a plain integer is expected, replace it with 513.
n_stft: '1024 / 2 + 1'

# Window length for the STFT.
win_length: 1024

# Hop length for the STFT; set it to get the desired number of time frames.
# Current mel spectrogram size: (128 x 128).
hop_length: 345

# Minimum frequency.
f_min: 0

# Maximum frequency. Should be at most half of the sample rate.
f_max: 11025

# Number of mel filter banks for the mel spectrogram.
n_mels: 128

# Window function for STFT computations (default: Hann window).
window_fn: hann_window

# Exponent the complex spectrogram is raised to.
# Defaults to 2 for mel spectrogram computation.
power: 2

# Whether to normalize the spectrogram / mel spectrogram.
normalized: false

# Momentum for the Griffin-Lim numerical calculations.
momentum: 0.99

# Number of iterations for Griffin-Lim.
n_iter: 32
# Randomly shuffle the data during the dataset and dataloader phase.
shuffle: true

# Enable log range compression on the mel spectrogram.
enable_compression: true

# Number of workers for data loading.
num_workers: 2

# Which n-th conv layer to use for feature matching in the classifier (1 to 5).
cls_fm_idx: 4

# Which n-th conv layer to use for feature matching in the discriminator
# (0 to 4).
dis_fm_idx: 4

# N-points for the EWMA filter applied to the feature maps (previously 25).
ewma_n: 10

# Loss function for each component.
discriminator_loss: binary_cross_entropy_logits
classifier_loss: cross_entropy
decoder_loss: mean_square_error

# Loss function for the feature matching.
fm_loss: mean_square_error

# Whether to load from a checkpoint.
load_from_checkpoint: false