%YAML 1.2
%TAG !py! tag:yaml.org,2002:python/name:
%TAG !pyobj! tag:yaml.org,2002:python/object:
---
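# NB: given the `%TAG` directives above, a tag like `!py!torch.nn.ReLU` expands to
# `tag:yaml.org,2002:python/name:torch.nn.ReLU` (PyYAML's `python/name` constructor), so such tagged
# nodes resolve to the named Python object at load time (this presumably requires a loader that
# enables PyYAML's `python/*` constructors, e.g. `yaml.UnsafeLoader` or a custom loader, rather than `SafeLoader`).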
#_______________________________________________________ MODELS _______________________________________________________#
models:
  - basic_backbone: &basic_backbone
      act_fn: !py!torch.nn.ReLU
      group_norm: { num_groups: 4, eps: 1e-05, affine: True }
      architecture:
        - conv2d: { kernel_size: [5, 5], out_channels: 4, padding: 2 }
        - conv2d: { kernel_size: [5, 5], out_channels: 4, padding: 2 }
        - conv2d: { kernel_size: [5, 5], out_channels: 4, padding: 2 }
        - avg_pooling: ["pooling1", { kernel_size: [2, 2], stride: [2, 2] }]
        - conv2d: { kernel_size: [3, 3], out_channels: 16, padding: 1 }
        - conv2d: { kernel_size: [3, 3], out_channels: 16, padding: 1 }
        - avg_pooling: { kernel_size: [2, 2], stride: [2, 2] }
        - dense_link: { _from: "pooling1", allow_scaling: true }
  - siamese_branch_1: &siamese_branch_1
      - conv2d: ["conv1", { kernel_size: [3, 3], out_channels: 16, padding: 1 }]
      - conv2d: ["conv2", { kernel_size: [3, 3], out_channels: 16, padding: 1 }]
      - conv2d: ["conv3", { kernel_size: [3, 3], out_channels: 16, padding: 1 }]
  - lager_backbone: &lager_backbone
      act_fn: !py!torch.nn.ReLU
      architecture:
        - avg_pooling: ["pooling1", { kernel_size: [2, 2], stride: [2, 2] }]
        - conv2d: { kernel_size: [3, 3], out_channels: 16, padding: 1 }
        - conv2d: { kernel_size: [3, 3], out_channels: 16, padding: 1 }
        - avg_pooling: { kernel_size: [2, 2], stride: [2, 2] }
        - dense_link: ["dense1", { _from: "pooling1", allow_scaling: true }]
        - _nas_layer_choice:
            # Uses NNI NAS API's 'MutableLayer' to specify multiple submodule alternatives (the model architecture can then be sampled from the space defined by mutable layers and inputs)
            _name: "mutable_layer_1"
            _candidates:
              - conv2d: { kernel_size: [3, 3], out_channels: 32, padding: 1 }
              - conv2d: { kernel_size: [5, 5], out_channels: 16, padding: 2 }
              - conv2d: { kernel_size: [7, 7], out_channels: 8, padding: 3 }
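            # A rough, hypothetical sketch of what such a layer choice could resolve to with NNI's
            # one-shot NAS mutables API (`in_ch` stands for the incoming channel count; assumes
            # `from nni.nas.pytorch import mutables`):
            #   mutables.LayerChoice([torch.nn.Conv2d(in_ch, 32, 3, padding=1),
            #                         torch.nn.Conv2d(in_ch, 16, 5, padding=2),
            #                         torch.nn.Conv2d(in_ch, 8, 7, padding=3)], key="mutable_layer_1")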
        - conv2d: { kernel_size: [3, 3], out_channels: 32, padding: 1 }
        - avg_pooling: ["pooling2", { kernel_size: [2, 2], stride: [2, 2] }]
        - conv2d: ["conv1", { kernel_size: [3, 3], out_channels: 16, padding: 1 }]
        - conv2d: { kernel_size: [3, 3], out_channels: 32, padding: 1 }
        - residual_link: { _from_nas_input_choice: ["pooling2", "conv1"], reduction: "mean" }
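        # NB: `_from_nas_input_choice` presumably maps to NNI's `InputChoice` mutable: which of the
        # referenced submodule outputs ("pooling2" or "conv1") feeds this residual link is itself part
        # of the NAS search space, e.g. (hypothetical sketch):
        #   nni.nas.pytorch.mutables.InputChoice(n_candidates=2, n_chosen=1, key="residual_input_choice")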
        - avg_pooling: { kernel_size: [2, 2], stride: [2, 2] }
        - conv2d: { kernel_size: [3, 3], out_channels: 16, padding: 1 }
        - conv2d: { kernel_size: [3, 3], out_channels: 32, padding: 1 }
        - _new_branch_from_tensor: { _from: "pooling2" } # Similar to dense links, but only uses the referenced submodule(s) output, allowing new siamese/parallel NN branches to be defined (won't reuse previous submodule output features)
        - conv2d: { kernel_size: [3, 3], out_channels: 32, padding: 1 }
        - conv2d: { kernel_size: [3, 3], out_channels: 32, padding: 1 }
        - dense_link: { _from: ["pooling2", "conv1"], allow_scaling: true }
        - conv2d: { kernel_size: [3, 3], out_channels: 64, padding: 1 }
        - dense_link: { reduction: "mean", _from: ["pooling2", "siamese_2_conv1"] }
  - hrnet_backbone: &hrnet_backbone
      act_fn: !py!torch.nn.ReLU
      preactivation: True
      layer_nrm_and_mean_batch_nrm: { eps: 1e-05, elementwise_affine: true, momentum: 0.1, track_running_stats: True }
      architecture:
        - hrnet_input_stem: { out_channels: 16, conv_count: 2 }
        - multiresolution_fusion: ["stage_1_fusion", { create_new_branch: true, new_branch_channels: 32, reuse_scaling_convs: true }]
        - parallel_conv: { kernel_size: [[3, 3], [5, 5]], out_channels: 32, groups: [8, 6] }
        - parallel_conv: { kernel_size: [[3, 3], [5, 5]], out_channels: 32, groups: [8, 6] }
        - parallel_conv: { kernel_size: [[3, 3], [5, 5]], out_channels: 32, groups: [8, 6] }
        - parallel_conv: { kernel_size: [[3, 3], [5, 5]], out_channels: 32, groups: [8, 6] }
        - residual_link: { _from: "stage_1_fusion", apply_in_parallel: true, allow_scaling: false }
        - multiresolution_fusion: ["stage_2_fusion", { create_new_branch: true, new_branch_channels: 32, reuse_scaling_convs: true }]
        - parallel_conv: { kernel_size: [[3, 3], [5, 5], [7, 7]], out_channels: [32, 32, 32], groups: [8, 6, 4] }
        - parallel_conv: { kernel_size: [[3, 3], [5, 5], [7, 7]], out_channels: [32, 32, 32], groups: [8, 6, 4] }
        - parallel_conv: { kernel_size: [[3, 3], [5, 5], [7, 7]], out_channels: [32, 32, 32], groups: [8, 6, 4] }
        - parallel_conv: { kernel_size: [[3, 3], [5, 5], [7, 7]], out_channels: [32, 32, 32], groups: [8, 6, 4] }
        - residual_link: { _from: ["stage_1_fusion", "stage_2_fusion"], apply_in_parallel: true, allow_scaling: true }
        - multiresolution_fusion: ["stage_3_fusion", { create_new_branch: true, new_branch_channels: 16, reuse_scaling_convs: true }]
        - parallel_conv: { kernel_size: [[3, 3], [5, 5], [7, 7], [7, 7]], out_channels: [32, 32, 32, 16], groups: [8, 6, 4, 1] }
        - parallel_conv: { kernel_size: [[3, 3], [5, 5], [7, 7], [7, 7]], out_channels: [32, 32, 32, 16], groups: [8, 6, 4, 1] }
        - parallel_conv: { kernel_size: [[3, 3], [5, 5], [7, 7], [7, 7]], out_channels: [32, 32, 32, 16], groups: [8, 6, 4, 1] }
        - parallel_conv: { kernel_size: [[3, 3], [5, 5], [7, 7], [7, 7]], out_channels: [32, 32, 32, 16], groups: [8, 6, 4, 1] }
        - residual_link: { _from: ["stage_1_fusion", "stage_2_fusion", "stage_3_fusion"], apply_in_parallel: true, allow_scaling: true }
        - hrnet_repr_head_vZ: { out_channels: 32 }
  - image_classifier:
      act_fn: !py!torch.nn.LeakyReLU
      dropout_prob: 0.0
      batch_norm: { affine: true, eps: 1e-05, momentum: 0.07359778246238029 }
      spectral_norm: { name: "weight", n_power_iterations: 2, eps: 1e-12, dim: null } # Spectral weight normalization
      architecture:
        - _nested_deepcvmodule: *basic_backbone
        #- _nested_deepcvmodule: *lager_backbone
        - !py!torch.nn.Flatten
        - fully_connected: { act_fn: !py!torch.nn.Sigmoid , dropout_prob: 0., batch_norm: null }
  - keypoints_encoder:
      act_fn: !py!torch.nn.ReLU
      batch_norm: { affine: true, eps: 1e-05, momentum: 0.07359778246238029 }
      architecture:
        - conv2d: { kernel_size: [3, 3], out_channels: 16, padding: 1 }
  - keypoints_decoder:
      act_fn: !py!torch.nn.ReLU
      batch_norm: { affine: true, eps: 1e-05, momentum: 0.07359778246238029 }
      architecture:
        - conv2d: { kernel_size: [3, 3], out_channels: 16, padding: 1 }
#_______________________________________________________ TRAINING _______________________________________________________#
basic_training_params: &basic_ignite_training
  scheduler:
    type: !py!ignite.contrib.handlers.PiecewiseLinear
    eval_args: ["milestones_values"]
    kwargs:
      param_name: "lr"
      milestones_values: "[[0, 0.0], [int(0.2 * hp['epochs'] * iterations), hp['optimizer_opts']['lr']], [hp['epochs'] * iterations, 0.0]]"
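      # I.e., once evaluated, this yields a linear LR warmup from 0 to `lr` over the first 20% of
      # training iterations, followed by a linear decay back to 0. E.g., with `train_image_classifier`'s
      # epochs=2 and lr=1e-3, and assuming `iterations` is the number of batches per epoch, the
      # milestones evaluate to: [[0, 0.0], [int(0.4 * iterations), 1e-3], [2 * iterations, 0.0]]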
  # scheduler:
  #   type: !py!deepcv.meta.one_cycle.OneCyclePolicy
  #   kwargs:
  #     base_lr: 1e-4
  #     max_lr: 0.1
  #     base_momentum: 1e-4
  #     max_momentum: 1e-2
  resume_from: ""
  save_every_iters: 1000
  log_grads_every_iters: 1000
  seed: 563454
  deterministic: true # torch.backends.cudnn.deterministic
  prefetch_batches: true
  # TODO: Probably should put NNI NAS/HP-specific parameters in another dict (aimed at another pipeline?)
  nni_single_shot_nas_algorithm: "ENAS" # Should be either 'ENAS' or 'DARTS' (or possibly 'P-DARTS', 'SPOS', 'CDARTS', 'ProxylessNAS', or 'TextNAS' if relevant and supported by NNI for your model/task). See https://nni.readthedocs.io/en/latest/NAS/Overview.html#supported-one-shot-nas-algorithms for more details on NNI one-shot NAS algorithms. If undefined or `null`, NNI NAS mutable layers and inputs used in the trained model will be fixed to their first choice (i.e. fixed architecture; no NAS space)
train_image_classifier:
  <<: *basic_ignite_training
  epochs: 2
  batch_size: 32
  optimizer_opts:
    lr: 1e-3
    betas: [0.9, 0.999]
    eps: 1e-08
    weight_decay: 1e-2
    amsgrad: false
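    # NB: these kwargs match the signature of `torch.optim.Adam`/`AdamW` (lr, betas, eps,
    # weight_decay, amsgrad); the commented-out alternative below would fit `torch.optim.SGD` instead.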
    # SGD: { lr: 0.1, momentum: 1e-4, weight_decay: 0., dampening: 1e-4, nesterov: true }
# image_classifier_hp_search:
#   cross_validation: false
#   hyperparams:
#     - optimizer_opts.lr: linear([1e-6, 5e-3])
#     - optimizer.momentum: log_linear([1e-8, 5e-3])
#     - optimizer.weight_decay: log_linear([1e-10, 5e-4])
#     - choice:
#         - model.dropout_prob: choice([0., linear([0.1, 0.6])])
#         - model.batch_norm: choice({ affine: true, eps: 1e-05, momentum: 0.07359778246238029 }, null)
#_______________________________________________________ AUGMENTATION _______________________________________________________#
# Dataset preprocessing and augmentation YAML configuration
augmentations_recipes:
  - basic_augmentation: &basic_augmentation
      keep_same_input_shape: true # Whether augmented images will be cropped to their respective initial input image sizes or not
      random_transform_order: true
      augmentation_ops_depth: [1, 4] # An augmentation transform chain can only contain 1 to 4 augmentation operations
      augmentations_per_image: [1, 3] # Uniform range of augmentation count to be performed per dataset image (thus the augmented dataset size will be between 2 and 4 times the size of the original dataset)
      transforms:
        - crop: false
        - brightness: 0.2 # Random brightness variance/severity (assumes pixel data is normalized)
        - contrast: 0.1 # Contrast transform variance/severity
        - tweak_colors: 0.1
        - gamma: 0.05 # Gamma tweaking variance/severity
        - posterize: 0.05 # Entails conversion of a continuous gradation of tone to several regions of fewer tones
        - noise: 0.1
        - rotate: [-0.4, 0.4] # E.g., a `[-|a|, |b|]` rotation range means random rotations will be sampled from a gaussian distribution truncated between -180x|a|° and 180x|b|°, with the gaussian variance being proportional to `|a|-|b|`
        - translate: 0.2 # Variance/severity of image translation transform
        - scale: 0.2
        - smooth_non_linear_deformation: false # Strength/severity of a non-linear image deformation (set to null, false or 0 to disable)
  - augmix_augmentation:
      <<: *basic_augmentation
      augmix:
        - augmentation_chains_count: [1, 3] # Number of augmentation transform chains to mix together (see the [AugMix](https://arxiv.org/pdf/1912.02781.pdf) augmentation algorithm for more details)
        - transform_chains_dirichlet: 0.3 # Dirichlet distribution parameter to sample the k=3 mixing convex coefficients (to be convex along each dimension, Dirichlet coefficients must be > 1.)
        - mix_with_original_beta: 0.3 # If strength/severity is greater than 0, the final augmented image will be mixed with its original image (the given value parameterizes a Beta distribution)
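        # For reference, AugMix (per the paper linked above) roughly produces:
        #   x_augmix = m * x_orig + (1 - m) * sum_k(w_k * chain_k(x_orig))
        # where the chain weights w ~ Dirichlet(alpha, ..., alpha) and the original-image mixing
        # coefficient m ~ Beta(beta, beta), with alpha/beta presumably given by the two parameters above.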
  - singan_augmentation: &singan_augmentation
      <<: *basic_augmentation
      transforms_additional:
        - distilled_singan_augmentation: true
#_______________________________________________________ PREPROCESSING _______________________________________________________#
basic_preprocessing_procedure: &BASIC_PREPROCESSING_PROCEDURE
  cache: false
  seed: 434546
  split_dataset:
    validset_ratio: 0.2 # Fraction of the dataset held out for validation
    testset_ratio: 0.1 # Fraction held out for testing (won't be taken into account if a testset already exists)
  transforms:
    - !py!torchvision.transforms.ToTensor
  #augmentation_recipe: *basic_augmentation
mnist_preprocessing:
  <<: *BASIC_PREPROCESSING_PROCEDURE
  transforms:
    - !py!torchvision.transforms.ToTensor
    - !py!torchvision.transforms.Normalize "": { mean: [0.15, 0.15, 0.15], std: [0.15, 0.15, 0.15] }
cifar10_preprocessing: &CIFAR10_PREPROCESSING
  <<: *BASIC_PREPROCESSING_PROCEDURE
  transforms:
    - !py!torchvision.transforms.ToTensor
    - !py!torchvision.transforms.Normalize "": { mean: [0.491, 0.482, 0.447], std: [0.247, 0.243, 0.261] }
cifar100_preprocessing:
  <<: *CIFAR10_PREPROCESSING
imagenet_preprocessing:
  <<: *BASIC_PREPROCESSING_PROCEDURE
  transforms:
    - !py!torchvision.transforms.ToTensor
    - !py!torchvision.transforms.Normalize "": { mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225] }
#_____________________________________________________________________________________________________________________________#