switch to swish activation
peterdsharpe committed Mar 30, 2024
1 parent c5dd618 commit 63783eb
Showing 2 changed files with 28 additions and 31 deletions.
10 changes: 5 additions & 5 deletions neuralfoil/gen2_5_architecture/main.py
@@ -109,7 +109,7 @@ def net(x: np.ndarray):
x = w @ x + np.reshape(b, (-1, 1))

if len(layer_indices_to_iterate) != 0: # Don't apply the activation function on the last layer
- x = np.softplus(x)
+ x = np.swish(x)
x = np.transpose(x)
return x

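For reference: the `np` module used here is an array-API-agnostic wrapper rather than vanilla NumPy (which has neither `softplus` nor `swish`). The two activations themselves are standard; below is a minimal plain-NumPy sketch of what is being swapped, written as free functions purely for illustration:

    import numpy as np

    def softplus(x):
        # Old activation: smooth approximation of ReLU, softplus(x) = log(1 + e^x).
        return np.log1p(np.exp(-np.abs(x))) + np.maximum(x, 0.0)  # numerically stable form

    def swish(x):
        # New activation (a.k.a. SiLU): swish(x) = x * sigmoid(x).
        return x / (1.0 + np.exp(-x))

    x = np.linspace(-4.0, 4.0, 9)
    print(softplus(x))
    print(swish(x))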
@@ -124,8 +124,8 @@ def net(x: np.ndarray):
x_flipped = x + 0. # This is an array-api-agnostic way to force a memory copy of the array to be made.
x_flipped[:, :8] = x[:, 8:16] * -1 # switch kulfan_lower with a flipped kulfan_upper
x_flipped[:, 8:16] = x[:, :8] * -1 # switch kulfan_upper with a flipped kulfan_lower
- x_flipped[:, 16] *= -1 # flip kulfan_LE_weight
- x_flipped[:, 18] *= -1 # flip sin(2a)
+ x_flipped[:, 16] = -1 * x[:, 16] # flip kulfan_LE_weight
+ x_flipped[:, 18] = -1 * x[:, 18] # flip sin(2a)
x_flipped[:, 23] = x[:, 24] # flip xtr_upper with xtr_lower
x_flipped[:, 24] = x[:, 23] # flip xtr_lower with xtr_upper

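The `x + 0.` idiom above forces a copy without calling any backend-specific `.copy()` method, so the later in-place writes to `x_flipped` leave `x` untouched. A small NumPy-only illustration (NumPy is just one of the array backends this code is meant to support):

    import numpy as np

    x = np.array([1.0, 2.0, 3.0])
    x_flipped = x + 0.      # arithmetic allocates a fresh array, not a view
    x_flipped[0] = -99.0    # write to the copy...
    print(x)                # ...and the original is unchanged: [1. 2. 3.]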
@@ -135,8 +135,8 @@ def net(x: np.ndarray):

### The resulting outputs will also be flipped, so we need to flip them back to their normal orientation
y_unflipped = y_flipped + 0. # This is an array-api-agnostic way to force a memory copy of the array to be made.
- y_unflipped[:, 1] *= -1 # CL
- y_unflipped[:, 3] *= -1 # CM
+ y_unflipped[:, 1] = y_flipped[:, 1] * -1 # CL
+ y_unflipped[:, 3] = y_flipped[:, 3] * -1 # CM
y_unflipped[:, 4] = y_flipped[:, 5] # switch Top_Xtr with Bot_Xtr
y_unflipped[:, 5] = y_flipped[:, 4] # switch Bot_Xtr with Top_Xtr

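The flip/unflip bookkeeping above encodes the physical symmetry: mirroring the airfoil and negating the angle of attack must negate CL and CM and swap the upper/lower transition locations. A minimal sketch of how such a symmetrized evaluation can be assembled; the `flip_inputs`/`unflip_outputs` helpers are hypothetical stand-ins for the index shuffling shown in this diff, and averaging the two passes is an assumption about how the results are fused:

    def symmetric_net(x, net, flip_inputs, unflip_outputs):
        y = net(x)                                         # evaluate the problem as given
        y_mirrored = unflip_outputs(net(flip_inputs(x)))   # evaluate the mirrored problem
        return 0.5 * (y + y_mirrored)                      # fuse so the output is flip-invariant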
49 changes: 23 additions & 26 deletions training/gen2_architecture/train_blind_neural_network.py
@@ -21,6 +21,8 @@
N_outputs = len(df_train_outputs_scaled.columns)

cache_file = Path(__file__).parent / "nn-xxxlarge.pth"
+ n_hidden_layers = 5
+ width = 512
print("Cache file: ", cache_file)


@@ -29,30 +31,26 @@ class Net(torch.nn.Module):
def __init__(self, mean_inputs_scaled, cov_inputs_scaled):
super().__init__()

- width = 512
-
self.mean_inputs_scaled = mean_inputs_scaled
self.cov_inputs_scaled = cov_inputs_scaled
self.inv_cov_inputs_scaled = torch.inverse(cov_inputs_scaled)
self.N_inputs = len(mean_inputs_scaled)

- self.net = torch.nn.Sequential(
+ layers = [
torch.nn.Linear(N_inputs, width),
- torch.nn.Tanh(),
-
- torch.nn.Linear(width, width),
- torch.nn.Tanh(),
- torch.nn.Linear(width, width),
- torch.nn.Tanh(),
- torch.nn.Linear(width, width),
- torch.nn.Tanh(),
- torch.nn.Linear(width, width),
- torch.nn.Tanh(),
- torch.nn.Linear(width, width),
- torch.nn.Tanh(),
-
+ torch.nn.SiLU(),
+ ]
+ for i in range(n_hidden_layers):
+     layers += [
+         torch.nn.Linear(width, width),
+         torch.nn.SiLU(),
+     ]
+
+ layers += [
torch.nn.Linear(width, N_outputs),
- )
+ ]

+ self.net = torch.nn.Sequential(*layers)

def squared_mahalanobis_distance(self, x: torch.Tensor):
return torch.sum(
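In the rewritten constructor, `torch.nn.SiLU` is PyTorch's built-in swish activation (x * sigmoid(x)), and the hidden stack is now built in a loop driven by the new `n_hidden_layers` and `width` constants. A standalone sketch of the same pattern, with illustrative input/output sizes that are not taken from the repository:

    import torch

    N_inputs, N_outputs = 26, 198        # placeholder sizes, purely illustrative
    n_hidden_layers, width = 5, 512

    layers = [
        torch.nn.Linear(N_inputs, width),
        torch.nn.SiLU(),
    ]
    for _ in range(n_hidden_layers):
        layers += [
            torch.nn.Linear(width, width),
            torch.nn.SiLU(),
        ]
    layers += [torch.nn.Linear(width, N_outputs)]

    net = torch.nn.Sequential(*layers)
    print(sum(p.numel() for p in net.parameters()))  # total parameter count of the MLP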
@@ -67,15 +65,14 @@ def forward(self, x: torch.Tensor):
### Add in the squared Mahalanobis distance to the analysis_confidence logit, to ensure it
# asymptotes to untrustworthy as the inputs get further from the training data


### Then, flip the inputs and evaluate the network again.
# The goal here is to embed the invariant of "symmetry across alpha" into the network evaluation.

x_flipped = x.clone()
x_flipped[:, :8] = -1 * x[:, 8:16] # switch kulfan_lower with a flipped kulfan_upper
x_flipped[:, 8:16] = -1 * x[:, :8] # switch kulfan_upper with a flipped kulfan_lower
x_flipped[:, 16] = -1 * x[:, 16] # flip kulfan_LE_weight
- x_flipped[:, 18] = -1 * x[:, 18]  # flip sin(2a)
+ x_flipped[:, 18] = -1 * x[:, 18]  # flip sin(2a)
x_flipped[:, 23] = x[:, 24] # flip xtr_upper with xtr_lower
x_flipped[:, 24] = x[:, 23] # flip xtr_lower with xtr_upper

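The squared Mahalanobis distance mentioned at the top of this hunk measures how far an input lies from the training distribution, d²(x) = (x − μ)ᵀ Σ⁻¹ (x − μ), using the stored mean and inverse covariance of the scaled training inputs. A minimal sketch of that computation; exactly how the distance is scaled into the confidence logit is not visible in this hunk, so the commented usage line is an assumption:

    import torch

    def squared_mahalanobis_distance(x, mean, inv_cov):
        # Row-wise d^2 = (x - mu)^T Sigma^{-1} (x - mu) for a batch of inputs.
        d = x - mean
        return torch.sum((d @ inv_cov) * d, dim=1)

    # Hypothetical use: push the analysis_confidence logit toward "untrustworthy"
    # as inputs drift away from the training data.
    # y[:, 0] = y[:, 0] - squared_mahalanobis_distance(x, mean, inv_cov)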
@@ -93,9 +90,9 @@ def forward(self, x: torch.Tensor):

# switch upper and lower Ret, H
y_unflipped[:, 6 + 32 * 0: 6 + 32 * 2] = y_flipped[:, 6 + 32 * 3: 6 + 32 * 5]
- y_unflipped[:, 6 + 32 * 2: 6 + 32 * 3] = y_flipped[:, 6 + 32 * 5: 6 + 32 * 6] * -1  # ue/vinf
+ y_unflipped[:, 6 + 32 * 2: 6 + 32 * 3] = y_flipped[:, 6 + 32 * 5: 6 + 32 * 6] * -1  # ue/vinf
y_unflipped[:, 6 + 32 * 3: 6 + 32 * 5] = y_flipped[:, 6 + 32 * 0: 6 + 32 * 2]
- y_unflipped[:, 6 + 32 * 5: 6 + 32 * 6] = y_flipped[:, 6 + 32 * 2: 6 + 32 * 3] * -1  # ue/vinf
+ y_unflipped[:, 6 + 32 * 5: 6 + 32 * 6] = y_flipped[:, 6 + 32 * 2: 6 + 32 * 3] * -1  # ue/vinf

# switch upper_bl_ue/vinf with lower_bl_ue/vinf

@@ -117,15 +114,14 @@ def forward(self, x: torch.Tensor):
cov_inputs_scaled=torch.tensor(cov_inputs_scaled, dtype=torch.float32).to(device),
).to(device)


# Define the optimizer
learning_rate = 1e-4
optimizer = torch.optim.RAdam(net.parameters(), lr=learning_rate, weight_decay=3e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
optimizer,
- patience=10,
factor=0.5,
+ patience=50,
verbose=True,
min_lr=0,
)

try:
@@ -182,6 +178,7 @@ def forward(self, x: torch.Tensor):

loss_weights = loss_weights / torch.sum(loss_weights) * 1000


def loss_function(y_pred, y_data, return_individual_loss_components=False):
# For data with NaN, overwrite the data with the prediction. This essentially makes the model ignore NaN data,
# since the gradient of the loss with respect to parameters is zero when the data is NaN.
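The NaN-masking trick described in this comment can be sketched as follows: wherever the target is NaN, the target is replaced by the prediction itself, so that term contributes exactly zero loss and zero gradient. This is a minimal sketch only; the actual loss in this file also applies per-output `loss_weights` and other terms not shown in this hunk:

    import torch

    def nan_tolerant_mse(y_pred, y_data):
        # Substitute the prediction wherever the data is NaN: that term becomes
        # (y_pred - y_pred)^2 == 0, so it adds nothing to the loss or its gradient.
        y_data = torch.where(torch.isnan(y_data), y_pred, y_data)
        return torch.mean((y_pred - y_data) ** 2)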
@@ -286,7 +283,7 @@ def loss_function(y_pred, y_data, return_individual_loss_components=False):

loss_components_from_each_test_batch.append(loss_components)

- y_pred[:, 0] = torch.sigmoid(y_pred[:, 0])  # Analysis confidence, a binary variable
+ y_pred[:, 0] = torch.sigmoid(y_pred[:, 0])  # Analysis confidence, a binary variable

mae_from_each_test_batch.append(
torch.nanmean(torch.abs(y_pred - y_data), dim=0)
@@ -321,6 +318,6 @@ def loss_function(y_pred, y_data, return_individual_loss_components=False):
scheduler.step(test_loss)

torch.save({
- 'model_state_dict': net.state_dict(),
+ 'model_state_dict' : net.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
}, cache_file)
