pervariantnorm
ArnovanHilten committed Nov 1, 2024
1 parent d961d20 commit e9a566e
Showing 5 changed files with 112 additions and 45 deletions.
8 changes: 7 additions & 1 deletion GenNet.py
@@ -203,7 +203,7 @@ def make_parser_train(self, parser_train):
"-mixed_precision",
action='store_true',
default=False,
help='Flag for mixed precision to save memory (can reduce performance)')
parser_train.add_argument(
"-suffix",
metavar="extra_info",
@@ -252,6 +252,12 @@ def make_parser_train(self, parser_train):
action='store_true',
default=False,
help='initialize the one-hot encoding for the neural network with a linear assumption')
parser_train.add_argument(
"-improved_norm",
action='store_true',
default=False,
help='Use the PerVariantNormalization layer instead of batchnorm, for normalization better suited to interpretation')

return parser_train

def make_parser_plot(self, parser_plot):
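With the flag registered, the improved normalization can be switched on at training time. A minimal invocation sketch (the train subcommand and -improved_norm come from the parser above; everything else is a placeholder for a normal training call):

    python GenNet.py train <usual training arguments> -improved_norm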
39 changes: 29 additions & 10 deletions GenNet_utils/Create_network.py
@@ -12,6 +12,9 @@
import scipy
import tables
tf.keras.backend.set_epsilon(0.0000001)

from GenNet_utils.Normalization import PerVariantNormalization

tf_version = tf.__version__ # ToDo use packaging.version

if tf_version <= '1.13.1':
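Note on the ToDo above: comparing version strings lexicographically misorders multi-digit components (as strings, '1.9.0' > '1.13.1'). A minimal sketch of the comparison the ToDo suggests, assuming the packaging package is available:

    from packaging import version

    if version.parse(tf.__version__) <= version.parse('1.13.1'):
        ...  # TF 1.x code path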
@@ -52,7 +52,7 @@ def regression_properties(datapath):
return mean_ytrain, negative_values_ytrain


def layer_block(model, mask, i, regression, L1_act =0.01):
def layer_block(model, mask, i, regression, L1_act=0.01, batchnorm=True):
if regression:
activation_type="relu"
else:
@@ -61,7 +64,12 @@ def layer_block(model, mask, i, regression, L1_act =0.01):
model = LocallyDirected1D(mask=mask, filters=1, input_shape=(mask.shape[0], 1),
name="LocallyDirected_" + str(i), activity_regularizer=K.regularizers.l1(L1_act))(model)
model = K.layers.Activation(activation_type)(model)
model = K.layers.BatchNormalization(center=False, scale=False)(model)

if batchnorm:
model = K.layers.BatchNormalization(center=False, scale=False)(model)
else:
model = PerVariantNormalization()(model)

return model


@@ -87,11 +95,18 @@ def one_hot_input(input_layer):
return model


def add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain, mean_ytrain, l1_value, L1_act):
def add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain,
mean_ytrain, l1_value, L1_act, batchnorm=True):
if num_covariates > 0:
model = activation_layer(model, regression, negative_values_ytrain)
model = K.layers.concatenate([model, input_cov], axis=1, name="concatenate_cov")
model = K.layers.BatchNormalization(center=False, scale=False, name="batchnorm_cov")(model)


if batchnorm:
model = K.layers.BatchNormalization(center=False, scale=False, name="batchnorm_cov")(model)
else:
model = PerVariantNormalization(name="pervariantnorm_cov")(model)

model = K.layers.Dense(units=1, name="output_layer_cov",
kernel_regularizer=tf.keras.regularizers.l1(l=l1_value),
activity_regularizer=K.regularizers.l1(L1_act),
@@ -106,7 +121,8 @@ def create_network_from_npz(datapath,
regression=False,
one_hot = False,
num_covariates=0,
mask_order = []):
mask_order = [],
batchnorm = True):
print("Creating networks from npz masks")
print("regression", regression)
print("one_hot", one_hot)
@@ -171,7 +187,7 @@

for i in range(len(masks)):
mask = masks[i]
model = layer_block(model, mask, i, regression, L1_act=L1_act)
model = layer_block(model, mask, i, regression, L1_act=L1_act, batchnorm=batchnorm)

model = K.layers.Flatten()(model)

@@ -184,7 +200,8 @@
activity_regularizer=K.regularizers.l1(L1_act),
bias_initializer= tf.keras.initializers.Constant(mean_ytrain))(model)

model = add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain, mean_ytrain, l1_value, L1_act)
model = add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain,
mean_ytrain, l1_value, L1_act, batchnorm=batchnorm)

output_layer = activation_layer(model, regression, negative_values_ytrain)
model = K.Model(inputs=[input_layer, input_cov], outputs=output_layer)
@@ -202,7 +219,8 @@ def create_network_from_csv(datapath,
L1_act =0.01,
regression=False,
one_hot=False,
num_covariates=0):
num_covariates=0,
batchnorm=True):

print("Creating networks from npz masks")
print("regression", regression)
@@ -240,7 +258,7 @@
matrixshape = (network_csv[columns[i]].max() + 1, network_csv[columns[i + 1]].max() + 1)
mask = scipy.sparse.coo_matrix(((matrix_ones), matrix_coord), shape = matrixshape)
masks.append(mask)
model = layer_block(model, mask, i, regression, L1_act=L1_act)
model = layer_block(model, mask, i, regression, L1_act=L1_act, batchnorm=batchnorm)

model = K.layers.Flatten()(model)

@@ -249,7 +267,8 @@
activity_regularizer=K.regularizers.l1(L1_act),
bias_initializer= tf.keras.initializers.Constant(mean_ytrain))(model)

model = add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain, mean_ytrain, l1_value, L1_act)
model = add_covariates(model, input_cov, num_covariates, regression, negative_values_ytrain,
mean_ytrain, l1_value, L1_act, batchnorm=batchnorm)

output_layer = activation_layer(model, regression, negative_values_ytrain)

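In both builders, batchnorm=False is what routes layer_block to the new PerVariantNormalization layer. A sketch of a direct call, with placeholder paths and sizes (the keyword arguments mirror the signatures in the diff above):

    from GenNet_utils.Create_network import create_network_from_csv

    model, masks = create_network_from_csv(datapath='...', inputsize=1000,
                                           genotype_path='...', regression=False,
                                           num_covariates=0, batchnorm=False)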
79 changes: 55 additions & 24 deletions GenNet_utils/Normalization.py
@@ -3,43 +3,74 @@
import scipy.sparse as sp


class PerVariantNormalization(tf.keras.layers.Layer):
class PerVariantNormalization(tf.keras.layers.Layer):  # division error
def __init__(self, momentum=0.99, epsilon=1e-6, **kwargs):
super(PerVariantNormalization, self).__init__(**kwargs)
self.momentum = momentum
self.epsilon = epsilon

def build(self, input_shape):
# Initialize mean and variance for each feature (genetic variant)
self.mean = self.add_weight(name='mean',
shape=(input_shape[-1],),
initializer='zeros',
trainable=False)
self.variance = self.add_weight(name='variance',
shape=(input_shape[-1],),
initializer='ones',
trainable=False)
self.mean = self.add_weight(
name='mean',
shape=input_shape[-1:],
initializer='zeros',
trainable=False
)
self.variance = self.add_weight(
name='variance',
shape=input_shape[-1:],
initializer='ones',
trainable=False
)
super(PerVariantNormalization, self).build(input_shape)

def call(self, inputs, training=None):
if training:
# Compute mean and variance for the batch
batch_mean, batch_variance = tf.nn.moments(inputs, axes=[0], keepdims=False)
if training is None:
training = tf.keras.backend.learning_phase()

# Update the running mean and variance
new_mean = self.momentum * self.mean + (1 - self.momentum) * batch_mean
new_variance = self.momentum * self.variance + (1 - self.momentum) * batch_variance
# Compute batch mean and variance
batch_mean, batch_variance = tf.nn.moments(inputs, axes=0)

# Update running mean and variance
new_mean = self.momentum * self.mean + (1.0 - self.momentum) * batch_mean
new_variance = self.momentum * self.variance + (1.0 - self.momentum) * batch_variance

# Create update ops
mean_update = tf.compat.v1.assign(self.mean, new_mean)
variance_update = tf.compat.v1.assign(self.variance, new_variance)

# Add update ops to the layer's updates
self.add_update([mean_update, variance_update])

# Use batch statistics during training, running statistics during inference
mean = tf.keras.backend.in_train_phase(batch_mean, self.mean, training=training)
variance = tf.keras.backend.in_train_phase(batch_variance, self.variance, training=training)

# # Normalize inputs
# std_inv = tf.math.rsqrt(variance + self.epsilon) # Use the selected variance
# outputs = (inputs - mean) * std_inv # Use the selected mean

outputs = per_variant_normalization(inputs, mean, variance, self.epsilon)

return outputs

@tf.custom_gradient
def per_variant_normalization(inputs, mean, variance, epsilon):
# Reshape mean and variance to match inputs
mean = tf.reshape(mean, [1, -1])
variance = tf.reshape(variance, [1, -1])
std = tf.sqrt(variance + epsilon)
outputs = (inputs - mean) / std

self.mean.assign(new_mean)
self.variance.assign(new_variance)
def grad(dy):
# Gradient with respect to inputs
dinputs = dy / std
# Gradients with respect to mean and variance are None
return dinputs, None, None, None

# Normalize each feature (genetic variant) using the running mean and variance
x = (inputs - self.mean) / tf.sqrt(self.variance + self.epsilon)
return x
return outputs, grad

def update_mean_and_variance(self, new_mean, new_variance):
# Update the mean and variance for each genetic variant
self.mean.assign(new_mean)
self.variance.assign(new_variance)


class ConnectedNormalization(tf.keras.layers.Layer):
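The custom gradient above deliberately treats the normalization statistics as constants: the backward pass only rescales the incoming gradient by 1/std and returns None for mean, variance, and epsilon, so nothing backpropagates into the running estimates. A minimal sketch of exercising the layer and confirming that gradients reach the inputs (shapes and values are illustrative only):

    import tensorflow as tf
    from GenNet_utils.Normalization import PerVariantNormalization

    layer = PerVariantNormalization()
    x = tf.random.normal((32, 100))  # batch of 32 samples, 100 variants

    with tf.GradientTape() as tape:
        tape.watch(x)
        y = layer(x, training=True)  # batch statistics; running stats updated
        loss = tf.reduce_sum(y)

    dx = tape.gradient(loss, x)      # shape (32, 100); each column equals 1/std
    print(dx.shape, layer.mean.shape)

One thing worth noting: the assign updates in call run on every invocation; if inference-time calls are meant to leave the running statistics untouched, gating the update on the training flag (as BatchNormalization does) may be the intended behavior.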
25 changes: 18 additions & 7 deletions GenNet_utils/Train_network.py
@@ -9,6 +9,7 @@
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import tensorflow as tf
import tensorflow.keras as K
from tensorflow.keras.optimizers.legacy import Adam

tf.keras.backend.set_epsilon(0.0000001)
from GenNet_utils.Dataloader import *
@@ -18,6 +19,7 @@
from GenNet_utils.Utility_functions import load_train_arguments



def weighted_binary_crossentropy(y_true, y_pred):
y_true = K.backend.clip(tf.cast(y_true, dtype=tf.float32), 0.0001, 1)
y_pred = K.backend.clip(tf.cast(y_pred, dtype=tf.float32), 0.0001, 1)
@@ -249,8 +251,11 @@ def get_network(args):
"""
regression = args.regression if hasattr(args, 'regression') else False
args.init_linear = args.init_linear if hasattr(args, 'init_linear') else False
args.improved_norm = args.improved_norm if hasattr(args, 'improved_norm') else False
args.L1 = args.L1 if hasattr(args, 'L1') else 0
args.L1_act = args.L1_act if hasattr(args, 'L1_act') else 0

batchnorm = not args.improved_norm

global weight_positive_class, weight_negative_class

@@ -271,29 +276,35 @@

elif args.network_name == "gene_network_multiple_filters":
print("gene_network_multiple_filters network")
model, masks = gene_network_multiple_filters(datapath=args.datapath, inputsize=args.inputsize, genotype_path=args.genotype_path,
model, masks = gene_network_multiple_filters(datapath=args.datapath, inputsize=args.inputsize,
genotype_path=args.genotype_path,
l1_value=args.L1, L1_act=args.L1_act,
regression=regression, num_covariates=args.num_covariates,
filters=args.filters, one_hot=args.onehot)

elif args.network_name == "gene_network_snp_gene_filters":
print("gene_network_snp_gene_filters network")
model, masks = gene_network_snp_gene_filters(datapath=args.datapath, inputsize=args.inputsize, genotype_path=args.genotype_path,
model, masks = gene_network_snp_gene_filters(datapath=args.datapath, inputsize=args.inputsize,
genotype_path=args.genotype_path,
l1_value=args.L1, L1_act=args.L1_act,
regression=regression, num_covariates=args.num_covariates,
filters=args.filters, one_hot=args.onehot)
else:
if os.path.exists(args.datapath + "/topology.csv"):
model, masks = create_network_from_csv(datapath=args.datapath, inputsize=args.inputsize, genotype_path=args.genotype_path,
model, masks = create_network_from_csv(datapath=args.datapath, inputsize=args.inputsize,
genotype_path=args.genotype_path,
l1_value=args.L1, L1_act=args.L1_act, regression=regression,
num_covariates=args.num_covariates, one_hot=args.onehot)
num_covariates=args.num_covariates, one_hot=args.onehot,
batchnorm=batchnorm)
elif len(glob.glob(args.datapath + "/*.npz")) > 0:
model, masks = create_network_from_npz(datapath=args.datapath, inputsize=args.inputsize, genotype_path=args.genotype_path,
model, masks = create_network_from_npz(datapath=args.datapath, inputsize=args.inputsize,
genotype_path=args.genotype_path,
l1_value=args.L1, L1_act=args.L1_act, regression=regression,
num_covariates=args.num_covariates, one_hot=args.onehot,
mask_order=args.mask_order if hasattr(args, 'mask_order') else None)
mask_order=args.mask_order if hasattr(args, 'mask_order') else None,
batchnorm=batchnorm)

optimizer_model = tf.keras.optimizers.Adam(lr=args.learning_rate)
optimizer_model = Adam(lr=args.learning_rate)

if regression:
model.compile(loss="mse", optimizer=optimizer_model, metrics=["mse"])
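Aside on the optimizer change above: TF 2.11 replaced the default Keras optimizers with a new implementation, and tf.keras.optimizers.legacy.Adam keeps the pre-2.11 behavior; lr still works there, but learning_rate is the non-deprecated spelling. A minimal sketch of the compile step under the TF 2.11 pin from the requirements below:

    from tensorflow.keras.optimizers.legacy import Adam

    optimizer_model = Adam(learning_rate=args.learning_rate)
    model.compile(loss='mse', optimizer=optimizer_model, metrics=['mse'])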
6 changes: 3 additions & 3 deletions requirements_GenNet.txt
@@ -3,8 +3,8 @@ joblib>=0.16.0,<=1.3.2
Markdown>=3.2.1,<=3.5
matplotlib>=3.3.2,<=3.8.1
jupyter==1.0.0
numpy==1.21.6
pandas>=0.25.3,<=2.0.3
numpy==1.26.4
pandas==2.2.2
Pillow>=7.2.0,<=10.1.0
plotly>=4.12.0,<=5.17.0
pyparsing>=2.4.7,<=3.1.1
@@ -21,5 +21,5 @@ shap==0.42.1
path_explain
psutil<=5.9.6
kaleido<=0.2.1
tensorflow>=2.2,<=2.11.1
tensorflow==2.11
bitarray<=2.8.2
