fix!: Rework Loss Scalings to provide better modularity #52

Open
wants to merge 57 commits into base: main

Commits (57)
511ed18
first version of refactor of variable scaling
sahahner Dec 27, 2024
7ddf6d6
config training changes
sahahner Dec 27, 2024
3ddeccc
avoid multiple scaling
sahahner Dec 27, 2024
be4602c
docstring and explain variable reference
sahahner Dec 31, 2024
195af07
fix to config for pressure level scaler
mc4117 Dec 31, 2024
2644c18
instantiating scalars as a list
mc4117 Dec 31, 2024
718fc57
preparing for tendency losses
mc4117 Dec 31, 2024
a34ac02
Merge branch '7-pressure-level-scalings-only-applied-in-specific-circ…
mc4117 Dec 31, 2024
b91af11
log the variable level scaling information as before
sahahner Jan 2, 2025
c22c50b
adding tendency scaler to additional scalers
pinnstorm Jan 8, 2025
1f4a532
reformatting
pinnstorm Jan 8, 2025
2843d98
updating description in configs
pinnstorm Jan 8, 2025
c978871
updating var-tendency-scaler spec
pinnstorm Jan 12, 2025
f56f9b2
updating training/default config
pinnstorm Jan 12, 2025
be90000
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 12, 2025
e474ae9
updating training/default.yaml
pinnstorm Jan 13, 2025
f005f84
updating training/default.yaml
pinnstorm Jan 13, 2025
7cdccc5
first try at tests
mc4117 Jan 17, 2025
61e7933
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 17, 2025
462bb34
variable name and level from mars metadata
sahahner Jan 17, 2025
960a602
Merge branch '7-pressure-level-scalings-only-applied-in-specific-circ…
sahahner Jan 17, 2025
af10173
get variable group and level in utils file
sahahner Jan 17, 2025
395cd6f
empty line
sahahner Jan 17, 2025
1f53a82
convert test for new structure. pressure level and general variable s…
sahahner Jan 17, 2025
3747959
more plausible check for availability of mars metadata
sahahner Jan 17, 2025
68cd6e3
update to tendency tests (still not working)
mc4117 Jan 17, 2025
d3a7c29
Merge branch '7-pressure-level-scalings-only-applied-in-specific-circ…
mc4117 Jan 17, 2025
d6e127a
tendency scaler tests now working
mc4117 Jan 20, 2025
fd29cbc
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 20, 2025
8bff68b
change function into class, extracting variable group and name
sahahner Jan 22, 2025
4c7cbc1
Merge branch '7-pressure-level-scalings-only-applied-in-specific-circ…
sahahner Jan 22, 2025
7d8c76d
correct function call
sahahner Jan 22, 2025
d928b30
correct typo in test
sahahner Jan 22, 2025
bb054ce
incorporate comments
sahahner Jan 22, 2025
d0046fa
introduce base class for all loss scalings
sahahner Jan 22, 2025
a03d6ba
type checking check after all imports
sahahner Jan 22, 2025
aa7f558
comment: explanation about variable groups in config file
sahahner Jan 22, 2025
9a8a4b9
rm if statement for tendency scaler
mc4117 Jan 22, 2025
66d66ed
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 22, 2025
db05ce5
use utils function to retrieve variable group and reference for valid…
sahahner Jan 22, 2025
61766cd
Merge branch '7-pressure-level-scalings-only-applied-in-specific-circ…
sahahner Jan 22, 2025
3adf924
comment in config file that scaler name needs to be added to loss as w…
sahahner Jan 22, 2025
f19d69d
fix pre-commit hooks
mc4117 Jan 22, 2025
c26d744
Merge branch '7-pressure-level-scalings-only-applied-in-specific-circ…
mc4117 Jan 22, 2025
00439cb
Update description in training/default
mc4117 Jan 24, 2025
6c857a6
refactor into training/scaling both the code and the config files, re…
sahahner Jan 27, 2025
a2f2728
more scalar renaming to scaler
sahahner Jan 27, 2025
b5f6b5f
fix tendency loss
mc4117 Jan 27, 2025
b5fa55b
fix merge conflict
mc4117 Jan 27, 2025
cdb9e19
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 27, 2025
963c543
Add '*' to scaler selection.
HCookie Jan 27, 2025
4f1566b
Add exclusion of scalers
HCookie Jan 27, 2025
e4ceb8e
Fix scalar reference in tests
HCookie Jan 27, 2025
7178074
Add all and exclude tests
HCookie Jan 27, 2025
08b4cb3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 27, 2025
0dbf0b8
fix: update all tests, move scaling module into losses
sahahner Jan 28, 2025
2dccbd2
print final variable scaling in debug mode
sahahner Jan 28, 2025
32 changes: 17 additions & 15 deletions training/docs/modules/losses.rst
@@ -6,7 +6,7 @@ This module is used to define the loss function used to train the model.

Anemoi-training exposes a couple of loss functions by default to be
used, all of which are subclassed from ``BaseWeightedLoss``. This class
enables scalar multiplication, and graph node weighting.
enables scaler multiplication, and graph node weighting.

.. automodule:: anemoi.training.losses.weightedloss
:members:
@@ -47,26 +47,28 @@ reference it in the config as follows:
# loss function kwargs here

*********
Scalars
Scalers
*********

In addition to node scaling, the loss function can also be scaled by a
scalar. These are provided by the ``Forecaster`` class, and a user can
scaler. These are provided by the ``Forecaster`` class, and a user can
define whether to include them in the loss function by setting
``scalars`` in the loss config dictionary.
``scalers`` in the loss config dictionary.

.. code:: yaml

# loss function for the model
training_loss:
# loss class to initialise
_target_: anemoi.training.losses.mse.WeightedMSELoss
scalars: ['scalar1', 'scalar2']
scalers: ['scaler1', 'scaler2']

Currently, the following scalars are available for use:
Scalers can be added as options for the loss functions using the
`scaler` builders in `config.training.scaler`.

- ``variable``: Scale by the feature/variable weights as defined in the
config ``config.training.variable_loss_scaling``.
``*`` is a valid entry to use all `scalers` given. To exclude a scaler,
add ``!scaler_name``; e.g. ``['*', '!scaler_1']`` applies every scaler
except ``scaler_1``.
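The ``'*'``/``'!name'`` selection semantics described above can be sketched as
follows. This is an illustrative helper only, not the actual anemoi-training
implementation; the function name ``resolve_scalers`` is assumed:

```python
def resolve_scalers(selection: list[str], available: list[str]) -> list[str]:
    """Resolve a scaler selection list supporting '*' and '!name' exclusion."""
    # '*' expands to every available scaler; otherwise take the named entries
    if "*" in selection:
        included = set(available)
    else:
        included = {name for name in selection if not name.startswith("!")}
    # '!name' entries remove scalers from the expanded set
    excluded = {name[1:] for name in selection if name.startswith("!")}
    return sorted(included - excluded)
```

For example, ``resolve_scalers(['*', '!scaler_1'], ['scaler_1', 'scaler_2'])``
yields ``['scaler_2']``.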

********************
Validation Metrics
@@ -81,24 +83,24 @@ name
Scaling Validation Losses
=========================

Validation metrics can **not** by default be scaled by scalars across
the variable dimension, but can be by all other scalars. If you want to
Validation metrics can **not** by default be scaled by scalers across
the variable dimension, but can be by all other scalers. If you want to
scale a validation metric by the variable weights, it must be added to
`config.training.scale_validation_metrics`.

These metrics are then kept in the normalised, preprocessed space, and
thus the indexing of scalars aligns with the indexing of the tensors.
thus the indexing of scalers aligns with the indexing of the tensors.

By default, only `all` is kept in the normalised space and scaled.

.. code:: yaml

# List of validation metrics to keep in normalised space, and scalars to be applied
# List of validation metrics to keep in normalised space, and scalers to be applied
# Use '*' to reference all metrics, or a list of metric names.
# Unlike above, variable scaling is possible due to these metrics being
# calculated in the same way as the training loss, within the internal model space.
scale_validation_metrics:
scalars_to_apply: ['variable']
scalers_to_apply: ['variable']
metrics:
- 'all'
# - "*"
@@ -144,7 +146,7 @@ losses above.
losses:
- __target__: anemoi.training.losses.mse.WeightedMSELoss
- __target__: anemoi.training.losses.mae.WeightedMAELoss
scalars: ['variable']
scalers: ['variable']
loss_weights: [1.0,0.5]

All kwargs passed to ``CombinedLoss`` are passed to each of the loss
@@ -170,7 +172,7 @@ option ``config.training.loss_gradient_scaling=True``.

``ScaleTensor`` is a class that can record and apply arbitrary scaling
factors to tensors. It supports relative indexing, combining multiple
scalars over the same dimensions, and is only constructed at
scalers over the same dimensions, and is only constructed at
broadcasting time, so the shape can be resolved to match the tensor
exactly.
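The idea of recording scale factors per dimension and resolving them only at
broadcast time can be sketched with a small numpy stand-in. This is not the
actual ``ScaleTensor`` API — the class and method names here are assumptions
for illustration:

```python
import numpy as np

class LazyScaler:
    """Illustrative sketch: record scale factors per dimension and combine
    them only when the target tensor's shape is known (broadcast time)."""

    def __init__(self):
        self.factors = []  # list of (dim, 1-D array of scale factors)

    def add_scaler(self, dim, factor):
        # dim may be negative (relative indexing); it is resolved at scale time
        self.factors.append((dim, np.asarray(factor, dtype=float)))

    def scale(self, tensor):
        out = np.asarray(tensor, dtype=float)
        for dim, factor in self.factors:
            # build a broadcast shape of ones with the factor along `dim`
            shape = [1] * out.ndim
            shape[dim] = factor.size
            out = out * factor.reshape(shape)
        return out
```

Two scalers registered on the same dimension multiply together, mirroring how
multiple scalers can be combined over the variable dimension.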

54 changes: 14 additions & 40 deletions training/src/anemoi/training/config/training/default.yaml
@@ -1,3 +1,7 @@
---
defaults:
- scalers: scalers

# resume or fork a training from a checkpoint last.ckpt or specified in hardware.files.warm_start
run_id: null
fork_run_id: null
@@ -46,12 +50,11 @@ zero_optimizer: False
training_loss:
# loss class to initialise
_target_: anemoi.training.losses.mse.WeightedMSELoss
# Scalars to include in loss calculation
# Available scalars include:
# - 'variable': See `variable_loss_scaling` for more information
# - 'loss_weights_mask': Giving imputed NaNs a zero weight in the loss function
scalars: ['variable', 'loss_weights_mask']

# Scalers to include in loss calculation
# A selection of available scalers are listed in training/scalers/scalers.yaml
# '*' is a valid entry to use all `scalers` given. To exclude a scaler,
# add `!scaler_name`, e.g. ['*', '!scaler_1'] applies every scaler except `scaler_1`.
scalers: ['pressure_level', 'general_variable', 'nan_mask_weights']
ignore_nans: False

loss_gradient_scaling: False
@@ -64,21 +67,21 @@ loss_gradient_scaling: False
validation_metrics:
# loss class to initialise
- _target_: anemoi.training.losses.mse.WeightedMSELoss
# Scalars to include in loss calculation
# Scalers to include in loss calculation
# Cannot scale over the variable dimension due to possible remappings.
# Available scalars include:
# Available scalers include:
# - 'loss_weights_mask': Giving imputed NaNs a zero weight in the loss function
# Use the `scale_validation_metrics` section to variable scale.
scalars: []
scalers: []
# other kwargs
ignore_nans: True

# List of validation metrics to keep in normalised space, and scalars to be applied
# List of validation metrics to keep in normalised space, and scalers to be applied
# Use '*' to reference all metrics, or a list of metric names.
# Unlike above, variable scaling is possible due to these metrics being
# calculated in the same way as the training loss, within the internal model space.
scale_validation_metrics:
scalars_to_apply: ['variable']
scalers_to_apply: ['general_variable', 'pressure_level']
metrics:
- 'all'
# - "*"
@@ -106,37 +109,8 @@ lr:
# in order to keep a constant global_lr
# global_lr = local_lr * num_gpus_per_node * num_nodes / gpus_per_model

# Variable loss scaling
# 'variable' must be included in `scalars` in the losses for this to be applied.
variable_loss_scaling:
default: 1
pl:
q: 0.6 #1
t: 6 #1
u: 0.8 #0.5
v: 0.5 #0.33
w: 0.001
z: 12 #1
sfc:
sp: 10
10u: 0.1
10v: 0.1
2d: 0.5
tp: 0.025
cp: 0.0025

metrics:
- z_500
- t_850
- u_850
- v_850

pressure_level_scaler:
_target_: anemoi.training.data.scaling.ReluPressureLevelScaler
minimum: 0.2
slope: 0.001

node_loss_weights:
_target_: anemoi.training.losses.nodeweights.GraphNodeAttribute
target_nodes: ${graph.data}
node_attribute: area_weight
58 changes: 58 additions & 0 deletions training/src/anemoi/training/config/training/scalers/scalers.yaml
@@ -0,0 +1,58 @@
variable_groups:
default: sfc
pl: [q, t, u, v, w, z]

# Several scalers can be added here. In order to be applied their names must be included in the loss.
# scaler name must be included in `scalers` in the losses for this to be applied.
builders:
general_variable:
# Variable groups definition for scaling by variable level.
# The variable level scaling methods are defined under additional_scalers
# A default group is required and is used as the prefix in the metric name of all variables not assigned to a group.
_target_: anemoi.training.losses.scaling.variable.GeneralVariableLossScaler
scale_dim: -1 # dimension on which scaling applied
weights:
default: 1
q: 0.6 #1
t: 6 #1
u: 0.8 #0.5
v: 0.5 #0.33
w: 0.001
z: 12 #1
sp: 10
10u: 0.1
10v: 0.1
2d: 0.5
tp: 0.025
cp: 0.0025

pressure_level:
_target_: anemoi.training.losses.scaling.variable_level.ReluVariableLevelScaler
group: pl
y_intercept: 0.2
slope: 0.001
scale_dim: -1 # dimension on which scaling applied

# mask NaNs with zeros in the loss function
nan_mask_weights:
_target_: anemoi.training.losses.scaling.loss_weights_mask.NaNMaskScaler
scale_dim: (-2, -1) # dimension on which scaling applied

# tendency scalers
# scale the prognostic losses by the stdev of the variable tendencies (e.g. the 6-hourly differences of the data)
# useful if including slow vs fast evolving variables in the training (e.g. Land/Ocean vs Atmosphere)
# if using this option 'variable_loss_scalings' should all be set close to 1.0 for prognostic variables
stdev_tendency:
_target_: anemoi.training.losses.scaling.variable_tendency.StdevTendencyScaler
scale_dim: -1 # dimension on which scaling applied
var_tendency:
_target_: anemoi.training.losses.scaling.variable_tendency.VarTendencyScaler
scale_dim: -1 # dimension on which scaling applied

node_weights:
_target_: anemoi.training.losses.nodeweights.GraphNodeAttribute
target_nodes: ${graph.data}
node_attribute: area_weight
scale_dim: 2 # dimension on which scaling applied
Comment on lines +52 to +56

Member:
This class doesn't have a scale_dim attribute.
It may also be useful to add a general scale by node attribute scaler.

Member Author:
Not yet. Refactor is still ongoing.

# limited_area_mask
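Assuming ``ReluVariableLevelScaler`` keeps the formula of the old
``ReluPressureLevelScaler`` — a linear weight in the variable level, floored at
``y_intercept`` — the per-level weight implied by the config above
(``y_intercept: 0.2``, ``slope: 0.001``) can be sketched as:

```python
def relu_level_weight(level: float, y_intercept: float = 0.2, slope: float = 0.001) -> float:
    """ReLU-style variable-level weight: grows linearly with the level,
    floored at y_intercept so low levels are not scaled toward zero."""
    return max(y_intercept, slope * level)
```

With these defaults, an 850 hPa field gets weight 0.85 while a 100 hPa field is
floored at 0.2.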
5 changes: 5 additions & 0 deletions training/src/anemoi/training/data/datamodule.py
@@ -73,6 +73,10 @@ def __init__(self, config: DictConfig, graph_data: HeteroData) -> None:
def statistics(self) -> dict:
return self.ds_train.statistics

@cached_property
def statistics_tendencies(self) -> dict:
return self.ds_train.statistics_tendencies

@cached_property
def metadata(self) -> dict:
return self.ds_train.metadata
@@ -183,6 +187,7 @@ def _get_dataset(
rollout=r,
multistep=self.config.training.multistep_input,
timeincrement=self.timeincrement,
timestep=self.config.data.timestep,
shuffle=shuffle,
grid_indices=self.grid_indices,
label=label,
12 changes: 12 additions & 0 deletions training/src/anemoi/training/data/dataset.py
@@ -41,6 +41,7 @@ def __init__(
rollout: int = 1,
multistep: int = 1,
timeincrement: int = 1,
timestep: str = "6h",
shuffle: bool = True,
label: str = "generic",
effective_bs: int = 1,
@@ -57,6 +58,8 @@
length of rollout window, by default 12
timeincrement : int, optional
time increment between samples, by default 1
timestep : str, optional
the time frequency of the samples, by default '6h'
multistep : int, optional
collate (t-1, ... t - multistep) into the input state vector, by default 1
shuffle : bool, optional
@@ -73,6 +76,7 @@

self.rollout = rollout
self.timeincrement = timeincrement
self.timestep = timestep
self.grid_indices = grid_indices

# lazy init
Expand Down Expand Up @@ -104,6 +108,14 @@ def statistics(self) -> dict:
"""Return dataset statistics."""
return self.data.statistics

@cached_property
def statistics_tendencies(self) -> dict:
"""Return dataset tendency statistics."""
try:
return self.data.statistics_tendencies(self.timestep)
except (KeyError, AttributeError):
return None
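The try/except fallback above — returning ``None`` when the underlying dataset
does not expose tendency statistics — is a common graceful-degradation pattern
with ``cached_property``. A self-contained illustration (class names here are
assumptions, not the anemoi-training classes):

```python
from functools import cached_property

class OptionalStatsDataset:
    """Illustrative: expose optional statistics with a graceful None fallback."""

    def __init__(self, data):
        self.data = data

    @cached_property
    def statistics_tendencies(self):
        try:
            # the wrapped dataset may not implement tendency statistics
            return self.data.statistics_tendencies("6h")
        except (KeyError, AttributeError):
            return None  # older datasets: callers must handle None
```

The result (including the ``None`` fallback) is computed once and cached on the
instance, so repeated access does not re-trigger the exception path.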

@cached_property
def metadata(self) -> dict:
"""Return dataset metadata."""
79 changes: 0 additions & 79 deletions training/src/anemoi/training/data/scaling.py

This file was deleted.

2 changes: 1 addition & 1 deletion training/src/anemoi/training/losses/combined.py
@@ -70,7 +70,7 @@ def __init__(
losses:
- __target__: anemoi.training.losses.mse.WeightedMSELoss
- __target__: anemoi.training.losses.mae.WeightedMAELoss
scalars: ['variable']
scalers: ['variable']
loss_weights: [1.0,0.5]
```
"""