From 66a44d4c0c22618268058ee4264ca900e3223b64 Mon Sep 17 00:00:00 2001
From: Joeri van Engelen <joerivanengelen@hotmail.com>
Date: Thu, 16 Jan 2025 14:00:48 +0100
Subject: [PATCH] Issue #1381 average well rates at steady state (#1382)

Fixes #1381

# Description

Upon receiving simulation ``times`` with string "steady-state"
``Well.from_imod5_data`` now assumes the simulation is steady-state and
well rates should be averaged.
This brings well rates of packages a lot closer to what iMOD5 produces
in the water balance for steady-state simulations.
``GroundwaterFlowModel.from_imod5_data`` sets ``times`` arg for
``Well.from_imod5_data`` to ``"steady-state"`` if no Storage package is
present in the projectfile.

I opted for this for now as we currently lack of a clear API to
construct steady-state simulations. See also #1308.
---
 docs/api/changelog.rst                        |  4 +
 imod/mf6/model_gwf.py                         | 24 ++---
 imod/mf6/wel.py                               | 87 ++++++++++++++-----
 imod/tests/test_mf6/test_mf6_simulation.py    | 32 +++++++
 imod/tests/test_mf6/test_mf6_wel.py           | 13 +++
 .../test_utilities/test_resampling.py         | 22 ++++-
 imod/typing/__init__.py                       |  2 +
 imod/util/expand_repetitions.py               | 21 ++++-
 8 files changed, 172 insertions(+), 33 deletions(-)

diff --git a/docs/api/changelog.rst b/docs/api/changelog.rst
index be7a0670f..a9a78e20e 100644
--- a/docs/api/changelog.rst
+++ b/docs/api/changelog.rst
@@ -27,6 +27,10 @@ Changed
   > 0 to indicate an active cell and <0 to indicate a vertical passthrough cell,
   consistent with MODFLOW6. Previously this could only be indicated with 1 and
   -1.
+- :meth:`imod.mf6.Well.from_imod5_data` and
+  :meth:`imod.mf6.LayeredWell.from_imod5_data` now also accept the argument
+  ``times = "steady-state"``, for the simulation is assumed to be "steady-state"
+  and well timeseries are averaged.
 
 Fixed
 ~~~~~
diff --git a/imod/mf6/model_gwf.py b/imod/mf6/model_gwf.py
index ebc878876..c7e9a2af0 100644
--- a/imod/mf6/model_gwf.py
+++ b/imod/mf6/model_gwf.py
@@ -40,7 +40,7 @@
     SimulationAllocationOptions,
     SimulationDistributingOptions,
 )
-from imod.typing import GridDataArray
+from imod.typing import GridDataArray, StressPeriodTimesType
 from imod.typing.grid import zeros_like
 
 
@@ -271,7 +271,8 @@ def from_imod5_data(
         result["npf"] = npf_pkg
 
         # import sto
-        if "sto" in imod5_data.keys():
+        is_transient = "sto" in imod5_data.keys()
+        if is_transient:
             result["sto"] = StorageCoefficient.from_imod5_data(
                 imod5_data,
                 grid,
@@ -308,6 +309,7 @@ def from_imod5_data(
         imod5_keys = list(imod5_data.keys())
 
         # import wells
+        wel_times: StressPeriodTimesType = times if is_transient else "steady-state"
         wel_keys = [key for key in imod5_keys if key[0:3] == "wel"]
         for wel_key in wel_keys:
             wel_key_truncated = wel_key[:16]
@@ -322,15 +324,15 @@ def from_imod5_data(
                     """
                 )
                 raise KeyError(msg)
-            layer = np.array(imod5_data[wel_key]["layer"])
-            if np.any(layer == 0):
-                result[wel_key_truncated] = Well.from_imod5_data(
-                    wel_key, imod5_data, times
-                )
-            else:
-                result[wel_key_truncated] = LayeredWell.from_imod5_data(
-                    wel_key, imod5_data, times
-                )
+
+            wel_layer = np.array(imod5_data[wel_key]["layer"])
+            is_allocated = np.any(wel_layer == 0)
+            wel_args = (wel_key, imod5_data, wel_times)
+            result[wel_key_truncated] = (
+                Well.from_imod5_data(*wel_args)
+                if is_allocated
+                else LayeredWell.from_imod5_data(*wel_args)
+            )
 
         if "cap" in imod5_keys:
             result["msw-sprinkling"] = LayeredWell.from_imod5_cap_data(imod5_data)  # type: ignore
diff --git a/imod/mf6/wel.py b/imod/mf6/wel.py
index abefe7c23..50b3fcea0 100644
--- a/imod/mf6/wel.py
+++ b/imod/mf6/wel.py
@@ -4,8 +4,9 @@
 import itertools
 import textwrap
 import warnings
+from collections.abc import Iterable
 from datetime import datetime
-from typing import Any, Optional, Tuple, Union
+from typing import Any, Optional, Tuple, Union, cast
 
 import cftime
 import numpy as np
@@ -45,9 +46,9 @@
     ValidationError,
 )
 from imod.select.points import points_indices, points_values
-from imod.typing import GridDataArray, Imod5DataDict
+from imod.typing import GridDataArray, Imod5DataDict, StressPeriodTimesType
 from imod.typing.grid import is_spatial_grid, ones_like
-from imod.util.expand_repetitions import resample_timeseries
+from imod.util.expand_repetitions import average_timeseries, resample_timeseries
 from imod.util.structured import values_within_range
 
 ABSTRACT_METH_ERROR_MSG = "Method in abstract base class called"
@@ -108,25 +109,33 @@ def _df_groups_to_da_rates(
 def _prepare_well_rates_from_groups(
     pkg_data: dict,
     unique_well_groups: pd.api.typing.DataFrameGroupBy,
-    times: list[datetime],
+    start_times: StressPeriodTimesType,
 ) -> xr.DataArray:
     """
     Prepare well rates from dataframe groups, grouped by unique well locations.
     Resample timeseries if ipf with associated text files.
     """
     has_associated = pkg_data["has_associated"]
-    start_times = times[:-1]  # Starts stress periods.
     if has_associated:
         # Resample times per group
         unique_well_groups = [
-            resample_timeseries(df_group, start_times)
+            _process_timeseries(df_group, start_times)
             for df_group in unique_well_groups
         ]
     return _df_groups_to_da_rates(unique_well_groups)
 
 
+def _process_timeseries(
+    df_group: pd.api.typing.DataFrameGroupBy, start_times: StressPeriodTimesType
+):
+    if _is_steady_state(start_times):
+        return average_timeseries(df_group)
+    else:
+        return resample_timeseries(df_group, start_times)
+
+
 def _prepare_df_ipf_associated(
-    pkg_data: dict, start_times: list[datetime], all_well_times: list[datetime]
+    pkg_data: dict, all_well_times: list[datetime]
 ) -> pd.DataFrame:
     """Prepare dataframe for an ipf with associated timeseries in a textfile."""
     # Validate if associated wells are assigned multiple layers, factors,
@@ -155,7 +164,7 @@ def _prepare_df_ipf_associated(
 
 
 def _prepare_df_ipf_unassociated(
-    pkg_data: dict, start_times: list[datetime]
+    pkg_data: dict, start_times: StressPeriodTimesType
 ) -> pd.DataFrame:
     """Prepare dataframe for an ipf with no associated timeseries."""
     is_steady_state = any(t is None for t in pkg_data["time"])
@@ -185,6 +194,10 @@ def _prepare_df_ipf_unassociated(
     da_multi = df_multi.to_xarray()
     indexers = {"ipf_row": ipf_row_index}
     if not is_steady_state:
+        if start_times == "steady-state":
+            raise ValueError(
+                "``start_times`` cannot be 'steady-state' for transient wells without associated timeseries."
+            )
         indexers["time"] = start_times
     # Multi-dimensional reindex, forward fill well locations, fill well rates
     # with 0.0.
@@ -197,13 +210,12 @@ def _prepare_df_ipf_unassociated(
 
 
 def _unpack_package_data(
-    pkg_data: dict, times: list[datetime], all_well_times: list[datetime]
+    pkg_data: dict, start_times: StressPeriodTimesType, all_well_times: list[datetime]
 ) -> pd.DataFrame:
     """Unpack package data to dataframe"""
-    start_times = times[:-1]  # Starts stress periods.
     has_associated = pkg_data["has_associated"]
     if has_associated:
-        return _prepare_df_ipf_associated(pkg_data, start_times, all_well_times)
+        return _prepare_df_ipf_associated(pkg_data, all_well_times)
     else:
         return _prepare_df_ipf_unassociated(pkg_data, start_times)
 
@@ -287,6 +299,33 @@ def derive_cellid_from_points(
     return cellid.astype(int)
 
 
+def _is_steady_state(times: StressPeriodTimesType) -> bool:
+    # Shortcut when not string, to avoid ambigious bitwise "and" operation when
+    # its not.
+    return isinstance(times, str) and times == "steady-state"
+
+
+def _is_iterable_of_datetimes(times: StressPeriodTimesType) -> bool:
+    return (
+        isinstance(times, Iterable)
+        and (len(times) > 0)
+        and isinstance(times[0], (datetime, np.datetime64, pd.Timestamp))
+    )
+
+
+def _get_starttimes(
+    times: StressPeriodTimesType,
+) -> StressPeriodTimesType:
+    if _is_steady_state(times):
+        return times
+    elif _is_iterable_of_datetimes(times):
+        return cast(list[datetime], times[:-1])
+    else:
+        raise ValueError(
+            "Only 'steady-state' or a list of datetimes are supported for ``times``."
+        )
+
+
 class GridAgnosticWell(BoundaryCondition, IPointDataPackage, abc.ABC):
     """
     Abstract base class for grid agnostic wells
@@ -539,7 +578,7 @@ def from_imod5_data(
         cls,
         key: str,
         imod5_data: dict[str, dict[str, GridDataArray]],
-        times: list[datetime],
+        times: StressPeriodTimesType,
         minimum_k: float = 0.1,
         minimum_thickness: float = 0.05,
     ) -> "GridAgnosticWell":
@@ -571,10 +610,15 @@ def from_imod5_data(
             * Multiplication and addition factors need to remain constant through time
             * Same associated well cannot be assigned to multiple layers
         - The dataframe of the first projectfile timestamp is selected
-        - Rate timeseries are resampled with a time weighted mean to the
-          simulation times.
-        - When simulation times fall outside well timeseries range, the last
-          rate is forward filled.
+        - Timeseries are processed based on the ``times`` argument of this
+          method:
+            * If ``times`` is a list of datetimes, rate timeseries are resampled
+              with a time weighted mean to the simulation times. When simulation
+              times fall outside well timeseries range, the last rate is forward
+              filled.
+            * If ``times = "steady-state"``, the simulation is assumed to be
+              "steady-state" and an average rate is computed from the
+              timeseries.
         - Projectfile timestamps are not used. Even if assigned to a
           "steady-state" timestamp, the resulting dataset still uses simulation
           times.
@@ -618,8 +662,9 @@ def from_imod5_data(
         imod5_data: dict
             iMOD5 data loaded from a projectfile with
             :func:`imod.formats.prj.open_projectfile_data`
-        times: list
-            Simulation times
+        times: list[datetime] | Literal["steady-state"]
+            Simulation times, a list of datetimes for transient simulations. Or
+            the string ``"steady-state"`` for steady-state simulations.
         minimum_k: float, optional
             On creating point wells, no point wells will be placed in cells with
             a lower horizontal conductivity than this. Wells are placed when
@@ -629,10 +674,12 @@ def from_imod5_data(
             a lower thickness than this. Wells are placed when ``to_mf6_pkg`` is
             called.
         """
+
         pkg_data = imod5_data[key]
         all_well_times = get_all_imod5_prj_well_times(imod5_data)
 
-        df = _unpack_package_data(pkg_data, times, all_well_times)
+        start_times = _get_starttimes(times)  # Starts stress periods.
+        df = _unpack_package_data(pkg_data, start_times, all_well_times)
         cls._validate_imod5_depth_information(key, pkg_data, df)
 
         # Groupby unique wells, to get dataframes per time.
@@ -650,7 +697,7 @@ def from_imod5_data(
             for (var, dtype), value in zip(varnames, index_values)
         }
         cls_input["rate"] = _prepare_well_rates_from_groups(
-            pkg_data, unique_well_groups, times
+            pkg_data, unique_well_groups, start_times
         )
         cls_input["minimum_k"] = minimum_k
         cls_input["minimum_thickness"] = minimum_thickness
diff --git a/imod/tests/test_mf6/test_mf6_simulation.py b/imod/tests/test_mf6/test_mf6_simulation.py
index 73ec1b31f..8af9de08e 100644
--- a/imod/tests/test_mf6/test_mf6_simulation.py
+++ b/imod/tests/test_mf6/test_mf6_simulation.py
@@ -653,6 +653,38 @@ def test_import_from_imod5__correct_well_type(imod5_dataset):
     assert isinstance(simulation["imported_model"]["wel-WELLS_L5"], LayeredWell)
 
 
+@pytest.mark.unittest_jit
+@pytest.mark.usefixtures("imod5_dataset")
+def test_import_from_imod5__well_steady_state(imod5_dataset):
+    # Unpack
+    imod5_data = imod5_dataset[0]
+    period_data = imod5_dataset[1]
+
+    sto = imod5_data.pop("sto")
+
+    # Other arrangement
+    default_simulation_allocation_options = SimulationAllocationOptions
+    default_simulation_distributing_options = SimulationDistributingOptions
+    datelist = pd.date_range(start="1/1/1989", end="1/1/2013", freq="W")
+
+    # Act
+    simulation = Modflow6Simulation.from_imod5_data(
+        imod5_data,
+        period_data,
+        datelist,
+        default_simulation_allocation_options,
+        default_simulation_distributing_options,
+    )
+    # Assert
+    gwf = simulation["imported_model"]
+    assert "time" not in gwf["wel-WELLS_L3"].dataset.coords
+    assert "time" not in gwf["wel-WELLS_L4"].dataset.coords
+    assert "time" not in gwf["wel-WELLS_L5"].dataset.coords
+    # Teardown
+    # Reassign storage package again
+    imod5_data["sto"] = sto
+
+
 @pytest.mark.unittest_jit
 @pytest.mark.usefixtures("imod5_dataset")
 def test_import_from_imod5__nonstandard_regridding(imod5_dataset, tmp_path):
diff --git a/imod/tests/test_mf6/test_mf6_wel.py b/imod/tests/test_mf6/test_mf6_wel.py
index 91475cef7..44436cabe 100644
--- a/imod/tests/test_mf6/test_mf6_wel.py
+++ b/imod/tests/test_mf6/test_mf6_wel.py
@@ -1019,6 +1019,19 @@ def test_import_and_convert_to_mf6(imod5_dataset, tmp_path, wel_class):
     mf6_well._write("wel", [], write_context)
 
 
+@parametrize("wel_class", [Well, LayeredWell])
+@pytest.mark.usefixtures("imod5_dataset")
+def test_import__as_steady_state(imod5_dataset, wel_class):
+    data = imod5_dataset[0]
+    times = "steady-state"
+    # Import grid-agnostic well from imod5 data (it contains 1 well)
+    wel = wel_class.from_imod5_data("wel-WELLS_L3", data, times)
+
+    assert "time" not in wel.dataset.coords
+    assert wel.dataset["rate"].shape == (1,)
+    np.testing.assert_almost_equal(wel.dataset["rate"].values, -323.89361702)
+
+
 @parametrize("wel_class", [Well])
 @pytest.mark.usefixtures("imod5_dataset")
 def test_import_and_cleanup(imod5_dataset, wel_class: Well):
diff --git a/imod/tests/test_mf6/test_utilities/test_resampling.py b/imod/tests/test_mf6/test_utilities/test_resampling.py
index e24a9b722..ad8fc2be9 100644
--- a/imod/tests/test_mf6/test_utilities/test_resampling.py
+++ b/imod/tests/test_mf6/test_utilities/test_resampling.py
@@ -1,8 +1,9 @@
 from datetime import datetime
 
+import numpy as np
 import pandas as pd
 
-from imod.util.expand_repetitions import resample_timeseries
+from imod.util.expand_repetitions import average_timeseries, resample_timeseries
 
 
 def initialize_timeseries(times: list[datetime], rates: list[float]) -> pd.DataFrame:
@@ -160,3 +161,22 @@ def test_timeseries_resampling_refine_and_coarsen():
     pd.testing.assert_frame_equal(
         original_timeseries, re_coarsened_timeseries, check_dtype=False
     )
+
+
+def test_mean_timeseries():
+    # In this test, we compute the mean of a timeseries.
+    times = [datetime(1989, 1, i) for i in [1, 3, 4, 5, 6]]
+    rates = [i * 100 for i in range(1, 6)]
+    timeseries = initialize_timeseries(times, rates)
+
+    mean_timeseries = average_timeseries(timeseries)
+
+    dummy_times = [datetime(1989, 1, 1)]
+    expected_rates = np.mean(rates)
+    expected_timeseries = initialize_timeseries(dummy_times, expected_rates)
+    col_order = ["x", "y", "id", "filt_top", "filt_bot", "rate"]
+    expected_timeseries = expected_timeseries[col_order]
+
+    pd.testing.assert_frame_equal(
+        mean_timeseries, expected_timeseries, check_dtype=False
+    )
diff --git a/imod/typing/__init__.py b/imod/typing/__init__.py
index da8bb6c19..c44eb9838 100644
--- a/imod/typing/__init__.py
+++ b/imod/typing/__init__.py
@@ -2,6 +2,7 @@
 Module to define type aliases.
 """
 
+from datetime import datetime
 from typing import TYPE_CHECKING, Literal, TypeAlias, TypedDict, TypeVar, Union
 
 import numpy as np
@@ -17,6 +18,7 @@
 UnstructuredData: TypeAlias = Union[xu.UgridDataset, xu.UgridDataArray]
 FloatArray: TypeAlias = NDArray[np.floating]
 IntArray: TypeAlias = NDArray[np.int_]
+StressPeriodTimesType: TypeAlias = list[datetime] | Literal["steady-state"]
 
 
 class SelSettingsType(TypedDict, total=False):
diff --git a/imod/util/expand_repetitions.py b/imod/util/expand_repetitions.py
index 60c358b14..361b1ad31 100644
--- a/imod/util/expand_repetitions.py
+++ b/imod/util/expand_repetitions.py
@@ -44,6 +44,24 @@ def expand_repetitions(
     return expanded
 
 
+def average_timeseries(well_rate: pd.DataFrame) -> pd.DataFrame:
+    """
+    Compute the mean value of the timeseries for a single well. Time column is
+    dropped.
+
+    Parameters
+    ----------
+    well_rate:  pd.DataFrame
+        input timeseries for a single well
+    """
+    # Take first item
+    output_frame = well_rate.iloc[[0]].drop(columns=["time", "rate"])
+    # Compute mean
+    col_index = output_frame.shape[1]
+    output_frame.insert(col_index, "rate", well_rate["rate"].mean())
+    return output_frame
+
+
 def resample_timeseries(
     well_rate: pd.DataFrame, times: list[datetime] | pd.DatetimeIndex
 ) -> pd.DataFrame:
@@ -98,7 +116,8 @@ def resample_timeseries(
 
     # compute time difference from perious to current row
     time_diff_col = intermediate_df["time"].diff()
-    intermediate_df.insert(7, "time_to_next", time_diff_col.values)
+    col_index = intermediate_df.shape[1] - 1
+    intermediate_df.insert(col_index, "time_to_next", time_diff_col.values)
 
     # shift the new column 1 place down so that they become the time to the next row
     intermediate_df["time_to_next"] = intermediate_df["time_to_next"].shift(-1)