From 9637bc6d4a946797777e9b0fb5e8700c2f2a4360 Mon Sep 17 00:00:00 2001
From: wpbonelli <wbonelli@ucar.edu>
Date: Mon, 20 Nov 2023 10:04:46 -0500
Subject: [PATCH] feat(modflow): support dataframe for pkg data

---
 autotest/test_mnw.py      | 22 +++++++++++++++++-----
 autotest/test_modflow.py  |  1 +
 flopy/modflow/mfag.py     | 17 +++++++++++++----
 flopy/modflow/mfchd.py    |  3 +--
 flopy/modflow/mfdrn.py    |  3 +--
 flopy/modflow/mfdrt.py    |  3 +--
 flopy/modflow/mffhb.py    |  9 +++++++--
 flopy/modflow/mfgage.py   |  5 ++++-
 flopy/modflow/mfghb.py    |  3 +--
 flopy/modflow/mfhyd.py    |  5 ++++-
 flopy/modflow/mfmnw2.py   |  7 +++++++
 flopy/modflow/mfriv.py    |  3 +--
 flopy/modflow/mfsfr2.py   |  8 ++++++--
 flopy/modflow/mfstr.py    |  7 +++++--
 flopy/modflow/mfwel.py    |  3 +--
 flopy/utils/mflistfile.py |  1 -
 flopy/utils/util_list.py  |  9 ++++++---
 17 files changed, 76 insertions(+), 33 deletions(-)

diff --git a/autotest/test_mnw.py b/autotest/test_mnw.py
index 266c55a14b..f50036dcdb 100644
--- a/autotest/test_mnw.py
+++ b/autotest/test_mnw.py
@@ -23,7 +23,7 @@ def mnw1_path(example_data_path):
     return example_data_path / "mf2005_test"
 
 
-def test_load(function_tmpdir, example_data_path, mnw2_examples_path):
+def test_load(function_tmpdir, mnw2_examples_path):
     """t027 test load of MNW2 Package"""
     # load in the test problem (1 well, 3 stress periods)
     m = Modflow.load(
@@ -94,7 +94,8 @@ def test_mnw1_load_write(function_tmpdir, mnw1_path):
         assert np.array_equal(v, m2.mnw1.stress_period_data[k])
 
 
-def test_make_package(function_tmpdir):
+@pytest.mark.parametrize("dataframe", [True, False])
+def test_make_package(function_tmpdir, dataframe):
     """t027 test make MNW2 Package"""
     ws = function_tmpdir
     m4 = Modflow("mnw2example", model_ws=ws)
@@ -195,6 +196,9 @@ def test_make_package(function_tmpdir):
         ).view(np.recarray),
     }
 
+    if dataframe:
+        node_data = pd.DataFrame(node_data)
+
     mnw2_4 = ModflowMnw2(
         model=m4,
         mnwmax=2,
@@ -257,6 +261,9 @@ def test_make_package(function_tmpdir):
         ).view(np.recarray),
     }
 
+    if dataframe:
+        node_data = pd.DataFrame(node_data)
+
     mnw2_4 = ModflowMnw2(
         model=m4,
         mnwmax=2,
@@ -294,7 +301,8 @@ def test_make_package(function_tmpdir):
     )
 
 
-def test_mnw2_create_file(function_tmpdir):
+@pytest.mark.parametrize("dataframe", [True, False])
+def test_mnw2_create_file(function_tmpdir, dataframe):
     """
     Test for issue #556, Mnw2 crashed if wells have
     multiple node lengths
@@ -341,8 +349,12 @@ def test_mnw2_create_file(function_tmpdir):
             wellids[i],
             nnodes=nlayers[i],
             nper=len(stress_period_data.index),
-            node_data=node_data.to_records(index=False),
-            stress_period_data=stress_period_data.to_records(index=False),
+            node_data=node_data
+            if dataframe
+            else node_data.to_records(index=False),
+            stress_period_data=stress_period_data
+            if dataframe
+            else stress_period_data.to_records(index=False),
         )
 
         wells.append(wl)
diff --git a/autotest/test_modflow.py b/autotest/test_modflow.py
index 6f628aad68..ba3fe04f48 100644
--- a/autotest/test_modflow.py
+++ b/autotest/test_modflow.py
@@ -5,6 +5,7 @@
 from pathlib import Path
 
 import numpy as np
+import pandas as pd
 import pytest
 from autotest.conftest import get_example_data_path
 from modflow_devtools.markers import excludes_platform, requires_exe
diff --git a/flopy/modflow/mfag.py b/flopy/modflow/mfag.py
index 985ee27e08..ede221485d 100644
--- a/flopy/modflow/mfag.py
+++ b/flopy/modflow/mfag.py
@@ -11,6 +11,7 @@
 import os
 
 import numpy as np
+import pandas as pd
 
 from ..pakbase import Package
 from ..utils.flopy_io import multi_line_strip
@@ -29,9 +30,9 @@ class ModflowAg(Package):
         model object
     options : flopy.utils.OptionBlock object
         option block object
-    time_series : np.recarray
+    time_series : np.recarray or pd.DataFrame
         numpy recarray for the time series block
-    well_list : np.recarray
+    well_list : np.recarray or pd.DataFrame
         recarray of the well_list block
     irrdiversion : dict {per: np.recarray}
         dictionary of the irrdiversion block
@@ -269,8 +270,16 @@ def __init__(
         else:
             self.options = OptionBlock("", ModflowAg)
 
-        self.time_series = time_series
-        self.well_list = well_list
+        self.time_series = (
+            time_series.to_records(index=False)
+            if isinstance(time_series, pd.DataFrame)
+            else time_series
+        )
+        self.well_list = (
+            well_list.to_records(index=False)
+            if isinstance(well_list, pd.DataFrame)
+            else well_list
+        )
         self.irrdiversion = irrdiversion
         self.irrwell = irrwell
         self.supwell = supwell
diff --git a/flopy/modflow/mfchd.py b/flopy/modflow/mfchd.py
index fb47d9ad5a..530b606da0 100644
--- a/flopy/modflow/mfchd.py
+++ b/flopy/modflow/mfchd.py
@@ -24,8 +24,7 @@ class ModflowChd(Package):
     model : model object
         The model object (of type :class:`flopy.modflow.mf.Modflow`) to which
         this package will be added.
-    stress_period_data : list of boundaries, recarrays, or dictionary of
-        boundaries.
+    stress_period_data : list, recarray, dataframe, or dictionary of boundaries.
 
         Each chd cell is defined through definition of
         layer (int), row (int), column (int), shead (float), ehead (float)
diff --git a/flopy/modflow/mfdrn.py b/flopy/modflow/mfdrn.py
index c94f9a39d0..c4614f5aa7 100644
--- a/flopy/modflow/mfdrn.py
+++ b/flopy/modflow/mfdrn.py
@@ -27,8 +27,7 @@ class ModflowDrn(Package):
         A flag that is used to determine if cell-by-cell budget data should be
         saved. If ipakcb is non-zero cell-by-cell budget data will be saved.
         (default is None).
-    stress_period_data : list of boundaries, recarrays, or dictionary of
-        boundaries.
+    stress_period_data : list, recarray, dataframe, or dictionary of boundaries.
         Each drain cell is defined through definition of
         layer(int), row(int), column(int), elevation(float),
         conductance(float).
diff --git a/flopy/modflow/mfdrt.py b/flopy/modflow/mfdrt.py
index 8bb2083c76..781ddcd3dd 100644
--- a/flopy/modflow/mfdrt.py
+++ b/flopy/modflow/mfdrt.py
@@ -27,8 +27,7 @@ class ModflowDrt(Package):
         A flag that is used to determine if cell-by-cell budget data should be
         saved. If ipakcb is non-zero cell-by-cell budget data will be saved.
         (default is None).
-    stress_period_data : list of boundaries, recarrays, or dictionary of
-        boundaries.
+    stress_period_data : list, recarray, dataframe, or dictionary of boundaries.
         Each drain return cell is defined through definition of
         layer(int), row(int), column(int), elevation(float),
         conductance(float), layerR (int) , rowR (int), colR (int) and rfprop (float).
diff --git a/flopy/modflow/mffhb.py b/flopy/modflow/mffhb.py
index 1e20e35b6b..64764078e3 100644
--- a/flopy/modflow/mffhb.py
+++ b/flopy/modflow/mffhb.py
@@ -8,6 +8,7 @@
 
 """
 import numpy as np
+import pandas as pd
 
 from ..pakbase import Package
 from ..utils import read1d
@@ -64,7 +65,7 @@ class ModflowFhb(Package):
         (default is 0.0)
     cnstm5 : float
         A constant multiplier for data list flwrat. (default is 1.0)
-    ds5 : list or numpy array or recarray
+    ds5 : list or numpy array or recarray or pandas dataframe
         Each FHB flwrat cell (dataset 5) is defined through definition of
         layer(int), row(int), column(int), iaux(int), flwrat[nbdtime](float).
         There should be nflw entries. (default is None)
@@ -81,7 +82,7 @@ class ModflowFhb(Package):
 
     cnstm7 : float
         A constant multiplier for data list sbhedt. (default is 1.0)
-    ds7 : list or numpy array or recarray
+    ds7 : list or numpy array or recarray or pandas dataframe
         Each FHB sbhed cell (dataset 7) is defined through definition of
         layer(int), row(int), column(int), iaux(int), sbhed[nbdtime](float).
         There should be nhed entries. (default is None)
@@ -211,6 +212,8 @@ def __init__(
                 raise TypeError(msg)
             elif isinstance(ds5, list):
                 ds5 = np.array(ds5)
+            elif isinstance(ds5, pd.DataFrame):
+                ds5 = ds5.to_records(index=False)
             # convert numpy array to a recarray
             if ds5.dtype != dtype:
                 ds5 = np.core.records.fromarrays(ds5.transpose(), dtype=dtype)
@@ -228,6 +231,8 @@ def __init__(
                 raise TypeError(msg)
             elif isinstance(ds7, list):
                 ds7 = np.array(ds7)
+            elif isinstance(ds7, pd.DataFrame):
+                ds7 = ds7.to_records(index=False)
             # convert numpy array to a recarray
             if ds7.dtype != dtype:
                 ds7 = np.core.records.fromarrays(ds7.transpose(), dtype=dtype)
diff --git a/flopy/modflow/mfgage.py b/flopy/modflow/mfgage.py
index 16af8b5b14..48953cd0de 100644
--- a/flopy/modflow/mfgage.py
+++ b/flopy/modflow/mfgage.py
@@ -10,6 +10,7 @@
 import os
 
 import numpy as np
+import pandas as pd
 
 from ..pakbase import Package
 from ..utils import read_fixed_var, write_fixed_var
@@ -27,7 +28,7 @@ class ModflowGage(Package):
         this package will be added.
     numgage : int
         The total number of gages included in the gage file (default is 0).
-    gage_data : list or numpy array
+    gage_data : list or numpy array or recarray or pandas dataframe
         data for dataset 2a and 2b in the gage package. If a list is provided
         then the list includes 2 to 3 entries (LAKE UNIT [OUTTYPE]) for each
         LAK Package entry and 4 entries (GAGESEG GAGERCH UNIT OUTTYPE) for
@@ -132,6 +133,8 @@ def __init__(
                     gage_data = np.core.records.fromarrays(
                         gage_data.transpose(), dtype=dtype
                     )
+            elif isinstance(gage_data, pd.DataFrame):
+                gage_data = gage_data.to_records(index=False)
             elif isinstance(gage_data, list):
                 d = ModflowGage.get_empty(ncells=numgage)
                 for n in range(len(gage_data)):
diff --git a/flopy/modflow/mfghb.py b/flopy/modflow/mfghb.py
index 7b8d6b119d..903c88c646 100644
--- a/flopy/modflow/mfghb.py
+++ b/flopy/modflow/mfghb.py
@@ -27,8 +27,7 @@ class ModflowGhb(Package):
         A flag that is used to determine if cell-by-cell budget data should be
         saved. If ipakcb is non-zero cell-by-cell budget data will be saved.
         (default is 0).
-    stress_period_data : list of boundaries, recarray of boundaries or,
-        dictionary of boundaries.
+    stress_period_data : list, recarray, dataframe, or dictionary of boundaries.
 
         Each ghb cell is defined through definition of
         layer(int), row(int), column(int), stage(float), conductance(float)
diff --git a/flopy/modflow/mfhyd.py b/flopy/modflow/mfhyd.py
index a90ac12f2b..45d593a737 100644
--- a/flopy/modflow/mfhyd.py
+++ b/flopy/modflow/mfhyd.py
@@ -8,6 +8,7 @@
 
 """
 import numpy as np
+import pandas as pd
 
 from ..pakbase import Package
 from ..utils.recarray_utils import create_empty_recarray
@@ -31,7 +32,7 @@ class ModflowHyd(Package):
         is a user-specified value that is output if a value cannot be computed
         at a hydrograph location. For example, the cell in which the hydrograph
         is located may be a no-flow cell. (default is -999.)
-    obsdata : list of lists, numpy array, or numpy recarray (nhyd, 7)
+    obsdata : list of lists, numpy array or recarray, or pandas dataframe (nhyd, 7)
         Each row of obsdata includes data defining pckg (3 character string),
         arr (2 character string), intyp (1 character string) klay (int),
         xl (float), yl (float), hydlbl (14 character string) for each
@@ -158,6 +159,8 @@ def __init__(
 
         dtype = ModflowHyd.get_default_dtype()
         obs = ModflowHyd.get_empty(nhyd)
+        if isinstance(obsdata, pd.DataFrame):
+            obsdata = obsdata.to_records(index=False)
         if isinstance(obsdata, list):
             if len(obsdata) != nhyd:
                 raise RuntimeError(
diff --git a/flopy/modflow/mfmnw2.py b/flopy/modflow/mfmnw2.py
index ae59bf9648..7a9a5548f5 100644
--- a/flopy/modflow/mfmnw2.py
+++ b/flopy/modflow/mfmnw2.py
@@ -2,6 +2,7 @@
 import warnings
 
 import numpy as np
+import pandas as pd
 
 from ..pakbase import Package
 from ..utils import MfList, check
@@ -451,6 +452,8 @@ def __init__(
         # does this need to be Mflist?
         self.stress_period_data = self.get_empty_stress_period_data(nper)
         if stress_period_data is not None:
+            if isinstance(stress_period_data, pd.DataFrame):
+                stress_period_data = stress_period_data.to_records(index=False)
             for n in stress_period_data.dtype.names:
                 self.stress_period_data[n] = stress_period_data[n]
 
@@ -459,6 +462,8 @@ def __init__(
             np.abs(nnodes), aux_names=self.aux
         )
         if node_data is not None:
+            if isinstance(node_data, pd.DataFrame):
+                node_data = node_data.to_records(index=False)
             for n in node_data.dtype.names:
                 self.node_data[n] = node_data[n]
                 # convert strings to lower case
@@ -1054,6 +1059,8 @@ def __init__(
         self.node_data = self.get_empty_node_data(0, aux_names=aux)
 
         if node_data is not None:
+            if isinstance(node_data, pd.DataFrame):
+                node_data = node_data.to_records(index=False)
             self.node_data = self.get_empty_node_data(
                 len(node_data), aux_names=aux
             )
diff --git a/flopy/modflow/mfriv.py b/flopy/modflow/mfriv.py
index b734822389..a557da18a3 100644
--- a/flopy/modflow/mfriv.py
+++ b/flopy/modflow/mfriv.py
@@ -27,8 +27,7 @@ class ModflowRiv(Package):
         A flag that is used to determine if cell-by-cell budget data should be
         saved. If ipakcb is non-zero cell-by-cell budget data will be saved.
         (default is 0).
-    stress_period_data : list of boundaries, or recarray of boundaries, or
-        dictionary of boundaries.
+    stress_period_data : list, recarray, dataframe, or dictionary of boundaries.
         Each river cell is defined through definition of
         layer (int), row (int), column (int), stage (float), cond (float),
         rbot (float).
diff --git a/flopy/modflow/mfsfr2.py b/flopy/modflow/mfsfr2.py
index 26a7a9a24d..1287948070 100644
--- a/flopy/modflow/mfsfr2.py
+++ b/flopy/modflow/mfsfr2.py
@@ -172,13 +172,13 @@ class ModflowSfr2(Package):
         simulations (and would need to be converted to whatever units are being
         used in the particular simulation). (default is 0.0001; for
         MODFLOW-2005 simulations only when irtflg > 0)
-    reach_data : recarray
+    reach_data : recarray or dataframe
         Numpy record array of length equal to nstrm, with columns for each
         variable entered in item 2 (see SFR package input instructions). In
         following flopy convention, layer, row, column and node number
         (for unstructured grids) are zero-based; segment and reach are
         one-based.
-    segment_data : recarray
+    segment_data : recarray or dataframe
         Numpy record array of length equal to nss, with columns for each
         variable entered in items 6a, 6b and 6c (see SFR package input
         instructions). Segment numbers are one-based.
@@ -431,6 +431,8 @@ def __init__(
         )
         if segment_data is not None:
             # segment_data is a zero-d array
+            if isinstance(segment_data, pd.DataFrame):
+                segment_data = segment_data.to_records(index=False)
             if not isinstance(segment_data, dict):
                 if len(segment_data.shape) == 0:
                     segment_data = np.atleast_1d(segment_data)
@@ -479,6 +481,8 @@ def __init__(
         # Dataset 2.
         self.reach_data = self.get_empty_reach_data(np.abs(self._nstrm))
         if reach_data is not None:
+            if isinstance(reach_data, pd.DataFrame):
+                reach_data = reach_data.to_records(index=False)
             for n in reach_data.dtype.names:
                 self.reach_data[n] = reach_data[n]
 
diff --git a/flopy/modflow/mfstr.py b/flopy/modflow/mfstr.py
index 7e16b2ee68..cb480f5235 100644
--- a/flopy/modflow/mfstr.py
+++ b/flopy/modflow/mfstr.py
@@ -8,6 +8,7 @@
 
 """
 import numpy as np
+import pandas as pd
 
 from ..pakbase import Package
 from ..utils import MfList, read_fixed_var, write_fixed_var
@@ -81,8 +82,8 @@ class ModflowStr(Package):
         datasets 6 and 8.
 
         The value for stress period data for a stress period can be an integer
-        (-1 or 0), a list of lists, a numpy array, or a numpy recarray. If
-        stress period data for a stress period contains an integer, a -1
+        (-1 or 0), a list of lists, a numpy array or recarray, or a pandas
+        dataframe. If data for a stress period contains an integer, a -1
         denotes data from the previous stress period will be reused and a 0
         indicates there are no str reaches for this stress period.
 
@@ -367,6 +368,8 @@ def __init__(
             for key, d in stress_period_data.items():
                 if isinstance(d, list):
                     d = np.array(d)
+                if isinstance(d, pd.DataFrame):
+                    d = d.to_records(index=False)
                 if isinstance(d, np.recarray):
                     e = (
                         "ModflowStr error: recarray dtype: {} does not match "
diff --git a/flopy/modflow/mfwel.py b/flopy/modflow/mfwel.py
index 0628bdc62e..69a51e42f5 100644
--- a/flopy/modflow/mfwel.py
+++ b/flopy/modflow/mfwel.py
@@ -28,8 +28,7 @@ class ModflowWel(Package):
         A flag that is used to determine if cell-by-cell budget data should be
         saved. If ipakcb is non-zero cell-by-cell budget data will be saved.
         (default is 0).
-    stress_period_data : list of boundaries, or recarray of boundaries, or
-        dictionary of boundaries
+    stress_period_data : list, recarray, dataframe, or dictionary of boundaries.
         Each well is defined through definition of
         layer (int), row (int), column (int), flux (float).
         The simplest form is a dictionary with a lists of boundaries for each
diff --git a/flopy/utils/mflistfile.py b/flopy/utils/mflistfile.py
index 6ac362cef1..b24cab7a2a 100644
--- a/flopy/utils/mflistfile.py
+++ b/flopy/utils/mflistfile.py
@@ -12,7 +12,6 @@
 import numpy as np
 import pandas as pd
 
-from ..utils import import_optional_dependency
 from ..utils.flopy_io import get_ts_sp
 from ..utils.utils_def import totim_to_datetime
 
diff --git a/flopy/utils/util_list.py b/flopy/utils/util_list.py
index 2aa83be270..5f12e6ad8d 100644
--- a/flopy/utils/util_list.py
+++ b/flopy/utils/util_list.py
@@ -14,7 +14,6 @@
 import pandas as pd
 
 from ..datbase import DataInterface, DataListInterface, DataType
-from ..utils import import_optional_dependency
 from ..utils.recarray_utils import create_empty_recarray
 
 
@@ -361,6 +360,9 @@ def __cast_data(self, data):
         # A single ndarray
         elif isinstance(data, np.ndarray):
             self.__cast_ndarray(0, data)
+        # A single dataframe
+        elif isinstance(data, pd.DataFrame):
+            self.__cast_dataframe(0, data)
         # A single filename
         elif isinstance(data, str):
             self.__cast_str(0, data)
@@ -408,8 +410,6 @@ def __cast_ndarray(self, kper, d):
                 f"MfList error: ndarray shape {d.shape} doesn't match "
                 f"dtype len: {len(self.dtype)}"
             )
-            # warnings.warn("MfList: ndarray dtype does not match self " +\
-            #               "dtype, trying to cast")
         try:
             self.__data[kper] = np.core.records.fromarrays(
                 d.transpose(), dtype=self.dtype
@@ -420,6 +420,9 @@ def __cast_ndarray(self, kper, d):
             )
         self.__vtype[kper] = np.recarray
 
+    def __cast_dataframe(self, kper, d):
+        self.__cast_recarray(kper, d.to_records(index=False))
+
     def get_dataframe(self, squeeze=False):
         """
         Cast recarrays for stress periods into single