From 9637bc6d4a946797777e9b0fb5e8700c2f2a4360 Mon Sep 17 00:00:00 2001 From: wpbonelli Date: Mon, 20 Nov 2023 10:04:46 -0500 Subject: [PATCH] feat(modflow): support dataframe for pkg data --- autotest/test_mnw.py | 22 +++++++++++++++++----- autotest/test_modflow.py | 1 + flopy/modflow/mfag.py | 17 +++++++++++++---- flopy/modflow/mfchd.py | 3 +-- flopy/modflow/mfdrn.py | 3 +-- flopy/modflow/mfdrt.py | 3 +-- flopy/modflow/mffhb.py | 9 +++++++-- flopy/modflow/mfgage.py | 5 ++++- flopy/modflow/mfghb.py | 3 +-- flopy/modflow/mfhyd.py | 5 ++++- flopy/modflow/mfmnw2.py | 7 +++++++ flopy/modflow/mfriv.py | 3 +-- flopy/modflow/mfsfr2.py | 8 ++++++-- flopy/modflow/mfstr.py | 7 +++++-- flopy/modflow/mfwel.py | 3 +-- flopy/utils/mflistfile.py | 1 - flopy/utils/util_list.py | 9 ++++++--- 17 files changed, 76 insertions(+), 33 deletions(-) diff --git a/autotest/test_mnw.py b/autotest/test_mnw.py index 266c55a14b..f50036dcdb 100644 --- a/autotest/test_mnw.py +++ b/autotest/test_mnw.py @@ -23,7 +23,7 @@ def mnw1_path(example_data_path): return example_data_path / "mf2005_test" -def test_load(function_tmpdir, example_data_path, mnw2_examples_path): +def test_load(function_tmpdir, mnw2_examples_path): """t027 test load of MNW2 Package""" # load in the test problem (1 well, 3 stress periods) m = Modflow.load( @@ -94,7 +94,8 @@ def test_mnw1_load_write(function_tmpdir, mnw1_path): assert np.array_equal(v, m2.mnw1.stress_period_data[k]) -def test_make_package(function_tmpdir): +@pytest.mark.parametrize("dataframe", [True, False]) +def test_make_package(function_tmpdir, dataframe): """t027 test make MNW2 Package""" ws = function_tmpdir m4 = Modflow("mnw2example", model_ws=ws) @@ -195,6 +196,9 @@ def test_make_package(function_tmpdir): ).view(np.recarray), } + if dataframe: + node_data = pd.DataFrame(node_data) + mnw2_4 = ModflowMnw2( model=m4, mnwmax=2, @@ -257,6 +261,9 @@ def test_make_package(function_tmpdir): ).view(np.recarray), } + if dataframe: + node_data = 
pd.DataFrame(node_data) + mnw2_4 = ModflowMnw2( model=m4, mnwmax=2, @@ -294,7 +301,8 @@ def test_make_package(function_tmpdir): ) -def test_mnw2_create_file(function_tmpdir): +@pytest.mark.parametrize("dataframe", [True, False]) +def test_mnw2_create_file(function_tmpdir, dataframe): """ Test for issue #556, Mnw2 crashed if wells have multiple node lengths @@ -341,8 +349,12 @@ def test_mnw2_create_file(function_tmpdir): wellids[i], nnodes=nlayers[i], nper=len(stress_period_data.index), - node_data=node_data.to_records(index=False), - stress_period_data=stress_period_data.to_records(index=False), + node_data=node_data.to_records(index=False) + if not dataframe + else node_data, + stress_period_data=stress_period_data.to_records(index=False) + if not dataframe + else stress_period_data, ) wells.append(wl) diff --git a/autotest/test_modflow.py b/autotest/test_modflow.py index 6f628aad68..ba3fe04f48 100644 --- a/autotest/test_modflow.py +++ b/autotest/test_modflow.py @@ -5,6 +5,7 @@ from pathlib import Path import numpy as np +import pandas as pd import pytest from autotest.conftest import get_example_data_path from modflow_devtools.markers import excludes_platform, requires_exe diff --git a/flopy/modflow/mfag.py b/flopy/modflow/mfag.py index 985ee27e08..ede221485d 100644 --- a/flopy/modflow/mfag.py +++ b/flopy/modflow/mfag.py @@ -11,6 +11,7 @@ import os import numpy as np +import pandas as pd from ..pakbase import Package from ..utils.flopy_io import multi_line_strip @@ -29,9 +30,9 @@ class ModflowAg(Package): model object options : flopy.utils.OptionBlock object option block object - time_series : np.recarray + time_series : np.recarray or pd.DataFrame numpy recarray for the time series block - well_list : np.recarray + well_list : np.recarray or pd.DataFrame recarray of the well_list block irrdiversion : dict {per: np.recarray} dictionary of the irrdiversion block @@ -269,8 +270,16 @@ def __init__( else: self.options = OptionBlock("", ModflowAg) - self.time_series = 
time_series - self.well_list = well_list + self.time_series = ( + time_series.to_records(index=False) + if isinstance(time_series, pd.DataFrame) + else time_series + ) + self.well_list = ( + well_list.to_records(index=False) + if isinstance(well_list, pd.DataFrame) + else well_list + ) self.irrdiversion = irrdiversion self.irrwell = irrwell self.supwell = supwell diff --git a/flopy/modflow/mfchd.py b/flopy/modflow/mfchd.py index fb47d9ad5a..530b606da0 100644 --- a/flopy/modflow/mfchd.py +++ b/flopy/modflow/mfchd.py @@ -24,8 +24,7 @@ class ModflowChd(Package): model : model object The model object (of type :class:`flopy.modflow.mf.Modflow`) to which this package will be added. - stress_period_data : list of boundaries, recarrays, or dictionary of - boundaries. + stress_period_data : list, recarray, dataframe, or dictionary of boundaries. Each chd cell is defined through definition of layer (int), row (int), column (int), shead (float), ehead (float) diff --git a/flopy/modflow/mfdrn.py b/flopy/modflow/mfdrn.py index c94f9a39d0..c4614f5aa7 100644 --- a/flopy/modflow/mfdrn.py +++ b/flopy/modflow/mfdrn.py @@ -27,8 +27,7 @@ class ModflowDrn(Package): A flag that is used to determine if cell-by-cell budget data should be saved. If ipakcb is non-zero cell-by-cell budget data will be saved. (default is None). - stress_period_data : list of boundaries, recarrays, or dictionary of - boundaries. + stress_period_data : list, recarray, dataframe or dictionary of boundaries. Each drain cell is defined through definition of layer(int), row(int), column(int), elevation(float), conductance(float). diff --git a/flopy/modflow/mfdrt.py b/flopy/modflow/mfdrt.py index 8bb2083c76..781ddcd3dd 100644 --- a/flopy/modflow/mfdrt.py +++ b/flopy/modflow/mfdrt.py @@ -27,8 +27,7 @@ class ModflowDrt(Package): A flag that is used to determine if cell-by-cell budget data should be saved. If ipakcb is non-zero cell-by-cell budget data will be saved. (default is None). 
- stress_period_data : list of boundaries, recarrays, or dictionary of - boundaries. + stress_period_data : list, recarray, dataframe or dictionary of boundaries. Each drain return cell is defined through definition of layer(int), row(int), column(int), elevation(float), conductance(float), layerR (int) , rowR (int), colR (int) and rfprop (float). diff --git a/flopy/modflow/mffhb.py b/flopy/modflow/mffhb.py index 1e20e35b6b..64764078e3 100644 --- a/flopy/modflow/mffhb.py +++ b/flopy/modflow/mffhb.py @@ -8,6 +8,7 @@ """ import numpy as np +import pandas as pd from ..pakbase import Package from ..utils import read1d @@ -64,7 +65,7 @@ class ModflowFhb(Package): (default is 0.0) cnstm5 : float A constant multiplier for data list flwrat. (default is 1.0) - ds5 : list or numpy array or recarray + ds5 : list or numpy array or recarray or pandas dataframe Each FHB flwrat cell (dataset 5) is defined through definition of layer(int), row(int), column(int), iaux(int), flwrat[nbdtime](float). There should be nflw entries. (default is None) @@ -81,7 +82,7 @@ class ModflowFhb(Package): cnstm7 : float A constant multiplier for data list sbhedt. (default is 1.0) - ds7 : list or numpy array or recarray + ds7 : list or numpy array or recarray or pandas dataframe Each FHB sbhed cell (dataset 7) is defined through definition of layer(int), row(int), column(int), iaux(int), sbhed[nbdtime](float). There should be nhed entries. 
(default is None) @@ -211,6 +212,8 @@ def __init__( raise TypeError(msg) elif isinstance(ds5, list): ds5 = np.array(ds5) + elif isinstance(ds5, pd.DataFrame): + ds5 = ds5.to_records(index=False) # convert numpy array to a recarray if ds5.dtype != dtype: ds5 = np.core.records.fromarrays(ds5.transpose(), dtype=dtype) @@ -228,6 +231,8 @@ def __init__( raise TypeError(msg) elif isinstance(ds7, list): ds7 = np.array(ds7) + elif isinstance(ds7, pd.DataFrame): + ds7 = ds7.to_records(index=False) # convert numpy array to a recarray if ds7.dtype != dtype: ds7 = np.core.records.fromarrays(ds7.transpose(), dtype=dtype) diff --git a/flopy/modflow/mfgage.py b/flopy/modflow/mfgage.py index 16af8b5b14..48953cd0de 100644 --- a/flopy/modflow/mfgage.py +++ b/flopy/modflow/mfgage.py @@ -10,6 +10,7 @@ import os import numpy as np +import pandas as pd from ..pakbase import Package from ..utils import read_fixed_var, write_fixed_var @@ -27,7 +28,7 @@ class ModflowGage(Package): this package will be added. numgage : int The total number of gages included in the gage file (default is 0). - gage_data : list or numpy array + gage_data : list or numpy array or recarray or pandas dataframe data for dataset 2a and 2b in the gage package. If a list is provided then the list includes 2 to 3 entries (LAKE UNIT [OUTTYPE]) for each LAK Package entry and 4 entries (GAGESEG GAGERCH UNIT OUTTYPE) for @@ -132,6 +133,8 @@ def __init__( gage_data = np.core.records.fromarrays( gage_data.transpose(), dtype=dtype ) + elif isinstance(gage_data, pd.DataFrame): + gage_data = gage_data.to_records(index=False) elif isinstance(gage_data, list): d = ModflowGage.get_empty(ncells=numgage) for n in range(len(gage_data)): diff --git a/flopy/modflow/mfghb.py b/flopy/modflow/mfghb.py index 7b8d6b119d..903c88c646 100644 --- a/flopy/modflow/mfghb.py +++ b/flopy/modflow/mfghb.py @@ -27,8 +27,7 @@ class ModflowGhb(Package): A flag that is used to determine if cell-by-cell budget data should be saved. 
If ipakcb is non-zero cell-by-cell budget data will be saved. (default is 0). - stress_period_data : list of boundaries, recarray of boundaries or, - dictionary of boundaries. + stress_period_data : list, recarray, dataframe or dictionary of boundaries. Each ghb cell is defined through definition of layer(int), row(int), column(int), stage(float), conductance(float) diff --git a/flopy/modflow/mfhyd.py b/flopy/modflow/mfhyd.py index a90ac12f2b..45d593a737 100644 --- a/flopy/modflow/mfhyd.py +++ b/flopy/modflow/mfhyd.py @@ -8,6 +8,7 @@ """ import numpy as np +import pandas as pd from ..pakbase import Package from ..utils.recarray_utils import create_empty_recarray @@ -31,7 +32,7 @@ class ModflowHyd(Package): is a user-specified value that is output if a value cannot be computed at a hydrograph location. For example, the cell in which the hydrograph is located may be a no-flow cell. (default is -999.) - obsdata : list of lists, numpy array, or numpy recarray (nhyd, 7) + obsdata : list of lists, numpy array or recarray, or pandas dataframe (nhyd, 7) Each row of obsdata includes data defining pckg (3 character string), arr (2 character string), intyp (1 character string) klay (int), xl (float), yl (float), hydlbl (14 character string) for each @@ -158,6 +159,8 @@ def __init__( dtype = ModflowHyd.get_default_dtype() obs = ModflowHyd.get_empty(nhyd) + if isinstance(obsdata, pd.DataFrame): + obsdata = obsdata.to_records(index=False) if isinstance(obsdata, list): if len(obsdata) != nhyd: raise RuntimeError( diff --git a/flopy/modflow/mfmnw2.py b/flopy/modflow/mfmnw2.py index ae59bf9648..7a9a5548f5 100644 --- a/flopy/modflow/mfmnw2.py +++ b/flopy/modflow/mfmnw2.py @@ -2,6 +2,7 @@ import warnings import numpy as np +import pandas as pd from ..pakbase import Package from ..utils import MfList, check @@ -451,6 +452,8 @@ def __init__( # does this need to be Mflist? 
self.stress_period_data = self.get_empty_stress_period_data(nper) if stress_period_data is not None: + if isinstance(stress_period_data, pd.DataFrame): + stress_period_data = stress_period_data.to_records(index=False) for n in stress_period_data.dtype.names: self.stress_period_data[n] = stress_period_data[n] @@ -459,6 +462,8 @@ def __init__( np.abs(nnodes), aux_names=self.aux ) if node_data is not None: + if isinstance(node_data, pd.DataFrame): + node_data = node_data.to_records(index=False) for n in node_data.dtype.names: self.node_data[n] = node_data[n] # convert strings to lower case @@ -1054,6 +1059,8 @@ def __init__( self.node_data = self.get_empty_node_data(0, aux_names=aux) if node_data is not None: + if isinstance(node_data, pd.DataFrame): + node_data = node_data.to_records(index=False) self.node_data = self.get_empty_node_data( len(node_data), aux_names=aux ) diff --git a/flopy/modflow/mfriv.py b/flopy/modflow/mfriv.py index b734822389..a557da18a3 100644 --- a/flopy/modflow/mfriv.py +++ b/flopy/modflow/mfriv.py @@ -27,8 +27,7 @@ class ModflowRiv(Package): A flag that is used to determine if cell-by-cell budget data should be saved. If ipakcb is non-zero cell-by-cell budget data will be saved. (default is 0). - stress_period_data : list of boundaries, or recarray of boundaries, or - dictionary of boundaries. + stress_period_data : list, recarray, dataframe, or dictionary of boundaries. Each river cell is defined through definition of layer (int), row (int), column (int), stage (float), cond (float), rbot (float). diff --git a/flopy/modflow/mfsfr2.py b/flopy/modflow/mfsfr2.py index 26a7a9a24d..1287948070 100644 --- a/flopy/modflow/mfsfr2.py +++ b/flopy/modflow/mfsfr2.py @@ -172,13 +172,13 @@ class ModflowSfr2(Package): simulations (and would need to be converted to whatever units are being used in the particular simulation). 
(default is 0.0001; for MODFLOW-2005 simulations only when irtflg > 0) - reach_data : recarray + reach_data : recarray or dataframe Numpy record array of length equal to nstrm, with columns for each variable entered in item 2 (see SFR package input instructions). In following flopy convention, layer, row, column and node number (for unstructured grids) are zero-based; segment and reach are one-based. - segment_data : recarray + segment_data : recarray or dataframe Numpy record array of length equal to nss, with columns for each variable entered in items 6a, 6b and 6c (see SFR package input instructions). Segment numbers are one-based. @@ -431,6 +431,8 @@ def __init__( ) if segment_data is not None: # segment_data is a zero-d array + if isinstance(segment_data, pd.DataFrame): + segment_data = segment_data.to_records(index=False) if not isinstance(segment_data, dict): if len(segment_data.shape) == 0: segment_data = np.atleast_1d(segment_data) @@ -479,6 +481,8 @@ def __init__( # Dataset 2. self.reach_data = self.get_empty_reach_data(np.abs(self._nstrm)) if reach_data is not None: + if isinstance(reach_data, pd.DataFrame): + reach_data = reach_data.to_records(index=False) for n in reach_data.dtype.names: self.reach_data[n] = reach_data[n] diff --git a/flopy/modflow/mfstr.py b/flopy/modflow/mfstr.py index 7e16b2ee68..cb480f5235 100644 --- a/flopy/modflow/mfstr.py +++ b/flopy/modflow/mfstr.py @@ -8,6 +8,7 @@ """ import numpy as np +import pandas as pd from ..pakbase import Package from ..utils import MfList, read_fixed_var, write_fixed_var @@ -81,8 +82,8 @@ class ModflowStr(Package): datasets 6 and 8. The value for stress period data for a stress period can be an integer - (-1 or 0), a list of lists, a numpy array, or a numpy recarray. If - stress period data for a stress period contains an integer, a -1 + (-1 or 0), a list of lists, a numpy array or recarray, or a pandas + dataframe. 
If data for a stress period contains an integer, a -1 denotes data from the previous stress period will be reused and a 0 indicates there are no str reaches for this stress period. @@ -367,6 +368,8 @@ def __init__( for key, d in stress_period_data.items(): if isinstance(d, list): d = np.array(d) + if isinstance(d, pd.DataFrame): + d = d.to_records(index=False) if isinstance(d, np.recarray): e = ( "ModflowStr error: recarray dtype: {} does not match " diff --git a/flopy/modflow/mfwel.py b/flopy/modflow/mfwel.py index 0628bdc62e..69a51e42f5 100644 --- a/flopy/modflow/mfwel.py +++ b/flopy/modflow/mfwel.py @@ -28,8 +28,7 @@ class ModflowWel(Package): A flag that is used to determine if cell-by-cell budget data should be saved. If ipakcb is non-zero cell-by-cell budget data will be saved. (default is 0). - stress_period_data : list of boundaries, or recarray of boundaries, or - dictionary of boundaries + stress_period_data : list, recarray, dataframe or dictionary of boundaries. Each well is defined through definition of layer (int), row (int), column (int), flux (float). 
The simplest form is a dictionary with a lists of boundaries for each diff --git a/flopy/utils/mflistfile.py b/flopy/utils/mflistfile.py index 6ac362cef1..b24cab7a2a 100644 --- a/flopy/utils/mflistfile.py +++ b/flopy/utils/mflistfile.py @@ -12,7 +12,6 @@ import numpy as np import pandas as pd -from ..utils import import_optional_dependency from ..utils.flopy_io import get_ts_sp from ..utils.utils_def import totim_to_datetime diff --git a/flopy/utils/util_list.py b/flopy/utils/util_list.py index 2aa83be270..5f12e6ad8d 100644 --- a/flopy/utils/util_list.py +++ b/flopy/utils/util_list.py @@ -14,7 +14,6 @@ import pandas as pd from ..datbase import DataInterface, DataListInterface, DataType -from ..utils import import_optional_dependency from ..utils.recarray_utils import create_empty_recarray @@ -361,6 +360,9 @@ def __cast_data(self, data): # A single ndarray elif isinstance(data, np.ndarray): self.__cast_ndarray(0, data) + # A single dataframe + elif isinstance(data, pd.DataFrame): + self.__cast_dataframe(0, data) # A single filename elif isinstance(data, str): self.__cast_str(0, data) @@ -408,8 +410,6 @@ def __cast_ndarray(self, kper, d): f"MfList error: ndarray shape {d.shape} doesn't match " f"dtype len: {len(self.dtype)}" ) - # warnings.warn("MfList: ndarray dtype does not match self " +\ - # "dtype, trying to cast") try: self.__data[kper] = np.core.records.fromarrays( d.transpose(), dtype=self.dtype @@ -420,6 +420,9 @@ def __cast_ndarray(self, kper, d): ) self.__vtype[kper] = np.recarray + def __cast_dataframe(self, kper, d): + self.__cast_recarray(kper, d.to_records(index=False)) + def get_dataframe(self, squeeze=False): """ Cast recarrays for stress periods into single