From ad981a200f05d9e326569f62c8bedfa84e015fb4 Mon Sep 17 00:00:00 2001
From: Wolfgang Preimesberger
Date: Wed, 13 Nov 2024 00:25:45 +0100
Subject: [PATCH] Prepare v0.16.1 (numpy<2) release (#309)

* Fix and filter warnings, remove old plotting module

* Update CI build
---
 .github/workflows/ci.yml                      |  15 +-
 CHANGELOG.rst                                 |   6 +
 environment.yml                               |   4 +-
 setup.cfg                                     |  10 +-
 src/pytesmo/time_series/grouping.py           |   8 +-
 src/pytesmo/time_series/plotting.py           | 160 ------------
 src/pytesmo/utils.py                          |   6 +-
 src/pytesmo/validation_framework/adapters.py  |   4 +-
 .../metric_calculators.py                     | 108 ++++----
 src/pytesmo/validation_framework/upscaling.py |   6 +-
 .../validation_framework/validation.py        |   5 +-
 tests/test_docs/test_examples.py              |   6 +
 tests/test_scaling.py                         |  13 +-
 tests/test_time_series/test_plotting.py       |  54 ----
 .../test_adapters.py                          |  12 +-
 .../test_data_manager.py                      |  27 +-
 .../test_error_handling.py                    |  34 ++-
 .../test_metric_calculators.py                | 240 +++++++++++-------
 .../test_upscaling.py                         |  38 +--
 .../test_validation.py                        |  34 +--
 20 files changed, 342 insertions(+), 448 deletions(-)
 delete mode 100755 src/pytesmo/time_series/plotting.py
 delete mode 100644 tests/test_time_series/test_plotting.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a88ad726..9bbc4ac0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -51,9 +51,9 @@ jobs:
       - name: Export Environment
         shell: bash -l {0}
         run: |
-          mkdir -p .artifacts
+          mkdir -p artifacts
           filename=env_py${{ matrix.python-version }}_${{ matrix.os }}.yml
-          conda env export --no-builds | grep -v "prefix" > .artifacts/$filename
+          conda env export --no-builds | grep -v "prefix" > artifacts/$filename
      - name: Install package and test
         shell: bash -l {0}
         run: |
@@ -71,22 +71,23 @@
         shell: bash -l {0}
         run: |
           git status
-          pip install setuptools_scm
+          pip install setuptools_scm twine
           if [ ${{ matrix.os }} == "windows-latest" ]
           then # build whls on windows
             pip install wheel
-            python setup.py bdist_wheel --dist-dir .artifacts/dist
+            python setup.py bdist_wheel --dist-dir artifacts/dist
           else # build dist on linux
-            python setup.py sdist --dist-dir .artifacts/dist
+            python setup.py sdist --dist-dir artifacts/dist
           fi
-          ls .artifacts/dist
+          ls artifacts/dist
+          twine check artifacts/dist/*
       - name: Upload Artifacts
         uses: actions/upload-artifact@v4
         with:
           name: Artifacts-${{ matrix.python-version }}-${{ matrix.os }}
-          path: .artifacts/*
+          path: artifacts/*
   coveralls:
     name: Submit Coveralls 👚
     needs: build
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 25bf8f05..c9f4e749 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -4,6 +4,12 @@ Changelog
 
 Unreleased changes in master
 ============================
+
+Version 0.16.1, 2024-11-13
+==========================
+- THIS VERSION IS STILL COMPATIBLE WITH ``NUMPY<2.0``; from v0.17 on, pytesmo will require ``numpy>=2.0``
+- The old ``pytesmo.time_series.plotting`` module was removed
+- Fixed and filtered many warnings that were previously emitted when running the tests
 - Fixed an issue with the intra-annual metrics adapter when an empty time series is passed (PR `#307 `_)
 - Metapackage updated (pyscaffold 4.5) (PR `#307 `_)
 - C modules were outdated and could not be compiled, therefore rebuilt (PR `#307 `_)
diff --git a/environment.yml b/environment.yml
index a2734326..77aa7f40 100644
--- a/environment.yml
+++ b/environment.yml
@@ -10,7 +10,7 @@ dependencies:
   - numpy<2.0.0
   - numba
   - scipy>=0.12
-  - pandas>=0.11.0,!=0.15.2
+  - pandas>=0.23.0
   - netcdf4>=1.0.1,!=1.6.2
   - cython>=0.29.21
   - scikit-learn
@@ -34,7 +34,7 @@ dependencies:
   - ipykernel
   - sphinx_rtd_theme
   - ascat>=2.0
-  - ismn==1.3.4
+  - ismn==1.5.1
   - pytest
   - pytest-cov
   - pytest-mpl
diff --git a/setup.cfg b/setup.cfg
index a08464cd..a14609df 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -33,7 +33,7 @@ install_requires =
     importlib-metadata; python_version<"3.8"
     numpy>=1.7.0,<2.0.0
     scipy>=0.12
-    pandas>=0.11.0,!=0.15.2
+    pandas>=0.23.0
     matplotlib>=1.2.0
     netCDF4>=1.0.1,!=1.6.2
     pygeogrids
@@ -103,6 +103,7 @@ norecursedirs =
 markers =
     full_framework : marks slow test that use the whole validation framework (deselect with '-m "not full_framework"')
     slow : marks slow tests (deselect with '-m "not slow"')
+    doc_example : marks slow tests that test Python code from documentation
 testpaths = tests
 # This removes some of the warnings that show up with pytest but are not an issue
 filterwarnings =
@@ -122,6 +123,13 @@
     ignore:`np.bool` is a deprecated alias for the builtin `bool`
     # this comes from the `test_cci` in `test_data_averager`
     ignore: IOError in reading ISMN data
+    # old CDF matching method
+    ignore:Use the new implementation 'cdf_match' instead.:DeprecationWarning
+    # ascat package prints some warnings, doesn't matter for pytesmo
+    ignore::UserWarning:^ascat
+    # ismn package used deprecated version of this, doesn't matter for pytesmo
+    ignore:The 'parallel_process_async' method was renamed to `parallel_process`.:DeprecationWarning
+
 
 [aliases]
 dists = bdist_wheel
diff --git a/src/pytesmo/time_series/grouping.py b/src/pytesmo/time_series/grouping.py
index 1fadf65d..ed52250e 100644
--- a/src/pytesmo/time_series/grouping.py
+++ b/src/pytesmo/time_series/grouping.py
@@ -366,6 +366,8 @@ def filter(self, idx: pd.DatetimeIndex):
         #     selection = dat.query(" | ".join(cond)).index
 
         if self.yearless_date_ranges is not None:
+            cols = {}
+
             for i, gdrange in enumerate(self.yearless_date_ranges):
                 for y in np.unique(idx.year):
                     start = gdrange[0]
@@ -385,7 +387,11 @@
 
                     end_dt = end.to_datetime(years=y)
 
-                    mask[f"gen_range{y}-{i}"] = (idx >= start_dt) & (
+                    cols[f"gen_range{y}-{i}"] = (idx >= start_dt) & (
                         idx <= end_dt)
 
+            mask = pd.concat(
+                [mask, pd.DataFrame(index=mask.index, data=cols)],
+                axis=1)
+
         return mask.any(axis=1, bool_only=True)
diff --git a/src/pytesmo/time_series/plotting.py b/src/pytesmo/time_series/plotting.py
deleted file mode 100755
index f2bbbf7e..00000000
--- a/src/pytesmo/time_series/plotting.py
+++ /dev/null
@@ -1,160 +0,0 @@
-# Copyright (c) 2014,Vienna University of Technology,
-# Department of Geodesy and Geoinformation
-# All rights reserved.
-
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#    * Redistributions of source code must retain the above copyright
-#      notice, this list of conditions and the following disclaimer.
-#    * Redistributions in binary form must reproduce the above copyright
-#      notice, this list of conditions and the following disclaimer in the
-#      documentation and/or other materials provided with the distribution.
-#    * Neither the name of the Vienna University of Technology,
-#      Department of Geodesy and Geoinformation nor the
-#      names of its contributors may be used to endorse or promote products
-#      derived from this software without specific prior written permission.
-
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
-# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-'''
-Created on Mar 7, 2014
-
-Plot anomalies around climatology using colors
-
-@author: Christoph Paulik christoph.paulik@geo.tuwien.ac.at
-'''
-
-import matplotlib.pyplot as plt
-import matplotlib.gridspec as gridspec
-import pandas as pd
-
-import pytesmo.time_series.anomaly as anom
-
-
-def plot_clim_anom(df, clim=None, axes=None, markersize=0.75,
-                   mfc='0.3', mec='0.3', clim_color='0.0',
-                   clim_linewidth=0.5, clim_linestyle='-',
-                   pos_anom_color='#799ADA', neg_anom_color='#FD8086',
-                   anom_linewidth=0.2, add_titles=True):
-    """
-    Takes a pandas DataFrame and calculates the climatology and anomaly
-    and plots them in a nice way for each column
-
-    Parameters
-    ----------
-    df : pandas.DataFrame
-    clim : pandas.DataFrame, optional
-        if given these climatologies will be used
-        if not given then climatologies will be calculated
-        this DataFrame must have the same number of columns as df
-        and also the column names.
-        each climatology must have doy as index.
-    axes : list of matplotlib.Axes, optional
-        list of axes on which each column should be plotted
-        if not given a standard layout is generated
-    markersize : float, optional
-        size of the markers for the datapoints
-    mfc : matplotlib color, optional
-        markerfacecolor, color of the marker face
-    mec : matplotlib color, optional
-        markeredgecolor
-    clim_color : matplotlib color, optional
-        color of the climatology
-    clim_linewidth : float, optional
-        linewidth of the climatology
-    clim_linestyle : string, optional
-        linestyle of the climatology
-    pos_anom_color : matplotlib color, optional
-        color of the positive anomaly
-    neg_anom_color : matplotlib color, optional
-        color of the negative anomaly
-    anom_linewidth : float, optional
-        linewidth of the anomaly lines
-    add_titles : boolean, optional
-        if set each subplot will have it's column name as title
-        Default : True
-
-    Returns
-    -------
-    Figure : matplotlib.Figure
-        if no axes were given
-    axes : list of matploblib.Axes
-        if no axes were given
-    """
-
-    if type(df) == pd.Series:
-        df = pd.DataFrame(df)
-
-    nr_columns = len(df.columns)
-
-    # make own axis if necessary
-    if axes is None:
-        own_axis = True
-        gs = gridspec.GridSpec(nr_columns, 1, right=0.8)
-
-        fig = plt.figure(num=None, figsize=(6, 2 * nr_columns),
-                         dpi=150, facecolor='w', edgecolor='k')
-
-        last_axis = fig.add_subplot(gs[nr_columns - 1])
-        axes = []
-        for i, grid in enumerate(gs):
-            if i < nr_columns - 1:
-                ax = fig.add_subplot(grid, sharex=last_axis)
-                axes.append(ax)
-                ax.xaxis.set_visible(False)
-        axes.append(last_axis)
-
-    else:
-        own_axis = False
-
-    for i, column in enumerate(df):
-        Ser = df[column]
-        ax = axes[i]
-
-        if clim is None:
-            clima = anom.calc_climatology(Ser)
-        else:
-            clima = pd.Series(clim[column])
-        anomaly = anom.calc_anomaly(Ser, climatology=clima, return_clim=True)
-
-        anomaly[Ser.name] = Ser
-        anomaly = anomaly.dropna()
-
-        pos_anom = anomaly[Ser.name].values > anomaly['climatology'].values
-        neg_anom = anomaly[Ser.name].values < anomaly['climatology'].values
-
-        ax.plot(anomaly.index, anomaly[Ser.name].values, 'o',
-                markersize=markersize, mfc=mfc, mec=mec)
-
-        ax.plot(anomaly.index, anomaly['climatology'].values,
-                linestyle=clim_linestyle,
-                color=clim_color,
-                linewidth=clim_linewidth)
-
-        ax.fill_between(anomaly.index,
-                        anomaly[Ser.name].values,
-                        anomaly['climatology'].values, interpolate=True,
-                        where=pos_anom, color=pos_anom_color,
-                        linewidth=anom_linewidth)
-        ax.fill_between(anomaly.index,
-                        anomaly[Ser.name].values,
-                        anomaly['climatology'].values, interpolate=True,
-                        where=neg_anom, color=neg_anom_color,
-                        linewidth=anom_linewidth)
-        if add_titles:
-            ax.set_title(column)
-
-    if own_axis:
-        return fig, axes
-    else:
-        return None, None
diff --git a/src/pytesmo/utils.py b/src/pytesmo/utils.py
index c38c7131..a7bb17ce 100644
--- a/src/pytesmo/utils.py
+++ b/src/pytesmo/utils.py
@@ -47,7 +47,7 @@ def rootdir() -> Path:
 
 def deprecated(message: str = None):
     """
-    Decorator for classes or functions to mark them as deprecated.
+    Decorator for class methods or functions to mark them as deprecated.
 
     If the decorator is applied without a specific message (`@deprecated()`),
     the default warning is shown when using the function/class.
     To specify a custom message use it like:
@@ -67,13 +67,11 @@ def decorator(src):
 
         @functools.wraps(src)
         def new_func(*args, **kwargs):
-            warnings.simplefilter('always', DeprecationWarning)
-
             warnings.warn(
                 default_msg if message is None else message,
                 category=DeprecationWarning,
                 stacklevel=2)
-            warnings.simplefilter('default', DeprecationWarning)
+
             return src(*args, **kwargs)
 
         return new_func
diff --git a/src/pytesmo/validation_framework/adapters.py b/src/pytesmo/validation_framework/adapters.py
index 115d71ba..256217b6 100644
--- a/src/pytesmo/validation_framework/adapters.py
+++ b/src/pytesmo/validation_framework/adapters.py
@@ -699,9 +699,9 @@ def _adapt(self, data: DataFrame) -> DataFrame:
 
         if self.drop_original:
             if self.time_offset_fields is not None:
-                data.drop(columns=self.time_offset_fields, inplace=True)
+                data = data.drop(columns=self.time_offset_fields)
             if self.base_time_field in data.columns:
-                data.drop(columns=[self.base_time_field], inplace=True)
+                data = data.drop(columns=[self.base_time_field])
 
         # Remove NaNs from index, if present
         data = data.loc[data.index.dropna()]
diff --git a/src/pytesmo/validation_framework/metric_calculators.py b/src/pytesmo/validation_framework/metric_calculators.py
index 1f2c87dc..b2fb0c4a 100644
--- a/src/pytesmo/validation_framework/metric_calculators.py
+++ b/src/pytesmo/validation_framework/metric_calculators.py
@@ -256,7 +256,7 @@ class BasicMetrics(MetadataMetrics):
     """
 
     def __init__(
-        self, other_name="k1", calc_tau=False, metadata_template=None
+            self, other_name="k1", calc_tau=False, metadata_template=None
     ):
         super(BasicMetrics, self).__init__(
             other_name=other_name, metadata_template=metadata_template
         )
@@ -373,7 +373,7 @@ class FTMetrics(MetadataMetrics):
     """
 
     def __init__(
-        self, frozen_flag=2, other_name="k1", metadata_template=None
+            self, frozen_flag=2, other_name="k1", metadata_template=None
     ):
         super(FTMetrics, self).__init__(
             other_name=other_name, metadata_template=metadata_template
         )
@@ -454,11 +454,11 @@ class HSAF_Metrics(MetadataMetrics):
     """
 
     def __init__(
-        self,
-        other_name1="k1",
-        other_name2="k2",
-        dataset_names=None,
-        metadata_template=None,
+            self,
+            other_name1="k1",
+            other_name2="k2",
+            dataset_names=None,
+            metadata_template=None,
     ):
 
         super(HSAF_Metrics, self).__init__(
@@ -677,16 +677,21 @@ class IntercomparisonMetrics(MetadataMetrics):
     """
 
     def __init__(
-        self,
-        refname="ref",
-        other_names=("k1", "k2", "k3"),
-        calc_tau=False,
-        metrics_between_nonref=False,
-        calc_rho=True,
-        dataset_names=None,
-        metadata_template=None,
+            self,
+            refname="ref",
+            other_names=("k1", "k2", "k3"),
+            calc_tau=False,
+            metrics_between_nonref=False,
+            calc_rho=True,
+            dataset_names=None,
+            metadata_template=None,
     ):
-
+        warnings.warn(
+            "pytesmo IntercomparisonMetrics calculator "
+            "is deprecated and will be removed in a future "
+            "release. Use the PairwiseIntercomparisonMetrics "
+            "class instead.", DeprecationWarning
+        )
         other_names = list(other_names)
         super(IntercomparisonMetrics, self).__init__(
             other_name=other_names, metadata_template=metadata_template
@@ -799,7 +804,6 @@ def calc_metrics(self, data, gpi_info):
             dataset["status"][0] = eh.INSUFFICIENT_DATA
             return dataset
 
-
         # make sure we have the correct order
         data = data[self.df_columns]
 
@@ -932,13 +936,13 @@ class TCMetrics(MetadataMetrics):
     """
 
     def __init__(
-        self,
-        other_names=("k1", "k2"),
-        calc_tau=False,
-        dataset_names=None,
-        tc_metrics_for_ref=True,
-        metrics_between_nonref=False,
-        metadata_template=None,
+            self,
+            other_names=("k1", "k2"),
+            calc_tau=False,
+            dataset_names=None,
+            tc_metrics_for_ref=True,
+            metrics_between_nonref=False,
+            metadata_template=None,
     ):
         """
         Triple Collocation metrics as implemented in the QA4SM project.
@@ -970,6 +974,13 @@ def __init__(
             which is then propagated to the end netCDF results file.
         """
 
+        warnings.warn(
+            "pytesmo TCMetrics calculator "
+            "is deprecated and will be removed in a future "
+            "release. Use the TripleCollocationMetrics "
+            "class instead.", DeprecationWarning
+        )
+
         self.ref_name = "ref"
         other_names = list(other_names)
         super(TCMetrics, self).__init__(
@@ -1056,10 +1067,11 @@
                 )
             for metric, ds in metrics_thds.keys():
                 if not any(
-                    [
-                        self.ds_names_lut[other_ds] == ds
-                        for other_ds in thds_name.split(self.ds_names_split)
-                    ]
+                        [
+                            self.ds_names_lut[other_ds] == ds
+                            for other_ds in thds_name.split(
+                                self.ds_names_split)
+                        ]
                 ):
                     continue
                 full_name = "_".join([metric, ds])
@@ -1146,7 +1158,6 @@ def calc_metrics(self, data, gpi_info):
             dataset["status"][0] = eh.INSUFFICIENT_DATA
             return dataset
 
-
         # calculate Pearson correlation
         pearson_R, pearson_p = df_metrics.pearsonr(data)
         pearson_R, pearson_p = pearson_R._asdict(), pearson_p._asdict()
@@ -1204,7 +1215,7 @@
             )
 
         for metr, res in dict(
-            snr=snr, err_std=err_std, beta=beta
+                snr=snr, err_std=err_std, beta=beta
         ).items():
             for ds, ds_res in res.items():
                 m_ds = "{}_{}".format(metr, self.ds_names_lut[ds])
@@ -1308,7 +1319,6 @@ class RollingMetrics(MetadataMetrics):
     """
 
     def __init__(self, other_name="k1", metadata_template=None):
-
         super(RollingMetrics, self).__init__(
             other_name=other_name, metadata_template=metadata_template
         )
@@ -1317,7 +1327,7 @@ def __init__(self, other_name="k1", metadata_template=None):
         self.result_template.update(_get_metric_template(self.basic_metrics))
 
     def calc_metrics(
-        self, data, gpi_info, window_size="30d", center=True, min_periods=2
+            self, data, gpi_info, window_size="30d", center=True, min_periods=2
     ):
         """
         Calculate the desired statistics.
@@ -1444,7 +1454,7 @@ def _pairwise_metric_names(self):
         return metrics
 
     def _calc_pairwise_metrics(
-        self, x, y, mx, my, varx, vary, cov, result, suffix=""
+            self, x, y, mx, my, varx, vary, cov, result, suffix=""
     ):
         """
         Calculates pairwise metrics, making use of pre-computed moments.
@@ -1602,15 +1612,15 @@ class PairwiseIntercomparisonMetrics(MetadataMetrics, PairwiseMetricsMixin):
     """
 
     def __init__(
-        self,
-        min_obs=10,
-        calc_spearman=True,
-        calc_kendall=True,
-        analytical_cis=True,
-        bootstrap_cis=False,
-        bootstrap_min_obs=100,
-        bootstrap_alpha=0.05,
-        metadata_template=None,
+            self,
+            min_obs=10,
+            calc_spearman=True,
+            calc_kendall=True,
+            analytical_cis=True,
+            bootstrap_cis=False,
+            bootstrap_min_obs=100,
+            bootstrap_alpha=0.05,
+            metadata_template=None,
     ):
 
         super().__init__(min_obs=min_obs, metadata_template=metadata_template)
@@ -1703,13 +1713,13 @@ class TripleCollocationMetrics(MetadataMetrics, PairwiseMetricsMixin):
     """
 
     def __init__(
-        self,
-        refname,
-        min_obs=10,
-        bootstrap_cis=False,
-        bootstrap_min_obs=100,
-        bootstrap_alpha=0.05,
-        metadata_template=None,
+            self,
+            refname,
+            min_obs=10,
+            bootstrap_cis=False,
+            bootstrap_min_obs=100,
+            bootstrap_alpha=0.05,
+            metadata_template=None,
     ):
 
         super().__init__(min_obs=min_obs, metadata_template=metadata_template)
diff --git a/src/pytesmo/validation_framework/upscaling.py b/src/pytesmo/validation_framework/upscaling.py
index 41110e33..533a7d47 100644
--- a/src/pytesmo/validation_framework/upscaling.py
+++ b/src/pytesmo/validation_framework/upscaling.py
@@ -212,8 +212,8 @@ def temporal_match(
         for n, df in enumerate(to_match):
             if df is None:
                 continue
-            points = int(df.count())
-            if int(ref.count()) >= points:
+            points = int(df.count().iloc[0])
+            if int(ref.count().iloc[0]) >= points:
                 continue
             else:
                 ref = df
@@ -225,7 +225,7 @@
         matched = combined_temporal_collocation(
             ref,
             to_match,
-            pd.Timedelta(hours, "H"),
+            pd.Timedelta(hours, "h"),
             combined_dropna=combined_dropna,
             checkna=True,
         )
diff --git a/src/pytesmo/validation_framework/validation.py b/src/pytesmo/validation_framework/validation.py
index 60edebaf..d84dae02 100644
--- a/src/pytesmo/validation_framework/validation.py
+++ b/src/pytesmo/validation_framework/validation.py
@@ -440,10 +440,7 @@ def dummy_result():
 
         # at this stage we can drop the column multiindex and just use
         # the dataset name
-        if LooseVersion(pd.__version__) < LooseVersion("0.23"):
-            data.columns = data.columns.droplevel(level=1)
-        else:
-            data = data.rename(columns=lambda x: x[0])
+        data = data.rename(columns=lambda x: x[0])
 
         if self.scaling is not None:
             # get scaling index by finding the column in the
diff --git a/tests/test_docs/test_examples.py b/tests/test_docs/test_examples.py
index 8b3e742a..cf1cf1cf 100644
--- a/tests/test_docs/test_examples.py
+++ b/tests/test_docs/test_examples.py
@@ -5,6 +5,7 @@
 """
 
 import os
+import subprocess
 from nbconvert.preprocessors import ExecutePreprocessor
 import nbformat
 import pytest
@@ -34,6 +35,11 @@ def test_ipython_notebook(notebook):
     applicable to the tests here, this file must be within a sub-folder of
     the tests/ directory (assuming that examples are in docs/examples)!
""" + # Handles jupyter warning (can probably be removed again in future): + os.environ["JUPYTER_PLATFORM_DIRS"] = "1" + subprocess.call(["jupyter", "--paths"]) + + # Run ipynb files and check if they pass preprocessor = ExecutePreprocessor(timeout=600, kernel_name="python3") with open(os.path.join(examples_path, notebook)) as f: nb = nbformat.read(f, as_version=4) diff --git a/tests/test_scaling.py b/tests/test_scaling.py index a1cc04bc..8b27b199 100644 --- a/tests/test_scaling.py +++ b/tests/test_scaling.py @@ -38,12 +38,10 @@ import numpy.testing as nptest import pytest - scaling_methods = list(scaling.get_scaling_method_lut().keys()) def test_mean_std_scaling(): - # use a random sample from a standard distribution n = 1000 x = np.random.normal(0, 0.5, n) @@ -55,7 +53,6 @@ def test_mean_std_scaling(): def test_min_max_scaling(): - # use a random sample from a standard distribution n = 1000 x = np.random.normal(0, 0.5, n) @@ -79,6 +76,7 @@ def test_scaling_method(method): y = np.arange(n) * 0.5 o = getattr(scaling, method)(y, x) + nptest.assert_almost_equal(x, o) @@ -102,7 +100,6 @@ def test_scaling_kwargs(method): @pytest.mark.parametrize('method', scaling_methods) def test_scale(method): - n = 1000 x = np.arange(n) y = np.arange(n) * 0.5 @@ -117,7 +114,6 @@ def test_scale(method): @pytest.mark.parametrize('method', ['non_existing_method']) def test_scale_error(method): - n = 1000 x = np.arange(n) y = np.arange(n) * 0.5 @@ -133,7 +129,6 @@ def test_scale_error(method): @pytest.mark.parametrize('method', ['non_existing_method']) def test_add_scale_error(method): - n = 1000 x = np.arange(n, dtype=float) y = np.arange(n) * 0.5 @@ -147,7 +142,6 @@ def test_add_scale_error(method): @pytest.mark.parametrize('method', scaling_methods) def test_add_scale(method): - n = 1000 x = np.arange(n, dtype=float) y = np.arange(n) * 0.5 @@ -182,11 +176,12 @@ def test_linreg_with_nan(): nptest.assert_almost_equal(df.loc[10:, 'x'].values, df.loc[10:, 'y'].values) - assert(df.index.size == n) + assert (df.index.size == n) +@pytest.mark.filterwarnings( + "ignore:Too few percentiles for chosen k.:UserWarning") def test_single_percentile_data(): - n = 1000 x = np.arange(n, dtype=float) y = np.ones(n) diff --git a/tests/test_time_series/test_plotting.py b/tests/test_time_series/test_plotting.py deleted file mode 100644 index 5ff15efb..00000000 --- a/tests/test_time_series/test_plotting.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2015,Vienna University of Technology, -# Department of Geodesy and Geoinformation -# All rights reserved. - -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the Vienna University of Technology, -# Department of Geodesy and Geoinformation nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. 
- -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY, -# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -''' -Test for timeseries plotting -''' - -import numpy as np -import pandas as pd -import matplotlib -matplotlib.use('Agg') -import pytest - -import pytesmo.time_series.plotting as plotting - - -@pytest.mark.xfail(matplotlib.__version__ != '2.1.2', reason='plotting defaults change') -@pytest.mark.mpl_image_compare(tolerance=21) -def test_anomaly_calc_given_climatology(): - - clim = pd.DataFrame({'data': np.concatenate((np.arange(150) - 40, - np.arange(216) + 190))}, - index=np.arange(366) + 1) - data = pd.DataFrame({'data': np.arange(366)}, - index=pd.date_range('2000-01-01', periods=366)) - fig, axes = plotting.plot_clim_anom(data, clim=clim) - return fig - - diff --git a/tests/test_validation_framework/test_adapters.py b/tests/test_validation_framework/test_adapters.py index 68d83149..cc83bf0b 100644 --- a/tests/test_validation_framework/test_adapters.py +++ b/tests/test_validation_framework/test_adapters.py @@ -1,6 +1,7 @@ import pytest from pytesmo.validation_framework.adapters import TimestampAdapter + """ Test for the adapters. 
""" @@ -92,7 +93,7 @@ def test_advanced_masking_adapter_nans_ignored(): ds = TestDataset("", n=20) # introduce nan ts = ds.read() - ts.iloc[7]["x"] = np.nan + ts.iloc[7, ts.columns.get_loc('x')] = np.nan def _read(): return ts @@ -168,7 +169,6 @@ def test_anomaly_clim_adapter_one_column(): def test_adapters_custom_fct_name(): - def assert_all_read_fcts(reader): assert (np.all(reader.read() == reader.read())) assert (np.all(reader.read() == reader.alias_read())) @@ -305,6 +305,8 @@ def read_empty(): pd.DataFrame(columns=['x', 'y', 'xy_mean'])) +@pytest.mark.filterwarnings( + "ignore:The input DataFrame is either empty or has.*:UserWarning") def test_timestamp_adapter(): ds = TestDataset("", n=20) @@ -421,7 +423,7 @@ def _read_all_nans(): # ----------------------- def _read_empty(): - return pd.DataFrame(columns=["sm", "offset"],) + return pd.DataFrame(columns=["sm", "offset"], ) setattr(ds, "read", _read_empty) origin = ds.read() @@ -467,8 +469,8 @@ def _read_complex(): should_be = origin.apply( lambda row: np.datetime64("2005-02-01") + np.timedelta64( int(row["base_time"]), "D") + np.timedelta64( - int(row["offset_min"]), "m") + np.timedelta64( - int(row["offset_sec"]), "s"), + int(row["offset_min"]), "m") + np.timedelta64( + int(row["offset_sec"]), "s"), axis=1).values assert (adapted.index.values == should_be).all() diff --git a/tests/test_validation_framework/test_data_manager.py b/tests/test_validation_framework/test_data_manager.py index 1f787aa0..6ae1d990 100644 --- a/tests/test_validation_framework/test_data_manager.py +++ b/tests/test_validation_framework/test_data_manager.py @@ -197,8 +197,10 @@ def test_DataManager_dataset_names(): dm = setup_TestDataManager() result_names = dm.get_results_names(3) - assert result_names == [(('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm')), - (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm2'))] + assert result_names == [ + (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm')), + (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm2')) + ] result_names = dm.get_results_names(2) assert result_names == [(('DS1', 'soil moisture'), ('DS2', 'sm')), @@ -209,7 +211,8 @@ def test_DataManager_dataset_names(): def test_DataManager_get_data(): datasets = setup_TestDatasets() - dm = DataManager(datasets, 'DS1', read_ts_names={f'DS{i}': 'read' for i in range(1,4)}) + dm = DataManager(datasets, 'DS1', + read_ts_names={f'DS{i}': 'read' for i in range(1, 4)}) data = dm.get_data(1, 1, 1) assert sorted(list(data)) == ['DS1', 'DS2', 'DS3'] @@ -220,8 +223,10 @@ def test_get_result_names(): 'DS2': ['sm'], 'DS3': ['sm', 'sm2']} result_names = get_result_names(tst_ds_dict, 'DS1', 3) - assert result_names == [(('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm')), - (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm2'))] + assert result_names == [ + (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm')), + (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm2')) + ] result_names = get_result_names(tst_ds_dict, 'DS1', 2) assert result_names == [(('DS1', 'soil moisture'), ('DS2', 'sm')), @@ -239,8 +244,10 @@ def test_get_result_combinations(): 'DS2': ['sm'], 'DS3': ['sm', 'sm2']} result_names = get_result_combinations(tst_ds_dict, n=3) - assert result_names == [(('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm')), - (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm2'))] + assert result_names == [ + (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm')), + (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm2')) + ] result_names = 
get_result_combinations(tst_ds_dict, n=2) assert result_names == [(('DS1', 'soil moisture'), ('DS2', 'sm')), @@ -250,6 +257,7 @@ def test_get_result_combinations(): (('DS2', 'sm'), ('DS3', 'sm2'))] +@pytest.mark.filterwarnings("ignore:Less than k=1 points.*:UserWarning") def test_maxdist(): testdf = pd.DataFrame([1, 1, 1], columns=["sm"]) @@ -308,8 +316,7 @@ def flush(self): df_dict = dm.get_data(0, 0, 0) assert df_dict == expected - # test if the far away point in the other dataset can be found (should not happen) + # test if the far away point in the other dataset can be found + # (should not happen) df_dict = dm.get_data(1, 1, 1) assert df_dict == {} - - diff --git a/tests/test_validation_framework/test_error_handling.py b/tests/test_validation_framework/test_error_handling.py index d09c7503..0202d1e4 100644 --- a/tests/test_validation_framework/test_error_handling.py +++ b/tests/test_validation_framework/test_error_handling.py @@ -15,6 +15,7 @@ from .utils import create_datasets +@pytest.mark.filterwarnings("ignore:Not enough observations.*:UserWarning") def test_error_handling_empty_df(): # This tests whether error handling works if one of the datasets consists # of an empty dataframe. @@ -34,7 +35,8 @@ def test_error_handling_empty_df(): datasets, spatial_ref="0-ERA5", metrics_calculators={(n_datasets, 2): metric_calculator.calc_metrics}, - temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "h")), + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(12, "h")), ) gpis = list(range(npoints)) args = (gpis, gpis, gpis) @@ -56,6 +58,7 @@ def test_error_handling_empty_df(): assert np.all(results[key]["status"] == eh.NO_TEMP_MATCHED_DATA) +@pytest.mark.filterwarnings("ignore:Not enough observations.*:UserWarning") def test_error_handling_nodata(): # this tests if we get the NoGpiDataError if one dataset doesn't have any # values. Here we use only 2 datasets, otherwise the third one will be @@ -72,7 +75,8 @@ def test_error_handling_nodata(): datasets, spatial_ref="0-ERA5", metrics_calculators={(n_datasets, 2): metric_calculator.calc_metrics}, - temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "h")), + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(12, "h")), ) gpis = list(range(npoints)) args = (gpis, gpis, gpis) @@ -94,6 +98,7 @@ def test_error_handling_nodata(): assert np.all(results[key]["status"] == eh.NO_GPI_DATA) +@pytest.mark.filterwarnings("ignore:Not enough observations.*:UserWarning") def test_error_handling_not_enough_data(): # This tests if we get a proper warning if we have not enough data to # calculate correlations (nsamples = 5). In this case, the behaviour of all @@ -110,7 +115,8 @@ def test_error_handling_not_enough_data(): datasets, spatial_ref="0-ERA5", metrics_calculators={(n_datasets, 2): metric_calculator.calc_metrics}, - temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "h")), + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(12, "h")), ) gpis = list(range(npoints)) args = (gpis, gpis, gpis) @@ -118,9 +124,11 @@ def test_error_handling_not_enough_data(): for handle_errors in ["ignore", "raise"]: with pytest.warns( - UserWarning, match="Not enough observations to calculate metrics." + UserWarning, + match="Not enough observations to calculate metrics." 
): - results = val.calc(*args, **kwargs, handle_errors=handle_errors) + results = val.calc(*args, **kwargs, + handle_errors=handle_errors) for key in results: for metric in results[key]: assert len(results[key][metric]) == npoints @@ -142,7 +150,8 @@ def test_error_handling_ok(): datasets, spatial_ref="0-ERA5", metrics_calculators={(n_datasets, 2): metric_calculator.calc_metrics}, - temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "h")), + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(12, "h")), ) gpis = list(range(npoints)) args = (gpis, gpis, gpis) @@ -158,6 +167,7 @@ def test_error_handling_ok(): assert np.all(results[key]["status"] == eh.OK) +@pytest.mark.filterwarnings("ignore:Not enough observations.*:UserWarning") def test_error_handling_scaling_failed(): # This tests whether a scaling error is raised if the scaling fails due to # insufficient data. @@ -177,7 +187,8 @@ def scale(self, data, ref_idx, gpi_info): scaling=BadScaler(), spatial_ref="0-ERA5", metrics_calculators={(n_datasets, 2): metric_calculator.calc_metrics}, - temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "h")), + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(12, "h")), ) gpis = list(range(npoints)) args = (gpis, gpis, gpis) @@ -197,6 +208,7 @@ def scale(self, data, ref_idx, gpi_info): assert np.all(results[key]["status"] == eh.SCALING_FAILED) +@pytest.mark.filterwarnings("ignore:Not enough observations.*:UserWarning") def test_error_handling_datamanager_failed(): # This tests whether a scaling error is raised if the scaling fails due to # insufficient data. @@ -220,7 +232,8 @@ def bad_get_data(*args): data_manager, spatial_ref=spatial_ref, metrics_calculators={(n_datasets, 2): metric_calculator.calc_metrics}, - temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "h")), + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(12, "h")), ) gpis = list(range(npoints)) args = (gpis, gpis, gpis) @@ -240,6 +253,7 @@ def bad_get_data(*args): assert np.all(results[key]["status"] == eh.DATA_MANAGER_FAILED) +@pytest.mark.filterwarnings("ignore:Not enough observations.*:UserWarning") def test_error_handling_temp_matching_failed(): # This tests whether a TemporalMatchingError is raised if the matching # fails @@ -278,6 +292,7 @@ def bad_matching(*args, **kwargs): assert np.all(results[key]["status"] == eh.TEMPORAL_MATCHING_FAILED) +@pytest.mark.filterwarnings("ignore:Not enough observations.*:UserWarning") def test_error_handling_metrics_calculation_failed(): # This tests whether a MetricsCalculationError is raised if metrics # calculation fails @@ -296,7 +311,8 @@ def bad_metrics(data, gpi_info): datasets, spatial_ref="0-ERA5", metrics_calculators={(n_datasets, 2): bad_metrics}, - temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "h")), + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(12, "h")), ) gpis = list(range(npoints)) args = (gpis, gpis, gpis) diff --git a/tests/test_validation_framework/test_metric_calculators.py b/tests/test_validation_framework/test_metric_calculators.py index 59ee3177..3d7164bb 100644 --- a/tests/test_validation_framework/test_metric_calculators.py +++ b/tests/test_validation_framework/test_metric_calculators.py @@ -62,7 +62,6 @@ from pytesmo.validation_framework.results_manager import netcdf_results_manager import pytesmo.metrics as metrics - from .utils import DummyReader @@ -155,7 +154,8 @@ def test_BasicMetrics_calculator_metadata(): df = make_some_data() data = df[["ref", 
"k1"]] - metadata_dict_template = {"network": np.array(["None"], dtype="U256")} + metadata_dict_template = { + "network": np.array(["None"], dtype="U256")} metriccalc = BasicMetrics( other_name="k1", @@ -219,7 +219,8 @@ def test_BasicMetricsPlusMSE_calculator_metadata(): df = make_some_data() data = df[["ref", "k1"]] - metadata_dict_template = {"network": np.array(["None"], dtype="U256")} + metadata_dict_template = { + "network": np.array(["None"], dtype="U256")} metriccalc = BasicMetricsPlusMSE( other_name="k1", metadata_template=metadata_dict_template @@ -253,9 +254,10 @@ def test_IntercompMetrics_calculator(): df = make_some_data() data = df[["ref", "k1", "k2"]] - metriccalc = IntercomparisonMetrics( - other_names=("k1", "k2"), calc_tau=True - ) + with pytest.warns(DeprecationWarning): + metriccalc = IntercomparisonMetrics( + other_names=("k1", "k2"), calc_tau=True + ) res = metriccalc.calc_metrics(data, gpi_info=(0, 0, 0)) @@ -275,34 +277,42 @@ def test_IntercompMetrics_calculator(): ) np.testing.assert_almost_equal( - res["mse_corr_between_ref_and_k1"], np.array([0], dtype=np.float32) + res["mse_corr_between_ref_and_k1"], + np.array([0], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_corr_between_ref_and_k2"], np.array([0], dtype=np.float32) + res["mse_corr_between_ref_and_k2"], + np.array([0], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_bias_between_ref_and_k1"], np.array([0.04], dtype=np.float32) + res["mse_bias_between_ref_and_k1"], + np.array([0.04], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_bias_between_ref_and_k2"], np.array([0.04], dtype=np.float32) + res["mse_bias_between_ref_and_k2"], + np.array([0.04], dtype=np.float32) ) # scipy 1.3.0 is not built for python 2.7 so we allow both for now assert ( - np.isnan(res["p_R_between_ref_and_k1"]) - or res["p_R_between_ref_and_k1"] == 1.0 + np.isnan(res["p_R_between_ref_and_k1"]) + or res["p_R_between_ref_and_k1"] == 1.0 ) assert ( - np.isnan(res["p_R_between_ref_and_k2"]) - or res["p_R_between_ref_and_k2"] == 1.0 + np.isnan(res["p_R_between_ref_and_k2"]) + or res["p_R_between_ref_and_k2"] == 1.0 ) - assert res["RMSD_between_ref_and_k1"] == np.array([0.2], dtype="float32") - assert res["RMSD_between_ref_and_k2"] == np.array([0.2], dtype="float32") + assert (res["RMSD_between_ref_and_k1"] == + np.array([0.2], dtype="float32")) + assert (res["RMSD_between_ref_and_k2"] == + np.array([0.2], dtype="float32")) - assert res["BIAS_between_ref_and_k1"] == np.array([-0.2], dtype="float32") - assert res["BIAS_between_ref_and_k2"] == np.array([0.2], dtype="float32") + assert (res["BIAS_between_ref_and_k1"] == + np.array([-0.2], dtype="float32")) + assert (res["BIAS_between_ref_and_k2"] == + np.array([0.2], dtype="float32")) np.testing.assert_almost_equal( res["urmsd_between_ref_and_k1"], np.array([0.0], dtype="float32") @@ -322,13 +332,15 @@ def test_IntercompMetrics_calculator_metadata(): df = make_some_data() data = df[["ref", "k1", "k2"]] - metadata_dict_template = {"network": np.array(["None"], dtype="U256")} + metadata_dict_template = { + "network": np.array(["None"], dtype="U256")} - metriccalc = IntercomparisonMetrics( - other_names=("k1", "k2"), - calc_tau=True, - metadata_template=metadata_dict_template, - ) + with pytest.warns(DeprecationWarning): + metriccalc = IntercomparisonMetrics( + other_names=("k1", "k2"), + calc_tau=True, + metadata_template=metadata_dict_template, + ) res = metriccalc.calc_metrics( data, gpi_info=(0, 0, 0, {"network": "SOILSCAPE"}) ) @@ -344,11 +356,12 @@ 
def test_TC_metrics_calculator(): df = make_some_data() data = df[["ref", "k1", "k2", "k3"]] - metriccalc = TCMetrics( - other_names=("k1", "k2", "k3"), - calc_tau=True, - dataset_names=("ref", "k1", "k2", "k3"), - ) + with pytest.warns(DeprecationWarning): + metriccalc = TCMetrics( + other_names=("k1", "k2", "k3"), + calc_tau=True, + dataset_names=("ref", "k1", "k2", "k3"), + ) res = metriccalc.calc_metrics(data, gpi_info=(0, 0, 0)) @@ -361,47 +374,59 @@ def test_TC_metrics_calculator(): assert np.isnan(res["rho_between_ref_and_k2"]) np.testing.assert_almost_equal( - res["mse_between_ref_and_k1"], np.array([0.04], dtype=np.float32) + res["mse_between_ref_and_k1"], + np.array([0.04], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_between_ref_and_k2"], np.array([0.04], dtype=np.float32) + res["mse_between_ref_and_k2"], + np.array([0.04], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_corr_between_ref_and_k1"], np.array([0], dtype=np.float32) + res["mse_corr_between_ref_and_k1"], + np.array([0], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_corr_between_ref_and_k2"], np.array([0], dtype=np.float32) + res["mse_corr_between_ref_and_k2"], + np.array([0], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_bias_between_ref_and_k1"], np.array([0.04], dtype=np.float32) + res["mse_bias_between_ref_and_k1"], + np.array([0.04], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_bias_between_ref_and_k2"], np.array([0.04], dtype=np.float32) + res["mse_bias_between_ref_and_k2"], + np.array([0.04], dtype=np.float32) ) # scipy 1.3.0 is not built for python 2.7 so we allow both for now assert ( - np.isnan(res["p_R_between_ref_and_k1"]) - or res["p_R_between_ref_and_k1"] == 1.0 + np.isnan(res["p_R_between_ref_and_k1"]) + or res["p_R_between_ref_and_k1"] == 1.0 ) assert ( - np.isnan(res["p_R_between_ref_and_k2"]) - or res["p_R_between_ref_and_k2"] == 1.0 + np.isnan(res["p_R_between_ref_and_k2"]) + or res["p_R_between_ref_and_k2"] == 1.0 ) - assert res["RMSD_between_ref_and_k1"] == np.array([0.2], dtype="float32") - assert res["RMSD_between_ref_and_k2"] == np.array([0.2], dtype="float32") + assert (res["RMSD_between_ref_and_k1"] == + np.array([0.2], dtype="float32")) + assert (res["RMSD_between_ref_and_k2"] == + np.array([0.2], dtype="float32")) - assert res["BIAS_between_ref_and_k1"] == np.array([-0.2], dtype="float32") - assert res["BIAS_between_ref_and_k2"] == np.array([0.2], dtype="float32") + assert (res["BIAS_between_ref_and_k1"] == + np.array([-0.2], dtype="float32")) + assert (res["BIAS_between_ref_and_k2"] == + np.array([0.2], dtype="float32")) np.testing.assert_almost_equal( - res["urmsd_between_ref_and_k1"], np.array([0.0], dtype="float32") + res["urmsd_between_ref_and_k1"], + np.array([0.0], dtype="float32") ) np.testing.assert_almost_equal( - res["urmsd_between_ref_and_k2"], np.array([0.0], dtype="float32") + res["urmsd_between_ref_and_k2"], + np.array([0.0], dtype="float32") ) assert "RSS_between_ref_and_k1" in res.keys() @@ -430,14 +455,16 @@ def test_TC_metrics_calculator_metadata(): df = make_some_data() data = df[["ref", "k1", "k2"]] - metadata_dict_template = {"network": np.array(["None"], dtype="U256")} - - metriccalc = TCMetrics( - other_names=("k1", "k2"), - calc_tau=True, - dataset_names=["ref", "k1", "k2"], - metadata_template=metadata_dict_template, - ) + metadata_dict_template = { + "network": np.array(["None"], dtype="U256")} + + with pytest.warns(DeprecationWarning): + metriccalc = TCMetrics( + 
other_names=("k1", "k2"), + calc_tau=True, + dataset_names=["ref", "k1", "k2"], + metadata_template=metadata_dict_template, + ) res = metriccalc.calc_metrics( data, gpi_info=(0, 0, 0, {"network": "SOILSCAPE"}) ) @@ -472,7 +499,8 @@ def test_FTMetrics_metadata(): df = make_some_data() data = df[["ref", "k1"]] - metadata_dict_template = {"network": np.array(["None"], dtype="U256")} + metadata_dict_template = { + "network": np.array(["None"], dtype="U256")} metriccalc = FTMetrics( frozen_flag=2, @@ -510,7 +538,8 @@ def test_BasicSeasonalMetrics_metadata(): df = make_some_data() data = df[["ref", "k1"]] - metadata_dict_template = {"network": np.array(["None"], dtype="U256")} + metadata_dict_template = { + "network": np.array(["None"], dtype="U256")} with pytest.warns(UserWarning): metriccalc = MonthsMetricsAdapter( @@ -525,6 +554,8 @@ def test_BasicSeasonalMetrics_metadata(): assert res["network"] == np.array(["SOILSCAPE"], dtype="U256") +@pytest.mark.filterwarnings( + "ignore:invalid value encountered in divide*:RuntimeWarning") def test_HSAF_Metrics(): """ Test HSAF Metrics @@ -542,6 +573,8 @@ def test_HSAF_Metrics(): assert np.isnan(res["ref_k2_ALL_rho"]) +@pytest.mark.filterwarnings( + "ignore:invalid value encountered in divide*:RuntimeWarning") def test_HSAF_Metrics_metadata(): """ Test HSAF Metrics with metadata. @@ -583,7 +616,8 @@ def test_RollingMetrics(): for i in range(indexer.shape[0]): rmsd_arr.append( metrics.rmsd( - df["ref"][indexer[i, :]].values, df["k1"][indexer[i, :]].values + df.iloc[indexer[i, :], df.columns.get_loc("ref")].values, + df.iloc[indexer[i, :], df.columns.get_loc("k1")].values ) ) @@ -611,7 +645,7 @@ def make_datasets(df): return datasets -def testdata_known_results(): +def make_testdata_known_results(): dr = pd.date_range("2000", "2020", freq="D") n = len(dr) x = np.ones(n) * 2 @@ -684,16 +718,19 @@ def testdata_known_results(): for ck in expected: for m in expected[ck]: if m in ["n_obs", "gpi"]: - expected[ck][m] = np.array([expected[ck][m]], dtype=np.int32) + expected[ck][m] = np.array([expected[ck][m]], + dtype=np.int32) elif m in ["lat", "lon"]: - expected[ck][m] = np.array([expected[ck][m]], dtype=np.float64) + expected[ck][m] = np.array([expected[ck][m]], + dtype=np.float64) else: - expected[ck][m] = np.array([expected[ck][m]], dtype=np.float32) + expected[ck][m] = np.array([expected[ck][m]], + dtype=np.float32) return make_datasets(df), expected -def testdata_random(): +def make_testdata_random(): np.random.seed(42) dr = pd.date_range("2000", "2020", freq="D") n = len(dr) @@ -749,19 +786,24 @@ def testdata_random(): for ck in expected: for m in expected[ck]: if m in ["n_obs", "gpi"]: - expected[ck][m] = np.array([expected[ck][m]], dtype=np.int32) + expected[ck][m] = np.array([expected[ck][m]], + dtype=np.int32) elif m in ["lat", "lon"]: - expected[ck][m] = np.array([expected[ck][m]], dtype=np.float64) + expected[ck][m] = np.array([expected[ck][m]], + dtype=np.float64) else: - expected[ck][m] = np.array([expected[ck][m]], dtype=np.float32) + expected[ck][m] = np.array([expected[ck][m]], + dtype=np.float32) return make_datasets(df), expected @pytest.mark.parametrize( - "testdata_generator", [testdata_known_results, testdata_random] + "testdata_generator", [make_testdata_known_results, make_testdata_random] ) @pytest.mark.parametrize("seas_metrics", [None, MonthsMetricsAdapter]) +@pytest.mark.filterwarnings( + "ignore:invalid value encountered in divide.*:RuntimeWarning") def test_PairwiseIntercomparisonMetrics(testdata_generator, seas_metrics): # This 
test first compares the PairwiseIntercomparisonMetrics to known # results and then confirms that it agrees with IntercomparisonMetrics as @@ -836,26 +878,26 @@ def test_PairwiseIntercomparisonMetrics(testdata_generator, seas_metrics): # preparation of IntercomparisonMetrics run for comparison ds_names = list(datasets.keys()) - metrics = IntercomparisonMetrics( - dataset_names=ds_names, - # passing the names here explicitly, see GH issue #220 - refname="reference_name", - other_names=ds_names[1:], - calc_tau=True, - ) + with pytest.warns(DeprecationWarning): + metrics = IntercomparisonMetrics( + dataset_names=ds_names, + # passing the names here explicitly, see GH issue #220 + refname="reference_name", + other_names=ds_names[1:], + calc_tau=True, + ) if seas_metrics: with pytest.warns(UserWarning): metrics = seas_metrics(metrics) - val = Validation( - datasets, - "reference_name", - scaling=None, - temporal_matcher=None, # use default here - metrics_calculators={(4, 4): metrics.calc_metrics}, - ) - - print("running old setup") + with pytest.warns(UserWarning): + val = Validation( + datasets, + "reference_name", + scaling=None, + temporal_matcher=None, # use default here + metrics_calculators={(4, 4): metrics.calc_metrics}, + ) results = val.calc(0, 1, 1, rename_cols=False) # results is a dictionary with one entry and key @@ -925,7 +967,7 @@ def test_PairwiseIntercomparisonMetrics(testdata_generator, seas_metrics): def test_PairwiseIntercomparisonMetrics_confidence_intervals(): # tests if the correct confidence intervals are returned - datasets, _ = testdata_random() + datasets, _ = make_testdata_random() matcher = make_combined_temporal_matcher(pd.Timedelta(6, "h")) val = Validation( datasets, @@ -944,7 +986,8 @@ def test_PairwiseIntercomparisonMetrics_confidence_intervals(): }, ) results_pw = val.calc( - [0], [1], [1], rename_cols=False, only_with_reference=True + [0], [1], [1], rename_cols=False, + only_with_reference=True ) metrics_with_ci = { @@ -999,7 +1042,7 @@ def test_PairwiseIntercomparisonMetrics_confidence_intervals(): @pytest.mark.parametrize( - "testdata_generator", [testdata_known_results, testdata_random] + "testdata_generator", [make_testdata_known_results, make_testdata_random] ) @pytest.mark.parametrize("seas_metrics", [None, MonthsMetricsAdapter]) def test_TripleCollocationMetrics(testdata_generator, seas_metrics): @@ -1028,7 +1071,8 @@ def test_TripleCollocationMetrics(testdata_generator, seas_metrics): metrics_calculators={(4, 3): triplet_metrics_calculator.calc_metrics}, ) results_triplet = val_triplet.calc( - [0], [1], [1], rename_cols=False, only_with_reference=True + [0], [1], [1], rename_cols=False, + only_with_reference=True ) if "col1_name" in datasets.keys(): @@ -1092,7 +1136,8 @@ def test_TripleCollocationMetrics(testdata_generator, seas_metrics): }, ) results_triplet = val_triplet.calc( - [0], [1], [1], rename_cols=False, only_with_reference=True + [0], [1], [1], rename_cols=False, + only_with_reference=True ) for key in results_triplet: for dset, _ in key: @@ -1108,12 +1153,12 @@ def test_TripleCollocationMetrics(testdata_generator, seas_metrics): assert (*lkey, dset) in results_triplet[key] assert (*ukey, dset) in results_triplet[key] assert ( - results_triplet[key][(*lkey, dset)] - <= results_triplet[key][(*mkey, dset)] + results_triplet[key][(*lkey, dset)] + <= results_triplet[key][(*mkey, dset)] ) assert ( - results_triplet[key][(*mkey, dset)] - <= results_triplet[key][(*ukey, dset)] + results_triplet[key][(*mkey, dset)] + <= results_triplet[key][(*ukey, 
dset)] ) @@ -1161,13 +1206,14 @@ def test_temporal_matching_ascat_ismn(): # old setup ds_names = list(datasets.keys()) - metrics = IntercomparisonMetrics( - dataset_names=ds_names, - # passing the names here explicitly, see GH issue #220 - refname=refname, - other_names=ds_names[1:], - calc_tau=True, - ) + with pytest.warns(DeprecationWarning): + metrics = IntercomparisonMetrics( + dataset_names=ds_names, + # passing the names here explicitly, see GH issue #220 + refname=refname, + other_names=ds_names[1:], + calc_tau=True, + ) old_val = Validation( datasets, refname, diff --git a/tests/test_validation_framework/test_upscaling.py b/tests/test_validation_framework/test_upscaling.py index 1cc105eb..c5026dd5 100644 --- a/tests/test_validation_framework/test_upscaling.py +++ b/tests/test_validation_framework/test_upscaling.py @@ -60,20 +60,20 @@ def averager(): def test_upscale(averager): """Test all upscaling functions""" to_upscale = pd.concat( - [pd.Series(2, index=np.linspace(1,10), name='sm'), - pd.Series(4, index=np.linspace(1,10), name='sm')], + [pd.Series(2, index=np.linspace(1, 10), name='sm'), + pd.Series(4, index=np.linspace(1, 10), name='sm')], axis=1 ) # simple check of series averaging upscaled = averager.upscale(to_upscale, method="average") - should = pd.Series(float(3), index=np.linspace(1,10)) + should = pd.Series(float(3), index=np.linspace(1, 10)) assert upscaled.equals(should) def test_tstability(averager): """Test temporal stability filtering with noisy or uncorrelated series""" n_obs = 1000 - points = np.linspace(0, 2*np.pi, n_obs) + points = np.linspace(0, 2 * np.pi, n_obs) ts = np.sin(points) low_corr = np.sin(points + np.pi) high_sterr = np.sin(points) + np.random.normal(0, 2, n_obs) @@ -99,12 +99,14 @@ def series_2_match(): ref_ser = pd.Series( data_ref, - index=pd.date_range("2007-01-01 01:00:00", "2007-01-30 01:00:00", freq="D"), + index=pd.date_range("2007-01-01 01:00:00", + "2007-01-30 01:00:00", freq="D"), name="ref" ).to_frame() match_ser = pd.Series( data2match, - index=pd.date_range("2007-01-01 05:00:00", "2007-01-29 05:00:00", freq="D"), + index=pd.date_range("2007-01-01 05:00:00", + "2007-01-29 05:00:00", freq="D"), name="ref" ).to_frame() to_match = [ref_ser, match_ser] @@ -115,25 +117,29 @@ def series_2_match(): def test_temporal_matching(averager, series_2_match): """Test temporal matching""" matched = averager.temporal_match(series_2_match, drop_missing=False) - assert len(matched.index) == 30, "Should be matched to the longest timeseries" + assert len(matched.index) == 30, \ + "Should be matched to the longest timeseries" matched = averager.temporal_match(series_2_match, drop_missing=True) - assert len(matched.index) == 28, "Should drop the row and the missing timestep with a missing value" + assert len(matched.index) == 28, \ + "Should drop the row and the missing timestep with a missing value" - matched = averager.temporal_match(series_2_match, hours=3) - assert matched.equals(series_2_match[0]), "Should not be matched" + with pytest.warns(UserWarning): + matched = averager.temporal_match(series_2_match, hours=3) + assert matched.equals(series_2_match[0]), "Should not be matched" def test_capture_warning(averager, series_2_match): def override_read(points, other_name): return series_2_match - averager.lut = {"other_ds": {0 : [0]}} + averager.lut = {"other_ds": {0: [0]}} averager.datasets = {"other_ds": {"columns": ["ref"]}} averager._read = override_read - res = averager.get_upscaled_ts( - gpi=0, - other_name="other_ds", - **{"hours": 3} - ) + with 
pytest.warns(UserWarning): + res = averager.get_upscaled_ts( + gpi=0, + other_name="other_ds", + **{"hours": 3} + ) assert res.equals(series_2_match[0]) diff --git a/tests/test_validation_framework/test_validation.py b/tests/test_validation_framework/test_validation.py index bd70d265..a3789ac0 100644 --- a/tests/test_validation_framework/test_validation.py +++ b/tests/test_validation_framework/test_validation.py @@ -137,9 +137,9 @@ def ismn_reader(): def check_results( - filename: str, - target_vars: dict, - variables: list = None, + filename: str, + target_vars: dict, + variables: list = None, ): """ Check that standard vars are present and that nobs, rho and rmsd match @@ -185,7 +185,6 @@ def test_ascat_ismn_validation(ascat_reader, ismn_reader): metadata = ismn_reader.read_metadata(idx) jobs.append((idx, metadata["longitude"].val, metadata["latitude"].val)) - # Create the validation object. datasets = { @@ -205,7 +204,8 @@ def test_ascat_ismn_validation(ascat_reader, ismn_reader): } read_ts_names = {"ASCAT": "read", "ISMN": "read"} - period = [datetime(2007, 1, 1), datetime(2014, 12, 31)] + period = [datetime(2007, 1, 1), + datetime(2014, 12, 31)] datasets = DataManager( datasets, "ISMN", period, read_ts_names=read_ts_names) @@ -216,6 +216,8 @@ def test_ascat_ismn_validation(ascat_reader, ismn_reader): temporal_ref="ASCAT", scaling="cdf_match", scaling_ref="ASCAT", + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(1, "h")), metrics_calculators={ (2, 2): metrics_calculators.BasicMetrics(other_name="k1").calc_metrics @@ -337,6 +339,8 @@ def test_ascat_ismn_validation_metadata(ascat_reader, ismn_reader): temporal_ref="ASCAT", scaling="cdf_match", scaling_ref="ASCAT", + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(1, "h")), metrics_calculators={ (2, 2): metrics_calculators.BasicMetrics( @@ -396,7 +400,7 @@ def test_ascat_ismn_validation_metadata(ascat_reader, ismn_reader): "SOILSCAPE", "SOILSCAPE", "SOILSCAPE", - ], dtype="U256",) + ], dtype="U256", ) } vars_should = [ 'BIAS', 'R', 'RMSD', '_row_size', 'climate', 'gpi', 'idx', 'landcover', @@ -432,14 +436,10 @@ def test_validation_with_averager(ascat_reader, ismn_reader): (7, -120.80639, 38.17353)] } } - gpis = (1814367, 1803695, 1856312) - lons, lats = [], [] - for gpi in gpis: - lon, lat = ascat_reader.grid.gpi2lonlat(gpi) - lons.append(lon) - lats.append(lat) - jobs = [(gpis, lons, lats)] + jobs = [((1814367, 1803695, 1856312), + (0, 0, 0), + (0, 0, 0))] # coords not needed here # Create the variable ***save_path*** which is a string representing the # path where the results will be saved. 
**DO NOT CHANGE** the name @@ -467,7 +467,8 @@ def test_validation_with_averager(ascat_reader, ismn_reader): } read_ts_names = {"ASCAT": "read", "ISMN": "read"} - period = [datetime(2007, 1, 1), datetime(2014, 12, 31)] + period = [datetime(2007, 1, 1), + datetime(2014, 12, 31)] datasets = DataManager( datasets, @@ -486,6 +487,8 @@ def test_validation_with_averager(ascat_reader, ismn_reader): temporal_ref="ISMN", scaling="cdf_match", scaling_ref="ISMN", + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(1, "h")), metrics_calculators={ (2, 2): metrics_calculators.BasicMetrics(other_name="k1").calc_metrics @@ -829,7 +832,6 @@ def test_validation_n3_k2(): def test_validation_n3_k2_temporal_matching_no_matches2(): - empty_result = { 'gpi': np.array([4], dtype=np.int32), 'lon': np.array([4.]), @@ -1159,6 +1161,8 @@ def test_ascat_ismn_validation_metadata_rolling(ascat_reader, ismn_reader): temporal_ref="ASCAT", scaling="cdf_match", scaling_ref="ASCAT", + temporal_matcher=make_combined_temporal_matcher( + window=pd.Timedelta(1, "h")), metrics_calculators={ (2, 2): metrics_calculators.RollingMetrics(
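
A few usage sketches related to the changes above. First, the DataFrame pattern from the src/pytesmo/time_series/grouping.py hunk: new boolean flag columns are collected in a dict and appended with a single concat instead of column-by-column assignment. A minimal standalone sketch; the index, column names, and flag values are illustrative, not taken from pytesmo:

```python
import numpy as np
import pandas as pd

idx = pd.date_range("2020-01-01", periods=366, freq="D")
mask = pd.DataFrame(index=idx)

# Collect all new boolean columns in a plain dict first ...
cols = {}
for i in range(3):
    cols[f"gen_range2020-{i}"] = np.zeros(len(idx), dtype=bool)

# ... then append them with one concat. Assigning each column to the
# DataFrame inside the loop is what triggered the pandas
# PerformanceWarning about fragmented frames.
mask = pd.concat([mask, pd.DataFrame(index=idx, data=cols)], axis=1)

print(mask.any(axis=1, bool_only=True).sum())  # 0, no timestamp flagged
```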
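With the warnings.simplefilter() calls removed from pytesmo.utils.deprecated, the DeprecationWarning it emits now respects regular warning filters, including the new setup.cfg entries. A minimal sketch, assuming a made-up old_function and message:

```python
import pytest

from pytesmo.utils import deprecated


@deprecated("Use 'new_function' instead.")
def old_function():
    return 42


def test_old_function_warns():
    # Every call still raises a DeprecationWarning ...
    with pytest.warns(DeprecationWarning, match="new_function"):
        assert old_function() == 42


# ... but it can now be silenced per test, analogous to the
# filterwarnings entries added to setup.cfg above.
@pytest.mark.filterwarnings(
    "ignore:Use 'new_function' instead.:DeprecationWarning")
def test_old_function_silenced():
    assert old_function() == 42
```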
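IntercomparisonMetrics and TCMetrics now warn that they will be removed; the replacements named in the warnings are constructed as in the tests above. A sketch of the migration; the reference name "0-ERA5" and the 12-hour window are placeholders, and the make_combined_temporal_matcher import path is assumed from pytesmo's temporal_matchers module:

```python
import pandas as pd

from pytesmo.validation_framework.metric_calculators import (
    PairwiseIntercomparisonMetrics,
    TripleCollocationMetrics,
)
from pytesmo.validation_framework.temporal_matchers import (
    make_combined_temporal_matcher,
)

# Pairwise replacement for the deprecated IntercomparisonMetrics:
pairwise = PairwiseIntercomparisonMetrics(min_obs=10, calc_spearman=True)

# Triple collocation replacement for the deprecated TCMetrics:
triplet = TripleCollocationMetrics(refname="0-ERA5", min_obs=10)

# The combined matcher used throughout the new tests; 12 h is an
# arbitrary example window (note the lowercase "h", see upscaling.py):
matcher = make_combined_temporal_matcher(pd.Timedelta(12, "h"))

# These would then be passed to a Validation object as
# temporal_matcher=matcher and
# metrics_calculators={(3, 2): pairwise.calc_metrics}.
```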
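Finally, the upscaling fix replaces int(df.count()) with int(df.count().iloc[0]): DataFrame.count() returns one count per column, and casting a length-one Series to int is deprecated in recent pandas. A tiny sketch with made-up data:

```python
import pandas as pd

df = pd.DataFrame({"sm": [0.1, 0.2, None, 0.4]})

# DataFrame.count() yields a per-column Series, so the number of valid
# observations in the first (only) column is selected explicitly:
n_obs = int(df.count().iloc[0])
print(n_obs)  # 3
```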