From ad981a200f05d9e326569f62c8bedfa84e015fb4 Mon Sep 17 00:00:00 2001
From: Wolfgang Preimesberger
Date: Wed, 13 Nov 2024 00:25:45 +0100
Subject: [PATCH] Prepare v0.16.1 (numpy<2) release (#309)

* Fix and filter warnings, remove old plotting module

* Update CI build
---
 .github/workflows/ci.yml                      |  15 +-
 CHANGELOG.rst                                 |   6 +
 environment.yml                               |   4 +-
 setup.cfg                                     |  10 +-
 src/pytesmo/time_series/grouping.py           |   8 +-
 src/pytesmo/time_series/plotting.py           | 160 ------------
 src/pytesmo/utils.py                          |   6 +-
 src/pytesmo/validation_framework/adapters.py  |   4 +-
 .../metric_calculators.py                     | 108 ++++----
 src/pytesmo/validation_framework/upscaling.py |   6 +-
 .../validation_framework/validation.py        |   5 +-
 tests/test_docs/test_examples.py              |   6 +
 tests/test_scaling.py                         |  13 +-
 tests/test_time_series/test_plotting.py       |  54 ----
 .../test_adapters.py                          |  12 +-
 .../test_data_manager.py                      |  27 +-
 .../test_error_handling.py                    |  34 ++-
 .../test_metric_calculators.py                | 240 +++++++++++-------
 .../test_upscaling.py                         |  38 +--
 .../test_validation.py                        |  34 +--
 20 files changed, 342 insertions(+), 448 deletions(-)
 delete mode 100755 src/pytesmo/time_series/plotting.py
 delete mode 100644 tests/test_time_series/test_plotting.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a88ad726..9bbc4ac0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -51,9 +51,9 @@ jobs:
       - name: Export Environment
         shell: bash -l {0}
         run: |
-          mkdir -p .artifacts
+          mkdir -p artifacts
           filename=env_py${{ matrix.python-version }}_${{ matrix.os }}.yml
-          conda env export --no-builds | grep -v "prefix" > .artifacts/$filename
+          conda env export --no-builds | grep -v "prefix" > artifacts/$filename
      - name: Install package and test
         shell: bash -l {0}
         run: |
@@ -71,22 +71,23 @@
         shell: bash -l {0}
         run: |
           git status
-          pip install setuptools_scm
+          pip install setuptools_scm twine
           if [ ${{ matrix.os }} == "windows-latest" ]
           then # build whls on windows
             pip install wheel
-            python setup.py bdist_wheel --dist-dir .artifacts/dist
+            python setup.py bdist_wheel --dist-dir artifacts/dist
           else # build dist on linux
-            python setup.py sdist --dist-dir .artifacts/dist
+            python setup.py sdist --dist-dir artifacts/dist
           fi
-          ls .artifacts/dist
+          ls artifacts/dist
+          twine check artifacts/dist/*
       - name: Upload Artifacts
         uses: actions/upload-artifact@v4
         with:
           name: Artifacts-${{ matrix.python-version }}-${{ matrix.os }}
-          path: .artifacts/*
+          path: artifacts/*
   coveralls:
     name: Submit Coveralls 👚
     needs: build
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 25bf8f05..c9f4e749 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -4,6 +4,12 @@ Changelog
 
 Unreleased changes in master
 ============================
+
+Version 0.16.1, 2024-11-13
+==========================
+- THIS VERSION IS STILL COMPATIBLE WITH ``NUMPY<2.0``; from v0.17 on, pytesmo will require ``numpy>=2.0``
+- The old ``pytesmo.time_series.plotting`` module was removed
+- Fixed and filtered many warnings that were previously emitted when running the tests
 - Fixed an issue with the intra-annual metrics adapter when an empty time series is passed (PR `#307 `_)
 - Metapackage updated (pyscaffold 4.5) (PR `#307 `_)
 - C modules were outdated and could not be compiled, therefore rebuilt (PR `#307 `_)
diff --git a/environment.yml b/environment.yml
index a2734326..77aa7f40 100644
--- a/environment.yml
+++ b/environment.yml
@@ -10,7 +10,7 @@ dependencies:
   - numpy<2.0.0
   - numba
   - scipy>=0.12
-  - pandas>=0.11.0,!=0.15.2
+  - pandas>=0.23.0
   - netcdf4>=1.0.1,!=1.6.2
   - cython>=0.29.21
   - scikit-learn
@@ -34,7 +34,7 @@ dependencies:
   - ipykernel
   - sphinx_rtd_theme
   - ascat>=2.0
-  - ismn==1.3.4
+  - ismn==1.5.1
   - pytest
   - pytest-cov
   - pytest-mpl
diff --git a/setup.cfg b/setup.cfg
index a08464cd..a14609df 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -33,7 +33,7 @@ install_requires =
     importlib-metadata; python_version<"3.8"
     numpy>=1.7.0,<2.0.0
     scipy>=0.12
-    pandas>=0.11.0,!=0.15.2
+    pandas>=0.23.0
     matplotlib>=1.2.0
     netCDF4>=1.0.1,!=1.6.2
     pygeogrids
@@ -103,6 +103,7 @@ norecursedirs =
 markers =
     full_framework : marks slow test that use the whole validation framework (deselect with '-m "not full_framework"')
     slow : marks slow tests (deselect with '-m "not slow"')
+    doc_example : marks slow tests that test Python code from documentation
 testpaths = tests
 # This removes some of the warnings that show up with pytest but are not an issue
 filterwarnings =
@@ -122,6 +123,13 @@
     ignore:`np.bool` is a deprecated alias for the builtin `bool`
     # this comes from the `test_cci` in `test_data_averager`
     ignore: IOError in reading ISMN data
+    # old CDF matching method
+    ignore:Use the new implementation 'cdf_match' instead.:DeprecationWarning
+    # ascat package prints some warnings, doesn't matter for pytesmo
+    ignore::UserWarning:^ascat
+    # ismn package used deprecated version of this, doesn't matter for pytesmo
+    ignore:The 'parallel_process_async' method was renamed to `parallel_process`.:DeprecationWarning
+
 
 [aliases]
 dists = bdist_wheel
diff --git a/src/pytesmo/time_series/grouping.py b/src/pytesmo/time_series/grouping.py
index 1fadf65d..ed52250e 100644
--- a/src/pytesmo/time_series/grouping.py
+++ b/src/pytesmo/time_series/grouping.py
@@ -366,6 +366,8 @@ def filter(self, idx: pd.DatetimeIndex):
         #     selection = dat.query(" | ".join(cond)).index
 
         if self.yearless_date_ranges is not None:
+            cols = {}
+
             for i, gdrange in enumerate(self.yearless_date_ranges):
                 for y in np.unique(idx.year):
                     start = gdrange[0]
@@ -385,7 +387,11 @@
 
                     end_dt = end.to_datetime(years=y)
 
-                    mask[f"gen_range{y}-{i}"] = (idx >= start_dt) & (
+                    cols[f"gen_range{y}-{i}"] = (idx >= start_dt) & (
                         idx <= end_dt)
 
+            mask = pd.concat(
+                [mask, pd.DataFrame(index=mask.index, data=cols)],
+                axis=1)
+
         return mask.any(axis=1, bool_only=True)
diff --git a/src/pytesmo/time_series/plotting.py b/src/pytesmo/time_series/plotting.py
deleted file mode 100755
index f2bbbf7e..00000000
--- a/src/pytesmo/time_series/plotting.py
+++ /dev/null
@@ -1,160 +0,0 @@
-# Copyright (c) 2014,Vienna University of Technology,
-# Department of Geodesy and Geoinformation
-# All rights reserved.
-
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#    * Redistributions of source code must retain the above copyright
-#      notice, this list of conditions and the following disclaimer.
-#    * Redistributions in binary form must reproduce the above copyright
-#      notice, this list of conditions and the following disclaimer in the
-#      documentation and/or other materials provided with the distribution.
-#    * Neither the name of the Vienna University of Technology,
-#      Department of Geodesy and Geoinformation nor the
-#      names of its contributors may be used to endorse or promote products
-#      derived from this software without specific prior written permission.
-
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY,
-# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-'''
-Created on Mar 7, 2014
-
-Plot anomalies around climatology using colors
-
-@author: Christoph Paulik christoph.paulik@geo.tuwien.ac.at
-'''
-
-import matplotlib.pyplot as plt
-import matplotlib.gridspec as gridspec
-import pandas as pd
-
-import pytesmo.time_series.anomaly as anom
-
-
-def plot_clim_anom(df, clim=None, axes=None, markersize=0.75,
-                   mfc='0.3', mec='0.3', clim_color='0.0',
-                   clim_linewidth=0.5, clim_linestyle='-',
-                   pos_anom_color='#799ADA', neg_anom_color='#FD8086',
-                   anom_linewidth=0.2, add_titles=True):
-    """
-    Takes a pandas DataFrame and calculates the climatology and anomaly
-    and plots them in a nice way for each column
-
-    Parameters
-    ----------
-    df : pandas.DataFrame
-    clim : pandas.DataFrame, optional
-        if given these climatologies will be used
-        if not given then climatologies will be calculated
-        this DataFrame must have the same number of columns as df
-        and also the column names.
-        each climatology must have doy as index.
-    axes : list of matplotlib.Axes, optional
-        list of axes on which each column should be plotted
-        if not given a standard layout is generated
-    markersize : float, optional
-        size of the markers for the datapoints
-    mfc : matplotlib color, optional
-        markerfacecolor, color of the marker face
-    mec : matplotlib color, optional
-        markeredgecolor
-    clim_color : matplotlib color, optional
-        color of the climatology
-    clim_linewidth : float, optional
-        linewidth of the climatology
-    clim_linestyle : string, optional
-        linestyle of the climatology
-    pos_anom_color : matplotlib color, optional
-        color of the positive anomaly
-    neg_anom_color : matplotlib color, optional
-        color of the negative anomaly
-    anom_linewidth : float, optional
-        linewidth of the anomaly lines
-    add_titles : boolean, optional
-        if set each subplot will have it's column name as title
-        Default : True
-
-    Returns
-    -------
-    Figure : matplotlib.Figure
-        if no axes were given
-    axes : list of matploblib.Axes
-        if no axes were given
-    """
-
-    if type(df) == pd.Series:
-        df = pd.DataFrame(df)
-
-    nr_columns = len(df.columns)
-
-    # make own axis if necessary
-    if axes is None:
-        own_axis = True
-        gs = gridspec.GridSpec(nr_columns, 1, right=0.8)
-
-        fig = plt.figure(num=None, figsize=(6, 2 * nr_columns),
-                         dpi=150, facecolor='w', edgecolor='k')
-
-        last_axis = fig.add_subplot(gs[nr_columns - 1])
-        axes = []
-        for i, grid in enumerate(gs):
-            if i < nr_columns - 1:
-                ax = fig.add_subplot(grid, sharex=last_axis)
-                axes.append(ax)
-                ax.xaxis.set_visible(False)
-        axes.append(last_axis)
-
-    else:
-        own_axis = False
-
-    for i, column in enumerate(df):
-        Ser = df[column]
-        ax = axes[i]
-
-        if clim is None:
-            clima = anom.calc_climatology(Ser)
-        else:
-            clima = pd.Series(clim[column])
-        anomaly = anom.calc_anomaly(Ser, climatology=clima, return_clim=True)
-
-        anomaly[Ser.name] = Ser
-        anomaly = anomaly.dropna()
-
-        pos_anom = anomaly[Ser.name].values > anomaly['climatology'].values
-        neg_anom = anomaly[Ser.name].values < anomaly['climatology'].values
-
-        ax.plot(anomaly.index, anomaly[Ser.name].values, 'o',
-                markersize=markersize, mfc=mfc, mec=mec)
-
-        ax.plot(anomaly.index, anomaly['climatology'].values,
-                linestyle=clim_linestyle,
-                color=clim_color,
-                linewidth=clim_linewidth)
-
-        ax.fill_between(anomaly.index,
-                        anomaly[Ser.name].values,
-                        anomaly['climatology'].values, interpolate=True,
-                        where=pos_anom, color=pos_anom_color,
-                        linewidth=anom_linewidth)
-        ax.fill_between(anomaly.index,
-                        anomaly[Ser.name].values,
-                        anomaly['climatology'].values, interpolate=True,
-                        where=neg_anom, color=neg_anom_color,
-                        linewidth=anom_linewidth)
-        if add_titles:
-            ax.set_title(column)
-
-    if own_axis:
-        return fig, axes
-    else:
-        return None, None
diff --git a/src/pytesmo/utils.py b/src/pytesmo/utils.py
index c38c7131..a7bb17ce 100644
--- a/src/pytesmo/utils.py
+++ b/src/pytesmo/utils.py
@@ -47,7 +47,7 @@ def rootdir() -> Path:
 
 def deprecated(message: str = None):
     """
-    Decorator for classes or functions to mark them as deprecated.
+    Decorator for class methods or functions to mark them as deprecated.
 
     If the decorator is applied without a specific message (`@deprecated()`),
     the default warning is shown when using the function/class.
     To specify a custom message use it like:
@@ -67,13 +67,11 @@ def decorator(src):
 
         @functools.wraps(src)
         def new_func(*args, **kwargs):
-            warnings.simplefilter('always', DeprecationWarning)
-
             warnings.warn(
                 default_msg if message is None else message,
                 category=DeprecationWarning,
                 stacklevel=2)
-            warnings.simplefilter('default', DeprecationWarning)
+
             return src(*args, **kwargs)
 
         return new_func
diff --git a/src/pytesmo/validation_framework/adapters.py b/src/pytesmo/validation_framework/adapters.py
index 115d71ba..256217b6 100644
--- a/src/pytesmo/validation_framework/adapters.py
+++ b/src/pytesmo/validation_framework/adapters.py
@@ -699,9 +699,9 @@ def _adapt(self, data: DataFrame) -> DataFrame:
 
         if self.drop_original:
             if self.time_offset_fields is not None:
-                data.drop(columns=self.time_offset_fields, inplace=True)
+                data = data.drop(columns=self.time_offset_fields)
             if self.base_time_field in data.columns:
-                data.drop(columns=[self.base_time_field], inplace=True)
+                data = data.drop(columns=[self.base_time_field])
 
         # Remove NaNs from index, if present
         data = data.loc[data.index.dropna()]
diff --git a/src/pytesmo/validation_framework/metric_calculators.py b/src/pytesmo/validation_framework/metric_calculators.py
index 1f2c87dc..b2fb0c4a 100644
--- a/src/pytesmo/validation_framework/metric_calculators.py
+++ b/src/pytesmo/validation_framework/metric_calculators.py
@@ -256,7 +256,7 @@ class BasicMetrics(MetadataMetrics):
     """
 
     def __init__(
-        self, other_name="k1", calc_tau=False, metadata_template=None
+            self, other_name="k1", calc_tau=False, metadata_template=None
     ):
         super(BasicMetrics, self).__init__(
             other_name=other_name, metadata_template=metadata_template
         )
@@ -373,7 +373,7 @@ class FTMetrics(MetadataMetrics):
     """
 
     def __init__(
-        self, frozen_flag=2, other_name="k1", metadata_template=None
+            self, frozen_flag=2, other_name="k1", metadata_template=None
     ):
         super(FTMetrics, self).__init__(
             other_name=other_name, metadata_template=metadata_template
         )
@@ -454,11 +454,11 @@ class HSAF_Metrics(MetadataMetrics):
     """
 
     def __init__(
-        self,
-        other_name1="k1",
-        other_name2="k2",
-        dataset_names=None,
-        metadata_template=None,
+            self,
+            other_name1="k1",
+            other_name2="k2",
+            dataset_names=None,
+            metadata_template=None,
     ):
 
         super(HSAF_Metrics, self).__init__(
@@ -677,16 +677,21 @@ class IntercomparisonMetrics(MetadataMetrics):
     """
 
     def __init__(
-        self,
-        refname="ref",
-        other_names=("k1", "k2", "k3"),
-        calc_tau=False,
-        metrics_between_nonref=False,
-        calc_rho=True,
-        dataset_names=None,
-        metadata_template=None,
+            self,
+            refname="ref",
+            other_names=("k1", "k2", "k3"),
+            calc_tau=False,
+            metrics_between_nonref=False,
+            calc_rho=True,
+            dataset_names=None,
+            metadata_template=None,
     ):
-
+        warnings.warn(
+            "pytesmo IntercomparisonMetrics calculator "
+            "is deprecated and will be removed in a future "
+            "release. Use the PairwiseIntercomparisonMetrics "
+            "class instead.", DeprecationWarning
+        )
         other_names = list(other_names)
         super(IntercomparisonMetrics, self).__init__(
             other_name=other_names, metadata_template=metadata_template
@@ -799,7 +804,6 @@ def calc_metrics(self, data, gpi_info):
             dataset["status"][0] = eh.INSUFFICIENT_DATA
             return dataset
 
-
         # make sure we have the correct order
         data = data[self.df_columns]
 
@@ -932,13 +936,13 @@ class TCMetrics(MetadataMetrics):
     """
 
     def __init__(
-        self,
-        other_names=("k1", "k2"),
-        calc_tau=False,
-        dataset_names=None,
-        tc_metrics_for_ref=True,
-        metrics_between_nonref=False,
-        metadata_template=None,
+            self,
+            other_names=("k1", "k2"),
+            calc_tau=False,
+            dataset_names=None,
+            tc_metrics_for_ref=True,
+            metrics_between_nonref=False,
+            metadata_template=None,
     ):
         """
         Triple Collocation metrics as implemented in the QA4SM project.
@@ -970,6 +974,13 @@ def __init__(
             which is then propagated to the end netCDF results file.
         """
 
+        warnings.warn(
+            "pytesmo TCMetrics calculator "
+            "is deprecated and will be removed in a future "
+            "release. Use the TripleCollocationMetrics "
+            "class instead.", DeprecationWarning
+        )
+
         self.ref_name = "ref"
         other_names = list(other_names)
         super(TCMetrics, self).__init__(
@@ -1056,10 +1067,11 @@
                 )
             for metric, ds in metrics_thds.keys():
                 if not any(
-                    [
-                        self.ds_names_lut[other_ds] == ds
-                        for other_ds in thds_name.split(self.ds_names_split)
-                    ]
+                        [
+                            self.ds_names_lut[other_ds] == ds
+                            for other_ds in thds_name.split(
+                                self.ds_names_split)
+                        ]
                 ):
                     continue
                 full_name = "_".join([metric, ds])
@@ -1146,7 +1158,6 @@ def calc_metrics(self, data, gpi_info):
             dataset["status"][0] = eh.INSUFFICIENT_DATA
             return dataset
 
-
         # calculate Pearson correlation
         pearson_R, pearson_p = df_metrics.pearsonr(data)
         pearson_R, pearson_p = pearson_R._asdict(), pearson_p._asdict()
@@ -1204,7 +1215,7 @@
             )
 
         for metr, res in dict(
-            snr=snr, err_std=err_std, beta=beta
+                snr=snr, err_std=err_std, beta=beta
         ).items():
             for ds, ds_res in res.items():
                 m_ds = "{}_{}".format(metr, self.ds_names_lut[ds])
@@ -1308,7 +1319,6 @@ class RollingMetrics(MetadataMetrics):
     """
 
     def __init__(self, other_name="k1", metadata_template=None):
-
         super(RollingMetrics, self).__init__(
             other_name=other_name, metadata_template=metadata_template
         )
@@ -1317,7 +1327,7 @@ def __init__(self, other_name="k1", metadata_template=None):
         self.result_template.update(_get_metric_template(self.basic_metrics))
 
     def calc_metrics(
-        self, data, gpi_info, window_size="30d", center=True, min_periods=2
+            self, data, gpi_info, window_size="30d", center=True, min_periods=2
     ):
         """
         Calculate the desired statistics.
@@ -1444,7 +1454,7 @@ def _pairwise_metric_names(self):
         return metrics
 
     def _calc_pairwise_metrics(
-        self, x, y, mx, my, varx, vary, cov, result, suffix=""
+            self, x, y, mx, my, varx, vary, cov, result, suffix=""
     ):
         """
         Calculates pairwise metrics, making use of pre-computed moments.
@@ -1602,15 +1612,15 @@ class PairwiseIntercomparisonMetrics(MetadataMetrics, PairwiseMetricsMixin):
     """
 
     def __init__(
-        self,
-        min_obs=10,
-        calc_spearman=True,
-        calc_kendall=True,
-        analytical_cis=True,
-        bootstrap_cis=False,
-        bootstrap_min_obs=100,
-        bootstrap_alpha=0.05,
-        metadata_template=None,
+            self,
+            min_obs=10,
+            calc_spearman=True,
+            calc_kendall=True,
+            analytical_cis=True,
+            bootstrap_cis=False,
+            bootstrap_min_obs=100,
+            bootstrap_alpha=0.05,
+            metadata_template=None,
     ):
 
         super().__init__(min_obs=min_obs, metadata_template=metadata_template)
@@ -1703,13 +1713,13 @@ class TripleCollocationMetrics(MetadataMetrics, PairwiseMetricsMixin):
     """
 
     def __init__(
-        self,
-        refname,
-        min_obs=10,
-        bootstrap_cis=False,
-        bootstrap_min_obs=100,
-        bootstrap_alpha=0.05,
-        metadata_template=None,
+            self,
+            refname,
+            min_obs=10,
+            bootstrap_cis=False,
+            bootstrap_min_obs=100,
+            bootstrap_alpha=0.05,
+            metadata_template=None,
     ):
 
         super().__init__(min_obs=min_obs, metadata_template=metadata_template)
diff --git a/src/pytesmo/validation_framework/upscaling.py b/src/pytesmo/validation_framework/upscaling.py
index 41110e33..533a7d47 100644
--- a/src/pytesmo/validation_framework/upscaling.py
+++ b/src/pytesmo/validation_framework/upscaling.py
@@ -212,8 +212,8 @@ def temporal_match(
         for n, df in enumerate(to_match):
             if df is None:
                 continue
-            points = int(df.count())
-            if int(ref.count()) >= points:
+            points = int(df.count().iloc[0])
+            if int(ref.count().iloc[0]) >= points:
                 continue
             else:
                 ref = df
@@ -225,7 +225,7 @@
         matched = combined_temporal_collocation(
             ref,
             to_match,
-            pd.Timedelta(hours, "H"),
+            pd.Timedelta(hours, "h"),
             combined_dropna=combined_dropna,
             checkna=True,
         )
diff --git a/src/pytesmo/validation_framework/validation.py b/src/pytesmo/validation_framework/validation.py
index 60edebaf..d84dae02 100644
--- a/src/pytesmo/validation_framework/validation.py
+++ b/src/pytesmo/validation_framework/validation.py
@@ -440,10 +440,7 @@ def dummy_result():
 
         # at this stage we can drop the column multiindex and just use
         # the dataset name
-        if LooseVersion(pd.__version__) < LooseVersion("0.23"):
-            data.columns = data.columns.droplevel(level=1)
-        else:
-            data = data.rename(columns=lambda x: x[0])
+        data = data.rename(columns=lambda x: x[0])
 
         if self.scaling is not None:
             # get scaling index by finding the column in the
diff --git a/tests/test_docs/test_examples.py b/tests/test_docs/test_examples.py
index 8b3e742a..cf1cf1cf 100644
--- a/tests/test_docs/test_examples.py
+++ b/tests/test_docs/test_examples.py
@@ -5,6 +5,7 @@
 """
 
 import os
+import subprocess
 from nbconvert.preprocessors import ExecutePreprocessor
 import nbformat
 import pytest
@@ -34,6 +35,11 @@ def test_ipython_notebook(notebook):
     applicable to the tests here, this file must be within a sub-folder of
     the tests/ directory (assuming that examples are in docs/examples)!
""" + # Handles jupyter warning (can probably be removed again in future): + os.environ["JUPYTER_PLATFORM_DIRS"] = "1" + subprocess.call(["jupyter", "--paths"]) + + # Run ipynb files and check if they pass preprocessor = ExecutePreprocessor(timeout=600, kernel_name="python3") with open(os.path.join(examples_path, notebook)) as f: nb = nbformat.read(f, as_version=4) diff --git a/tests/test_scaling.py b/tests/test_scaling.py index a1cc04bc..8b27b199 100644 --- a/tests/test_scaling.py +++ b/tests/test_scaling.py @@ -38,12 +38,10 @@ import numpy.testing as nptest import pytest - scaling_methods = list(scaling.get_scaling_method_lut().keys()) def test_mean_std_scaling(): - # use a random sample from a standard distribution n = 1000 x = np.random.normal(0, 0.5, n) @@ -55,7 +53,6 @@ def test_mean_std_scaling(): def test_min_max_scaling(): - # use a random sample from a standard distribution n = 1000 x = np.random.normal(0, 0.5, n) @@ -79,6 +76,7 @@ def test_scaling_method(method): y = np.arange(n) * 0.5 o = getattr(scaling, method)(y, x) + nptest.assert_almost_equal(x, o) @@ -102,7 +100,6 @@ def test_scaling_kwargs(method): @pytest.mark.parametrize('method', scaling_methods) def test_scale(method): - n = 1000 x = np.arange(n) y = np.arange(n) * 0.5 @@ -117,7 +114,6 @@ def test_scale(method): @pytest.mark.parametrize('method', ['non_existing_method']) def test_scale_error(method): - n = 1000 x = np.arange(n) y = np.arange(n) * 0.5 @@ -133,7 +129,6 @@ def test_scale_error(method): @pytest.mark.parametrize('method', ['non_existing_method']) def test_add_scale_error(method): - n = 1000 x = np.arange(n, dtype=float) y = np.arange(n) * 0.5 @@ -147,7 +142,6 @@ def test_add_scale_error(method): @pytest.mark.parametrize('method', scaling_methods) def test_add_scale(method): - n = 1000 x = np.arange(n, dtype=float) y = np.arange(n) * 0.5 @@ -182,11 +176,12 @@ def test_linreg_with_nan(): nptest.assert_almost_equal(df.loc[10:, 'x'].values, df.loc[10:, 'y'].values) - assert(df.index.size == n) + assert (df.index.size == n) +@pytest.mark.filterwarnings( + "ignore:Too few percentiles for chosen k.:UserWarning") def test_single_percentile_data(): - n = 1000 x = np.arange(n, dtype=float) y = np.ones(n) diff --git a/tests/test_time_series/test_plotting.py b/tests/test_time_series/test_plotting.py deleted file mode 100644 index 5ff15efb..00000000 --- a/tests/test_time_series/test_plotting.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2015,Vienna University of Technology, -# Department of Geodesy and Geoinformation -# All rights reserved. - -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the Vienna University of Technology, -# Department of Geodesy and Geoinformation nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. 
- -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL VIENNA UNIVERSITY OF TECHNOLOGY, -# DEPARTMENT OF GEODESY AND GEOINFORMATION BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -''' -Test for timeseries plotting -''' - -import numpy as np -import pandas as pd -import matplotlib -matplotlib.use('Agg') -import pytest - -import pytesmo.time_series.plotting as plotting - - -@pytest.mark.xfail(matplotlib.__version__ != '2.1.2', reason='plotting defaults change') -@pytest.mark.mpl_image_compare(tolerance=21) -def test_anomaly_calc_given_climatology(): - - clim = pd.DataFrame({'data': np.concatenate((np.arange(150) - 40, - np.arange(216) + 190))}, - index=np.arange(366) + 1) - data = pd.DataFrame({'data': np.arange(366)}, - index=pd.date_range('2000-01-01', periods=366)) - fig, axes = plotting.plot_clim_anom(data, clim=clim) - return fig - - diff --git a/tests/test_validation_framework/test_adapters.py b/tests/test_validation_framework/test_adapters.py index 68d83149..cc83bf0b 100644 --- a/tests/test_validation_framework/test_adapters.py +++ b/tests/test_validation_framework/test_adapters.py @@ -1,6 +1,7 @@ import pytest from pytesmo.validation_framework.adapters import TimestampAdapter + """ Test for the adapters. 
""" @@ -92,7 +93,7 @@ def test_advanced_masking_adapter_nans_ignored(): ds = TestDataset("", n=20) # introduce nan ts = ds.read() - ts.iloc[7]["x"] = np.nan + ts.iloc[7, ts.columns.get_loc('x')] = np.nan def _read(): return ts @@ -168,7 +169,6 @@ def test_anomaly_clim_adapter_one_column(): def test_adapters_custom_fct_name(): - def assert_all_read_fcts(reader): assert (np.all(reader.read() == reader.read())) assert (np.all(reader.read() == reader.alias_read())) @@ -305,6 +305,8 @@ def read_empty(): pd.DataFrame(columns=['x', 'y', 'xy_mean'])) +@pytest.mark.filterwarnings( + "ignore:The input DataFrame is either empty or has.*:UserWarning") def test_timestamp_adapter(): ds = TestDataset("", n=20) @@ -421,7 +423,7 @@ def _read_all_nans(): # ----------------------- def _read_empty(): - return pd.DataFrame(columns=["sm", "offset"],) + return pd.DataFrame(columns=["sm", "offset"], ) setattr(ds, "read", _read_empty) origin = ds.read() @@ -467,8 +469,8 @@ def _read_complex(): should_be = origin.apply( lambda row: np.datetime64("2005-02-01") + np.timedelta64( int(row["base_time"]), "D") + np.timedelta64( - int(row["offset_min"]), "m") + np.timedelta64( - int(row["offset_sec"]), "s"), + int(row["offset_min"]), "m") + np.timedelta64( + int(row["offset_sec"]), "s"), axis=1).values assert (adapted.index.values == should_be).all() diff --git a/tests/test_validation_framework/test_data_manager.py b/tests/test_validation_framework/test_data_manager.py index 1f787aa0..6ae1d990 100644 --- a/tests/test_validation_framework/test_data_manager.py +++ b/tests/test_validation_framework/test_data_manager.py @@ -197,8 +197,10 @@ def test_DataManager_dataset_names(): dm = setup_TestDataManager() result_names = dm.get_results_names(3) - assert result_names == [(('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm')), - (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm2'))] + assert result_names == [ + (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm')), + (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm2')) + ] result_names = dm.get_results_names(2) assert result_names == [(('DS1', 'soil moisture'), ('DS2', 'sm')), @@ -209,7 +211,8 @@ def test_DataManager_dataset_names(): def test_DataManager_get_data(): datasets = setup_TestDatasets() - dm = DataManager(datasets, 'DS1', read_ts_names={f'DS{i}': 'read' for i in range(1,4)}) + dm = DataManager(datasets, 'DS1', + read_ts_names={f'DS{i}': 'read' for i in range(1, 4)}) data = dm.get_data(1, 1, 1) assert sorted(list(data)) == ['DS1', 'DS2', 'DS3'] @@ -220,8 +223,10 @@ def test_get_result_names(): 'DS2': ['sm'], 'DS3': ['sm', 'sm2']} result_names = get_result_names(tst_ds_dict, 'DS1', 3) - assert result_names == [(('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm')), - (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm2'))] + assert result_names == [ + (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm')), + (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm2')) + ] result_names = get_result_names(tst_ds_dict, 'DS1', 2) assert result_names == [(('DS1', 'soil moisture'), ('DS2', 'sm')), @@ -239,8 +244,10 @@ def test_get_result_combinations(): 'DS2': ['sm'], 'DS3': ['sm', 'sm2']} result_names = get_result_combinations(tst_ds_dict, n=3) - assert result_names == [(('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm')), - (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm2'))] + assert result_names == [ + (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm')), + (('DS1', 'soil moisture'), ('DS2', 'sm'), ('DS3', 'sm2')) + ] result_names = 
get_result_combinations(tst_ds_dict, n=2) assert result_names == [(('DS1', 'soil moisture'), ('DS2', 'sm')), @@ -250,6 +257,7 @@ def test_get_result_combinations(): (('DS2', 'sm'), ('DS3', 'sm2'))] +@pytest.mark.filterwarnings("ignore:Less than k=1 points.*:UserWarning") def test_maxdist(): testdf = pd.DataFrame([1, 1, 1], columns=["sm"]) @@ -308,8 +316,7 @@ def flush(self): df_dict = dm.get_data(0, 0, 0) assert df_dict == expected - # test if the far away point in the other dataset can be found (should not happen) + # test if the far away point in the other dataset can be found + # (should not happen) df_dict = dm.get_data(1, 1, 1) assert df_dict == {} - - diff --git a/tests/test_validation_framework/test_error_handling.py b/tests/test_validation_framework/test_error_handling.py index d09c7503..0202d1e4 100644 --- a/tests/test_validation_framework/test_error_handling.py +++ b/tests/test_validation_framework/test_error_handling.py @@ -15,6 +15,7 @@ from .utils import create_datasets +@pytest.mark.filterwarnings("ignore:Not enough observations.*:UserWarning") def test_error_handling_empty_df(): # This tests whether error handling works if one of the datasets consists # of an empty dataframe. @@ -34,7 +35,8 @@ def test_error_handling_empty_df(): datasets, spatial_ref="0-ERA5", metrics_calculators={(n_datasets, 2): metric_calculator.calc_metrics}, - temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "h")), + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(12, "h")), ) gpis = list(range(npoints)) args = (gpis, gpis, gpis) @@ -56,6 +58,7 @@ def test_error_handling_empty_df(): assert np.all(results[key]["status"] == eh.NO_TEMP_MATCHED_DATA) +@pytest.mark.filterwarnings("ignore:Not enough observations.*:UserWarning") def test_error_handling_nodata(): # this tests if we get the NoGpiDataError if one dataset doesn't have any # values. Here we use only 2 datasets, otherwise the third one will be @@ -72,7 +75,8 @@ def test_error_handling_nodata(): datasets, spatial_ref="0-ERA5", metrics_calculators={(n_datasets, 2): metric_calculator.calc_metrics}, - temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "h")), + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(12, "h")), ) gpis = list(range(npoints)) args = (gpis, gpis, gpis) @@ -94,6 +98,7 @@ def test_error_handling_nodata(): assert np.all(results[key]["status"] == eh.NO_GPI_DATA) +@pytest.mark.filterwarnings("ignore:Not enough observations.*:UserWarning") def test_error_handling_not_enough_data(): # This tests if we get a proper warning if we have not enough data to # calculate correlations (nsamples = 5). In this case, the behaviour of all @@ -110,7 +115,8 @@ def test_error_handling_not_enough_data(): datasets, spatial_ref="0-ERA5", metrics_calculators={(n_datasets, 2): metric_calculator.calc_metrics}, - temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "h")), + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(12, "h")), ) gpis = list(range(npoints)) args = (gpis, gpis, gpis) @@ -118,9 +124,11 @@ def test_error_handling_not_enough_data(): for handle_errors in ["ignore", "raise"]: with pytest.warns( - UserWarning, match="Not enough observations to calculate metrics." + UserWarning, + match="Not enough observations to calculate metrics." 
): - results = val.calc(*args, **kwargs, handle_errors=handle_errors) + results = val.calc(*args, **kwargs, + handle_errors=handle_errors) for key in results: for metric in results[key]: assert len(results[key][metric]) == npoints @@ -142,7 +150,8 @@ def test_error_handling_ok(): datasets, spatial_ref="0-ERA5", metrics_calculators={(n_datasets, 2): metric_calculator.calc_metrics}, - temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "h")), + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(12, "h")), ) gpis = list(range(npoints)) args = (gpis, gpis, gpis) @@ -158,6 +167,7 @@ def test_error_handling_ok(): assert np.all(results[key]["status"] == eh.OK) +@pytest.mark.filterwarnings("ignore:Not enough observations.*:UserWarning") def test_error_handling_scaling_failed(): # This tests whether a scaling error is raised if the scaling fails due to # insufficient data. @@ -177,7 +187,8 @@ def scale(self, data, ref_idx, gpi_info): scaling=BadScaler(), spatial_ref="0-ERA5", metrics_calculators={(n_datasets, 2): metric_calculator.calc_metrics}, - temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "h")), + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(12, "h")), ) gpis = list(range(npoints)) args = (gpis, gpis, gpis) @@ -197,6 +208,7 @@ def scale(self, data, ref_idx, gpi_info): assert np.all(results[key]["status"] == eh.SCALING_FAILED) +@pytest.mark.filterwarnings("ignore:Not enough observations.*:UserWarning") def test_error_handling_datamanager_failed(): # This tests whether a scaling error is raised if the scaling fails due to # insufficient data. @@ -220,7 +232,8 @@ def bad_get_data(*args): data_manager, spatial_ref=spatial_ref, metrics_calculators={(n_datasets, 2): metric_calculator.calc_metrics}, - temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "h")), + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(12, "h")), ) gpis = list(range(npoints)) args = (gpis, gpis, gpis) @@ -240,6 +253,7 @@ def bad_get_data(*args): assert np.all(results[key]["status"] == eh.DATA_MANAGER_FAILED) +@pytest.mark.filterwarnings("ignore:Not enough observations.*:UserWarning") def test_error_handling_temp_matching_failed(): # This tests whether a TemporalMatchingError is raised if the matching # fails @@ -278,6 +292,7 @@ def bad_matching(*args, **kwargs): assert np.all(results[key]["status"] == eh.TEMPORAL_MATCHING_FAILED) +@pytest.mark.filterwarnings("ignore:Not enough observations.*:UserWarning") def test_error_handling_metrics_calculation_failed(): # This tests whether a MetricsCalculationError is raised if metrics # calculation fails @@ -296,7 +311,8 @@ def bad_metrics(data, gpi_info): datasets, spatial_ref="0-ERA5", metrics_calculators={(n_datasets, 2): bad_metrics}, - temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "h")), + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(12, "h")), ) gpis = list(range(npoints)) args = (gpis, gpis, gpis) diff --git a/tests/test_validation_framework/test_metric_calculators.py b/tests/test_validation_framework/test_metric_calculators.py index 59ee3177..3d7164bb 100644 --- a/tests/test_validation_framework/test_metric_calculators.py +++ b/tests/test_validation_framework/test_metric_calculators.py @@ -62,7 +62,6 @@ from pytesmo.validation_framework.results_manager import netcdf_results_manager import pytesmo.metrics as metrics - from .utils import DummyReader @@ -155,7 +154,8 @@ def test_BasicMetrics_calculator_metadata(): df = make_some_data() data = df[["ref", 
"k1"]] - metadata_dict_template = {"network": np.array(["None"], dtype="U256")} + metadata_dict_template = { + "network": np.array(["None"], dtype="U256")} metriccalc = BasicMetrics( other_name="k1", @@ -219,7 +219,8 @@ def test_BasicMetricsPlusMSE_calculator_metadata(): df = make_some_data() data = df[["ref", "k1"]] - metadata_dict_template = {"network": np.array(["None"], dtype="U256")} + metadata_dict_template = { + "network": np.array(["None"], dtype="U256")} metriccalc = BasicMetricsPlusMSE( other_name="k1", metadata_template=metadata_dict_template @@ -253,9 +254,10 @@ def test_IntercompMetrics_calculator(): df = make_some_data() data = df[["ref", "k1", "k2"]] - metriccalc = IntercomparisonMetrics( - other_names=("k1", "k2"), calc_tau=True - ) + with pytest.warns(DeprecationWarning): + metriccalc = IntercomparisonMetrics( + other_names=("k1", "k2"), calc_tau=True + ) res = metriccalc.calc_metrics(data, gpi_info=(0, 0, 0)) @@ -275,34 +277,42 @@ def test_IntercompMetrics_calculator(): ) np.testing.assert_almost_equal( - res["mse_corr_between_ref_and_k1"], np.array([0], dtype=np.float32) + res["mse_corr_between_ref_and_k1"], + np.array([0], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_corr_between_ref_and_k2"], np.array([0], dtype=np.float32) + res["mse_corr_between_ref_and_k2"], + np.array([0], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_bias_between_ref_and_k1"], np.array([0.04], dtype=np.float32) + res["mse_bias_between_ref_and_k1"], + np.array([0.04], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_bias_between_ref_and_k2"], np.array([0.04], dtype=np.float32) + res["mse_bias_between_ref_and_k2"], + np.array([0.04], dtype=np.float32) ) # scipy 1.3.0 is not built for python 2.7 so we allow both for now assert ( - np.isnan(res["p_R_between_ref_and_k1"]) - or res["p_R_between_ref_and_k1"] == 1.0 + np.isnan(res["p_R_between_ref_and_k1"]) + or res["p_R_between_ref_and_k1"] == 1.0 ) assert ( - np.isnan(res["p_R_between_ref_and_k2"]) - or res["p_R_between_ref_and_k2"] == 1.0 + np.isnan(res["p_R_between_ref_and_k2"]) + or res["p_R_between_ref_and_k2"] == 1.0 ) - assert res["RMSD_between_ref_and_k1"] == np.array([0.2], dtype="float32") - assert res["RMSD_between_ref_and_k2"] == np.array([0.2], dtype="float32") + assert (res["RMSD_between_ref_and_k1"] == + np.array([0.2], dtype="float32")) + assert (res["RMSD_between_ref_and_k2"] == + np.array([0.2], dtype="float32")) - assert res["BIAS_between_ref_and_k1"] == np.array([-0.2], dtype="float32") - assert res["BIAS_between_ref_and_k2"] == np.array([0.2], dtype="float32") + assert (res["BIAS_between_ref_and_k1"] == + np.array([-0.2], dtype="float32")) + assert (res["BIAS_between_ref_and_k2"] == + np.array([0.2], dtype="float32")) np.testing.assert_almost_equal( res["urmsd_between_ref_and_k1"], np.array([0.0], dtype="float32") @@ -322,13 +332,15 @@ def test_IntercompMetrics_calculator_metadata(): df = make_some_data() data = df[["ref", "k1", "k2"]] - metadata_dict_template = {"network": np.array(["None"], dtype="U256")} + metadata_dict_template = { + "network": np.array(["None"], dtype="U256")} - metriccalc = IntercomparisonMetrics( - other_names=("k1", "k2"), - calc_tau=True, - metadata_template=metadata_dict_template, - ) + with pytest.warns(DeprecationWarning): + metriccalc = IntercomparisonMetrics( + other_names=("k1", "k2"), + calc_tau=True, + metadata_template=metadata_dict_template, + ) res = metriccalc.calc_metrics( data, gpi_info=(0, 0, 0, {"network": "SOILSCAPE"}) ) @@ -344,11 +356,12 @@ 
def test_TC_metrics_calculator(): df = make_some_data() data = df[["ref", "k1", "k2", "k3"]] - metriccalc = TCMetrics( - other_names=("k1", "k2", "k3"), - calc_tau=True, - dataset_names=("ref", "k1", "k2", "k3"), - ) + with pytest.warns(DeprecationWarning): + metriccalc = TCMetrics( + other_names=("k1", "k2", "k3"), + calc_tau=True, + dataset_names=("ref", "k1", "k2", "k3"), + ) res = metriccalc.calc_metrics(data, gpi_info=(0, 0, 0)) @@ -361,47 +374,59 @@ def test_TC_metrics_calculator(): assert np.isnan(res["rho_between_ref_and_k2"]) np.testing.assert_almost_equal( - res["mse_between_ref_and_k1"], np.array([0.04], dtype=np.float32) + res["mse_between_ref_and_k1"], + np.array([0.04], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_between_ref_and_k2"], np.array([0.04], dtype=np.float32) + res["mse_between_ref_and_k2"], + np.array([0.04], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_corr_between_ref_and_k1"], np.array([0], dtype=np.float32) + res["mse_corr_between_ref_and_k1"], + np.array([0], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_corr_between_ref_and_k2"], np.array([0], dtype=np.float32) + res["mse_corr_between_ref_and_k2"], + np.array([0], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_bias_between_ref_and_k1"], np.array([0.04], dtype=np.float32) + res["mse_bias_between_ref_and_k1"], + np.array([0.04], dtype=np.float32) ) np.testing.assert_almost_equal( - res["mse_bias_between_ref_and_k2"], np.array([0.04], dtype=np.float32) + res["mse_bias_between_ref_and_k2"], + np.array([0.04], dtype=np.float32) ) # scipy 1.3.0 is not built for python 2.7 so we allow both for now assert ( - np.isnan(res["p_R_between_ref_and_k1"]) - or res["p_R_between_ref_and_k1"] == 1.0 + np.isnan(res["p_R_between_ref_and_k1"]) + or res["p_R_between_ref_and_k1"] == 1.0 ) assert ( - np.isnan(res["p_R_between_ref_and_k2"]) - or res["p_R_between_ref_and_k2"] == 1.0 + np.isnan(res["p_R_between_ref_and_k2"]) + or res["p_R_between_ref_and_k2"] == 1.0 ) - assert res["RMSD_between_ref_and_k1"] == np.array([0.2], dtype="float32") - assert res["RMSD_between_ref_and_k2"] == np.array([0.2], dtype="float32") + assert (res["RMSD_between_ref_and_k1"] == + np.array([0.2], dtype="float32")) + assert (res["RMSD_between_ref_and_k2"] == + np.array([0.2], dtype="float32")) - assert res["BIAS_between_ref_and_k1"] == np.array([-0.2], dtype="float32") - assert res["BIAS_between_ref_and_k2"] == np.array([0.2], dtype="float32") + assert (res["BIAS_between_ref_and_k1"] == + np.array([-0.2], dtype="float32")) + assert (res["BIAS_between_ref_and_k2"] == + np.array([0.2], dtype="float32")) np.testing.assert_almost_equal( - res["urmsd_between_ref_and_k1"], np.array([0.0], dtype="float32") + res["urmsd_between_ref_and_k1"], + np.array([0.0], dtype="float32") ) np.testing.assert_almost_equal( - res["urmsd_between_ref_and_k2"], np.array([0.0], dtype="float32") + res["urmsd_between_ref_and_k2"], + np.array([0.0], dtype="float32") ) assert "RSS_between_ref_and_k1" in res.keys() @@ -430,14 +455,16 @@ def test_TC_metrics_calculator_metadata(): df = make_some_data() data = df[["ref", "k1", "k2"]] - metadata_dict_template = {"network": np.array(["None"], dtype="U256")} - - metriccalc = TCMetrics( - other_names=("k1", "k2"), - calc_tau=True, - dataset_names=["ref", "k1", "k2"], - metadata_template=metadata_dict_template, - ) + metadata_dict_template = { + "network": np.array(["None"], dtype="U256")} + + with pytest.warns(DeprecationWarning): + metriccalc = TCMetrics( + 
other_names=("k1", "k2"), + calc_tau=True, + dataset_names=["ref", "k1", "k2"], + metadata_template=metadata_dict_template, + ) res = metriccalc.calc_metrics( data, gpi_info=(0, 0, 0, {"network": "SOILSCAPE"}) ) @@ -472,7 +499,8 @@ def test_FTMetrics_metadata(): df = make_some_data() data = df[["ref", "k1"]] - metadata_dict_template = {"network": np.array(["None"], dtype="U256")} + metadata_dict_template = { + "network": np.array(["None"], dtype="U256")} metriccalc = FTMetrics( frozen_flag=2, @@ -510,7 +538,8 @@ def test_BasicSeasonalMetrics_metadata(): df = make_some_data() data = df[["ref", "k1"]] - metadata_dict_template = {"network": np.array(["None"], dtype="U256")} + metadata_dict_template = { + "network": np.array(["None"], dtype="U256")} with pytest.warns(UserWarning): metriccalc = MonthsMetricsAdapter( @@ -525,6 +554,8 @@ def test_BasicSeasonalMetrics_metadata(): assert res["network"] == np.array(["SOILSCAPE"], dtype="U256") +@pytest.mark.filterwarnings( + "ignore:invalid value encountered in divide*:RuntimeWarning") def test_HSAF_Metrics(): """ Test HSAF Metrics @@ -542,6 +573,8 @@ def test_HSAF_Metrics(): assert np.isnan(res["ref_k2_ALL_rho"]) +@pytest.mark.filterwarnings( + "ignore:invalid value encountered in divide*:RuntimeWarning") def test_HSAF_Metrics_metadata(): """ Test HSAF Metrics with metadata. @@ -583,7 +616,8 @@ def test_RollingMetrics(): for i in range(indexer.shape[0]): rmsd_arr.append( metrics.rmsd( - df["ref"][indexer[i, :]].values, df["k1"][indexer[i, :]].values + df.iloc[indexer[i, :], df.columns.get_loc("ref")].values, + df.iloc[indexer[i, :], df.columns.get_loc("k1")].values ) ) @@ -611,7 +645,7 @@ def make_datasets(df): return datasets -def testdata_known_results(): +def make_testdata_known_results(): dr = pd.date_range("2000", "2020", freq="D") n = len(dr) x = np.ones(n) * 2 @@ -684,16 +718,19 @@ def testdata_known_results(): for ck in expected: for m in expected[ck]: if m in ["n_obs", "gpi"]: - expected[ck][m] = np.array([expected[ck][m]], dtype=np.int32) + expected[ck][m] = np.array([expected[ck][m]], + dtype=np.int32) elif m in ["lat", "lon"]: - expected[ck][m] = np.array([expected[ck][m]], dtype=np.float64) + expected[ck][m] = np.array([expected[ck][m]], + dtype=np.float64) else: - expected[ck][m] = np.array([expected[ck][m]], dtype=np.float32) + expected[ck][m] = np.array([expected[ck][m]], + dtype=np.float32) return make_datasets(df), expected -def testdata_random(): +def make_testdata_random(): np.random.seed(42) dr = pd.date_range("2000", "2020", freq="D") n = len(dr) @@ -749,19 +786,24 @@ def testdata_random(): for ck in expected: for m in expected[ck]: if m in ["n_obs", "gpi"]: - expected[ck][m] = np.array([expected[ck][m]], dtype=np.int32) + expected[ck][m] = np.array([expected[ck][m]], + dtype=np.int32) elif m in ["lat", "lon"]: - expected[ck][m] = np.array([expected[ck][m]], dtype=np.float64) + expected[ck][m] = np.array([expected[ck][m]], + dtype=np.float64) else: - expected[ck][m] = np.array([expected[ck][m]], dtype=np.float32) + expected[ck][m] = np.array([expected[ck][m]], + dtype=np.float32) return make_datasets(df), expected @pytest.mark.parametrize( - "testdata_generator", [testdata_known_results, testdata_random] + "testdata_generator", [make_testdata_known_results, make_testdata_random] ) @pytest.mark.parametrize("seas_metrics", [None, MonthsMetricsAdapter]) +@pytest.mark.filterwarnings( + "ignore:invalid value encountered in divide.*:RuntimeWarning") def test_PairwiseIntercomparisonMetrics(testdata_generator, seas_metrics): # This 
test first compares the PairwiseIntercomparisonMetrics to known # results and then confirms that it agrees with IntercomparisonMetrics as @@ -836,26 +878,26 @@ def test_PairwiseIntercomparisonMetrics(testdata_generator, seas_metrics): # preparation of IntercomparisonMetrics run for comparison ds_names = list(datasets.keys()) - metrics = IntercomparisonMetrics( - dataset_names=ds_names, - # passing the names here explicitly, see GH issue #220 - refname="reference_name", - other_names=ds_names[1:], - calc_tau=True, - ) + with pytest.warns(DeprecationWarning): + metrics = IntercomparisonMetrics( + dataset_names=ds_names, + # passing the names here explicitly, see GH issue #220 + refname="reference_name", + other_names=ds_names[1:], + calc_tau=True, + ) if seas_metrics: with pytest.warns(UserWarning): metrics = seas_metrics(metrics) - val = Validation( - datasets, - "reference_name", - scaling=None, - temporal_matcher=None, # use default here - metrics_calculators={(4, 4): metrics.calc_metrics}, - ) - - print("running old setup") + with pytest.warns(UserWarning): + val = Validation( + datasets, + "reference_name", + scaling=None, + temporal_matcher=None, # use default here + metrics_calculators={(4, 4): metrics.calc_metrics}, + ) results = val.calc(0, 1, 1, rename_cols=False) # results is a dictionary with one entry and key @@ -925,7 +967,7 @@ def test_PairwiseIntercomparisonMetrics(testdata_generator, seas_metrics): def test_PairwiseIntercomparisonMetrics_confidence_intervals(): # tests if the correct confidence intervals are returned - datasets, _ = testdata_random() + datasets, _ = make_testdata_random() matcher = make_combined_temporal_matcher(pd.Timedelta(6, "h")) val = Validation( datasets, @@ -944,7 +986,8 @@ def test_PairwiseIntercomparisonMetrics_confidence_intervals(): }, ) results_pw = val.calc( - [0], [1], [1], rename_cols=False, only_with_reference=True + [0], [1], [1], rename_cols=False, + only_with_reference=True ) metrics_with_ci = { @@ -999,7 +1042,7 @@ def test_PairwiseIntercomparisonMetrics_confidence_intervals(): @pytest.mark.parametrize( - "testdata_generator", [testdata_known_results, testdata_random] + "testdata_generator", [make_testdata_known_results, make_testdata_random] ) @pytest.mark.parametrize("seas_metrics", [None, MonthsMetricsAdapter]) def test_TripleCollocationMetrics(testdata_generator, seas_metrics): @@ -1028,7 +1071,8 @@ def test_TripleCollocationMetrics(testdata_generator, seas_metrics): metrics_calculators={(4, 3): triplet_metrics_calculator.calc_metrics}, ) results_triplet = val_triplet.calc( - [0], [1], [1], rename_cols=False, only_with_reference=True + [0], [1], [1], rename_cols=False, + only_with_reference=True ) if "col1_name" in datasets.keys(): @@ -1092,7 +1136,8 @@ def test_TripleCollocationMetrics(testdata_generator, seas_metrics): }, ) results_triplet = val_triplet.calc( - [0], [1], [1], rename_cols=False, only_with_reference=True + [0], [1], [1], rename_cols=False, + only_with_reference=True ) for key in results_triplet: for dset, _ in key: @@ -1108,12 +1153,12 @@ def test_TripleCollocationMetrics(testdata_generator, seas_metrics): assert (*lkey, dset) in results_triplet[key] assert (*ukey, dset) in results_triplet[key] assert ( - results_triplet[key][(*lkey, dset)] - <= results_triplet[key][(*mkey, dset)] + results_triplet[key][(*lkey, dset)] + <= results_triplet[key][(*mkey, dset)] ) assert ( - results_triplet[key][(*mkey, dset)] - <= results_triplet[key][(*ukey, dset)] + results_triplet[key][(*mkey, dset)] + <= results_triplet[key][(*ukey, 
dset)] ) @@ -1161,13 +1206,14 @@ def test_temporal_matching_ascat_ismn(): # old setup ds_names = list(datasets.keys()) - metrics = IntercomparisonMetrics( - dataset_names=ds_names, - # passing the names here explicitly, see GH issue #220 - refname=refname, - other_names=ds_names[1:], - calc_tau=True, - ) + with pytest.warns(DeprecationWarning): + metrics = IntercomparisonMetrics( + dataset_names=ds_names, + # passing the names here explicitly, see GH issue #220 + refname=refname, + other_names=ds_names[1:], + calc_tau=True, + ) old_val = Validation( datasets, refname, diff --git a/tests/test_validation_framework/test_upscaling.py b/tests/test_validation_framework/test_upscaling.py index 1cc105eb..c5026dd5 100644 --- a/tests/test_validation_framework/test_upscaling.py +++ b/tests/test_validation_framework/test_upscaling.py @@ -60,20 +60,20 @@ def averager(): def test_upscale(averager): """Test all upscaling functions""" to_upscale = pd.concat( - [pd.Series(2, index=np.linspace(1,10), name='sm'), - pd.Series(4, index=np.linspace(1,10), name='sm')], + [pd.Series(2, index=np.linspace(1, 10), name='sm'), + pd.Series(4, index=np.linspace(1, 10), name='sm')], axis=1 ) # simple check of series averaging upscaled = averager.upscale(to_upscale, method="average") - should = pd.Series(float(3), index=np.linspace(1,10)) + should = pd.Series(float(3), index=np.linspace(1, 10)) assert upscaled.equals(should) def test_tstability(averager): """Test temporal stability filtering with noisy or uncorrelated series""" n_obs = 1000 - points = np.linspace(0, 2*np.pi, n_obs) + points = np.linspace(0, 2 * np.pi, n_obs) ts = np.sin(points) low_corr = np.sin(points + np.pi) high_sterr = np.sin(points) + np.random.normal(0, 2, n_obs) @@ -99,12 +99,14 @@ def series_2_match(): ref_ser = pd.Series( data_ref, - index=pd.date_range("2007-01-01 01:00:00", "2007-01-30 01:00:00", freq="D"), + index=pd.date_range("2007-01-01 01:00:00", + "2007-01-30 01:00:00", freq="D"), name="ref" ).to_frame() match_ser = pd.Series( data2match, - index=pd.date_range("2007-01-01 05:00:00", "2007-01-29 05:00:00", freq="D"), + index=pd.date_range("2007-01-01 05:00:00", + "2007-01-29 05:00:00", freq="D"), name="ref" ).to_frame() to_match = [ref_ser, match_ser] @@ -115,25 +117,29 @@ def series_2_match(): def test_temporal_matching(averager, series_2_match): """Test temporal matching""" matched = averager.temporal_match(series_2_match, drop_missing=False) - assert len(matched.index) == 30, "Should be matched to the longest timeseries" + assert len(matched.index) == 30, \ + "Should be matched to the longest timeseries" matched = averager.temporal_match(series_2_match, drop_missing=True) - assert len(matched.index) == 28, "Should drop the row and the missing timestep with a missing value" + assert len(matched.index) == 28, \ + "Should drop the row and the missing timestep with a missing value" - matched = averager.temporal_match(series_2_match, hours=3) - assert matched.equals(series_2_match[0]), "Should not be matched" + with pytest.warns(UserWarning): + matched = averager.temporal_match(series_2_match, hours=3) + assert matched.equals(series_2_match[0]), "Should not be matched" def test_capture_warning(averager, series_2_match): def override_read(points, other_name): return series_2_match - averager.lut = {"other_ds": {0 : [0]}} + averager.lut = {"other_ds": {0: [0]}} averager.datasets = {"other_ds": {"columns": ["ref"]}} averager._read = override_read - res = averager.get_upscaled_ts( - gpi=0, - other_name="other_ds", - **{"hours": 3} - ) + with 
pytest.warns(UserWarning): + res = averager.get_upscaled_ts( + gpi=0, + other_name="other_ds", + **{"hours": 3} + ) assert res.equals(series_2_match[0]) diff --git a/tests/test_validation_framework/test_validation.py b/tests/test_validation_framework/test_validation.py index bd70d265..a3789ac0 100644 --- a/tests/test_validation_framework/test_validation.py +++ b/tests/test_validation_framework/test_validation.py @@ -137,9 +137,9 @@ def ismn_reader(): def check_results( - filename: str, - target_vars: dict, - variables: list = None, + filename: str, + target_vars: dict, + variables: list = None, ): """ Check that standard vars are present and that nobs, rho and rmsd match @@ -185,7 +185,6 @@ def test_ascat_ismn_validation(ascat_reader, ismn_reader): metadata = ismn_reader.read_metadata(idx) jobs.append((idx, metadata["longitude"].val, metadata["latitude"].val)) - # Create the validation object. datasets = { @@ -205,7 +204,8 @@ def test_ascat_ismn_validation(ascat_reader, ismn_reader): } read_ts_names = {"ASCAT": "read", "ISMN": "read"} - period = [datetime(2007, 1, 1), datetime(2014, 12, 31)] + period = [datetime(2007, 1, 1), + datetime(2014, 12, 31)] datasets = DataManager( datasets, "ISMN", period, read_ts_names=read_ts_names) @@ -216,6 +216,8 @@ def test_ascat_ismn_validation(ascat_reader, ismn_reader): temporal_ref="ASCAT", scaling="cdf_match", scaling_ref="ASCAT", + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(1, "h")), metrics_calculators={ (2, 2): metrics_calculators.BasicMetrics(other_name="k1").calc_metrics @@ -337,6 +339,8 @@ def test_ascat_ismn_validation_metadata(ascat_reader, ismn_reader): temporal_ref="ASCAT", scaling="cdf_match", scaling_ref="ASCAT", + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(1, "h")), metrics_calculators={ (2, 2): metrics_calculators.BasicMetrics( @@ -396,7 +400,7 @@ def test_ascat_ismn_validation_metadata(ascat_reader, ismn_reader): "SOILSCAPE", "SOILSCAPE", "SOILSCAPE", - ], dtype="U256",) + ], dtype="U256", ) } vars_should = [ 'BIAS', 'R', 'RMSD', '_row_size', 'climate', 'gpi', 'idx', 'landcover', @@ -432,14 +436,10 @@ def test_validation_with_averager(ascat_reader, ismn_reader): (7, -120.80639, 38.17353)] } } - gpis = (1814367, 1803695, 1856312) - lons, lats = [], [] - for gpi in gpis: - lon, lat = ascat_reader.grid.gpi2lonlat(gpi) - lons.append(lon) - lats.append(lat) - jobs = [(gpis, lons, lats)] + jobs = [((1814367, 1803695, 1856312), + (0, 0, 0), + (0, 0, 0))] # coords not needed here # Create the variable ***save_path*** which is a string representing the # path where the results will be saved. 
**DO NOT CHANGE** the name @@ -467,7 +467,8 @@ def test_validation_with_averager(ascat_reader, ismn_reader): } read_ts_names = {"ASCAT": "read", "ISMN": "read"} - period = [datetime(2007, 1, 1), datetime(2014, 12, 31)] + period = [datetime(2007, 1, 1), + datetime(2014, 12, 31)] datasets = DataManager( datasets, @@ -486,6 +487,8 @@ def test_validation_with_averager(ascat_reader, ismn_reader): temporal_ref="ISMN", scaling="cdf_match", scaling_ref="ISMN", + temporal_matcher=make_combined_temporal_matcher( + pd.Timedelta(1, "h")), metrics_calculators={ (2, 2): metrics_calculators.BasicMetrics(other_name="k1").calc_metrics @@ -829,7 +832,6 @@ def test_validation_n3_k2(): def test_validation_n3_k2_temporal_matching_no_matches2(): - empty_result = { 'gpi': np.array([4], dtype=np.int32), 'lon': np.array([4.]), @@ -1159,6 +1161,8 @@ def test_ascat_ismn_validation_metadata_rolling(ascat_reader, ismn_reader): temporal_ref="ASCAT", scaling="cdf_match", scaling_ref="ASCAT", + temporal_matcher=make_combined_temporal_matcher( + window=pd.Timedelta(1, "h")), metrics_calculators={ (2, 2): metrics_calculators.RollingMetrics(
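
A few usage sketches related to the changes above. First, the DataFrame pattern from the src/pytesmo/time_series/grouping.py hunk: new boolean flag columns are collected in a dict and appended with a single concat instead of column-by-column assignment. A minimal standalone sketch; the index, column names, and flag values are illustrative, not taken from pytesmo:

```python
import numpy as np
import pandas as pd

idx = pd.date_range("2020-01-01", periods=366, freq="D")
mask = pd.DataFrame(index=idx)

# Collect all new boolean columns in a plain dict first ...
cols = {}
for i in range(3):
    cols[f"gen_range2020-{i}"] = np.zeros(len(idx), dtype=bool)

# ... then append them with one concat. Assigning each column to the
# DataFrame inside the loop is what triggered the pandas
# PerformanceWarning about fragmented frames.
mask = pd.concat([mask, pd.DataFrame(index=idx, data=cols)], axis=1)

print(mask.any(axis=1, bool_only=True).sum())  # 0, no timestamp flagged
```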
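With the warnings.simplefilter() calls removed from pytesmo.utils.deprecated, the DeprecationWarning it emits now respects regular warning filters, including the new setup.cfg entries. A minimal sketch, assuming a made-up old_function and message:

```python
import pytest

from pytesmo.utils import deprecated


@deprecated("Use 'new_function' instead.")
def old_function():
    return 42


def test_old_function_warns():
    # Every call still raises a DeprecationWarning ...
    with pytest.warns(DeprecationWarning, match="new_function"):
        assert old_function() == 42


# ... but it can now be silenced per test, analogous to the
# filterwarnings entries added to setup.cfg above.
@pytest.mark.filterwarnings(
    "ignore:Use 'new_function' instead.:DeprecationWarning")
def test_old_function_silenced():
    assert old_function() == 42
```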
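IntercomparisonMetrics and TCMetrics now warn that they will be removed; the replacements named in the warnings are constructed as in the tests above. A sketch of the migration; the reference name "0-ERA5" and the 12-hour window are placeholders, and the make_combined_temporal_matcher import path is assumed from pytesmo's temporal_matchers module:

```python
import pandas as pd

from pytesmo.validation_framework.metric_calculators import (
    PairwiseIntercomparisonMetrics,
    TripleCollocationMetrics,
)
from pytesmo.validation_framework.temporal_matchers import (
    make_combined_temporal_matcher,
)

# Pairwise replacement for the deprecated IntercomparisonMetrics:
pairwise = PairwiseIntercomparisonMetrics(min_obs=10, calc_spearman=True)

# Triple collocation replacement for the deprecated TCMetrics:
triplet = TripleCollocationMetrics(refname="0-ERA5", min_obs=10)

# The combined matcher used throughout the new tests; 12 h is an
# arbitrary example window (note the lowercase "h", see upscaling.py):
matcher = make_combined_temporal_matcher(pd.Timedelta(12, "h"))

# These would then be passed to a Validation object as
# temporal_matcher=matcher and
# metrics_calculators={(3, 2): pairwise.calc_metrics}.
```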
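Finally, the upscaling fix replaces int(df.count()) with int(df.count().iloc[0]): DataFrame.count() returns one count per column, and casting a length-one Series to int is deprecated in recent pandas. A tiny sketch with made-up data:

```python
import pandas as pd

df = pd.DataFrame({"sm": [0.1, 0.2, None, 0.4]})

# DataFrame.count() yields a per-column Series, so the number of valid
# observations in the first (only) column is selected explicitly:
n_obs = int(df.count().iloc[0])
print(n_obs)  # 3
```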