diff --git a/src/pyaro/timeseries/Filter.py b/src/pyaro/timeseries/Filter.py index 14e086a..67a4ed6 100644 --- a/src/pyaro/timeseries/Filter.py +++ b/src/pyaro/timeseries/Filter.py @@ -1,3 +1,4 @@ +import json import logging import math import abc @@ -5,6 +6,7 @@ import csv from datetime import datetime import inspect +import pathlib import re import sys import types @@ -1225,7 +1227,7 @@ class ValleyFloorRelativeAltitudeFilter(StationFilter): def __init__( self, - topo_file: str | None = None, + topo: str | None = None, *, radius: float = 5000, topo_var: str = "Band1", @@ -1241,7 +1243,21 @@ def __init__( "valleyfloor_relaltitude filter is missing required dependency 'xarray'. Please install to use this filter." ) - self._topo_file = topo_file + self._topo = None + if topo is not None: + try: + self._topo = pathlib.Path(topo) + except TypeError as e: + raise TypeError( + f"Topo needs to be an instance of str. Got {type(topo)}." + ) from e + + if not self._topo.exists(): + logger.warning( + f"Provided location for topography data ({self._topo}) does not exist. It should be either a .nc file, or a folder with several .nc files and a metadata.json file." + ) + + self._topo_file = None self._topo_var = topo_var self._radius = radius self._lower = lower @@ -1249,7 +1265,8 @@ def __init__( def init_kwargs(self): return { - "topo_file": self._topo_file, + # Converting to string for serialization purposes. + "topo": str(self._topo_file), "topo_var": self._topo_var, "radius": self._radius, "lower": self._lower, @@ -1259,7 +1276,52 @@ def init_kwargs(self): def name(self): return "valleyfloor_relaltitude" + def _update_topo_file_path(self, lat: float, lon: float) -> bool: + """Updates self._topo_file based on the lat/lon pair, and returns a boolean + indicating whether it changed. + + :param lat: Latitude + :param lon: Longitude + :raises FileNotFoundError: If self._topo does not exist. + :raises FileNotFoundError: If self._topo is a directory and 'metadata.json' does not exist. + :return: Boolean indicating whether _topo_file changed. + """ + old_path = self._topo_file + if self._topo.is_file(): + self._topo_file = self._topo + elif self._topo.is_dir(): + metadata_file = self._topo / "metadata.json" + if not metadata_file.exists(): + raise FileNotFoundError(f"No 'metadata.json' file found in directory.") + + with open(metadata_file) as f: + metadata = json.load(f) + + file = None + for file in metadata: + if lat < metadata[file]["s"] or lat > metadata[file]["n"]: + continue + if lon < metadata[file]["w"] or lon > metadata[file]["e"]: + continue + + self._topo_file = self._topo / file + break + + if file is None: + raise Exception( + f"No matching topography file found for coordinate pair (lat={lat:.6f}; lon={lon:.6f})" + ) + + else: + raise FileNotFoundError + + return self._topo_file != old_path + def filter_stations(self, stations: dict[str, Station]) -> dict[str, Station]: + if self._topo is None or (self._upper is None and self._lower is None): + # Default initialized filter should not do anything, so return unfiltered stations. + return stations + if "cf_units" not in sys.modules: raise ModuleNotFoundError( "valleyfloor_relaltitude filter is missing required dependency 'cf-units'. Please install to use this filter." @@ -1268,27 +1330,37 @@ def filter_stations(self, stations: dict[str, Station]) -> dict[str, Station]: raise ModuleNotFoundError( "valleyfloor_relaltitude filter is missing required dependency 'xarray'. Please install to use this filter." ) + if not self._topo.exists(): + raise FileNotFoundError( + f"Provided location for topography data ({self._topo}) does not exist. It should be either a .nc file, or a folder with several .nc files and a metadata.json file." + ) filtered_stations = {} - with xr.open_dataset(self._topo_file) as topo: - for k, v in stations.items(): - lat = v.latitude - lon = v.longitude - alt = v.altitude - - ralt = self._calculate_relative_altitude( - lat, lon, radius=self._radius, altitude=alt, topo=topo - ) - keep = True - if self._lower is not None: - if self._lower > ralt: - keep = False - if self._upper is not None: - if self._upper < ralt: - keep = False - if keep: - filtered_stations[k] = v + # Sorting stations by latitude minimizes reloading of data if each topo file + # is a band that includes 360deg of longitude. This is the case for the merged + # dataset. + for k, v in sorted(stations.items(), key=lambda x: x[1].latitude): + lat = v.latitude + lon = v.longitude + alt = v.altitude + if self._update_topo_file_path(lat, lon): + with xr.open_dataset(self._topo_file) as top: + topo = top + + ralt = self._calculate_relative_altitude( + lat, lon, radius=self._radius, altitude=alt, topo=topo + ) + + keep = True + if self._lower is not None: + if self._lower > ralt: + keep = False + if self._upper is not None: + if self._upper < ralt: + keep = False + if keep: + filtered_stations[k] = v return filtered_stations diff --git a/tests/test_CSVTimeSeriesReader.py b/tests/test_CSVTimeSeriesReader.py index f73320f..a24fee1 100644 --- a/tests/test_CSVTimeSeriesReader.py +++ b/tests/test_CSVTimeSeriesReader.py @@ -676,7 +676,7 @@ def test_valley_floor_filter(self): filters=[ pyaro.timeseries.filters.get( "valleyfloor_relaltitude", - topo_file="tests/testdata/datadir_elevation/gtopo30_subset.nc", + topo="tests/testdata/datadir_elevation/gtopo30_subset.nc", radius=5000, lower=150, upper=250,