Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow directory in valleyfloor filter #76

Merged
merged 5 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 93 additions & 21 deletions src/pyaro/timeseries/Filter.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import json
import logging
import math
import abc
from collections import defaultdict
import csv
from datetime import datetime
import inspect
import pathlib
import re
import sys
import types
Expand Down Expand Up @@ -1225,7 +1227,7 @@ class ValleyFloorRelativeAltitudeFilter(StationFilter):

def __init__(
self,
topo_file: str | None = None,
topo: str | None = None,
*,
radius: float = 5000,
topo_var: str = "Band1",
Expand All @@ -1241,15 +1243,30 @@ def __init__(
"valleyfloor_relaltitude filter is missing required dependency 'xarray'. Please install to use this filter."
)

self._topo_file = topo_file
self._topo = None
if topo is not None:
try:
self._topo = pathlib.Path(topo)
except TypeError as e:
raise TypeError(
f"Topo needs to be an instance of str. Got {type(topo)}."
) from e

if not self._topo.exists():
logger.warning(
f"Provided location for topography data ({self._topo}) does not exist. It should be either a .nc file, or a folder with several .nc files and a metadata.json file."
)

self._topo_file = None
self._topo_var = topo_var
self._radius = radius
self._lower = lower
self._upper = upper

def init_kwargs(self):
return {
"topo_file": self._topo_file,
# Converting to string for serialization purposes.
"topo": str(self._topo_file),
"topo_var": self._topo_var,
"radius": self._radius,
"lower": self._lower,
Expand All @@ -1259,7 +1276,52 @@ def init_kwargs(self):
def name(self):
return "valleyfloor_relaltitude"

def _update_topo_file_path(self, lat: float, lon: float) -> bool:
"""Updates self._topo_file based on the lat/lon pair, and returns a boolean
indicating whether it changed.

:param lat: Latitude
:param lon: Longitude
:raises FileNotFoundError: If self._topo does not exist.
:raises FileNotFoundError: If self._topo is a directory and 'metadata.json' does not exist.
:return: Boolean indicating whether _topo_file changed.
"""
old_path = self._topo_file
if self._topo.is_file():
self._topo_file = self._topo
elif self._topo.is_dir():
metadata_file = self._topo / "metadata.json"
if not metadata_file.exists():
raise FileNotFoundError(f"No 'metadata.json' file found in directory.")

with open(metadata_file) as f:
metadata = json.load(f)

file = None
for file in metadata:
if lat < metadata[file]["s"] or lat > metadata[file]["n"]:
continue
if lon < metadata[file]["w"] or lon > metadata[file]["e"]:
continue

self._topo_file = self._topo / file
break

if file is None:
raise Exception(
f"No matching topography file found for coordinate pair (lat={lat:.6f}; lon={lon:.6f})"
)

else:
raise FileNotFoundError

return self._topo_file != old_path

def filter_stations(self, stations: dict[str, Station]) -> dict[str, Station]:
if self._topo is None or (self._upper is None and self._lower is None):
# Default initialized filter should not do anything, so return unfiltered stations.
return stations

if "cf_units" not in sys.modules:
raise ModuleNotFoundError(
"valleyfloor_relaltitude filter is missing required dependency 'cf-units'. Please install to use this filter."
Expand All @@ -1268,27 +1330,37 @@ def filter_stations(self, stations: dict[str, Station]) -> dict[str, Station]:
raise ModuleNotFoundError(
"valleyfloor_relaltitude filter is missing required dependency 'xarray'. Please install to use this filter."
)
if not self._topo.exists():
raise FileNotFoundError(
f"Provided location for topography data ({self._topo}) does not exist. It should be either a .nc file, or a folder with several .nc files and a metadata.json file."
)

filtered_stations = {}
with xr.open_dataset(self._topo_file) as topo:
for k, v in stations.items():
lat = v.latitude
lon = v.longitude
alt = v.altitude

ralt = self._calculate_relative_altitude(
lat, lon, radius=self._radius, altitude=alt, topo=topo
)

keep = True
if self._lower is not None:
if self._lower > ralt:
keep = False
if self._upper is not None:
if self._upper < ralt:
keep = False
if keep:
filtered_stations[k] = v
# Sorting stations by latitude minimizes reloading of data if each topo file
# is a band that includes 360deg of longitude. This is the case for the merged
# dataset.
for k, v in sorted(stations.items(), key=lambda x: x[1].latitude):
lat = v.latitude
lon = v.longitude
alt = v.altitude
if self._update_topo_file_path(lat, lon):
with xr.open_dataset(self._topo_file) as top:
topo = top

ralt = self._calculate_relative_altitude(
lat, lon, radius=self._radius, altitude=alt, topo=topo
)

keep = True
if self._lower is not None:
if self._lower > ralt:
keep = False
if self._upper is not None:
if self._upper < ralt:
keep = False
if keep:
filtered_stations[k] = v

return filtered_stations

Expand Down
2 changes: 1 addition & 1 deletion tests/test_CSVTimeSeriesReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ def test_valley_floor_filter(self):
filters=[
pyaro.timeseries.filters.get(
"valleyfloor_relaltitude",
topo_file="tests/testdata/datadir_elevation/gtopo30_subset.nc",
topo="tests/testdata/datadir_elevation/gtopo30_subset.nc",
radius=5000,
lower=150,
upper=250,
Expand Down
Loading