Skip to content
This repository has been archived by the owner on Jan 10, 2025. It is now read-only.

Commit

Permalink
more on create
Browse files Browse the repository at this point in the history
  • Loading branch information
b8raoult committed Feb 18, 2024
1 parent 9f305bb commit 20b95a8
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 14 deletions.
28 changes: 28 additions & 0 deletions ecml_tools/create/functions/actions/grib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# (C) Copyright 2024 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#


from climetlab import load_source
from climetlab.utils.patterns import Pattern


def execute(context, dates, path, *args, **kwargs):
    """Load GRIB fields from files matching a path pattern, restricted to *dates*.

    Parameters
    ----------
    context : the dataset-creation context (unused here).
    dates : list of datetime objects to select.
    path : a climetlab pattern; ``date`` and any extra args/kwargs are
        substituted to produce the list of candidate files.

    Returns
    -------
    A climetlab source containing the union of the selected fields.
    """
    paths = Pattern(path, ignore_missing_keys=True).substitute(
        *args, date=dates, **kwargs
    )

    # Start from an empty source and accumulate the selection from each file.
    ds = load_source("empty")

    for p in paths:  # do not shadow the `path` parameter
        s = load_source("file", p)
        # Keep only the fields whose valid time is one of the requested dates.
        # NOTE(review): kwargs are used both for pattern substitution above and
        # for the selection here — confirm this double use is intentional.
        s = s.sel(valid_datetime=[d.isoformat() for d in dates], **kwargs)
        ds = ds + s

    return ds
12 changes: 12 additions & 0 deletions ecml_tools/create/functions/actions/netcdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# (C) Copyright 2024 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

# The "netcdf" action is implemented by the "opendap" action: both read
# NetCDF data, only the location of the file differs.
from .opendap import execute as opendap_execute

execute = opendap_execute  # netcdf is an alias for opendap
12 changes: 12 additions & 0 deletions ecml_tools/create/functions/steps/noop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# (C) Copyright 2024 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#


def execute(context, input, *args, **kwargs):
    """No-op pipeline step: return *input* unchanged.

    Extra positional and keyword arguments are accepted and ignored so the
    step can be used as a drop-in placeholder in a configuration.
    """
    result = input
    return result
30 changes: 30 additions & 0 deletions ecml_tools/create/functions/steps/rename.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# (C) Copyright 2024 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

from climetlab.indexing.fieldset import FieldArray


class RenamedField:
    """Proxy around a field that renames the values of one metadata key.

    Only the metadata key *what* is affected: its value is mapped through
    *renaming* (unknown values pass through unchanged).  Every other
    attribute access is delegated to the wrapped field.
    """

    def __init__(self, field, what, renaming):
        self.field = field
        self.what = what
        self.renaming = renaming

    def metadata(self, key):
        value = self.field.metadata(key)
        if key != self.what:
            return value
        # Fall back to the original value when no renaming is defined.
        return self.renaming.get(value, value)

    def __getattr__(self, name):
        # Delegate everything else to the wrapped field.
        return getattr(self.field, name)


def execute(context, input, what="param", **kwargs):
    """Wrap each field of *input* so that values of the metadata key *what*
    are renamed according to **kwargs (old-name -> new-name)."""
    wrapped = []
    for field in input:
        wrapped.append(RenamedField(field, what, kwargs))
    return FieldArray(wrapped)
75 changes: 63 additions & 12 deletions ecml_tools/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1179,15 +1179,16 @@ def _frequency_to_hours(frequency):

def _as_date(d, dates, last):
if isinstance(d, datetime.datetime):
assert d.minutes == 0 and d.hours == 0 and d.seconds == 0, d
if not d.minute == 0 and d.hour == 0 and d.second == 0:
return np.datetime64(d)
d = datetime.date(d.year, d.month, d.day)

if isinstance(d, datetime.date):
d = d.year * 10_000 + d.month * 100 + d.day

try:
d = int(d)
except ValueError:
except (ValueError, TypeError):
pass

if isinstance(d, int):
Expand Down Expand Up @@ -1255,12 +1256,14 @@ def _as_last_date(d, dates):
return _as_date(d, dates, last=True)


def _concat_or_join(datasets):
def _concat_or_join(datasets, kwargs):
datasets, kwargs = _auto_adjust(datasets, kwargs)

# Study the dates
ranges = [(d.dates[0].astype(object), d.dates[-1].astype(object)) for d in datasets]

if len(set(ranges)) == 1:
return Join(datasets)._overlay()
return Join(datasets)._overlay(), kwargs

# Make sure the dates are disjoint
for i in range(len(ranges)):
Expand All @@ -1285,7 +1288,7 @@ def _concat_or_join(datasets):
f"{r} and {s} ({datasets[i]} {datasets[i+1]})"
)

return Concat(datasets)
return Concat(datasets), kwargs


def _open(a, zarr_root):
Expand All @@ -1310,6 +1313,48 @@ def _open(a, zarr_root):
raise NotImplementedError()


def _auto_adjust(datasets, kwargs):
"""Adjust the datasets for concatenation or joining based
on parameters set to 'matching'"""

if kwargs.get("ajust") == "matching":
kwargs.pop("ajust")
for p in ("select", "frequency", "start", "end"):
kwargs[p] = "matching"

adjust = {}

if kwargs.get("select") == "matching":
kwargs.pop("select")
variables = None
for d in datasets:
if variables is None:
variables = set(d.variables)
else:
variables &= set(d.variables)
if len(variables) == 0:
raise ValueError("No common variables")

adjust["select"] = variables

if kwargs.get("frequency") == "matching":
kwargs.pop("frequency")
adjust["frequency"] = max(d.frequency for d in datasets)

if kwargs.get("start") == "matching":
kwargs.pop("start")
adjust["start"] = max(d.dates[0] for d in datasets).astype(object)

if kwargs.get("end") == "matching":
kwargs.pop("end")
adjust["end"] = max(d.dates[-1] for d in datasets).astype(object)

if adjust:
datasets = [d._subset(**adjust) for d in datasets]

return datasets, kwargs


def _open_dataset(*args, zarr_root, **kwargs):
sets = []
for a in args:
Expand All @@ -1318,17 +1363,21 @@ def _open_dataset(*args, zarr_root, **kwargs):
if "ensemble" in kwargs:
if "grids" in kwargs:
raise NotImplementedError("Cannot use both 'ensemble' and 'grids'")

ensemble = kwargs.pop("ensemble")
axis = kwargs.pop("axis", 2)
assert len(args) == 0
assert isinstance(ensemble, (list, tuple))
return Ensemble([_open(e, zarr_root) for e in ensemble], axis=axis)._subset(
**kwargs
)

datasets = [_open(e, zarr_root) for e in ensemble]
datasets, kwargs = _auto_adjust(datasets, kwargs)

return Ensemble(datasets, axis=axis)._subset(**kwargs)

if "grids" in kwargs:
if "ensemble" in kwargs:
raise NotImplementedError("Cannot use both 'ensemble' and 'grids'")

grids = kwargs.pop("grids")
mode = kwargs.pop("mode", "concatenate")
axis = kwargs.pop("axis", 3)
Expand All @@ -1344,9 +1393,10 @@ def _open_dataset(*args, zarr_root, **kwargs):
f"Unknown grids mode: {mode}, values are {list(KLASSES.keys())}"
)

return KLASSES[mode]([_open(e, zarr_root) for e in grids], axis=axis)._subset(
**kwargs
)
datasets = [_open(e, zarr_root) for e in grids]
datasets, kwargs = _auto_adjust(datasets, kwargs)

return KLASSES[mode](datasets, axis=axis)._subset(**kwargs)

for name in ("datasets", "dataset"):
if name in kwargs:
Expand All @@ -1359,7 +1409,8 @@ def _open_dataset(*args, zarr_root, **kwargs):
assert len(sets) > 0, (args, kwargs)

if len(sets) > 1:
return _concat_or_join(sets)._subset(**kwargs)
dataset, kwargs = _concat_or_join(sets, kwargs)
return dataset._subset(**kwargs)

return sets[0]._subset(**kwargs)

Expand Down
56 changes: 54 additions & 2 deletions ecml_tools/grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def cutout_mask(

zero = np.array([0.0, 0.0, 0.0])
ok = []
for i, (era_point, distance, index) in enumerate(
for i, (global_point, distance, index) in enumerate(
zip(global_points, distances, indices)
):
t = Triangle3D(points[index[0]], points[index[1]], points[index[2]])
Expand All @@ -164,7 +164,7 @@ def cutout_mask(
# from the point to the center of the Earth is not None
# (the direction of the ray is not important)
ok.append(
(t.intersect(zero, era_point) or t.intersect(era_point, zero))
(t.intersect(zero, global_point) or t.intersect(global_point, zero))
# and (distance >= min_distance)
)

Expand All @@ -188,6 +188,58 @@ def cutout_mask(
return mask


def thinning_mask(
    lats,
    lons,
    global_lats,
    global_lons,
    cropping_distance=2.0,
):
    """Return, for each global grid point near the (lats, lons) area,
    the index of the closest point in (lats, lons).

    The global grid is first cropped to the bounding box of the local grid
    (expanded by *cropping_distance* degrees and clamped to the poles),
    then each remaining global point is matched to its nearest local point.

    Parameters
    ----------
    lats, lons : 1-D arrays, coordinates of the local (target) grid.
    global_lats, global_lons : 1-D arrays, coordinates of the global grid.
    cropping_distance : margin in degrees around the local bounding box.

    Returns
    -------
    np.ndarray of integer indices into (lats, lons), one per cropped
    global point.
    """

    assert global_lats.ndim == 1
    assert global_lons.ndim == 1
    assert lats.ndim == 1
    assert lons.ndim == 1

    assert global_lats.shape == global_lons.shape
    assert lats.shape == lons.shape

    north = np.amax(lats)
    south = np.amin(lats)
    east = np.amax(lons)
    west = np.amin(lons)

    # Reduce the global grid to the area of interest, clamped to the poles.
    mask = cropping_mask(
        global_lats,
        global_lons,
        np.min([90.0, north + cropping_distance]),
        west - cropping_distance,
        np.max([-90.0, south - cropping_distance]),
        east + cropping_distance,
    )

    global_lats_masked = global_lats[mask]
    global_lons_masked = global_lons[mask]

    # Work in 3D cartesian coordinates on the unit sphere so that Euclidean
    # distance is a monotonic proxy for the great-circle distance.
    global_xyz = latlon_to_xyz(global_lats_masked, global_lons_masked)
    global_points = np.array(global_xyz).transpose()

    xyz = latlon_to_xyz(lats, lons)
    points = np.array(xyz).transpose()

    # Use a KDTree to find the nearest local point for each global point.
    kdtree = KDTree(points)
    _, indices = kdtree.query(global_points, k=1)

    return np.asarray(indices)


if __name__ == "__main__":
global_lats, global_lons = np.meshgrid(
np.linspace(90, -90, 90),
Expand Down

0 comments on commit 20b95a8

Please sign in to comment.