Skip to content
This repository has been archived by the owner on Jan 10, 2025. It is now read-only.

Commit

Permalink
more on create
Browse files Browse the repository at this point in the history
  • Loading branch information
b8raoult committed Feb 18, 2024
1 parent 9f305bb commit 20b95a8
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 14 deletions.
28 changes: 28 additions & 0 deletions ecml_tools/create/functions/actions/grib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# (C) Copyright 2024 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#


from climetlab import load_source
from climetlab.utils.patterns import Pattern


def execute(context, dates, path, *args, **kwargs):
    """Load GRIB fields from files matching a path pattern, restricted to *dates*.

    Parameters
    ----------
    context : the dataset-creation context (unused here).
    dates : list of datetime objects to select.
    path : a climetlab pattern; ``date`` and any extra args/kwargs are
        substituted to produce the list of candidate files.

    Returns
    -------
    A climetlab source containing the union of the selected fields.
    """
    paths = Pattern(path, ignore_missing_keys=True).substitute(
        *args, date=dates, **kwargs
    )

    # Start from an empty source and accumulate the selection from each file.
    ds = load_source("empty")

    for p in paths:  # do not shadow the `path` parameter
        s = load_source("file", p)
        # Keep only the fields whose valid time is one of the requested dates.
        # NOTE(review): kwargs are used both for pattern substitution above and
        # for the selection here — confirm this double use is intentional.
        s = s.sel(valid_datetime=[d.isoformat() for d in dates], **kwargs)
        ds = ds + s

    return ds
12 changes: 12 additions & 0 deletions ecml_tools/create/functions/actions/netcdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# (C) Copyright 2024 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

# The "netcdf" action is implemented by the "opendap" action: both read
# NetCDF data, only the location of the file differs.
from .opendap import execute as opendap_execute

execute = opendap_execute  # netcdf is an alias for opendap
12 changes: 12 additions & 0 deletions ecml_tools/create/functions/steps/noop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# (C) Copyright 2024 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#


def execute(context, input, *args, **kwargs):
    """No-op pipeline step: return *input* unchanged.

    Extra positional and keyword arguments are accepted and ignored so the
    step can be used as a drop-in placeholder in a configuration.
    """
    result = input
    return result
30 changes: 30 additions & 0 deletions ecml_tools/create/functions/steps/rename.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# (C) Copyright 2024 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

from climetlab.indexing.fieldset import FieldArray


class RenamedField:
    """Proxy around a field that renames the values of one metadata key.

    Only the metadata key *what* is affected: its value is mapped through
    *renaming* (unknown values pass through unchanged).  Every other
    attribute access is delegated to the wrapped field.
    """

    def __init__(self, field, what, renaming):
        self.field = field
        self.what = what
        self.renaming = renaming

    def metadata(self, key):
        value = self.field.metadata(key)
        if key != self.what:
            return value
        # Fall back to the original value when no renaming is defined.
        return self.renaming.get(value, value)

    def __getattr__(self, name):
        # Delegate everything else to the wrapped field.
        return getattr(self.field, name)


def execute(context, input, what="param", **kwargs):
    """Wrap each field of *input* so that values of the metadata key *what*
    are renamed according to **kwargs (old-name -> new-name)."""
    wrapped = []
    for field in input:
        wrapped.append(RenamedField(field, what, kwargs))
    return FieldArray(wrapped)
75 changes: 63 additions & 12 deletions ecml_tools/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1179,15 +1179,16 @@ def _frequency_to_hours(frequency):

def _as_date(d, dates, last):
if isinstance(d, datetime.datetime):
assert d.minutes == 0 and d.hours == 0 and d.seconds == 0, d
if not d.minute == 0 and d.hour == 0 and d.second == 0:
return np.datetime64(d)
d = datetime.date(d.year, d.month, d.day)

if isinstance(d, datetime.date):
d = d.year * 10_000 + d.month * 100 + d.day

try:
d = int(d)
except ValueError:
except (ValueError, TypeError):
pass

if isinstance(d, int):
Expand Down Expand Up @@ -1255,12 +1256,14 @@ def _as_last_date(d, dates):
return _as_date(d, dates, last=True)


def _concat_or_join(datasets):
def _concat_or_join(datasets, kwargs):
datasets, kwargs = _auto_adjust(datasets, kwargs)

# Study the dates
ranges = [(d.dates[0].astype(object), d.dates[-1].astype(object)) for d in datasets]

if len(set(ranges)) == 1:
return Join(datasets)._overlay()
return Join(datasets)._overlay(), kwargs

# Make sure the dates are disjoint
for i in range(len(ranges)):
Expand All @@ -1285,7 +1288,7 @@ def _concat_or_join(datasets):
f"{r} and {s} ({datasets[i]} {datasets[i+1]})"
)

return Concat(datasets)
return Concat(datasets), kwargs


def _open(a, zarr_root):
Expand All @@ -1310,6 +1313,48 @@ def _open(a, zarr_root):
raise NotImplementedError()


def _auto_adjust(datasets, kwargs):
"""Adjust the datasets for concatenation or joining based
on parameters set to 'matching'"""

if kwargs.get("ajust") == "matching":
kwargs.pop("ajust")
for p in ("select", "frequency", "start", "end"):
kwargs[p] = "matching"

adjust = {}

if kwargs.get("select") == "matching":
kwargs.pop("select")
variables = None
for d in datasets:
if variables is None:
variables = set(d.variables)
else:
variables &= set(d.variables)
if len(variables) == 0:
raise ValueError("No common variables")

adjust["select"] = variables

if kwargs.get("frequency") == "matching":
kwargs.pop("frequency")
adjust["frequency"] = max(d.frequency for d in datasets)

if kwargs.get("start") == "matching":
kwargs.pop("start")
adjust["start"] = max(d.dates[0] for d in datasets).astype(object)

if kwargs.get("end") == "matching":
kwargs.pop("end")
adjust["end"] = max(d.dates[-1] for d in datasets).astype(object)

if adjust:
datasets = [d._subset(**adjust) for d in datasets]

return datasets, kwargs


def _open_dataset(*args, zarr_root, **kwargs):
sets = []
for a in args:
Expand All @@ -1318,17 +1363,21 @@ def _open_dataset(*args, zarr_root, **kwargs):
if "ensemble" in kwargs:
if "grids" in kwargs:
raise NotImplementedError("Cannot use both 'ensemble' and 'grids'")

ensemble = kwargs.pop("ensemble")
axis = kwargs.pop("axis", 2)
assert len(args) == 0
assert isinstance(ensemble, (list, tuple))
return Ensemble([_open(e, zarr_root) for e in ensemble], axis=axis)._subset(
**kwargs
)

datasets = [_open(e, zarr_root) for e in ensemble]
datasets, kwargs = _auto_adjust(datasets, kwargs)

return Ensemble(datasets, axis=axis)._subset(**kwargs)

if "grids" in kwargs:
if "ensemble" in kwargs:
raise NotImplementedError("Cannot use both 'ensemble' and 'grids'")

grids = kwargs.pop("grids")
mode = kwargs.pop("mode", "concatenate")
axis = kwargs.pop("axis", 3)
Expand All @@ -1344,9 +1393,10 @@ def _open_dataset(*args, zarr_root, **kwargs):
f"Unknown grids mode: {mode}, values are {list(KLASSES.keys())}"
)

return KLASSES[mode]([_open(e, zarr_root) for e in grids], axis=axis)._subset(
**kwargs
)
datasets = [_open(e, zarr_root) for e in grids]
datasets, kwargs = _auto_adjust(datasets, kwargs)

return KLASSES[mode](datasets, axis=axis)._subset(**kwargs)

for name in ("datasets", "dataset"):
if name in kwargs:
Expand All @@ -1359,7 +1409,8 @@ def _open_dataset(*args, zarr_root, **kwargs):
assert len(sets) > 0, (args, kwargs)

if len(sets) > 1:
return _concat_or_join(sets)._subset(**kwargs)
dataset, kwargs = _concat_or_join(sets, kwargs)
return dataset._subset(**kwargs)

return sets[0]._subset(**kwargs)

Expand Down
56 changes: 54 additions & 2 deletions ecml_tools/grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def cutout_mask(

zero = np.array([0.0, 0.0, 0.0])
ok = []
for i, (era_point, distance, index) in enumerate(
for i, (global_point, distance, index) in enumerate(
zip(global_points, distances, indices)
):
t = Triangle3D(points[index[0]], points[index[1]], points[index[2]])
Expand All @@ -164,7 +164,7 @@ def cutout_mask(
# from the point to the center of the Earth is not None
# (the direction of the ray is not important)
ok.append(
(t.intersect(zero, era_point) or t.intersect(era_point, zero))
(t.intersect(zero, global_point) or t.intersect(global_point, zero))
# and (distance >= min_distance)
)

Expand All @@ -188,6 +188,58 @@ def cutout_mask(
return mask


def thinning_mask(
    lats,
    lons,
    global_lats,
    global_lons,
    cropping_distance=2.0,
):
    """Return, for each global grid point near the (lats, lons) area,
    the index of the closest point in (lats, lons).

    The global grid is first cropped to the bounding box of the local grid
    (expanded by *cropping_distance* degrees and clamped to the poles),
    then each remaining global point is matched to its nearest local point.

    Parameters
    ----------
    lats, lons : 1-D arrays, coordinates of the local (target) grid.
    global_lats, global_lons : 1-D arrays, coordinates of the global grid.
    cropping_distance : margin in degrees around the local bounding box.

    Returns
    -------
    np.ndarray of integer indices into (lats, lons), one per cropped
    global point.
    """

    assert global_lats.ndim == 1
    assert global_lons.ndim == 1
    assert lats.ndim == 1
    assert lons.ndim == 1

    assert global_lats.shape == global_lons.shape
    assert lats.shape == lons.shape

    north = np.amax(lats)
    south = np.amin(lats)
    east = np.amax(lons)
    west = np.amin(lons)

    # Reduce the global grid to the area of interest, clamped to the poles.
    mask = cropping_mask(
        global_lats,
        global_lons,
        np.min([90.0, north + cropping_distance]),
        west - cropping_distance,
        np.max([-90.0, south - cropping_distance]),
        east + cropping_distance,
    )

    global_lats_masked = global_lats[mask]
    global_lons_masked = global_lons[mask]

    # Work in 3D cartesian coordinates on the unit sphere so that Euclidean
    # distance is a monotonic proxy for the great-circle distance.
    global_xyz = latlon_to_xyz(global_lats_masked, global_lons_masked)
    global_points = np.array(global_xyz).transpose()

    xyz = latlon_to_xyz(lats, lons)
    points = np.array(xyz).transpose()

    # Use a KDTree to find the nearest local point for each global point.
    kdtree = KDTree(points)
    _, indices = kdtree.query(global_points, k=1)

    return np.asarray(indices)


if __name__ == "__main__":
global_lats, global_lons = np.meshgrid(
np.linspace(90, -90, 90),
Expand Down

0 comments on commit 20b95a8

Please sign in to comment.