Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New variables #199

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
92baf32
added two new variables: sst and siconc
Oct 7, 2024
d7c1b47
updated siconc units
Oct 8, 2024
fcffcca
Merge branch 'main' into new-variables
aslibese Oct 8, 2024
3f3e778
Update AUTHORS.rst
aslibese Oct 8, 2024
f58d9da
Update AUTHORS.rst
aslibese Oct 8, 2024
556cbc6
add standard_name to raw to allow units conversion
tlogan2000 Oct 8, 2024
c731f10
Merge branch 'new-variables' of github.com:Ouranosinc/miranda into ne…
tlogan2000 Oct 8, 2024
882b469
updated the units of tos and siconc to cmip6 standards
Oct 8, 2024
70522f5
Merge branch 'main' into new-variables
tlogan2000 Oct 8, 2024
3239cc8
use pr standard_name for now
tlogan2000 Oct 9, 2024
70d1e9a
rollback
tlogan2000 Oct 9, 2024
1058d6c
comment out _offset_time (not doing anything ... potentially could re…
tlogan2000 Oct 10, 2024
e4857da
remove _offset_time from ecmwf config; add "era5-single-levels-month…
tlogan2000 Oct 10, 2024
eb12f5c
added the new variable 'land-sea mask (lsm)'
Oct 22, 2024
24c61ea
Merge branch 'new-variables' of https://github.com/Ouranosinc/miranda…
Oct 22, 2024
edadd07
add units correction for raw ERA5 tp data
tlogan2000 Nov 27, 2024
6b1b7a3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 27, 2024
3d857ca
remove transformation unit correction is a rate for monthly
tlogan2000 Nov 27, 2024
762bcf5
added four new variables cp, cape, u, v. removed stlf
Dec 19, 2024
02807b8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 19, 2024
9b966e7
Merge branch 'main' into new-variables
Zeitsperre Jan 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ Contributors
* Sébastien Biner <biner.sebastien@hydroquebec.com> `@sbiner <https://github.com/sbiner>`_
* David Huard <huard.david@ouranos.ca> `@huard <https://github.com/huard>`_
* Gabriel Rondeau-Genesse <rondeau-genesse.gabriel@ouranos.ca> `@RondeauG <https://github.com/RondeauG>`_
* Aslı Beşe <bese.asli@ouranos.ca> `@aslibese <https://github.com/aslibese>`_
112 changes: 61 additions & 51 deletions src/miranda/convert/_data_corrections.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,14 @@ def _preprocess_correct(d: xr.Dataset, *, ops: list[partial]) -> xr.Dataset:
return ds


def _correct_standard_names(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
    """Overwrite the ``standard_name`` attribute of variables flagged in the mapping.

    Parameters
    ----------
    d : xr.Dataset
        Dataset whose variable attributes are updated in place.
    p : str
        Project identifier forwarded to ``_iter_entry_key``.
    m : dict
        Metadata definition forwarded to ``_iter_entry_key``.

    Returns
    -------
    xr.Dataset
        The same dataset object that was passed in.
    """
    # presumably yields (variable name, configured value) pairs from the
    # "variables" section of the mapping -- confirm against _iter_entry_key.
    entries = _iter_entry_key(d, m, "variables", "_corrected_standard_name", p)
    for name, standard_name in entries:
        if not standard_name:
            # Falsy values (e.g. None, "" or False) leave the variable untouched.
            continue
        d[name].attrs["standard_name"] = standard_name
    return d


def _correct_units_names(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
key = "_corrected_units"
for var, val in _iter_entry_key(d, m, "variables", key, p):
Expand Down Expand Up @@ -450,53 +458,55 @@ def _transform(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
return d_out


def _offset_time(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
    """Shift the time coordinate of flagged entries back by one time step.

    Entries are taken from the "dimensions" section of the metadata
    definition under the ``_offset_time`` key. Truthy entries are offset,
    entries explicitly set to ``False`` are only logged, and every other
    data variable is copied to the output unchanged.

    Parameters
    ----------
    d : xr.Dataset
        Input dataset.
    p : str
        Project identifier used to look up entries in the mapping.
    m : dict
        Metadata definition.

    Returns
    -------
    xr.Dataset
        A new dataset built from the coordinates and attributes of ``d``,
        with one history note prepended per offset entry.

    Raises
    ------
    TypeError
        Re-raised from ``get_time_frequency`` when the time frequency
        cannot be parsed.
    """
    key = "_offset_time"
    d_out = xr.Dataset(coords=d.coords, attrs=d.attrs)
    converted = set()
    offset, offset_meaning = None, None

    # Only forward an expected period when the mapping gives one as a string.
    time_freq = {}
    expected_period = _get_section_entry_key(
        m, "dimensions", "time", "_ensure_correct_time", p
    )
    if isinstance(expected_period, str):
        time_freq["expected_period"] = expected_period

    for vv, offs in _iter_entry_key(d, m, "dimensions", key, p):
        if offs:
            # Offset time by value of one time-step.
            # The frequency is resolved once and reused for later entries.
            if offset is None and offset_meaning is None:
                try:
                    offset, offset_meaning = get_time_frequency(d, **time_freq)
                except TypeError:
                    logging.error(
                        "Unable to parse the time frequency. Verify data integrity before retrying."
                    )
                    raise

            msg = f"Offsetting data for `{vv}` by `{offset[0]} {offset_meaning}(s)`."

            logging.info(msg)
            with xr.set_options(keep_attrs=True):
                out = d[vv]
                # presumably `offset` is a (value, numpy unit code) pair -- TODO confirm
                out["time"] = out.time - np.timedelta64(offset[0], offset[1])
                # NOTE(review): d_out was created with the original coords of `d`;
                # xarray may re-align `out` onto that time index on assignment.
                # Confirm this yields the intended shift.
                d_out[vv] = out
            converted.add(vv)
            # Read the running history from d_out so that notes from earlier
            # iterations are kept rather than overwritten.
            prev_history = d_out.attrs.get("history", "")
            history = f"Offset variable `{vv}` values by `{offset[0]} {offset_meaning}(s)`. {prev_history}"
            d_out.attrs.update(dict(history=history))
        elif offs is False:
            msg = f"No time offsetting needed for `{vv}` in `{p}` (Explicitly set to False)."

            logging.info(msg)

    # Copy unconverted variables
    for vv in d.data_vars:
        if vv not in converted:
            d_out[vv] = d[vv]
    return d_out
# TODO: Determine if this function is still needed

# def _offset_time(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
# key = "_offset_time"
# d_out = xr.Dataset(coords=d.coords, attrs=d.attrs)
# converted = []
# offset, offset_meaning = None, None
#
# time_freq = dict()
# expected_period = _get_section_entry_key(
# m, "dimensions", "time", "_ensure_correct_time", p
# )
# if isinstance(expected_period, str):
# time_freq["expected_period"] = expected_period
#
# for vv, offs in _iter_entry_key(d, m, "dimensions", key, p):
# if offs:
# # Offset time by value of one time-step
# if offset is None and offset_meaning is None:
# try:
# offset, offset_meaning = get_time_frequency(d, **time_freq)
# except TypeError:
# logging.error(
# "Unable to parse the time frequency. Verify data integrity before retrying."
# )
# raise
#
# msg = f"Offsetting data for `{vv}` by `{offset[0]} {offset_meaning}(s)`."
#
# logging.info(msg)
# with xr.set_options(keep_attrs=True):
# out = d[vv]
# out["time"] = out.time - np.timedelta64(offset[0], offset[1])
# d_out[vv] = out
# converted.append(vv)
# prev_history = d.attrs.get("history", "")
# history = f"Offset variable `{vv}` values by `{offset[0]} {offset_meaning}(s). {prev_history}"
# d_out.attrs.update(dict(history=history))
# elif offs is False:
# msg = f"No time offsetting needed for `{vv}` in `{p}` (Explicitly set to False)."
#
# logging.info(msg)
# continue
#
# # Copy unconverted variables
# for vv in d.data_vars:
# if vv not in converted:
# d_out[vv] = d[vv]
# return d_out


def _invert_sign(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
Expand Down Expand Up @@ -536,7 +546,7 @@ def _units_cf_conversion(d: xr.Dataset, m: dict) -> xr.Dataset:
for vv, unit in _iter_entry_key(d, m, "variables", "units", None):
if unit:
with xr.set_options(keep_attrs=True):
d[vv] = units.convert_units_to(d[vv], unit, context="hydro")
d[vv] = units.convert_units_to(d[vv], unit)
prev_history = d.attrs.get("history", "")
history = f"Converted variable `{vv}` to CF-compliant units (`{unit}`). {prev_history}"
d.attrs.update(dict(history=history))
Expand Down Expand Up @@ -888,17 +898,17 @@ def dataset_corrections(ds: xr.Dataset, project: str) -> xr.Dataset:
metadata_definition = load_json_data_mappings(project)

ds = _correct_units_names(ds, project, metadata_definition)
ds = _correct_standard_names(ds, project, metadata_definition)
ds = _transform(ds, project, metadata_definition)
ds = _invert_sign(ds, project, metadata_definition)
ds = _units_cf_conversion(ds, metadata_definition)
ds = _clip_values(ds, project, metadata_definition)

ds = dims_conversion(ds, project, metadata_definition)
ds = _ensure_correct_time(ds, project, metadata_definition)
ds = _offset_time(ds, project, metadata_definition)

# TODO validate this is needed
# ds = _offset_time(ds, project, metadata_definition)
ds = variable_conversion(ds, project, metadata_definition)

ds = metadata_conversion(ds, project, metadata_definition)

ds.attrs["history"] = (
Expand Down
Loading
Loading