-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsetup_models.py
68 lines (58 loc) · 1.8 KB
/
setup_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import matplotlib as mpl
import yaml
from intake_esgf import ESGFCatalog
# Search ESGF for the `areacella` and `sftlf` variables at the `fx` frequency
# from the historical experiment, restricted to a hand-picked list of models.
cat = ESGFCatalog().search(
    experiment_id="historical",
    source_id=[
        "BCC-CSM2-MR",
        "CanESM5",
        "CESM2",
        "GFDL-ESM4",
        "IPSL-CM6A-LR",
        # Names must match the registered CMIP6 source_id spelling exactly;
        # a misspelled entry matches nothing and the model is silently
        # absent from the generated setup file.
        "MIROC-ES2L",
        "MPI-ESM1-2-HR",
        "NorESM2-LM",
        "UKESM1-0-LL",
    ],
    variable_id=["areacella", "sftlf"],
    frequency=["fx"],
)

# How many of our variables does each model group (unique combination of
# ('source_id','member_id','grid_label')) have?
counts = cat.model_groups()
# Predicate used to drop model groups that hold fewer variables than the
# best-covered group of the same model.
def has_max_counts(df) -> bool:
    """Report whether a model group is as complete as its model's best group.

    The model name is read from the ``source_id`` column of the first row of
    ``df``. The number of rows in ``df`` is then compared with the largest
    entry of the module-level ``counts`` whose first index level equals that
    model name.

    Args:
        df: Table describing one model group; it must support ``.iloc`` and
            have a ``source_id`` column. Presumably one row per dataset in
            the group -- confirm against the caller.

    Returns:
        True when ``len(df)`` equals that maximum, otherwise False.
    """
    source = df.iloc[0]["source_id"]
    # Boolean mask over `counts` selecting every group of this model.
    same_model = counts.index.get_level_values(0) == source
    return bool(len(df) == counts[same_model].max())
# Drop model groups that fail `has_max_counts`, then select just the
# 'smallest' member_id that has all of our variables.
cat.remove_incomplete(has_max_counts).remove_ensembles()

models = cat.df.source_id.unique()

# Regroup the catalog's path dictionary per model, reducing every file path to
# its (unique) parent directory.
paths = cat.to_path_dict(ignore_facets=["institution_id"])
paths = {
    model: {
        key: list({str(p.parent) for p in data})
        for key, data in paths.items()
        # NOTE(review): a prefix match would also pick up keys of any other
        # model whose name merely starts with `model`; harmless for the
        # current model list, but confirm the key format before extending it.
        if key.startswith(model)
    }
    for model in models
}

# Define some model colors using a matplotlib color sequence
clrs = mpl.color_sequences["tab10"]
colormap = {
    # Wrap around the palette so more models than colors does not raise an
    # IndexError; `to_hex` rounds each channel instead of truncating it, so
    # float representation error cannot shift a channel down by one.
    model: mpl.colors.to_hex(clrs[i % len(clrs)])
    for i, model in enumerate(models)
}

# Output the ILAMB model setup
paths = {
    model: {
        "modelname": model,
        "color": colormap[model],
        "path": None,
        # Flatten the per-key directory lists into one sorted, duplicate-free list.
        "paths": sorted({d for dirs in keys.values() for d in dirs}),
    }
    for model, keys in paths.items()
}
with open("models_cmip6.yaml", mode="w", encoding="utf-8") as out:
    yaml.dump(paths, out)