-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.py
224 lines (187 loc) · 7.44 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import iris.coord_categorisation as cat
import iris
import numpy as np
from scipy import stats
from netCDF4 import Dataset
def savitzky_golay(y, window_size, order, deriv=0, rate=1):
    r"""Smooth (and optionally differentiate) data with a Savitzky-Golay filter.

    The Savitzky-Golay filter removes high frequency noise from data.
    It has the advantage of preserving the original shape and
    features of the signal better than other types of filtering
    approaches, such as moving averages techniques.

    Parameters
    ----------
    y : array_like, shape (N,)
        the values of the time history of the signal.
    window_size : int
        the length of the window. Must be an odd integer number.
    order : int
        the order of the polynomial used in the filtering.
        Must be less than `window_size` - 1.
    deriv : int
        the order of the derivative to compute (default = 0 means only
        smoothing).
    rate : float
        scaling applied to the filter coefficients as ``rate**deriv``
        (default = 1); it has no effect when ``deriv`` is 0.

    Returns
    -------
    ys : ndarray, shape (N)
        the smoothed signal (or its n-th derivative).

    Raises
    ------
    ValueError
        if `window_size` or `order` cannot be converted to ``int``.
    TypeError
        if `window_size` is not a positive odd number, or is too small
        for the requested polynomial order.

    Notes
    -----
    The Savitzky-Golay is a type of low-pass filter, particularly
    suited for smoothing noisy data. The main idea behind this
    approach is to make for each point a least-square fit with a
    polynomial of high order over an odd-sized window centered at
    the point.

    Examples
    --------
    t = np.linspace(-4, 4, 500)
    y = np.exp( -t**2 ) + np.random.normal(0, 0.05, t.shape)
    ysg = savitzky_golay(y, window_size=31, order=4)
    import matplotlib.pyplot as plt
    plt.plot(t, y, label='Noisy signal')
    plt.plot(t, np.exp(-t**2), 'k', lw=1.5, label='Original signal')
    plt.plot(t, ysg, 'r', label='Filtered signal')
    plt.legend()
    plt.show()

    References
    ----------
    .. [1] A. Savitzky, M. J. E. Golay, Smoothing and Differentiation of
       Data by Simplified Least Squares Procedures. Analytical
       Chemistry, 1964, 36 (8), pp 1627-1639.
    .. [2] Numerical Recipes 3rd Edition: The Art of Scientific Computing
       W.H. Press, S.A. Teukolsky, W.T. Vetterling, B.P. Flannery
       Cambridge University Press ISBN-13: 9780521880688
    """
    from math import factorial

    # The builtin int() replaces the removed ``np.int`` alias; TypeError is
    # caught as well so that e.g. ``None`` is reported the same way as "abc".
    try:
        window_size = abs(int(window_size))
        order = abs(int(order))
    except (ValueError, TypeError) as err:
        raise ValueError(
            "window_size and order have to be of type int") from err
    if window_size % 2 != 1 or window_size < 1:
        raise TypeError("window_size size must be a positive odd number")
    if window_size < order + 2:
        raise TypeError("window_size is too small for the polynomials order")

    # Accept any array_like (lists, tuples) as the docstring promises.
    y = np.asarray(y)
    half_window = (window_size - 1) // 2

    # precompute coefficients: design matrix with entries k**i for every
    # window offset k and polynomial power i, built in floating point so
    # large powers cannot overflow an integer dtype.
    offsets = np.arange(-half_window, half_window + 1, dtype=float)
    powers = np.arange(order + 1)
    b = offsets[:, np.newaxis] ** powers
    m = np.linalg.pinv(b)[deriv] * rate**deriv * factorial(deriv)

    # pad the signal at the extremes with
    # values taken from the signal itself (mirrored about the end points)
    firstvals = y[0] - np.abs(y[1:half_window + 1][::-1] - y[0])
    lastvals = y[-1] + np.abs(y[-half_window - 1:-1][::-1] - y[-1])
    padded = np.concatenate((firstvals, y, lastvals))
    return np.convolve(m[::-1], padded, mode='valid')
def aggregate_by_year(cube):
    '''
    Average a cube over each value of its 'year' coordinate.

    The time coordinate's unit string has 'months' replaced by 'days' and
    the coordinate is converted to that unit. 'month' and 'year'
    categorical coordinates are then added from 'time' when no coordinate
    with that long_name is present yet. These steps act on the cube that
    is passed in. Progress messages are printed along the way.

    Parameters
    ----------
    cube : iris cube
        Cube with a 'time' coordinate.

    Returns
    -------
    yr_cube : iris cube
        Result of aggregating `cube` by 'year' with iris.analysis.MEAN.
    '''
    time_units = str(cube.coords('time')[0].units)
    print(time_units)

    # Coordinates are identified by long_name (None becomes the string 'None').
    coord_names = [str(coord.long_name) for coord in cube.coords()]
    print('cubes coordinates:', coord_names)

    # The unit conversion happens whether or not 'month' already exists.
    cube.coord('time').convert_units(time_units.replace('months', 'days'))
    if 'month' not in coord_names:
        cat.add_month(cube, 'time')

    if 'year' not in coord_names:
        cat.add_year(cube, 'time')
        print('year added to coords')
    else:
        print('year in coords list')

    return cube.aggregated_by(['year'], iris.analysis.MEAN)
def low_pass_weights(window, cutoff):
    """Return the weights of a low pass Lanczos filter.

    Args:
        window: int
            The length of the filter window.
        cutoff: float
            The cutoff frequency in inverse time steps.

    Returns:
        1-D numpy array of ``2 * ((window - 1) // 2) + 1`` weights
        (``window`` weights when ``window`` is odd), symmetric about the
        central value ``2 * cutoff``.
    """
    half_width = ((window - 1) // 2) + 1
    total = 2 * half_width + 1
    centre = total // 2

    weights = np.zeros([total])
    weights[centre] = 2 * cutoff

    # Lags 1 .. centre-1 on either side of the central weight.
    lags = np.arange(1., centre)
    lanczos_sigma = np.sin(np.pi * lags / centre) * centre / (np.pi * lags)
    ideal_response = np.sin(2. * np.pi * cutoff * lags) / (np.pi * lags)
    side = ideal_response * lanczos_sigma

    weights[centre - 1:0:-1] = side
    weights[centre + 1:-1] = side
    # The two outermost slots are never filled; drop them.
    return weights[1:-1]
#Function to compute F1-score from p_matrices and val_matrices
def get_metric_f1(ref_p_matrix, p_matrix, ref_val_matrix, val_matrix, alpha,
                  tau_min=0, tau_diff=1, same_sign=True):
    '''
    F1-score function from Debeire.K following methods from Nowack.P

    Compares the links of a network (p_matrix / val_matrix) with those of a
    reference network (ref_p_matrix / ref_val_matrix). All matrices are
    indexed as [i, j, tau]; a link counts as significant when its p-value
    is below alpha. Only pairs with i != j and lags from tau_min upwards
    are scored:

    * reference p-value above alpha, tested p-value below alpha: FP;
    * reference p-value below alpha and a tested p-value below alpha at any
      lag within tau_diff of tau: TP, unless same_sign is True and the
      signs of the two values at lag tau differ, which gives FN;
    * reference p-value below alpha with no such tested p-value: FN.

    Returns
    -------
    tuple
        (precision, recall, TP, FP, FN, f1, auto, count), where auto is the
        number of (i, i) pairs skipped and count the number of reference
        links matched within the lag tolerance (before the sign check).
    '''
    n_vars, n_vars, n_lags = val_matrix.shape
    true_pos = 0
    false_pos = 0
    false_neg = 0
    auto = 0
    count = 0

    for i in range(n_vars):
        for j in range(n_vars):
            if i == j:
                auto += 1
                continue
            for tau in range(tau_min, n_lags):
                ref_p = ref_p_matrix[i, j, tau]
                if ref_p > alpha:
                    if p_matrix[i, j, tau] < alpha:
                        false_pos += 1
                    continue
                if not ref_p < alpha:
                    # Exactly alpha (or NaN): not scored at all.
                    continue

                lag_window = p_matrix[i, j, max(0, tau - tau_diff):tau + tau_diff + 1]
                if not np.any(lag_window < alpha):
                    false_neg += 1
                    continue

                count += 1
                # Explicit comparisons against True/False are kept on
                # purpose: any other value of same_sign scores nothing here.
                if same_sign == True:  # noqa: E712
                    if np.sign(ref_val_matrix[i, j, tau]) == np.sign(val_matrix[i, j, tau]):
                        true_pos += 1
                    else:
                        false_neg += 1
                elif same_sign == False:  # noqa: E712
                    true_pos += 1

    # The small constant avoids a division by zero when nothing was counted.
    precision = float(true_pos + 1e-10) / float(true_pos + false_pos + 1e-10)
    recall = float(true_pos + 1e-10) / float(true_pos + false_neg + 1e-10)
    f1 = 2.0 * precision * recall / float(precision + recall)
    return precision, recall, true_pos, false_pos, false_neg, f1, auto, count
def detrend_kw(data):
    '''
    Remove the least-squares linear trend from a 1-D series.

    A straight line is fitted to `data` against its sample index
    (0, 1, ..., len(data) - 1) with scipy.stats.linregress, and the fitted
    line is subtracted from the data.

    Parameters
    ----------
    data : sequence of numbers
        Values to detrend.

    Returns
    -------
    detr_data : ndarray
        `data` minus the fitted intercept + slope * index.
    '''
    steps = np.arange(len(data))
    fit = stats.linregress(steps, data)
    slope, intercept = fit[0], fit[1]
    trend = intercept + slope * steps
    return data - trend
def load_cesm2_ms(pathnc, cont):
    '''
    Function to load CESM2 data and handling the multi-D coordinates. Provided by Schlund.M

    The netCDF file is opened in append mode and modified on disk: for every
    variable that has a `coordinates` attribute and does not use a
    dimension named 'i' or 'i2', the attribute is overwritten with the
    space-separated names of the variable's own dimensions. The file is
    then loaded with iris.

    Parameters
    ----------
    pathnc : str
        Path of the netCDF file; it must be writable.
    cont :
        Passed to iris.load as its second argument to select what is loaded.

    Returns
    -------
    The first cube returned by iris.load(pathnc, cont).
    '''
    # mode='a' so the attribute edits below are written back to the file.
    with Dataset(pathnc, mode='a') as ds:
        for var in ds.variables.values():
            # Ignore variables that do not have the coordinates attribute
            if 'coordinates' not in var.ncattrs():
                continue
            dims = [d.name for d in var.get_dims()]
            # Ignore variables that depend on the 2D lat/lon coords
            if 'i' in dims or 'i2' in dims:
                continue
            # Set coordinates to correct values
            var.coordinates = ' '.join(dims)
    # Load only after the dataset is closed so the edits are on disk.
    cube = iris.load(pathnc, cont)[0]
    return cube