-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCO2_data_modelisation.py
128 lines (100 loc) · 3.62 KB
/
CO2_data_modelisation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from sklearn.datasets import fetch_openml
from smt.kernels import Product
from scipy.io import loadmat
from smt.kernels import SquarSinExp
from smt.kernels import Kernel
from smt.kernels import Matern32
from smt.kernels import Matern52
from smt.kernels import PowExp
import numpy as np
from smt.surrogate_models import KRG
import pickle
import os
import matplotlib.pyplot as plt
co2 = fetch_openml(data_id=41187, as_frame=True)
import polars as pl
co2_data = pl.DataFrame(co2.frame[["year", "month", "day", "co2"]]).select(
pl.date("year", "month", "day"), "co2"
)
co2_data = (
co2_data.sort(by="date")
.group_by_dynamic("date", every="1mo")
.agg(pl.col("co2").mean())
.drop_nulls()
)
X = co2_data.select(
pl.col("date").dt.year() + pl.col("date").dt.month() / 12
).to_numpy()[:100]
y = co2_data["co2"].to_numpy()[:100]
X_smt=X.reshape(-1)[:100]
class Period(Kernel):
def __call__(self,d,grad_ind=None,hess_ind=None,derivative_params=None):
n=self.theta.shape[0]
theta2=self.theta[:n//2]
theta3=self.theta[n//2:]
return np.atleast_2d(np.exp(-np.sum(theta2*np.sin(theta3*d)**2,axis=1))).T
class RBF(Kernel):
def __call__(self, d, grad_ind=None, hess_ind=None, derivative_params=None):
theta=self.theta.reshape(1, d.shape[1])
#r=np.zeros((d.shape[0],1))
r=np.exp(-np.sum(theta*d**2,axis=1))
return np.atleast_2d(r).T
class Rat_quad(Kernel):
def __call__(self, d, grad_ind=None, hess_ind=None, derivative_params=None):
n=self.theta.shape[0]
theta4=self.theta[:n//2]
theta5=self.theta[n//2:]
r3=(1+d**2/(2*theta4*theta5))**(-theta4)
return r3
class LocalPeriod(Kernel):
def __call__(self, d, grad_ind=None, hess_ind=None, derivative_params=None):
n=self.theta.shape[0]
theta1=self.theta[:n//6]
theta2=self.theta[n//6:2*n//6]
theta3=self.theta[2*n//6:3*n//6]
theta4=self.theta[3*n//6:4*n//6]
theta5=self.theta[4*n//6:5*n//6]
theta6=self.theta[5*n//6:]
r1=np.exp(-np.sum(theta1*d**2,axis=1))
r2=np.exp(-np.sum(theta2*np.sin(theta3*d)**2,axis=1))
r3=(1+d**2/(2*theta4*theta5))**(-theta4)
r4=np.exp(-np.sum(theta6*d**2,axis=1))
r=(np.atleast_2d(r4).T+r3+np.atleast_2d(r2).T*np.atleast_2d(r1).T)/3
return np.atleast_2d(r).T
k_test=LocalPeriod([0.01,0.01,0.01,0.01,0.01,0.01])
k=RBF([0.01])+Period([0.01,0.01])*RBF([0.01])+Rat_quad([0.01,0.01])
from smt.surrogate_models import KRG
import time
sm=KRG(corr=k, hyper_opt="Cobyla",n_start=50)
sm.set_training_values(X, y)
sm.train()
print(sm.corr)
print(sm.corr)
import datetime
import numpy as np
import matplotlib.pyplot as plt
today = datetime.datetime.now()
current_month = today.year + today.month / 12
X_test = np.linspace(start=1958, stop=current_month, num=1_000).reshape(-1, 1)
mean_y_pred=sm.predict_values(X_test)
s2 = sm.predict_variances(X_test)
plt.plot(X, y, color="black", linestyle="dashed", label="Measurements")
plt.plot(X_test, mean_y_pred, color="tab:blue", alpha=0.4, label="Gaussian process")
plt.legend()
plt.xlabel("Year")
plt.ylabel("Monthly average of CO$_2$ concentration (ppm)")
_ = plt.title(
"Monthly average of air samples measurements\nfrom the Mauna Loa Observatory"
)
_, axs = plt.subplots(1)
# add a plot with variance
axs.plot(X, y,color="black", linestyle="dashed", label="Measurements")
axs.plot(X_test, mean_y_pred, color="tab:blue", alpha=0.4, label="Gaussian process")
axs.fill_between(
np.ravel(X_test),
np.ravel(mean_y_pred - 3 * np.sqrt(s2)),
np.ravel(mean_y_pred + 3 * np.sqrt(s2)),
color="lightgrey",
)
axs.legend()
plt.show()