Commit 8052192
UPD: remove large files and transform nb to py file
Thibault Cordier committed Nov 14, 2023
1 parent bd013b9
Showing 4 changed files with 249 additions and 35,263 deletions.
249 changes: 249 additions & 0 deletions
examples/regression/3-scientific-articles/plot_zaffran2022_comparison.py
@@ -0,0 +1,249 @@
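"""
==================================================================
Reproducing the ACI results of Zaffran et al. (2022) with MAPIE
==================================================================

This example aims to reproduce part of the experiments of Zaffran et al.
(2022), "Adaptive Conformal Predictions for Time Series". It downloads the
French electricity spot price data used in the paper, fits a
MapieTimeSeriesRegressor with the adaptive conformal inference (ACI)
method on the hour-0 series, and compares the resulting prediction
intervals with the reference intervals published in the authors'
repository (mzaffran/AdaptiveConformalPredictionsTimeSeries).
"""
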
import warnings

from typing import Tuple
from urllib.request import urlopen
import ssl
import pickle

import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor

from mapie.time_series_regression import MapieTimeSeriesRegressor

from mapie._typing import NDArray

warnings.simplefilter("ignore")


#########################################################
# Global random forests parameters
#########################################################

def init_model():
    # the number of trees in the forest
    n_estimators = 1000

    # the minimum number of samples required to be at a leaf node
    # (default skgarden's parameter)
    min_samples_leaf = 1

    # the number of features to consider when looking for the best split
    # (default skgarden's parameter)
    max_features = 6

    model = RandomForestRegressor(
        n_estimators=n_estimators,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        random_state=1
    )

    return model


#########################################################
# Get data
#########################################################

def get_data() -> pd.DataFrame:
    """
    Download the extract of the French electricity spot prices (2016-2019)
    used by Zaffran et al. (2022) from the authors' repository.

    Returns
    -------
    pd.DataFrame
        The spot prices and associated features.
    """
    website = "https://raw.githubusercontent.com/"
    page = "mzaffran/AdaptiveConformalPredictionsTimeSeries/"
    folder = "131656fe4c25251bad745f52db3c2d7cb1c24bbb/data_prices/"
    file = "Prices_2016_2019_extract.csv"
    url = website + page + folder + file
    ssl._create_default_https_context = ssl._create_unverified_context
    df = pd.read_csv(url)
    return df


#########################################################
# Get & Present data
#########################################################

data = get_data()

date_data = pd.to_datetime(data.Date)

plt.figure(figsize=(10, 5))
plt.plot(date_data, data.Spot, color='black', linewidth=0.6)

locs, labels = plt.xticks()
new_labels = ['2016', '2017', '2018', '2019', '2020']
plt.xticks(locs[0:len(locs):2], labels=new_labels)

plt.xlabel('Date')
plt.ylabel('Spot price (\u20AC/MWh)')

plt.show()

#########################################################
# Prepare data
#########################################################

limit = datetime.datetime(2019, 1, 1, tzinfo=datetime.timezone.utc)
id_train = data.index[pd.to_datetime(data['Date'], utc=True) < limit].tolist()
id_test = data.index[pd.to_datetime(data['Date'], utc=True) >= limit].tolist()

data_train = data.iloc[id_train, :]
data_test = data.iloc[id_test, :]
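
# The feature names below (taken from the original experiment) suggest the
# hour of day, day-of-week indicator variables, spot prices lagged by 24
# hours and by one week (168 hours), and a consumption feature ('conso');
# this reading of the columns is inferred from their names.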
features = ['hour'] + ['dow_%d' % i for i in range(7)] \
    + ['lag_24_%d' % i for i in range(24)] \
    + ['lag_168_%d' % i for i in range(24)] + ['conso']
X_train = data_train.loc[:, features]
y_train = data_train.Spot

X_train_0 = X_train.loc[X_train.hour == 0]
y_train_0 = data_train.loc[data_train.hour == 0, 'Spot']

X_test = data_test.loc[:, features]
y_test = data_test.Spot

X_test_0 = X_test.loc[X_test.hour == 0]
y_test_0 = data_test.loc[data_test.hour == 0, 'Spot']


#########################################################
# Prepare model
#########################################################

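# The parameters below drive the online ACI experiment: `alpha` is the
# target miscoverage level of the prediction intervals, `gamma` is the ACI
# step size used to adapt the effective level online, `gap` is the number
# of new observations between two updates, and `iteration_max` caps the
# number of online steps compared with the reference results.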
alpha = 0.1
gap = 1
iteration_max = 10
gamma = 0.04

model = init_model()
mapie_aci = MapieTimeSeriesRegressor(
    model, method="aci", agg_function="mean", n_jobs=-1
)


#########################################################
# Reproduce experiment and results
#########################################################

all_x_train = [
    np.array(data_train.loc[data_train.hour == h]) for h in range(24)
]

train_size = all_x_train[0].shape[0]
idx = np.array(range(train_size))
n_half = int(np.floor(train_size / 2))

X_train_0 = X_train_0[:n_half]
y_train_0 = y_train_0[:n_half]

mapie_aci = mapie_aci.fit(X_train_0, y_train_0)
y_pred_aci_npfit, y_pis_aci_npfit = mapie_aci.predict(
    X_test_0, alpha=alpha, ensemble=True, optimize_beta=False
)
print("MAPIE estimator fitted!")

# step 0
y_pred_aci_pfit = np.zeros(y_pred_aci_npfit.shape)
y_pis_aci_pfit = np.zeros(y_pis_aci_npfit.shape)
y_pred_aci_pfit[:gap], y_pis_aci_pfit[:gap, :, :] = mapie_aci.predict(
    X_test_0.iloc[:gap, :], alpha=alpha, ensemble=True, optimize_beta=False
)

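# At each subsequent step, the loop below refits the underlying forest on
# the most recent observation(s), updates the conformity scores with
# `partial_fit`, adapts the effective level with `adapt_conformal_inference`
# (the ACI update, with step size `gamma`), and predicts the interval for
# the next time step.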
# step t
for step in range(1, min(len(X_test_0), iteration_max + 1), gap):

    mapie_aci.estimator_.single_estimator_.fit(
        X_test_0.iloc[(step - gap):step, :],
        y_test_0.iloc[(step - gap):step]
    )

    mapie_aci.partial_fit(
        X_test.iloc[(step - gap):step, :],
        y_test.iloc[(step - gap):step],
    )

    mapie_aci.adapt_conformal_inference(
        X_test_0.iloc[(step - gap):step, :],
        y_test_0.iloc[(step - gap):step],
        gamma=gamma
    )
    (
        y_pred_aci_pfit[step:step + gap],
        y_pis_aci_pfit[step:step + gap, :, :],
    ) = mapie_aci.predict(
        X_test_0.iloc[step:(step + gap), :],
        alpha=alpha,
        ensemble=True,
        optimize_beta=True
    )

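# `y_pis_aci_pfit` has shape (n_samples, 2, n_alpha): lower and upper
# interval bounds for each requested alpha (here a single level).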
results = y_pis_aci_pfit.copy()


#########################################################
# Get referenced result to reproduce
#########################################################

def get_pickle() -> Tuple[NDArray, NDArray]:
    """
    Download the reference results of the original experiment (ACI with
    gamma = 0.04 and a random forest) from the authors' repository.

    Returns
    -------
    The unpickled reference results, or None if the download fails.
    """
    website = "https://github.com/"
    page = "mzaffran/AdaptiveConformalPredictionsTimeSeries/raw/"
    folder = "131656fe4c25251bad745f52db3c2d7cb1c24bbb/results/"
    folder += "Spot_France_Hour_0_train_2019-01-01/"
    file = "ACP_0.04_RF.pkl"
    url = website + page + folder + file
    ssl._create_default_https_context = ssl._create_unverified_context
    loaded_data = None
    try:
        loaded_data = pickle.load(urlopen(url))
    except FileNotFoundError:
        print(f"The file {file} was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    return loaded_data


data_ref = get_pickle()
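# `data_ref` is expected to expose the arrays 'Y_inf' and 'Y_sup' with the
# lower and upper bounds of the reference intervals (inferred from how it
# is indexed below).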


#########################################################
# Compare results
#########################################################

# Flatten the arrays to shape (n, 2)
results_ref = np.concatenate(
    [data_ref['Y_inf'], data_ref['Y_sup']], axis=-1
).reshape(-1, 2)
results = np.array(results.reshape(-1, 2))

# Compare the MAPIE intervals with the reference ones
comparison_result_Y_inf = np.array_equal(
    results[:iteration_max, 0],
    results_ref[:iteration_max, 0]
)
comparison_result_Y_sup = np.array_equal(
    results[:iteration_max, 1],
    results_ref[:iteration_max, 1]
)

# Print the comparison results
print(f"Comparison (Y_inf): {results[:iteration_max, 0]}")
print(f"Comparison (Y_inf ref): {results_ref[:iteration_max, 0]}")
print(f"Comparison (Y_sup): {results[:iteration_max, 1]}")
print(f"Comparison (Y_sup ref): {results_ref[:iteration_max, 1]}")
print(f"Comparison for ACP_0.04 (Y_inf): {comparison_result_Y_inf}")
print(f"Comparison for ACP_0.04 (Y_sup): {comparison_result_Y_sup}")