# predictions.py
# Prints actual vs. model-predicted Next_Close values for a random sample of BTC bars.
import random
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from datetime import datetime
# Load the scaled bars (used as model input) and the unscaled bars (used only
# to recover the real price bounds of the target column).
df = pd.read_csv('btc_bars (next_close_with_scalling).csv')
df_without_scaling = pd.read_csv('btc_bars.csv')
# The last unscaled row is excluded before the bounds are taken.
# NOTE(review): presumably because the final bar has no following close - confirm.
df_without_scaling = df_without_scaling.iloc[:-1]

# Highest and lowest unscaled Next_Close; these bounds are what the reporting
# step uses to map scaled values back to prices.
highest_without_scaling = df_without_scaling['Next_Close'].max()
lowest_without_scaling = df_without_scaling['Next_Close'].min()
# Parse the timestamp strings into pandas datetimes so they can be formatted
# when the results are reported.
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Columns taken from the scaled frame; the leading 'timestamp' entry is not
# fed to the model.
numerical_columns = [
    'timestamp', 'open', 'high', 'low', 'close', 'volume', 'trade_count',
    'vwap', 'SMA_20', 'EMA_50', 'RSI', '%K', 'BB_upper', 'BB_lower', 'ATR',
    'OBV', 'Daily_Return', 'Cumulative_Return', 'Next_Close',
]

# Features are every listed column except the timestamp; the target is
# Next_Close.
# NOTE(review): 'Next_Close' is therefore both an input feature and the
# target - confirm this matches how the saved model was trained.
X = df[numerical_columns[1:]]
y = df['Next_Close']

# Build overlapping windows of `timesteps` consecutive rows, giving an array
# of shape (num_samples, timesteps, num_features). Window k holds rows
# k .. k + timesteps - 1.
timesteps = 6
num_features = X.shape[1]
num_samples = len(X) - timesteps + 1
feature_rows = X.values
X_reshaped = np.zeros((num_samples, timesteps, num_features))
for window_start in range(num_samples):
    window_end = window_start + timesteps
    X_reshaped[window_start] = feature_rows[window_start:window_end]
# Load the trained model from disk.
best_model_path = 'best_model.h5'
best_model = keras.models.load_model(best_model_path)

# Choose distinct window indices at random for the demo predictions.
num_samples = X_reshaped.shape[0]
if num_samples == 0:
    raise ValueError("No input windows available to predict on.")
# random.sample raises ValueError when asked for more items than the
# population holds, so cap the request (10 by default) at the window count.
num_predictions = min(10, num_samples)
random_indices = random.sample(range(num_samples), num_predictions)

# Gather the selected windows and run the model on them.
X_random = X_reshaped[random_indices]
predictions = best_model.predict(X_random)
# Map scaled values back to prices using the bounds of the unscaled data.
# Predictions and actuals are both on the scale of y, so both go through the
# same single inverse transform. Do not re-fit a MinMaxScaler on y here: y is
# already scaled, so inverting through such a scaler and then applying the
# bounds again distorts the predictions whenever y does not span exactly
# [0, 1], while the actual values would stay untouched.
price_range = highest_without_scaling - lowest_without_scaling
predictions_unscaled = (
    np.asarray(predictions, dtype=np.float64) * price_range
    + lowest_without_scaling
)

# Print each sampled prediction next to the actual value.
for sample_number, (row_index, prediction) in enumerate(
        zip(random_indices, predictions_unscaled), start=1):
    # NOTE(review): row_index is the FIRST row of the sampled window, so the
    # actual value and timestamp below come from that first row. If the model
    # was trained against the target of the window's last row, these should
    # use row_index + timesteps - 1 instead - confirm against the training code.
    actual = round(y.iloc[row_index] * price_range + lowest_without_scaling, 2)
    # Each prediction row is indexed at [0] to extract the single output value.
    predicted = round(prediction[0], 2)

    # Timestamp of the row the actual value was read from.
    timestamp = df['timestamp'].iloc[row_index]
    formatted_timestamp = timestamp.strftime("%Y-%m-%d %H:%M")

    print("Sample", sample_number)
    print("Timestamp:", formatted_timestamp)
    print("Actual Next_Close:", actual)
    print("Predicted Next_Close:", predicted)
    print()