-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
127 lines (112 loc) · 4.19 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from agent.agent import Agent
from functions import *
import sys
# if len(sys.argv) != 4:
# print("Usage: python train.py [stock] [window] [episodes]")
# exit()
# ---------------------------------------------------------------------------
# Training script: runs the agent over the price series for many episodes,
# letting it hold / buy / sell / close a single position at a time, storing
# every transition in the agent's replay memory and periodically training
# from that memory and checkpointing the actor model.
# ---------------------------------------------------------------------------

# Action codes returned by agent.act() and stored in the order record.
ACTION_HOLD = 0
ACTION_BUY = 1
ACTION_SELL = 2
ACTION_CLOSE = 3


def _flat_order():
    """Return a fresh order record meaning "no position is open"."""
    return {
        'price': 0,
        'action': 0,
        'state': None,
        'next_state': None,
        'trading': False,
    }


def _open_order(price, action, state, next_state):
    """Return an order record for a position opened at *price*.

    *action* is the buy/sell action code that opened the position; *state*
    and *next_state* are the observations surrounding the opening step.
    """
    return {
        'price': price,
        'action': action,
        'state': state,
        'next_state': next_state,
        'trading': True,
    }


def _position_profit(order, price, amount):
    """Return the profit of *order* if it were valued at *price*.

    The price difference is multiplied by *amount* and negated for sell
    orders, so a falling price is a gain for a short position.
    """
    direction = -1 if order['action'] == ACTION_SELL else 1
    return (price - order['price']) * amount * direction


stock_name, window_size, episode_count = "XAUUSD15", 10, 10000
# NOTE(review): presumably five features per bar in the window plus two
# order-related inputs -- confirm against getState().
state_size = window_size * 5 + 2
agent = Agent(state_size)
data = getStockDataVec(stock_name)
total_sample = len(data) - 1
batch_size = 128
buy_amount = 1

for e in range(episode_count + 1):
    # Every episode starts flat, from the first sample, with a fresh budget.
    order = _flat_order()
    state = getState(data, 0, window_size + 1, order)
    budget = 1000
    margin = 0

    for t in range(total_sample):
        action = agent.act(state)
        # The next observation is built from the order as it stands *before*
        # this step's action is applied.
        next_state = getState(data, t + 1, window_size + 1, order)
        reward = 0
        done = False
        # NOTE(review): column 3 of the bar is used as the trade price --
        # presumably the close; confirm against getStockDataVec().
        current_stock_price = data[t][3]

        if action == ACTION_HOLD:
            if order['trading']:
                # Holding an open position: reward follows the sign of the
                # unrealized profit (zero counts as a loss).
                profit = _position_profit(order, current_stock_price, buy_amount)
                reward = 1 if profit > 0 else -1
                print("Hold order: " + formatPrice(order['price']) + " => " + formatPrice(current_stock_price) + " | Profit: " + formatPrice(profit))
            else:
                # Idling without a position is mildly penalized.
                reward = -1
        elif action in (ACTION_BUY, ACTION_SELL):
            if order['trading']:
                # Only one position may be open at a time.
                reward = -5
            else:
                margin += agent.calculate_margin(current_stock_price, buy_amount)
                order = _open_order(current_stock_price, action, state, next_state)
                reward = 1
                label = "Buy: " if action == ACTION_BUY else "Sell: "
                print(label + formatPrice(current_stock_price))
        elif action == ACTION_CLOSE:
            if not order['trading']:
                # Nothing to close.
                reward = -5
            else:
                profit = _position_profit(order, current_stock_price, buy_amount)
                reward = 1 if profit > 0 else -1
                budget += profit
                order = _flat_order()
                print("Close order: " + formatPrice(current_stock_price) + " | Profit: " + formatPrice(profit))
                # A position closed at a strict loss marks the transition as
                # terminal; the time loop itself keeps running.
                done = profit < 0

        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print("--------------------------------")
            print("Episode " + str(e) + "/" + str(episode_count) + " Timestep: " + str(t))
            print("Budget: " + formatPrice(budget))
            print("--------------------------------")
            # The order is already flat here (done is only set when a
            # position was just closed); budget and margin are intentionally
            # carried on rather than reset.

        # Train from replay memory every 10th timestep once it holds more
        # than one batch of transitions.
        if len(agent.memory) > batch_size and t % 10 == 0:
            agent.expReplay(batch_size)

    # Checkpoint the actor every 10th episode (including episode 0).
    if e % 10 == 0:
        agent.actor.save("models/model_ep" + str(e))