# forked from bigtreezhudi/Advantage_Actor_Critic
# main.py — 85 lines (78 loc), 2.61 KB
import gym
from baselines import deepq
from baselines.common.atari_wrappers_deprecated import wrap_dqn, ScaledFloatFrame
from collections import deque
import numpy as np
from Advantage_Actor_Critic import Advantage_Actor_Critic
import pickle
import time
GAMMA = 0.99                  # discount factor for future rewards
EPISODE = 1000000             # maximum number of training episodes
TEST = 100                    # number of evaluation episodes in play()
MAX_STEP_PER_EPISODE = 10000  # hard cap on steps within a single episode
def main():
    """Train an Advantage Actor-Critic agent on CartPole-v0.

    Runs up to EPISODE episodes (or 2M environment steps), feeding each
    transition to the agent via ``agent.perceive`` and tracking a sliding
    window of the last 100 episode returns. Average and sampled returns
    are periodically pickled to disk.
    """
    env = gym.make("CartPole-v0")
    agent = Advantage_Actor_Critic(env)
    episodes_rewards = [0] * 100  # sliding window of the last 100 episode returns
    avg_rewards = []   # window average, sampled every 100 episodes
    skip_rewards = []  # raw episode return, sampled every 50 episodes
    step_num = 0       # total environment steps across all episodes
    for episode in range(EPISODE):
        goal = 0  # cumulative (undiscounted) reward for this episode
        I = 1     # discount accumulator gamma^t, passed to perceive()
        state = env.reset()
        # Bound the episode: previously this was `while True` with the
        # `if done: break` commented out, so the loop never terminated.
        for _ in range(MAX_STEP_PER_EPISODE):
            action = agent.select_action(state)
            next_state, reward, done, _ = env.step(action)
            I = GAMMA * I
            env.render()
            agent.perceive(state, action, reward, next_state, done, I, step_num)
            goal += reward
            step_num += 1
            state = next_state
            if done:
                break
        # Maintain the fixed-size window of recent episode returns.
        episodes_rewards.pop(0)
        episodes_rewards.append(goal)
        print("Episode: ", episode,
              " Last 100 episode average reward: ", np.average(episodes_rewards),
              " Total step number: ", step_num)

        if step_num > 2000000:  # global training-step budget
            break

        if episode % 50 == 0:
            skip_rewards.append(goal)

        if episode % 100 == 0:
            avg_rewards.append(np.average(episodes_rewards))
            # Overwrite the stats files each checkpoint so a crash loses
            # at most 100 episodes of logging.
            with open("avg_rewards.pkl", 'wb') as out_file:
                pickle.dump(avg_rewards, out_file)
            with open("skip_rewards.pkl", 'wb') as out_file1:
                pickle.dump(skip_rewards, out_file1)
            # NOTE(review): original code also saved network weights here via
            # agent.saver.save(...); re-enable once the agent exposes a saver.
    env.close()
def play():
    """Run TEST evaluation episodes on Atari Breakout and print returns.

    Wraps the raw environment with the standard DQN preprocessing stack
    (frame skip/stack + float scaling) before handing it to the agent.
    """
    env = gym.make("BreakoutNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    # Fix: the original constructed `DQN(env)`, but `DQN` is never imported
    # anywhere in this file (NameError). Use the imported agent class and
    # its action-selection method, consistent with main().
    # NOTE(review): confirm Advantage_Actor_Critic handles Atari observations.
    agent = Advantage_Actor_Critic(env)
    for episode in range(TEST):
        goal = 0      # cumulative reward for this episode
        step_num = 0  # steps taken within this episode
        state = env.reset()
        while True:
            action = agent.select_action(state)
            next_state, reward, done, _ = env.step(action)
            step_num += 1
            env.render()
            goal += reward
            state = next_state
            if done or step_num > MAX_STEP_PER_EPISODE:
                print("Episode: ", episode, " Total reward: ", goal)
                break
# Script entry point: train by default; switch to play() for evaluation.
if __name__ == '__main__':
    main()
    # play()