forked from ikostrikov/pytorch-a2c-ppo-acktr-gail
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvisualize.py
138 lines (105 loc) · 3.7 KB
/
visualize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# Copied from https://github.com/emansim/baselines-mansimov/blob/master/baselines/a2c/visualize_atari.py
# and https://github.com/emansim/baselines-mansimov/blob/master/baselines/a2c/load.py
# Thanks to the author and OpenAI team!
import glob
import json
import os
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import numpy as np
from scipy.signal import medfilt
matplotlib.rcParams.update({'font.size': 8})
def smooth_reward_curve(x, y):
# Halfwidth of our smoothing convolution
halfwidth = min(31, int(np.ceil(len(x) / 30)))
k = halfwidth
xsmoo = x[k:-k]
ysmoo = np.convolve(y, np.ones(2 * k + 1), mode='valid') / \
np.convolve(np.ones_like(y), np.ones(2 * k + 1), mode='valid')
downsample = max(int(np.floor(len(xsmoo) / 1e3)), 1)
return xsmoo[::downsample], ysmoo[::downsample]
def fix_point(x, y, interval):
np.insert(x, 0, 0)
np.insert(y, 0, 0)
fx, fy = [], []
pointer = 0
ninterval = int(max(x) / interval + 1)
for i in range(ninterval):
tmpx = interval * i
while pointer + 1 < len(x) and tmpx > x[pointer + 1]:
pointer += 1
if pointer + 1 < len(x):
alpha = (y[pointer + 1] - y[pointer]) / \
(x[pointer + 1] - x[pointer])
tmpy = y[pointer] + alpha * (tmpx - x[pointer])
fx.append(tmpx)
fy.append(tmpy)
return fx, fy
def load_data(indir, smooth, bin_size):
datas = []
infiles = glob.glob(os.path.join(indir, '*.monitor.csv'))
for inf in infiles:
with open(inf, 'r') as f:
f.readline()
f.readline()
for line in f:
tmp = line.split(',')
t_time = float(tmp[2])
tmp = [t_time, int(tmp[1]), float(tmp[0])]
datas.append(tmp)
datas = sorted(datas, key=lambda d_entry: d_entry[0])
result = []
timesteps = 0
for i in range(len(datas)):
result.append([timesteps, datas[i][-1]])
timesteps += datas[i][1]
if len(result) < bin_size:
return [None, None]
x, y = np.array(result)[:, 0], np.array(result)[:, 1]
if smooth == 1:
x, y = smooth_reward_curve(x, y)
if smooth == 2:
y = medfilt(y, kernel_size=9)
x, y = fix_point(x, y, bin_size)
return [x, y]
color_defaults = [
'#1f77b4', # muted blue
'#ff7f0e', # safety orange
'#2ca02c', # cooked asparagus green
'#d62728', # brick red
'#9467bd', # muted purple
'#8c564b', # chestnut brown
'#e377c2', # raspberry yogurt pink
'#7f7f7f', # middle gray
'#bcbd22', # curry yellow-green
'#17becf' # blue-teal
]
def visdom_plot(viz, win, folder, game, name, num_steps, bin_size=100, smooth=1):
tx, ty = load_data(folder, smooth, bin_size)
if tx is None or ty is None:
return win
fig = plt.figure()
plt.plot(tx, ty, label="{}".format(name))
tick_fractions = np.array([0.1, 0.2, 0.4, 0.6, 0.8, 1.0])
ticks = tick_fractions * num_steps
tick_names = ["{:.0e}".format(tick) for tick in ticks]
plt.xticks(ticks, tick_names)
plt.xlim(0, num_steps * 1.01)
plt.xlabel('Number of Timesteps')
plt.ylabel('Rewards')
plt.title(game)
plt.legend(loc=4)
plt.show()
plt.draw()
image = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
image = image.reshape(fig.canvas.get_width_height()[::-1] + (3, ))
plt.close(fig)
# Show it in visdom
image = np.transpose(image, (2, 0, 1))
return viz.image(image, win=win)
if __name__ == "__main__":
from visdom import Visdom
viz = Visdom()
visdom_plot(viz, None, '/tmp/gym/', 'BreakOut', 'a2c', bin_size=100, smooth=1)