forked from NathanKlineInstitute/SMARTAgent
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimulatePong.py
151 lines (145 loc) · 5.93 KB
/
simulatePong.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import numpy as np
from conf import dconf
import random
class simulatePong:
    """Simplified Pong-like environment rendered into a 210x160 RGB frame.

    A ball travels horizontally from ``xpos_ball`` towards a controllable
    racket at ``xpos_racket``.  A second ("model") racket at
    ``xpos_modelracket`` moves randomly by one racket step every frame and
    takes no part in scoring.

    Configuration is read from ``dconf['simulatedEnvParams']``:
      * ``'random'``  -- if truthy, the ball's vertical position is re-drawn
        (base, base + 40 or base - 40) each time the ball is re-served;
      * ``'yball'``   -- ball's vertical offset from the top of the court;
      * ``'yracket'`` -- controllable racket's vertical offset from the top
        of the court.

    All ``*1x/*2x/*1y/*2y`` attributes are half-open pixel bounds used
    directly as slice indices into ``self.obs`` (rows are y, columns are x).
    """

    # Frame geometry and RGB colours painted into the frame.
    _FRAME_SHAPE = (210, 160, 3)
    _COURT_RGB = (144, 72, 17)
    _MODEL_RACKET_RGB = (213, 130, 74)
    _BALL_RGB = (236, 236, 236)
    _RACKET_RGB = (92, 186, 92)

    # Motion parameters (pixels per step).
    _RACKET_STEP = 10
    _BALL_XSTEP = 3
    # Vertical offset applied to the ball when its start row is randomised.
    _YBALL_JITTER = 40
    # A racket move is undone if it would leave fewer than this many rows of
    # the racket inside the court.
    _EDGE_MARGIN = 8
    # The ball is re-served once its right edge is this far past the racket.
    _PASS_MARGIN = 4

    def __init__(self):
        """Set up geometry from ``dconf`` and paint the initial frame."""
        self.court_top = 34
        self.court_bottom = 194
        self.ball_width = 2
        self.ball_height = 4
        self.racket_width = 4
        self.racket_height = 16

        params = dconf['simulatedEnvParams']
        self.randomizeYpos = params['random']
        self.ypos_ball = params['yball']  # row offset (index 0 of the frame)
        self.xpos_ball = 20  # column offset (index 1 of the frame)
        self.xpos_racket = 140  # fixed
        self.ypos_racket = params['yracket']  # changes as the racket moves
        self.xpos_modelracket = 16  # fixed
        self.ypos_modelracket = 80

        # Bounding boxes of the model racket, the ball and the racket.
        self.mr1x = self.xpos_modelracket
        self.mr2x = self.xpos_modelracket + self.racket_width
        self.mr1y = self.court_top + self.ypos_modelracket
        self.mr2y = self.mr1y + self.racket_height

        self.b1x = self.xpos_ball
        self.b2x = self.xpos_ball + self.ball_width
        self.b1y = self.court_top + self.ypos_ball
        self.b2y = self.b1y + self.ball_height

        self.r1x = self.xpos_racket
        self.r2x = self.xpos_racket + self.racket_width
        self.r1y = self.court_top + self.ypos_racket
        self.r2y = self.r1y + self.racket_height

        # Background first, then the sprites on top of it.
        self.createnewframe()
        self._paint(self.mr1y, self.mr2y, self.mr1x, self.mr2x,
                    self._MODEL_RACKET_RGB)
        self._paint(self.b1y, self.b2y, self.b1x, self.b2x, self._BALL_RGB)
        self._paint(self.r1y, self.r2y, self.r1x, self.r2x, self._RACKET_RGB)

        # By default no reward.
        self.reward = 0
        self.done = 0

    def _paint(self, y1, y2, x1, x2, rgb):
        """Fill the rectangle rows ``y1:y2``, columns ``x1:x2`` with ``rgb``."""
        self.obs[y1:y2, x1:x2] = rgb

    def _shift_within_court(self, top, bottom, yshift):
        """Return ``(top, bottom)`` moved by ``yshift``, undoing the move if
        it would push the racket past either court edge margin."""
        top += yshift
        bottom += yshift
        if top > self.court_bottom - self._EDGE_MARGIN:
            top -= yshift
            bottom -= yshift
        if bottom < self.court_top + self._EDGE_MARGIN:
            top -= yshift
            bottom -= yshift
        return top, bottom

    def _reset_ball(self):
        """Re-serve the ball at ``xpos_ball``, optionally re-drawing its row."""
        self.b1x = self.xpos_ball
        self.b2x = self.xpos_ball + self.ball_width
        if self.randomizeYpos:
            shiftdir = random.choice([1, 3, 4])
            base = dconf['simulatedEnvParams']['yball']
            if shiftdir == 3:
                self.ypos_ball = base + self._YBALL_JITTER
            elif shiftdir == 4:
                self.ypos_ball = base - self._YBALL_JITTER
            else:
                self.ypos_ball = base
        self.b1y = self.court_top + self.ypos_ball
        self.b2y = self.court_top + self.ypos_ball + self.ball_height

    def createnewframe(self):
        """Replace ``self.obs`` with a blank frame containing only the court."""
        self.obs = np.zeros(shape=self._FRAME_SHAPE)
        self.obs[self.court_top:self.court_bottom, :] = self._COURT_RGB

    def moveball(self, xshift_ball, yshift_ball):
        """Translate the ball by the given shifts and paint it into the frame."""
        self.b1x = self.b1x + xshift_ball
        self.b2x = self.b2x + xshift_ball
        self.b1y = self.b1y + yshift_ball
        self.b2y = self.b2y + yshift_ball
        self._paint(self.b1y, self.b2y, self.b1x, self.b2x, self._BALL_RGB)

    def moveracket(self, yshift_racket):
        """Move the controllable racket vertically (the move is undone if it
        would leave the court) and paint it into the frame."""
        self.r1y, self.r2y = self._shift_within_court(
            self.r1y, self.r2y, yshift_racket)
        self._paint(self.r1y, self.r2y, self.r1x, self.r2x, self._RACKET_RGB)

    def movemodelracket(self, yshift_racket2):
        """Move the model racket vertically (the move is undone if it would
        leave the court) and paint it into the frame."""
        self.mr1y, self.mr2y = self._shift_within_court(
            self.mr1y, self.mr2y, yshift_racket2)
        self._paint(self.mr1y, self.mr2y, self.mr1x, self.mr2x,
                    self._MODEL_RACKET_RGB)

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: 3 shifts the racket by +10 rows, 4 by -10 rows; any other
                value leaves it in place.

        Returns:
            ``(obs, reward, done)`` where ``obs`` is the ``uint8`` frame,
            ``reward`` is 1 when the ball first reaches the racket column
            while overlapping the racket, -1 when it first reaches it without
            overlapping, and 0 otherwise; ``done`` is 1 from the moment a
            reward is issued until the ball has passed the racket and been
            re-served.
        """
        if action == 3:
            yshift_racket = self._RACKET_STEP
        elif action == 4:
            yshift_racket = -self._RACKET_STEP
        else:
            yshift_racket = 0

        self.createnewframe()

        # The model racket moves randomly up or down by one step.
        randaction = random.randint(3, 4)
        rand_yshift = self._RACKET_STEP if randaction == 3 else -self._RACKET_STEP
        self.movemodelracket(rand_yshift)
        self.moveracket(yshift_racket)
        self.moveball(xshift_ball=self._BALL_XSTEP, yshift_ball=0)

        # Always start from zero so a reward issued on an earlier step is
        # never reported a second time (previously a -1 could be repeated
        # when the racket moved onto the ball after a miss).
        self.reward = 0

        if self.b2x >= self.r1x and self.done == 0:
            # Hit if the upper or lower edge of the ball lies strictly
            # within the vertical range of the racket.
            top_inside = self.r1y < self.b1y < self.r2y
            bottom_inside = self.r1y < self.b2y < self.r2y
            if top_inside or bottom_inside:
                self.reward = 1
                # NOTE(review): the ball is re-served here while done stays
                # 1, so the following pass earns no reward until the ball
                # clears the racket and done returns to 0 -- confirm intended.
                self._reset_ball()
            else:
                self.reward = -1
            self.done = 1

        # Ball has travelled past the racket: serve again and re-arm scoring.
        if self.b2x > self.r2x + self._PASS_MARGIN:
            self._reset_ball()
            self.done = 0

        self.obs = self.obs.astype(np.uint8)
        return self.obs, self.reward, self.done