-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtictactoe.py
433 lines (340 loc) · 14.8 KB
/
tictactoe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
import random
import os
os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
import pygame as pg
import pygame_menu
import sys
import csv
import time
from pygame.locals import *
# Board geometry: an N x N grid stored as a flat list of SIZE cells (row-major).
N = 3
SIZE = N ** 2
# Dimensions of the playable grid area in pixels. The window itself is
# created 100 px taller (see `screen` below) to leave room for a status bar.
width, height = 400, 400
# Top-left pixel position at which each cell's icon is blitted, in board-index
# order (row by row). The 30 px offset insets the 80x80 icon inside its cell.
POSITIONS = list(zip([30, width / 3 + 30, width / 3 * 2 + 30] * 3, [30] * 3 + [height / 3 + 30] * 3 + [height / 3 * 2 + 30] * 3))
# Bottom-right pixel corner of each cell, in board-index order. Used to map a
# mouse position to a board index by finding the first cell that contains it.
LIMITS = list(zip([width / 3, width / 3 * 2, width] * 3, [height / 3] * 3 + [height / 3 * 2] * 3 + [height] * 3))
# Board indices making up every winning line.
ROW1 = (0, 1, 2)
ROW2 = (3, 4, 5)
ROW3 = (6, 7, 8)
COL1 = (0, 3, 6)
COL2 = (1, 4, 7)
COL3 = (2, 5, 8)
LDIAG = (0, 4, 8)
RDIAG = (2, 4, 6)
# All lines that are scanned when looking for a winner.
CHECK = [ROW1, ROW2, ROW3, COL1, COL2, COL3, LDIAG, RDIAG]
# RGB colours.
white = (255, 255, 255)
black = (0, 0, 0)
red = (255, 0, 0)
line_color = black
# Cell markers. A board state is serialised elsewhere in this file by joining
# these single characters into a string.
CROSS = 'X'
NOUGHT = 'O'
EMPTY = '-'
# Initializing pygame. NOTE: this opens a display window at import time.
pg.init()
fps = 30
CLOCK = pg.time.Clock()
# The extra 100 px below the grid is the status-message area.
screen = pg.display.set_mode((width, height + 100), 0, 32)
pg.display.set_caption("Tic Tac Toe")
# Maps each winning line (the tuples above) to the positional arguments for
# pg.draw.line that strike a red line through it.
# NOTE(review): the diagonal end points are literal pixel values that match
# the 400x400 grid; they do not follow `width`/`height` if those change.
LINEARGS = {
ROW1 : (screen, red, (20, height / 6), (width - 20, height / 6), 4),
ROW2 : (screen, red, (20, height / 2), (width - 20, height / 2), 4),
ROW3 : (screen, red, (20, height / 6 * 5), (width - 20, height / 6 * 5), 4),
COL1 : (screen, red, (width / 6, 20), (width / 6, height - 20), 4),
COL2 : (screen, red, (width / 2, 20), (width / 2, height - 20), 4),
COL3 : (screen, red, (width / 6 * 5, 20), (width / 6 * 5, height - 20), 4),
LDIAG: (screen, red, (50, 50), (350, 350), 4),
RDIAG: (screen, red, (350, 50), (50, 350), 4)
}
# Image assets are loaded relative to the current working directory.
initiating_window = pg.image.load("bg1.png")
x_img = pg.image.load("cross.png")
# `y_img` holds the raw nought image; the scaled version is `o_img`.
y_img = pg.image.load("nought.png")
# The splash image covers the whole window; icons are scaled to 80x80.
initiating_window = pg.transform.scale(initiating_window, (width, height + 100))
x_img = pg.transform.scale(x_img, (80, 80))
o_img = pg.transform.scale(y_img, (80, 80))
# Icon surface to draw for each player marker.
ICON = {CROSS : x_img, NOUGHT : o_img}
# CSV file used to persist the agent's state-value table.
filename = 'state_values.csv'
class TicTacToe:
    """State of one Tic Tac Toe game plus its pygame rendering.

    Attributes:
        player: marker (CROSS or NOUGHT) of the side to move next.
        winner: marker of the winning side, or None while undecided.
        draw:   True once every cell is occupied (set by `check_draw`).
        board:  flat list of SIZE cell markers, row by row.

    Methods without a leading underscore may draw on the global `screen`;
    the underscore-prefixed variants only touch the game state and are the
    ones used for training and demo games.
    """

    def __init__(self):
        self.player = CROSS
        self.winner = None
        self.draw = False
        self.board = [EMPTY] * SIZE

    def game_initiating_window(self):
        """Show the splash image for 1.5 seconds, then draw an empty grid.

        The grid itself is only drawn onto `screen`; it becomes visible on
        the next display update (e.g. the one done by `game_status`).
        """
        screen.blit(initiating_window, (0, 0))
        pg.display.update()
        time.sleep(1.5)
        screen.fill(white)
        # Two vertical and two horizontal separators at the thirds.
        pg.draw.line(screen, line_color, (width / 3, 0), (width / 3, height), 7)
        pg.draw.line(screen, line_color, (width / 3 * 2, 0), (width / 3 * 2, height), 7)
        pg.draw.line(screen, line_color, (0, height / 3), (width, height / 3), 7)
        pg.draw.line(screen, line_color, (0, height / 3 * 2), (width, height / 3 * 2), 7)

    def _winning_lines(self):
        """Return the lines in CHECK whose cells all hold the same marker."""
        return [
            line for line in CHECK
            if self.board[line[0]] != EMPTY
            and all(self.board[play] == self.board[line[0]] for play in line[1:])
        ]

    def check_win(self):
        """Record the winner, if any, and strike a red line through each
        completed line on the game screen."""
        for line in self._winning_lines():
            pg.draw.line(*LINEARGS[line])
            self.winner = self.board[line[0]]

    def _check_win(self):
        """Record the winner, if any, without drawing anything."""
        for line in self._winning_lines():
            self.winner = self.board[line[0]]

    def check_draw(self):
        """Set `draw` to True when no empty cell is left.

        This only looks at the board; callers treat it as a draw only when
        `winner` is not set.
        """
        self.draw = all(play != EMPTY for play in self.board)

    def playable(self):
        """Refresh `winner`/`draw` and return True while the game can go on.

        Uses the non-drawing win check: this method is called from the
        training and demo loops, which must not paint on the display.
        """
        self._check_win()
        self.check_draw()
        return not self.draw and not self.winner

    def game_status(self):
        """Render the current status message in the bar below the grid and
        update the display."""
        if self.winner:
            message = self.winner + " won !"
        elif self.draw:
            message = "Game Draw !"
        else:
            message = self.player + "'s Turn"
        font = pg.font.Font(pg.font.get_default_font(), 30)
        text = font.render(message, True, white)
        # The status bar is the 100 px strip directly below the grid.
        screen.fill(black, (0, height, width, 100))
        text_rect = text.get_rect(center=(width / 2, height + 50))
        screen.blit(text, text_rect)
        pg.display.update()

    def make_move(self, pos):
        """Place the current player's marker at board index `pos`, draw its
        icon, hand the turn to the other player and refresh `winner`/`draw`.

        The cell is not checked for being empty; callers do that.
        """
        posx, posy = POSITIONS[pos]
        self.board[pos] = self.player
        screen.blit(ICON[self.player], (posx, posy))
        pg.display.update()
        self.flip()
        self.check_win()
        self.check_draw()

    def _make_move(self, pos):
        """Same as `make_move` but without any drawing."""
        self.board[pos] = self.player
        self.flip()
        self._check_win()
        self.check_draw()

    def get_square(self):
        """Return the board index under the mouse cursor, or None when the
        cursor is outside the grid (e.g. over the status bar)."""
        x, y = pg.mouse.get_pos()
        # LIMITS is ordered row by row, so the first cell whose bottom-right
        # corner lies beyond the cursor is the cell that contains it.
        for idx, (xlim, ylim) in enumerate(LIMITS):
            if x < xlim and y < ylim:
                return idx
        return None

    def user_click(self):
        """Play the clicked cell for the current player if it is empty."""
        pos = self.get_square()
        if pos is not None and self.board[pos] == EMPTY:
            self.make_move(pos)

    def flip(self):
        """Hand the turn to the other player."""
        self.player = CROSS if self.player == NOUGHT else NOUGHT

    def valid_moves(self):
        """Return the indices of all empty cells."""
        return [idx for idx, item in enumerate(self.board) if item == EMPTY]
class Agent:
    """Tic Tac Toe player that learns a table of state values by self-play.

    A state is the board after a move, serialised as a string of cell
    markers. `V` maps such states to a value estimate from the point of
    view of `value_player`: that player picks the highest-valued successor
    state, the opponent picks the lowest-valued one.

    Attributes:
        V:            dict mapping state string -> float value.
        NewGame:      zero-argument callable returning a fresh game object.
        epsilon:      probability of playing a random move while learning.
        alpha:        learning rate of the value update.
        value_player: marker of the player whose win is rewarded with +1.
    """

    def __init__(self, game_class, epsilon = 0.1, alpha = 0.5, value_player = CROSS):
        self.V = dict()
        self.NewGame = game_class
        self.epsilon = epsilon
        self.alpha = alpha
        self.value_player = value_player

    def learn_game(self, num_episodes = 1000):
        """Train the agent on `num_episodes` self-play games."""
        for _ in range(num_episodes):
            self.learn_from_episode()

    def learn_from_episode(self):
        """Play one self-play game, updating `V` after every move."""
        game = self.NewGame()
        _, move = self.learn_select_move(game)
        # `learn_from_move` returns None once the game is over.
        while move:
            move = self.learn_from_move(game, move)

    def learn_select_move(self, game):
        """Return `(best_move, selected_move)` for the side to move.

        Both are successor states. `selected_move` equals `best_move`
        except that, with probability `epsilon`, it is replaced by a
        uniformly random allowed successor.
        """
        allowed_state_values = self.__state_values(self.form_states(game, game.valid_moves()))
        best_move = self.choose_state(allowed_state_values, game.player == self.value_player)
        selected_move = best_move
        if random.random() < self.epsilon:
            selected_move = self.__random_V(allowed_state_values)
        return best_move, selected_move

    def __random_V(self, state_values):
        """Return a uniformly random state from `state_values`."""
        return random.choice(list(state_values))

    def learn_from_move(self, game, move):
        """Play `move` (a successor state) on `game` and update its value.

        The value of `move` is pulled towards `reward + V(best next state)`
        by a step of size `alpha`; the next-state term is 0 when the game
        has ended. Returns the next state to play, or None if the game is
        over.
        """
        game._make_move(self.find_pos(game, move))
        r = self.__reward(game)
        next_state_value = 0.0
        selected_next_move = None
        if game.playable():
            best_next_move, selected_next_move = self.learn_select_move(game)
            next_state_value = self.state_value(best_next_move)
        current_state_value = self.state_value(move)
        td_target = r + next_state_value
        self.V[move] = current_state_value + self.alpha * (td_target - current_state_value)
        return selected_next_move

    def __reward(self, game):
        """Return +1.0 if `value_player` has won, -1.0 if the opponent has
        won, and 0.0 otherwise."""
        if game.winner == self.value_player:
            return 1.0
        if game.winner:
            return -1.0
        return 0.0

    def interactive_game(self, agent_player = NOUGHT):
        """Play one game against a human on the pygame screen.

        `agent_player` is the marker the agent plays; the human plays the
        other one by clicking on cells. Closing the window exits the
        program. Returns once the game is won or drawn.
        """
        game = self.NewGame()
        game.game_initiating_window()
        game.game_status()
        end = False
        while not end:
            for event in pg.event.get():
                # Event types are plain ints: compare by value, not identity.
                if event.type == QUIT:
                    pg.quit()
                    sys.exit()
                human_to_move = game.player != agent_player and not (game.winner or game.draw)
                if event.type == MOUSEBUTTONDOWN and human_to_move:
                    game.user_click()
                    game.game_status()
            # The agent moves on its own turn, whether or not any event
            # arrived in this frame.
            if game.player == agent_player and not (game.winner or game.draw):
                time.sleep(0.5)
                move = self.play_select_move(game)
                game.make_move(self.find_pos(game, move))
                game.game_status()
            if game.winner or game.draw:
                # Leave the final position on screen briefly before returning.
                time.sleep(1.5)
                end = True
            pg.display.update()
            CLOCK.tick(fps)

    def find_pos(self, game, state):
        """Return the first board index at which `state` differs from the
        current board of `game`, or None if they are identical."""
        for idx, item in enumerate(state):
            if item != game.board[idx]:
                return idx
        return None

    def choose_state(self, state_values, is_agent_player):
        """Return a state with the highest value (lowest if
        `is_agent_player` is false), breaking ties at random.

        `state_values` must not be empty.
        """
        values = state_values.values()
        val = max(values) if is_agent_player else min(values)
        return random.choice([state for state, v in state_values.items() if v == val])

    def state_value(self, game_state):
        """Return the stored value of `game_state`, 0.0 if unseen."""
        return self.V.get(game_state, 0.0)

    def __state_values(self, game_states):
        """Return a dict mapping each given state to its value."""
        return {state: self.state_value(state) for state in game_states}

    def form_states(self, game, positions):
        """Return, for each board index in `positions`, the state string
        that results from the current player moving there."""
        possible_states = []
        for pos in positions:
            new_state = game.board[:]
            new_state[pos] = game.player
            possible_states.append(''.join(new_state))
        return possible_states

    def play_select_move(self, game):
        """Return the best successor state for the side to move, without
        exploration. Used for interactive play and demo games."""
        allowed_state_values = self.__state_values(self.form_states(game, game.valid_moves()))
        return self.choose_state(allowed_state_values, game.player == self.value_player)

    def demo_game(self):
        """Play one game with the agent on both sides (no learning, no
        drawing) and return the winner's marker, or '-' for a draw."""
        game = self.NewGame()
        while game.playable():
            move = self.play_select_move(game)
            game._make_move(self.find_pos(game, move))
        if game.winner:
            return game.winner
        return '-'

    def round_V(self):
        """Round every stored state value to one decimal place."""
        for state, value in self.V.items():
            # Only values of existing keys change, so iterating is safe.
            self.V[state] = round(value, 1)

    def save_v_table(self):
        """Write all states and their values, sorted by state, to the CSV
        file named by `filename` (overwriting it)."""
        with open(filename, 'w', newline = '') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['State', 'Value'])
            for state in sorted(self.V):
                writer.writerow([state, self.V[state]])

    def retrieve_v_table(self):
        """Load states and values from the CSV file named by `filename`
        into `V`. Does nothing if the file does not exist."""
        if not os.path.isfile(filename):
            return
        # newline='' is what the csv module expects for files it reads.
        with open(filename, 'r', newline = '') as csvfile:
            for row in csv.reader(csvfile):
                # Skip blank lines and the header row.
                if not row or row == ['State', 'Value']:
                    continue
                self.V[row[0]] = float(row[1])
def demo_game_stats(agent, num_games = 10000):
    """Play `num_games` demo games with `agent` and print the results.

    The printed dict maps CROSS, NOUGHT and '-' (draw) to the percentage
    of games that ended with that outcome.

    Raises:
        ValueError: if `num_games` is not positive.
    """
    if num_games <= 0:
        raise ValueError('num_games must be positive')
    results = [agent.demo_game() for _ in range(num_games)]
    # Scale counts to percentages of the number of games actually played.
    game_stats = {k: results.count(k) * 100 / num_games for k in [CROSS, NOUGHT, '-']}
    print(' percentage results: {}'.format(game_stats))
def play_CROSS():
    """Menu callback: the human plays CROSS, so the agent takes NOUGHT and
    moves second."""
    agent.interactive_game(agent_player = NOUGHT)
def play_NOUGHT():
    """Menu callback: the human plays NOUGHT, so the agent takes CROSS and
    moves first."""
    agent_marker = CROSS
    agent.interactive_game(agent_marker)
# The agent starts fully exploratory; any previously saved value table is
# loaded so training continues from it and play uses it.
agent = Agent(TicTacToe, epsilon = 1.0, alpha = 0.4)
agent.retrieve_v_table()

# Running the script with the single argument `-t` trains the agent before
# the menu is shown.
if len(sys.argv) == 2 and sys.argv[-1] == '-t':
    print('Before learning:')
    demo_game_stats(agent)
    # Each stage: (number of learning games, amount by which epsilon is
    # lowered once the stage has finished).
    training_schedule = (
        (1000, 0.1),
        (4000, 0.2),
        (5000, 0.2),
        (10000, 0.3),
        (10000, 0.1),
        (20000, 0.1),
    )
    games_played = 0
    for stage_games, epsilon_step in training_schedule:
        agent.learn_game(stage_games)
        games_played += stage_games
        print('After {} learning games:'.format(games_played))
        demo_game_stats(agent)
        agent.epsilon -= epsilon_step
    agent.round_V()
    agent.save_v_table()

# No exploration from here on.
agent.epsilon = 0.0

mytheme = pygame_menu.themes.Theme(title_bar_style = pygame_menu.widgets.MENUBAR_STYLE_UNDERLINE_TITLE, title_background_color = (4, 47, 126), title_font = pygame_menu.font.FONT_OPEN_SANS_ITALIC, background_color = (0, 60, 255, 100) )
menu = pygame_menu.Menu(height + 99, width - 1, 'Tic Tac Toe', theme = mytheme)

# Widgets are added exactly once; adding them inside the loop below would
# duplicate them every time the main loop returns.
menu.add_label("Choose Icon", font_color = white, font_size = 40)
menu.add_button(CROSS, play_CROSS, font_size = 60, font_color = white, shadow = True)
menu.add_button(NOUGHT, play_NOUGHT, font_size = 60, font_color = white, shadow = True)

while True:
    menu.mainloop(screen)