opening_book.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Udacity AIND Project 3 Game Agent - Opening Book
Build an opening book, with no time limit, and overwrite the data pickle file.
"""
from isolation import Isolation  # , DebugState
from collections import defaultdict, Counter
import random
import time
import pickle


def main():
    start_time = time.time()
    state = Isolation()
    book = OpeningBook(initial_state=state,
                       num_rounds=99 * 98 * 8 * 8 * 10,
                       tree_depth=4).get_book()
    OpeningBook.save_opening_book(book)
    end_time = time.time()
    # sample run: time taken to build opening book = 332.32017993927
    print('time taken to build opening book =', end_time - start_time)


class OpeningBook:
    def __init__(self, initial_state, num_rounds, tree_depth):
        # properties
        self.state = initial_state
        self.NUM_ROUNDS = num_rounds
        self.tree_depth = tree_depth
        # build the state -> action table up front
        self.table = self.build_table(self.NUM_ROUNDS)

    def get_book(self):
        return self.table
    @staticmethod
    def save_opening_book(book):
        with open('data.pickle', 'wb') as f:
            pickle.dump(book, f)
    def build_table(self, num_rounds):
        # Builds a table that maps from game state -> action by choosing
        # the action that accumulates the most wins for the active player.
        # (Note that this uses raw win counts, which are a poor statistic
        # for estimating the value of an action; better statistics exist.)
        book = defaultdict(Counter)
        for round_num in range(num_rounds):
            # progress feedback every 1000 rounds
            if round_num % 1000 == 0:
                print('round=', round_num)
            # new blank state for each round
            state = Isolation()
            self.build_tree(state, book, self.tree_depth)
        # keep only the highest-scoring action for each board state,
        # discarding every lower-scoring alternative for that state
        return {k: max(v, key=v.get) for k, v in book.items()}
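
    # A sketch of the "better statistics" the note above alludes to (an
    # assumption, not part of the original project code): if `book` instead
    # tracked (total_reward, visit_count) pairs per action, actions could be
    # ranked by mean reward rather than by raw accumulated totals, which are
    # biased toward frequently sampled moves.
    @staticmethod
    def best_by_mean_reward(stats):
        # stats maps action -> (total_reward, visit_count)
        return max(stats, key=lambda a: stats[a][0] / stats[a][1])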

    def build_tree(self, state, book, depth):
        # once depth reaches the specified limit (or the game ends),
        # evaluate the leaf instead of expanding further
        if depth <= 0 or state.terminal_test():
            # evaluate by simulating the rest of the game with random actions:
            # return -self.simulate(state)
            # evaluate by the difference in available moves:
            return -self.score(state)
        # random move to explore
        action = random.choice(state.actions())
        # recursively get the reward from the deeper state-action pair
        reward = self.build_tree(state.result(action), book, depth - 1)
        # accumulate the reward for this action at the current board state
        book[state.board][action] += reward
        # negated because the state one level above is the opponent's move
        return -reward

    # evaluation function from the lesson: play the game out randomly and
    # report a win (+1) or loss (-1) for the player to move at `state`
    def simulate(self, state):
        player_id = state.player()
        while not state.terminal_test():
            state = state.result(random.choice(state.actions()))
        return -1 if state.utility(player_id) < 0 else 1

    # evaluation function used by the AI's minimax: score by the difference
    # in the number of possible moves
    def score(self, state):
        player_id = state.player()
        own_loc = state.locs[player_id]
        opp_loc = state.locs[1 - player_id]
        own_liberties = state.liberties(own_loc)
        opp_liberties = state.liberties(opp_loc)
        diff = len(own_liberties) - len(opp_liberties)
        # return -1 if diff < 0 else 1
        return diff
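

# Hypothetical helper (an assumption, not in the original file): shows how a
# game agent could consult the saved book during play.  The book maps
# state.board -> action, so the lookup returns None once play leaves the book.
def load_opening_book(path='data.pickle'):
    # returns the pickled {board: action} mapping written by save_opening_book
    with open(path, 'rb') as f:
        return pickle.load(f)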


if __name__ == "__main__":
    main()