-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpolicy.py
139 lines (116 loc) · 4.39 KB
/
policy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# policy.py
# ----------
# Abstract Class
#
from abc import ABC, abstractmethod
import random
class Policy(ABC):
    ##
    # Abstract base class for game policies.
    #
    # Subclasses must implement the `reward` and `config` properties plus
    # `best_move` and `policy`. The concrete helper methods below read the
    # configuration from `self._config`, so a subclass has to store its
    # configuration object under that attribute name.
    #

    # Compass names in the order the direction converters test them.
    _COMPASS_NAMES = ("NORTH", "EAST", "SOUTH", "WEST")

    def __init__(self):
        pass

    ##
    # Accessor Method For Instance Variable: self.reward
    #
    @property
    @abstractmethod
    def reward(self):
        return

    ##
    # Mutator Method For Instance Variable: self.reward
    # @param reward the new reward value
    #
    @reward.setter
    @abstractmethod
    def reward(self, reward):
        pass

    ##
    # Accessor Method For Instance Variable: self.config
    #
    @property
    @abstractmethod
    def config(self):
        return

    ##
    # Mutator Method For Instance Variable: self.config
    # @param config the new configuration object
    #
    @config.setter
    @abstractmethod
    def config(self, config):
        pass

    ##
    # Determines the best move given the current gamestate
    # @param gamestate the current state of the game
    # @return the recommended move
    #
    @abstractmethod
    def best_move(self, gamestate):
        pass

    ##
    # Returns the current policy
    # @return the policy representation chosen by the subclass
    #
    @abstractmethod
    def policy(self):
        pass

    ##
    # Converts Relative Directions to Absolute Directions
    # @param prior_absolute_direction The (absolute) direction last taken
    # @param relative_direction the relative direction to be converted into absolute
    # @return the new absolute direction
    #
    def relativeToAbsoluteDirection(self, prior_absolute_direction, relative_direction):
        # The configured action value minus one is used as a signed offset
        # from the current heading (presumably left/forward/right map to
        # 0/1/2, giving -1/0/+1 -- confirm against the config).
        direction_index = self._config.actions[relative_direction] - 1
        # NOTE: if prior_absolute_direction is not one of the four configured
        # directions, absDirToShort returns None and this addition raises
        # TypeError.
        heading = self.absDirToShort(prior_absolute_direction)
        # Wrap around the four compass directions.
        return self.shortToAbsDir((heading + direction_index) % 4)

    ##
    # Applies an absolute direction [x,y] to a location [x,y]
    # @param inpLoc the starting location
    # @param inpDirection the absolute direction to move in
    # @return a new list holding the element-wise sum of both inputs
    #
    def directionToLocation(self, inpLoc, inpDirection):
        return [coord + delta for coord, delta in zip(inpLoc, inpDirection)]

    ##
    # Converts an iterable (e.g. a list) into a tuple.
    # Not used by the other methods of this class.
    # @param inpList the values to convert; defaults to an empty sequence
    # @return a tuple holding the same elements in the same order
    #
    @staticmethod
    def listToTuple(inpList=()):
        return tuple(inpList)

    ##
    # Helper: maps an absolute direction to its short (integer) value
    # @param inpDir the absolute direction, any iterable comparable as a tuple
    # @return the configured short value, or None if inpDir matches none of
    #         the four configured directions
    #
    def absDirToShort(self, inpDir):
        inpDir = tuple(inpDir)
        for name in self._COMPASS_NAMES:
            if inpDir == self._config.rawMovementValue[name]:
                return self._config.movementsShortValue[name]
        return None

    ##
    # Helper: maps a short (integer) value back to its absolute direction
    # @param inpShort the short value to look up
    # @return the configured absolute direction, or None if inpShort matches
    #         none of the four configured short values
    #
    def shortToAbsDir(self, inpShort):
        for name in self._COMPASS_NAMES:
            if inpShort == self._config.movementsShortValue[name]:
                return self._config.rawMovementValue[name]
        return None

    ##
    # Calculates the reward of a state
    # @param gamestate the state to evaluate; must provide getHeadLocation(),
    #        isScoreChange() and isHazardSpot(location)
    # @return the sum of the applicable configured rewards
    #
    def rewardValue(self, gamestate):
        rewards = self._config.reward
        location = gamestate.getHeadLocation()
        retReward = 0
        if gamestate.isScoreChange():
            retReward += rewards.food
        if gamestate.isHazardSpot(location):
            retReward += rewards.hazard
        # The living reward is added for every evaluated state.
        retReward += rewards.living
        return retReward

    ##
    # Scrambles the direction of the action: picks a direction at random,
    # weighted by the values configured for the requested relative direction.
    # @param inpRelativeDirection the relative direction that was requested
    # @return the randomly selected direction
    #
    def moveScrambler(self, inpRelativeDirection):
        weights = self._config.stochastic.directions[inpRelativeDirection]

        # The weights need not sum to 1; the random draw is scaled to their
        # total instead.
        probabilityTotal = 0
        for direction in weights:
            probabilityTotal += weights[direction]
        randomValue = random.random() * probabilityTotal

        # Walk the cumulative weight segments and return the direction whose
        # half-open segment [lower, lower + weight) contains the draw.
        lowerBound = 0
        for direction, weight in weights.items():
            if lowerBound <= randomValue < lowerBound + weight:
                return direction
            lowerBound += weight

        # No segment matched (e.g. every weight is zero): fall back to the
        # first key of the top-level directions mapping.
        return list(self._config.stochastic.directions.keys())[0]