-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpolicy_configuration.py
72 lines (58 loc) · 1.96 KB
/
policy_configuration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# policy_configuration.py
class PolicyConfiguration:
    """Bundle the settings for a policy: rewards, discounts, movement noise.

    The three lookup tables below are class-level constants; the remaining
    settings are per-instance and built in ``__init__``.
    """

    # Integer code assigned to each action name.
    actions = {
        "LEFT": 0,
        "FORWARD": 1,
        "RIGHT": 2,
    }
    # Integer code assigned to each compass heading.
    movementsShortValue = {
        "NORTH": 0,
        "EAST": 1,
        "SOUTH": 2,
        "WEST": 3,
    }
    # Two-component offset for each compass heading.
    # NOTE(review): presumably (dx, dy) with y growing towards SOUTH -- confirm
    # against the grid code that consumes it.
    rawMovementValue = {
        "NORTH": (0, -1),
        "EAST": (1, 0),
        "SOUTH": (0, 1),
        "WEST": (-1, 0),
    }

    def __init__(
        self,
        inpRewards=(1, -1, 0, 10, -1),
        inpDiscounts=(1, .1, .1),
        inpStochastic=((100, 0, 0), (0, 100, 0), (0, 0, 100)),
        inpFile=None,
        inpTrainingLimit=5000,
    ):
        """Build the nested configuration objects from flat sequences.

        The defaults are tuples rather than lists so that no mutable object
        is shared between calls; the arguments are only ever indexed, so
        lists passed by existing callers keep working unchanged.

        Args:
            inpRewards: Indexable with at least 5 items, forwarded
                positionally to ``Reward``.
            inpDiscounts: Indexable with at least 3 items, forwarded
                positionally to ``Discount``.
            inpStochastic: Indexable with at least 3 items, forwarded
                positionally to ``Stochastic``.
            inpFile: Stored unchanged on ``self.file``.
            inpTrainingLimit: Stored unchanged on ``self.trainingLimit``.

        Raises:
            IndexError: If one of the sequences is shorter than required.
        """
        # Explicit indexing (not *-unpacking) so extra trailing items are
        # ignored, exactly as before.
        self.reward = Reward(
            inpRewards[0], inpRewards[1], inpRewards[2],
            inpRewards[3], inpRewards[4],
        )
        self.discount = Discount(
            inpDiscounts[0], inpDiscounts[1], inpDiscounts[2],
        )
        self.stochastic = Stochastic(
            inpStochastic[0], inpStochastic[1], inpStochastic[2],
        )
        self.file = inpFile
        self.trainingLimit = inpTrainingLimit
class Reward:
    """Plain holder for the reward values used by a policy."""

    def __init__(
        self,
        inpFood=1,
        inpHazard=-1,
        inpLiving=0,
        inpGoodLoc=10,
        inpBadLoc=-1,
    ):
        """Store each reward value on the instance.

        Args:
            inpFood: Stored as ``food``.
            inpHazard: Stored as ``hazard``.
            inpLiving: Stored as ``living``.
            inpGoodLoc: Stored as ``goodLocation``.
            inpBadLoc: Stored as ``badLocation``.
        """
        self.food, self.hazard, self.living = inpFood, inpHazard, inpLiving
        # Tagged "qLearning" in the original source -- presumably only the
        # Q-learning code reads these two (confirm against callers).
        self.goodLocation, self.badLocation = inpGoodLoc, inpBadLoc
class Discount:
    """Plain holder for the ``gamma``, ``alpha`` and ``epsilon`` settings."""

    def __init__(self, inpGamma=0.99, inpAlpha=0.1, inpEpsilon=0.1):
        """Store the three numeric settings on the instance.

        Args:
            inpGamma: Stored as ``gamma``.
            inpAlpha: Stored as ``alpha``.
            inpEpsilon: Stored as ``epsilon``.
        """
        # NOTE(review): by the usual RL naming these would be the discount
        # factor, learning rate and exploration rate -- confirm with callers.
        self.gamma = inpGamma
        # Tagged "qLearning" in the original source.
        self.alpha, self.epsilon = inpAlpha, inpEpsilon
class Stochastic:
    """Nested table of per-action weights, exposed as ``self.directions``.

    ``directions`` maps each of "FORWARD", "LEFT", "RIGHT" to an inner dict
    keyed by the same three names.
    NOTE(review): presumably the outer key is the requested action, the inner
    key the action actually performed, and the values percentages (the
    defaults sum to 100 per row) -- confirm against the code that samples it.
    """

    def __init__(self, inpFW=(100, 0, 0), inpLT=(0, 100, 0), inpRT=(0, 0, 100)):
        """Build the ``directions`` table from three weight sequences.

        The defaults are tuples rather than lists so that no mutable object
        is shared between calls; the arguments are only ever indexed, so
        lists passed by existing callers keep working unchanged.

        Args:
            inpFW: Row stored under "FORWARD"; items 0, 1, 2 become the
                inner "FORWARD", "LEFT", "RIGHT" entries respectively.
            inpLT: Row stored under "LEFT", same item order.
            inpRT: Row stored under "RIGHT", same item order.

        Raises:
            IndexError: If a row has fewer than three items.
        """
        inner_keys = ("FORWARD", "LEFT", "RIGHT")
        rows = (("FORWARD", inpFW), ("LEFT", inpLT), ("RIGHT", inpRT))
        # Index explicitly instead of zip() so a short row still raises
        # IndexError and extra trailing items are still ignored.
        self.directions = {
            outer_key: {
                inner_key: weights[position]
                for position, inner_key in enumerate(inner_keys)
            }
            for outer_key, weights in rows
        }