-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsolver.py
150 lines (117 loc) · 7.05 KB
/
solver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
from gurobipy import *
import numpy as np
import time
class NashEqulibriumSolver:
def __init__(self, game_step:int, p1_action_num:int, p2_action_num:int, payoff_matrix:np.array):
"""
game_step: step number of the game; normal game step is 1, extensive game step is more than 1
p1_action_num: player 1's action number
p2_action_num: player 2's action number
payoff_matrix: the generated payoff matrix of the game
"""
self.game_step = game_step
self.action_choice_player1 = p1_action_num
self.action_choice_player2 = p2_action_num
self.action_total_num_player1 = self.action_choice_player1 ** game_step
self.action_total_num_player2 = self.action_choice_player2 ** game_step
self.payoff_matrix = payoff_matrix
assert self.payoff_matrix.shape[0] == self.action_total_num_player1, "Payoff matrix row number does not match Player 1 pure strategy number"
assert self.payoff_matrix.shape[1] == self.action_total_num_player2, "Payoff matrix column number does not match Player 2 pure strategy number"
def solve_linear_program_player1(self, verbose: bool=False):
"""
Solve the NE profile strategy for player 1
verbose: output result details or not
Return (filtered_player1_NE, NE_utility), where filtered_player1_NE is player 1's strategy,
and NE_utility is its NE profile utility
"""
start_time = time.time()
## setup model
LP_model = Model()
LP_model.setParam('OutputFlag', 0)
## define variables
game_value = LP_model.addVar(name='game_value', vtype=GRB.CONTINUOUS) # the NE profile utility, i.e., the value of the zero-sum two-player game
player1_actions = LP_model.addVars(self.action_total_num_player1, vtype=GRB.CONTINUOUS) # the probabilities for all actions of player 1
## add constraints
# all player 1 actions' probability should be non-negative, and their sum is 1
LP_model.addConstr(player1_actions.sum() == 1)
LP_model.addConstrs(player1_actions[i] >= 0 for i in range(self.action_total_num_player1))
# bound player 1's utility by game_value (for each player 1's pure strategy)
for player2_action_index in range(self.action_total_num_player2):
lhs = 0
for player1_action_index in range(self.action_total_num_player1):
# if the payoff matrix is huge (e.g., converted from extensive game), we can store it with bool utilities to save space
if type(self.payoff_matrix[player1_action_index, player2_action_index]) is np.bool_:
p1_utility = 1 if self.payoff_matrix[player1_action_index, player2_action_index] else -1
# for simple normal form game, just store all payoff values
else:
p1_utility = self.payoff_matrix[player1_action_index, player2_action_index]
lhs += p1_utility * player1_actions[player1_action_index]
LP_model.addConstr(lhs >= game_value)
## setup objective func and solve
obj = game_value
LP_model.setObjective(obj, GRB.MAXIMIZE)
LP_model.optimize()
## return solved outcomes
if LP_model.Status == GRB.Status.OPTIMAL:
player1_NE = LP_model.getAttr('x', player1_actions)
filtered_player1_NE = {k: v for k,v in player1_NE.items() if v > 0.}
NE_utility = LP_model.ObjVal
if verbose:
print("Player 1 strategy solving time: ", time.time() - start_time)
print("Nash Equlibrium Profile Utility for Player 1: ", NE_utility)
print("Nash Equlibrium Player 1 Strategy: ", filtered_player1_NE)
print("------------------------------------------")
return (filtered_player1_NE, NE_utility)
else:
print(LP_model.Status, GRB.Status.OPTIMAL)
print("The linear programming for player 1 is infeasible, please double check.")
return None
def solve_linear_program_player2(self, verbose: bool=False):
"""
Solve the NE profile strategy for player 2
verbose: output result details or not
Return (filtered_player2_NE, NE_utility), where filtered_player2_NE is player 2's strategy,
and NE_utility is its NE profile utility
"""
start_time = time.time()
## setup model
LP_model = Model()
LP_model.setParam('OutputFlag', 0)
## define variables
game_value = LP_model.addVar(name='game_value', vtype=GRB.CONTINUOUS) # the NE profile utility, i.e., the value of the zero-sum two-player game
player2_actions = LP_model.addVars(self.action_total_num_player2, vtype=GRB.CONTINUOUS) # the probabilities for all actions of player 2
## add constraints
# all player 2 actions' probability should be non-negative, and their sum is 1
LP_model.addConstr(player2_actions.sum() == 1)
LP_model.addConstrs(player2_actions[i] >= 0 for i in range(self.action_total_num_player2))
# bound player 2's utility by game_value (for each player 1's pure strategy)
for player1_action_index in range(self.action_total_num_player1):
lhs = 0
for player2_action_index in range(self.action_total_num_player2):
# if the payoff matrix is huge (e.g., converted from extensive game), we can store it with bool utilities to save space
if type(self.payoff_matrix[player1_action_index, player2_action_index]) is np.bool_:
p1_utility = 1 if self.payoff_matrix[player1_action_index, player2_action_index] else 1
# for simple normal form game, just store all payoff values
else:
p1_utility = self.payoff_matrix[player1_action_index, player2_action_index]
lhs += p1_utility * player2_actions[player2_action_index]
LP_model.addConstr(lhs <= game_value)
## setup objective func and solve
obj = game_value
LP_model.setObjective(obj, GRB.MINIMIZE)
LP_model.optimize()
## return solved outcomes
if LP_model.Status == GRB.Status.OPTIMAL:
player2_NE = LP_model.getAttr('x', player2_actions)
filtered_player2_NE = {k: v for k,v in player2_NE.items() if v > 0.}
NE_utility = LP_model.ObjVal
if verbose:
print("Player 2 strategy solving time: ", time.time() - start_time)
print("Nash Equlibrium Profile Utility for Player 2: ", - NE_utility)
print("Nash Equlibrium Player 2 Strategy: ", filtered_player2_NE)
print("------------------------------------------")
return (filtered_player2_NE, NE_utility)
else:
print(LP_model.Status, GRB.Status.OPTIMAL)
print("The linear programming for player 2 is infeasible, please double check.")
return None