Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rewrite Stackelberg-eq solver in cvxpy; add degenerate test case #1123

Merged
merged 4 commits into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 20 additions & 27 deletions open_spiel/python/algorithms/stackelberg_lp.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
"""

import numpy as np
import cvxpy as cp

from open_spiel.python.algorithms import lp_solver
from open_spiel.python.algorithms.projected_replicator_dynamics import _simplex_projection
from open_spiel.python.egt.utils import game_payoffs_array

Expand Down Expand Up @@ -51,39 +51,32 @@ def solve_stackelberg(game, is_first_leader=True):
follower_eq_strategy = None

for t in range(num_follower_strategies):
lp = lp_solver.LinearProgram(objective=lp_solver.OBJ_MAX)
for s in range(num_leader_strategies):
lp.add_or_reuse_variable("s_{}".format(s))
lp.set_obj_coeff("s_{}".format(s), leader_payoff[s, t])

p_s = cp.Variable(num_leader_strategies)
constraints = [p_s >= 0, p_s <= 1, cp.sum(p_s) == 1]
for t_ in range(num_follower_strategies):
if t_ == t:
continue
lp.add_or_reuse_constraint("t_{}".format(t_), lp_solver.CONS_TYPE_GEQ)
for s in range(num_leader_strategies):
lp.set_cons_coeff("t_{}".format(t_), "s_{}".format(s),
follower_payoff[s, t] - follower_payoff[s, t_])
lp.set_cons_rhs("t_{}".format(t_), 0.0)
lp.add_or_reuse_constraint("sum_to_one", lp_solver.CONS_TYPE_EQ)
for s in range(num_leader_strategies):
lp.set_cons_coeff("sum_to_one", "s_{}".format(s), 1.0)
lp.set_cons_rhs("sum_to_one", 1.0)
try:
leader_strategy = np.array(lp.solve())
leader_strategy = _simplex_projection(
leader_strategy.reshape(-1)).reshape(-1, 1)
leader_value = leader_strategy.T.dot(leader_payoff)[0, t]
if leader_value > leader_eq_value:
leader_eq_strategy = leader_strategy
follower_eq_strategy = t
leader_eq_value = leader_value
follower_eq_value = leader_strategy.T.dot(follower_payoff)[0, t]
except: # pylint: disable=bare-except
constraints.append(
p_s @ follower_payoff[:, t_] <= p_s @ follower_payoff[:, t])
prob = cp.Problem(cp.Maximize(p_s @ leader_payoff[:, t]), constraints)
prob.solve()
p_s_value = p_s.value
if p_s_value is None:
continue
leader_strategy = _simplex_projection(
p_s.value.reshape(-1)).reshape(-1, 1)
leader_value = leader_strategy.T.dot(leader_payoff)[0, t]
if leader_value > leader_eq_value:
leader_eq_strategy = leader_strategy
follower_eq_strategy = t
leader_eq_value = leader_value
follower_eq_value = leader_strategy.T.dot(follower_payoff)[0, t]

assert leader_eq_strategy is not None, p_mat
if is_first_leader:
return (leader_eq_strategy.reshape(-1), np.identity(
num_follower_strategies)[follower_eq_strategy],
leader_eq_value, follower_eq_value)
leader_eq_value, follower_eq_value)
else:
return (np.identity(num_follower_strategies)[follower_eq_strategy],
leader_eq_strategy.reshape(-1), follower_eq_value, leader_eq_value)
11 changes: 10 additions & 1 deletion open_spiel/python/algorithms/stackelberg_lp_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
from open_spiel.python.egt.utils import game_payoffs_array
import pyspiel

EPS = 1e-6

# game instances based on Conitzer & Sandholm'06 paper
game0 = pyspiel.create_matrix_game([[2, 4], [1, 3]], [[1, 0], [0, 1]])
commit_strategy0 = np.array([0.5, 0.5])
Expand All @@ -32,12 +34,19 @@
commit_strategy1 = np.array([1 / 3, 2 / 3])
commit_value1 = 4 / 3

# a game with a dominated strategy
game2 = pyspiel.create_matrix_game([[3, 9], [9, 1]],
[[0, 0], [1, 8]])
commit_strategy2 = np.array([1.0, 0.0])
commit_value2 = 9.0


class StackelbergLPTest(parameterized.TestCase):

@parameterized.named_parameters(
("game0", game0, commit_strategy0, commit_value0),
("game1", game1, commit_strategy1, commit_value1),
("game2", game2, commit_strategy2, commit_value2)
)
def test_simple_games(self, game, commit_strategy, commit_value):
leader_eq_strategy, _, leader_eq_value, _ = solve_stackelberg(game)
Expand All @@ -53,7 +62,7 @@ def test_simple_games(self, game, commit_strategy, commit_value):
leader_nash_value = eq[0].reshape(1,
-1).dot(p_mat[0]).dot(eq[1].reshape(
-1, 1))
self.assertGreaterEqual(leader_eq_value, leader_nash_value)
self.assertGreaterEqual(leader_eq_value-leader_nash_value, -EPS)


if __name__ == "__main__":
Expand Down