/****************************************************************************** * Compilation: javac TwoPersonZeroSumGame.java * Execution: java TwoPersonZeroSumGame m n * Dependencies: LinearProgramming.java StdOut.java * * Solve an m-by-n two-person zero-sum game by reducing it to * linear programming. Assuming A is a strictly positive payoff * matrix, the optimal row and column player strategies are x* and y*, * scaled to be probability distributions. * * (P) max y^T 1 (D) min 1^T x * s.t A^T y <= 1 s.t A x >= 1 * y >= 0 x >= 0 * * Row player is x, column player is y. * ******************************************************************************/ /** * The {@code TwoPersonZeroSumGame} class represents a data type for * computing optimal row and column strategies to two-person zero-sum games. *

* This implementation solves an m-by-n two-person * zero-sum game by reducing it to a linear programming problem. * Assuming the payoff matrix A is strictly positive, the * optimal row and column player strategies x* and y* are obtained * by solving the following primal and dual pair of linear programs, * scaling the results to be probability distributions. *

 *  (P)  max  y^T 1           (D)  min   1^T x
 *       s.t  A^T y ≤ 1         s.t   A x ≥ 1
 *                y ≤ 0                 x ≥ 0
 *  
*

* If the payoff matrix A has any negative entries, we add * the same constant to every entry so that every entry is positive. * This increases the value of the game by that constant, but does not * change solutions to the two-person zero-sum game. *

* This implementation is not suitable for large inputs, as it calls * a bare-bones linear programming solver that is neither fast nor * robust with respect to floating-point roundoff error. *

* For additional documentation, see * Section 6.5 * Algorithms, 4th Edition by Robert Sedgewick and Kevin Wayne. * * @author Robert Sedgewick * @author Kevin Wayne */ public class TwoPersonZeroSumGame { private static final double EPSILON = 1.0E-8; private final int m; // number of rows private final int n; // number of columns private LinearProgramming lp; // linear program solver private double constant; // constant added to each entry in payoff matrix // (0 if all entries are strictly positive) /** * Determines an optimal solution to the two-sum zero-sum game * with the specified payoff matrix. * * @param payoff the m-by-n payoff matrix */ public TwoPersonZeroSumGame(double[][] payoff) { m = payoff.length; n = payoff[0].length; double[] c = new double[n]; double[] b = new double[m]; double[][] A = new double[m][n]; for (int i = 0; i < m; i++) b[i] = 1.0; for (int j = 0; j < n; j++) c[j] = 1.0; // find smallest entry constant = Double.POSITIVE_INFINITY; for (int i = 0; i < m; i++) for (int j = 0; j < n; j++) if (payoff[i][j] < constant) constant = payoff[i][j]; // add constant to every entry to make strictly positive if (constant <= 0) constant = -constant + 1; else constant = 0; for (int i = 0; i < m; i++) for (int j = 0; j < n; j++) A[i][j] = payoff[i][j] + constant; lp = new LinearProgramming(A, b, c); assert certifySolution(payoff); } /** * Returns the optimal value of this two-person zero-sum game. * * @return the optimal value of this two-person zero-sum game * */ public double value() { return 1.0 / scale() - constant; } // sum of x[j] private double scale() { double[] x = lp.primal(); double sum = 0.0; for (int j = 0; j < n; j++) sum += x[j]; return sum; } /** * Returns the optimal row strategy of this two-person zero-sum game. * * @return the optimal row strategy x of this two-person zero-sum game */ public double[] row() { double scale = scale(); double[] x = lp.primal(); for (int j = 0; j < n; j++) x[j] /= scale; return x; } /** * Returns the optimal column strategy of this two-person zero-sum game. * * @return the optimal column strategy y of this two-person zero-sum game */ public double[] column() { double scale = scale(); double[] y = lp.dual(); for (int i = 0; i < m; i++) y[i] /= scale; return y; } /************************************************************************** * * The code below is solely for testing correctness of the data type. * **************************************************************************/ // is the row vector x primal feasible? private boolean isPrimalFeasible() { double[] x = row(); double sum = 0.0; for (int j = 0; j < n; j++) { if (x[j] < 0) { StdOut.println("row vector not a probability distribution"); StdOut.printf(" x[%d] = %f\n", j, x[j]); return false; } sum += x[j]; } if (Math.abs(sum - 1.0) > EPSILON) { StdOut.println("row vector x[] is not a probability distribution"); StdOut.println(" sum = " + sum); return false; } return true; } // is the column vector y dual feasible? private boolean isDualFeasible() { double[] y = column(); double sum = 0.0; for (int i = 0; i < m; i++) { if (y[i] < 0) { StdOut.println("column vector y[] is not a probability distribution"); StdOut.printf(" y[%d] = %f\n", i, y[i]); return false; } sum += y[i]; } if (Math.abs(sum - 1.0) > EPSILON) { StdOut.println("column vector not a probability distribution"); StdOut.println(" sum = " + sum); return false; } return true; } // is the solution a Nash equilibrium? private boolean isNashEquilibrium(double[][] payoff) { double[] x = row(); double[] y = column(); double value = value(); // given row player's mixed strategy, find column player's best pure strategy double opt1 = Double.NEGATIVE_INFINITY; for (int i = 0; i < m; i++) { double sum = 0.0; for (int j = 0; j < n; j++) { sum += payoff[i][j] * x[j]; } if (sum > opt1) opt1 = sum; } if (Math.abs(opt1 - value) > EPSILON) { StdOut.println("Optimal value = " + value); StdOut.println("Optimal best response for column player = " + opt1); return false; } // given column player's mixed strategy, find row player's best pure strategy double opt2 = Double.POSITIVE_INFINITY; for (int j = 0; j < n; j++) { double sum = 0.0; for (int i = 0; i < m; i++) { sum += payoff[i][j] * y[i]; } if (sum < opt2) opt2 = sum; } if (Math.abs(opt2 - value) > EPSILON) { StdOut.println("Optimal value = " + value); StdOut.println("Optimal best response for row player = " + opt2); return false; } return true; } private boolean certifySolution(double[][] payoff) { return isPrimalFeasible() && isDualFeasible() && isNashEquilibrium(payoff); } private static void test(String description, double[][] payoff) { StdOut.println(); StdOut.println(description); StdOut.println("------------------------------------"); int m = payoff.length; int n = payoff[0].length; TwoPersonZeroSumGame zerosum = new TwoPersonZeroSumGame(payoff); double[] x = zerosum.row(); double[] y = zerosum.column(); StdOut.print("x[] = ["); for (int j = 0; j < n-1; j++) StdOut.printf("%8.4f, ", x[j]); StdOut.printf("%8.4f]\n", x[n-1]); StdOut.print("y[] = ["); for (int i = 0; i < m-1; i++) StdOut.printf("%8.4f, ", y[i]); StdOut.printf("%8.4f]\n", y[m-1]); StdOut.println("value = " + zerosum.value()); } // row = { 4/7, 3/7 }, column = { 0, 4/7, 3/7 }, value = 20/7 // http://en.wikipedia.org/wiki/Zero-sum private static void test1() { double[][] payoff = { { 30, -10, 20 }, { 10, 20, -20 } }; test("wikipedia", payoff); } // skew-symmetric => value = 0 // Linear Programming by Chvatal, p. 230 private static void test2() { double[][] payoff = { { 0, 2, -3, 0 }, { -2, 0, 0, 3 }, { 3, 0, 0, -4 }, { 0, -3, 4, 0 } }; test("Chvatal, p. 230", payoff); } // Linear Programming by Chvatal, p. 234 // row = { 0, 56/99, 40/99, 0, 0, 2/99, 0, 1/99 } // column = { 28/99, 30/99, 21/99, 20/99 } // value = 4/99 private static void test3() { double[][] payoff = { { 0, 2, -3, 0 }, { -2, 0, 0, 3 }, { 3, 0, 0, -4 }, { 0, -3, 4, 0 }, { 0, 0, -3, 3 }, { -2, 2, 0, 0 }, { 3, -3, 0, 0 }, { 0, 0, 4, -4 } }; test("Chvatal, p. 234", payoff); } // Linear Programming by Chvatal, p. 236 // row = { 0, 2/5, 7/15, 0, 2/15, 0, 0, 0 } // column = { 2/3, 0, 0, 1/3 } // value = -1/3 private static void test4() { double[][] payoff = { { 0, 2, -1, -1 }, { 0, 1, -2, -1 }, { -1, -1, 1, 1 }, { -1, 0, 0, 1 }, { 1, -2, 0, -3 }, { 1, -1, -1, -3 }, { 0, -3, 2, -1 }, { 0, -2, 1, -1 }, }; test("Chvatal p. 236", payoff); } // rock, paper, scissors // row = { 1/3, 1/3, 1/3 } // column = { 1/3, 1/3, 1/3 } private static void test5() { double[][] payoff = { { 0, -1, 1 }, { 1, 0, -1 }, { -1, 1, 0 } }; test("rock, paper, scisssors", payoff); } /** * Unit tests the {@code ZeroSumGameToLP} data type. * * @param args the command-line arguments */ public static void main(String[] args) { test1(); test2(); test3(); test4(); test5(); int m = Integer.parseInt(args[0]); int n = Integer.parseInt(args[1]); double[][] payoff = new double[m][n]; for (int i = 0; i < m; i++) for (int j = 0; j < n; j++) payoff[i][j] = StdRandom.uniformDouble(-0.5, 0.5); test("random " + m + "-by-" + n, payoff); } }