This is some code from my experiments for Kuhn/One Card Poker. It works.
Code:
/**
 * Expected-value (EV) calculations over a game tree built from {@code Action} nodes.
 *
 * <p>Two traversals alternate down the tree. At "best response" nodes the highest-EV action is
 * chosen and its {@code timesTaken} counter for the current card is incremented. At "equilibrium"
 * nodes every action is followed, weighted by the empirical frequencies derived from those
 * {@code timesTaken} counters. Each traversal hands non-terminal children to the other traversal
 * and negates the value it gets back.
 *
 * <p>Note that both traversals mutate {@code Action.timesTaken} (the equilibrium traversal does so
 * indirectly, through the best-response nodes below it).
 */
public class BestResponse {
    static Random random = new Random();

    /* best response ev ------------------------------------------------------------------------- */

    /**
     * Sums the best-response EV at {@code root} over every card index.
     *
     * @param root actions available at the root node
     * @param rootProbabilities one row per card; row {@code c} is passed down as the weight vector
     *     used when the best responder holds card {@code c}. The number of rows defines the number
     *     of cards, and each row is indexed up to that count.
     * @return the plain sum of the per-card values (no averaging is applied here)
     */
    static double bestResponseEv(Action[] root, double[][] rootProbabilities) {
        int noCards = rootProbabilities.length;
        double total = 0;
        for (int brCard = 0; brCard < noCards; brCard++) {
            total += bestResponseEv(root, brCard, rootProbabilities[brCard], noCards);
        }
        return total;
    }

    /**
     * Evaluates every action at {@code node}, picks the one with the highest EV, records the choice
     * by incrementing that action's {@code timesTaken[brCard]}, and returns the winning EV.
     *
     * <p>All actions are always evaluated, in array order, because evaluating an action may itself
     * update counters further down the tree. Ties keep the earliest action. NaN values are never
     * preferred over a real value.
     *
     * @param node actions available at this node
     * @param brCard card index held by the best responder
     * @param eqProbabilities per-card weights for the other player's holdings
     * @param noCards number of distinct cards
     * @return the highest EV among the node's actions
     * @throws IllegalStateException if the node has no actions or every action's EV is NaN, so no
     *     action can be selected
     */
    static double bestResponseEv(Action[] node, int brCard, double[] eqProbabilities, int noCards) {
        // NEGATIVE_INFINITY (rather than -Double.MAX_VALUE) so that any real EV can win.
        double maxEv = Double.NEGATIVE_INFINITY;
        Action maxAction = null;
        for (Action action : node) {
            double ev = bestResponseEv(action, brCard, eqProbabilities, noCards);
            // Accept the first non-NaN value unconditionally, so an EV equal to the sentinel is
            // still selectable; afterwards require a strict improvement (earliest action wins ties).
            boolean firstUsable = maxAction == null && !Double.isNaN(ev);
            if (firstUsable || ev > maxEv) {
                maxEv = ev;
                maxAction = action;
            }
        }
        if (maxAction == null) {
            throw new IllegalStateException(
                    "No best-response action could be selected for card " + brCard
                            + ": node has " + node.length + " action(s) and none has a comparable EV");
        }
        maxAction.timesTaken[brCard]++;
        return maxEv;
    }

    /**
     * EV of a single action taken by the best responder.
     *
     * <p>A non-terminal action hands over to the equilibrium traversal at {@code action.endNode}
     * and negates the result; terminal actions are scored directly.
     */
    private static double bestResponseEv(Action action, int brCard, double[] eqProbabilities, int noCards) {
        switch (action.type) {
            case NONTERMINAL:
                return -equilibriumEv(action.endNode, brCard, eqProbabilities, noCards);
            case FOLD:
                return foldEv(action, eqProbabilities, noCards);
            case SHOWDOWN:
                return showdownEv(action, brCard, eqProbabilities, noCards);
        }
        return unreachable();
    }

    /* equilibrium ev --------------------------------------------------------------------------- */

    /**
     * Sums the equilibrium-traversal EV at {@code root} over every card index.
     *
     * @param root actions available at the root node
     * @param rootProbabilities one row per card; row {@code c} is passed down as the weight vector
     *     for card index {@code c}. The number of rows defines the number of cards.
     * @return the plain sum of the per-card values (no averaging is applied here)
     */
    static double equilibriumEv(Action[] root, double[][] rootProbabilities) {
        int noCards = rootProbabilities.length;
        double total = 0;
        for (int brCard = 0; brCard < noCards; brCard++) {
            total += equilibriumEv(root, brCard, rootProbabilities[brCard], noCards);
        }
        return total;
    }

    /**
     * Follows every action at {@code node}, weighting the incoming per-card weights by the
     * empirical strategy for that action, and sums the resulting values.
     *
     * @param node actions available at this node
     * @param brCard card index held by the best responder
     * @param eqProbabilities per-card weights for the player acting at this node
     * @param noCards number of distinct cards
     * @return the sum of the per-action values
     */
    static double equilibriumEv(Action[] node, int brCard, double[] eqProbabilities, int noCards) {
        // Strategies are computed once, before descending, exactly as counters stand on entry.
        double[][] strategies = equilibriumStrategies(node, noCards);
        double total = 0;
        for (int a = 0; a < node.length; a++) {
            // Matrix is defined elsewhere; presumably an element-wise product of the two vectors.
            double[] actionProbabilities = Matrix.product(strategies[a], eqProbabilities);
            total += equilibriumEv(node[a], brCard, actionProbabilities, noCards);
        }
        return total;
    }

    /**
     * Value of a single action taken at an equilibrium node.
     *
     * <p>A non-terminal action hands over to the best-response traversal at {@code action.endNode}
     * and negates the result. The showdown value is negated as well, whereas the fold value is
     * used as-is.
     */
    private static double equilibriumEv(Action action, int brCard, double[] eqProbabilities, int noCards) {
        switch (action.type) {
            case NONTERMINAL:
                return -bestResponseEv(action.endNode, brCard, eqProbabilities, noCards);
            case FOLD:
                return foldEv(action, eqProbabilities, noCards);
            case SHOWDOWN:
                return -showdownEv(action, brCard, eqProbabilities, noCards);
        }
        return unreachable();
    }

    /* ev --------------------------------------------------------------------------------------- */

    /**
     * Showdown value for the holder of {@code brCard}: for each card index {@code v} the payoff is
     * counted positively when {@code brCard > v}, negatively when {@code brCard < v}, and not at
     * all when they are equal, each weighted by {@code eqProbabilities[v]}.
     */
    private static double showdownEv(Action action, int brCard, double[] eqProbabilities, int noCards) {
        double total = 0;
        for (int v = 0; v < noCards; v++) {
            total += Math.signum(brCard - v) * action.payoff * eqProbabilities[v];
        }
        return total;
    }

    /**
     * Fold value: the action's payoff weighted by each of the first {@code noCards} entries of
     * {@code eqProbabilities}, summed.
     */
    private static double foldEv(Action action, double[] eqProbabilities, int noCards) {
        double total = 0;
        for (int i = 0; i < noCards; i++) {
            total += eqProbabilities[i] * action.payoff;
        }
        return total;
    }

    /* ------------------------------------------------------------------------------------------ */

    /**
     * Converts the {@code timesTaken} counters at a node into per-card action frequencies.
     *
     * <p>For each card the frequency of an action is its count divided by the total count over all
     * actions at the node. When that total is zero (no action has been recorded for the card yet)
     * every action gets the same frequency {@code 1 / node.length}; dividing by the zero total
     * would otherwise produce NaN for every action and poison all values computed from it.
     *
     * @param node actions available at the node
     * @param noCards number of distinct cards
     * @return a matrix indexed {@code [action][card]}
     */
    private static double[][] equilibriumStrategies(Action[] node, int noCards) {
        int noActions = node.length;
        double[][] result = new double[noActions][noCards];
        for (int c = 0; c < noCards; c++) {
            long sum = 0;
            for (int a = 0; a < noActions; a++) {
                sum += node[a].timesTaken[c];
            }
            if (sum == 0) {
                // No observations for this card: fall back to a uniform strategy.
                for (int a = 0; a < noActions; a++) {
                    result[a][c] = 1.0 / noActions;
                }
                continue;
            }
            for (int a = 0; a < noActions; a++) {
                result[a][c] = (double) node[a].timesTaken[c] / (double) sum;
            }
        }
        return result;
    }

    /**
     * Always throws; typed as {@code double} so it can terminate a value-returning method after a
     * switch that is expected to cover every action type.
     */
    private static double unreachable() {
        throw new RuntimeException("Unreachable");
    }
}