For UpdateCFR, I'm just doing:
regret(hand) += u[a] - u[sampledAction];
For sample, I'm selecting an action at random, with probabilities derived from regret(hand).
It's really such a simple algorithm that I don't know where else I could have gone wrong. The regrets are stored as integers, and the cumulative strategies as Uint16s.
Code:
// Oskari Tammelin 2010
//
// + no multiplications so cfr values can be integers
// + low iteration cost
/**
 * Recursively walks the game tree from `node` on behalf of `player`.
 *
 * At every non-terminal node one action is drawn via `sample` from the
 * node's current strategy for the acting player's hand. When the acting
 * player is `player`, every child is evaluated and `node.updateCFR` is
 * called once per action with that action's value minus the sampled
 * action's value. Otherwise the sampled action's counter in
 * `node.strategy` is incremented and only that child is followed.
 *
 * @param player index of the player whose values are being updated
 * @param node   current tree node
 * @param hands  hands indexed by player
 * @returns the terminal payoff, or the value of the sampled action
 */
function pureExternalSampling(player, node, hands)
{
    if (node.isTerminal())
        return node.getPayoff(player, hands);

    var actor = node.player;
    // The action is drawn before branching so both branches share one sample.
    var chosen = sample(node.getCurrentStrategy(hands[actor]));

    if (actor != player)
    {
        // Other player's node: count the sampled action and follow only it.
        node.strategy[hands[actor]][chosen]++;
        return pureExternalSampling(player, node.children[chosen], hands);
    }

    // Traversing player's node: evaluate every child before any update.
    var values = [];
    var i;
    for (i = 0; i < node.children.length; i++)
        values.push(pureExternalSampling(player, node.children[i], hands));

    // Each action is updated by its difference to the sampled action's value.
    for (i = 0; i < node.children.length; i++)
        node.updateCFR(hands[player], i, values[i] - values[chosen]);

    return values[chosen];
}
/**
 * Runs one training iteration: deals a single set of hands, then
 * traverses the tree rooted at `game` once for player 0 and once for
 * player 1, reusing the same hands for both traversals.
 */
function iteration()
{
    var dealt = dealHands();
    // Player 0 is traversed first, then player 1.
    for (var traverser = 0; traverser < 2; traverser++)
    {
        pureExternalSampling(traverser, game, dealt);
    }
}
Statistics: Posted by cantina — Mon Sep 02, 2013 5:33 am
]]>