I couldn't get the other version of this algo to work. Maybe somebody can explain what the difference is?
See this thread:
viewtopic.php?f=24&t=2571

Code:
int PureCfrMachine::walk_pure_cfr( const int position,
                                   const BettingNode *cur_node,
                                   const hand_t &hand,
                                   rng_state_t &rng )
{
  int retval = 0;

  if( ( cur_node->get_child( ) == NULL )
      || cur_node->did_player_fold( position ) ) {
    /* Game over, calculate utility */
    retval = cur_node->evaluate( hand, position );
    return retval;
  }
  /* Grab some values that will be used often */
  int num_choices = cur_node->get_num_choices( );
  int8_t player = cur_node->get_player( );
  int8_t round = cur_node->get_round( );
  int64_t soln_idx = cur_node->get_soln_idx( );

  int bucket;
  if( ag.card_abs->can_precompute_buckets( ) ) {
    bucket = hand.precomputed_buckets[ player ][ round ];
  } else {
    bucket = ag.card_abs->get_bucket( ag.game, cur_node, hand.board_cards,
                                      hand.hole_cards );
  }
  /* Get the positive regrets at this information set */
  uint64_t pos_regrets[ num_choices ];
  uint64_t sum_pos_regrets
    = regrets[ round ]->get_pos_values( bucket,
                                        soln_idx,
                                        num_choices,
                                        pos_regrets );
  if( sum_pos_regrets == 0 ) {
    /* No positive regret, so assume a default uniform random current strategy */
    sum_pos_regrets = num_choices;
    for( int c = 0; c < num_choices; ++c ) {
      pos_regrets[ c ] = 1;
    }
  }
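  /* Regret matching would mix over choices in proportion to these positive
     regrets. Pure CFR instead samples a single pure choice from that
     distribution below, which is what keeps every update integer-valued. */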
  /* Purify the current strategy so that we always take a single choice */
  uint64_t dart = genrand_int32( &rng ) % sum_pos_regrets;
  int choice;
  for( choice = 0; choice < num_choices; ++choice ) {
    if( dart < pos_regrets[ choice ] ) {
      break;
    }
    dart -= pos_regrets[ choice ];
  }
  assert( choice < num_choices );
  assert( pos_regrets[ choice ] > 0 );
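  /* From here the walk splits: at nodes the traverser doesn't own we follow
     only the sampled choice, while at the traverser's own nodes we recurse
     down every choice so each alternative can be compared to the sampled one. */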
  const BettingNode *child = cur_node->get_child( );

  if( player != position ) {
    /* Opponent's node. Recurse down the single choice. */
    for( int c = 0; c < choice; ++c ) {
      child = child->get_sibling( );
    }
    retval = walk_pure_cfr( position, child, hand, rng );

    /* Update the average strategy if we are keeping track of one */
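    /* (Note the average strategy only accumulates here, at nodes where the
       acting player is not the traverser, with one increment for the single
       sampled choice.) */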
    if( do_average ) {
      if( avg_strategy[ round ]->increment_entry( bucket, soln_idx, choice ) ) {
        fprintf( stderr, "The average strategy has overflown :(\n" );
        fprintf( stderr, "To fix this, you must set a bigger AVG_STRATEGY_TYPE "
                 "in constants.cpp and start again from scratch.\n" );
        exit( 1 );
      }
    }
  } else {
    /* Current player's node. Recurse down all choices to get the value of each */
    int values[ num_choices ];
    for( int c = 0; c < num_choices; ++c ) {
      values[ c ] = walk_pure_cfr( position, child, hand, rng );
      child = child->get_sibling( );
    }

    /* We return the value that the sampled pure strategy attains */
    retval = values[ choice ];

    /* Update the regrets at the current node */
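    /* (update_regret presumably bumps each choice's regret by
       values[ c ] - retval, i.e. by how much better that choice would have
       done than the sampled pure strategy; its internals aren't shown here.) */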
    regrets[ round ]->update_regret( bucket, soln_idx, num_choices,
                                     values, retval );
  }

  return retval;
}
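
For anyone comparing this against vanilla CFR: the bit that makes it "pure" is the dart loop above, which samples one action in proportion to the positive regrets instead of playing the whole regret-matching mixture, so everything stays in integers. Here is a minimal standalone sketch of just that sampling step, using std::mt19937 in place of the project's rng_state_t / genrand_int32 and a std::vector instead of the stack array (the names here are mine, not from the codebase):

Code:
#include <cassert>
#include <cstdint>
#include <random>
#include <vector>

/* Sample one choice with probability proportional to its positive regret.
 * If every regret is zero, fall back to a uniform random choice, like the
 * sum_pos_regrets == 0 branch in walk_pure_cfr. */
int sample_pure_choice( std::vector<uint64_t> pos_regrets, std::mt19937 &rng )
{
  assert( !pos_regrets.empty( ) );

  uint64_t sum = 0;
  for( uint64_t r : pos_regrets ) {
    sum += r;
  }
  if( sum == 0 ) {
    /* No positive regret: treat every choice as having regret 1 (uniform) */
    for( uint64_t &r : pos_regrets ) {
      r = 1;
    }
    sum = pos_regrets.size( );
  }

  /* Throw a "dart" into [0, sum) and find which choice's interval it hits */
  uint64_t dart = std::uniform_int_distribution<uint64_t>( 0, sum - 1 )( rng );
  for( size_t choice = 0; choice < pos_regrets.size( ); ++choice ) {
    if( dart < pos_regrets[ choice ] ) {
      return static_cast<int>( choice );
    }
    dart -= pos_regrets[ choice ];
  }

  assert( false ); /* unreachable: the dart always lands in some interval */
  return -1;
}

Given the same pos_regrets that get_pos_values fills in above, this should pick choices with the same distribution as the dart loop. The only deliberate difference is using uniform_int_distribution rather than a raw modulo, which avoids the slight modulo bias of genrand_int32( &rng ) % sum_pos_regrets.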