uct/policy/generic.c

   1 #include <assert.h>
   2 #include <math.h>
   3 #include <stdio.h>
   4 #include <stdlib.h>
   5
   6 #include "board.h"
   7 #include "debug.h"
   8 #include "move.h"
   9 #include "tactics/util.h"
  10 #include "random.h"
  11 #include "uct/internal.h"
  12 #include "uct/tree.h"
  13 #include "uct/policy/generic.h"
  14
  15 struct tree_node *
  16 uctp_generic_choose(struct uct_policy *p, struct tree_node *node, struct board *b, enum stone color, coord_t exclude)
  17 {
  18         struct tree_node *nbest = NULL;
  19         /* This function is called while the tree is updated by other threads.
  20          * We rely on node->children being set only after the node has been fully expanded. */
  21         for (struct tree_node *ni = node->children; ni; ni = ni->sibling)
  22                 // we compare playouts and choose the best-explored
  23                 // child; comparing values is more brittle
  24                 if (!nbest || ni->u.playouts > nbest->u.playouts) {
  25                         if (ni->coord == exclude)
  26                                 continue;
  27                         if (ni->hints & TREE_HINT_INVALID)
  28                                 continue;
  29 #if 0
  30                         /* Play pass only if we can afford scoring */
  31                         /* NOTE: But then the engine would never pass a losing game if the opponent
  32                            does not pass either (e.g. when playing with another copy of pachi.  This
  33                            can lead to the filling of own eyes. */
  34                         if (is_pass(ni->coord) && !uct_pass_is_safe(p->uct, b, color, p->uct->pass_all_alive))
  35                                 continue;
  36 #endif
  37                         nbest = ni;
  38                 }
  39         return nbest;
  40 }
  41
  42 /* Return the node with best value instead of best explored. We must use the heuristic
  43  * value (using prior and possibly rave), because the raw value is meaningless for
  44  * nodes evaluated rarely.
  45  * This function is called while the tree is updated by other threads */
  46 void
  47 uctp_generic_winner(struct uct_policy *p, struct tree *tree, struct uct_descent *descent)
  48 {
  49         if (!p->evaluate)
  50                 return;
  51         bool allow_pass = false; /* At worst forces some extra playouts at the end */
  52         int parity = tree_node_parity(tree, descent->node);
  53
  54         uctd_try_node_children(tree, descent, allow_pass, parity, p->uct->tenuki_d, di, urgency) {
  55                 urgency = p->evaluate(p, tree, &di, parity);
  56         } uctd_set_best_child(di, urgency);
  57
  58         uctd_get_best_child(descent);
  59 }