playout.c

   1 #define DEBUG
   2 #include <assert.h>
   3 #include <math.h>
   4 #include <stdio.h>
   5 #include <stdlib.h>
   6 #include <string.h>
   7
   8 #include "board.h"
   9 #include "debug.h"
  10 #include "engine.h"
  11 #include "move.h"
  12 #include "ownermap.h"
  13 #include "playout.h"
  14
  15 /* Whether to set global debug level to the same as the playout
  16  * has, in case it is different. This can make sure e.g. tactical
  17  * reading produces proper level of debug prints during simulations.
  18  * But it is safe to enable this only in single-threaded instances! */
  19 //#define DEBUGL_BY_PLAYOUT
  20
/* Debug-level test against the playout policy's own debug level rather
 * than the global one. Expands to a reference to `policy->debug_level`,
 * so it can only be used where a variable named `policy` is in scope. */
#define PLDEBUGL(n) DEBUGL_(policy->debug_level, n)
  22
  23
  24 coord_t
  25 play_random_move(struct playout_setup *setup,
  26                  struct board *b, enum stone color,
  27                  struct playout_policy *policy)
  28 {
  29         coord_t coord = pass;
  30
  31         if (setup->prepolicy_hook) {
  32                 coord = setup->prepolicy_hook(policy, setup, b, color);
  33                 // fprintf(stderr, "prehook: %s\n", coord2sstr(coord, b));
  34         }
  35
  36         if (is_pass(coord)) {
  37                 coord = policy->choose(policy, setup, b, color);
  38                 // fprintf(stderr, "policy: %s\n", coord2sstr(coord, b));
  39         }
  40
  41         if (is_pass(coord) && setup->postpolicy_hook) {
  42                 coord = setup->postpolicy_hook(policy, setup, b, color);
  43                 // fprintf(stderr, "posthook: %s\n", coord2sstr(coord, b));
  44         }
  45
  46         if (is_pass(coord)) {
  47 play_random:
  48                 /* Defer to uniformly random move choice. */
  49                 /* This must never happen if the policy is tracking
  50                  * internal board state, obviously. */
  51                 assert(!policy->setboard || policy->setboard_randomok);
  52                 board_play_random(b, color, &coord, (ppr_permit) policy->permit, policy);
  53
  54         } else {
  55                 struct move m;
  56                 m.coord = coord; m.color = color;
  57                 if (board_play(b, &m) < 0) {
  58                         if (PLDEBUGL(4)) {
  59                                 fprintf(stderr, "Pre-picked move %d,%d is ILLEGAL:\n",
  60                                         coord_x(coord, b), coord_y(coord, b));
  61                                 board_print(b, stderr);
  62                         }
  63                         goto play_random;
  64                 }
  65         }
  66
  67         return coord;
  68 }
  69
  70 int
  71 play_random_game(struct playout_setup *setup,
  72                  struct board *b, enum stone starting_color,
  73                  struct playout_amafmap *amafmap,
  74                  struct board_ownermap *ownermap,
  75                  struct playout_policy *policy)
  76 {
  77         assert(setup && policy);
  78
  79         int gamelen = setup->gamelen - b->moves;
  80
  81         if (policy->setboard)
  82                 policy->setboard(policy, b);
  83 #ifdef DEBUGL_BY_PLAYOUT
  84         int debug_level_orig = debug_level;
  85         debug_level = policy->debug_level;
  86 #endif
  87
  88         enum stone color = starting_color;
  89
  90         int passes = is_pass(b->last_move.coord) && b->moves > 0;
  91
  92         while (gamelen-- && passes < 2) {
  93                 coord_t coord = play_random_move(setup, b, color, policy);
  94
  95 #if 0
  96                 /* For UCT, superko test here is downright harmful since
  97                  * in superko-likely situation we throw away literally
  98                  * 95% of our playouts; UCT will deal with this fine by
  99                  * itself. */
 100                 if (unlikely(b->superko_violation)) {
 101                         /* We ignore superko violations that are suicides. These
 102                          * are common only at the end of the game and are
 103                          * rather harmless. (They will not go through as a root
 104                          * move anyway.) */
 105                         if (group_at(b, coord)) {
 106                                 if (DEBUGL(3)) {
 107                                         fprintf(stderr, "Superko fun at %d,%d in\n", coord_x(coord, b), coord_y(coord, b));
 108                                         if (DEBUGL(4))
 109                                                 board_print(b, stderr);
 110                                 }
 111                                 return 0;
 112                         } else {
 113                                 if (DEBUGL(6)) {
 114                                         fprintf(stderr, "Ignoring superko at %d,%d in\n", coord_x(coord, b), coord_y(coord, b));
 115                                         board_print(b, stderr);
 116                                 }
 117                                 b->superko_violation = false;
 118                         }
 119                 }
 120 #endif
 121
 122                 if (PLDEBUGL(7)) {
 123                         fprintf(stderr, "%s %s\n", stone2str(color), coord2sstr(coord, b));
 124                         if (PLDEBUGL(8))
 125                                 board_print(b, stderr);
 126                 }
 127
 128                 if (unlikely(is_pass(coord))) {
 129                         passes++;
 130                 } else {
 131                         passes = 0;
 132                 }
 133                 if (amafmap) {
 134                         assert(amafmap->gamelen < MAX_GAMELEN);
 135                         amafmap->is_ko_capture[amafmap->gamelen] = board_playing_ko_threat(b);
 136                         amafmap->game[amafmap->gamelen++] = coord;
 137                 }
 138
 139                 if (setup->mercymin && abs(b->captures[S_BLACK] - b->captures[S_WHITE]) > setup->mercymin)
 140                         break;
 141
 142                 color = stone_other(color);
 143         }
 144
 145         floating_t score = board_fast_score(b);
 146         int result = (starting_color == S_WHITE ? score * 2 : - (score * 2));
 147
 148         if (DEBUGL(6)) {
 149                 fprintf(stderr, "Random playout result: %d (W %f)\n", result, score);
 150                 if (DEBUGL(7))
 151                         board_print(b, stderr);
 152         }
 153
 154         if (ownermap)
 155                 board_ownermap_fill(ownermap, b);
 156
 157         if (b->ps)
 158                 free(b->ps);
 159
 160 #ifdef DEBUGL_BY_PLAYOUT
 161         debug_level = debug_level_orig;
 162 #endif
 163
 164         return result;
 165 }
 166
 167 void
 168 playout_policy_done(struct playout_policy *p)
 169 {
 170         if (p->done) p->done(p);
 171         if (p->data) free(p->data);
 172         free(p);
 173 }