UCT pattern prior: -1000 -> -400 based on Jean-loup's tuning
[pachi/nmclean.git] / playout.c
blob b09eb1a4b19cbb92dd759735595d1e99d129ebbb
1 #define DEBUG
2 #include <assert.h>
3 #include <math.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
8 #include "board.h"
9 #include "debug.h"
10 #include "engine.h"
11 #include "move.h"
12 #include "ownermap.h"
13 #include "playout.h"
15 /* Whether to set global debug level to the same as the playout
16 * has, in case it is different. This can make sure e.g. tactical
17 * reading produces proper level of debug prints during simulations.
18 * But it is safe to enable this only in single-threaded instances! */
19 //#define DEBUGL_BY_PLAYOUT
21 #define PLDEBUGL(n) DEBUGL_(policy->debug_level, n)
24 coord_t
25 play_random_move(struct playout_setup *setup,
26 struct board *b, enum stone color,
27 struct playout_policy *policy)
29 coord_t coord = pass;
31 if (setup->prepolicy_hook) {
32 coord = setup->prepolicy_hook(policy, setup, b, color);
33 // fprintf(stderr, "prehook: %s\n", coord2sstr(coord, b));
36 if (is_pass(coord)) {
37 coord = policy->choose(policy, setup, b, color);
38 // fprintf(stderr, "policy: %s\n", coord2sstr(coord, b));
41 if (is_pass(coord) && setup->postpolicy_hook) {
42 coord = setup->postpolicy_hook(policy, setup, b, color);
43 // fprintf(stderr, "posthook: %s\n", coord2sstr(coord, b));
46 if (is_pass(coord)) {
47 play_random:
48 /* Defer to uniformly random move choice. */
49 /* This must never happen if the policy is tracking
50 * internal board state, obviously. */
51 assert(!policy->setboard);
52 board_play_random(b, color, &coord, (ppr_permit) policy->permit, policy);
54 } else {
55 struct move m;
56 m.coord = coord; m.color = color;
57 if (board_play(b, &m) < 0) {
58 if (PLDEBUGL(4)) {
59 fprintf(stderr, "Pre-picked move %d,%d is ILLEGAL:\n",
60 coord_x(coord, b), coord_y(coord, b));
61 board_print(b, stderr);
63 goto play_random;
67 return coord;
70 int
71 play_random_game(struct playout_setup *setup,
72 struct board *b, enum stone starting_color,
73 struct playout_amafmap *amafmap,
74 struct board_ownermap *ownermap,
75 struct playout_policy *policy)
77 assert(setup && policy);
79 int gamelen = setup->gamelen - b->moves;
80 if (gamelen < 10)
81 gamelen = 10;
83 if (policy->setboard)
84 policy->setboard(policy, b);
85 #ifdef DEBUGL_BY_PLAYOUT
86 int debug_level_orig = debug_level;
87 debug_level = policy->debug_level;
88 #endif
90 enum stone color = starting_color;
92 int passes = is_pass(b->last_move.coord) && b->moves > 0;
94 while (gamelen-- && passes < 2) {
95 coord_t coord = play_random_move(setup, b, color, policy);
97 #if 0
98 /* For UCT, superko test here is downright harmful since
99 * in superko-likely situation we throw away literally
100 * 95% of our playouts; UCT will deal with this fine by
101 * itself. */
102 if (unlikely(b->superko_violation)) {
103 /* We ignore superko violations that are suicides. These
104 * are common only at the end of the game and are
105 * rather harmless. (They will not go through as a root
106 * move anyway.) */
107 if (group_at(b, coord)) {
108 if (DEBUGL(3)) {
109 fprintf(stderr, "Superko fun at %d,%d in\n", coord_x(coord, b), coord_y(coord, b));
110 if (DEBUGL(4))
111 board_print(b, stderr);
113 return 0;
114 } else {
115 if (DEBUGL(6)) {
116 fprintf(stderr, "Ignoring superko at %d,%d in\n", coord_x(coord, b), coord_y(coord, b));
117 board_print(b, stderr);
119 b->superko_violation = false;
122 #endif
124 if (PLDEBUGL(7)) {
125 fprintf(stderr, "%s %s\n", stone2str(color), coord2sstr(coord, b));
126 if (PLDEBUGL(8))
127 board_print(b, stderr);
130 if (unlikely(is_pass(coord))) {
131 passes++;
132 } else {
133 passes = 0;
135 if (amafmap) {
136 assert(amafmap->gamelen < MAX_GAMELEN);
137 amafmap->game[amafmap->gamelen++] = coord;
140 if (setup->mercymin && abs(b->captures[S_BLACK] - b->captures[S_WHITE]) > setup->mercymin)
141 break;
143 color = stone_other(color);
146 floating_t score = board_fast_score(b);
147 int result = (starting_color == S_WHITE ? score * 2 : - (score * 2));
149 if (DEBUGL(6)) {
150 fprintf(stderr, "Random playout result: %d (W %f)\n", result, score);
151 if (DEBUGL(7))
152 board_print(b, stderr);
155 if (ownermap)
156 board_ownermap_fill(ownermap, b);
158 if (b->ps)
159 free(b->ps);
161 #ifdef DEBUGL_BY_PLAYOUT
162 debug_level = debug_level_orig;
163 #endif
165 return result;