17 #include "tactics/util.h"
18 #include "uct/dynkomi.h"
19 #include "uct/internal.h"
20 #include "uct/search.h"
25 #define DESCENT_DLEN 512
29 uct_progress_text(struct uct
*u
, struct tree
*t
, enum stone color
, int playouts
)
35 struct tree_node
*best
= u
->policy
->choose(u
->policy
, t
->root
, t
->board
, color
, resign
);
37 fprintf(stderr
, "... No moves left\n");
40 fprintf(stderr
, "[%d] ", playouts
);
41 fprintf(stderr
, "best %f ", tree_node_get_value(t
, 1, best
->u
.value
));
44 if (t
->use_extra_komi
)
45 fprintf(stderr
, "xkomi %.1f ", t
->extra_komi
);
48 fprintf(stderr
, "| seq ");
49 for (int depth
= 0; depth
< 4; depth
++) {
50 if (best
&& best
->u
.playouts
>= 25) {
51 fprintf(stderr
, "%3s ", coord2sstr(node_coord(best
), t
->board
));
52 best
= u
->policy
->choose(u
->policy
, best
, t
->board
, color
, resign
);
59 fprintf(stderr
, "| can ");
61 struct tree_node
*can
[cans
];
62 memset(can
, 0, sizeof(can
));
63 best
= t
->root
->children
;
66 while ((!can
[c
] || best
->u
.playouts
> can
[c
]->u
.playouts
) && ++c
< cans
);
67 for (int d
= 0; d
< c
; d
++) can
[d
] = can
[d
+ 1];
68 if (c
> 0) can
[c
- 1] = best
;
73 fprintf(stderr
, "%3s(%.3f) ",
74 coord2sstr(node_coord(can
[cans
]), t
->board
),
75 tree_node_get_value(t
, 1, can
[cans
]->u
.value
));
81 fprintf(stderr
, "\n");
85 uct_progress_json(struct uct
*u
, struct tree
*t
, enum stone color
, int playouts
, coord_t
*final
, bool big
)
87 /* Prefix indicating JSON line. */
88 fprintf(stderr
, "{\"%s\": {", final
? "move" : "frame");
91 fprintf(stderr
, "\"playouts\": %d", playouts
);
94 if (t
->use_extra_komi
)
95 fprintf(stderr
, ", \"extrakomi\": %.1f", t
->extra_komi
);
98 /* Final move choice */
99 fprintf(stderr
, ", \"choice\": {\"%s\"}",
100 coord2sstr(*final
, t
->board
));
102 struct tree_node
*best
= u
->policy
->choose(u
->policy
, t
->root
, t
->board
, color
, resign
);
105 fprintf(stderr
, ", \"best\": {\"%s\": %f}",
106 coord2sstr(best
->coord
, t
->board
),
107 tree_node_get_value(t
, 1, best
->u
.value
));
111 /* Best candidates */
113 struct tree_node
*can
[cans
];
114 memset(can
, 0, sizeof(can
));
115 struct tree_node
*best
= t
->root
->children
;
118 while ((!can
[c
] || best
->u
.playouts
> can
[c
]->u
.playouts
) && ++c
< cans
);
119 for (int d
= 0; d
< c
; d
++) can
[d
] = can
[d
+ 1];
120 if (c
> 0) can
[c
- 1] = best
;
121 best
= best
->sibling
;
123 fprintf(stderr
, ", \"can\": [");
124 while (--cans
>= 0) {
125 if (!can
[cans
]) break;
127 fprintf(stderr
, "%s[", cans
< 3 ? ", " : "");
129 for (int depth
= 0; depth
< 4; depth
++) {
130 if (!best
|| best
->u
.playouts
< 25) break;
131 fprintf(stderr
, "%s{\"%s\":%.3f}", depth
> 0 ? "," : "",
132 coord2sstr(best
->coord
, t
->board
),
133 tree_node_get_value(t
, 1, best
->u
.value
));
134 best
= u
->policy
->choose(u
->policy
, best
, t
->board
, color
, resign
);
136 fprintf(stderr
, "]");
138 fprintf(stderr
, "]");
142 if (t
->avg_score
.playouts
> 0)
143 fprintf(stderr
, ", \"avg\": {\"score\": %.3f}", t
->avg_score
.value
);
144 /* Per-intersection information. */
145 fprintf(stderr
, ", \"boards\": {");
146 /* Position coloring information. */
147 fprintf(stderr
, "\"colors\": [");
149 foreach_point(t
->board
) {
150 if (board_at(t
->board
, c
) == S_OFFBOARD
) continue;
151 fprintf(stderr
, "%s%d", f
++ > 0 ? "," : "", board_at(t
->board
, c
));
153 fprintf(stderr
, "]");
154 /* Ownership statistics. Value (0..1000) for each possible
155 * point describes likelihood of this point becoming black.
156 * Normally, white rate is 1000-value; exception are possible
157 * seki points, but these should be rare. */
158 fprintf(stderr
, ", \"territory\": [");
160 foreach_point(t
->board
) {
161 if (board_at(t
->board
, c
) == S_OFFBOARD
) continue;
162 int rate
= u
->ownermap
.map
[c
][S_BLACK
] * 1000 / u
->ownermap
.playouts
;
163 fprintf(stderr
, "%s%d", f
++ > 0 ? "," : "", rate
);
165 fprintf(stderr
, "]");
166 fprintf(stderr
, "}");
169 fprintf(stderr
, "}}\n");
173 uct_progress_status(struct uct
*u
, struct tree
*t
, enum stone color
, int playouts
, coord_t
*final
)
175 switch (u
->reporting
) {
177 uct_progress_text(u
, t
, color
, playouts
);
181 uct_progress_json(u
, t
, color
, playouts
, final
,
182 u
->reporting
== UR_JSON_BIG
);
190 record_amaf_move(struct playout_amafmap
*amaf
, coord_t coord
, bool is_ko_capture
)
192 assert(amaf
->gamelen
< MAX_GAMELEN
);
193 amaf
->is_ko_capture
[amaf
->gamelen
] = is_ko_capture
;
194 amaf
->game
[amaf
->gamelen
++] = coord
;
198 struct uct_playout_callback
{
201 struct tree_node
*lnode
;
206 uct_playout_hook(struct playout_policy
*playout
, struct playout_setup
*setup
, struct board
*b
, enum stone color
, int mode
)
208 /* XXX: This is used in some non-master branches. */
213 uct_playout_prepolicy(struct playout_policy
*playout
, struct playout_setup
*setup
, struct board
*b
, enum stone color
)
215 return uct_playout_hook(playout
, setup
, b
, color
, 0);
219 uct_playout_postpolicy(struct playout_policy
*playout
, struct playout_setup
*setup
, struct board
*b
, enum stone color
)
221 return uct_playout_hook(playout
, setup
, b
, color
, 1);
226 uct_leaf_node(struct uct
*u
, struct board
*b
, enum stone player_color
,
227 struct playout_amafmap
*amaf
,
228 struct uct_descent
*descent
, int *dlen
,
229 struct tree_node
*significant
[2],
230 struct tree
*t
, struct tree_node
*n
, enum stone node_color
,
233 enum stone next_color
= stone_other(node_color
);
234 int parity
= (next_color
== player_color
? 1 : -1);
237 fprintf(stderr
, "%s*-- UCT playout #%d start [%s] %f\n",
238 spaces
, n
->u
.playouts
, coord2sstr(node_coord(n
), t
->board
),
239 tree_node_get_value(t
, -parity
, n
->u
.value
));
241 struct uct_playout_callback upc
= {
244 /* TODO: Don't necessarily restart the sequence walk when
245 * entering playout. */
249 struct playout_setup ps
= {
250 .gamelen
= u
->gamelen
,
251 .mercymin
= u
->mercymin
,
252 .prepolicy_hook
= uct_playout_prepolicy
,
253 .postpolicy_hook
= uct_playout_postpolicy
,
256 int result
= play_random_game(&ps
, b
, next_color
,
257 u
->playout_amaf
? amaf
: NULL
,
258 &u
->ownermap
, u
->playout
);
259 if (next_color
== S_WHITE
) {
260 /* We need the result from black's perspective. */
264 fprintf(stderr
, "%s -- [%d..%d] %s random playout result %d\n",
265 spaces
, player_color
, next_color
, coord2sstr(node_coord(n
), t
->board
), result
);
271 scale_value(struct uct
*u
, struct board
*b
, enum stone node_color
, struct tree_node
*significant
[2], int result
)
273 floating_t rval
= result
> 0 ? 1.0 : result
< 0 ? 0.0 : 0.5;
274 if (u
->val_scale
&& result
!= 0) {
276 if (u
->t
->avg_score
.playouts
< 50)
278 result
-= u
->t
->avg_score
.value
* 2;
281 double scale
= u
->val_scale
;
283 /* xvalue is 0 at 0.5, 1 at 0 or 1 */
284 /* No correction for parity necessary. */
285 double xvalue
= significant
[node_color
- 1] ? fabs(significant
[node_color
- 1]->u
.value
- 0.5) * 2 : 0;
286 scale
= u
->val_bytemp_min
+ (u
->val_scale
- u
->val_bytemp_min
) * xvalue
;
289 int vp
= u
->val_points
;
291 vp
= board_size(b
) - 1; vp
*= vp
; vp
*= 2;
294 floating_t sval
= (floating_t
) abs(result
) / vp
;
295 sval
= sval
> 1 ? 1 : sval
;
296 if (result
< 0) sval
= 1 - sval
;
298 rval
+= scale
* sval
;
300 rval
= (1 - scale
) * rval
+ scale
* sval
;
301 // fprintf(stderr, "score %d => sval %f, rval %f\n", result, sval, rval);
307 local_value(struct uct
*u
, struct board
*b
, coord_t coord
, enum stone color
)
309 /* Tactical evaluation of move @coord by color @color, given
310 * simulation end position @b. I.e., a move is tactically good
311 * if the resulting group stays on board until the game end. */
312 /* We can also take into account surrounding stones, e.g. to
313 * encourage taking off external liberties during a semeai. */
314 double val
= board_local_value(u
->local_tree_neival
, b
, coord
, color
);
315 return (color
== S_WHITE
) ? 1.f
- val
: val
;
319 record_local_sequence(struct uct
*u
, struct tree
*t
, struct board
*endb
,
320 struct uct_descent
*descent
, int dlen
, int di
,
321 enum stone seq_color
)
323 #define LTREE_DEBUG if (UDEBUGL(6))
325 /* Ignore pass sequences. */
326 if (is_pass(node_coord(descent
[di
].node
)))
329 LTREE_DEBUG
board_print(endb
, stderr
);
330 LTREE_DEBUG
fprintf(stderr
, "recording local %s sequence: ",
331 stone2str(seq_color
));
333 /* Sequences starting deeper are less relevant in general. */
334 int pval
= LTREE_PLAYOUTS_MULTIPLIER
;
335 if (u
->local_tree
&& u
->local_tree_depth_decay
> 0)
336 pval
= ((floating_t
) pval
) / pow(u
->local_tree_depth_decay
, di
- 1);
338 LTREE_DEBUG
fprintf(stderr
, "too deep @%d\n", di
);
342 /* Pick the right local tree root... */
343 struct tree_node
*lnode
= seq_color
== S_BLACK
? t
->ltree_black
: t
->ltree_white
;
346 /* ...determine the sequence value... */
348 if (u
->local_tree_eval
!= LTE_EACH
) {
349 sval
= local_value(u
, endb
, node_coord(descent
[di
].node
), seq_color
);
350 LTREE_DEBUG
fprintf(stderr
, "(goal %s[%s %1.3f][%d]) ",
351 coord2sstr(node_coord(descent
[di
].node
), t
->board
),
352 stone2str(seq_color
), sval
, descent
[di
].node
->d
);
354 if (u
->local_tree_eval
== LTE_TOTAL
) {
356 while (di
< dlen
&& (di
== di0
|| descent
[di
].node
->d
< u
->tenuki_d
)) {
357 enum stone color
= (di
- di0
) % 2 ? stone_other(seq_color
) : seq_color
;
358 double rval
= local_value(u
, endb
, node_coord(descent
[di
].node
), color
);
364 sval
/= (di
- di0
+ 1);
369 /* ...and record the sequence. */
371 while (di
< dlen
&& !is_pass(node_coord(descent
[di
].node
))
372 && (di
== di0
|| descent
[di
].node
->d
< u
->tenuki_d
)) {
373 enum stone color
= (di
- di0
) % 2 ? stone_other(seq_color
) : seq_color
;
375 if (u
->local_tree_eval
!= LTE_EACH
)
378 rval
= local_value(u
, endb
, node_coord(descent
[di
].node
), color
);
379 LTREE_DEBUG
fprintf(stderr
, "%s[%s %1.3f][%d] ",
380 coord2sstr(node_coord(descent
[di
].node
), t
->board
),
381 stone2str(color
), rval
, descent
[di
].node
->d
);
382 lnode
= tree_get_node(t
, lnode
, node_coord(descent
[di
++].node
), true);
384 stats_add_result(&lnode
->u
, rval
, pval
);
387 /* Add lnode for tenuki (pass) if we descended further. */
389 double rval
= u
->local_tree_eval
!= LTE_EACH
? sval
: 0.5;
390 LTREE_DEBUG
fprintf(stderr
, "pass ");
391 lnode
= tree_get_node(t
, lnode
, pass
, true);
393 stats_add_result(&lnode
->u
, rval
, pval
);
396 LTREE_DEBUG
fprintf(stderr
, "\n");
401 uct_playout(struct uct
*u
, struct board
*b
, enum stone player_color
, struct tree
*t
)
406 struct playout_amafmap amaf
;
407 amaf
.gamelen
= amaf
.game_baselen
= 0;
409 /* Walk the tree until we find a leaf, then expand it and do
410 * a random playout. */
411 struct tree_node
*n
= t
->root
;
412 enum stone node_color
= stone_other(player_color
);
413 assert(node_color
== t
->root_color
);
415 /* Make sure the root node is expanded. */
416 if (tree_leaf_node(n
) && !__sync_lock_test_and_set(&n
->is_expanded
, 1))
417 tree_expand_node(t
, n
, &b2
, player_color
, u
, 1);
419 /* Tree descent history. */
420 /* XXX: This is somewhat messy since @n and descent[dlen-1].node are
422 struct uct_descent descent
[DESCENT_DLEN
];
423 descent
[0].node
= n
; descent
[0].lnode
= NULL
;
425 /* Total value of the sequence. */
426 struct move_stats seq_value
= { .playouts
= 0 };
427 /* The last "significant" node along the descent (i.e. node
428 * with higher than configured number of playouts). For black
430 struct tree_node
*significant
[2] = { NULL
, NULL
};
431 if (n
->u
.playouts
>= u
->significant_threshold
)
432 significant
[node_color
- 1] = n
;
435 int pass_limit
= (board_size(&b2
) - 2) * (board_size(&b2
) - 2) / 2;
436 int passes
= is_pass(b
->last_move
.coord
) && b
->moves
> 0;
439 static char spaces
[] = "\0 ";
442 fprintf(stderr
, "--- UCT walk with color %d\n", player_color
);
444 while (!tree_leaf_node(n
) && passes
< 2) {
445 spaces
[dlen
- 1] = ' '; spaces
[dlen
] = 0;
448 /*** Choose a node to descend to: */
450 /* Parity is chosen already according to the child color, since
451 * it is applied to children. */
452 node_color
= stone_other(node_color
);
453 int parity
= (node_color
== player_color
? 1 : -1);
455 assert(dlen
< DESCENT_DLEN
);
456 descent
[dlen
] = descent
[dlen
- 1];
457 if (u
->local_tree
&& (!descent
[dlen
].lnode
|| descent
[dlen
].node
->d
>= u
->tenuki_d
)) {
458 /* Start new local sequence. */
459 /* Remember that node_color already holds color of the
460 * to-be-found child. */
461 descent
[dlen
].lnode
= node_color
== S_BLACK
? t
->ltree_black
: t
->ltree_white
;
464 if (!u
->random_policy_chance
|| fast_random(u
->random_policy_chance
))
465 u
->policy
->descend(u
->policy
, t
, &descent
[dlen
], parity
, b2
.moves
> pass_limit
);
467 u
->random_policy
->descend(u
->random_policy
, t
, &descent
[dlen
], parity
, b2
.moves
> pass_limit
);
470 /*** Perform the descent: */
472 if (descent
[dlen
].node
->u
.playouts
>= u
->significant_threshold
) {
473 significant
[node_color
- 1] = descent
[dlen
].node
;
476 seq_value
.playouts
+= descent
[dlen
].value
.playouts
;
477 seq_value
.value
+= descent
[dlen
].value
.value
* descent
[dlen
].value
.playouts
;
478 n
= descent
[dlen
++].node
;
479 assert(n
== t
->root
|| n
->parent
);
481 fprintf(stderr
, "%s+-- UCT sent us to [%s:%d] %d,%f\n",
482 spaces
, coord2sstr(node_coord(n
), t
->board
),
483 node_coord(n
), n
->u
.playouts
,
484 tree_node_get_value(t
, parity
, n
->u
.value
));
486 /* Add virtual loss if we need to; this is used to discourage
487 * other threads from visiting this node in case of multiple
488 * threads doing the tree search. */
490 stats_add_result(&n
->u
, node_color
== S_BLACK
? 0.0 : 1.0, u
->virtual_loss
);
492 struct move m
= { node_coord(n
), node_color
};
493 int res
= board_play(&b2
, &m
);
495 if (res
< 0 || (!is_pass(m
.coord
) && !group_at(&b2
, m
.coord
)) /* suicide */
496 || b2
.superko_violation
) {
498 for (struct tree_node
*ni
= n
; ni
; ni
= ni
->parent
)
499 fprintf(stderr
, "%s<%"PRIhash
"> ", coord2sstr(node_coord(ni
), t
->board
), ni
->hash
);
500 fprintf(stderr
, "marking invalid %s node %d,%d res %d group %d spk %d\n",
501 stone2str(node_color
), coord_x(node_coord(n
),b
), coord_y(node_coord(n
),b
),
502 res
, group_at(&b2
, m
.coord
), b2
.superko_violation
);
504 n
->hints
|= TREE_HINT_INVALID
;
509 assert(node_coord(n
) >= -1);
510 record_amaf_move(&amaf
, node_coord(n
), board_playing_ko_threat(&b2
));
512 if (is_pass(node_coord(n
)))
517 enum stone next_color
= stone_other(node_color
);
518 /* We need to make sure only one thread expands the node. If
519 * we are unlucky enough for two threads to meet in the same
520 * node, the latter one will simply do another simulation from
521 * the node itself, no big deal. t->nodes_size may exceed
522 * the maximum in multi-threaded case but not by much so it's ok.
523 * The size test must be before the test&set not after, to allow
524 * expansion of the node later if enough nodes have been freed. */
525 if (tree_leaf_node(n
)
526 && n
->u
.playouts
- u
->virtual_loss
>= u
->expand_p
&& t
->nodes_size
< u
->max_tree_size
527 && !__sync_lock_test_and_set(&n
->is_expanded
, 1))
528 tree_expand_node(t
, n
, &b2
, next_color
, u
, -parity
);
531 amaf
.game_baselen
= amaf
.gamelen
;
533 if (t
->use_extra_komi
&& u
->dynkomi
->persim
) {
534 b2
.komi
+= round(u
->dynkomi
->persim(u
->dynkomi
, &b2
, t
, n
));
538 /* XXX: No dead groups support. */
539 floating_t score
= board_official_score(&b2
, NULL
);
540 /* Result from black's perspective (no matter who
541 * the player; black's perspective is always
542 * what the tree stores. */
543 result
= - (score
* 2);
546 fprintf(stderr
, "[%d..%d] %s p-p scoring playout result %d (W %f)\n",
547 player_color
, node_color
, coord2sstr(node_coord(n
), t
->board
), result
, score
);
549 board_print(&b2
, stderr
);
551 board_ownermap_fill(&u
->ownermap
, &b2
);
553 } else { // assert(tree_leaf_node(n));
554 /* In case of parallel tree search, the assertion might
555 * not hold if two threads chew on the same node. */
556 result
= uct_leaf_node(u
, &b2
, player_color
, &amaf
, descent
, &dlen
, significant
, t
, n
, node_color
, spaces
);
559 if (u
->policy
->wants_amaf
&& u
->playout_amaf_cutoff
) {
560 unsigned int cutoff
= amaf
.game_baselen
;
561 cutoff
+= (amaf
.gamelen
- amaf
.game_baselen
) * u
->playout_amaf_cutoff
/ 100;
562 amaf
.gamelen
= cutoff
;
565 /* Record the result. */
567 assert(n
== t
->root
|| n
->parent
);
568 floating_t rval
= scale_value(u
, b
, node_color
, significant
, result
);
569 u
->policy
->update(u
->policy
, t
, n
, node_color
, player_color
, &amaf
, &b2
, rval
);
571 stats_add_result(&t
->avg_score
, result
/ 2, 1);
572 if (t
->use_extra_komi
) {
573 stats_add_result(&u
->dynkomi
->score
, result
/ 2, 1);
574 stats_add_result(&u
->dynkomi
->value
, rval
, 1);
577 if (u
->local_tree
&& n
->parent
&& !is_pass(node_coord(n
)) && dlen
> 0) {
578 /* Get the local sequences and record them in ltree. */
579 /* We will look for sequence starts in our descent
580 * history, then run record_local_sequence() for each
581 * found sequence start; record_local_sequence() may
582 * pick longer sequences from descent history then,
583 * which is expected as it will create new lnodes. */
584 enum stone seq_color
= player_color
;
585 /* First move always starts a sequence. */
586 record_local_sequence(u
, t
, &b2
, descent
, dlen
, 1, seq_color
);
587 seq_color
= stone_other(seq_color
);
588 for (int dseqi
= 2; dseqi
< dlen
; dseqi
++, seq_color
= stone_other(seq_color
)) {
589 if (u
->local_tree_allseq
) {
590 /* We are configured to record all subsequences. */
591 record_local_sequence(u
, t
, &b2
, descent
, dlen
, dseqi
, seq_color
);
594 if (descent
[dseqi
].node
->d
>= u
->tenuki_d
) {
595 /* Tenuki! Record the fresh sequence. */
596 record_local_sequence(u
, t
, &b2
, descent
, dlen
, dseqi
, seq_color
);
599 if (descent
[dseqi
].lnode
&& !descent
[dseqi
].lnode
) {
600 /* Record result for in-descent picked sequence. */
601 record_local_sequence(u
, t
, &b2
, descent
, dlen
, dseqi
, seq_color
);
608 /* We need to undo the virtual loss we added during descend. */
609 if (u
->virtual_loss
) {
610 floating_t loss
= node_color
== S_BLACK
? 0.0 : 1.0;
611 for (; n
->parent
; n
= n
->parent
) {
612 stats_rm_result(&n
->u
, loss
, u
->virtual_loss
);
617 board_done_noalloc(&b2
);
622 uct_playouts(struct uct
*u
, struct board
*b
, enum stone color
, struct tree
*t
, struct time_info
*ti
)
625 if (ti
&& ti
->dim
== TD_GAMES
) {
626 for (i
= 0; t
->root
->u
.playouts
<= ti
->len
.games
&& !uct_halt
; i
++)
627 uct_playout(u
, b
, color
, t
);
629 for (i
= 0; !uct_halt
; i
++)
630 uct_playout(u
, b
, color
, t
);