3 from subprocess
import PIPE
8 from os
.path
import abspath
, exists
12 from itertools
import chain
18 from utils
import misc
, godb_models
, godb_session
19 from utils
.db_cache
import declare_pure_function
, cache_result
20 from utils
.misc
import first_true_pred
, partial_right
, partial
21 from utils
.utils
import head
22 from utils
.colors
import *
23 from utils
.godb_models
import ProcessingError
26 from result_file
import ResultFile
, get_output_resultfile
, get_output_resultpair
27 from config
import OUTPUT_DIR
29 from pachi
import scan_raw_patterns
, generate_spatial_dictionary
"""
Contains code for conversion of a game (or a list of them) into a vector.
Moreover, it allows forming .tab files to be used by the Orange data mining framework.
"""
# Format of one line of a summarized pattern file: optional leading whitespace,
# an integer count (group 1), optional whitespace and the pattern text (group 2).
# Raw string, so that \s and \d reach the regex engine as written.
pat_file_regexp = r'^\s*(\d+)\s*(.+)$'
def _make_interval_annotations(l, varname):
    """Return human readable labels of the intervals delimited by boundaries @l.

    @l is a list of integer boundaries (presumably sorted in increasing order,
    this is not checked), @varname is the variable name used in the labels.
    There is one label per boundary plus one for everything above the last
    boundary.  An interval holding a single integer is written as an equality.
    Raises ValueError if some boundary is not an integer.

    >>> _make_interval_annotations([10,11,12,13], 'X')
    ['X <= 10', 'X == 11', 'X == 12', 'X == 13', 'X > 13']
    >>> _make_interval_annotations([22], 'X')
    ['X <= 22', 'X > 22']
    >>> _make_interval_annotations([-1, 20], 'X')
    ['X <= -1', '-1 < X <= 20', 'X > 20']
    >>> _make_interval_annotations([], 'X')
    ['any X']
    """
    if not all( misc.is_int(x) for x in l ):
        raise ValueError("Interval boundaries must be a number.")
    if not l:
        # no boundaries => one interval covering everything
        return [ "any " + varname ]

    annots = []
    prev = None
    for point in l:
        if prev is None:
            # first interval, it has no left boundary
            s = "%s <= %d" % (varname, point)
        elif prev == point - 1:
            # if the interval size is 1 specify the interval precisely
            s = "%s == %d" % (varname, point)
        else:
            s = "%d < %s <= %d" % (prev, varname, point)
        annots.append(s)
        prev = point

    # the rest above the last boundary, there is no right boundary here
    annots.append("%s > %d" % (varname, prev))
    return annots
80 ## game -> BlackWhite( vector_black, vector_white )
class BWBdistVectorGenerator:
    """Game vector generator counting the moves by their distance from the border.

    Calling an instance maps game -> BlackWhite( vector_black, vector_white ),
    each vector being a flattened (game phase x border distance) histogram.
    """

    def __init__(self, by_line=[2,3,4], by_moves=[26,76]):
        """
        @by_line   boundaries (line numbers) of the border distance buckets
        @by_moves  boundaries (move numbers) of the game phases

        See __call__ for examples.  A warning is logged if some boundary in
        @by_moves is odd, because the players then do not get the same number
        of moves in the buckets.
        """
        # copy the lists, so that the shared default arguments (or the lists of
        # the caller) can never be changed through this instance
        self.by_line = list(by_line)
        self.by_moves = list(by_moves)

        if any( x%2 for x in self.by_moves ):
            logging.warning("BWDistVectorGenerator called with odd number of moves "
                            "specifying the hist size => this means that the players "
                            "wont have the same number of moves in the buckets!!")

        line_annots = _make_interval_annotations(self.by_line, 'bdist')
        move_annots = _make_interval_annotations(self.by_moves, 'move')

        # one column per (move bucket, line bucket) pair, the line bucket changes
        # fastest - the same layout is used for indexing in __call__
        self.annotations = [ "(bdist histogram: %s, %s)"%(m,b)
                             for m,b in itertools.product(move_annots, line_annots) ]
        self.types = [ "continuous" ] * len(self.annotations)

        def leq_fac(val):
            # the factory binds @val right now, a lambda written directly in
            # the comprehension below would see only the last value
            return lambda x : x <= val

        # predicates giving bucket coordinate, the last one catches the rest
        self.line_preds = [ leq_fac(line) for line in self.by_line ] + [ lambda line : True ]
        self.move_preds = [ leq_fac(movenum) for movenum in self.by_moves ] + [ lambda movenum : True ]

    def __repr__(self):
        return 'BWBdistVectorGenerator(by_line=%s, by_moves=%s)'%(repr(self.by_line),
                                                                  repr(self.by_moves))

    def __call__(self, game):
        """
        For a game, creates histograms of moves distance from border.
        The histograms' granularity is specified by @by_line and @by_moves parameters.

        The @by_moves makes different histogram for each game phase, e.g.:
            by_moves=[]         makes one histogram for whole game
            by_moves=[50]       makes two histograms, one for first 50 moves (including),
                                one for the rest
            by_moves=[26, 76]   makes three histograms,
                                first 26 moves   (X <= 26)       ~ opening
                                next 50 moves    (26 < X <= 76)  ~ middle game
                                rest of the game (76 < X)        ~ end game
        NOTE: the by_moves numbers should be even, so that we count the same
              number of moves for every player.

        The @by_line specifies granularity of each histogram, that is
            by_line = [3]       each hist has 2 buckets, one counts moves on first three
                                lines, second for the rest
            by_line = [3, 4, 5] four buckets/histogram, X <= 3, X = 4, X = 5, X > 5

        Returns BlackWhite(histogram of black, histogram of white).
        """
        # scan game, ignore spatials
        col_pat = pachi.scan_raw_patterns(game, patargs='xspat=0', skip_empty=False)

        buckets = {}
        for color in PLAYER_COLORS:
            buckets[color] = numpy.zeros(len(self.line_preds) * len(self.move_preds))

        for movenum, (color, pat) in enumerate(col_pat):
            try:
                bdist = pat.first_payload('border')
            except StopIteration:
                # NOTE(review): this statement is missing in the available source,
                # skipping moves without the 'border' feature is assumed
                continue

            # X and Y coordinates
            # (first_true_pred presumably returns index of the first predicate that holds)
            line_bucket = first_true_pred(self.line_preds, bdist + 1) # line = bdist + 1
            move_bucket = first_true_pred(self.move_preds, movenum + 1) # movenum is counted from zero

            # histogram[color][X][Y] += 1
            xy = line_bucket + move_bucket * len(self.line_preds)
            buckets[color][xy] += 1

        return BlackWhite(buckets[PLAYER_COLOR_BLACK], buckets[PLAYER_COLOR_WHITE])
161 ## game -> BlackWhite( vector_black, vector_white )
class BWLocalSeqVectorGenerator:
    """Game vector generator counting how the local sequences of moves end.

    Calling an instance maps game -> BlackWhite( vector_black, vector_white ),
    each vector holds three numbers described by self.annotations.
    """

    def __init__(self, local_threshold=5):
        """@local_threshold is the biggest 'cont' payload still considered to be local."""
        self.local_threshold = local_threshold
        self.annotations = [ '(local seq < %d: sente)'%local_threshold,
                             '(local seq < %d: gote)'%local_threshold,
                             '(local seq < %d: sente - gote)'%local_threshold, ]
        self.types = [ "continuous" ] * len(self.annotations)

    def __repr__(self):
        return 'BWLocalSeqVectorGenerator(local_threshold=%s)'%(repr(self.local_threshold))

    def __call__(self, game):
        """self.local_threshold gives threshold specifying what is considered to be a local
        sequence, moves closer (or equal) than self.local_threshold in gridcular matrix
        to each other are considered local.

        Returns BlackWhite(counts of black, counts of white), the counts being
        [sente, gote, sente - gote] of the sequences started by the player."""
        # scan game, ignore spatials
        col_pat = pachi.scan_raw_patterns(game, patargs='xspat=0', skip_empty=False)

        # positions in the resulting vectors, the same order as in self.annotations
        SENTE_COOR = 0
        GOTE_COOR = 1
        DIFF_COOR = 2

        count = {PLAYER_COLOR_BLACK : numpy.zeros(3),
                 PLAYER_COLOR_WHITE : numpy.zeros(3)}

        last_local = False
        seq_start = None
        for color, pat in col_pat:
            if not pat.has_feature('cont'):
                # NOTE(review): this branch is missing in the available source,
                # a move without the 'cont' feature is assumed to be non-local
                local = False
            else:
                local = pat.first_payload('cont') <= self.local_threshold

            # if the sequence just started
            if local and not last_local:
                # this color had to reply locally, so it was the other guy that
                # started the sequence
                seq_start = the_other_color(color)

            # if the sequence just ended
            if not local and last_local:
                if color == seq_start:
                    # the player who started the sequence gets to continue elsewhere
                    count[seq_start][ SENTE_COOR ] += 1
                else:
                    # if he does not <=> he lost tempo with the sequence
                    count[seq_start][ GOTE_COOR ] += 1

            last_local = local

        for color in PLAYER_COLORS:
            cnt = count[color]
            cnt[DIFF_COOR] = cnt[SENTE_COOR] - cnt[GOTE_COOR]

        return BlackWhite(count[PLAYER_COLOR_BLACK], count[PLAYER_COLOR_WHITE])
218 ## game -> BlackWhite( vector_black, vector_white )
class BWCaptureVectorGenerator:
    """Game vector generator counting captured and lost stones per game phase.

    Calling an instance maps game -> BlackWhite( vector_black, vector_white ),
    each vector holds [captured, lost, difference] for every move bucket.
    """

    def __init__(self, by_moves=[26,76], offset=6, payload_size=4):
        """The params @offset and @payload size have to be the constants from pachi/pattern.h,
            offset = PF_CAPTURE_COUNTSTONES
            payload_size = CAPTURE_COUNTSTONES_PAYLOAD_SIZE

        @by_moves are boundaries (move numbers) of the game phases.  A warning
        is logged if some of them is odd, because the players then do not get
        the same number of moves in the buckets.
        """
        self.offset = offset
        self.payload_size = payload_size
        # copy the list, so that the shared default argument (or the list of
        # the caller) can never be changed through this instance
        self.by_moves = list(by_moves)

        if any( x%2 for x in self.by_moves ):
            logging.warning("BWCaptureVectorGenerator called with odd number of moves "
                            "specifying the hist size => this means that the players "
                            "wont have the same number of moves in the buckets!!")

        capture_annots = [ 'captured', 'lost', 'difference' ]
        move_annots = _make_interval_annotations(self.by_moves, 'move')

        # three columns per move bucket, the same layout is filled in __call__
        self.annotations = [ "(capture histogram: %s, %s)"%(m,b)
                             for m,b in itertools.product(move_annots, capture_annots) ]
        self.types = [ "continuous" ] * len(self.annotations)

        def leq_fac(val):
            # the factory binds @val right now, a lambda written directly in
            # the comprehension below would see only the last value
            return lambda x : x <= val

        # predicates giving bucket coordinate, the last one catches the rest
        self.move_preds = [ leq_fac(move) for move in self.by_moves ] + [ lambda movenum : True ]

    def __repr__(self):
        args = tuple(repr(arg) for arg in (self.by_moves, self.offset, self.payload_size))
        return 'BWCaptureVectorGenerator(by_moves=%s, offset=%s, payload_size=%s)'% args

    def __call__(self, game):
        """Returns BlackWhite(vector of black, vector of white) for the @game."""
        # scan game, ignore spatials
        col_pat = pachi.scan_raw_patterns(game, patargs='xspat=0', skip_empty=False)

        # stones captured by each player in each move bucket
        buckets = {}
        for color in PLAYER_COLORS:
            buckets[color] = numpy.zeros(len(self.move_preds))

        # the stone count takes @payload_size bits of the payload, starting at bit @offset
        mask = 2 ** self.payload_size - 1
        for movenum, (color, pat) in enumerate(col_pat):
            if pat.has_feature('capture'):
                captured = mask & (pat.first_payload('capture') >> self.offset)

                move_bucket = first_true_pred(self.move_preds, movenum + 1) # counted from zero
                buckets[color][move_bucket] += captured

        ret = {}
        for color in PLAYER_COLORS:
            ret[color] = numpy.zeros(3 * len(self.move_preds))

        for mp in range(len(self.move_preds)):
            for color in PLAYER_COLORS:
                # captured by the player
                ret[color][3 * mp] = buckets[color][mp]
                # lost <=> captured by the opponent
                ret[color][3 * mp + 1] = buckets[the_other_color(color)][mp]
                # difference
                ret[color][3 * mp + 2] = ret[color][3 * mp] - ret[color][3 * mp + 1]

        return BlackWhite(ret[PLAYER_COLOR_BLACK], ret[PLAYER_COLOR_WHITE])
284 ## game -> BlackWhite( vector_black, vector_white )
285 class BWWinStatVectorGenerator
:
295 self
.types
= [ "continuous" ] * len(self
.annotations
)
298 return 'BWWinStatVectorGenerator2()'
300 def __call__(self
, game
):
302 result
= str(game
.sgf_header
.get('RE', '0'))
304 if result
.lower() in ['0', 'jigo', 'draw']:
305 raise ProcessingError(repr(self
) + " Jigo")
307 match
= re
.match(r
'^([BW])\+(.*)$', result
)
309 raise ProcessingError(repr(self
) + ' Could not find result sgf tag.')
311 player
, val
= match
.group(1), match
.group(2)
312 if ( val
.lower().startswith('f') or # forfeit
313 val
.lower().startswith('t') ): # time
314 raise ProcessingError(repr(self
) + ' Forfeit, time.')
318 if val
.lower().startswith('r'):
325 raise ProcessingError(repr(self
) + ' Points not float.')
329 black
= numpy
.array( wins
+ loses
)
330 white
= numpy
.array( loses
+ wins
)
332 white
= numpy
.array( wins
+ loses
)
333 black
= numpy
.array( loses
+ wins
)
335 return BlackWhite(black
, white
)
339 ## game -> BlackWhite( vector_black, vector_white )
340 class BWWinPointsStatVectorGenerator
:
346 self
.types
= [ "continuous" ] * len(self
.annotations
)
349 return 'BWWinPointsStatVectorGenerator2()'
351 def __call__(self
, game
):
353 result
= str(game
.sgf_header
.get('RE', '0'))
355 if result
.lower() in ['0', 'jigo', 'draw']:
356 raise ProcessingError(repr(self
) + " Jigo")
358 match
= re
.match(r
'^([BW])\+(.*)$', result
)
360 raise ProcessingError(repr(self
) + ' Could not find result sgf tag.')
362 player
, val
= match
.group(1), match
.group(2)
363 if ( val
.lower().startswith('f') or # forfeit
364 val
.lower().startswith('t') or # time
365 val
.lower().startswith('r') # resign
367 raise ProcessingError(repr(self
) + ' Forfeit, time, resign.')
372 raise ProcessingError(repr(self
) + ' Points not float.')
375 black
= numpy
.array( [points
, 0] )
376 white
= numpy
.array( [0, points
] )
379 black
, white
= white
, black
381 return BlackWhite(black
, white
)
383 # - for black - transform_rawpatfile -
385 # game -> raw_patternscan_game --
387 # - for white ----- || -----
390 @declare_pure_function
391 def raw_patternscan_game(game
, spatial_dict
, patargs
=''):
392 assert spatial_dict
.exists(warn
=True)
393 ret
= get_output_resultpair(suffix
='.raw.pat')
395 with
open(ret
.black
.filename
, mode
='w') as fb
:
396 with
open(ret
.white
.filename
, mode
='w') as fw
:
397 for color
, pat
in scan_raw_patterns(game
, spatial_dict
, patargs
=patargs
):
398 fd
= fb
if color
== PLAYER_COLOR_BLACK
else fw
399 # write output for the desired player
401 #logging.debug(gtp + ":" + pat)
403 #logging.info("Generated Raw Patternfiles for game %s, %s"%(game, ret))
407 @declare_pure_function
408 def transform_rawpatfile(rawpat_file
, ignore
=set(), transform
={}, ignore_empty
=True):
409 """Transforms raw pattern file line by line, by ignoring certain features (and their payloads)
410 @ignore and transforming payloads with @transform. If @ignore_empty is specified,
411 empty patterns are ignored.
413 transform_rawpatfile(file, ignore=set('s', 'cont'), transform={'border':lambda x: x - 1})
415 (s:10 border:5 cont:10)
424 ret
= get_output_resultfile('.raw.pat')
425 with
open(ret
.filename
, mode
='w') as fout
:
426 with
open(rawpat_file
.filename
, mode
='r') as fin
:
428 pat
= pachi
.Pattern(line
).reduce(lambda feat
, _
: not feat
in ignore
)
431 p
= transform
.get(f
, lambda x
:x
)(p
)
432 fpairs
.append((f
, p
))
434 if ignore_empty
and not fpairs
:
437 fout
.write( "%s\n"%pachi
.Pattern(fpairs
=fpairs
) )
@declare_pure_function
def summarize_rawpat_file(rawpat_file):
    """Transforms raw pattern file into summarized one.

    The lines of @rawpat_file are piped through `sort | uniq -c | sort -rn`,
    so every distinct line is written once, prefixed by the number of its
    occurrences, the most frequent lines first.  The output goes to a new
    '.pat' result file, which is returned.

    Anything the pipeline prints to stderr is logged as a warning.
    Raises RuntimeError if the pipeline ends with a nonzero exit code.
    """
    try:
        from shlex import quote     # Python 3
    except ImportError:
        from pipes import quote     # Python 2

    result_file = get_output_resultfile('.pat')

    # the file names are quoted, so that spaces or shell metacharacters in them
    # cannot break (or extend) the command run by the shell
    script = "cat %s | sort | uniq -c | sort -rn > %s "%(quote(rawpat_file.filename),
                                                         quote(result_file.filename))

    p = subprocess.Popen(script, shell=True, stderr=PIPE)
    _, stderr = p.communicate()
    if stderr:
        logging.warning("subprocess summarize stderr:\n%s"%(stderr,))
    if p.returncode:
        raise RuntimeError("Child sumarize failed, exitcode %d."%(p.returncode,))

    return result_file
467 class SummarizeMerger(godb_models
.Merger
):
468 """Used to sum Summarized Pattern files:
479 m = SummarizeMerger()
493 def start(self
, bw_gen
):
499 def add(self
, pat_file
, color
):
500 with
open(pat_file
.filename
) as fin
:
502 match
= re
.match(pat_file_regexp
, line
)
504 raise IOError("Wrong file format: " + pat_file
)
505 count
, pattern
= int(match
.group(1)), match
.group(2)
506 self
.cd
[pattern
] = self
.cd
.get(pattern
, 0) + count
509 result_file
= get_output_resultfile('.pat')
510 with
open(result_file
.filename
, 'w') as fout
:
512 for pattern
, count
in sorted(self
.cd
.iteritems(), key
=lambda kv
: - kv
[1]):
514 # get number of decimal places, so that the file is nicely formatted
515 firstlen
= 1 + int(math
.log10(count
))
517 # prefix the count with 2 spaces, see pat_file_regexp for format
518 s
= "%" + str(2 + firstlen
) + "d %s\n"
519 fout
.write(s
%(count
, pattern
))
525 class VectorSumMerger(godb_models
.Merger
):
529 def start(self
, bw_gen
):
530 assert all( tp
== 'continuous' for tp
in bw_gen
.types
)
531 self
.sofar
= numpy
.zeros(len(bw_gen
.types
))
536 def add(self
, vector
, color
=None):
537 if self
.sofar
== None:
538 self
.sofar
= numpy
.zeros(vector
.shape
)
542 if self
.sofar
== None:
543 self
.sofar
= numpy
.zeros(0)
548 class VectorArithmeticMeanMerger(godb_models
.Merger
):
552 def start(self
, bw_gen
):
554 self
.summ
.start(bw_gen
)
558 self
.summ
= VectorSumMerger()
560 def add(self
, vector
, color
=None):
562 self
.summ
.add(vector
)
566 ret
= self
.summ
.finish()
568 ret
= self
.summ
.finish() / self
.count
573 # so that the fc has nice repr
574 @declare_pure_function
578 @declare_pure_function
def linear_rescale(vec, a=-1, b=1):
    """Linearly rescales elements in vector so that:
        min(vec) gets mapped to a
        max(vec) gets mapped to b
        the intermediate values get remapped linearly between

    If all the elements are the same, each of them gets mapped to (a + b) / 2.
    An empty vector gives an empty result.  @vec may be anything
    numpy.asarray accepts, a new float array is returned.
    """
    vec = numpy.asarray(vec, dtype=float)
    if vec.size == 0:
        # min() / max() of an empty array raise, and there is nothing to rescale
        return vec.copy()

    MIN, MAX = vec.min(), vec.max()
    if MIN == MAX:
        # the scale below would divide by zero,
        # return average value of the set
        return (float(a + b) / 2) * numpy.ones(vec.shape)
    return a + (vec - MIN) * ( float(b - a) / (MAX - MIN) )
592 @declare_pure_function
def natural_rescale(vec):
    """Divides every element of @vec by the sum of all its elements.

    No special care is taken of a vector that sums to zero, the division
    is left to numpy.
    """
    total = numpy.sum(vec)
    return vec / total
@declare_pure_function
def log_rescale(vec, a=-1, b=1):
    """Takes log(1 + x) of every element of @vec and linearly rescales the
    result, so that the minimum gets mapped to @a and the maximum to @b
    (see linear_rescale)."""
    logs = numpy.log(1 + vec)
    return linear_rescale(logs, a, b)
class VectorApply(godb_models.Merger):
    """Merger wrapping another merger: @add_fc is applied to every vector
    before it is added to the wrapped merger, @finish_fc is applied to the
    result the wrapped merger gives in the end."""

    # NOTE(review): the line with the add_fc parameter is missing in the
    # available source, its default is assumed to be the same as for finish_fc
    def __init__(self, merger,
                 add_fc=identity,
                 finish_fc=identity):
        self.merger = merger
        self.finish_fc = finish_fc
        self.add_fc = add_fc

    def start(self, bw_gen):
        self.merger.start(bw_gen)

    def add(self, vector, color=None):
        self.merger.add(self.add_fc(vector), color)

    def finish(self):
        return self.finish_fc( self.merger.finish() )

    def __repr__(self):
        return "VectorApply(%s, add_fc=%s, finish_fc=%s)" % (repr(self.merger),
                                                             repr(self.add_fc),
                                                             repr(self.finish_fc))
622 class PatternVectorMaker
:
623 def __init__(self
, all_pat
, n
):
624 self
.all_pat
= all_pat
627 self
.annotations
= []
630 with
open(self
.all_pat
.filename
, 'r') as fin
:
631 # take first n patterns
632 for num
, line
in enumerate(fin
):
635 match
= re
.match(pat_file_regexp
, line
)
637 raise IOError("Wrong file format: " + self
.all_pat
)
638 pattern
= match
.group(2)
639 self
.pat2order
[pattern
] = num
640 self
.annotations
.append(pattern
)
642 self
.types
= [ "continuous" ] * len(self
.annotations
)
644 if len(self
.pat2order
) < self
.n
:
645 raise ValueError("Input file all_pat '%s' does not have enough lines."%(self
.all_pat
))
648 return "PatternVectorMaker(all_pat=%s, n=%d)"%(self
.all_pat
, self
.n
)
650 def __call__(self
, sum_patfile
):
651 vector
= numpy
.zeros(self
.n
)
653 with
open(sum_patfile
.filename
, 'r') as fin
:
655 match
= re
.match(pat_file_regexp
, line
)
657 raise IOError("Wrong file format: " + str(sum_patfile
))
659 index
= self
.pat2order
.get(match
.group(2), None)
661 vector
[index
] += int(match
.group(1))
664 # no need to walk through the whole files, the patterns (match.group(2))
665 # are unique since the patfile is summarized
671 ## game -> BlackWhite( vector_black, vector_white )
class BWPatternVectorGenerator:
    """Game vector generator made of two steps: @bw_game_summarize is called
    with a game, and @pattern_vector_maker is then mapped over both sides of
    its result (via map_both).

    The annotations and types are taken over from the @pattern_vector_maker.
    """

    def __init__(self, bw_game_summarize, pattern_vector_maker):
        self.pattern_vector_maker = pattern_vector_maker
        self.bw_game_summarize = bw_game_summarize

        self.annotations = pattern_vector_maker.annotations
        self.types = pattern_vector_maker.types

    def __repr__(self):
        return "BWPatternVectorGenerator(bw_game_summarize=%s, pattern_vector_maker=%s)"%(
                    repr(self.bw_game_summarize), repr(self.pattern_vector_maker))

    def __call__(self, game):
        bw = self.bw_game_summarize(game)
        return bw.map_both(self.pattern_vector_maker)
689 @declare_pure_function
def process_game(game, init, pathway):
    """Runs the @game through a processing pathway.

    @init is called with the game, the @pathway is then handed to the
    map_pathway method of what @init returned; the result of map_pathway
    is returned.
    """
    bw = init(game)
    return bw.map_pathway(pathway)
695 @declare_pure_function
def process_one_side_list(osl, merger, bw_processor):
    """Hands @merger and @bw_processor over to the for_one_side_list method
    of the one side list @osl and returns its result."""
    result = osl.for_one_side_list(merger, bw_processor)
    return result
699 ## Process One Side List
class OSLVectorGenerator:
    """
    Maps one side lists to vectors, using different game vector generators (e.g. BWPatternVectorGenerator), e.g:
        OSLVectorGenerator([(vg1, m1), (vg2, m2)])

        game1       m1.add(vg1(game1))      m2.add(vg2(game1))
        game2       m1.add(vg1(game2))      m2.add(vg2(game2))
        ...
        game666     m1.add(vg1(game666))    m2.add(vg2(game666))
                    m1.finish()             m2.finish()
                    = [1,2,3,4,5]           = [6,7,8,9,10]
                    vg1.annotations         vg2.annotations
                    = [f1, ..., f5]         =[f6, ..., f10]
        ----------------------------------------------
        result = [ 1,2,3,4,5,6,7,8,9,10 ]
        annotations = [ f1, ..., f10 ]
    """

    def __init__(self, gen_n_merge, annotate_featurewise=True):
        """
        @gen_n_merge            list of (game vector generator, merger) pairs
        @annotate_featurewise   if set, annotations of the n-th generator (counted
                                from zero) get the prefix 'f<n>'
        """
        self.gen_n_merge = gen_n_merge
        self.annotate_featurewise = annotate_featurewise

        self.annotations = []
        self.types = []
        self.functions = []
        for num, (game_vg, merger) in enumerate(gen_n_merge):
            self.functions.append(
                # this function maps one_side_list to a vector
                # where vectors from a game in the osl are merged using the merger
                partial_right(process_one_side_list, merger, game_vg))

            anns = game_vg.annotations
            if annotate_featurewise:
                anns = [ 'f%d%s' % (num, an) for an in anns ]

            self.annotations.extend(anns)
            self.types.extend(game_vg.types)

    def __repr__(self):
        return "OSLVectorGenerator(gen_n_merge=%s, annotate_featurewise=%s)"%(repr(self.gen_n_merge),
                                                                              repr(self.annotate_featurewise) )

    def __call__(self, osl):
        # stack vectors from different generators together
        return numpy.hstack( [ f(osl) for f in self.functions ] )
def make_all_pat(osl, bw_summarize_pathway):
    """Processes the one side list @osl with @bw_summarize_pathway as the game
    processor and a fresh SummarizeMerger as the merger, returns the result
    of process_one_side_list."""
    merger = SummarizeMerger()
    return process_one_side_list(osl, merger, bw_summarize_pathway)
751 @declare_pure_function
def osl_vector_gen_cached(osl_gen, osl):
    """Just to emulate caching for osl_gen.__call__ method.
    This is a bit ugly, since this should really be handled by the caching itself to allow for
    decorating class methods.

    Returns osl_gen(osl)."""
    return osl_gen(osl)
758 @declare_pure_function
@declare_pure_function
def make_tab_file(datamap, vg_osl, osl_name_as_meta=True, osl_size_as_meta=True, image_name_as_meta=True):
    """As specified in http://orange.biolab.si/doc/reference/Orange.data.formats/
    If image_name_as_meta or osl_name_as_meta parameters are present, the names of the
    respective objects are added as meta columns.  The same holds for
    osl_size_as_meta and the size of the one side list.

    The file has three header rows (column names, column types, column roles)
    and then one row for every (osl, image) pair in the @datamap: the vector
    made by @vg_osl for the osl, the image data converted to floats and the
    meta columns asked for.

    Returns the '.tab' result file.  Raises RuntimeError if some element to
    be written contains a tab.
    """
    tab_file = get_output_resultfile('.tab')

    def tab_denoted(fout, l):
        """Writes tab-denoted elements of list @l to output stream @fout"""
        # a real list is needed, the strings are walked through twice
        strings = [ str(x) for x in l ]
        if any( '\t' in s for s in strings ):
            raise RuntimeError("Elements of tab-denoted list must not contain tabs.")
        fout.write('\t'.join(strings) + '\n')

    def get_meta(osl_m, osl_size_m, image_m):
        # keeps only the values of the meta columns that were asked for
        return list( itertools.compress((osl_m, osl_size_m, image_m),
                                        (osl_name_as_meta, osl_size_as_meta, image_name_as_meta)))

    with open(tab_file.filename, 'w') as fout:
        # annotations - column names
        tab_denoted(fout, chain( vg_osl.annotations,
                                 datamap.image_annotations,
                                 get_meta('OSL name', 'OSL size', 'Image name')))

        # column data types
        # NOTE(review): the middle argument is missing in the available source,
        # datamap.image_types is assumed (it is what the next row counts)
        tab_denoted(fout, chain( vg_osl.types,
                                 datamap.image_types,
                                 get_meta('string', 'continuous', 'string')))

        # column info type: empty (normal columns) / class (main class attribute) / multiclass / meta
        tab_denoted(fout, chain( # attributes are no class
                                 [''] * len(vg_osl.types),
                                 # for the first class attribute if present
                                 [ 'class' ] * len(datamap.image_types[:1]),
                                 # for the following class attributes if present
                                 [ 'meta' ] * len(datamap.image_types[1:]),
                                 # meta information if requested
                                 get_meta('meta', 'meta', 'meta')))

        # the data, one row per (osl, image) pair
        for num, (osl, image) in enumerate(datamap):
            logging.info('Tab file %d%% (%d / %d)'%(100* (num+1) / len(datamap), num+1, len(datamap)))

            tab_denoted(fout, chain( # the osl
                                     osl_vector_gen_cached(vg_osl, osl),
                                     # the image
                                     map(float, image.data),
                                     # the meta columns
                                     get_meta(osl.name, float(len(osl)), image.name)))

    return tab_file
826 if __name__
== '__main__':
829 from logging
import handlers
831 logger
= logging
.getLogger()
832 logger
.setLevel(logging
.INFO
)
833 ch
= handlers
.WatchedFileHandler('LOG', mode
='w')
834 logger
.addHandler(ch
)
836 from utils
.godb_models
import Game
, GameList
, OneSideList
, PLAYER_COLOR_BLACK
, PLAYER_COLOR_WHITE
837 from utils
.godb_session
import godb_session_maker
838 from utils
import db_cache
842 s
= godb_session_maker(filename
=':memory:')
846 gl
= GameList("pokus")
847 s
.godb_scan_dir_as_gamelist('./TEST_FILES/games', gl
)
850 # add all the games into the all.pat file
851 osl
= OneSideList("all.pat")
852 osl
.batch_add(gl
.games
, PLAYER_COLOR_BLACK
)
853 osl
.batch_add(gl
.games
, PLAYER_COLOR_WHITE
)
857 ## Prepare the pattern vector game processing pathway
858 ## game -> BlackWhite( vector_black, vector_white )
860 spatial_dict
= generate_spatial_dictionary(gl
, spatmin
=2)
862 # the pathway: game -> bw rawpat files -> bw transformed rawpat files -> bw summarized pat files
863 bw_game_summarize
= partial_right(process_game
,
864 partial_right(raw_patternscan_game
, spatial_dict
),
865 [ partial_right(transform_rawpatfile
,
866 #transform={ 'border':partial_right(minus, 1) },
867 ignore
=['border', 'cont']),
868 summarize_rawpat_file
870 all_pat
= make_all_pat(osl
, bw_game_summarize
)
872 vg_pat
= BWPatternVectorGenerator( bw_game_summarize
,
873 PatternVectorMaker(all_pat
, 100) )
874 vg_local
= BWLocalSeqVectorGenerator()
875 vg_bdist
= BWBdistVectorGenerator()
886 ## Process One Side List
888 gen_n_merge
= [ (vg_pat
, VectorApply(VectorSumMerger(), finish_fc
=linear_rescale
)),
889 (vg_local
, VectorArithmeticMeanMerger()),
890 (vg_bdist
, VectorArithmeticMeanMerger())]
892 vg_osl
= OSLVectorGenerator(gen_n_merge
)
894 generate
= partial( osl_vector_gen_cached
, vg_osl
)
897 #vec, annotations = vg_osl(osl), vg_osl.annotations
901 ## now the pathway is ready, we can process whatewer OSL we
902 # feel up to, osl in the following is just an example
903 vec
, annotations
= generate(osl
), vg_osl
.annotations
905 for i
in xrange(len(annotations
)):
906 print vec
[i
], '\t\t', annotations
[i
]
911 from pylab
import figure
, scatter
, subplot
, show
913 vec
= numpy
.random
.random( size
=10)
915 print linear_rescale(vec
, a
=-20, b
=20)
917 vec
= numpy
.array([ 452915., 288357., 271245., 111039., 84811., 74074.,
918 58663., 62257., 55296., 46359., 51022., 41049.,
919 31297., 35259., 34467., 30918., 29869., 36875.,
920 29592., 28075., 25823., 27479., 26343., 26964.,
921 24093., 24724., 23135., 22266., 21725., 21769.,
922 20130., 21625., 20200., 20619., 19741., 19049.,
923 17434., 20167., 19830., 16458., 16513., 21720.,
924 20933., 20216., 18414., 17442., 12046., 16186.,
925 16732., 16142., 15126., 15332., 15435., 12925.,
926 14072., 16321., 11391., 14884., 13147., 15162.,
927 14247., 15578., 11826., 12009., 11533., 12349.,
928 12219., 12590., 10581., 14550., 10699., 12384.,
929 11795., 10769., 12617., 12576., 12281., 11311.,
930 12479., 11327., 11398., 11814., 11050., 10248.,
931 10506., 11541., 12401., 9580., 11201., 10704.,
932 9766., 10402., 9422., 12888., 9473., 9536.,
933 10933., 10844., 11005., 8112., 0.])
937 scatter(range(len(vec
)), vec
, marker
='x', c
='r')
939 scatter(range(len(vec
)), linear_rescale(vec
), marker
='x', c
='g')
941 scatter(range(len(vec
)), numpy
.log(1 + vec
), marker
='x', c
='b')
943 scatter(range(len(vec
)), log_rescale(vec
), marker
='x', c
='y')
945 scatter(range(len(vec
)), vec
/ sum(vec
), marker
='x', c
='b')
948 def test_bdist_hist():
949 s
= godb_session_maker(filename
=':memory:')#, echo=True)
950 game
= s
.godb_sgf_to_game('./TEST_FILES/test_bdist2.sgf')
952 bdg
= BWBdistVectorGenerator(by_line
=[2, 3, 4], by_moves
=[4, 6])
954 assert len(bdg
.annotations
) == len(bw
[0]) == len(bw
[1])
956 print "Interval \t\tBlack\tWhite"
958 for ann
, b
, w
in zip( bdg
.annotations
, bw
[0], bw
[1] ):
959 print "%s\t\t"%(ann), int(b
), "\t", int(w
)
962 s
= godb_session_maker(filename
=':memory:')#, echo=True)
963 #gl = s.godb_add_dir_as_gamelist('./files/')
965 game
= s
.godb_sgf_to_game('../data/go_teaching_ladder/reviews/5443-breakfast-m711-A2.sgf')
967 bdg
= BWWinStatVectorGenerator()
968 #bdg = BWWinPointsStatVectorGenerator()
972 assert len(bdg
.annotations
) == len(bw
[0]) == len(bw
[1])
974 print "Interval \t\tBlack\tWhite"
976 for ann
, b
, w
in zip( bdg
.annotations
, bw
[0], bw
[1] ):
977 print "%30s\t\t" % (ann
), b
, "\t", w
984 print "=" * 10, "\n"+text
+"\n", "=" * 10
986 header("PROCESSING PATHWAY TEST")
991 header("RESCALE TEST")
993 header("BDIST HIST TEST")
996 header("WINSTAT TEST")