Win32 MinGW compilation changes, chat.c requires regex parsing library.
[pachi/nmclean.git] / tools / sgflib / sgflib.py
blobffe2d2929f6d048f12842faa3c05678b3a316553
1 #!/usr/local/bin/python
3 # sgflib.py (Smart Game Format Parser Library)
4 # Copyright (C) 2000 David John Goodger (dgoodger@bigfoot.com)
5 #
6 # This library is free software; you can redistribute it and/or modify it
7 # under the terms of the GNU Lesser General Public License as published by the
8 # Free Software Foundation; either version 2 of the License, or (at your
9 # option) any later version.
11 # This library is distributed in the hope that it will be useful, but WITHOUT
12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
14 # for more details.
16 # You should have received a copy of the GNU Lesser General Public License
17 # (lgpl.txt) along with this library; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 # The license is currently available on the Internet at:
20 # http://www.gnu.org/copyleft/lesser.html
22 """
23 =============================================
24 Smart Game Format Parser Library: sgflib.py
25 =============================================
26 version 1.0 (2000-03-27)
28 Homepage: [[http://gotools.sourceforge.net]]
30 Copyright (C) 2000 David John Goodger ([[mailto:dgoodger@bigfoot.com]]; davidg
31 on NNGS, IGS, goclub.org). sgflib.py comes with ABSOLUTELY NO WARRANTY. This is
32 free software, and you are welcome to redistribute it and/or modify it under the
33 terms of the GNU Lesser General Public License; see the source code for details.
35 Description
36 ===========
37 This library contains a parser and classes for SGF, the Smart Game Format. SGF
38 is a text only, tree based file format designed to store game records of board
39 games for two players, most commonly for the game of go. (See the official SGF
40 specification at [[http://www.POBoxes.com/sgf/]]).
42 Given a string containing a complete SGF data instance, the 'SGFParser' class
43 will create a 'Collection' object consisting of one or more 'GameTree''s (one
44 'GameTree' per game in the SGF file), each containing a sequence of 'Node''s and
45 (potentially) two or more variation 'GameTree''s (branches). Each 'Node'
46 contains an ordered dictionary of 'Property' ID/value pairs (note that values
47 are lists, and can have multiple entries).
49 Tree traversal methods are provided through the 'Cursor' class.
51 The default representation (using 'str()' or 'print') of each class of SGF
52 objects is the Smart Game Format itself."""
55 # Revision History:
57 # 1.0 (2000-03-27): First public release.
58 # - Ready for prime time.
60 # 0.1 (2000-01-16):
61 # - Initial idea & started coding.
64 import string, re
65 from typelib import List, Dictionary
68 # Parsing Exceptions
70 class EndOfDataParseError(Exception):
71 """ Raised by 'SGFParser.parseVariations()', 'SGFParser.parseNode()'."""
72 pass
74 class GameTreeParseError(Exception):
75 """ Raised by 'SGFParser.parseGameTree()'."""
76 pass
78 class NodePropertyParseError(Exception):
79 """ Raised by 'SGFParser.parseNode()'."""
80 pass
82 class PropertyValueParseError(Exception):
83 """ Raised by 'SGFParser.parsePropertyValue()'."""
84 pass
86 # Tree Construction Exceptions
88 class DirectAccessError(Exception):
89 """ Raised by 'Node.__setitem__()', 'Node.update()'."""
90 pass
92 class DuplicatePropertyError(Exception):
93 """ Raised by 'Node.addProperty()'."""
94 pass
96 # Tree Navigation Exceptions
97 class GameTreeNavigationError(Exception):
98 """ Raised by 'Cursor.next()'."""
99 pass
101 class GameTreeEndError(Exception):
102 """ Raised by 'Cursor.next()', 'Cursor.previous()'."""
103 pass
106 # for type checking
107 INT_TYPE = type(0) # constant
109 # miscellaneous constants
110 MAX_LINE_LEN = 76 # constant; for line breaks
113 class SGFParser:
115 Parser for SGF data. Creates a tree structure based on the SGF standard
116 itself. 'SGFParser.parse()' will return a 'Collection' object for the entire
117 data.
119 Instance Attributes:
120 - self.data : string -- The complete SGF data instance.
121 - self.datalen : integer -- Length of 'self.data'.
122 - self.index : integer -- Current parsing position in 'self.data'.
124 Class Attributes:
125 - re* : re.RegexObject -- Regular expression text matching patterns.
126 - ctrltrans: string[256] -- Control character translation table for
127 string.translate(), used to remove all control characters from Property
128 values. May be overridden (preferably in instances)."""
130 # text matching patterns
131 reGameTreeStart = re.compile(r'\s*\(')
132 reGameTreeEnd = re.compile(r'\s*\)')
133 reGameTreeNext = re.compile(r'\s*(;|\(|\))')
134 reNodeContents = re.compile(r'\s*([A-Za-z]+(?=\s*\[))')
135 rePropertyStart = re.compile(r'\s*\[')
136 rePropertyEnd = re.compile(r'\]')
137 reEscape = re.compile(r'\\')
138 reLineBreak = re.compile(r'\r\n?|\n\r?') # CR, LF, CR/LF, LF/CR
141 # character translation tables
142 # for control characters (except LF \012 & CR \015): convert to spaces
143 ctrltrans = string.maketrans("\000\001\002\003\004\005\006\007" +
144 "\010\011\013\014\016\017\020\021\022\023\024\025\026\027" +
145 "\030\031\032\033\034\035\036\037", " "*30)
147 def __init__(self, data):
148 """ Initialize the instance attributes. See the class itself for info."""
149 self.data = data
150 self.datalen = len(data)
151 self.index = 0
153 def parse(self):
154 """ Parses the SGF data stored in 'self.data', and returns a 'Collection'."""
155 c = Collection()
156 while self.index < self.datalen:
157 g = self.parseOneGame()
158 if g:
159 c.append(g)
160 else:
161 break
162 return c
164 def parseOneGame(self):
165 """ Parses one game from 'self.data'. Returns a 'GameTree' containing
166 one game, or 'None' if the end of 'self.data' has been reached."""
167 if self.index < self.datalen:
168 match = self.reGameTreeStart.match(self.data, self.index)
169 if match:
170 self.index = match.end()
171 return self.parseGameTree()
172 return None
174 def parseGameTree(self):
175 """ Called when "(" encountered, ends when a matching ")" encountered.
176 Parses and returns one 'GameTree' from 'self.data'. Raises
177 'GameTreeParseError' if a problem is encountered."""
178 g = GameTree()
179 while self.index < self.datalen:
180 match = self.reGameTreeNext.match(self.data, self.index)
181 if match:
182 self.index = match.end()
183 if match.group(1) == ";": # found start of node
184 if g.variations:
185 raise GameTreeParseError(
186 "A node was encountered after a variation.")
187 g.append(g.makeNode(self.parseNode()))
188 elif match.group(1) == "(": # found start of variation
189 g.variations = self.parseVariations()
190 else: # found end of GameTree ")"
191 return g
192 else: # error
193 raise GameTreeParseError
194 return g
196 def parseVariations(self):
197 """ Called when "(" encountered inside a 'GameTree', ends when a
198 non-matching ")" encountered. Returns a list of variation
199 'GameTree''s. Raises 'EndOfDataParseError' if the end of 'self.data'
200 is reached before the end of the enclosing 'GameTree'."""
201 v = []
202 while self.index < self.datalen:
203 # check for ")" at end of GameTree, but don't consume it
204 match = self.reGameTreeEnd.match(self.data, self.index)
205 if match:
206 return v
207 g = self.parseGameTree()
208 if g:
209 v.append(g)
210 # check for next variation, and consume "("
211 match = self.reGameTreeStart.match(self.data, self.index)
212 if match:
213 self.index = match.end()
214 raise EndOfDataParseError
216 def parseNode(self):
217 """ Called when ";" encountered (& is consumed). Parses and returns one
218 'Node', which can be empty. Raises 'NodePropertyParseError' if no
219 property values are extracted. Raises 'EndOfDataParseError' if the
220 end of 'self.data' is reached before the end of the node (i.e., the
221 start of the next node, the start of a variation, or the end of the
222 enclosing game tree)."""
223 n = Node()
224 while self.index < self.datalen:
225 match = self.reNodeContents.match(self.data, self.index)
226 if match:
227 self.index = match.end()
228 pvlist = self.parsePropertyValue()
229 if pvlist:
230 n.addProperty(n.makeProperty(match.group(1), pvlist))
231 else:
232 raise NodePropertyParseError
233 else: # reached end of Node
234 return n
235 raise EndOfDataParseError
237 def parsePropertyValue(self):
238 """ Called when "[" encountered (but not consumed), ends when the next
239 property, node, or variation encountered. Parses and returns a list
240 of property values. Raises 'PropertyValueParseError' if there is a
241 problem."""
242 pvlist = []
243 while self.index < self.datalen:
244 match = self.rePropertyStart.match(self.data, self.index)
245 if match:
246 self.index = match.end()
247 v = "" # value
248 # scan for escaped characters (using '\'), unescape them (remove linebreaks)
249 mend = self.rePropertyEnd.search(self.data, self.index)
250 mesc = self.reEscape.search(self.data, self.index)
251 while mesc and mend and (mesc.end() < mend.end()):
252 # copy up to '\', but remove '\'
253 v = v + self.data[self.index:mesc.start()]
254 mbreak = self.reLineBreak.match(self.data, mesc.end())
255 if mbreak:
256 self.index = mbreak.end() # remove linebreak
257 else:
258 v = v + self.data[mesc.end()] # copy escaped character
259 self.index = mesc.end() + 1 # move to point after escaped char
260 mend = self.rePropertyEnd.search(self.data, self.index)
261 mesc = self.reEscape.search(self.data, self.index)
262 if mend:
263 v = v + self.data[self.index:mend.start()]
264 self.index = mend.end()
265 pvlist.append(self._convertControlChars(v))
266 else:
267 raise PropertyValueParseError
268 else: # reached end of Property
269 break
270 if len(pvlist) >= 1:
271 return pvlist
272 else:
273 raise PropertyValueParseError
275 def _convertControlChars(self, text):
276 """ Converts control characters in 'text' to spaces, using the
277 'self.ctrltrans' translation table. Override for variant
278 behaviour."""
279 return string.translate(text, self.ctrltrans)
282 class RootNodeSGFParser(SGFParser):
283 """ For parsing only the first 'GameTree''s root Node of an SGF file."""
285 def parseNode(self):
286 """ Calls 'SGFParser.parseNode()', sets 'self.index' to point to the end
287 of the data (effectively ending the 'GameTree' and 'Collection'),
288 and returns the single (root) 'Node' parsed."""
289 n = SGFParser.parseNode(self) # process one Node as usual
290 self.index = self.datalen # set end of data
291 return n # we're only interested in the root node
294 class Collection(List):
296 An SGF collection: multiple 'GameTree''s. Instance atributes:
297 - self[.data] : list of 'GameTree' -- One 'GameTree' per game."""
299 def __str__(self):
300 """ SGF representation. Separates game trees with a blank line."""
301 return string.join(map(str, self.data), "\n"*2)
303 def cursor(self, gamenum=0):
304 """ Returns a 'Cursor' object for navigation of the given 'GameTree'."""
305 return Cursor(self[gamenum])
308 class GameTree(List):
310 An SGF game tree: a game or variation. Instance attributes:
311 - self[.data] : list of 'Node' -- game tree 'trunk'.
312 - self.variations : list of 'GameTree' -- 0 or 2+ variations.
313 'self.variations[0]' contains the main branch (sequence actually played)."""
315 def __init__(self, nodelist=None, variations=None):
317 Initialize the 'GameTree'. Arguments:
318 - nodelist : 'GameTree' or list of 'Node' -- Stored in 'self.data'.
319 - variations : list of 'GameTree' -- Stored in 'self.variations'."""
320 List.__init__(self, nodelist)
321 self.variations = variations or []
323 def __str__(self):
324 """ SGF representation, with proper line breaks between nodes."""
325 if len(self):
326 s = "(" + str(self[0]) # append the first Node automatically
327 l = len(string.split(s, "\n")[-1]) # accounts for line breaks within Nodes
328 for n in map(str, self[1:]):
329 if l + len(string.split(n, "\n")[0]) > MAX_LINE_LEN:
330 s = s + "\n"
331 l = 0
332 s = s + n
333 l = len(string.split(s, "\n")[-1])
334 return s + string.join(map(str, [""] + self.variations), "\n") + ")"
335 else:
336 return "" # empty GameTree illegal; "()" illegal
338 def mainline(self):
339 """ Returns the main line of the game (variation A) as a 'GameTree'."""
340 if self.variations:
341 return GameTree(self.data + self.variations[0].mainline())
342 else:
343 return self
345 def makeNode(self, plist):
347 Create a new 'Node' containing the properties contained in 'plist'.
348 Override/extend to create 'Node' subclass instances (move, setup).
349 Argument:
350 - plist : 'Node' or list of 'Property'"""
351 return Node(plist)
353 def cursor(self):
354 """ Returns a 'Cursor' object for navigation of this 'GameTree'."""
355 return Cursor(self)
357 def propertySearch(self, pid, getall=0):
359 Searches this 'GameTree' for nodes containing matching properties.
360 Returns a 'GameTree' containing the matched node(s). Arguments:
361 - pid : string -- ID of properties to search for.
362 - getall : boolean -- Set to true (1) to return all 'Node''s that
363 match, or to false (0) to return only the first match."""
364 matches = []
365 for n in self:
366 if n.has_key(pid):
367 matches.append(n)
368 if not getall:
369 break
370 else: # getall or not matches:
371 for v in self.variations:
372 matches = matches + v.propertySearch(pid, getall)
373 if not getall and matches:
374 break
375 return GameTree(matches)
378 class Node(Dictionary):
380 An SGF node. Instance Attributes:
381 - self[.data] : ordered dictionary -- '{Property.id:Property}' mapping.
382 (Ordered dictionary: allows offset-indexed retrieval). Properties *must*
383 be added using 'self.addProperty()'.
385 Example: Let 'n' be a 'Node' parsed from ';B[aa]BL[250]C[comment]':
386 - 'str(n["BL"])' => '"BL[250]"'
387 - 'str(n[0])' => '"B[aa]"'
388 - 'map(str, n)' => '["B[aa]","BL[250]","C[comment]"]'"""
390 def __init__(self, plist=[]):
392 Initializer. Argument:
393 - plist: Node or list of 'Property'."""
394 Dictionary.__init__(self)
395 self.order = []
396 for p in plist:
397 self.addProperty(p)
399 def __getitem__(self, key):
400 """ On 'self[key]', 'x in self', 'for x in self'. Implements all
401 indexing-related operations. Allows both key- and offset-indexed
402 retrieval. Membership and iteration ('in', 'for') repeatedly index
403 from 0 until 'IndexError'."""
404 if type(key) is INT_TYPE:
405 return self.order[key]
406 else:
407 return self.data[key]
409 def __setitem__(self, key, x):
410 """ On 'self[key]=x'. Allows assignment to existing items only. Raises
411 'DirectAccessError' on new item assignment."""
412 if self.has_key(key):
413 self.order[self.order.index(self[key])] = x
414 Dictionary.__setitem__(self, key, x)
415 else:
416 raise DirectAccessError(
417 "Properties may not be added directly; use addProperty() instead.")
419 def __delitem__(self, key):
420 """ On 'del self[key]'. Updates 'self.order' to maintain consistency."""
421 self.order.remove(self[key])
422 Dictionary.__delitem__(self, key)
424 def __getslice__(self, low, high):
425 """ On 'self[low:high]'."""
426 return self.order[low:high]
428 def __str__(self):
429 """ SGF representation, with proper line breaks between properties."""
430 if len(self):
431 s = ";" + str(self[0])
432 l = len(string.split(s, "\n")[-1]) # accounts for line breaks within Properties
433 for p in map(str, self[1:]):
434 if l + len(string.split(p, "\n")[0]) > MAX_LINE_LEN:
435 s = s + "\n"
436 l = 0
437 s = s + p
438 l = len(string.split(s, "\n")[-1])
439 return s
440 else:
441 return ";"
443 def update(self, dict):
444 """ 'Dictionary' method not applicable to 'Node'. Raises
445 'DirectAccessError'."""
446 raise DirectAccessError(
447 "The update() method is not supported by Node; use addProperty() instead.")
449 def addProperty(self, property):
451 Adds a 'Property' to this 'Node'. Checks for duplicate properties
452 (illegal), and maintains the property order. Argument:
453 - property : 'Property'"""
454 if self.has_key(property.id):
455 raise DuplicatePropertyError
456 else:
457 self.data[property.id] = property
458 self.order.append(property)
460 def makeProperty(self, id, valuelist):
462 Create a new 'Property'. Override/extend to create 'Property'
463 subclass instances (move, setup, game-info, etc.). Arguments:
464 - id : string
465 - valuelist : 'Property' or list of values"""
466 return Property(id, valuelist)
469 class Property(List):
471 An SGF property: a set of label and value(s). Instance attributes:
472 - self[.data] : list -- property values.
473 - self.id : string -- SGF standard property label.
474 - self.name : string -- actual label used in the SGF data. For example, the
475 property 'CoPyright[...]' has name 'CoPyright' and id 'CP'."""
477 def __init__(self, id, values, name=None):
479 Initialize the 'Property'. Arguments:
480 - id : string
481 - name : string (optional) -- If not given, 'self.name'
482 - nodelist : 'GameTree' or list of 'Node' -- Stored in 'self.data'.
483 - variations : list of 'GameTree' -- Stored in 'self.variations'."""
484 List.__init__(self, values) # XXX will _convert work here?
485 self.id = id
486 self.name = name or id
488 def __str__(self):
489 return self.name + "[" + string.join(map(_escapeText, self), "][") + "]"
492 class Cursor:
494 'GameTree' navigation tool. Instance attributes:
495 - self.game : 'GameTree' -- The root 'GameTree'.
496 - self.gametree : 'GameTree' -- The current 'GameTree'.
497 - self.node : 'Node' -- The current Node.
498 - self.nodenum : integer -- The offset of 'self.node' from the root of
499 'self.game'. The nodenum of the root node is 0.
500 - self.index : integer -- The offset of 'self.node' within 'self.gametree'.
501 - self.stack : list of 'GameTree' -- A record of 'GameTree''s traversed.
502 - self.children : list of 'Node' -- All child nodes of the current node.
503 - self.atEnd : boolean -- Flags if we are at the end of a branch.
504 - self.atStart : boolean -- Flags if we are at the start of the game."""
506 def __init__(self, gametree):
507 """ Initialize root 'GameTree' and instance variables."""
508 self.game = gametree # root GameTree
509 self.reset()
511 def reset(self):
512 """ Set 'Cursor' to point to the start of the root 'GameTree', 'self.game'."""
513 self.gametree = self.game
514 self.nodenum = 0
515 self.index = 0
516 self.stack = []
517 self.node = self.gametree[self.index]
518 self._setChildren()
519 self._setFlags()
521 def next(self, varnum=0):
523 Moves the 'Cursor' to & returns the next 'Node'. Raises
524 'GameTreeEndError' if the end of a branch is exceeded. Raises
525 'GameTreeNavigationError' if a non-existent variation is accessed.
526 Argument:
527 - varnum : integer, default 0 -- Variation number. Non-zero only
528 valid at a branching, where variations exist."""
529 if self.index + 1 < len(self.gametree): # more main line?
530 if varnum != 0:
531 raise GameTreeNavigationError("Nonexistent variation.")
532 self.index = self.index + 1
533 elif self.gametree.variations: # variations exist?
534 if varnum < len(self.gametree.variations):
535 self.stack.append(self.gametree)
536 self.gametree = self.gametree.variations[varnum]
537 self.index = 0
538 else:
539 raise GameTreeNavigationError("Nonexistent variation.")
540 else:
541 raise GameTreeEndError
542 self.node = self.gametree[self.index]
543 self.nodenum = self.nodenum + 1
544 self._setChildren()
545 self._setFlags()
546 return self.node
548 def previous(self):
549 """ Moves the 'Cursor' to & returns the previous 'Node'. Raises
550 'GameTreeEndError' if the start of a branch is exceeded."""
551 if self.index - 1 >= 0: # more main line?
552 self.index = self.index - 1
553 elif self.stack: # were we in a variation?
554 self.gametree = self.stack.pop()
555 self.index = len(self.gametree) - 1
556 else:
557 raise GameTreeEndError
558 self.node = self.gametree[self.index]
559 self.nodenum = self.nodenum - 1
560 self._setChildren()
561 self._setFlags()
562 return self.node
564 def _setChildren(self):
565 """ Sets up 'self.children'."""
566 if self.index + 1 < len(self.gametree):
567 self.children = [self.gametree[self.index+1]]
568 else:
569 self.children = map(lambda list: list[0], self.gametree.variations)
571 def _setFlags(self):
572 """ Sets up the flags 'self.atEnd' and 'self.atStart'."""
573 self.atEnd = not self.gametree.variations and (self.index + 1 == len(self.gametree))
574 self.atStart = not self.stack and (self.index == 0)
577 reCharsToEscape = re.compile(r'\]|\\') # characters that need to be \escaped
579 def _escapeText(text):
580 """ Adds backslash-escapes to property value characters that need them."""
581 output = ""
582 index = 0
583 match = reCharsToEscape.search(text, index)
584 while match:
585 output = output + text[index:match.start()] + '\\' + text[match.start()]
586 index = match.end()
587 match = reCharsToEscape.search(text, index)
588 output = output + text[index:]
589 return output
592 def selfTest1(onConsole=0):
593 """ Canned data test case"""
594 sgfdata = r""" (;GM [1]US[someone]CoPyright[\
595 Permission to reproduce this game is given.]GN[a-b]EV[None]RE[B+Resign]
596 PW[a]WR[2k*]PB[b]BR[4k*]PC[somewhere]DT[2000-01-16]SZ[19]TM[300]KM[4.5]
597 HA[3]AB[pd][dp][dd];W[pp];B[nq];W[oq]C[ x started observation.
598 ](;B[qc]C[ [b\]: \\ hi x! ;-) \\];W[kc])(;B[hc];W[oe])) """
599 print "\n\n********** Self-Test 1 **********\n"
600 print "Input data:\n"
601 print sgfdata
602 print "\n\nParsed data: "
603 col = SGFParser(sgfdata).parse()
604 print "done\n"
605 cstr = str(col)
606 print cstr, "\n"
607 print "Mainline:\n"
608 m = col[0].mainline()
609 print m, "\n"
610 ##print "as GameTree:\n"
611 ##print GameTree(m), "\n"
612 print "Tree traversal (forward):\n"
613 c = col.cursor()
614 while 1:
615 print "nodenum: %s; index: %s; children: %s; node: %s" % (c.nodenum, c.index, len(c.children), c.node)
616 if c.atEnd: break
617 c.next()
618 print "\nTree traversal (backward):\n"
619 while 1:
620 print "nodenum: %s; index: %s; children: %s; node: %s" % (c.nodenum, c.index, len(c.children), c.node)
621 if c.atStart: break
622 c.previous()
623 print "\nSearch for property 'B':"
624 print col[0].propertySearch("B", 1)
625 print "\nSearch for property 'C':"
626 print col[0].propertySearch("C", 1)
627 pass
629 def selfTest2(onConsole=0):
630 """ Macintosh-based SGF file test"""
631 import macfs
632 print "\n\n********** Self-Test 2 (Mac) **********\n"
633 thefile = macfs.PromptGetFile("Please choose an SGF file:")
634 if not thefile[1]:
635 return
636 srcpath = thefile[0].as_pathname()
637 src = open(srcpath, 'r')
638 sgfdata = src.read()
639 print "Input data:\n"
640 print sgfdata
641 print "\n\nParsed data:"
642 col = SGFParser(sgfdata).parse()
643 print "done\n"
644 print str(col)
647 if __name__ == '__main__':
648 print __doc__ # show module's documentation string
649 selfTest1()
650 import os
651 if os.name == 'mac':
652 selfTest2()