2 # -*- coding: utf-8 -*-
4 #Copyright 2008-2011 Carl Gherardi
5 #This program is free software: you can redistribute it and/or modify
6 #it under the terms of the GNU Affero General Public License as published by
7 #the Free Software Foundation, version 3 of the License.
9 #This program is distributed in the hope that it will be useful,
10 #but WITHOUT ANY WARRANTY; without even the implied warranty of
11 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 #GNU General Public License for more details.
14 #You should have received a copy of the GNU Affero General Public License
15 #along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #In the "official" distribution you can find the license in agpl-3.0.txt.
19 _
= L10n
.get_translation()
24 from optparse
import OptionParser
27 import xml
.dom
.minidom
29 from decimal_wrapper
import Decimal
31 from xml
.dom
.minidom
import Node
36 from pytz
import timezone
40 # logging has been set up in fpdb.py or HUD_main.py, use their settings:
41 log
= logging
.getLogger("parser")
45 from Exceptions
import *
48 class HandHistoryConverter():
50 READ_CHUNK_SIZE
= 10000 # bytes to read at a time from file in tail mode
52 # filetype can be "text" or "xml"
53 # so far always "text"
54 # subclass HHC_xml for xml parsing
57 # codepage indicates the encoding of the text file.
58 # cp1252 is a safe default
59 # "utf_8" is more likely if there are funny characters
62 re_tzOffset
= re
.compile('^\w+[+-]\d{4}$')
63 copyGameHeader
= False
66 # maybe archive params should be one archive param, then call method in specific converter. if archive: convert_archive()
67 def __init__( self
, config
, in_path
= '-', out_path
= '-', index
=0
68 , autostart
=True, starsArchive
=False, ftpArchive
=False, sitename
="PokerStars"):
70 in_path (default '-' = sys.stdin)
71 out_path (default '-' = sys.stdout)
75 self
.import_parameters
= self
.config
.get_import_parameters()
76 self
.sitename
= sitename
77 log
.info("HandHistory init - %s site, %s subclass, in_path '%s'; out_path '%s'"
78 % (self
.sitename
, self
.__class
__, in_path
, out_path
) ) # should use self.filter, not self.sitename
81 self
.starsArchive
= starsArchive
82 self
.ftpArchive
= ftpArchive
84 self
.in_path
= in_path
85 self
.out_path
= out_path
87 self
.processedHands
= []
92 # Tourney object used to store TourneyInfo when called to deal with a Summary file
96 self
.in_fh
= sys
.stdin
97 self
.out_fh
= get_out_fh(out_path
, self
.import_parameters
)
99 self
.compiledPlayers
= set()
104 self
.parsedObjectType
= "HH" #default behaviour : parsing HH files, can be "Summary" if the parsing encounters a Summary File
112 HandHistoryConverter: '%(sitename)s'
113 filetype '%(filetype)s'
114 in_path '%(in_path)s'
115 out_path '%(out_path)s'
119 """Process a hand at a time from the input specified by in_path."""
120 starttime
= time
.time()
121 if not self
.sanityCheck():
122 log
.warning(_("Failed sanity check"))
127 handsList
= self
.allHandsAsList()
128 log
.debug( _("Hands list is:") + str(handsList
))
129 log
.info(_("Parsing %d hands") % len(handsList
))
130 # Determine if we're dealing with a HH file or a Summary file
131 # quick fix : empty files make the handsList[0] fail ==> If empty file, go on with HH parsing
132 if len(handsList
) == 0 or self
.isSummary(handsList
[0]) == False:
133 self
.parsedObjectType
= "HH"
134 for handText
in handsList
:
136 self
.processedHands
.append(self
.processHand(handText
))
137 except FpdbHandPartial
, e
:
140 except FpdbParseError
:
142 log
.error(_("FpdbParseError for file '%s'") % self
.in_path
)
143 self
.numHands
= len(handsList
)
144 endtime
= time
.time()
145 log
.info(_("Read %d hands (%d failed) in %.3f seconds") % (self
.numHands
, (self
.numErrors
+ self
.numPartial
), endtime
- starttime
))
147 self
.parsedObjectType
= "Summary"
148 summaryParsingStatus
= self
.readSummaryInfo(handsList
)
149 endtime
= time
.time()
150 if summaryParsingStatus
:
151 log
.info(_("Summary file '%s' correctly parsed (took %.3f seconds)") % (self
.in_path
, endtime
- starttime
))
153 log
.warning(_("Error converting summary file '%s' (took %.3f seconds)") % (self
.in_path
, endtime
- starttime
))
156 def progressNotify(self
):
157 "A callback to the interface while events are pending"
159 while gtk
.events_pending():
160 gtk
.main_iteration(False)
162 def allHandsAsList(self
):
163 """Return a list of handtexts in the file at self.in_path"""
164 #TODO : any need for this to be generator? e.g. stars support can email one huge file of all hands in a year. Better to read bit by bit than all at once.
166 self
.obs
= self
.obs
.strip()
167 self
.obs
= self
.obs
.replace('\r\n', '\n')
168 # maybe archive params should be one archive param, then call method in specific converter?
170 # self.obs = self.convert_archive(self.obs)
171 if self
.starsArchive
== True:
172 m
= re
.compile('^Hand #\d+', re
.MULTILINE
)
173 self
.obs
= m
.sub('', self
.obs
)
175 if self
.ftpArchive
== True:
176 # Remove ******************** # 1 *************************
177 m
= re
.compile('\*{20}\s#\s\d+\s\*{20,25}\s+', re
.MULTILINE
)
178 self
.obs
= m
.sub('', self
.obs
)
180 if self
.obs
is None or self
.obs
== "":
181 log
.error(_("Read no hands from file: '%s'") % self
.in_path
)
183 handlist
= re
.split(self
.re_SplitHands
, self
.obs
)
184 # Some HH formats leave dangling text after the split
185 # ie. </game> (split) </session>EOL
186 # Remove this dangler if less than 50 characters and warn in the log
187 if len(handlist
[-1]) <= 50:
189 log
.info(_("Removing text < 50 characters"))
192 def processHand(self
, handText
):
193 if self
.copyGameHeader
:
194 gametype
= self
.determineGameType(self
.whole_file
)
196 gametype
= self
.determineGameType(handText
)
200 gametype
= "unmatched"
201 # TODO: not ideal, just trying to not error. Throw ParseException?
204 # See if gametype is supported.
205 if 'mix' not in gametype
: gametype
['mix'] = 'none'
206 if 'ante' not in gametype
: gametype
['ante'] = 0
207 type = gametype
['type']
208 base
= gametype
['base']
209 limit
= gametype
['limitType']
210 l
= [type] + [base
] + [limit
]
212 if l
in self
.readSupportedGames():
213 if gametype
['base'] == 'hold':
214 hand
= Hand
.HoldemOmahaHand(self
.config
, self
, self
.sitename
, gametype
, handText
)
215 elif gametype
['base'] == 'stud':
216 hand
= Hand
.StudHand(self
.config
, self
, self
.sitename
, gametype
, handText
)
217 elif gametype
['base'] == 'draw':
218 hand
= Hand
.DrawHand(self
.config
, self
, self
.sitename
, gametype
, handText
)
220 log
.error(_("%s Unsupported game type: %s") % (self
.sitename
, gametype
))
224 #hand.writeHand(self.out_fh)
227 log
.error(_("%s Unsupported game type: %s") % (self
.sitename
, gametype
))
228 # TODO: pity we don't know the HID at this stage. Log the entire hand?
231 # These functions are parse actions that may be overridden by the inheriting class
232 # This function should return a list of lists looking like:
233 # return [["ring", "hold", "nl"], ["tour", "hold", "nl"]]
234 # Showing all supported games limits and types
def readSupportedGames(self):
    """List the game combinations the site converter can parse.

    Site-specific hook with no base implementation.  An override should
    return a list of [type, base, limitType] lists, for example
    [["ring", "hold", "nl"], ["tour", "hold", "nl"]].

    The base body only evaluates the bare name ``abstract``.
    NOTE(review): presumably that name is undefined, so calling this
    version raises NameError -- confirm nothing star-imported defines it.
    """
    abstract
238 # should return a list
240 # [ ring, hold, nl , sb, bb ]
241 # Valid types specified in docs/tabledesign.html in Gametypes
def determineGameType(self, handText):
    """Work out the game type described by ``handText``.

    Site-specific hook with no base implementation.  processHand() reads
    the 'type', 'base' and 'limitType' keys of the returned dict and fills
    in 'mix' and 'ante' when they are absent.

    The base body only evaluates the bare name ``abstract``.
    NOTE(review): presumably that name is undefined, so calling this
    version raises NameError -- confirm.
    """
    abstract
243 """return dict with keys/values:
244 'type' in ('ring', 'tour')
245 'limitType' in ('nl', 'cn', 'pl', 'cp', 'fl')
246 'base' in ('hold', 'stud', 'draw')
247 'category' in ('holdem', 'omahahi', omahahilo', 'razz', 'studhi', 'studhilo', 'fivedraw', '27_1draw', '27_3draw', 'badugi')
248 'hilo' in ('h','l','s')
249 'mix' in (site specific, or 'none')
254 'currency' in ('USD', 'EUR', 'T$', <countrycode>)
255 or None if we fail to get the info """
256 #TODO: which parts are optional/required?
def readHandInfo(self, hand):
    """Read hand-level information and store it on ``hand``.

    Site-specific hook with no base implementation.  Per the notes in this
    file an override sets fields such as hand.startTime, hand.handid and
    hand.tourNo.

    The base body only evaluates the bare name ``abstract``.
    NOTE(review): presumably that name is undefined, so calling this
    version raises NameError -- confirm.
    """
    abstract
259 """Read and set information about the hand being dealt, and set the correct
260 variables in the Hand object 'hand
262 * hand.startTime - a datetime object
263 * hand.handid - The site identified for the hand - a string.
271 * hand.tourNo - The site identified tournament id as appropriate - a string.
279 #TODO: which parts are optional/required?
def readPlayerStacks(self, hand):
    """Identify the players at the table and register them on ``hand``.

    Site-specific hook with no base implementation.  Per the notes in this
    file an override passes each player on through
    Hand.addPlayer(seat, name, chips).

    The base body only evaluates the bare name ``abstract``.
    NOTE(review): presumably that name is undefined, so calling this
    version raises NameError -- confirm.
    """
    abstract
282 """This function is for identifying players at the table, and to pass the
283 information on to 'hand' via Hand.addPlayer(seat, name, chips)
285 At the time of writing the reference function in the PS converter is:
286 log.debug("readPlayerStacks")
287 m = self.re_PlayerInfo.finditer(hand.handText)
289 hand.addPlayer(int(a.group('SEAT')), a.group('PNAME'), a.group('CASH'))
291 Which is pretty simple because the hand history format is consistent. Other hh formats aren't so nice.
293 This is the appropriate place to identify players that are sitting out and ignore them
295 *** NOTE: You may find this is a more appropriate place to set hand.maxseats ***
def compilePlayerRegexs(self):
    """Compile the regexes that depend on the player names.

    Site-specific hook with no base implementation.

    The base body only evaluates the bare name ``abstract``.
    NOTE(review): presumably that name is undefined, so calling this
    version raises NameError -- confirm.
    """
    abstract
299 """Compile dynamic regexes -- compile player dependent regexes.
301 Depending on the ambiguity of lines you may need to match, and the complexity of
302 player names - we found that we needed to recompile some regexes for player actions so that they actually contained the player names.
305 We need to match the ante line:
308 But <Player> is actually named
310 YesI antes $4000 - A perfectly legal playername
314 YesI antes $4000 antes $1.00
316 Which without care in your regexes most people would match 'YesI' and not 'YesI antes $4000'
319 # Needs to return a MatchObject with group names identifying the streets into the Hand object
320 # so groups are called by street names 'PREFLOP', 'FLOP', 'STREET2' etc
# blinds are done separately
def markStreets(self, hand):
    """Divide the hand text into per-street sections.

    Site-specific hook with no base implementation.  Per the notes in this
    file an override supplies a match object whose groups are named after
    the streets ('PREFLOP', 'FLOP', 'STREET2', ...).

    The base body only evaluates the bare name ``abstract``.
    NOTE(review): presumably that name is undefined, so calling this
    version raises NameError -- confirm.
    """
    abstract
323 """For dividing the handText into sections.
325 The function requires you to pass a MatchObject with groups specifically labeled with
326 the 'correct' street names.
328 The Hand object will use the various matches for assigning actions to the correct streets.
331 PREFLOP, FLOP, TURN, RIVER
334 PREDEAL, DEAL, DRAWONE, DRAWTWO, DRAWTHREE
337 ANTES, THIRD, FOURTH, FIFTH, SIXTH, SEVENTH
339 The Stars HHC has a good reference implementation
342 #Needs to return a list in the format
343 # ['player1name', 'player2name', ...] where player1name is the sb and player2name is bb,
# additional players are assumed to post a bb oop
def readBlinds(self, hand):
    """Read the blinds posted in the hand history.

    Site-specific hook with no base implementation.  Per the notes in this
    file an override reports each blind through
    hand.addBlind(<name>, <blind type>, <value>).

    The base body only evaluates the bare name ``abstract``.
    NOTE(review): presumably that name is undefined, so calling this
    version raises NameError -- confirm.
    """
    abstract
346 """Function for reading the various blinds from the hand history.
348 Pass any small blind to hand.addBlind(<name>, "small blind", <value>)
349 - unless it is a single dead small blind then use:
350 hand.addBlind(<name>, 'secondsb', <value>)
351 Pass any big blind to hand.addBlind(<name>, "big blind", <value>)
352 Pass any play posting both big and small blinds to hand.addBlind(<name>, 'both', <vale>)
def readAntes(self, hand):
    """Read the antes from the hand history and pass them to hand.addAnte.

    Site-specific hook with no base implementation.

    The base body only evaluates the bare name ``abstract``.
    NOTE(review): presumably that name is undefined, so calling this
    version raises NameError -- confirm.
    """
    abstract
def readBringIn(self, hand):
    """Site-specific parsing hook with no base implementation.

    NOTE(review): by its name this presumably reads the bring-in for
    ``hand`` -- confirm against a concrete converter.

    The base body only evaluates the bare name ``abstract``; presumably
    that name is undefined, so calling this version raises NameError.
    """
    abstract
def readButton(self, hand):
    """Site-specific parsing hook with no base implementation.

    NOTE(review): by its name this presumably records the button position
    for ``hand`` -- confirm against a concrete converter.

    The base body only evaluates the bare name ``abstract``; presumably
    that name is undefined, so calling this version raises NameError.
    """
    abstract
def readHeroCards(self, hand):
    """Site-specific parsing hook with no base implementation.

    NOTE(review): by its name this presumably reads the hero's hole cards
    for ``hand`` -- confirm against a concrete converter.

    The base body only evaluates the bare name ``abstract``; presumably
    that name is undefined, so calling this version raises NameError.
    """
    abstract
def readPlayerCards(self, hand, street):
    """Site-specific parsing hook with no base implementation.

    NOTE(review): by its name this presumably reads the players' cards
    for the given ``street`` of ``hand`` -- confirm against a concrete
    converter.

    The base body only evaluates the bare name ``abstract``; presumably
    that name is undefined, so calling this version raises NameError.
    """
    abstract
def readAction(self, hand, street):
    """Site-specific parsing hook with no base implementation.

    NOTE(review): by its name this presumably reads the player actions
    on the given ``street`` of ``hand`` -- confirm against a concrete
    converter.

    The base body only evaluates the bare name ``abstract``; presumably
    that name is undefined, so calling this version raises NameError.
    """
    abstract
def readCollectPot(self, hand):
    """Site-specific parsing hook with no base implementation.

    NOTE(review): by its name this presumably reads who collected the pot
    in ``hand`` -- confirm against a concrete converter.

    The base body only evaluates the bare name ``abstract``; presumably
    that name is undefined, so calling this version raises NameError.
    """
    abstract
def readShownCards(self, hand):
    """Site-specific parsing hook with no base implementation.

    NOTE(review): by its name this presumably reads the cards shown at
    showdown in ``hand`` -- confirm against a concrete converter.

    The base body only evaluates the bare name ``abstract``; presumably
    that name is undefined, so calling this version raises NameError.
    """
    abstract
def readTourneyResults(self, hand):
    """Reserved for parsing tourney results directly from a hand.

    Site-specific hook with no base implementation; intended for future
    use.

    The base body only evaluates the bare name ``abstract``.
    NOTE(review): presumably that name is undefined, so calling this
    version raises NameError -- confirm.
    """
    abstract
# EDIT: readOther is deprecated
367 # Some sites do odd stuff that doesn't fall in to the normal HH parsing.
368 # e.g., FTP doesn't put mixed game info in the HH, but puts in in the
369 # file name. Use readOther() to clean up those messes.
def readOther(self, hand):
    """Deprecated clean-up hook; the base version does nothing.

    Converters could override it for site quirks that fall outside normal
    hand-history parsing (for example game information carried in the file
    name rather than in the hand text).  Always returns None.
    """
    return None
372 # Some sites don't report the rake. This will be called at the end of the hand after the pot total has been calculated
373 # an inheriting class can calculate it for the specific site if need be.
def getRake(self, hand):
    """Compute the rake for ``hand`` and store it in ``hand.rake``.

    The rake is the part of the total pot that was not collected:
    hand.totalpot - hand.totalcollected.  Nothing is returned.  A
    commented-out ``* Decimal('0.05')`` factor in the original was marked
    "probably not quite right" and is not applied.  Converters for sites
    that do not report the rake may override this.
    """
    pot_total = hand.totalpot
    paid_out = hand.totalcollected
    hand.rake = pot_total - paid_out
378 def sanityCheck(self
):
379 """Check we aren't going to do some stupid things"""
383 # Make sure input and output files are different or we'll overwrite the source file
384 if True: # basically.. I don't know
387 if self
.in_path
!= '-' and self
.out_path
== self
.in_path
:
388 print(_("Output and input files are the same, check config."))
393 # Functions not necessary to implement in sub class
def setFileType(self, filetype="text", codepage='utf8'):
    """Record the input file's type and encoding on the converter.

    filetype -- stored as self.filetype; the file reader branches on the
                values "text" and "xml".
    codepage -- stored as self.codepage; the reader tries each entry in
                turn as a codec name.
                NOTE(review): a single name or a list/tuple of names
                appears to be accepted -- confirm against __listof.
    """
    self.filetype, self.codepage = filetype, codepage
398 def __listof(self
, x
):
399 if isinstance(x
, list) or isinstance(x
, tuple):
405 """Open in_path according to self.codepage. Exceptions caught further up"""
407 if self
.filetype
== "text":
408 for kodec
in self
.__listof
(self
.codepage
):
409 #print "trying", kodec
411 in_fh
= codecs
.open(self
.in_path
, 'r', kodec
)
412 self
.whole_file
= in_fh
.read()
414 self
.obs
= self
.whole_file
[self
.index
:]
415 self
.index
= len(self
.whole_file
)
420 print _("unable to read file with any codec in list!"), self
.in_path
423 elif self
.filetype
== "xml":
424 doc
= xml
.dom
.minidom
.parse(filename
)
427 def guessMaxSeats(self
, hand
):
428 """Return a guess at maxseats when not specified in HH."""
429 # if some other code prior to this has already set it, return it
430 if self
.maxseats
> 1 and self
.maxseats
< 11:
432 mo
= self
.maxOccSeat(hand
)
434 if mo
== 10: return 10 #that was easy
436 if hand
.gametype
['base'] == 'stud':
440 if hand
.gametype
['base'] == 'draw':
448 def maxOccSeat(self
, hand
):
450 for player
in hand
.players
:
456 #TODO: Return a status of true if file processed ok
def getProcessedHands(self):
    """Return self.processedHands, the list that collects processed hands.

    The list object itself is returned, not a copy.
    """
    hands = self.processedHands
    return hands
462 def getProcessedFile(self
):
465 def getLastCharacterRead(self
):
def isSummary(self, topline):
    """Tell whether ``topline`` marks a tournament summary.

    Returns True when the text " Tournament Summary " (with a space on
    each side) occurs anywhere in ``topline``, otherwise False.
    """
    summary_marker = " Tournament Summary "
    if summary_marker in topline:
        return True
    return False
def getParsedObjectType(self):
    """Return what kind of file was parsed, as held in self.parsedObjectType.

    The value is "HH" for hand-history files (the default) and "Summary"
    once a summary file has been detected.
    """
    parsed_kind = self.parsedObjectType
    return parsed_kind
# returns a status (True/False) indicating whether the parsing could be done correctly or not
def readSummaryInfo(self, summaryInfoList):
    """Parse a tournament summary given as a list of text sections.

    Site-specific hook with no base implementation.  The caller treats a
    truthy return value as "summary parsed correctly" and a falsy one as
    a conversion error.

    The base body only evaluates the bare name ``abstract``.
    NOTE(review): presumably that name is undefined, so calling this
    version raises NameError -- confirm.
    """
    abstract
477 def getTourney(self
):
481 def changeTimezone(time
, givenTimezone
, wantedTimezone
):
482 """Takes a givenTimezone in format AAA or AAA+HHMM where AAA is a standard timezone
483 and +HHMM is an optional offset (+/-) in hours (HH) and minutes (MM)
484 (See OnGameToFpdb.py for example use of the +HHMM part)
485 Tries to convert the time parameter (with no timezone) from the givenTimezone to
486 the wantedTimeZone (currently only allows "UTC")
488 #log.debug("raw time: " + str(time) + " given time zone: " + str(givenTimezone))
489 if wantedTimezone
=="UTC":
490 wantedTimezone
= pytz
.utc
492 log
.error(_("Unsupported target timezone: ") + givenTimezone
)
493 raise FpdbParseError(_("Unsupported target timezone: ") + givenTimezone
)
496 if HandHistoryConverter
.re_tzOffset
.match(givenTimezone
):
497 offset
= int(givenTimezone
[-5:])
498 givenTimezone
= givenTimezone
[0:-5]
499 #log.debug("changeTimeZone: offset=") + str(offset))
502 if (givenTimezone
=="ET" or givenTimezone
=="EST" or givenTimezone
=="EDT"):
503 givenTZ
= timezone('US/Eastern')
504 elif (givenTimezone
=="CET" or givenTimezone
=="CEST" or givenTimezone
=="MESZ"):
505 #since CEST will only be used in summer time it's ok to treat it as identical to CET.
506 givenTZ
= timezone('Europe/Berlin')
507 #Note: Daylight Saving Time is standardised across the EU so this should be fine
508 elif givenTimezone
== 'GMT': # GMT is always the same as UTC
509 givenTZ
= timezone('GMT')
510 # GMT cannot be treated as WET because some HH's are explicitly
511 # GMT+-delta so would be incorrect during the summertime
512 # if substituted as WET+-delta
513 elif givenTimezone
== 'BST':
514 givenTZ
= timezone('Europe/London')
515 elif givenTimezone
== 'WET': # WET is GMT with daylight saving delta
516 givenTZ
= timezone('WET')
517 elif givenTimezone
== 'HST': # Hawaiian Standard Time
518 givenTZ
= timezone('US/Hawaii')
519 elif givenTimezone
== 'AKT': # Alaska Time
520 givenTZ
= timezone('US/Alaska')
521 elif givenTimezone
== 'PT': # Pacific Time
522 givenTZ
= timezone('US/Pacific')
523 elif givenTimezone
== 'MT': # Mountain Time
524 givenTZ
= timezone('US/Mountain')
525 elif givenTimezone
== 'CT': # Central Time
526 givenTZ
= timezone('US/Central')
527 elif givenTimezone
== 'AT': # Atlantic Time
528 givenTZ
= timezone('Canada/Atlantic')
529 elif givenTimezone
== 'NT': # Newfoundland Time
530 givenTZ
= timezone('Canada/Newfoundland')
531 elif givenTimezone
== 'ART': # Argentinian Time
532 givenTZ
= timezone('America/Argentina/Buenos_Aires')
533 elif givenTimezone
== 'BRT': # Brasilia Time
534 givenTZ
= timezone('America/Sao_Paulo')
535 elif givenTimezone
== 'EET': # Eastern European Time
536 givenTZ
= timezone('Europe/Bucharest')
537 elif (givenTimezone
== 'MSK' or givenTimezone
== 'MESZ'): # Moscow Standard Time
538 givenTZ
= timezone('Europe/Moscow')
539 elif givenTimezone
== 'IST': # India Standard Time
540 givenTZ
= timezone('Asia/Kolkata')
541 elif givenTimezone
== 'CCT': # China Coast Time
542 givenTZ
= timezone('Australia/West')
543 elif givenTimezone
== 'JST': # Japan Standard Time
544 givenTZ
= timezone('Asia/Tokyo')
545 elif givenTimezone
== 'AWST': # Australian Western Standard Time
546 givenTZ
= timezone('Australia/West')
547 elif givenTimezone
== 'ACST': # Australian Central Standard Time
548 givenTZ
= timezone('Australia/Darwin')
549 elif givenTimezone
== 'AEST': # Australian Eastern Standard Time
550 # Each State on the East Coast has different DSTs.
551 # Melbournce is out because I don't like AFL, Queensland doesn't have DST
552 # ACT is full of politicians and Tasmania will never notice.
554 givenTZ
= timezone('Australia/Sydney')
555 elif givenTimezone
== 'NZT': # New Zealand Time
556 givenTZ
= timezone('Pacific/Auckland')
559 # do not crash if timezone not in list, just return unconverted time
560 log
.warn(_("Timezone conversion not supported") + ": " + givenTimezone
+ " " + str(time
))
563 localisedTime
= givenTZ
.localize(time
)
564 utcTime
= localisedTime
.astimezone(wantedTimezone
) + datetime
.timedelta(seconds
=-3600*(offset
/100)-60*(offset
%100))
565 #log.debug("utcTime: " + str(utcTime))
567 #end @staticmethod def changeTimezone
570 def getTableTitleRe(type, table_name
=None, tournament
= None, table_number
=None):
571 "Returns string to search in windows titles"
573 return ( re
.escape(str(tournament
)) + ".+\\Table " + re
.escape(str(table_number
)) )
575 return re
.escape(table_name
)
def getTableNoRe(tournament):
    """Return a regex string that finds the table number in a window title.

    tournament -- tournament identifier; interpolated with %s, so a string
                  or an integer both work.  It is inserted as-is (not
                  regex-escaped).

    The single capture group holds the digits following "Table" (or
    "Torneo").  Example titles:
      Full Tilt:  $30 + $3 Tournament (181398949), Table 1 - 600/1200 Ante 100 - Limit Razz
      PokerStars: WCOOP 2nd Chance 02: $1,050 NLHE - Tournament 307521826 Table 1 - Blinds $30/$60

    NOTE(review): takes no ``self``; presumably declared as a staticmethod
    on a line outside this block -- confirm.
    """
    # Raw string: "\d" in a normal literal is an invalid escape sequence
    # (deprecated, warns on current Python).  The resulting text is unchanged.
    return r"%s.+(?:Table|Torneo) (\d+)" % (tournament,)
585 def clearMoneyString(money
):
586 """Converts human readable string representations of numbers like
587 '1 200', '2,000', '0,01' to more machine processable form - no commas, 1 decimal point
591 money
= money
.replace(' ', '')
593 return money
# No commas until 0,01 or 1,00
595 money
= money
[:-3] + '.' + money
[-2:]
597 return money
.replace(',', '')
def getTableTitleRe(config, sitename, *args, **kwargs):
    """Return the window-title search string for the given site.

    Looks up the site's converter class with getSiteHhc(config, sitename)
    and forwards all remaining positional and keyword arguments to that
    class's getTableTitleRe.
    """
    converter_cls = getSiteHhc(config, sitename)
    return converter_cls.getTableTitleRe(*args, **kwargs)
def getTableNoRe(config, sitename, *args, **kwargs):
    """Return the tournament table-number search string for the given site.

    Looks up the site's converter class with getSiteHhc(config, sitename)
    and forwards all remaining positional and keyword arguments to that
    class's getTableNoRe.
    """
    converter_cls = getSiteHhc(config, sitename)
    return converter_cls.getTableNoRe(*args, **kwargs)
def getSiteHhc(config, sitename):
    """Return the hand-history converter class configured for ``sitename``.

    The converter module name is read from
    config.supported_sites[sitename].converter, the module is imported by
    that name, and the attribute whose name is the module name minus its
    last six characters is returned.
    NOTE(review): the six characters are presumably a "ToFpdb" suffix
    (e.g. OnGameToFpdb -> OnGame) -- confirm.
    """
    module_name = config.supported_sites[sitename].converter
    class_name = module_name[:-6]
    converter_module = __import__(module_name)
    return getattr(converter_module, class_name)
615 def get_out_fh(out_path
, parameters
):
618 elif parameters
['saveStarsHH']:
619 out_dir
= os
.path
.dirname(out_path
)
620 if not os
.path
.isdir(out_dir
) and out_dir
!= '':
623 except: # we get a WindowsError here in Windows.. pretty sure something else for Linux :D
624 log
.error(_("Unable to create output directory %s for HHC!") % out_dir
)
625 print(_("Unable to create output directory %s for HHC!") % out_dir
)
627 log
.info(_("Created directory '%s'") % out_dir
)
629 return(codecs
.open(out_path
, 'w', 'utf8'))
631 log
.error(_("Output path %s couldn't be opened.") % (out_path
))