Use debian 2.7 only
[fpbd-bostik.git] / pyfpdb / HandHistoryConverter.py
blobfe08f9b24813736c6008433e28dc0f4e015a4c7d
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 #Copyright 2008-2011 Carl Gherardi
5 #This program is free software: you can redistribute it and/or modify
6 #it under the terms of the GNU Affero General Public License as published by
7 #the Free Software Foundation, version 3 of the License.
9 #This program is distributed in the hope that it will be useful,
10 #but WITHOUT ANY WARRANTY; without even the implied warranty of
11 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 #GNU General Public License for more details.
14 #You should have received a copy of the GNU Affero General Public License
15 #along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #In the "official" distribution you can find the license in agpl-3.0.txt.
18 import L10n
19 _ = L10n.get_translation()
21 import re
22 import sys
23 import traceback
24 from optparse import OptionParser
25 import os
26 import os.path
27 import xml.dom.minidom
28 import codecs
29 from decimal_wrapper import Decimal
30 import operator
31 from xml.dom.minidom import Node
33 import time
34 import datetime
36 from pytz import timezone
37 import pytz
39 import logging
40 # logging has been set up in fpdb.py or HUD_main.py, use their settings:
41 log = logging.getLogger("parser")
44 import Hand
45 from Exceptions import *
46 import Configuration
class HandHistoryConverter():
    """Base class for the site-specific hand history converters.

    An instance reads the file at in_path, splits it into individual hand
    texts and turns each one into a Hand object, or passes the file to
    readSummaryInfo() when the first entry looks like a tournament summary.

    Inheriting classes supply the site-specific parsing by overriding the
    hooks below that raise NotImplementedError, and must provide the
    re_SplitHands pattern used by allHandsAsList().
    """

    READ_CHUNK_SIZE = 10000  # bytes to read at a time from file in tail mode

    # filetype can be "text" or "xml"
    # so far always "text"
    # subclass HHC_xml for xml parsing
    filetype = "text"

    # codepage indicates the encoding of the text file.
    # cp1252 is a safe default
    # "utf_8" is more likely if there are funny characters
    # A list/tuple of codec names is also accepted; readFile() tries them in order.
    codepage = "cp1252"

    # Matches a timezone abbreviation followed by a +HHMM / -HHMM offset
    re_tzOffset = re.compile(r'^\w+[+-]\d{4}$')
    copyGameHeader = False
    summaryInFile = False

    # Per-hand headers that only appear in archive exports; removed before splitting.
    _re_starsArchiveHeader = re.compile(r'^Hand #\d+', re.MULTILINE)
    # Matches e.g.: ******************** # 1 *************************
    _re_ftpArchiveHeader = re.compile(r'\*{20}\s#\s\d+\s\*{20,25}\s+', re.MULTILINE)

    # Timezone abbreviations understood by changeTimezone() -> pytz zone names.
    _TIMEZONE_NAMES = {
        'ET': 'US/Eastern',
        'EST': 'US/Eastern',
        'EDT': 'US/Eastern',
        # Since CEST will only be used in summer time it's ok to treat it as
        # identical to CET. Daylight Saving Time is standardised across the EU
        # so this should be fine.
        'CET': 'Europe/Berlin',
        'CEST': 'Europe/Berlin',
        'MESZ': 'Europe/Berlin',
        # GMT is always the same as UTC. GMT cannot be treated as WET because
        # some HH's are explicitly GMT+-delta so would be incorrect during the
        # summertime if substituted as WET+-delta.
        'GMT': 'GMT',
        'BST': 'Europe/London',
        'WET': 'WET',                               # WET is GMT with daylight saving delta
        'HST': 'US/Hawaii',                         # Hawaiian Standard Time
        'AKT': 'US/Alaska',                         # Alaska Time
        'PT': 'US/Pacific',                         # Pacific Time
        'MT': 'US/Mountain',                        # Mountain Time
        'CT': 'US/Central',                         # Central Time
        'AT': 'Canada/Atlantic',                    # Atlantic Time
        'NT': 'Canada/Newfoundland',                # Newfoundland Time
        'ART': 'America/Argentina/Buenos_Aires',    # Argentinian Time
        'BRT': 'America/Sao_Paulo',                 # Brasilia Time
        'EET': 'Europe/Bucharest',                  # Eastern European Time
        'MSK': 'Europe/Moscow',                     # Moscow Standard Time
        'IST': 'Asia/Kolkata',                      # India Standard Time
        'CCT': 'Australia/West',                    # China Coast Time
        'JST': 'Asia/Tokyo',                        # Japan Standard Time
        'AWST': 'Australia/West',                   # Australian Western Standard Time
        'ACST': 'Australia/Darwin',                 # Australian Central Standard Time
        # Each State on the East Coast has different DSTs; using Sydney.
        'AEST': 'Australia/Sydney',                 # Australian Eastern Standard Time
        'NZT': 'Pacific/Auckland',                  # New Zealand Time
    }

    # maybe archive params should be one archive param, then call method in
    # specific converter. if archive: convert_archive()
    def __init__(self, config, in_path='-', out_path='-', index=0,
                 autostart=True, starsArchive=False, ftpArchive=False, sitename="PokerStars"):
        """Set up the converter and, when autostart is true, run start().

        config       -- configuration object; must offer get_import_parameters()
        in_path      -- file to read (default '-' = sys.stdin)
        out_path     -- file to write (default '-' = sys.stdout)
        index        -- offset into the file at which reading resumes
        autostart    -- process the input immediately
        starsArchive -- strip the 'Hand #n' headers before splitting
        ftpArchive   -- strip the '**** # n ****' headers before splitting
        sitename     -- site name used in log messages and passed to Hand objects
        """
        self.config = config
        self.import_parameters = self.config.get_import_parameters()
        self.sitename = sitename
        log.info("HandHistory init - %s site, %s subclass, in_path '%s'; out_path '%s'"
                 % (self.sitename, self.__class__, in_path, out_path))  # should use self.filter, not self.sitename

        self.index = index
        self.starsArchive = starsArchive
        self.ftpArchive = ftpArchive

        self.in_path = in_path
        self.out_path = out_path

        self.processedHands = []
        self.numHands = 0
        self.numErrors = 0
        self.numPartial = 0

        # Tourney object used to store TourneyInfo when called to deal with a Summary file
        self.tourney = None

        if in_path == '-':
            self.in_fh = sys.stdin
        self.out_fh = get_out_fh(out_path, self.import_parameters)

        self.compiledPlayers = set()
        self.maxseats = 0

        self.status = True

        # default behaviour : parsing HH files, can be "Summary" if the parsing encounters a Summary File
        self.parsedObjectType = "HH"

        if autostart:
            self.start()

    def __str__(self):
        """Return a short multi-line description of this converter."""
        # Built explicitly: formatting with locals() only exposes 'self' and
        # therefore raised KeyError for every placeholder.
        details = {
            'sitename': self.sitename,
            'filetype': self.filetype,
            'in_path': self.in_path,
            'out_path': self.out_path,
        }
        return ("\n"
                "HandHistoryConverter: '%(sitename)s'\n"
                "    filetype    '%(filetype)s'\n"
                "    in_path     '%(in_path)s'\n"
                "    out_path    '%(out_path)s'\n") % details

    def start(self):
        """Process a hand at a time from the input specified by in_path."""
        starttime = time.time()
        if not self.sanityCheck():
            log.warning(_("Failed sanity check"))
            return

        self.numHands = 0
        self.numErrors = 0
        self.numPartial = 0  # reset together with the other per-run counters
        handsList = self.allHandsAsList()
        log.debug(_("Hands list is:") + str(handsList))
        log.info(_("Parsing %d hands") % len(handsList))
        # Determine if we're dealing with a HH file or a Summary file
        # quick fix : empty files make the handsList[0] fail ==> If empty file, go on with HH parsing
        # (the explicit comparison with False is kept so an override returning None behaves as before)
        if len(handsList) == 0 or self.isSummary(handsList[0]) == False:
            self.parsedObjectType = "HH"
            for handText in handsList:
                try:
                    self.processedHands.append(self.processHand(handText))
                except FpdbHandPartial as e:
                    self.numPartial += 1
                    log.debug("%s" % e)
                except FpdbParseError:
                    self.numErrors += 1
                    log.error(_("FpdbParseError for file '%s'") % self.in_path)
            self.numHands = len(handsList)
            endtime = time.time()
            log.info(_("Read %d hands (%d failed) in %.3f seconds")
                     % (self.numHands, (self.numErrors + self.numPartial), endtime - starttime))
        else:
            self.parsedObjectType = "Summary"
            summaryParsingStatus = self.readSummaryInfo(handsList)
            endtime = time.time()
            if summaryParsingStatus:
                log.info(_("Summary file '%s' correctly parsed (took %.3f seconds)")
                         % (self.in_path, endtime - starttime))
            else:
                log.warning(_("Error converting summary file '%s' (took %.3f seconds)")
                            % (self.in_path, endtime - starttime))

    def progressNotify(self):
        """A callback to the interface while events are pending."""
        # Imported here so the module can be loaded without gtk installed.
        import gtk
        import pygtk
        while gtk.events_pending():
            gtk.main_iteration(False)

    def allHandsAsList(self):
        """Return a list of handtexts in the file at self.in_path.

        Returns an empty list (and logs an error) when nothing could be read.
        """
        #TODO : any need for this to be generator? e.g. stars support can email one huge file
        #       of all hands in a year. Better to read bit by bit than all at once.
        self.readFile()
        # Guard first: calling strip() on a missing or None buffer would raise.
        obs = getattr(self, 'obs', None) or ""
        obs = obs.strip().replace('\r\n', '\n')
        # maybe archive params should be one archive param, then call method in specific converter?
        # if self.archive:
        #     self.obs = self.convert_archive(self.obs)
        if self.starsArchive == True:
            obs = self._re_starsArchiveHeader.sub('', obs)

        if self.ftpArchive == True:
            # Remove ******************** # 1 *************************
            obs = self._re_ftpArchiveHeader.sub('', obs)

        self.obs = obs
        if obs == "":
            log.error(_("Read no hands from file: '%s'") % self.in_path)
            return []
        handlist = re.split(self.re_SplitHands, obs)
        # Some HH formats leave dangling text after the split
        # ie. </game> (split) </session>EOL
        # Remove this dangler if less than 50 characters and warn in the log
        if len(handlist[-1]) <= 50:
            handlist.pop()
            log.info(_("Removing text < 50 characters"))
        return handlist

    def processHand(self, handText):
        """Build and return the Hand object for one hand text.

        Raises FpdbParseError when the game type cannot be determined or is
        not listed by readSupportedGames(). Returns None when the game type
        is listed but its 'base' is not one of 'hold', 'stud' or 'draw'.
        """
        if self.copyGameHeader:
            gametype = self.determineGameType(self.whole_file)
        else:
            gametype = self.determineGameType(handText)

        hand = None
        game_key = None
        if gametype is None:
            gametype = "unmatched"
            # TODO: not ideal, just trying to not error. Throw ParseException?
            # numErrors is not incremented here: the FpdbParseError raised
            # below is counted by start(), so counting here too would record
            # the same hand twice.
        else:
            # See if gametype is supported.
            gametype.setdefault('mix', 'none')
            gametype.setdefault('ante', 0)
            game_key = [gametype['type'], gametype['base'], gametype['limitType']]

        if game_key not in self.readSupportedGames():
            log.error(_("%s Unsupported game type: %s") % (self.sitename, gametype))
            raise FpdbParseError

        base = gametype['base']
        if base == 'hold':
            hand = Hand.HoldemOmahaHand(self.config, self, self.sitename, gametype, handText)
        elif base == 'stud':
            hand = Hand.StudHand(self.config, self, self.sitename, gametype, handText)
        elif base == 'draw':
            hand = Hand.DrawHand(self.config, self, self.sitename, gametype, handText)

        if hand:
            #hand.writeHand(self.out_fh)
            return hand

        log.error(_("%s Unsupported game type: %s") % (self.sitename, gametype))
        # TODO: pity we don't know the HID at this stage. Log the entire hand?
        return None

    # These functions are parse actions that may be overridden by the inheriting class

    def readSupportedGames(self):
        """Return the supported games as a list of lists.

        The result should look like:
            return [["ring", "hold", "nl"], ["tour", "hold", "nl"]]
        showing all supported games, limits and types.
        """
        raise NotImplementedError("readSupportedGames must be implemented by the inheriting class")

    # should return a list
    # type  base limit
    # [ ring, hold, nl   , sb, bb ]
    # Valid types specified in docs/tabledesign.html in Gametypes
    def determineGameType(self, handText):
        """return dict with keys/values:
        'type'       in ('ring', 'tour')
        'limitType'  in ('nl', 'cn', 'pl', 'cp', 'fl')
        'base'       in ('hold', 'stud', 'draw')
        'category'   in ('holdem', 'omahahi', 'omahahilo', 'razz', 'studhi', 'studhilo',
                         'fivedraw', '27_1draw', '27_3draw', 'badugi')
        'hilo'       in ('h','l','s')
        'mix'        in (site specific, or 'none')
        'smallBlind' int?
        'bigBlind'   int?
        'smallBet'
        'bigBet'
        'currency'   in ('USD', 'EUR', 'T$', <countrycode>)
        or None if we fail to get the info
        """
        #TODO: which parts are optional/required?
        raise NotImplementedError("determineGameType must be implemented by the inheriting class")

    def readHandInfo(self, hand):
        """Read and set information about the hand being dealt, and set the correct
        variables in the Hand object 'hand'

        * hand.startTime - a datetime object
        * hand.handid - The site identified for the hand - a string.
        * hand.tablename
        * hand.buttonpos
        * hand.maxseats
        * hand.mixed

        Tournament fields:

        * hand.tourNo - The site identified tournament id as appropriate - a string.
        * hand.buyin
        * hand.fee
        * hand.buyinCurrency
        * hand.koBounty
        * hand.isKO
        * hand.level
        """
        #TODO: which parts are optional/required?
        raise NotImplementedError("readHandInfo must be implemented by the inheriting class")

    def readPlayerStacks(self, hand):
        """This function is for identifying players at the table, and to pass the
        information on to 'hand' via Hand.addPlayer(seat, name, chips)

        At the time of writing the reference function in the PS converter is:
            log.debug("readPlayerStacks")
            m = self.re_PlayerInfo.finditer(hand.handText)
            for a in m:
                hand.addPlayer(int(a.group('SEAT')), a.group('PNAME'), a.group('CASH'))

        Which is pretty simple because the hand history format is consistent.
        Other hh formats aren't so nice.

        This is the appropriate place to identify players that are sitting out and ignore them

        *** NOTE: You may find this is a more appropriate place to set hand.maxseats ***
        """
        raise NotImplementedError("readPlayerStacks must be implemented by the inheriting class")

    def compilePlayerRegexs(self):
        """Compile dynamic regexes -- compile player dependent regexes.

        Depending on the ambiguity of lines you may need to match, and the complexity of
        player names - we found that we needed to recompile some regexes for player actions
        so that they actually contained the player names.

        We need to match the ante line:
            <Player> antes $1.00

        But <Player> is actually named

            YesI antes $4000 - A perfectly legal playername

        Giving:

            YesI antes $4000 antes $1.00

        Which without care in your regexes most people would match 'YesI' and not
        'YesI antes $4000'
        """
        raise NotImplementedError("compilePlayerRegexs must be implemented by the inheriting class")

    # Needs to return a MatchObject with group names identifying the streets into the Hand object
    # so groups are called by street names 'PREFLOP', 'FLOP', 'STREET2' etc
    # blinds are done separately
    def markStreets(self, hand):
        """For dividing the handText into sections.

        The function requires you to pass a MatchObject with groups specifically labeled with
        the 'correct' street names.

        The Hand object will use the various matches for assigning actions to the correct streets.

        Flop Based Games:
            PREFLOP, FLOP, TURN, RIVER

        Draw Based Games:
            PREDEAL, DEAL, DRAWONE, DRAWTWO, DRAWTHREE

        Stud Based Games:
            ANTES, THIRD, FOURTH, FIFTH, SIXTH, SEVENTH

        The Stars HHC has a good reference implementation
        """
        raise NotImplementedError("markStreets must be implemented by the inheriting class")

    #Needs to return a list in the format
    # ['player1name', 'player2name', ...] where player1name is the sb and player2name is bb,
    # additional players are assumed to post a bb oop
    def readBlinds(self, hand):
        """Function for reading the various blinds from the hand history.

        Pass any small blind to hand.addBlind(<name>, "small blind", <value>)
        - unless it is a single dead small blind then use:
            hand.addBlind(<name>, 'secondsb', <value>)
        Pass any big blind to hand.addBlind(<name>, "big blind", <value>)
        Pass any play posting both big and small blinds to hand.addBlind(<name>, 'both', <value>)
        """
        raise NotImplementedError("readBlinds must be implemented by the inheriting class")

    def readAntes(self, hand):
        """Function for reading the antes from the hand history and passing the hand.addAnte"""
        raise NotImplementedError("readAntes must be implemented by the inheriting class")

    def readBringIn(self, hand):
        """Site-specific hook; must be overridden by the inheriting class."""
        raise NotImplementedError("readBringIn must be implemented by the inheriting class")

    def readButton(self, hand):
        """Site-specific hook; must be overridden by the inheriting class."""
        raise NotImplementedError("readButton must be implemented by the inheriting class")

    def readHeroCards(self, hand):
        """Site-specific hook; must be overridden by the inheriting class."""
        raise NotImplementedError("readHeroCards must be implemented by the inheriting class")

    def readPlayerCards(self, hand, street):
        """Site-specific hook; must be overridden by the inheriting class."""
        raise NotImplementedError("readPlayerCards must be implemented by the inheriting class")

    def readAction(self, hand, street):
        """Site-specific hook; must be overridden by the inheriting class."""
        raise NotImplementedError("readAction must be implemented by the inheriting class")

    def readCollectPot(self, hand):
        """Site-specific hook; must be overridden by the inheriting class."""
        raise NotImplementedError("readCollectPot must be implemented by the inheriting class")

    def readShownCards(self, hand):
        """Site-specific hook; must be overridden by the inheriting class."""
        raise NotImplementedError("readShownCards must be implemented by the inheriting class")

    def readTourneyResults(self, hand):
        """This function is for future use in parsing tourney results directly from a hand"""
        raise NotImplementedError("readTourneyResults must be implemented by the inheriting class")

    # EDIT: readOther is deprecated
    # Some sites do odd stuff that doesn't fall in to the normal HH parsing.
    # e.g., FTP doesn't put mixed game info in the HH, but puts in in the
    # file name. Use readOther() to clean up those messes.
    def readOther(self, hand):
        """Optional clean-up hook; does nothing by default."""
        pass

    # Some sites don't report the rake. This will be called at the end of the hand after the
    # pot total has been calculated; an inheriting class can calculate it for the specific
    # site if need be.
    def getRake(self, hand):
        """Set hand.rake to the difference between the total pot and the total collected."""
        hand.rake = hand.totalpot - hand.totalcollected  # * Decimal('0.05') # probably not quite right

    def sanityCheck(self):
        """Check we aren't going to do some stupid things.

        Returns False when a real input file would also be used as the
        output file, True otherwise.
        """
        # Make sure input and output files are different or we'll overwrite the source file
        if self.in_path != '-' and self.out_path == self.in_path:
            print(_("Output and input files are the same, check config."))
            return False
        return True

    # Functions not necessary to implement in sub class
    def setFileType(self, filetype="text", codepage='utf8'):
        """Override the file type and codepage used by readFile()."""
        self.filetype = filetype
        self.codepage = codepage

    def __listof(self, x):
        """Return x unchanged when it is a list or tuple, else wrap it in a list."""
        if isinstance(x, (list, tuple)):
            return x
        return [x]

    def readFile(self):
        """Open in_path according to self.codepage.

        For text files each codec in self.codepage is tried in turn. On
        success self.whole_file holds the full content, self.obs the part
        from self.index onwards, self.index is moved to the end of the file
        and True is returned. If no codec works self.obs is set to "" and
        False is returned.

        For xml files the parsed document is stored in self.doc; parse
        errors propagate to the caller.
        """
        if self.filetype == "text":
            for kodec in self.__listof(self.codepage):
                try:
                    # 'with' closes the handle even when read() fails to decode
                    with codecs.open(self.in_path, 'r', kodec) as in_fh:
                        whole_file = in_fh.read()
                    unread = whole_file[self.index:]
                except Exception as e:
                    # Best effort: note why this codec failed, then try the next one.
                    log.debug("readFile: codec '%s' failed for '%s': %s" % (kodec, self.in_path, e))
                    continue
                self.whole_file = whole_file
                self.obs = unread
                self.index = len(whole_file)
                return True
            print("%s %s" % (_("unable to read file with any codec in list!"), self.in_path))
            self.obs = ""
            return False
        elif self.filetype == "xml":
            # The original referenced an undefined name 'filename' here.
            self.doc = xml.dom.minidom.parse(self.in_path)
            return True

    def guessMaxSeats(self, hand):
        """Return a guess at maxseats when not specified in HH."""
        # if some other code prior to this has already set it, return it
        if 1 < self.maxseats < 11:
            return self.maxseats

        highest = self.maxOccSeat(hand)
        if highest == 10:
            return 10  # that was easy

        base = hand.gametype['base']
        if base == 'stud':
            return 8 if highest <= 8 else highest
        if base == 'draw':
            return 6 if highest <= 6 else highest

        if highest == 2:
            return 2
        if highest <= 6:
            return 6
        return 10

    def maxOccSeat(self, hand):
        """Return the highest seat number in hand.players (0 when there are none).

        The seat number is read from the first element of each player entry.
        """
        highest = 0
        for player in hand.players:
            if player[0] > highest:
                highest = player[0]
        return highest

    def getStatus(self):
        """Return self.status."""
        #TODO: Return a status of true if file processed ok
        return self.status

    def getProcessedHands(self):
        """Return the list of results collected by start()."""
        return self.processedHands

    def getProcessedFile(self):
        """Return the output path."""
        return self.out_path

    def getLastCharacterRead(self):
        """Return the current read index (the end of the file after a successful readFile())."""
        return self.index

    def isSummary(self, topline):
        """Return True when topline contains the ' Tournament Summary ' marker."""
        return " Tournament Summary " in topline

    def getParsedObjectType(self):
        """Return "HH" or "Summary", depending on what start() found."""
        return self.parsedObjectType

    #returns a status (True/False) indicating whether the parsing could be done correctly or not
    def readSummaryInfo(self, summaryInfoList):
        """Parse a summary file; must be overridden by the inheriting class."""
        raise NotImplementedError("readSummaryInfo must be implemented by the inheriting class")

    def getTourney(self):
        """Return the Tourney object (None unless one has been stored)."""
        return self.tourney

    @staticmethod
    def changeTimezone(time, givenTimezone, wantedTimezone):
        """Takes a givenTimezone in format AAA or AAA+HHMM where AAA is a standard timezone
        and +HHMM is an optional offset (+/-) in hours (HH) and minutes (MM)
        (See OnGameToFpdb.py for example use of the +HHMM part)
        Tries to convert the time parameter (with no timezone) from the givenTimezone to
        the wantedTimeZone (currently only allows "UTC")

        Returns the converted time, or the unconverted time when givenTimezone
        is not in the list of known abbreviations.
        Raises FpdbParseError when wantedTimezone is not "UTC".
        """
        #log.debug("raw time: " + str(time) + " given time zone: " + str(givenTimezone))
        if wantedTimezone == "UTC":
            wantedTimezone = pytz.utc
        else:
            # Report the rejected target zone (the original reported the source zone here).
            message = _("Unsupported target timezone: ") + str(wantedTimezone)
            log.error(message)
            raise FpdbParseError(message)

        if HandHistoryConverter.re_tzOffset.match(givenTimezone):
            offset = int(givenTimezone[-5:])
            givenTimezone = givenTimezone[0:-5]
            #log.debug("changeTimeZone: offset=" + str(offset))
        else:
            offset = 0

        zoneName = HandHistoryConverter._TIMEZONE_NAMES.get(givenTimezone)
        if zoneName is None:
            # do not crash if timezone not in list, just return unconverted time
            log.warning(_("Timezone conversion not supported") + ": " + givenTimezone + " " + str(time))
            return time
        givenTZ = timezone(zoneName)

        # Split the HHMM digits on the absolute value and re-apply the sign, so
        # negative offsets with a minutes part (e.g. -0130) come out right and
        # the result does not depend on Python 2 integer division.
        hours, minutes = divmod(abs(offset), 100)
        delta = datetime.timedelta(hours=hours, minutes=minutes)
        if offset < 0:
            delta = -delta

        localisedTime = givenTZ.localize(time)
        utcTime = localisedTime.astimezone(wantedTimezone) - delta
        #log.debug("utcTime: " + str(utcTime))
        return utcTime
    #end @staticmethod def changeTimezone

    @staticmethod
    def getTableTitleRe(type, table_name=None, tournament=None, table_number=None):
        """Returns string to search in windows titles"""
        if type == "tour":
            # Plain "Table": the former ".+\\Table " produced the regex escape \T,
            # which matched a literal 'T' on Python 2 but is rejected as a bad
            # escape by newer versions of the re module.
            return re.escape(str(tournament)) + ".+Table " + re.escape(str(table_number))
        return re.escape(table_name)

    @staticmethod
    def getTableNoRe(tournament):
        """Returns string to search window title for tournament table no."""
        # Full Tilt:  $30 + $3 Tournament (181398949), Table 1 - 600/1200 Ante 100 - Limit Razz
        # PokerStars: WCOOP 2nd Chance 02: $1,050 NLHE - Tournament 307521826 Table 1 - Blinds $30/$60
        return r"%s.+(?:Table|Torneo) (\d+)" % (tournament, )

    @staticmethod
    def clearMoneyString(money):
        """Converts human readable string representations of numbers like
        '1 200', '2,000', '0,01' to more machine processable form - no commas, 1 decimal point

        Empty or None input is returned unchanged.
        """
        if not money:
            return money
        money = money.replace(' ', '')
        if len(money) < 3:
            return money  # No commas until 0,01 or 1,00
        # A comma two digits from the end is a decimal comma
        if money[-3] == ',':
            money = money[:-3] + '.' + money[-2:]

        return money.replace(',', '')
def getTableTitleRe(config, sitename, *args, **kwargs):
    """Returns string to search in windows titles for current site.

    Looks up the converter class for sitename via getSiteHhc() and forwards
    the remaining arguments to that class's getTableTitleRe().
    """
    site_hhc = getSiteHhc(config, sitename)
    return site_hhc.getTableTitleRe(*args, **kwargs)
def getTableNoRe(config, sitename, *args, **kwargs):
    """Returns string to search window titles for tournament table no.

    Looks up the converter class for sitename via getSiteHhc() and forwards
    the remaining arguments to that class's getTableNoRe().
    """
    site_hhc = getSiteHhc(config, sitename)
    return site_hhc.getTableNoRe(*args, **kwargs)
def getSiteHhc(config, sitename):
    """Returns HHC class for current site.

    The converter module name comes from config.supported_sites[sitename];
    the class is looked up in that module under the module name minus its
    last six characters (presumably the 'ToFpdb' suffix -- confirm against
    the configured converter names).
    """
    converter_name = config.supported_sites[sitename].converter
    converter_module = __import__(converter_name)
    class_name = converter_name[:-6]
    return getattr(converter_module, class_name)
def get_out_fh(out_path, parameters):
    """Return the file handle that converted hands should be written to.

    out_path   -- destination path; '-' selects sys.stdout
    parameters -- import parameters; the 'saveStarsHH' entry decides whether
                  a file is written at all

    Returns sys.stdout when out_path is '-' or 'saveStarsHH' is false.
    Otherwise the directory of out_path is created if missing and the file
    is opened for writing as utf8. Returns None when the file cannot be
    opened (the failure is logged).
    """
    if out_path == '-':
        return sys.stdout
    if not parameters['saveStarsHH']:
        return sys.stdout

    out_dir = os.path.dirname(out_path)
    if out_dir != '' and not os.path.isdir(out_dir):
        try:
            os.makedirs(out_dir)
        except Exception as e:
            # we get a WindowsError here in Windows.. pretty sure something else for Linux :D
            # 'Exception' rather than a bare except, so Ctrl-C and SystemExit still propagate.
            log.error(_("Unable to create output directory %s for HHC!") % out_dir)
            log.error("%s" % e)
            print(_("Unable to create output directory %s for HHC!") % out_dir)
        else:
            log.info(_("Created directory '%s'") % out_dir)

    # Still attempted after a failed makedirs, as before; the failure is then reported here.
    try:
        return codecs.open(out_path, 'w', 'utf8')
    except Exception as e:
        log.error(_("Output path %s couldn't be opened.") % (out_path))
        log.error("%s" % e)
        return None