Don't allow the start date to be later than the end date. If it is, modify whichever...
[fpdb-dooglus.git] / pyfpdb / IdentifySite.py
blobc5cf42b6e503eeb5ebdb66aaa05cc7eb6cfd4adb
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 #Copyright 2010-2011 Chaz Littlejohn
5 #This program is free software: you can redistribute it and/or modify
6 #it under the terms of the GNU Affero General Public License as published by
7 #the Free Software Foundation, version 3 of the License.
9 #This program is distributed in the hope that it will be useful,
10 #but WITHOUT ANY WARRANTY; without even the implied warranty of
11 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 #GNU General Public License for more details.
14 #You should have received a copy of the GNU Affero General Public License
15 #along with this program. If not, see <http://www.gnu.org/licenses/>.
16 #In the "official" distribution you can find the license in agpl-3.0.txt.
18 import L10n
19 _ = L10n.get_translation()
21 import re
22 import sys
23 import os
24 from optparse import OptionParser
25 import codecs
26 import Configuration
27 import Database
# Pattern for the separator lines that precede each hand in an archive-style
# history file: either a line consisting of "Hand #<digits>", or a row of 20
# asterisks, " # <digits> ", and 20-25 more asterisks.
# Written as a raw string so the regex escapes (\d, \s, \*) reach the regex
# engine untouched instead of relying on Python passing unknown string
# escapes through.
__ARCHIVE_PRE_HEADER_REGEX = r'^Hand #(\d+)\s*$|\*{20}\s#\s\d+\s\*{20,25}\s+'
# MULTILINE lets the "^Hand #" alternative match at the start of any line.
re_SplitArchive = re.compile(__ARCHIVE_PRE_HEADER_REGEX, re.MULTILINE)
class IdentifySite:
    """Work out which site's converter recognises each hand-history file.

    Attributes:
        in_path:  path handed to the constructor (a file or a directory).
        config:   configuration object; its ``hhcs`` mapping supplies the
                  converter and summary-importer names for each site.
        db:       ``Database.Database`` built from ``config``; used to look
                  up site ids.
        sitelist: dict mapping site id ->
                  ``(site, filter, filter_name, summary)``.
        filelist: dict mapping file path -> ``''`` while unidentified, or
                  ``[site, filter, kodec, archive]`` once a converter's
                  regex matched the start of the file.
    """

    def __init__(self, config, in_path = '-', list = None):
        """Build the site list, then identify the requested files.

        Args:
            config:  configuration object (see class docstring).
            in_path: file or directory to examine when ``list`` is empty.
            list:    optional iterable of file paths; when non-empty it is
                     used instead of ``in_path``.  (The name shadows the
                     builtin but is kept because callers may pass it by
                     keyword.)
        """
        self.in_path = in_path
        self.config = config
        self.db = Database.Database(self.config)
        self.sitelist = {}
        self.filelist = {}
        self.generateSiteList()
        if list:
            for file in list:
                self.idSite(file, self.sitelist)
        elif os.path.isdir(self.in_path):
            self.walkDirectory(self.in_path, self.sitelist)
        else:
            self.idSite(self.in_path, self.sitelist)

    def get_filelist(self):
        """Return the dict of examined files (see class docstring)."""
        return self.filelist

    def generateSiteList(self):
        """Fill ``self.sitelist`` from the converters listed in ``config.hhcs``.

        For every site the database id is looked up; sites for which the
        lookup does not return exactly one row are left out.  The result is
        a plain dict keyed by site id.
        """
        for site, hhc in self.config.hhcs.items():
            filter = hhc.converter
            filter_name = filter.replace("ToFpdb", "")
            summary = hhc.summaryImporter
            result = self.db.get_site_id(site)
            if len(result) == 1:
                self.sitelist[result[0][0]] = (site, filter, filter_name, summary)

    def walkDirectory(self, dir, sitelist):
        """Recursively run ``idSite`` on every file below ``dir``."""
        dir = os.path.abspath(dir)
        # os.listdir() never returns "." or "..", so no filtering is needed.
        for file in os.listdir(dir):
            nfile = os.path.join(dir, file)
            if os.path.isdir(nfile):
                self.walkDirectory(nfile, sitelist)
            else:
                self.idSite(nfile, sitelist)

    def __listof(self, x):
        """Return ``x`` unchanged if it is a list or tuple, else ``[x]``."""
        if isinstance(x, (list, tuple)):
            return x
        return [x]

    def idSite(self, file, sitelist):
        """Identify the site a hand-history (or summary) file came from.

        Only paths ending in ``.txt`` or ``.xml`` are considered.  Such a
        file is first recorded in ``self.filelist`` with the value ``''``.
        Then, for every site in ``sitelist`` and every codepage of that
        site's converter class, the first 2000 characters are decoded and
        searched with the converter's header regex, falling back to the
        summary importer's ``re_TourneyInfo`` when the site has one.  On a
        match the entry becomes ``[site, filter, kodec, archive]``, where
        ``filter`` is the summary importer name if that is what matched and
        ``archive`` records whether an archive separator line was seen.

        Args:
            file:     path of the file to examine.
            sitelist: dict of site id ->
                      ``(site, filter, filter_name, summary)``.
        """
        if not file.endswith(('.txt', '.xml')):
            return
        self.filelist[file] = ''
        archive = False
        # NOTE(review): every site is tried even after a match, so the last
        # matching site in iteration order wins, and ``archive`` stays True
        # once set.  Kept as-is; confirm whether first-match was intended.
        for info in sitelist.values():
            site, filter, filter_name, summary = info[:4]
            mod = __import__(filter)
            obj = getattr(mod, filter_name, None)
            sobj = None
            if summary:
                smod = __import__(summary)
                sobj = getattr(smod, summary, None)
            if obj is None:
                # The converter module does not define the expected class;
                # there is nothing to match this site with.
                continue

            for kodec in self.__listof(obj.codepage):
                try:
                    in_fh = codecs.open(file, 'r', kodec)
                    try:
                        whole_file = in_fh.read(2000)
                    finally:
                        # Close even when decoding fails part-way through.
                        in_fh.close()
                except (IOError, OSError, UnicodeError, LookupError):
                    # Unreadable file, wrong encoding or unknown codec name:
                    # try the next codepage.
                    continue

                try:
                    if filter_name in ('OnGame', 'Winamax'):
                        m = obj.re_HandInfo.search(whole_file)
                    elif filter_name == 'PartyPoker':
                        # Equality, not ``in ('PartyPoker')``: the latter is
                        # a substring test on a plain string.
                        m = (obj.re_GameInfoRing.search(whole_file)
                             or obj.re_GameInfoTrny.search(whole_file))
                    else:
                        m = obj.re_GameInfo.search(whole_file)
                    if m and re_SplitArchive.search(whole_file):
                        archive = True
                    if not m and summary:
                        m = sobj.re_TourneyInfo.search(whole_file)
                        if m:
                            filter = summary
                except AttributeError:
                    # The converter or summary class lacks the regex this
                    # branch needs; treat it as "no match" for this codepage.
                    continue

                if m:
                    self.filelist[file] = [site, filter, kodec, archive]
                    break
def main(argv=None):
    """Identify the regression test files and print the results.

    Loads ``HUD_config.test.xml``, runs ``IdentifySite`` over the
    ``regression-test-files`` directory (resolved against the current
    working directory), then prints the site list followed by the
    per-file identification results.

    Args:
        argv: command-line arguments; defaults to ``sys.argv[1:]``.  The
              value is not consulted by the report itself.

    Returns:
        None.
    """
    if argv is None:
        argv = sys.argv[1:]

    test_config = Configuration.Config(file = "HUD_config.test.xml")
    regression_dir = os.path.abspath('regression-test-files')
    identifier = IdentifySite(test_config, regression_dir)

    # Single-argument print(...) behaves the same as the print statement.
    print("\n----------- SITE LIST -----------")
    for site_id, site_info in identifier.sitelist.items():
        print("%s %s" % (site_id, site_info))
    print("----------- END SITE LIST -----------")

    print("\n----------- ID REGRESSION FILES -----------")
    for path, identified in identifier.filelist.items():
        print("%s %s" % (path, identified))
    print("----------- END ID REGRESSION FILES -----------")
# Script entry point: run the report and exit with whatever main() returns.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)