Small update
[Hockey-Test.git] / alt / ahl15-16 / getahlgames.py
blob323588542852d6b9293e8735652405f725326821
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 import re
5 import os
6 import sys
7 import urllib
8 import urllib2
9 import cookielib
10 import StringIO
11 import gzip
12 import time
13 import datetime
14 import argparse
15 import urlparse
16 import sqlite3
17 import unicodedata
# Open the season database; the path is relative to the current working
# directory.  Both the connection and a cursor are kept at module level.
sqlcon = sqlite3.connect("../hockey15-16.db3")
sqlcur = sqlcon.cursor()

# League prefix used when building "<league>Teams" table names.
leaguename = "AHL"
# User-Agent header sent with every HTTP request made by this script.
useragent = "Mozilla/5.0 (Windows NT 6.1; rv:41.0) Gecko/20100101 Firefox/41.0"

# Date to fetch, as strings.  Up to three positional command-line arguments
# override the defaults in the order: day, month, year.  Any argument that is
# not supplied keeps its default value.
getforday, getformonth, getforyear = (
    sys.argv[1:4] + ["9", "10", "2015"][len(sys.argv[1:4]):]
)
def GetFull2Team(sqldatacon, TeamName):
    """Return the TeamName column for the row whose FullName equals TeamName.

    sqldatacon -- sequence whose first item provides execute() (the callers
                  in this file pass a (cursor, connection) pair).
    TeamName   -- value to look up in the FullName column; converted with
                  str() before use.

    The table queried is "<leaguename>Teams", where leaguename is the
    module-level league prefix.

    Raises TypeError when no row matches (fetchone() returns None).
    """
    # The table name cannot be a bound parameter, so it is still assembled
    # from the module-level constant.  The looked-up value is bound with "?"
    # so quotes inside a name cannot break the statement, and so a name that
    # happens to equal a column name is not resolved as an identifier.
    query = "SELECT TeamName FROM " + leaguename + "Teams WHERE FullName=?"
    row = sqldatacon[0].execute(query, (str(TeamName),)).fetchone()
    return str(row[0])
def GetTeam2Full(sqldatacon, TeamName):
    """Return the FullName column for the row whose TeamName equals TeamName.

    sqldatacon -- sequence whose first item provides execute() (the callers
                  in this file pass a (cursor, connection) pair).
    TeamName   -- value to look up in the TeamName column; converted with
                  str() before use.

    The table queried is "<leaguename>Teams", where leaguename is the
    module-level league prefix.

    Raises TypeError when no row matches (fetchone() returns None).
    """
    # The table name cannot be a bound parameter, so it is still assembled
    # from the module-level constant.  The looked-up value is bound with "?"
    # so quotes inside a name cannot break the statement, and so a name that
    # happens to equal a column name is not resolved as an identifier.
    query = "SELECT FullName FROM " + leaguename + "Teams WHERE TeamName=?"
    row = sqldatacon[0].execute(query, (str(TeamName),)).fetchone()
    return str(row[0])
# Cookie jar shared by every request made below.
geturls_cj = cookielib.CookieJar()


def _fetch_page(page_url, referer):
    """Download page_url and return its body with any compression removed.

    page_url -- absolute URL to fetch.
    referer  -- value sent in the Referer request header.

    A new opener bound to the shared cookie jar is built and installed for
    each call.  A "gzip" Content-Encoding is decoded with gzip; a "deflate"
    one is decoded with zlib (trying the zlib-wrapped form first and falling
    back to a raw deflate stream).  Any other response is returned as read.
    """
    import zlib  # local import: only needed for "deflate" responses

    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(geturls_cj))
    opener.addheaders = [
        ("Referer", referer),
        ("User-Agent", useragent),
        ("Accept-Encoding", "gzip, deflate"),
        ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"),
        ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"),
        ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
        ("Connection", "close"),
    ]
    urllib2.install_opener(opener)
    response = opener.open(page_url)
    try:
        content_encoding = response.info().get("Content-Encoding")
        body = response.read()
    finally:
        # Release the socket even when reading fails.
        response.close()
    if content_encoding == "gzip":
        return gzip.GzipFile(fileobj=StringIO.StringIO(body)).read()
    if content_encoding == "deflate":
        # GzipFile cannot read deflate data; some servers send a zlib-wrapped
        # stream and others a raw one, so try both.
        try:
            return zlib.decompress(body)
        except zlib.error:
            return zlib.decompress(body, -zlib.MAX_WBITS)
    return body


def _score_pattern(periods):
    """Return the regex matching one score-table row with `periods` columns.

    The row consists of `periods` plain numeric cells followed by one bold
    numeric cell, so each match yields periods + 1 captured groups.
    """
    cell_open = re.escape("<td align=\"center\" class=\"content\">")
    period_cells = re.escape("</td>").join([cell_open + "([0-9]+)"] * periods)
    return (period_cells + re.escape("</td>") + cell_open + re.escape("<b>")
            + "([0-9]+)" + re.escape("</b></td>"))


schedule_url = ("http://theahl.com/stats/schedule.php?date="
                + getforyear + "-" + getformonth + "-" + getforday)

# Zero-pad single-character day/month for the YYYYMMDD value in the output.
newgetforday = "0" + getforday if len(getforday) == 1 else getforday
newgetformonth = "0" + getformonth if len(getformonth) == 1 else getformonth

pre_get_todays_games = (re.escape("<a href=\"game-summary.php?game_id=")
                        + "([0-9]+)" + re.escape("\">Game Summary</a>"))
pre_team_text = (re.escape("<tr class=\"light\"><td class=\"content\" nowrap>")
                 + "(.*)" + re.escape("</td>"))

try:
    prehockey_text = _fetch_page(schedule_url, "http://www.nhl.com/")
    get_todays_games = re.findall(pre_get_todays_games, prehockey_text)

    if get_todays_games:
        # Standard output is itself Python source: this emits a print
        # statement for the generated script.
        print("print(\"Inserting \"+leaguename+\" Game Data From " +
              getformonth.lstrip('0')+"/"+getforday.lstrip('0')+"/"+getforyear+".\\n\");")

    for game_id in get_todays_games:
        curgame_text = _fetch_page(
            "http://theahl.com/stats/game-summary.php?game_id=" + game_id,
            schedule_url)

        # Try the widest table layout first (5 columns), then 4, then 3.
        for numpers in (5, 4, 3):
            curgame_score_text = re.findall(_score_pattern(numpers), curgame_text)
            if curgame_score_text:
                break

        # Team names are matched on an ASCII-folded copy of the page.
        team_text = re.findall(pre_team_text, unicodedata.normalize(
            'NFKD', curgame_text.decode("utf-8")).encode('ASCII', 'ignore'))

        if len(curgame_score_text) < 4 or len(team_text) < 4:
            # Page lacks the rows indexed below (e.g. no scores published
            # yet); warn on stderr so stdout keeps only generated code.
            sys.stderr.write("Skipping game " + game_id
                             + ": score or team data not found.\n")
            continue

        # Rows 0/1 and rows 2/3 are paired column by column as "b:a".
        # NOTE(review): presumably goals and shots per period -- confirm
        # against MakeHockeyGame's expected arguments.
        first_pairs = ",".join(
            [curgame_score_text[1][p] + ":" + curgame_score_text[0][p]
             for p in range(numpers)])
        second_pairs = ",".join(
            [curgame_score_text[3][p] + ":" + curgame_score_text[2][p]
             for p in range(numpers)])

        print("MakeHockeyGame((sqlcur, sqlcon), " + getforyear + newgetformonth
              + newgetforday + ", \"" + team_text[3] + "\", \"" + team_text[2]
              + "\", \"" + first_pairs + "\", \"" + second_pairs
              + "\", 0, False);")
finally:
    # Always release the database handle, even when a request fails.
    sqlcon.close()