1 #!/usr/bin/env python2.5
6 from BeautifulSoup
import BeautifulSoup
11 curdir
= os
.path
.dirname(os
.path
.abspath(__file__
))
12 con
= sqlite3
.connect(os
.path
.join(curdir
, 'titles.db'), isolation_level
=None)
15 create table if not exists notices (
18 create table if not exists games (
28 base_url
= "http://igo-kisen.hp.infoseek.co.jp/"
29 # base_url = "http://localhost/eidogo/titles/"
30 page
= urllib2
.urlopen(base_url
+ "topics.html")
31 soup
= BeautifulSoup(page
)
33 trs
= soup
.find("table", {"width": "1050"}).findAll("tr")
36 re_tags
= re
.compile("<[^>]+>")
41 tds
= tr
.findAll("td")
45 td0
= tds
.pop(0).contents
[0]
51 sig
.append(re_tags
.sub("", str(td
.contents
[0])))
55 cur
.execute("select * from notices where sig=?", (sig
,))
59 cur
.execute("insert into notices (sig) values (?)", (sig
,))
61 subpage_fn
= tr
.a
['href']
63 if (scraped_subpages
.count(subpage_fn
) > 0):
67 scraped_subpages
.append(subpage_fn
)
69 subpage
= urllib2
.urlopen(base_url
+ subpage_fn
)
73 subsoup
= BeautifulSoup(subpage
)
75 sgf_path
= os
.path
.normpath(os
.path
.join(os
.path
.dirname(os
.path
.abspath(__file__
)), '../sgf/titles'))
77 for a
in subsoup
.findAll("a"):
79 if (not fn
.endswith(".sgf")):
81 cur
.execute("select * from games where fn=?", (fn
,))
85 raw_sgf
= urllib2
.urlopen(base_url
+ fn
).read()
87 print " ! " + fn
+ " not found"
89 if (len(raw_sgf
) > 0):
91 sgf
= sgfparser
.Cursor(raw_sgf
)
92 info
= sgf
.getRootNode(0)
93 cur
.execute("insert into games (fn, dt, ev, pw, pb, re) values (?,?,?,?,?,?)",
94 (fn
, info
['DT'][0], info
['EV'][0], info
['PW'][0] + ' ' + info
['WR'][0], info
['PB'][0] + ' ' + info
['BR'][0], info
['RE'][0]))
95 f
= open(os
.path
.join(sgf_path
, fn
), "w")
99 cur
.execute("select * from games order by dt desc limit 250")
100 rows
= cur
.fetchall()
102 f
= open(os
.path
.join(curdir
, 'titles.html'), "w")
103 f
.write("<table id='tourney-games'><tr><th>Date</th><th>Event</th><th>White</th><th>Black</th><th>Result</th></tr>")
106 if (cl
== " class='odd'"):
110 fn
= row
[0].replace(".sgf", "")
111 f
.write("<tr" + cl
+ ">")
113 f
.write("<td><a href='./#titles/" + fn
+ "'>" + col
+ "</a></td>")