# alternate view for reddit
# (and rewrite of reddit.ss, the tag cloud)

# TODO: insert feature list

import os, time, logging, re
import pickle, urllib2, simplejson
import web       # web.py: Storage, autodelegate, input, ...
import webutil   # local helpers: templates, decode_entities, static files

from breve.tags.html import tags as T
from breve.tags.html import xml
from BeautifulSoup import BeautifulSoup

LOG_FILE = "/home/protected/logs/reddit.log"
CACHE = "/home/protected/data/reddit.cache"
CACHE_TIMEOUT = 60 * 5  # 5 mins

logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(levelname)s %(message)s',
                    filename=LOG_FILE)

def now():
    "Current time, in whole seconds."
    return long(time.time())
29 """Decorator that caches the return value to disk. Cache is invalidated
30 after `CACHE_TIMEOUT' seconds.
32 See global variable `CACHE'
34 def cached_func(*args
):
35 if not os
.path
.exists(CACHE
):
36 pickle
.dump({}, file(CACHE
, 'w'))
37 cache
= pickle
.load(file(CACHE
))
38 # XXX: this must be like func.full_path_name
39 key
= (func
.func_name
, args
)
41 t
, val
= cache
.get(key
)
42 if now()-t
< CACHE_TIMEOUT
:
47 pickle
.dump(cache
, file(CACHE
, 'w'))
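
# Usage sketch for the cache decorator (hypothetical example; the
# decorator's def line was lost, so the name `cached' is an assumption):
#
#     @cached
#     def expensive(x):
#         return x * x    # recomputed at most once per CACHE_TIMEOUT
#
# The first call computes and pickles the value; repeat calls with the
# same arguments inside CACHE_TIMEOUT are served straight from CACHE.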

@cached  # assumed: pairs the fetcher with the disk cache above
def get_url(url):
    "Fetch `url', going through the disk cache."
    return urllib2.urlopen(url).read()

def short_url(full_url):
    """Strip the scheme and any trailing slash.

    >>> print short_url("http://reddit.com/")
    reddit.com
    """
    return full_url[7:].rstrip("/")
66 "hrs": re
.compile('(\d+) hours'),
67 "days_hrs": re
.compile('(\d+) Days (\d+) hours'),
68 "days": re
.compile('(\d+) days'),
69 "1 day": re
.compile('1 day')

def guess_time(human_readable):
    """Convert reddit's human-readable age into seconds.

    >>> guess_time("7 Days 21 hours")
    680400

    See test.py for more rigorous tests.
    """
    def mpat(k):
        return time_patterns[k].match(human_readable)

    m = mpat("hrs")
    if m:
        return int(m.group(1)) * 60*60
    m = mpat("days_hrs")
    if m:
        return int(m.group(1))*24*60*60 + \
               int(m.group(2))*60*60
    m = mpat("days")
    if m:
        return int(m.group(1))*24*60*60
    if mpat("1 day"):
        return 24*60*60
    logging.warn("Cannot guess time for: %s", human_readable)
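
# Worked example: "7 Days 21 hours" misses the `hrs' pattern (the match is
# anchored at the start, and " Days" follows the digits), hits `days_hrs',
# and yields 7*24*60*60 + 21*60*60 = 604800 + 75600 = 680400 seconds.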

class RedditLink(web.Storage):
    "One scraped reddit story: title, href, user, age, score, rank, ..."

    def __repr__(self):
        return ("<RedditLink> %s\n (%s)\n by %s (%s ago). "
                "%d points (#%d comments) at %s\n Rank: %d %s") % \
               (self.title, self.href, self.user,
                self.age, self.score or -1, self.comments, self.comments_href,
                self.rank, self.top and "*" or "")

class Reddit:
    "Scrape a reddit page into a list of RedditLink objects."

    def __init__(self, url):
        self.url = url
        self.links = []
        logging.debug("Parsing %s", self.url)
        soup = BeautifulSoup(get_url(self.url))

        def parse_link_html(a, b):
            # `a' is the first tr, `b' is the second;
            # together they render one complete reddit link
            l = RedditLink()

            # rank: either wrapped in an <a>, or bare text in the first td
            if a.td.a is not None:
                l.rank = int(a.td.a.string.strip()[:-1])  # get rid of the trailing .
            else:
                l.rank = int(a.td.string.strip()[:-1])

            title_a = a.find("td", colspan="3").find("a")
            l.title = webutil.decode_entities(title_a.string.strip())
            l.href = title_a["href"].encode("utf-8")
            l.href = l.href.replace("&amp;", "&")  # BeautifulSoup madness

            l.user = b.td.a.string.strip().encode("utf-8")

            # the score span reads e.g. "42 points"; it is missing on some rows
            if b.td.span is None:
                l.score = None
            else:
                n, s = b.td.span.string.strip().encode("utf-8").split()
                l.score = int(n)

            l.age = b.td.contents[2].strip().encode("utf-8")
            l.age = " ".join(l.age.split()[1:3])
            l.age = guess_time(l.age)

            l.comments_href = b.td.a.findNext("a")["href"].strip().encode("utf-8")
            if not l.comments_href.startswith("http://"):
                l.comments_href = self.url + l.comments_href

            # the link text is "comment" when there are none, "N comments" otherwise
            c = b.td.a.findNext("a").string.strip()
            if c == "comment":
                l.comments = 0
            else:
                l.comments = int(c.split()[0])

            return l

        table = soup.find("table", id="siteTable")
        tr = table.find("tr")
        while True:
            tr = a = tr.findNext('tr', attrs={"class": ["evenRow", "oddRow"]})
            if a is None:
                break
            tr = b = tr.findNext('tr', attrs={"class": ["evenRow", "oddRow"]})
            self.links.append(parse_link_html(a, b))
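
# Quick interactive check (a sketch; assumes reddit's markup still matches
# what the parser above expects):
#
#     r = Reddit("http://programming.reddit.com")
#     for link in r.links[:3]:
#         print link.rank, link.title, link.score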

class api:
    "JSON API handler. (The class name is assumed from the URL map below.)"

    GET = web.autodelegate('GET_')

    def GET_hot(self):
        "Get hot links for subreddit `sub'"
        # web.header("Content-Type", "%s; charset=utf-8" % "text/x-json")
        sub = web.input(sub="").sub
        r = Reddit(self.reddit_url_for(sub))
        print simplejson.dumps(r.links)

    def GET_(self):
        webutil.template(["Only one method at the moment: ",
                          T.code["/api/hot?sub=science"]])

    def reddit_url_for(self, sub):
        "Map a subreddit name to its base URL."
        if not sub:
            return "http://reddit.com"
        assert sub in ("programming", "science")
        return "http://%s.reddit.com" % sub

urls = (
    '/', 'index',
    '/static/(.*)', 'webutil.static',
    '/api', 'redirect api/',
    '/api/(.*)', 'api',
)

class index:
    def GET(self):
        webutil.template("index")

class test:
    "Scratch page for the stylesheet. (Handler name `test' is a guess.)"

    def GET(self):
        webutil.template([T.head[
            T.link(rel="stylesheet", href="static/reddit.css"),
        ], T.body[
            T.div(class_="transON")["I am a div"],
            T.span(class_="transON")["I am a span"],
            T.a(class_="transON")["I am a A"], "next",
        ]])

logging.info("Module loaded")