1 from datetime
import datetime
4 from email
import utils
6 from google
.appengine
.ext
import db
7 from google
.appengine
.api
import memcache
# Value of the User-Agent header sent with every outgoing request
USER_AGENT = 'mygpo-feedservice +http://mygpo-feedservice.appspot.com/'
class URLObject(db.Model):
    """
    Datastore model for a fetched URL together with the HTTP caching
    metadata (ETag, Expires, Last-Modified) that came with the response.

    NOTE(review): ``content`` is read by ``valid()`` and assigned in
    ``fetch_url()``, but no property declaration for it is visible in this
    excerpt -- confirm that the model declares it.
    """

    # URL the object was fetched from
    url = db.StringProperty(required=True)
    # value of the ETag response header, if any
    etag = db.StringProperty(required=False)
    # parsed Expires response header, if any
    expires = db.DateTimeProperty(required=False)
    # parsed Last-Modified response header, if any
    modified = db.DateTimeProperty(required=False)

    def expired(self):
        """
        Returns True if an expiry date is set and it is not in the future

        Objects without an expiry date are never considered expired.
        """
        # Always return a real bool (the bare ``and`` chain leaked None)
        return self.expires is not None and self.expires <= datetime.utcnow()

    def valid(self):
        """
        Returns True if the object carries non-empty content
        """
        # bool() instead of len(): content may be None, and len(None) would
        # raise TypeError in the middle of the cache check in get_url()
        return bool(self.content)

    # NOTE(review): the method header is missing from this excerpt;
    # __repr__ is assumed -- confirm against the original file.
    def __repr__(self):
        return '%s (%s, %s, %s)' % (
            self.url, self.etag, self.expires, self.modified)
def get_url(url, use_cache=True):
    """
    Returns a (url, content, modified) tuple for the given URL

    The cached copy (Memcache or Datastore) is used when use_cache is set
    and the copy is present, not expired and valid; in every other case
    the URL is fetched again.
    """
    entry = None
    if use_cache:
        entry = from_cache(url)

    # Same test as "not cached or expired or not valid", stated positively
    usable = entry and not entry.expired() and entry.valid()
    if usable:
        return entry.url, entry.content, entry.modified

    # Pass the stale entry along so fetch_url can reuse its metadata
    return fetch_url(url, entry)
# NOTE(review): the "def" line is missing from this excerpt; name and
# parameter are taken from the from_cache(url) call in get_url.
def from_cache(url):
    """
    Looks the URL up in Memcache first and falls back to the Datastore

    Returns the stored object, or whatever the Datastore query yields when
    Memcache has no (truthy) entry for the URL.
    """
    hit = memcache.get(url)
    if hit:
        return hit

    query = URLObject.all().filter('url =', url)
    return query.get()
def fetch_url(url, cached=None):
    """
    Fetches the given URL and stores the resulting object in the Cache

    If ``cached`` carries a modification date and/or an ETag, they are sent
    as If-Modified-Since / If-None-Match headers so that the server may
    answer "304 Not Modified"; in that case ``cached`` is reused as-is.

    Returns a (url, content, modified) tuple. HTTP errors other than a 304
    answer to a conditional request are re-raised as urllib2.HTTPError.
    """
    # Local import: only needed to build the If-Modified-Since header
    import calendar

    request = urllib2.Request(url)
    request.add_header('User-Agent', USER_AGENT)
    opener = urllib2.build_opener()

    modified = getattr(cached, 'modified', None)
    if modified:
        # ``modified`` is a naive UTC datetime (parse_header_date builds it
        # with utcfromtimestamp), so convert with timegm(); time.mktime()
        # would interpret it as local time. HTTP dates must end in "GMT".
        ts = calendar.timegm(modified.utctimetuple())
        request.add_header('If-Modified-Since',
                           utils.formatdate(ts, usegmt=True))

    etag = getattr(cached, 'etag', None)
    if etag:
        request.add_header('If-None-Match', etag)

    try:
        r = opener.open(request)

    except urllib2.HTTPError as e:
        # NOTE(review): the handler body is missing from this excerpt.
        # urllib2 reports "304 Not Modified" as an HTTPError; reusing the
        # cached object is the only way the return below can succeed.
        if e.code != 304 or cached is None:
            raise
        obj = cached

    else:
        try:
            content = r.read()
            headers = r.headers.dict
        finally:
            # Release the connection even if reading the body fails
            r.close()

        obj = cached or URLObject(url=url)
        obj.content = content
        obj.expires = parse_header_date(headers.get('expires', None))
        obj.modified = parse_header_date(headers.get('last-modified', None))
        obj.etag = headers.get('etag', None)

        # NOTE(review): no Datastore write is visible in this excerpt, but
        # from_cache() falls back to a Datastore query, so the object is
        # persisted here -- confirm against the original file.
        obj.put()
        memcache.set(url, obj)

    return obj.url, obj.content, obj.modified
def parse_header_date(date_str):
    """
    Parses dates in RFC2822 format to datetime objects

    The timezone offset contained in the string is honoured; the result is
    a naive datetime expressed in UTC.

    Returns None if date_str is empty/None or cannot be parsed (servers
    frequently send values such as "0" or "-1" in the Expires header).
    """
    if not date_str:
        return None

    try:
        # parsedate_tz keeps the UTC offset that parsedate would drop
        parsed = utils.parsedate_tz(date_str)
        if parsed is None:
            return None

        # mktime_tz yields a real UTC timestamp; time.mktime() would have
        # interpreted the fields in the server's local timezone instead
        ts = utils.mktime_tz(parsed)
        return datetime.utcfromtimestamp(ts)

    except (ValueError, OverflowError):
        # syntactically date-like, but out of the representable range
        return None