From c799110def0c99244a481665a53124738f4d0052 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Stefan=20K=C3=B6gl?=
Date: Wed, 26 Jan 2011 21:50:34 +0200
Subject: [PATCH] proper format negotiation between html/json
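
The Accept header is now parsed with its q values and matched against
the representations the service can produce. A rough sketch of the
selection logic in httputils.select_matching_option (example values
only):

    >>> select_matching_option(['text/html', 'application/json'],
    ...                        'text/html;q=0.2, application/json;q=0.8')
    'application/json'

The highest-quality accepted value that the service supports wins;
when nothing matches, the first supported value is used, and
send_response() falls back to JSON if even that is excluded.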
---
 feedservice/feeddownloader.py |  2 +-
 feedservice/httputils.py      | 83 +++++++++++++++++++++++++++++++++++++++++++
 feedservice/main.py           |  9 +++--
 feedservice/urls.py           | 39 --------------------
 4 files changed, 90 insertions(+), 43 deletions(-)
 create mode 100644 feedservice/httputils.py
 delete mode 100644 feedservice/urls.py

diff --git a/feedservice/feeddownloader.py b/feedservice/feeddownloader.py
index da8bb28..dcc03fa 100644
--- a/feedservice/feeddownloader.py
+++ b/feedservice/feeddownloader.py
@@ -47,7 +47,7 @@ def parse_feed(feed_url, inline_logo, scale_to, strip_html, modified, use_cache)
     """
 
     import feedparser
-    from urls import get_redirects
+    from httputils import get_redirects
 
     feed_url, feed_content, last_modified = urlstore.get_url(feed_url, use_cache)
 
diff --git a/feedservice/httputils.py b/feedservice/httputils.py
new file mode 100644
index 0000000..2ba173b
--- /dev/null
+++ b/feedservice/httputils.py
@@ -0,0 +1,83 @@
+import urllib2
+import urlparse
+
+class RedirectCollector(urllib2.HTTPRedirectHandler):
+    """Collects all seen (intermediate) redirects for an HTTP request"""
+
+    def __init__(self, *args, **kwargs):
+        self.urls = []
+
+    def redirect_request(self, req, fp, code, msg, hdrs, newurl):
+        self.urls.append(newurl)
+        return urllib2.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, hdrs, newurl)
+
+
+def get_redirects(url):
+    """ Returns the complete redirect chain, starting from url """
+    collector = RedirectCollector()
+    collector.urls.append(url)
+    opener = urllib2.build_opener(collector)
+    opener.open(url)
+    urls = map(basic_sanitizing, collector.urls)
+
+    # include the un-sanitized URL for easy matching of
+    # response to request URLs
+    if urls[0] != url:
+        urls = [url] + urls
+
+    return urls
+
+
+def basic_sanitizing(url):
+    """
+    Does basic sanitizing through urlparse and additionally converts the netloc to lowercase
+    """
+    r = urlparse.urlsplit(url)
+    netloc = r.netloc.lower()
+    r2 = urlparse.SplitResult(r.scheme, netloc, r.path or '/', r.query, r.fragment)
+    return r2.geturl()
+
+
+def parse_header_list(values):
+    """
+    Parses a list in an HTTP header with q parameters, such as
+    Accept-Language: de;q=1, en;q=0.5, *;q=0
+    and returns the results as a dictionary and a sorted list
+    """
+    import re, collections
+
+    q_re = re.compile('q=([01](\.\d{0,4})?|(\.\d{0,4}))')
+    default_q = 1
+
+    val_list = []
+
+    values = [x.strip() for x in values.split(',')]
+    for v in values:
+        v, q = v.split(';', 1) if ';' in v else (v, 'q=1')
+        match = q_re.match(q)
+        q = float(match.group(1)) if match else 1
+        if v in ('*', '*/*'):  # a wildcard sets the default quality
+            default_q = q
+        val_list.append((v, q))
+
+    val_list = sorted(val_list, key=lambda x: x[1], reverse=True)
+    val_dict = collections.defaultdict(lambda: default_q)
+    val_dict.update(dict(val_list))
+
+    return val_dict, val_list
+
+
+def select_matching_option(supported_values, accepted_values):
+    val_dict, val_list = parse_header_list(accepted_values)
+
+    # see if any of the accepted values is supported
+    for v, q in val_list:
+        if v in supported_values and q > 0:
+            return v
+
+    # otherwise fall back to the first supported value,
+    # unless the default quality excludes it
+    if val_dict[supported_values[0]] > 0:
+        return supported_values[0]
+    else:
+        return None
diff --git a/feedservice/main.py b/feedservice/main.py
index 3c2a7f6..1e78e0d 100644
--- a/feedservice/main.py
+++ b/feedservice/main.py
@@ -2,6 +2,7 @@ from google.appengine.ext import webapp
 from google.appengine.ext.webapp.util import run_wsgi_app
 
 import urllib
+import httputils
 import feeddownloader
 import simplejson as json
 
@@ -34,15 +35,17 @@ class Parse(webapp.RequestHandler):
             self.response.out.write('parameter url missing')
 
 
-    def send_response(self, podcasts, last_modified, format):
+    def send_response(self, podcasts, last_modified, formats):
         self.response.headers.add_header('Vary', 'Accept')
 
-        if 'json' in format:
+        format = httputils.select_matching_option(['text/html', 'application/json'], formats)
+
+        if format in (None, 'application/json'): # serve JSON as default
             content_type = 'application/json'
             content = json.dumps(podcasts, sort_keys=True, indent=None, separators=(',', ':'))
             from email import utils
             import time
-            self.response.headers.add_header('Last-Modified', utils.formatdate(time.mktime(last_modified)))
+            self.response.headers.add_header('Last-Modified', utils.formatdate(time.mktime(last_modified.timetuple())))
 
         else:
             content_type = 'text/html'
diff --git a/feedservice/urls.py b/feedservice/urls.py
deleted file mode 100644
index 69e67b2..0000000
--- a/feedservice/urls.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import urllib2
-import urlparse
-
-class RedirectCollector(urllib2.HTTPRedirectHandler):
-    """Collects all seen (intermediate) redirects for a HTTP request"""
-
-    def __init__(self, *args, **kwargs):
-        self.urls = []
-
-    def redirect_request(self, req, fp, code, msg, hdrs, newurl):
-        self.urls.append(newurl)
-        return urllib2.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, hdrs, newurl)
-
-
-def get_redirects(url):
-    """ Returns the complete redirect chain, starting from url """
-    collector = RedirectCollector()
-    collector.urls.append(url)
-    opener = urllib2.build_opener(collector)
-    opener.open(url)
-    urls = map(basic_sanitizing, collector.urls)
-
-    # include un-sanitized URL for easy matching of
-    #response to request URLs
-    if urls[0] != url:
-        urls = [url] + urls
-
-    return urls
-
-
-def basic_sanitizing(url):
-    """
-    does basic sanitizing through urlparse and additionally converts the netloc to lowercase
-    """
-    r = urlparse.urlsplit(url)
-    netloc = r.netloc.lower()
-    r2 = urlparse.SplitResult(r.scheme, netloc, r.path or '/', r.query, r.fragment)
-    return r2.geturl()
-
-- 
2.11.4.GIT
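
A quick way to exercise the negotiation against a running instance
(host, port and the /parse route are assumptions for illustration,
not part of this patch):

    import urllib2

    req = urllib2.Request('http://localhost:8080/parse?url=http://example.com/feed.xml')
    req.add_header('Accept', 'application/json')
    print urllib2.urlopen(req).info().gettype()  # expected: application/json

    req = urllib2.Request('http://localhost:8080/parse?url=http://example.com/feed.xml')
    req.add_header('Accept', 'text/html;q=1, application/json;q=0.1')
    print urllib2.urlopen(req).info().gettype()  # expected: text/html

Both responses carry Vary: Accept so caches keep the two
representations apart.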