From 865d57780930ba36f25008bc4c702b764a2d1d5e Mon Sep 17 00:00:00 2001
From: =?utf8?q?Stefan=20K=C3=B6gl?=
Date: Mon, 10 Jan 2011 09:55:44 +0200
Subject: [PATCH] include URLs of a feed in the response

---

Notes:
    feedservice/urls.py was revised during review: get_redirects() now
    closes the response it opens, and basic_sanitizing() lowercases only
    the host part of the netloc (userinfo is left untouched). The blob id
    on the "index" line of that file was not regenerated.

 feedservice/feeddownloader.py |  2 ++
 feedservice/urls.py           | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 feedservice/urls.py

diff --git a/feedservice/feeddownloader.py b/feedservice/feeddownloader.py
index 2bf3880..e767ad4 100644
--- a/feedservice/feeddownloader.py
+++ b/feedservice/feeddownloader.py
@@ -35,6 +35,7 @@ import feedcore
 from utils import parse_time
 import youtube
 from mimetype import get_mimetype, check_mimetype, get_podcast_types
+from urls import get_redirects
 
 #socket.setdefaulttimeout(10)
 fetcher = feedcore.Fetcher(USER_AGENT)
@@ -155,6 +156,7 @@ def parse_feed(feed_url, inline_logo, scale_to):
     podcast['description'] = feed.feed.get('subtitle', '')
     podcast['author'] = feed.feed.get('author', feed.feed.get('itunes_author', ''))
     podcast['language'] = feed.feed.get('language', '')
+    podcast['urls'] = get_redirects(feed_url)
 
     logo_url = get_podcast_logo(feed)
     podcast['logo'] = logo_url
diff --git a/feedservice/urls.py b/feedservice/urls.py
new file mode 100644
index 0000000..211ceb5
--- /dev/null
+++ b/feedservice/urls.py
@@ -0,0 +1,51 @@
+import urllib2
+import urlparse
+
+
+class RedirectCollector(urllib2.HTTPRedirectHandler):
+    """Collects all seen (intermediate) redirects for a HTTP request.
+
+    The target of every redirect is appended to ``self.urls`` in the order
+    in which redirect_request() is called.
+    """
+
+    def __init__(self, *args, **kwargs):
+        # Arguments are accepted but ignored.
+        self.urls = []
+
+    def redirect_request(self, req, fp, code, msg, hdrs, newurl):
+        """Record newurl, then delegate to the default implementation."""
+        self.urls.append(newurl)
+        return urllib2.HTTPRedirectHandler.redirect_request(
+            self, req, fp, code, msg, hdrs, newurl)
+
+
+def get_redirects(url):
+    """Returns the complete redirect chain, starting from url.
+
+    Opens url and returns a list holding url followed by every redirect
+    target seen, each passed through basic_sanitizing(). Errors raised
+    while opening url propagate to the caller.
+    """
+    collector = RedirectCollector()
+    collector.urls.append(url)
+    opener = urllib2.build_opener(collector)
+    response = opener.open(url)
+    # Only the redirect chain is of interest; close the response right away
+    # so the underlying connection is not leaked.
+    response.close()
+    return [basic_sanitizing(u) for u in collector.urls]
+
+
+def basic_sanitizing(url):
+    """Does basic sanitizing through urlparse and lowercases the host.
+
+    Only the host[:port] part of the netloc is lowercased; a user:password@
+    prefix is case-sensitive and is left untouched. An empty path is
+    replaced by '/'.
+    """
+    r = urlparse.urlsplit(url)
+    userinfo, at, hostport = r.netloc.rpartition('@')
+    netloc = userinfo + at + hostport.lower()
+    r2 = urlparse.SplitResult(r.scheme, netloc, r.path or '/', r.query, r.fragment)
+    return r2.geturl()
-- 
2.11.4.GIT