class RedirectCollector(urllib2.HTTPRedirectHandler):
    """Collects all seen (intermediate) redirects for a HTTP request.

    Every redirect target handed to ``redirect_request`` is appended to
    ``self.urls`` in the order it is seen, so after a request has been
    opened through an opener using this handler, ``urls`` holds the chain
    of redirect targets.
    """

    def __init__(self, *args, **kwargs):
        """Create a collector with an empty list of seen URLs.

        NOTE(review): the signature accepts arbitrary arguments but they
        are ignored here; confirm nothing is expected to be forwarded to
        the base handler.
        """
        # Per-instance list; callers may pre-seed it (e.g. with the start URL).
        self.urls = []

    def redirect_request(self, req, fp, code, msg, hdrs, newurl):
        """Record ``newurl`` and delegate to the standard redirect handling.

        The arguments and the return value are exactly those of
        ``urllib2.HTTPRedirectHandler.redirect_request``; the only addition
        is that ``newurl`` is appended to ``self.urls`` first.
        """
        self.urls.append(newurl)
        return urllib2.HTTPRedirectHandler.redirect_request(
            self, req, fp, code, msg, hdrs, newurl)
def get_redirects(url):
    """Return the complete redirect chain, starting from url.

    The URL is opened through an opener that uses a ``RedirectCollector``,
    so every redirect target met on the way is recorded. Each recorded URL
    is passed through ``basic_sanitizing`` before being returned.

    Returns a list of URLs whose first element is ``url`` exactly as given.
    Errors raised while opening the URL are not caught here.
    """
    collector = RedirectCollector()
    collector.urls.append(url)
    opener = urllib2.build_opener(collector)

    # The request has to be performed for the collector to see any
    # redirect; the response body is not needed, so close it right away.
    response = opener.open(url)
    response.close()

    # A list (rather than a lazy map object) so it can be indexed below.
    urls = [basic_sanitizing(seen) for seen in collector.urls]

    # include un-sanitized URL for easy matching of
    # response to request URLs
    # NOTE(review): how the raw URL is included is an assumption -- confirm
    # that putting it first when sanitizing changed it is what callers expect.
    if urls[0] != url:
        urls.insert(0, url)
    return urls
31 def basic_sanitizing(url
):
33 does basic sanitizing through urlparse and additionally converts the netloc to lowercase
35 r
= urlparse
.urlsplit(url
)
36 netloc
= r
.netloc
.lower()
37 r2
= urlparse
.SplitResult(r
.scheme
, netloc
, r
.path
or '/', r
.query
, r
.fragment
)