# Fullscreen support, UI fixes, reset improved
# [smpy-maemo.git] / mechanize / _headersutil.py
# blob d8fe47a0e70828b395553edd42223356d3bb4b70
"""Utility functions for HTTP header value parsing and construction.

Copyright 1997-1998, Gisle Aas
Copyright 2002-2006, John J. Lee

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).

"""
12 import os, re
13 from types import StringType
14 from types import UnicodeType
15 STRING_TYPES = StringType, UnicodeType
17 from _util import http2time
18 import _rfc3986
def is_html(ct_headers, url, allow_xhtml=False):
    """Return True if a response should be treated as HTML.

    ct_headers: Sequence of Content-Type headers
    url: Response URL
    allow_xhtml: If true, XHTML / XML media types and the ".xhtml"
        extension are accepted as well

    The first Content-Type header decides.  When there are no headers, or
    the headers contain nothing parseable (for example an empty header
    value), the decision falls back to the file extension of the URL.
    The media type comparison is case-insensitive.

    """
    parsed = split_header_words(ct_headers) if ct_headers else []
    if not parsed:
        # Guess from the extension of component 2 of the split URL
        # (presumably the path -- see _rfc3986.urlsplit).
        ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1]
        html_exts = [".htm", ".html"]
        if allow_xhtml:
            html_exts += [".xhtml"]
        return ext in html_exts
    # Use the first header: the name of its first key/value pair is the
    # media type.  split_header_words never returns an empty pair list, so
    # the indexing below is safe once `parsed` is non-empty.
    ct = parsed[0][0][0].lower()
    html_types = ["text/html"]
    if allow_xhtml:
        html_types += [
            "text/xhtml", "text/xml",
            "application/xml", "application/xhtml+xml",
            ]
    return ct in html_types
def unmatched(match):
    """Return the searched string with the text covered by *match* cut out."""
    begin, stop = match.span(0)
    subject = match.string
    # Everything before the match joined to everything after it.
    return subject[:begin] + subject[stop:]
token_re = re.compile(r"^\s*([^=\s;,]+)")
quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
value_re = re.compile(r"^\s*=\s*([^\s;,]*)")
escape_re = re.compile(r"\\(.)")
# Leading run of "=", whitespace and ";" that cannot start a token.
_junk_re = re.compile(r"^[=\s;]*")


def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens is parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    header_values must be a sequence of strings, not a single string (this
    is asserted).

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert type(header_values) not in STRING_TYPES
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        # Each pass consumes a prefix of `text`: a token (with optional
        # value), a header-separating comma, or a run of junk.
        while text:
            m = token_re.search(text)
            if m:
                text = unmatched(m)
                name = m.group(1)
                m = quoted_value_re.search(text)
                if m:  # quoted value
                    text = unmatched(m)
                    value = m.group(1)
                    # drop the backslash of every quoted-pair
                    value = escape_re.sub(r"\1", value)
                else:
                    m = value_re.search(text)
                    if m:  # unquoted value
                        text = unmatched(m)
                        value = m.group(1)
                        value = value.rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs:
                    result.append(pairs)
                pairs = []
            else:
                # skip junk
                non_junk = _junk_re.sub("", text)
                # Guard against looping forever: something must have been
                # removed.  (Counting substitutions would not do, since the
                # pattern also matches the empty string.)
                assert len(non_junk) < len(text), (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs:
            result.append(pairs)
    return result
join_escape_re = re.compile(r"([\"\\])")


def join_header_words(lists):
    """Do the inverse of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Pairs within one inner list are joined with "; ", and the inner
    lists are joined with ", ".  Attribute values are quoted if needed.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    def render(key, value):
        # A pair without a value is emitted as the bare key.
        if value is None:
            return key
        # Anything that is not purely word characters gets quoted, with
        # '"' and '\' backslash-escaped first.
        if re.search(r"^\w+$", value) is None:
            value = '"%s"' % join_escape_re.sub(r"\\\1", value)
        # Netscape cookies may have no name
        if key is None:
            return value
        return "%s=%s" % (key, value)

    rendered = []
    for pairs in lists:
        parts = [render(key, value) for key, value in pairs]
        if parts:
            rendered.append("; ".join(parts))
    return ", ".join(rendered)
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    ns_headers: sequence of header value strings.

    Returns a list with one list of (key, value) pairs per header that
    contained at least one non-empty parameter.  The value is None for a
    parameter without "=".  Known attribute names (every parameter except
    the first, which is the cookie itself) are lower-cased; an "expires"
    value is replaced by the result of http2time.  If no "version"
    attribute was seen, ("version", "0") is appended.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "version", "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for ii, param in enumerate(re.split(r";\s*", ns_header)):
            param = param.rstrip()
            if param == "":
                continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, maxsplit=1)
                k = k.lstrip()
            # The first parameter is the cookie name/value itself; only the
            # following ones are attributes.
            if ii != 0:
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.
                    version_set = True
                # A bare "expires" with no "=" has no value to convert.
                if k == "expires" and v is not None:
                    # convert expires date to seconds since epoch
                    if v.startswith('"'):
                        v = v[1:]
                    if v.endswith('"'):
                        v = v[:-1]
                    v = http2time(v)  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
def _test():
    """Run the doctests of the _headersutil module and return the result."""
    import doctest
    import _headersutil
    return doctest.testmod(_headersutil)


if __name__ == "__main__":
    _test()