Fullscreen support, UI fixes, reset improved
[smpy-maemo.git] / mechanize / _msiecookiejar.py
blobf590a844ef6a25a1c4d129555cf5dc7e2629fefd
1 """Microsoft Internet Explorer cookie loading on Windows.
3 Copyright 2002-2003 Johnny Lee <typo_pl@hotmail.com> (MSIE Perl code)
4 Copyright 2002-2006 John J Lee <jjl@pobox.com> (The Python port)
6 This code is free software; you can redistribute it and/or modify it
7 under the terms of the BSD or ZPL 2.1 licenses (see the file
8 COPYING.txt included with the distribution).
10 """
12 # XXX names and comments are not great here
14 import os, re, time, struct, logging
15 if os.name == "nt":
16 import _winreg
18 from _clientcookie import FileCookieJar, CookieJar, Cookie, \
19 MISSING_FILENAME_TEXT, LoadError
21 debug = logging.getLogger("mechanize").debug
def regload(path, leaf):
    """Read one value from the registry under HKEY_CURRENT_USER.

    path: registry key path, relative to HKEY_CURRENT_USER
    leaf: name of the value to read from that key

    Returns the value's data, or None if querying the value raises
    WindowsError.  An error opening the key itself is not caught here and
    propagates to the caller.

    """
    # Only a read is performed, so request read access rather than
    # KEY_ALL_ACCESS, which can be refused for keys the user cannot write.
    key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, path, 0,
                          _winreg.KEY_READ)
    try:
        try:
            value = _winreg.QueryValueEx(key, leaf)[0]
        except WindowsError:
            value = None
    finally:
        # Release the handle explicitly instead of waiting for garbage
        # collection.
        _winreg.CloseKey(key)
    return value
# 1970 Jan 01 00:00:00 in Win32 FILETIME.  No "L" suffix: integers are
# promoted to arbitrary precision automatically, and the suffix is a syntax
# error on Python 3.
WIN32_EPOCH = 0x019db1ded53e8000


def epoch_time_offset_from_win32_filetime(filetime):
    """Convert from win32 filetime to seconds-since-epoch value.

    MSIE stores create and expire times as Win32 FILETIME, which is 64
    bits of 100 nanosecond intervals since Jan 01 1601.

    mechanize expects time in 32-bit value expressed in seconds since the
    epoch (Jan 01 1970).

    filetime: integer FILETIME value

    Returns the whole number of seconds between WIN32_EPOCH and filetime
    (any fractional second is discarded).

    Raises ValueError if filetime is earlier than WIN32_EPOCH.

    """
    if filetime < WIN32_EPOCH:
        raise ValueError("filetime (%d) is before epoch (%d)" %
                         (filetime, WIN32_EPOCH))

    # 10000000 intervals of 100 ns make up one second.
    return (filetime - WIN32_EPOCH) // 10000000
def binary_to_char(c):
    """Return one byte formatted as two uppercase hexadecimal digits.

    c: a single-character string, or an integer byte value (which is what
       iterating over a bytes object yields on Python 3)

    """
    if not isinstance(c, int):
        c = ord(c)
    return "%02X" % c


def binary_to_str(d):
    """Return the bytes of d as one string of uppercase hex digit pairs."""
    return "".join(map(binary_to_char, d))
class MSIEBase:
    """Parsing and loading of MSIE cookie files and the index.dat index.

    This class does not stand alone: its methods use self.delayload,
    self.load and CookieJar.set_cookie(self, ...), which are not defined
    here, so it is meant to be combined with a cookie jar class that
    supplies them.

    """
    magic_re = re.compile(r"Client UrlCache MMF Ver \d\.\d.*")
    padding = "\x0d\xf0\xad\x0b"

    msie_domain_re = re.compile(r"^([^/]+)(/.*)$")
    # Raw strings: "\:" and "\@" are not valid string escapes.  The regex
    # engine interprets \x21-\xFF itself, so matching is unchanged.
    cookie_re = re.compile(r"Cookie\:.+\@([\x21-\xFF]+).*?"
                           r"(.+\@[\x21-\xFF]+\.txt)")

    # path under HKEY_CURRENT_USER from which to get location of index.dat
    reg_path = r"software\microsoft\windows" \
               r"\currentversion\explorer\shell folders"
    reg_key = "Cookies"

    def __init__(self):
        # Maps domain -> (cookie_file, ignore_discard, ignore_expires) for
        # cookie files whose loading has been deferred.
        self._delayload_domains = {}

    def _delayload_domain(self, domain):
        """Load the deferred cookie file for domain, if there is one."""
        # if necessary, lazily load cookies for this domain
        delayload_info = self._delayload_domains.get(domain)
        if delayload_info is not None:
            cookie_file, ignore_discard, ignore_expires = delayload_info
            try:
                self.load_cookie_data(cookie_file,
                                      ignore_discard, ignore_expires)
            except (LoadError, IOError):
                debug("error reading cookie file, skipping: %s", cookie_file)
            else:
                # Only forget the domain once its file loaded successfully.
                del self._delayload_domains[domain]

    def _cookie_file_re(self, username):
        """Return the compiled pattern locating username's cookie file.

        Group 1 is the domain (possibly followed by a path) recorded after
        "Cookie:<username>@"; group 2 is the "<username>@....txt" file
        name.  The username is escaped so that characters special to
        regular expressions are matched literally.  Matching is
        case-insensitive.

        """
        user = re.escape(username)
        return re.compile(r"Cookie\:%s\@([\x21-\xFF]+).*?"
                          r"(%s\@[\x21-\xFF]+\.txt)" % (user, user), re.I)

    def _load_cookies_from_file(self, filename):
        """Parse one MSIE cookie text file into a list of dicts.

        Each record is nine lines: key, value, domain/path, flags, low and
        high words of the expiry time, low and high words of the creation
        time, and a "*" separator.  Parsing stops at end of file or at the
        first incomplete or malformed record; records read before that
        point are still returned.  Records whose domain/path line does not
        match msie_domain_re are left out.

        Each returned dict has the keys KEY, VALUE, DOMAIN, PATH, FLAGS,
        HIXP, LOXP, HICREATE and LOCREATE.

        """
        debug("Loading MSIE cookies file: %s", filename)
        cookies = []

        cookies_fh = open(filename)

        try:
            rl = cookies_fh.readline

            def getlong():
                return long(rl().rstrip())

            def getstr():
                return rl().rstrip()

            while 1:
                key = rl()
                if key == "":
                    break

                key = key.rstrip()
                value = getstr()
                domain_path = getstr()
                try:
                    flags = getlong()  # 0x2000 bit is for secure I think
                    lo_expire = getlong()
                    hi_expire = getlong()
                    lo_create = getlong()
                    hi_create = getlong()
                except ValueError:
                    # A missing or non-numeric field means the record is
                    # truncated or corrupt; stop, as for any other
                    # incomplete record.
                    break
                sep = getstr()

                if "" in (key, value, domain_path) or (sep != "*"):
                    break

                m = self.msie_domain_re.search(domain_path)
                if m:
                    domain = m.group(1)
                    path = m.group(2)

                    cookies.append({"KEY": key, "VALUE": value,
                                    "DOMAIN": domain, "PATH": path,
                                    "FLAGS": flags, "HIXP": hi_expire,
                                    "LOXP": lo_expire, "HICREATE": hi_create,
                                    "LOCREATE": lo_create})
        finally:
            cookies_fh.close()

        return cookies

    def load_cookie_data(self, filename,
                         ignore_discard=False, ignore_expires=False):
        r"""Load cookies from file containing actual cookie data.

        Old cookies are kept unless overwritten by newly loaded ones.

        You should not call this method if the delayload attribute is set.

        I think each of these files contain all cookies for one user, domain,
        and path.

        filename: file containing cookies -- usually found in a file like
         C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt

        """
        now = int(time.time())

        cookie_data = self._load_cookies_from_file(filename)

        for cookie in cookie_data:
            flags = cookie["FLAGS"]
            secure = ((flags & 0x2000) != 0)
            # Reassemble the 64-bit FILETIME from its two 32-bit halves.
            filetime = (cookie["HIXP"] << 32) + cookie["LOXP"]
            expires = epoch_time_offset_from_win32_filetime(filetime)
            # Cookies already past their expiry time are marked discard.
            discard = expires < now
            domain = cookie["DOMAIN"]
            initial_dot = domain.startswith(".")
            if initial_dot:
                domain_specified = True
            else:
                # MSIE 5 does not record whether the domain cookie-attribute
                # was specified.
                # Assuming it wasn't is conservative, because with strict
                # domain matching this will match less frequently; with regular
                # Netscape tail-matching, this will match at exactly the same
                # times that domain_specified = True would.  It also means we
                # don't have to prepend a dot to achieve consistency with our
                # own & Mozilla's domain-munging scheme.
                domain_specified = False

            # assume path_specified is false
            # XXX is there other stuff in here? -- eg. comment, commentURL?
            c = Cookie(0,
                       cookie["KEY"], cookie["VALUE"],
                       None, False,
                       domain, domain_specified, initial_dot,
                       cookie["PATH"], False,
                       secure,
                       expires,
                       discard,
                       None,
                       None,
                       {"flags": flags})
            if not ignore_discard and c.discard:
                continue
            if not ignore_expires and c.is_expired(now):
                continue
            CookieJar.set_cookie(self, c)

    def load_from_registry(self, ignore_discard=False, ignore_expires=False,
                           username=None):
        """Load cookies via the index file whose directory the registry names.

        The directory is read from reg_path / reg_key under
        HKEY_CURRENT_USER, and the INDEX.DAT file in it is passed to
        self.load.

        username: only required on win9x

        """
        cookies_dir = regload(self.reg_path, self.reg_key)
        filename = os.path.normpath(os.path.join(cookies_dir, "INDEX.DAT"))
        self.load(filename, ignore_discard, ignore_expires, username)

    def _really_load(self, index, filename, ignore_discard, ignore_expires,
                     username):
        """Scan an open index.dat file and load the cookie files it lists.

        index: open file object for the index file
        filename: path of the index file; the cookie files are looked up
         in the same directory
        username: defaults to the lower-cased USERNAME environment variable

        If self.delayload is false each cookie file found is loaded
        immediately; otherwise it is recorded in _delayload_domains for
        loading later.

        Raises LoadError if the file is too short, lacks the expected
        signature, or contains a record with an impossible size.

        """
        if username is None:
            username = os.environ['USERNAME'].lower()

        cookie_dir = os.path.dirname(filename)

        data = index.read(256)
        if len(data) != 256:
            raise LoadError("%s file is too short" % filename)

        # Cookies' index.dat file starts with 32 bytes of signature
        # followed by an offset to the first record, stored as a little-
        # endian DWORD.
        sig, size, data = data[:32], data[32:36], data[36:]
        size = struct.unpack("<L", size)[0]

        # check that sig is valid
        if not self.magic_re.match(sig) or size != 0x4000:
            raise LoadError("%s ['%s' %s] does not seem to contain cookies" %
                            (str(filename), sig, size))

        # skip to start of first record
        index.seek(size, 0)

        sector = 128  # size of sector in bytes

        # The pattern depends only on the username, so build it once.
        cookie_re = self._cookie_file_re(username)

        while 1:
            # Cookies are usually in two contiguous sectors, so read in two
            # sectors and adjust if not a Cookie.
            to_read = 2 * sector
            data = index.read(to_read)
            if len(data) != to_read:
                break

            # Each record starts with a 4-byte signature and a count
            # (little-endian DWORD) of sectors for the record.
            sig, size, data = data[:4], data[4:8], data[8:]
            size = struct.unpack("<L", size)[0]

            # bytes of this record beyond the two sectors already read
            to_read = (size - 2) * sector

            if sig != "URL ":
                if sig not in ("HASH", "LEAK",
                               self.padding, "\x00\x00\x00\x00"):
                    # This used to be an assert on a (condition, message)
                    # tuple, which is always true; unknown records were
                    # therefore skipped, and still are.
                    debug("unrecognized MSIE index.dat record: %s",
                          binary_to_str(sig))
                if sig == "\x00\x00\x00\x00":
                    # assume we've got all the cookies, and stop
                    break
                if sig == self.padding:
                    continue
                # skip the rest of this record
                if to_read < 0:
                    # A record smaller than the two sectors already read
                    # would make us seek backwards.
                    raise LoadError(
                        "%s contains a record with invalid size "
                        "(%d sectors)" % (filename, size))
                index.seek(to_read, 1)
                continue

            # read in rest of record if necessary
            if size > 2:
                more_data = index.read(to_read)
                if len(more_data) != to_read:
                    break
                data = data + more_data

            m = cookie_re.search(data)
            if m:
                cookie_file = os.path.join(cookie_dir, m.group(2))
                if not self.delayload:
                    try:
                        self.load_cookie_data(cookie_file,
                                              ignore_discard, ignore_expires)
                    except (LoadError, IOError):
                        debug("error reading cookie file, skipping: %s",
                              cookie_file)
                else:
                    # Keep only the host part as the delayload key.
                    domain = m.group(1)
                    i = domain.find("/")
                    if i != -1:
                        domain = domain[:i]

                    self._delayload_domains[domain] = (
                        cookie_file, ignore_discard, ignore_expires)
class MSIECookieJar(MSIEBase, FileCookieJar):
    """FileCookieJar that reads from the Windows MSIE cookies database.

    MSIECookieJar can read the cookie files of Microsoft Internet Explorer
    (MSIE) for Windows version 5 on Windows NT and version 6 on Windows XP and
    Windows 98.  Other configurations may also work, but are untested.  Saving
    cookies in MSIE format is NOT supported.  If you save cookies, they'll be
    in the usual Set-Cookie3 format, which you can read back in using an
    instance of the plain old CookieJar class.  Don't save using the same
    filename that you loaded cookies from, because you may succeed in
    clobbering your MSIE cookies index file!

    You should be able to have LWP share Internet Explorer's cookies like
    this (note you need to supply a username to load_from_registry if you're on
    Windows 9x or Windows ME):

    cj = MSIECookieJar(delayload=1)
    # find cookies index file in registry and load cookies from it
    cj.load_from_registry()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
    response = opener.open("http://example.com/")

    Iterating over a delayloaded MSIECookieJar instance will not cause any
    cookies to be read from disk.  To force reading of all cookies from disk,
    call read_all_cookies.  Note that the following methods iterate over self:
    clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__
    and as_string.

    Additional methods:

    load_from_registry(ignore_discard=False, ignore_expires=False,
                       username=None)
    load_cookie_data(filename, ignore_discard=False, ignore_expires=False)
    read_all_cookies()

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        # Initialise both bases explicitly: the delayload bookkeeping
        # first, then the file-backed cookie jar.
        MSIEBase.__init__(self)
        FileCookieJar.__init__(self, filename, delayload, policy)

    def set_cookie(self, cookie):
        """Store cookie, first loading any deferred cookies for its domain."""
        if self.delayload:
            self._delayload_domain(cookie.domain)
        CookieJar.set_cookie(self, cookie)

    def _cookies_for_request(self, request):
        """Return a list of cookies to be returned to server."""
        # Consider domains that already hold cookies together with those
        # whose cookie files have not been read yet.
        candidates = self._cookies.copy()
        candidates.update(self._delayload_domains)

        matched = []
        for domain in candidates.keys():
            matched.extend(self._cookies_for_domain(domain, request))
        return matched

    def _cookies_for_domain(self, domain, request):
        """Return the cookies for domain that may be sent with request."""
        if self._policy.domain_return_ok(domain, request):
            debug("Checking %s for cookies to return", domain)
            if self.delayload:
                # Make sure this domain's cookies are in memory first.
                self._delayload_domain(domain)
            return CookieJar._cookies_for_domain(self, domain, request)
        return []

    def read_all_cookies(self):
        """Eagerly read in all cookies."""
        if not self.delayload:
            return
        # Iterate over a snapshot of the keys, because successfully loaded
        # domains are removed from the mapping as we go.
        for domain in list(self._delayload_domains.keys()):
            self._delayload_domain(domain)

    def load(self, filename, ignore_discard=False, ignore_expires=False,
             username=None):
        """Load cookies from an MSIE 'index.dat' cookies index file.

        filename: full path to cookie index file
        username: only required on win9x

        """
        if filename is None:
            # Fall back to the filename given at construction time.
            filename = self.filename
            if filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)

        index = open(filename, "rb")

        try:
            self._really_load(index, filename, ignore_discard, ignore_expires,
                              username)
        finally:
            index.close()