1 """Microsoft Internet Explorer cookie loading on Windows.
3 Copyright 2002-2003 Johnny Lee <typo_pl@hotmail.com> (MSIE Perl code)
4 Copyright 2002-2006 John J Lee <jjl@pobox.com> (The Python port)
6 This code is free software; you can redistribute it and/or modify it
7 under the terms of the BSD or ZPL 2.1 licenses (see the file
8 COPYING.txt included with the distribution).
12 # XXX names and comments are not great here
14 import os
, re
, time
, struct
, logging
18 from _clientcookie
import FileCookieJar
, CookieJar
, Cookie
, \
19 MISSING_FILENAME_TEXT
, LoadError
21 debug
= logging
.getLogger("mechanize").debug
24 def regload(path
, leaf
):
25 key
= _winreg
.OpenKey(_winreg
.HKEY_CURRENT_USER
, path
, 0,
26 _winreg
.KEY_ALL_ACCESS
)
28 value
= _winreg
.QueryValueEx(key
, leaf
)[0]
# 1970 Jan 01 00:00:00 (the Unix epoch) expressed as a Win32 FILETIME, i.e.
# counted in 100 nanosecond intervals since Jan 01 1601.
WIN32_EPOCH = 0x019db1ded53e8000L
def epoch_time_offset_from_win32_filetime(filetime):
    """Translate a Win32 FILETIME into whole seconds since 1970 Jan 01.

    MSIE records cookie create and expire times as Win32 FILETIME values:
    64 bits of 100 nanosecond intervals since Jan 01 1601.

    filetime: FILETIME value as an integer

    Returns the number of complete seconds elapsed between WIN32_EPOCH and
    filetime (any fractional second is discarded).

    Raises ValueError if filetime lies before WIN32_EPOCH.

    """
    if filetime < WIN32_EPOCH:
        message = "filetime (%d) is before epoch (%d)" % (
            filetime, WIN32_EPOCH)
        raise ValueError(message)

    ticks_per_second = 10000000  # 100 ns intervals in one second
    whole_seconds, _leftover_ticks = divmod(
        filetime - WIN32_EPOCH, ticks_per_second)
    return whole_seconds
def binary_to_char(c):
    """Return the two-digit, upper-case hexadecimal code of character c."""
    code_point = ord(c)
    return "%02X" % (code_point,)
def binary_to_str(d):
    """Return a hex dump of d: binary_to_char of every item, concatenated."""
    hex_pairs = [binary_to_char(item) for item in d]
    return "".join(hex_pairs)
55 magic_re
= re
.compile(r
"Client UrlCache MMF Ver \d\.\d.*")
56 padding
= "\x0d\xf0\xad\x0b"
58 msie_domain_re
= re
.compile(r
"^([^/]+)(/.*)$")
59 cookie_re
= re
.compile("Cookie\:.+\@([\x21-\xFF]+).*?"
60 "(.+\@[\x21-\xFF]+\.txt)")
62 # path under HKEY_CURRENT_USER from which to get location of index.dat
63 reg_path
= r
"software\microsoft\windows" \
64 r
"\currentversion\explorer\shell folders"
68 self
._delayload
_domains
= {}
70 def _delayload_domain(self
, domain
):
71 # if necessary, lazily load cookies for this domain
72 delayload_info
= self
._delayload
_domains
.get(domain
)
73 if delayload_info
is not None:
74 cookie_file
, ignore_discard
, ignore_expires
= delayload_info
76 self
.load_cookie_data(cookie_file
,
77 ignore_discard
, ignore_expires
)
78 except (LoadError
, IOError):
79 debug("error reading cookie file, skipping: %s", cookie_file
)
81 del self
._delayload
_domains
[domain
]
83 def _load_cookies_from_file(self
, filename
):
84 debug("Loading MSIE cookies file: %s", filename
)
87 cookies_fh
= open(filename
)
91 key
= cookies_fh
.readline()
94 rl
= cookies_fh
.readline
95 def getlong(rl
=rl
): return long(rl().rstrip())
96 def getstr(rl
=rl
): return rl().rstrip()
100 domain_path
= getstr()
101 flags
= getlong() # 0x2000 bit is for secure I think
102 lo_expire
= getlong()
103 hi_expire
= getlong()
104 lo_create
= getlong()
105 hi_create
= getlong()
108 if "" in (key
, value
, domain_path
, flags
, hi_expire
, lo_expire
,
109 hi_create
, lo_create
, sep
) or (sep
!= "*"):
112 m
= self
.msie_domain_re
.search(domain_path
)
117 cookies
.append({"KEY": key
, "VALUE": value
, "DOMAIN": domain
,
118 "PATH": path
, "FLAGS": flags
, "HIXP": hi_expire
,
119 "LOXP": lo_expire
, "HICREATE": hi_create
,
120 "LOCREATE": lo_create
})
126 def load_cookie_data(self
, filename
,
127 ignore_discard
=False, ignore_expires
=False):
128 """Load cookies from file containing actual cookie data.
130 Old cookies are kept unless overwritten by newly loaded ones.
132 You should not call this method if the delayload attribute is set.
        I think each of these files contains all cookies for one user, domain,
137 filename: file containing cookies -- usually found in a file like
138 C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt
141 now
= int(time
.time())
143 cookie_data
= self
._load
_cookies
_from
_file
(filename
)
145 for cookie
in cookie_data
:
146 flags
= cookie
["FLAGS"]
147 secure
= ((flags
& 0x2000) != 0)
148 filetime
= (cookie
["HIXP"] << 32) + cookie
["LOXP"]
149 expires
= epoch_time_offset_from_win32_filetime(filetime
)
154 domain
= cookie
["DOMAIN"]
155 initial_dot
= domain
.startswith(".")
157 domain_specified
= True
159 # MSIE 5 does not record whether the domain cookie-attribute
161 # Assuming it wasn't is conservative, because with strict
162 # domain matching this will match less frequently; with regular
163 # Netscape tail-matching, this will match at exactly the same
164 # times that domain_specified = True would. It also means we
165 # don't have to prepend a dot to achieve consistency with our
166 # own & Mozilla's domain-munging scheme.
167 domain_specified
= False
169 # assume path_specified is false
170 # XXX is there other stuff in here? -- eg. comment, commentURL?
172 cookie
["KEY"], cookie
["VALUE"],
174 domain
, domain_specified
, initial_dot
,
175 cookie
["PATH"], False,
182 if not ignore_discard
and c
.discard
:
184 if not ignore_expires
and c
.is_expired(now
):
186 CookieJar
.set_cookie(self
, c
)
188 def load_from_registry(self
, ignore_discard
=False, ignore_expires
=False,
191 username: only required on win9x
194 cookies_dir
= regload(self
.reg_path
, self
.reg_key
)
195 filename
= os
.path
.normpath(os
.path
.join(cookies_dir
, "INDEX.DAT"))
196 self
.load(filename
, ignore_discard
, ignore_expires
, username
)
198 def _really_load(self
, index
, filename
, ignore_discard
, ignore_expires
,
200 now
= int(time
.time())
203 username
= os
.environ
['USERNAME'].lower()
205 cookie_dir
= os
.path
.dirname(filename
)
207 data
= index
.read(256)
209 raise LoadError("%s file is too short" % filename
)
211 # Cookies' index.dat file starts with 32 bytes of signature
212 # followed by an offset to the first record, stored as a little-
214 sig
, size
, data
= data
[:32], data
[32:36], data
[36:]
215 size
= struct
.unpack("<L", size
)[0]
217 # check that sig is valid
218 if not self
.magic_re
.match(sig
) or size
!= 0x4000:
219 raise LoadError("%s ['%s' %s] does not seem to contain cookies" %
220 (str(filename
), sig
, size
))
222 # skip to start of first record
225 sector
= 128 # size of sector in bytes
230 # Cookies are usually in two contiguous sectors, so read in two
231 # sectors and adjust if not a Cookie.
233 d
= index
.read(to_read
)
234 if len(d
) != to_read
:
238 # Each record starts with a 4-byte signature and a count
239 # (little-endian DWORD) of sectors for the record.
240 sig
, size
, data
= data
[:4], data
[4:8], data
[8:]
241 size
= struct
.unpack("<L", size
)[0]
243 to_read
= (size
- 2) * sector
245 ## from urllib import quote
246 ## print "data", quote(data)
247 ## print "sig", quote(sig)
248 ## print "size in sectors", size
249 ## print "size in bytes", size*sector
250 ## print "size in units of 16 bytes", (size*sector) / 16
251 ## print "size to read in bytes", to_read
255 assert (sig
in ("HASH", "LEAK",
256 self
.padding
, "\x00\x00\x00\x00"),
257 "unrecognized MSIE index.dat record: %s" %
259 if sig
== "\x00\x00\x00\x00":
260 # assume we've got all the cookies, and stop
262 if sig
== self
.padding
:
264 # skip the rest of this record
268 index
.seek(to_read
, 1)
271 # read in rest of record if necessary
273 more_data
= index
.read(to_read
)
274 if len(more_data
) != to_read
: break
275 data
= data
+ more_data
277 cookie_re
= ("Cookie\:%s\@([\x21-\xFF]+).*?" % username
+
278 "(%s\@[\x21-\xFF]+\.txt)" % username
)
279 m
= re
.search(cookie_re
, data
, re
.I
)
281 cookie_file
= os
.path
.join(cookie_dir
, m
.group(2))
282 if not self
.delayload
:
284 self
.load_cookie_data(cookie_file
,
285 ignore_discard
, ignore_expires
)
286 except (LoadError
, IOError):
287 debug("error reading cookie file, skipping: %s",
295 self
._delayload
_domains
[domain
] = (
296 cookie_file
, ignore_discard
, ignore_expires
)
299 class MSIECookieJar(MSIEBase
, FileCookieJar
):
300 """FileCookieJar that reads from the Windows MSIE cookies database.
302 MSIECookieJar can read the cookie files of Microsoft Internet Explorer
303 (MSIE) for Windows version 5 on Windows NT and version 6 on Windows XP and
304 Windows 98. Other configurations may also work, but are untested. Saving
305 cookies in MSIE format is NOT supported. If you save cookies, they'll be
306 in the usual Set-Cookie3 format, which you can read back in using an
307 instance of the plain old CookieJar class. Don't save using the same
308 filename that you loaded cookies from, because you may succeed in
309 clobbering your MSIE cookies index file!
    You should be able to have mechanize share Internet Explorer's cookies like
312 this (note you need to supply a username to load_from_registry if you're on
313 Windows 9x or Windows ME):
315 cj = MSIECookieJar(delayload=1)
316 # find cookies index file in registry and load cookies from it
317 cj.load_from_registry()
318 opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
319 response = opener.open("http://example.com/")
321 Iterating over a delayloaded MSIECookieJar instance will not cause any
322 cookies to be read from disk. To force reading of all cookies from disk,
323 call read_all_cookies. Note that the following methods iterate over self:
324 clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__
329 load_from_registry(ignore_discard=False, ignore_expires=False,
331 load_cookie_data(filename, ignore_discard=False, ignore_expires=False)
    def __init__(self, filename=None, delayload=False, policy=None):
        """Initialise both base classes of the jar.

        filename, delayload and policy are forwarded unchanged, in that
        order, to FileCookieJar.__init__; MSIEBase.__init__ takes no extra
        arguments.

        """
        # Explicit base-class calls: MSIEBase is initialised first, then
        # FileCookieJar receives all the constructor arguments.
        MSIEBase.__init__(self)
        FileCookieJar.__init__(self, filename, delayload, policy)
339 def set_cookie(self
, cookie
):
341 self
._delayload
_domain
(cookie
.domain
)
342 CookieJar
.set_cookie(self
, cookie
)
344 def _cookies_for_request(self
, request
):
345 """Return a list of cookies to be returned to server."""
346 domains
= self
._cookies
.copy()
347 domains
.update(self
._delayload
_domains
)
348 domains
= domains
.keys()
351 for domain
in domains
:
352 cookies
.extend(self
._cookies
_for
_domain
(domain
, request
))
355 def _cookies_for_domain(self
, domain
, request
):
356 if not self
._policy
.domain_return_ok(domain
, request
):
358 debug("Checking %s for cookies to return", domain
)
360 self
._delayload
_domain
(domain
)
361 return CookieJar
._cookies
_for
_domain
(self
, domain
, request
)
363 def read_all_cookies(self
):
364 """Eagerly read in all cookies."""
366 for domain
in self
._delayload
_domains
.keys():
367 self
._delayload
_domain
(domain
)
369 def load(self
, filename
, ignore_discard
=False, ignore_expires
=False,
371 """Load cookies from an MSIE 'index.dat' cookies index file.
373 filename: full path to cookie index file
374 username: only required on win9x
378 if self
.filename
is not None: filename
= self
.filename
379 else: raise ValueError(MISSING_FILENAME_TEXT
)
381 index
= open(filename
, "rb")
384 self
._really
_load
(index
, filename
, ignore_discard
, ignore_expires
,