1 """HTTP cookie handling for web clients.
3 This module originally developed from my port of Gisle Aas' Perl module
4 HTTP::Cookies, from the libwww-perl library.
6 Docstrings, comments and debug strings in this code refer to the
7 attributes of the HTTP cookie system as cookie-attributes, to distinguish
8 them clearly from Python attributes.
14 MozillaCookieJar | LWPCookieJar \ \
18 | / MSIEDBCookieJar BSDDBCookieJar
22 Comments to John J Lee <jjl@pobox.com>.
25 Copyright 2002-2006 John J Lee <jjl@pobox.com>
26 Copyright 1997-1999 Gisle Aas (original libwww-perl code)
27 Copyright 2002-2003 Johnny Lee (original MSIE Perl code)
29 This code is free software; you can redistribute it and/or modify it
30 under the terms of the BSD or ZPL 2.1 licenses (see the file
31 COPYING.txt included with the distribution).
35 import sys
, re
, copy
, time
, struct
, urllib
, types
, logging
38 _threading
= threading
; del threading
40 import dummy_threading
41 _threading
= dummy_threading
; del dummy_threading
42 import httplib
# only for the default HTTP port
44 MISSING_FILENAME_TEXT
= ("a filename was not supplied (nor was the CookieJar "
45 "instance initialised with one)")
46 DEFAULT_HTTP_PORT
= str(httplib
.HTTP_PORT
)
48 from _headersutil
import split_header_words
, parse_ns_headers
49 from _util
import isstringlike
52 debug
= logging
.getLogger("mechanize.cookies").debug
55 def reraise_unmasked_exceptions(unmasked
=()):
56 # There are a few catch-all except: statements in this module, for
57 # catching input that's bad in unexpected ways.
58 # This function re-raises some exceptions we don't want to trap.
59 import mechanize
, warnings
60 if not mechanize
.USE_BARE_EXCEPT
:
62 unmasked
= unmasked
+ (KeyboardInterrupt, SystemExit, MemoryError)
63 etype
= sys
.exc_info()[0]
64 if issubclass(etype
, unmasked
):
66 # swallowed an exception
67 import traceback
, StringIO
68 f
= StringIO
.StringIO()
69 traceback
.print_exc(None, f
)
71 warnings
.warn("mechanize bug!\n%s" % msg
, stacklevel
=2)
74 IPV4_RE
= re
.compile(r
"\.\d+$")
76 """Return True if text is a host domain name."""
78 # This may well be wrong. Which RFC is HDN defined in, if any (for
79 # the purposes of RFC 2965)?
80 # For the current implementation, what about IPv6? Remember to look
81 # at other uses of IPV4_RE also, if change this.
82 return not (IPV4_RE
.search(text
) or
84 text
[0] == "." or text
[-1] == ".")
86 def domain_match(A
, B
):
87 """Return True if domain A domain-matches domain B, according to RFC 2965.
89 A and B may be host domain names or IP addresses.
93 Host names can be specified either as an IP address or a HDN string.
94 Sometimes we compare one host name with another. (Such comparisons SHALL
95 be case-insensitive.) Host A's name domain-matches host B's if
97 * their host name strings string-compare equal; or
99 * A is a HDN string and has the form NB, where N is a non-empty
100 name string, B has the form .B', and B' is a HDN string. (So,
101 x.y.com domain-matches .Y.com but not Y.com.)
103 Note that domain-match is not a commutative operation: a.b.c.com
104 domain-matches .c.com, but not the reverse.
107 # Note that, if A or B are IP addresses, the only relevant part of the
108 # definition of the domain-match algorithm is the direct string-compare.
116 has_form_nb
= not (i
== -1 or i
== 0)
119 B
.startswith(".") and
def liberal_is_HDN(text):
    """Return True if text looks roughly like a host domain name.

    Used when accepting/blocking domains.  Anything that IPV4_RE does not
    match is treated as a host domain name.

    """
    ip_like = IPV4_RE.search(text)
    return ip_like is None
131 def user_domain_match(A
, B
):
132 """For blocking/accepting domains.
134 A and B may be host domain names or IP addresses.
139 if not (liberal_is_HDN(A
) and liberal_is_HDN(B
)):
144 initial_dot
= B
.startswith(".")
145 if initial_dot
and A
.endswith(B
):
147 if not initial_dot
and A
== B
:
151 cut_port_re
= re
.compile(r
":\d+$")
152 def request_host(request
):
153 """Return request-host, as defined by RFC 2965.
155 Variation from RFC: returned value is lowercased, for convenient
159 url
= request
.get_full_url()
160 host
= _rfc3986
.urlsplit(url
)[1]
162 host
= request
.get_header("Host", "")
164 # remove port, if present
165 host
= cut_port_re
.sub("", host
, 1)
def eff_request_host(request):
    """Return the pair (request-host, effective request-host name).

    Both follow RFC 2965, except that both are lowercased.  When the
    request-host contains no dot and is not matched by IPV4_RE, the
    effective name is the request-host with ".local" appended; otherwise
    the two values are the same string.

    """
    host = request_host(request)
    undotted = "." not in host
    if undotted and not IPV4_RE.search(host):
        return host, host + ".local"
    return host, host
179 def request_path(request
):
180 """request-URI, as defined by RFC 2965."""
181 url
= request
.get_full_url()
182 path
, query
, frag
= _rfc3986
.urlsplit(url
)[2:]
183 path
= escape_path(path
)
184 req_path
= _rfc3986
.urlunsplit((None, None, path
, query
, frag
))
185 if not req_path
.startswith("/"):
186 req_path
= "/"+req_path
189 def request_port(request
):
190 host
= request
.get_host()
197 debug("nonnumeric port: '%s'", port
)
200 port
= DEFAULT_HTTP_PORT
203 # Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
204 # need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
205 HTTP_PATH_SAFE
= "%/;:@&=+$,!~*'()"
206 ESCAPED_CHAR_RE
= re
.compile(r
"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the matched %-escape with its two hex digits uppercased.

    match: regex match object whose group 1 holds the two hex digits
    (used as the replacement callback for ESCAPED_CHAR_RE.sub).

    """
    hex_digits = match.group(1)
    return "%" + hex_digits.upper()
209 def escape_path(path
):
210 """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
211 # There's no knowing what character encoding was used to create URLs
212 # containing %-escapes, but since we have to pick one to escape invalid
213 # path characters, we pick UTF-8, as recommended in the HTML 4.0
215 # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
216 # And here, kind of: draft-fielding-uri-rfc2396bis-03
217 # (And in draft IRI specification: draft-duerst-iri-05)
218 # (And here, for new URI schemes: RFC 2718)
219 if isinstance(path
, types
.UnicodeType
):
220 path
= path
.encode("utf-8")
221 path
= urllib
.quote(path
, HTTP_PATH_SAFE
)
222 path
= ESCAPED_CHAR_RE
.sub(uppercase_escaped_char
, path
)
226 """Return reach of host h, as defined by RFC 2965, section 1.
228 The reach R of a host name H is defined as follows:
232 - H is the host domain name of a host; and,
234 - H has the form A.B; and
236 - A has no embedded (that is, interior) dots; and
238 - B has at least one embedded dot, or B is the string "local".
239 then the reach of H is .B.
241 * Otherwise, the reach of H is H.
243 >>> reach("www.acme.com")
245 >>> reach("acme.com")
247 >>> reach("acme.local")
253 #a = h[:i] # this line is only here to show what a is
256 if is_HDN(h
) and (i
>= 0 or b
== "local"):
def is_third_party(request):
    """Return True if request is addressed to a third-party host.

    Per RFC 2965, section 3.3.6, an unverifiable transaction is to a
    third-party host if its request-host does not domain-match the reach
    of the request-host of the origin request.

    """
    host = request_host(request)
    # request.origin_req_host holds the origin request's request-host; it
    # was stuffed into request by _urllib2_support.AbstractHTTPHandler.
    origin_reach = reach(request.origin_req_host)
    if domain_match(host, origin_reach):
        return False
    return True
279 This class represents both Netscape and RFC 2965 cookies.
281 This is deliberately a very simple class. It just holds attributes. It's
282 possible to construct Cookie instances that don't comply with the cookie
283 standards. CookieJar.make_cookies is the factory function for Cookie
284 objects -- it deals with cookie parsing, supplying defaults, and
285 normalising to the representation used in this class. CookiePolicy is
286 responsible for checking them to see whether they should be accepted from
287 and returned to the server.
291 value: string (may be None);
292 port: string; None indicates no attribute was supplied (eg. "Port", rather
293 than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list
294 string (eg. "80,8080")
295 port_specified: boolean; true if a value was supplied with the Port
298 domain_specified: boolean; true if Domain was explicitly set
299 domain_initial_dot: boolean; true if Domain as set in HTTP header by server
300 started with a dot (yes, this really is necessary!)
302 path_specified: boolean; true if Path was explicitly set
303 secure: boolean; true if should only be returned over secure connection
304 expires: integer; seconds since epoch (RFC 2965 cookies should calculate
305 this value from the Max-Age attribute)
306 discard: boolean, true if this is a session cookie; (if no expires value,
310 rfc2109: boolean; true if cookie arrived in a Set-Cookie: (not
311 Set-Cookie2:) header, but had a version cookie-attribute of 1
312 rest: mapping of other cookie-attributes
314 Note that the port may be present in the headers, but unspecified ("Port"
315 rather than "Port=80", for example); if this is the case, port is None.
319 def __init__(self
, version
, name
, value
,
320 port
, port_specified
,
321 domain
, domain_specified
, domain_initial_dot
,
322 path
, path_specified
,
332 if version
is not None: version
= int(version
)
333 if expires
is not None: expires
= int(expires
)
334 if port
is None and port_specified
is True:
335 raise ValueError("if port is None, port_specified must be false")
337 self
.version
= version
341 self
.port_specified
= port_specified
342 # normalise case, as per RFC 2965 section 3.3.3
343 self
.domain
= domain
.lower()
344 self
.domain_specified
= domain_specified
345 # Sigh. We need to know whether the domain given in the
346 # cookie-attribute had an initial dot, in order to follow RFC 2965
347 # (as clarified in draft errata). Needed for the returned $Domain
349 self
.domain_initial_dot
= domain_initial_dot
351 self
.path_specified
= path_specified
353 self
.expires
= expires
354 self
.discard
= discard
355 self
.comment
= comment
356 self
.comment_url
= comment_url
357 self
.rfc2109
= rfc2109
359 self
._rest
= copy
.copy(rest
)
def has_nonstandard_attr(self, name):
    """Return True if a non-standard cookie-attribute called name is set.

    name: key to look up in the mapping of other (non-standard)
    cookie-attributes held in self._rest.

    """
    # The "in" operator replaces the deprecated dict.has_key(), which no
    # longer exists on Python 3 mappings.
    return name in self._rest
def get_nonstandard_attr(self, name, default=None):
    """Return the non-standard cookie-attribute name, or default if unset.

    name: key in the mapping of other cookie-attributes (self._rest)
    default: value returned when name is not present

    """
    extras = self._rest
    return extras.get(name, default)
def set_nonstandard_attr(self, name, value):
    """Store value as the non-standard cookie-attribute called name.

    Any value already held under name in self._rest is overwritten.

    """
    extras = self._rest
    extras[name] = value
def nonstandard_attr_keys(self):
    """Return the names of all non-standard cookie-attributes that are set.

    The result is whatever the keys() method of self._rest returns.

    """
    extras = self._rest
    return extras.keys()
def is_expired(self, now=None):
    """Return True if the cookie's expiry time has been reached.

    now: time to compare against, in seconds since the epoch; when None,
      the current time from time.time() is used

    A cookie whose expires attribute is None never counts as expired.

    """
    expiry = self.expires
    if expiry is None:
        return False
    if now is None:
        now = time.time()
    return expiry <= now
375 if self
.port
is None: p
= ""
376 else: p
= ":"+self
.port
377 limit
= self
.domain
+ p
+ self
.path
378 if self
.value
is not None:
379 namevalue
= "%s=%s" % (self
.name
, self
.value
)
381 namevalue
= self
.name
382 return "<Cookie %s for %s>" % (namevalue
, limit
)
386 for name
in ["version", "name", "value",
387 "port", "port_specified",
388 "domain", "domain_specified", "domain_initial_dot",
389 "path", "path_specified",
390 "secure", "expires", "discard", "comment", "comment_url",
392 attr
= getattr(self
, name
)
393 args
.append("%s=%s" % (name
, repr(attr
)))
394 args
.append("rest=%s" % repr(self
._rest
))
395 args
.append("rfc2109=%s" % repr(self
.rfc2109
))
396 return "Cookie(%s)" % ", ".join(args
)
400 """Defines which cookies get accepted from and returned to server.
402 May also modify cookies.
404 The subclass DefaultCookiePolicy defines the standard rules for Netscape
405 and RFC 2965 cookies -- override that if you want a customised policy.
407 As well as implementing set_ok and return_ok, implementations of this
408 interface must also supply the following attributes, indicating which
409 protocols should be used, and how. These can be read and set at any time,
410 though whether that makes complete sense from the protocol point of view is
415 netscape: implement netscape protocol
416 rfc2965: implement RFC 2965 protocol
418 WARNING: This argument will change or go away if it is not accepted into
419 the Python standard library in this form!
420 If true, treat RFC 2109 cookies as though they were Netscape cookies. The
421 default is for this attribute to be None, which means treat 2109 cookies
422 as RFC 2965 cookies unless RFC 2965 handling is switched off (which it is,
423 by default), and as Netscape cookies otherwise.
424 hide_cookie2: don't add Cookie2 header to requests (the presence of
425 this header indicates to the server that we understand RFC 2965
def set_ok(self, cookie, request):
    """Decide whether cookie should be accepted from the server.

    Implementations return true if, and only if, the cookie should be
    accepted.  Pre-expired cookies currently never get this far, because
    the CookieJar class deletes such cookies itself.

    cookie: mechanize.Cookie object
    request: object implementing the interface defined by
      CookieJar.extract_cookies.__doc__

    This interface method always raises NotImplementedError.

    """
    raise NotImplementedError
def return_ok(self, cookie, request):
    """Decide whether cookie should be returned to the server.

    Implementations return true if, and only if, the cookie should be
    returned.

    cookie: mechanize.Cookie object
    request: object implementing the interface defined by
      CookieJar.add_cookie_header.__doc__

    This interface method always raises NotImplementedError.

    """
    raise NotImplementedError
452 def domain_return_ok(self
, domain
, request
):
453 """Return false if cookies should not be returned, given cookie domain.
455 This is here as an optimization, to remove the need for checking every
456 cookie with a particular domain (which may involve reading many files).
457 The default implementations of domain_return_ok and path_return_ok
458 (return True) leave all the work to return_ok.
460 If domain_return_ok returns true for the cookie domain, path_return_ok
461 is called for the cookie path. Otherwise, path_return_ok and return_ok
462 are never called for that cookie domain. If path_return_ok returns
463 true, return_ok is called with the Cookie object itself for a full
464 check. Otherwise, return_ok is never called for that cookie path.
466 Note that domain_return_ok is called for every *cookie* domain, not
467 just for the *request* domain. For example, the function might be
468 called with both ".acme.com" and "www.acme.com" if the request domain is
469 "www.acme.com". The same goes for path_return_ok.
471 For argument documentation, see the docstring for return_ok.
476 def path_return_ok(self
, path
, request
):
477 """Return false if cookies should not be returned, given cookie path.
479 See the docstring for domain_return_ok.
485 class DefaultCookiePolicy(CookiePolicy
):
486 """Implements the standard rules for accepting and returning cookies.
488 Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is
489 switched off by default.
491 The easiest way to provide your own policy is to override this class and
492 call its methods in your overridden implementations before adding your own
496 class MyCookiePolicy(mechanize.DefaultCookiePolicy):
497 def set_ok(self, cookie, request):
498 if not mechanize.DefaultCookiePolicy.set_ok(
499 self, cookie, request):
501 if i_dont_want_to_store_this_cookie():
505 In addition to the features required to implement the CookiePolicy
506 interface, this class allows you to block and allow domains from setting
507 and receiving cookies. There are also some strictness switches that allow
508 you to tighten up the rather loose Netscape protocol rules a little bit (at
509 the cost of blocking some benign cookies).
511 A domain blacklist and whitelist is provided (both off by default). Only
512 domains not in the blacklist and present in the whitelist (if the whitelist
513 is active) participate in cookie setting and returning. Use the
514 blocked_domains constructor argument, and blocked_domains and
515 set_blocked_domains methods (and the corresponding argument and methods for
516 allowed_domains). If you set a whitelist, you can turn it off again by
519 Domains in block or allow lists that do not start with a dot must
520 string-compare equal. For example, "acme.com" matches a blacklist entry of
521 "acme.com", but "www.acme.com" does not. Domains that do start with a dot
522 are matched by more specific domains too. For example, both "www.acme.com"
523 and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does
524 not). IP addresses are an exception, and must match exactly. For example,
525 if blocked_domains contains "192.168.1.2" and ".168.1.2" 192.168.1.2 is
526 blocked, but 193.168.1.2 is not.
528 Additional Public Attributes:
530 General strictness switches
532 strict_domain: don't allow sites to set two-component domains with
533 country-code top-level domains like .co.uk, .gov.uk, .co.nz. etc.
534 This is far from perfect and isn't guaranteed to work!
536 RFC 2965 protocol strictness switches
538 strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable
539 transactions (usually, an unverifiable transaction is one resulting from
540 a redirect or an image hosted on another site); if this is false, cookies
541 are NEVER blocked on the basis of verifiability
543 Netscape protocol strictness switches
545 strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions
546 even to Netscape cookies
547 strict_ns_domain: flags indicating how strict to be with domain-matching
548 rules for Netscape cookies:
549 DomainStrictNoDots: when setting cookies, host prefix must not contain a
550 dot (eg. www.foo.bar.com can't set a cookie for .bar.com, because
551 www.foo contains a dot)
552 DomainStrictNonDomain: cookies that did not explicitly specify a Domain
553 cookie-attribute can only be returned to a domain that string-compares
554 equal to the domain that set the cookie (eg. rockets.acme.com won't
555 be returned cookies from acme.com that had no Domain cookie-attribute)
556 DomainRFC2965Match: when setting cookies, require a full RFC 2965
558 DomainLiberal and DomainStrict are the most useful combinations of the
559 above flags, for convenience
560 strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that
561 have names starting with '$'
562 strict_ns_set_path: don't allow setting cookies whose path doesn't
563 path-match request URI
567 DomainStrictNoDots
= 1
568 DomainStrictNonDomain
= 2
569 DomainRFC2965Match
= 4
572 DomainStrict
= DomainStrictNoDots|DomainStrictNonDomain
575 blocked_domains
=None, allowed_domains
=None,
576 netscape
=True, rfc2965
=False,
577 # WARNING: this argument will change or go away if it is not
578 # accepted into the Python standard library in this form!
579 # default, ie. treat 2109 as netscape iff not rfc2965
580 rfc2109_as_netscape
=None,
583 strict_rfc2965_unverifiable
=True,
584 strict_ns_unverifiable
=False,
585 strict_ns_domain
=DomainLiberal
,
586 strict_ns_set_initial_dollar
=False,
587 strict_ns_set_path
=False,
590 Constructor arguments should be used as keyword arguments only.
592 blocked_domains: sequence of domain names that we never accept cookies
593 from, nor return cookies to
594 allowed_domains: if not None, this is a sequence of the only domains
595 for which we accept and return cookies
597 For other arguments, see CookiePolicy.__doc__ and
598 DefaultCookiePolicy.__doc__.
601 self
.netscape
= netscape
602 self
.rfc2965
= rfc2965
603 self
.rfc2109_as_netscape
= rfc2109_as_netscape
604 self
.hide_cookie2
= hide_cookie2
605 self
.strict_domain
= strict_domain
606 self
.strict_rfc2965_unverifiable
= strict_rfc2965_unverifiable
607 self
.strict_ns_unverifiable
= strict_ns_unverifiable
608 self
.strict_ns_domain
= strict_ns_domain
609 self
.strict_ns_set_initial_dollar
= strict_ns_set_initial_dollar
610 self
.strict_ns_set_path
= strict_ns_set_path
612 if blocked_domains
is not None:
613 self
._blocked
_domains
= tuple(blocked_domains
)
615 self
._blocked
_domains
= ()
617 if allowed_domains
is not None:
618 allowed_domains
= tuple(allowed_domains
)
619 self
._allowed
_domains
= allowed_domains
def blocked_domains(self):
    """Return the currently blocked domains, as the stored tuple."""
    blocked = self._blocked_domains
    return blocked
def set_blocked_domains(self, blocked_domains):
    """Replace the block-list with the given sequence of domains.

    blocked_domains: any iterable of domain names; it is stored as a
      tuple.

    """
    new_blocked = tuple(blocked_domains)
    self._blocked_domains = new_blocked
628 def is_blocked(self
, domain
):
629 for blocked_domain
in self
._blocked
_domains
:
630 if user_domain_match(domain
, blocked_domain
):
def allowed_domains(self):
    """Return the stored allow-list: None, or a tuple of allowed domains."""
    allowed = self._allowed_domains
    return allowed
def set_allowed_domains(self, allowed_domains):
    """Replace the allow-list with the given sequence of domains, or None.

    allowed_domains: None (stored as-is), or any iterable of domain names,
      which is stored as a tuple.

    """
    if allowed_domains is None:
        self._allowed_domains = None
        return
    self._allowed_domains = tuple(allowed_domains)
643 def is_not_allowed(self
, domain
):
644 if self
._allowed
_domains
is None:
646 for allowed_domain
in self
._allowed
_domains
:
647 if user_domain_match(domain
, allowed_domain
):
651 def set_ok(self
, cookie
, request
):
653 If you override set_ok, be sure to call this method. If it returns
654 false, so should your subclass (assuming your subclass wants to be more
655 strict about which cookies to accept).
658 debug(" - checking cookie %s", cookie
)
660 assert cookie
.name
is not None
662 for n
in "version", "verifiability", "name", "path", "domain", "port":
663 fn_name
= "set_ok_"+n
664 fn
= getattr(self
, fn_name
)
665 if not fn(cookie
, request
):
670 def set_ok_version(self
, cookie
, request
):
671 if cookie
.version
is None:
672 # Version is always set to 0 by parse_ns_headers if it's a Netscape
673 # cookie, so this must be an invalid RFC 2965 cookie.
674 debug(" Set-Cookie2 without version attribute (%s)", cookie
)
676 if cookie
.version
> 0 and not self
.rfc2965
:
677 debug(" RFC 2965 cookies are switched off")
679 elif cookie
.version
== 0 and not self
.netscape
:
680 debug(" Netscape cookies are switched off")
684 def set_ok_verifiability(self
, cookie
, request
):
685 if request
.unverifiable
and is_third_party(request
):
686 if cookie
.version
> 0 and self
.strict_rfc2965_unverifiable
:
687 debug(" third-party RFC 2965 cookie during "
688 "unverifiable transaction")
690 elif cookie
.version
== 0 and self
.strict_ns_unverifiable
:
691 debug(" third-party Netscape cookie during "
692 "unverifiable transaction")
696 def set_ok_name(self
, cookie
, request
):
697 # Try and stop servers setting V0 cookies designed to hack other
698 # servers that know both V0 and V1 protocols.
699 if (cookie
.version
== 0 and self
.strict_ns_set_initial_dollar
and
700 cookie
.name
.startswith("$")):
701 debug(" illegal name (starts with '$'): '%s'", cookie
.name
)
705 def set_ok_path(self
, cookie
, request
):
706 if cookie
.path_specified
:
707 req_path
= request_path(request
)
708 if ((cookie
.version
> 0 or
709 (cookie
.version
== 0 and self
.strict_ns_set_path
)) and
710 not req_path
.startswith(cookie
.path
)):
711 debug(" path attribute %s is not a prefix of request "
712 "path %s", cookie
.path
, req_path
)
716 def set_ok_countrycode_domain(self
, cookie
, request
):
717 """Return False if explicit cookie domain is not acceptable.
719 Called by set_ok_domain, for convenience of overriding by
723 if cookie
.domain_specified
and self
.strict_domain
:
724 domain
= cookie
.domain
725 # since domain was specified, we know that:
726 assert domain
.startswith(".")
727 if domain
.count(".") == 2:
728 # domain like .foo.bar
729 i
= domain
.rfind(".")
734 "com", "edu", "org", "net", "gov", "mil", "int",
735 "aero", "biz", "cat", "coop", "info", "jobs", "mobi",
736 "museum", "name", "pro", "travel",
743 def set_ok_domain(self
, cookie
, request
):
744 if self
.is_blocked(cookie
.domain
):
745 debug(" domain %s is in user block-list", cookie
.domain
)
747 if self
.is_not_allowed(cookie
.domain
):
748 debug(" domain %s is not in user allow-list", cookie
.domain
)
750 if not self
.set_ok_countrycode_domain(cookie
, request
):
751 debug(" country-code second level domain %s", cookie
.domain
)
753 if cookie
.domain_specified
:
754 req_host
, erhn
= eff_request_host(request
)
755 domain
= cookie
.domain
756 if domain
.startswith("."):
757 undotted_domain
= domain
[1:]
759 undotted_domain
= domain
760 embedded_dots
= (undotted_domain
.find(".") >= 0)
761 if not embedded_dots
and domain
!= ".local":
762 debug(" non-local domain %s contains no embedded dot",
765 if cookie
.version
== 0:
766 if (not erhn
.endswith(domain
) and
767 (not erhn
.startswith(".") and
768 not ("."+erhn
).endswith(domain
))):
769 debug(" effective request-host %s (even with added "
770 "initial dot) does not end end with %s",
773 if (cookie
.version
> 0 or
774 (self
.strict_ns_domain
& self
.DomainRFC2965Match
)):
775 if not domain_match(erhn
, domain
):
776 debug(" effective request-host %s does not domain-match "
779 if (cookie
.version
> 0 or
780 (self
.strict_ns_domain
& self
.DomainStrictNoDots
)):
781 host_prefix
= req_host
[:-len(domain
)]
782 if (host_prefix
.find(".") >= 0 and
783 not IPV4_RE
.search(req_host
)):
784 debug(" host prefix %s for domain %s contains a dot",
789 def set_ok_port(self
, cookie
, request
):
790 if cookie
.port_specified
:
791 req_port
= request_port(request
)
795 req_port
= str(req_port
)
796 for p
in cookie
.port
.split(","):
800 debug(" bad port %s (not numeric)", p
)
805 debug(" request port (%s) not found in %s",
806 req_port
, cookie
.port
)
810 def return_ok(self
, cookie
, request
):
812 If you override return_ok, be sure to call this method. If it returns
813 false, so should your subclass (assuming your subclass wants to be more
814 strict about which cookies to return).
817 # Path has already been checked by path_return_ok, and domain blocking
818 # done by domain_return_ok.
819 debug(" - checking cookie %s", cookie
)
821 for n
in "version", "verifiability", "secure", "expires", "port", "domain":
822 fn_name
= "return_ok_"+n
823 fn
= getattr(self
, fn_name
)
824 if not fn(cookie
, request
):
828 def return_ok_version(self
, cookie
, request
):
829 if cookie
.version
> 0 and not self
.rfc2965
:
830 debug(" RFC 2965 cookies are switched off")
832 elif cookie
.version
== 0 and not self
.netscape
:
833 debug(" Netscape cookies are switched off")
837 def return_ok_verifiability(self
, cookie
, request
):
838 if request
.unverifiable
and is_third_party(request
):
839 if cookie
.version
> 0 and self
.strict_rfc2965_unverifiable
:
840 debug(" third-party RFC 2965 cookie during unverifiable "
843 elif cookie
.version
== 0 and self
.strict_ns_unverifiable
:
844 debug(" third-party Netscape cookie during unverifiable "
849 def return_ok_secure(self
, cookie
, request
):
850 if cookie
.secure
and request
.get_type() != "https":
851 debug(" secure cookie with non-secure request")
855 def return_ok_expires(self
, cookie
, request
):
856 if cookie
.is_expired(self
._now
):
857 debug(" cookie expired")
861 def return_ok_port(self
, cookie
, request
):
863 req_port
= request_port(request
)
866 for p
in cookie
.port
.split(","):
870 debug(" request port %s does not match cookie port %s",
871 req_port
, cookie
.port
)
875 def return_ok_domain(self
, cookie
, request
):
876 req_host
, erhn
= eff_request_host(request
)
877 domain
= cookie
.domain
879 # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
880 if (cookie
.version
== 0 and
881 (self
.strict_ns_domain
& self
.DomainStrictNonDomain
) and
882 not cookie
.domain_specified
and domain
!= erhn
):
883 debug(" cookie with unspecified domain does not string-compare "
884 "equal to request domain")
887 if cookie
.version
> 0 and not domain_match(erhn
, domain
):
888 debug(" effective request-host name %s does not domain-match "
889 "RFC 2965 cookie domain %s", erhn
, domain
)
891 if cookie
.version
== 0 and not ("."+erhn
).endswith(domain
):
892 debug(" request-host %s does not match Netscape cookie domain "
893 "%s", req_host
, domain
)
897 def domain_return_ok(self
, domain
, request
):
898 # Liberal check of domain. This is here as an optimization to avoid
899 # having to load lots of MSIE cookie files unless necessary.
901 # Munge req_host and erhn to always start with a dot, so as to err on
902 # the side of letting cookies through.
903 dotted_req_host
, dotted_erhn
= eff_request_host(request
)
904 if not dotted_req_host
.startswith("."):
905 dotted_req_host
= "."+dotted_req_host
906 if not dotted_erhn
.startswith("."):
907 dotted_erhn
= "."+dotted_erhn
908 if not (dotted_req_host
.endswith(domain
) or
909 dotted_erhn
.endswith(domain
)):
910 #debug(" request domain %s does not match cookie domain %s",
914 if self
.is_blocked(domain
):
915 debug(" domain %s is in user block-list", domain
)
917 if self
.is_not_allowed(domain
):
918 debug(" domain %s is not in user allow-list", domain
)
923 def path_return_ok(self
, path
, request
):
924 debug("- checking cookie path=%s", path
)
925 req_path
= request_path(request
)
926 if not req_path
.startswith(path
):
927 debug(" %s does not path-match %s", req_path
, path
)
932 def vals_sorted_by_key(adict
):
935 return map(adict
.get
, keys
)
937 class MappingIterator
:
938 """Iterates over nested mapping, depth-first, in sorted order by key."""
939 def __init__(self
, mapping
):
940 self
._s
= [(vals_sorted_by_key(mapping
), 0, None)] # LIFO stack
942 def __iter__(self
): return self
945 # this is hairy because of lack of generators
948 vals
, i
, prev_item
= self
._s
.pop()
950 raise StopIteration()
954 self
._s
.append((vals
, i
, prev_item
))
957 except AttributeError:
962 self
._s
.append((vals_sorted_by_key(item
), 0, item
))
967 # Used as second parameter to dict.get method, to distinguish absent
968 # dict key from one with a None value.
972 """Collection of HTTP cookies.
974 You may not need to know about this class: try mechanize.urlopen().
976 The major methods are extract_cookies and add_cookie_header; these are all
977 you are likely to need.
979 CookieJar supports the iterator protocol:
981 for cookie in cookiejar:
982 # do something with cookie
986 add_cookie_header(request)
987 extract_cookies(response, request)
988 make_cookies(response, request)
989 set_cookie_if_ok(cookie, request)
991 clear_session_cookies()
992 clear_expired_cookies()
993 clear(domain=None, path=None, name=None)
997 policy: CookiePolicy object
1001 non_word_re
= re
.compile(r
"\W")
1002 quote_re
= re
.compile(r
"([\"\\])")
1003 strict_domain_re = re.compile(r"\
.?
[^
.]*")
1004 domain_re = re.compile(r"[^
.]*")
1005 dots_re = re.compile(r"^\
.+")
1007 def __init__(self, policy=None):
1009 See CookieJar.__doc__ for argument documentation.
1013 policy = DefaultCookiePolicy()
1014 self._policy = policy
1018 # for __getitem__ iteration in pre-2.2 Pythons
1019 self._prev_getitem_index = 0
def set_policy(self, policy):
    """Install policy as the object stored in self._policy.

    policy: CookiePolicy object

    """
    new_policy = policy
    self._policy = new_policy
1024 def _cookies_for_domain(self, domain, request):
1026 if not self._policy.domain_return_ok(domain, request):
1028 debug("Checking
%s for cookies to
return", domain)
1029 cookies_by_path = self._cookies[domain]
1030 for path in cookies_by_path.keys():
1031 if not self._policy.path_return_ok(path, request):
1033 cookies_by_name = cookies_by_path[path]
1034 for cookie in cookies_by_name.values():
1035 if not self._policy.return_ok(cookie, request):
1036 debug(" not returning cookie
")
1038 debug(" it
's a match")
1039 cookies.append(cookie)
1042 def _cookies_for_request(self, request):
1043 """Return a list of cookies to be returned to server."""
1045 for domain in self._cookies.keys():
1046 cookies.extend(self._cookies_for_domain(domain, request))
1049 def _cookie_attrs(self, cookies):
1050 """Return a list of cookie-attributes to be returned to server.
1052 like ['foo
="bar"; $Path
="/"', ...]
1054 The $Version attribute is also added when appropriate (currently only
1058 # add cookies in order of most specific (ie. longest) path first
1059 def decreasing_size(a, b): return cmp(len(b.path), len(a.path))
1060 cookies.sort(decreasing_size)
1065 for cookie in cookies:
1066 # set version of Cookie header
1068 # What should it be if multiple matching Set-Cookie headers have
1069 # different versions themselves?
1070 # Answer: there is no answer; was supposed to be settled by
1071 # RFC 2965 errata, but that may never appear...
1072 version = cookie.version
1076 attrs.append("$Version=%s" % version)
1078 # quote cookie value if necessary
1079 # (not for Netscape protocol, which already has any quotes
1080 # intact, due to the poorly-specified Netscape Cookie: syntax)
1081 if ((cookie.value is not None) and
1082 self.non_word_re.search(cookie.value) and version > 0):
1083 value = self.quote_re.sub(r"\\\1", cookie.value)
1085 value = cookie.value
1087 # add cookie-attributes to be returned in Cookie header
1088 if cookie.value is None:
1089 attrs.append(cookie.name)
1091 attrs.append("%s=%s" % (cookie.name, value))
1093 if cookie.path_specified:
1094 attrs.append('$Path
="%s"' % cookie.path)
1095 if cookie.domain.startswith("."):
1096 domain = cookie.domain
1097 if (not cookie.domain_initial_dot and
1098 domain.startswith(".")):
1100 attrs.append('$Domain
="%s"' % domain)
1101 if cookie.port is not None:
1103 if cookie.port_specified:
1104 p = p + ('="%s"' % cookie.port)
def add_cookie_header(self, request):
    """Add correct Cookie: header to request (urllib2.Request object).

    The Cookie2 header is also added unless policy.hide_cookie2 is true.

    The request object (usually a urllib2.Request instance) must support
    the methods get_full_url, get_host, get_type, has_header, get_header,
    header_items and add_unredirected_header, as documented by urllib2, and
    the port attribute (the port number).  Actually,
    RequestUpgradeProcessor will automatically upgrade your Request object
    to one with has_header, get_header, header_items and
    add_unredirected_header, if it lacks those methods, for compatibility
    with pre-2.4 versions of urllib2.

    An existing Cookie or Cookie2 header on the request is never
    overwritten.  Expired cookies are purged from the jar afterwards.

    """
    debug("add_cookie_header")
    # the policy and the jar must agree on what "now" is for expiry checks
    self._policy._now = self._now = int(time.time())

    cookies = self._cookies_for_request(request)

    attrs = self._cookie_attrs(cookies)
    if attrs:
        if not request.has_header("Cookie"):
            request.add_unredirected_header("Cookie", "; ".join(attrs))

    # if necessary, advertise that we know RFC 2965
    if self._policy.rfc2965 and not self._policy.hide_cookie2:
        for cookie in cookies:
            if cookie.version != 1 and not request.has_header("Cookie2"):
                request.add_unredirected_header("Cookie2", '$Version="1"')
                break

    self.clear_expired_cookies()
1147 def _normalized_cookie_tuples(self, attrs_set):
1148 """Return list of tuples containing normalised cookie information.
1150 attrs_set is the list of lists of key,value pairs extracted from
1151 the Set-Cookie or Set-Cookie2 headers.
1153 Tuples are name, value, standard, rest, where name and value are the
1154 cookie name and value, standard is a dictionary containing the standard
1155 cookie-attributes (discard, secure, version, expires or max-age,
1156 domain, path and port) and rest is a dictionary containing the rest of
1157 the cookie-attributes.
1162 boolean_attrs = "discard", "secure"
1163 value_attrs = ("version",
1164 "expires", "max-age",
1165 "domain", "path", "port",
1166 "comment", "commenturl")
1168 for cookie_attrs in attrs_set:
1169 name, value = cookie_attrs[0]
1171 # Build dictionary of standard cookie-attributes (standard) and
1172 # dictionary of other cookie-attributes (rest).
1174 # Note: expiry time is normalised to seconds since epoch. V0
1175 # cookies should have the Expires cookie-attribute, and V1 cookies
1176 # should have Max-Age, but since V1 includes RFC 2109 cookies (and
1177 # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
1178 # accept either (but prefer Max-Age).
1185 for k, v in cookie_attrs[1:]:
1187 # don't lose case distinction
for unknown fields
1188 if lc
in value_attrs
or lc
in boolean_attrs
:
1190 if k
in boolean_attrs
and v
is None:
1191 # boolean cookie-attribute is present, but has no value
1192 # (like "discard", rather than "port=80")
1194 if standard
.has_key(k
):
1195 # only first value is significant
1199 debug(" missing value for domain attribute")
1202 # RFC 2965 section 3.3.3
1206 # Prefer max-age to expires (like Mozilla)
1209 debug(" missing or invalid value for expires "
1210 "attribute: treating as session cookie")
1217 debug(" missing or invalid (non-numeric) value for "
1218 "max-age attribute")
1221 # convert RFC 2965 Max-Age to seconds since epoch
1222 # XXX Strictly you're supposed to follow RFC 2616
1223 # age-calculation rules. Remember that zero Max-Age is a
1224 # is a request to discard (old and new) cookie, though.
1227 if (k
in value_attrs
) or (k
in boolean_attrs
):
1229 k
not in ["port", "comment", "commenturl"]):
1230 debug(" missing value for %s attribute" % k
)
1240 cookie_tuples
.append((name
, value
, standard
, rest
))
1242 return cookie_tuples
def _cookie_from_cookie_tuple(self, tup, request):
    """Build a Cookie from one normalised cookie tuple.

    tup: a (name, value, standard, rest) tuple, where standard is the dict
     of standard cookie-attributes and rest is the dict of the rest of them
    request: used to default the path, domain and port cookie-attributes
     when the server did not supply them

    Returns the new Cookie, or None when the expiry time is not later than
    self._now; in that case any stored cookie with the same domain, path
    and name is removed from the jar.

    """
    name, value, standard, rest = tup

    domain = standard.get("domain", Absent)
    path = standard.get("path", Absent)
    port = standard.get("port", Absent)
    expires = standard.get("expires", Absent)

    # set the easy defaults
    version = standard.get("version", None)
    if version is not None:
        version = int(version)
    secure = standard.get("secure", False)
    # (discard is also set if expires is Absent)
    discard = standard.get("discard", False)
    comment = standard.get("comment", None)
    comment_url = standard.get("commenturl", None)

    # set default path
    if path is Absent or path == "":
        path_specified = False
        path = request_path(request)
        last_slash = path.rfind("/")
        if last_slash != -1:
            if version == 0:
                # Netscape spec parts company from reality here
                path = path[:last_slash]
            else:
                path = path[:last_slash + 1]
        if len(path) == 0:
            path = "/"
    else:
        path_specified = True
        path = escape_path(path)

    # set default domain, remembering first whether the server's own value
    # started with a dot
    domain_specified = domain is not Absent
    if domain_specified:
        domain_initial_dot = bool(domain.startswith("."))
        if not domain.startswith("."):
            domain = "." + domain
    else:
        domain_initial_dot = False
        req_host, erhn = eff_request_host(request)
        domain = erhn

    # set default port
    port_specified = False
    if port is Absent:
        # No port attr present.  Cookie can be sent back on any port.
        port = None
    elif port is None:
        # Port attr present, but has no value: default to request port.
        # Cookie should then only be sent back on that port.
        port = request_port(request)
    else:
        port_specified = True
        port = re.sub(r"\s+", "", port)

    # set default expires and discard
    if expires is Absent:
        expires = None
        discard = True
    elif expires <= self._now:
        # Expiry date in past is request to delete cookie.  This can't be
        # in DefaultCookiePolicy, because can't delete cookies there.
        try:
            self.clear(domain, path, name)
        except KeyError:
            pass
        debug("Expiring cookie, domain='%s', path='%s', name='%s'",
              domain, path, name)
        return None

    return Cookie(version,
                  name, value,
                  port, port_specified,
                  domain, domain_specified, domain_initial_dot,
                  path, path_specified,
                  secure,
                  expires,
                  discard,
                  comment,
                  comment_url,
                  rest)
1332 def _cookies_from_attrs_set(self
, attrs_set
, request
):
1333 cookie_tuples
= self
._normalized
_cookie
_tuples
(attrs_set
)
1336 for tup
in cookie_tuples
:
1337 cookie
= self
._cookie
_from
_cookie
_tuple
(tup
, request
)
1338 if cookie
: cookies
.append(cookie
)
1341 def _process_rfc2109_cookies(self
, cookies
):
1342 if self
._policy
.rfc2109_as_netscape
is None:
1343 rfc2109_as_netscape
= not self
._policy
.rfc2965
1345 rfc2109_as_netscape
= self
._policy
.rfc2109_as_netscape
1346 for cookie
in cookies
:
1347 if cookie
.version
== 1:
1348 cookie
.rfc2109
= True
1349 if rfc2109_as_netscape
:
1350 # treat 2109 cookies as Netscape cookies rather than
1351 # as RFC2965 cookies
def make_cookies(self, response, request):
    """Return sequence of Cookie objects extracted from response object.

    See extract_cookies.__doc__ for the interfaces required of the
    response and request arguments.

    Set-Cookie2 headers are parsed as RFC 2965 cookies and Set-Cookie
    headers as Netscape / RFC 2109 cookies.  If parsing either family of
    headers fails unexpectedly, that family contributes no cookies.

    """
    # get cookie-attributes for RFC 2965 and Netscape protocols
    headers = response.info()
    rfc2965_hdrs = headers.getheaders("Set-Cookie2")
    ns_hdrs = headers.getheaders("Set-Cookie")

    rfc2965 = self._policy.rfc2965
    netscape = self._policy.netscape

    if ((not rfc2965_hdrs and not ns_hdrs) or
        (not ns_hdrs and not rfc2965) or
        (not rfc2965_hdrs and not netscape) or
        (not netscape and not rfc2965)):
        return []  # no relevant cookie headers: quick exit

    try:
        cookies = self._cookies_from_attrs_set(
            split_header_words(rfc2965_hdrs), request)
    except:
        # deliberate catch-all for input that is bad in unexpected ways;
        # reraise_unmasked_exceptions lets the important ones through
        reraise_unmasked_exceptions()
        cookies = []

    if ns_hdrs and netscape:
        try:
            # RFC 2109 and Netscape cookies
            ns_cookies = self._cookies_from_attrs_set(
                parse_ns_headers(ns_hdrs), request)
        except:
            reraise_unmasked_exceptions()
            ns_cookies = []
        self._process_rfc2109_cookies(ns_cookies)

        # Look for Netscape cookies (from Set-Cookie headers) that match
        # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
        # For each match, keep the RFC 2965 cookie and ignore the Netscape
        # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
        # bundled in with the Netscape cookies for this purpose, which is
        # reasonable behaviour.
        if rfc2965:
            lookup = {}
            for cookie in cookies:
                lookup[(cookie.domain, cookie.path, cookie.name)] = None

            # a list comprehension (rather than filter()) guarantees a real
            # list, so the emptiness test below stays meaningful
            ns_cookies = [
                ns_cookie for ns_cookie in ns_cookies
                if (ns_cookie.domain, ns_cookie.path, ns_cookie.name)
                not in lookup]

        if ns_cookies:
            cookies.extend(ns_cookies)

    return cookies
def set_cookie_if_ok(self, cookie, request):
    """Set a cookie if policy says it's OK to do so.

    cookie: mechanize.Cookie instance
    request: see extract_cookies.__doc__ for the required interface

    The jar's and the policy's notion of the current time (_now) is
    refreshed before the policy is consulted.

    """
    now = int(time.time())
    self._now = now
    self._policy._now = now

    if not self._policy.set_ok(cookie, request):
        return
    self.set_cookie(cookie)
def set_cookie(self, cookie):
    """Set a cookie, without checking whether or not it should be set.

    cookie: mechanize.Cookie instance

    The cookie is stored under its domain, then path, then name; any
    cookie already stored under the same three keys is replaced.

    """
    # setdefault creates the intermediate dictionaries on first use
    cookies_by_path = self._cookies.setdefault(cookie.domain, {})
    cookies_by_name = cookies_by_path.setdefault(cookie.path, {})
    cookies_by_name[cookie.name] = cookie
def extract_cookies(self, response, request):
    """Extract cookies from response, where allowable given the request.

    Look for allowable Set-Cookie: and Set-Cookie2: headers in the response
    object passed as argument.  Any of these headers that are found are
    used to update the state of the object (subject to the policy.set_ok
    method's approval).

    The response object (usually the result of a call to
    mechanize.urlopen, or similar) should support an info method, which
    returns a mimetools.Message object (in fact, the 'mimetools.Message
    object' may be any object that provides a getheaders method, since
    that is what make_cookies calls on it).

    The request object (usually a urllib2.Request instance) must support
    the methods get_full_url and get_host, as documented by urllib2, and
    the port attribute (the port number).  The request is used to set
    default values for cookie-attributes as well as for checking that the
    cookie is OK to be set.

    """
    debug("extract_cookies: %s", response.info())
    now = int(time.time())
    self._now = now
    self._policy._now = now

    for cookie in self.make_cookies(response, request):
        if not self._policy.set_ok(cookie, request):
            continue
        debug(" setting cookie: %s", cookie)
        self.set_cookie(cookie)
def clear(self, domain=None, path=None, name=None):
    """Clear some cookies.

    Invoking this method without arguments will clear all cookies.  If
    given a single argument, only cookies belonging to that domain will be
    removed.  If given two arguments, cookies belonging to the specified
    path within that domain are removed.  If given three arguments, then
    the cookie with the specified name, path and domain is removed.

    Raises KeyError if no matching cookie exists, and ValueError if a
    more specific argument is given without the ones it depends on.

    """
    if name is not None:
        if domain is None or path is None:
            raise ValueError(
                "domain and path must be given to remove a cookie by name")
        del self._cookies[domain][path][name]
        return

    if path is not None:
        if domain is None:
            raise ValueError(
                "domain must be given to remove cookies by path")
        del self._cookies[domain][path]
        return

    if domain is not None:
        del self._cookies[domain]
        return

    # no arguments at all: start again with an empty store
    self._cookies = {}
def clear_session_cookies(self):
    """Discard all session cookies.

    Discards all cookies held by object which had either no Max-Age or
    Expires cookie-attribute or an explicit Discard cookie-attribute, or
    which otherwise have ended up with a true discard attribute.  For
    interactive browsers, the end of a session usually corresponds to
    closing the browser window.

    Note that the save method won't save session cookies anyway, unless you
    ask otherwise by passing a true ignore_discard argument.

    """
    for cookie in self:
        if not cookie.discard:
            continue
        self.clear(cookie.domain, cookie.path, cookie.name)
def clear_expired_cookies(self):
    """Discard all expired cookies.

    You probably don't need to call this method: expired cookies are never
    sent back to the server (provided you're using DefaultCookiePolicy),
    this method is called by CookieJar itself every so often, and the save
    method won't save expired cookies anyway (unless you ask otherwise by
    passing a true ignore_expires argument).

    """
    # one timestamp for the whole sweep, so every cookie is judged alike
    now = time.time()
    for cookie in self:
        if not cookie.is_expired(now):
            continue
        self.clear(cookie.domain, cookie.path, cookie.name)
1525 def __getitem__(self
, i
):
1527 self
._getitem
_iterator
= self
.__iter
__()
1528 elif self
._prev
_getitem
_index
!= i
-1: raise IndexError(
1529 "CookieJar.__getitem__ only supports sequential iteration")
1530 self
._prev
_getitem
_index
= i
1532 return self
._getitem
_iterator
.next()
1533 except StopIteration:
def __iter__(self):
    """Return a MappingIterator over the nested cookie store."""
    store = self._cookies
    return MappingIterator(store)
1540 """Return number of contained cookies."""
1542 for cookie
in self
: i
= i
+ 1
1547 for cookie
in self
: r
.append(repr(cookie
))
1548 return "<%s[%s]>" % (self
.__class
__, ", ".join(r
))
1552 for cookie
in self
: r
.append(str(cookie
))
1553 return "<%s[%s]>" % (self
.__class
__, ", ".join(r
))
class LoadError(Exception):
    """Raised when a cookies file cannot be loaded."""
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file.

    Additional methods

    save(filename=None, ignore_discard=False, ignore_expires=False)
    load(filename=None, ignore_discard=False, ignore_expires=False)
    revert(filename=None, ignore_discard=False, ignore_expires=False)

    Additional public attributes

    filename: filename for loading and saving cookies

    Additional public readable attributes

    delayload: request that cookies are lazily loaded from disk; this is only
     a hint since this only affects performance, not behaviour (unless the
     cookies on disk are changing); a CookieJar object may ignore it (in fact,
     only MSIECookieJar lazily loads cookies at the moment)

    """

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        See FileCookieJar.__doc__ for argument documentation.

        Cookies are NOT loaded from the named file until either the load or
        revert method is called.

        Raises ValueError if filename is given but is not string-like.

        """
        CookieJar.__init__(self, policy)
        if filename is not None and not isstringlike(filename):
            raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        filename: name of file in which to save cookies
        ignore_discard: save even cookies set to be discarded
        ignore_expires: save even cookies that have expired

        The file is overwritten if it already exists, thus wiping all its
        cookies.  Saved cookies can be restored later using the load or revert
        methods.  If filename is not specified, self.filename is used; if
        self.filename is None, ValueError is raised.

        This base-class implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Old cookies are kept unless overwritten by newly loaded ones.

        Arguments are as for .save().

        If filename is not specified, self.filename is used; if self.filename
        is None, ValueError is raised.  The named file must be in the format
        understood by the class, or LoadError will be raised.  This format will
        be identical to that written by the save method, unless the load format
        is not sufficiently well understood (as is the case for MSIECookieJar).

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        f = open(filename)
        try:
            self._really_load(f, filename, ignore_discard, ignore_expires)
        finally:
            # close the file whether or not parsing succeeded
            f.close()

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or IOError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        old_state = copy.deepcopy(self._cookies)
        self._cookies = {}
        try:
            self.load(filename, ignore_discard, ignore_expires)
        except:
            # Whatever went wrong (not only LoadError / IOError), put the
            # previous cookies back before letting the exception propagate,
            # so the "state will not be altered" promise always holds.
            self._cookies = old_state
            raise