Re-commit Ping's patch to the cgi and cgitb documentation, using the
[python/dscho.git] / Lib / rfc822.py
blob96ab21c795c1880844c7933d2d488bf50561f1e4
1 """RFC 2822 message manipulation.
3 Note: This is only a very rough sketch of a full RFC-822 parser; in particular
4 the tokenizing of addresses does not adhere to all the quoting rules.
6 Note: RFC 2822 is a long awaited update to RFC 822. This module should
7 conform to RFC 2822, and is thus mis-named (it's not worth renaming it). Some
8 effort at RFC 2822 updates has been made, but a thorough audit has not been
9 performed. Consider any RFC 2822 non-conformance to be a bug.
11 RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
12 RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
14 Directions for use:
16 To create a Message object: first open a file, e.g.:
18 fp = open(file, 'r')
20 You can use any other legal way of getting an open file object, e.g. use
21 sys.stdin or call os.popen(). Then pass the open file object to the Message()
22 constructor:
24 m = Message(fp)
26 This class can work with any input object that supports a readline method. If
27 the input object has seek and tell capability, the rewindbody method will
28 work; also illegal lines will be pushed back onto the input stream. If the
29 input object lacks seek but has an `unread' method that can push back a line
30 of input, Message will use that to push back illegal lines. Thus this class
31 can be used to parse messages coming from a buffered stream.
33 The optional `seekable' argument is provided as a workaround for certain stdio
34 libraries in which tell() discards buffered data before discovering that the
35 lseek() system call doesn't work. For maximum portability, you should set the
36 seekable argument to zero to prevent that initial tell() when passing in
37 an unseekable object such as a file object created from a socket object. If
38 it is 1 on entry -- which it is by default -- the tell() method of the open
39 file object is called once; if this raises an exception, seekable is reset to
40 0. For other nonzero values of seekable, this test is not made.
42 To get the text of a particular header there are several methods:
44 str = m.getheader(name)
45 str = m.getrawheader(name)
47 where name is the name of the header, e.g. 'Subject'. The difference is that
48 getheader() strips the leading and trailing whitespace, while getrawheader()
49 doesn't. Both functions retain embedded whitespace (including newlines)
50 exactly as they are specified in the header, and leave the case of the text
51 unchanged.
53 For addresses and address lists there are functions
55 realname, mailaddress = m.getaddr(name)
56 list = m.getaddrlist(name)
58 where the latter returns a list of (realname, mailaddr) tuples.
60 There is also a method
62 time = m.getdate(name)
64 which parses a Date-like field and returns a time-compatible tuple,
65 i.e. a tuple such as returned by time.localtime() or accepted by
66 time.mktime().
68 See the class definition for lower level access methods.
70 There are also some utility functions here.
71 """
72 # Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
import calendar
import time
# Names exported by ``from rfc822 import *``.
__all__ = ["Message", "AddressList", "parsedate", "parsedate_tz", "mktime_tz"]

# Lines that terminate a header block; kept as a module-level tuple so that
# Message.islast() does not rebuild it for every line it inspects.
_blanklines = ('\r\n', '\n')
class Message:
    """Represents a single RFC 2822-compliant message.

    The headers are read from the file object when the instance is created.
    They are available as the raw list of lines (self.headers) and through a
    dictionary-like interface keyed by the lower-cased header name, in which
    the *last* occurrence of a repeated header wins.
    """

    def __init__(self, fp, seekable=1):
        """Initialize the class instance and read the headers.

        fp is any object with a readline() method.  If seekable is exactly 1,
        fp.tell() is probed once and seekable is reset to 0 when that fails;
        any other true value skips the probe.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying file is not seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Prefer an explicit unread() for pushback; fall back to tell()/seek().
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lines.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.
        """
        return None

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop "<name>:" from the first line; continuation lines stay intact.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or None if it doesn't exist.
        This uses the dictionary version which finds the *last* such header.
        """
        return self.dict.get(name.lower(), default)
    # NOTE: this alias is rebound by the get() method defined further down,
    # whose default is "" rather than None.
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header holds no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                # A new occurrence of the header: separate it from the
                # previous one so the two address lists are concatenated.
                if raw:
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    raw.append(h[i+1:])
        alladdrs = ''.join(raw)
        a = AddrlistClass(alladdrs)
        return a.getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime(), or None if the header is missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        kept = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if not hit:
                kept.append(line)
        # Replace the contents in place so other references to the list
        # (if any) observe the deletion too.
        self.headers[:] = kept

    def get(self, name, default=""):
        """Get the value of a header, or default if it is not present."""
        return self.dict.get(name.lower(), default)

    def setdefault(self, name, default=""):
        """Return the header's value, adding it with default if missing."""
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        text = name + ": " + default
        for line in text.split("\n"):
            self.headers.append(line + "\n")
        self.dict[lowername] = default
        return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lower-cased) header field names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names."""
        return list(self.dict.keys())

    def values(self):
        """Get all of a message's header field values."""
        return list(self.dict.values())

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return list(self.dict.items())

    def __str__(self):
        """Return the raw header lines joined back together."""
        return ''.join(self.headers)
# Utility functions
# -----------------

# XXX Should fix unquote() and quote() to be really conformant.
# XXX The inverses of the parse functions may also be useful.


def unquote(str):
    """Remove quotes from a string.

    A string wrapped in double quotes has the quotes removed and the
    backslash escapes produced by quote() undone; a string wrapped in angle
    brackets just has the brackets removed.  Anything else (including
    strings shorter than two characters) is returned unchanged.
    """
    if len(str) > 1:
        if str.startswith('"') and str.endswith('"'):
            # Undo quote(): '\\\\' -> '\\' first, then '\\"' -> '"'.
            return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
        if str.startswith('<') and str.endswith('>'):
            return str[1:-1]
    return str
def quote(str):
    """Escape a string for use inside a quoted-string.

    Backslashes and double quotes are preceded by a backslash.  Note that
    the surrounding double quotes themselves are NOT added; the caller is
    expected to supply them.
    """
    return str.replace('\\', '\\\\').replace('"', '\\"')
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; (None, None) is
    returned when no address could be parsed.
    """
    addresses = AddrlistClass(address).getaddrlist()
    if not addresses:
        return (None, None)
    return addresses[0]
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC 2822 in front of you.

    http://www.faqs.org/rfcs/rfc2822.html

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing one or more
        addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address."""
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses.  An empty entry in
        the list (e.g. two consecutive commas) is skipped instead of ending
        the parse, so addresses that follow it are not lost.
        """
        result = []
        while self.pos < len(self.field):
            start = self.pos
            ad = self.getaddress()
            if ad:
                result += ad
            if self.pos == start:
                # Nothing was consumed: stop rather than loop forever on
                # input we cannot make sense of.
                break
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: one element for a
        plain address, several for a group, none if nothing was recognized.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Take a copy: getphraselist() appends to self.commentlist in place,
        # so an alias could not be used to rewind it below.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos = self.pos + 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos = self.pos + 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so the caller can make progress.
                self.pos = self.pos + 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the parser is not positioned at a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        adlist = ""
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                # Step over the closing `>'.
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.  If self is not
        looking at an instance of `beginchar' then getdelimited returns the
        empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.pos >= len(self.field) or self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            if quote == 1:
                # Previous character was a backslash: take this one verbatim.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # getcomment() has already advanced self.pos
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            atomlist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (realname, mailaddr) tuples are kept in self.addresslist.
    Instances support len(), indexing, str() and the set-like operators
    +, +=, - and -= (union and difference).
    """

    def __init__(self, field):
        """Parse `field'; a false value (None or '') gives an empty list."""
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        return ", ".join([dump_address_pair(pair)
                          for pair in self.addresslist])

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if x not in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place
        for x in other.addresslist:
            if x in self.addresslist:
                self.addresslist.remove(x)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    Returns '"name" <address>' when the name is non-empty, otherwise just
    the address.
    """
    if pair[0]:
        return '"' + pair[0] + '" <' + pair[1] + '>'
    return pair[1]
# Parse a date field

# Abbreviated names first, then full names; parsedate_tz() folds an index
# above 12 back into the 1..12 range.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple: the first nine items are compatible with
    time.mktime() (weekday, yearday and dst flag are always 0) and the
    tenth is the zone offset from UTC in seconds, or None when the zone is
    missing or not recognized.  Named zones are limited to the entries of
    _timezones; military zones other than Z are not supported.

    Returns None if the string cannot be parsed.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        s = data[3]
        i = s.find('+')
        if i > 0:
            # Zone glued to the time, e.g. "08:49:37+0100".
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps month and day are swapped ("Nov 6" instead of "6 Nov").
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    if mm > 12: mm = mm - 12
    # endswith()/slices instead of [-1]/[0]: a field may be empty here
    # (e.g. from "06-Nov-"), and must lead to None, not IndexError.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are swapped (asctime-style dates).
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Obsolete short years (RFC 2822 section 4.3): 00-49 mean 2000-2049,
    # 50-99 and any three-digit year are offsets from 1900.
    if 0 <= yy < 50:
        yy = yy + 2000
    elif 50 <= yy < 1000:
        yy = yy + 1900
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
def parsedate(data):
    """Convert a time string to a time tuple.

    Same as parsedate_tz() but without the trailing zone offset: returns a
    9-tuple compatible with time.mktime(), or None if parsing failed.
    """
    t = parsedate_tz(data)
    if isinstance(t, tuple):
        return t[:9]
    return t
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds east of UTC, or None.  With no
    zone information the fields are interpreted as local time.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(tuple(data[:8]) + (-1,))
    # Treat the broken-down fields as UTC and then remove the sender's
    # offset.  Going through calendar.timegm() keeps the result independent
    # of the local zone (and of its historical offset changes).
    return float(calendar.timegm(data)) - data[9]
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    timeval is a timestamp in seconds since the epoch; the current time is
    used when it is None.

    According to RFC 1123, day and month names must always be in
    English.  If not for that, this code could use strftime().  It
    can't because strftime() honors the locale and could generate
    non-English names.
    """
    if timeval is None:
        timeval = time.time()
    timeval = time.gmtime(timeval)
    daynames = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    monthnames = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        daynames[timeval[6]],
        timeval[2],
        monthnames[timeval[1]-1],
        timeval[0], timeval[3], timeval[4], timeval[5])
# When used as script, run a small test program.
# The first command line argument, if given, must be a filename containing
# one message in RFC-822 format; the default is ~/Mail/inbox/1.

if __name__ == '__main__':
    import os
    import sys
    file = os.path.join(os.path.expanduser('~'), 'Mail/inbox/1')
    if sys.argv[1:]: file = sys.argv[1]
    f = open(file, 'r')
    try:
        m = Message(f)
        # Single-argument print(...) behaves the same as a statement and as
        # a function, so the script does not depend on the print syntax.
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        if date:
            tz = date[-1]
            parts = ['ParsedDate:',
                     time.asctime(time.localtime(mktime_tz(date)))]
            if tz is not None:
                # Split the offset (in seconds) into hours, minutes, seconds.
                hhmm, ss = divmod(tz, 60)
                hh, mm = divmod(hhmm, 60)
                parts.append("%+03d%02d" % (hh, mm))
                if ss: parts.append(".%02d" % ss)
            print(' '.join(parts))
        else:
            print('ParsedDate: None')
        m.rewindbody()
        n = 0
        while f.readline():
            n = n + 1
        print('Lines: %d' % n)
        print('-'*70)
        print('len = %d' % len(m))
        if m.has_key('Date'): print('Date = %s' % (m['Date'],))
        if m.has_key('X-Nonsense'): pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))
    finally:
        f.close()