This commit was manufactured by cvs2svn to create tag 'r234c1'.
[python/dscho.git] / Lib / rfc822.py
blob4f69b22aabdeb6506433d7b9baa808d2fc00c2e1
1 """RFC 2822 message manipulation.
3 Note: This is only a very rough sketch of a full RFC-822 parser; in particular
4 the tokenizing of addresses does not adhere to all the quoting rules.
6 Note: RFC 2822 is a long awaited update to RFC 822. This module should
7 conform to RFC 2822, and is thus mis-named (it's not worth renaming it). Some
8 effort at RFC 2822 updates has been made, but a thorough audit has not been
9 performed. Consider any RFC 2822 non-conformance to be a bug.
11 RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
12 RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
14 Directions for use:
16 To create a Message object: first open a file, e.g.:
18 fp = open(file, 'r')
20 You can use any other legal way of getting an open file object, e.g. use
21 sys.stdin or call os.popen(). Then pass the open file object to the Message()
22 constructor:
24 m = Message(fp)
26 This class can work with any input object that supports a readline method. If
27 the input object has seek and tell capability, the rewindbody method will
28 work; also illegal lines will be pushed back onto the input stream. If the
29 input object lacks seek but has an `unread' method that can push back a line
30 of input, Message will use that to push back illegal lines. Thus this class
31 can be used to parse messages coming from a buffered stream.
33 The optional `seekable' argument is provided as a workaround for certain stdio
34 libraries in which tell() discards buffered data before discovering that the
35 lseek() system call doesn't work. For maximum portability, you should set the
36 seekable argument to zero to prevent that initial tell() call when passing in
37 an unseekable object such as a file object created from a socket object. If
38 it is 1 on entry -- which it is by default -- the tell() method of the open
39 file object is called once; if this raises an exception, seekable is reset to
40 0. For other nonzero values of seekable, this test is not made.
42 To get the text of a particular header there are several methods:
44 str = m.getheader(name)
45 str = m.getrawheader(name)
47 where name is the name of the header, e.g. 'Subject'. The difference is that
48 getheader() strips the leading and trailing whitespace, while getrawheader()
49 doesn't. Both functions retain embedded whitespace (including newlines)
50 exactly as they are specified in the header, and leave the case of the text
51 unchanged.
53 For addresses and address lists there are functions
55 realname, mailaddress = m.getaddr(name)
56 list = m.getaddrlist(name)
58 where the latter returns a list of (realname, mailaddr) tuples.
60 There is also a method
62 time = m.getdate(name)
64 which parses a Date-like field and returns a time-compatible tuple,
65 i.e. a tuple such as returned by time.localtime() or accepted by
66 time.mktime().
68 See the class definition for lower level access methods.
70 There are also some utility functions here.
71 """
72 # Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
74 import time
# Names exported by ``from rfc822 import *``.
__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]
_blanklines = ('\r\n', '\n')            # Optimization for islast()


class Message:
    """Represents a single RFC 2822-compliant message.

    The headers are read from a file-like object when the instance is
    created.  Afterwards they are available as the uninterpreted list of
    lines (self.headers) and through a dictionary-style interface keyed on
    the lower-cased header name (self.dict), which only remembers the *last*
    header of each name.
    """

    def __init__(self, fp, seekable=1):
        """Initialize the class instance and read the headers.

        fp is any object with a readline() method.  When seekable is exactly
        1 (the default), fp.tell() is tried once and seekable is reset to 0
        if that raises AttributeError or IOError; any other value is taken
        at face value without probing.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying file is not seekable.
        """
        if not self.seekable:
            # Call form works on every Python version, unlike the old
            # "raise IOError, msg" statement form.
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lines = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Prefer an explicit pushback method; fall back to tell()/seek().
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lines.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lines.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        matches = self.getfirstmatchingheader(name)
        if not matches:
            return None
        # Drop "Name:" from the first line; continuation lines stay intact.
        matches[0] = matches[0][len(name) + 1:]
        return ''.join(matches)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default' (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the *last* such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return default
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold it into the value being built.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                # A new header starts; flush the previous one first.
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                # Separate the addresses of successive headers with a comma.
                if raw:
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    addr = h[i+1:]
                raw.append(addr)
        alladdrs = ''.join(raw)
        a = AddressList(alladdrs)
        return a.addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime(), or None if the header is missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is missing.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def _addheaderlines(self, name, value):
        """Append `name: value' to the raw header list, one entry per line."""
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        self._addheaderlines(name, value)

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if not name in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        kept = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if not hit:
                kept.append(line)
        # Slice-assign so self.headers stays the same list object.
        self.headers[:] = kept

    def setdefault(self, name, default=""):
        """Return the header's value, adding it as `default' if it is absent."""
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        self._addheaderlines(name, default)
        self.dict[lowername] = default
        return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines concatenated in their stored order."""
        return ''.join(self.headers)
470 # Utility functions
471 # -----------------
473 # XXX Should fix unquote() and quote() to be really conformant.
474 # XXX The inverses of the parse functions may also be useful.
def unquote(str):
    """Strip one enclosing pair of double quotes or angle brackets.

    For a double-quoted string the backslash escapes for backslash and
    double quote are undone as well.  Strings shorter than two characters,
    or without a matching delimiter pair, are returned unchanged.
    """
    if len(str) < 2:
        return str
    first, last = str[:1], str[-1:]
    if first == '"' and last == '"':
        inner = str[1:-1]
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if first == '<' and last == '>':
        return str[1:-1]
    return str
def quote(str):
    """Escape backslashes and double quotes in a string.

    The result is suitable for placing between double quotes; the
    surrounding quote characters themselves are not added here.
    """
    escaped = str.replace('\\', '\\\\')
    return escaped.replace('"', '\\"')
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found is returned; (None, None) is returned
    when nothing could be parsed.
    """
    parsed = AddressList(address).addresslist
    if parsed:
        return parsed[0]
    return (None, None)
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC 2822 in front of you.

    http://www.faqs.org/rfcs/rfc2822.html

    The parser keeps a cursor (self.pos) into the header text (self.field)
    and collects the comments met while parsing the current address in
    self.commentlist.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing one or more
        addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; comments met on the way are added
        to self.commentlist.
        """
        skippable = self.LWS + '\n\r'
        while self.pos < len(self.field):
            if self.field[self.pos] in skippable:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses.  Parsing stops at
        the first position where getaddress() yields nothing.
        """
        result = []
        while 1:
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                break
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: one entry for a plain
        address, several for a group, and an empty list when nothing could
        be parsed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot a *copy*: getphraselist() appends to self.commentlist in
        # place, so keeping only a reference would make the restore below a
        # no-op and comments would be collected twice on the re-parse.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos = self.pos + 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos = self.pos + 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                realname = (' '.join(plist) + ' (' +
                            ' '.join(self.commentlist) + ')')
                returnlist = [(realname, routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so the caller makes progress.
                self.pos = self.pos + 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a '<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        adlist = ""
        while self.pos < len(self.field):
            if expectroute:
                # Domain of a source route: parsed and thrown away.
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        # No '@' follows: return the local part on its own.
        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.  If self is not
        looking at an instance of `beginchar' then getdelimited returns the
        empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            if quote == 1:
                # Character following a backslash is taken literally.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (realname, mailaddr) tuples are held in self.addresslist.
    The arithmetic operators treat two lists as sets of such tuples.
    """

    def __init__(self, field):
        """Parse `field'; an empty or None field gives an empty list."""
        AddrlistClass.__init__(self, field)
        self.addresslist = []
        if field:
            self.addresslist = self.getaddrlist()

    def __len__(self):
        """Return the number of parsed addresses."""
        return len(self.addresslist)

    def __str__(self):
        """Return the addresses in canonical form, separated by ', '."""
        dumped = [dump_address_pair(pair) for pair in self.addresslist]
        return ", ".join(dumped)

    def __add__(self, other):
        """Set union: a new list with other's unseen entries appended."""
        union = AddressList(None)
        extra = [x for x in other.addresslist if x not in self.addresslist]
        union.addresslist = self.addresslist + extra
        return union

    def __iadd__(self, other):
        """Set union, in-place."""
        mine = self.addresslist
        for entry in other.addresslist:
            if entry not in mine:
                mine.append(entry)
        return self

    def __sub__(self, other):
        """Set difference: a new list without the entries found in other."""
        difference = AddressList(None)
        difference.addresslist = [x for x in self.addresslist
                                  if x not in other.addresslist]
        return difference

    def __isub__(self, other):
        """Set difference, in-place."""
        mine = self.addresslist
        for entry in other.addresslist:
            if entry in mine:
                mine.remove(entry)
        return self

    def __getitem__(self, index):
        """Index into the parsed list; makes indexing, slices and 'in' work."""
        return self.addresslist[index]
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is '"name" <address>'; otherwise it is
    the bare address.
    """
    realname = pair[0]
    if not realname:
        return pair[1]
    return ''.join(['"', realname, '" <', pair[1], '>'])
834 # Parse a date field
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    The first nine items form a tuple usable with time.mktime() (the last
    three of them are always 0); the tenth is the zone offset from UTC in
    seconds, or None when no usable zone was found.  Zone names are looked
    up in _timezones, which of the military zones only knows 'Z'; anything
    else must be a numeric offset such as -0500.

    Returns None when the string cannot be parsed, including empty or
    whitespace-only input.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input: there is no first token to inspect.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ("08:49:37+0100" or
        # "08:49:37-0500"); split it off, keeping the sign for int().
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if not mm in _monthnames:
        # Month and day may come in either order.
        dd, mm = mm, dd.lower()
        if not mm in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    # Full month names sit in the second half of the table.
    if mm > 12: mm = mm - 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are swapped (asctime-style ordering).
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
    if not yy[0].isdigit():
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
    return tuple
def parsedate(data):
    """Convert a time string to a time tuple.

    Same as parsedate_tz() but without the trailing zone offset; whatever
    non-tuple value parsedate_tz() returns is passed through unchanged.
    """
    parsed = parsedate_tz(data)
    if isinstance(parsed, tuple):
        return parsed[:9]
    return parsed
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
    offset = data[9]
    fields = data[:8]
    if offset is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(fields + (-1,))
    # Interpret the fields as local standard time, then remove both the
    # message's own offset and the local zone's offset.
    local_stamp = time.mktime(fields + (0,))
    return local_stamp - offset - time.timezone
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    timeval is a timestamp in seconds since the epoch; the current time is
    used when it is None.

    According to RFC 1123, day and month names must always be in
    English.  If not for that, this code could use strftime().  It
    can't because strftime() honors the locale and could generate
    non-English names.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = time.gmtime(timeval)[:7]
    day_name = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[weekday]
    month_name = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[month - 1]
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        day_name, day, month_name, year, hour, minute, second)
976 # When used as script, run a small test program.
977 # The first command line argument must be a filename containing one
978 # message in RFC-822 format.
if __name__ == '__main__':
    import os
    import sys

    # Only fall back to the default mailbox (which needs $HOME) when no
    # filename was given on the command line.
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    out = sys.stdout.write
    f = open(path, 'r')
    m = Message(f)
    out('From: %s\n' % (m.getaddr('from'),))
    out('To: %s\n' % (m.getaddrlist('to'),))
    out('Subject: %s\n' % (m.getheader('subject'),))
    out('Date: %s\n' % (m.getheader('date'),))
    date = m.getdate_tz('date')
    if date:
        # Only touch the tuple once we know the Date header parsed.
        tz = date[-1]
        parts = ['ParsedDate:', time.asctime(time.localtime(mktime_tz(date)))]
        if tz is not None:
            # tz is an offset in seconds; show it as +HHMM (plus .SS if any).
            hhmm, ss = divmod(tz, 60)
            hh, mm = divmod(hhmm, 60)
            parts.append("%+03d%02d" % (hh, mm))
            if ss:
                parts.append(".%02d" % ss)
        out(' '.join(parts) + '\n')
    else:
        out('ParsedDate: None\n')
    m.rewindbody()
    n = 0
    while f.readline():
        n = n + 1
    out('Lines: %s\n' % n)
    out('-'*70 + '\n')
    out('len = %s\n' % len(m))
    if 'Date' in m: out('Date = %s\n' % (m['Date'],))
    if 'X-Nonsense' in m: pass
    out('keys = %s\n' % (m.keys(),))
    out('values = %s\n' % (m.values(),))
    out('items = %s\n' % (m.items(),))
    f.close()