Ditched '_find_SET()', since it was a no-value-added wrapper around
[python/dscho.git] / Lib / rfc822.py
blobbb176144b78942c6bce4cd6bde75fc9d9fe452c8
1 """RFC-822 message manipulation class.
3 XXX This is only a very rough sketch of a full RFC-822 parser;
4 in particular the tokenizing of addresses does not adhere to all the
5 quoting rules.
7 Directions for use:
9 To create a Message object: first open a file, e.g.:
10 fp = open(file, 'r')
11 You can use any other legal way of getting an open file object, e.g. use
12 sys.stdin or call os.popen().
13 Then pass the open file object to the Message() constructor:
14 m = Message(fp)
16 This class can work with any input object that supports a readline
17 method. If the input object has seek and tell capability, the
18 rewindbody method will work; also illegal lines will be pushed back
19 onto the input stream. If the input object lacks seek but has an
20 `unread' method that can push back a line of input, Message will use
21 that to push back illegal lines. Thus this class can be used to parse
22 messages coming from a buffered stream.
24 The optional `seekable' argument is provided as a workaround for
25 certain stdio libraries in which tell() discards buffered data before
26 discovering that the lseek() system call doesn't work. For maximum
27 portability, you should set the seekable argument to zero to prevent
28 that initial tell() call when passing in an unseekable object such as
29 a file object created from a socket object. If it is 1 on entry --
30 which it is by default -- the tell() method of the open file object is
31 called once; if this raises an exception, seekable is reset to 0. For
32 other nonzero values of seekable, this test is not made.
34 To get the text of a particular header there are several methods:
35 str = m.getheader(name)
36 str = m.getrawheader(name)
37 where name is the name of the header, e.g. 'Subject'.
38 The difference is that getheader() strips the leading and trailing
39 whitespace, while getrawheader() doesn't. Both functions retain
40 embedded whitespace (including newlines) exactly as they are
41 specified in the header, and leave the case of the text unchanged.
43 For addresses and address lists there are functions
44 realname, mailaddress = m.getaddr(name) and
45 list = m.getaddrlist(name)
46 where the latter returns a list of (realname, mailaddr) tuples.
48 There is also a method
49 time = m.getdate(name)
50 which parses a Date-like field and returns a time-compatible tuple,
51 i.e. a tuple such as returned by time.localtime() or accepted by
52 time.mktime().
54 See the class definition for lower level access methods.
56 There are also some utility functions here.
57 """
58 # Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
60 import string
61 import time
_blanklines = ('\r\n', '\n')            # Optimization for islast()


class Message:
    """Represents a single RFC-822-compliant message.

    The headers are read from `fp' when the instance is created.  After
    construction the following attributes are available:

    fp             -- the input object the message was read from
    seekable       -- true if rewindbody() can be used
    headers        -- list of raw header lines, in file order
    dict           -- maps lower-cased header names to stripped values
                      (only the *last* header of each name is kept)
    unixfrom       -- any leading Unix "From " envelope line(s)
    status         -- '' if parsing went well, else an error message
    startofheaders -- file position of the first header (if seekable)
    startofbody    -- file position of the body (if seekable)
    """

    def __init__(self, fp, seekable = 1):
        """Initialize the class instance and read the headers.

        `fp' is any object with a readline() method.  If `seekable' is 1
        (the default) fp.tell() is probed once and seekable is reset to 0
        if the probe fails; for any other value no probe is made.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except Exception:
                # Deliberate best-effort probe: any failure just means
                # "treat the stream as unseekable".  Unlike a bare except
                # this lets KeyboardInterrupt/SystemExit propagate.
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the input object is not seekable.
        """
        if not self.seekable:
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that
        terminates them.  The (normally blank) line that ends the
        headers is skipped, but not included in the returned list.
        If a non-header line ends the headers, (which is an error),
        an attempt is made to backspace over it; it is never
        included in the returned list.

        The variable self.status is set to the empty string if all
        went well, otherwise it is an error message.
        The variable self.headers is a completely uninterpreted list
        of lines contained in the header (so printing them will
        reproduce the header exactly as it appears in the file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = headers = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Prefer an explicit pushback method; fall back to tell()/seek().
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                startofline = tell()
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line[:5] == 'From ':
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                headers.append(line)
                joined = self.dict[headerseen] + "\n " + line.strip()
                self.dict[headerseen] = joined.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                headers.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            # It's not a header line; throw it back and stop here.
            if not self.dict:
                self.status = 'No headers'
            else:
                self.status = 'Non-header line where header expected'
            # Try to undo the read.
            if unread:
                unread(line)
            elif tell:
                self.fp.seek(startofline)
            else:
                self.status = self.status + '; bad seek'
            break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        else:
            return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC-822 headers.

        You may override this method if your application wants
        to bend the rules, e.g. to strip trailing whitespace,
        or to recognise MH template separators ('--------').
        For convenience (e.g. for code reading from sockets) a
        line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing
        on tagged data in RFC822-like formats that support embedded
        comments or free-text data.
        """
        return None

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines
        matching a given header name (and their continuation
        lines).  A list of the lines is returned, without
        interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple
        times, all occurrences are returned.  Case is not
        important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif line[:1] not in string.whitespace:
                # A new, different header ends the current match.
                hit = 0
            if hit:
                matches.append(line)
        return matches

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns
        only the first matching header (and its continuation
        lines).
        """
        name = name.lower() + ':'
        n = len(name)
        matches = []
        hit = 0
        for line in self.headers:
            if hit:
                if line[:1] not in string.whitespace:
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                matches.append(line)
        return matches

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the
        header but with the keyword stripped.  All leading,
        trailing and embedded whitespace is kept in the
        string, however.
        Return None if the header does not occur.
        """
        lines = self.getfirstmatchingheader(name)
        if not lines:
            return None
        # Drop "<name>:" from the first line only.
        lines[0] = lines[0][len(name) + 1:]
        return ''.join(lines)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped
        version of the header value for a given header name,
        or `default' (None unless given) if it doesn't exist.
        This uses the dictionary version which finds the *last*
        such header.
        """
        try:
            return self.dict[name.lower()]
        except KeyError:
            return default
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once;
        each value in the result list is stripped in the same way as the
        result of getheader().  If the header is not given, return an
        empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0] in string.whitespace:
                # Continuation line: fold it into the current value.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) if the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                i = h.find(':')
                if i > 0:
                    # Separate the values of repeated headers with a comma
                    # so they parse as one address list.
                    if raw:
                        raw.append(', ')
                    raw.append(h[i+1:])
        alladdrs = ''.join(raw)
        a = AddrlistClass(alladdrs)
        return a.getaddrlist()

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning
        a tuple compatible with time.mktime().  Returns None if the
        header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with
        time.mktime(), and the 10th is the offset of the poster's
        time zone from GMT/UTC.  Returns None if the header is absent.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because
        any changed headers get stuck at the end of the raw-headers list
        rather than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        kept = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif line[:1] not in string.whitespace:
                hit = 0
            if not hit:
                kept.append(line)
        # Slice assignment keeps the identity of the self.headers list.
        self.headers[:] = kept

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Support `name in message'; equivalent to has_key()."""
        return name.lower() in self.dict

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined back into one string."""
        return ''.join(self.headers)
446 # Utility functions
447 # -----------------
449 # XXX Should fix unquote() and quote() to be really conformant.
450 # XXX The inverses of the parse functions may also be useful.
def unquote(str):
    """Remove quotes from a string.

    If `str' begins and ends with double quotes they are stripped and the
    backslash escapes produced by quote() (\\\\ and \\") are undone, so that
    unquote(quote(s)) == s.  If it begins and ends with angle brackets
    they are stripped.  Anything else (including strings shorter than two
    characters) is returned unchanged.
    """
    if len(str) > 1:
        if str[0] == '"' and str[-1:] == '"':
            # Undo the escaping in the same order quote() applied it.
            return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
        if str[0] == '<' and str[-1:] == '>':
            return str[1:-1]
    return str
def quote(str):
    """Add quotes around a string.

    Backslashes and double quotes inside `str' are backslash-escaped
    (backslashes first, so the escapes added for quotes are not doubled),
    then the result is wrapped in double quotes.
    """
    return '"%s"' % str.replace('\\', '\\\\').replace('"', '\\"')
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; if none is
    found the result is (None, None).
    """
    parsed = AddrlistClass(address).getaddrlist()
    if parsed:
        return parsed[0]
    return (None, None)
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC-822 in front of you.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; any comments met on the way are
        collected in self.commentlist.
        """
        skippable = self.LWS + '\n\r'
        while self.pos < len(self.field):
            if self.field[self.pos] in skippable:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each a
        (realname, mailaddr) tuple.  Parsing continues to the end of the
        field: an empty or unparsable entry is skipped instead of ending
        the list, and the loop is iterative so long address lists cannot
        exhaust the recursion limit.
        """
        result = []
        while self.pos < len(self.field):
            start = self.pos
            result.extend(self.getaddress())
            if self.pos == start:
                # Defensive: never spin if no input was consumed.
                break
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: empty if nothing
        could be parsed, several entries for an RFC-822 group.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Save a *copy*: getphraselist() appends to the live list, and an
        # alias would leave those comments in place to be collected a
        # second time when the addr-spec is re-parsed below.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos = self.pos + 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos = self.pos + 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                realname = (' '.join(plist) + ' (' +
                            ' '.join(self.commentlist) + ')')
            else:
                realname = ' '.join(plist)
            returnlist = [(realname, routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so parsing can make progress.
                self.pos = self.pos + 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos = self.pos + 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if not positioned at '<' or if no addr-spec is found.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos = self.pos + 1
        self.gotonext()
        adlist = None
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos = self.pos + 1
                break
            elif self.field[self.pos] == '@':
                self.pos = self.pos + 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos = self.pos + 1
            else:
                adlist = self.getaddrspec()
                # Step over the character following the addr-spec
                # (normally the closing '>').
                self.pos = self.pos + 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC-822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            # Local part only, no domain.
            return ''.join(aslist)

        aslist.append('@')
        self.pos = self.pos + 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos = self.pos + 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC-822 comments
        are allowed within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos = self.pos + 1
        while self.pos < len(self.field):
            if quote == 1:
                # Previous character was a backslash: take this one as-is.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos = self.pos + 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                # getcomment() has already advanced pos past the nested
                # comment; advancing again would skip a character (often
                # the enclosing comment's own closing paren).
                continue
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC-822 domain-literal."""
        return self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Parse an RFC-822 atom."""
        atomlist = ['']

        while self.pos < len(self.field):
            if self.field[self.pos] in self.atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos = self.pos + 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC-822 phrases.

        A phrase is a sequence of words, which are in turn either
        RFC-822 atoms or quoted-strings.  Phrases are canonicalized
        by squeezing all runs of continuous whitespace into one space.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                plist.append(self.getatom())

        return plist
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC822 addresses.

    The parsed (realname, mailaddr) tuples are kept in self.addresslist.
    """

    def __init__(self, field):
        """Parse `field'; a false value (None, '') gives an empty list."""
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        """Return the number of addresses in the list."""
        return len(self.addresslist)

    def __str__(self):
        """Return the addresses in canonical form, separated by ', '."""
        return ", ".join(map(dump_address_pair, self.addresslist))

    def __add__(self, other):
        """Return a new AddressList: self plus entries of other not in self."""
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if not x in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __sub__(self, other):
        """Return a new AddressList: entries of self that are not in other."""
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if not x in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __getitem__(self, index):
        """Return the address (or slice of addresses) at `index'."""
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is '"name" <address>'; otherwise it
    is just the address.
    """
    realname, mailaddr = pair[0], pair[1]
    if not realname:
        return mailaddr
    return ''.join(['"', realname, '" <', mailaddr, '>'])
785 # Parse a date field
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple (year, month, day, hour, minute, second, 0, 0, 0,
    tzoffset), where tzoffset is the zone's offset from UTC in seconds, or
    None when the zone is missing or not recognized.  Zone names are
    looked up in _timezones (military zones other than Z are not
    supported); numeric zones such as -0500 are converted directly.

    Returns None if `data' is empty or cannot be parsed as a date.
    """
    data = data.split()
    if not data:
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        s = data[3]
        i = s.find('+')
        if i > 0:
            # Zone glued onto the time, e.g. "08:49:37+0100".
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if not mm in _monthnames:
        # Maybe the order is "Mon dd" rather than "dd Mon".
        dd, mm = mm, dd.lower()
        if not mm in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    if mm > 12: mm = mm - 12        # long month names follow the short ones
    # endswith()/slicing instead of indexing: tokens may be empty here
    # (e.g. from "-Nov-94"), and must yield None rather than IndexError.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are swapped.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if yy and yy[0] not in string.digits:
        # Year and zone are swapped.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # Floor division: HHMM must split into whole hours and minutes
        # regardless of the interpreter's division semantics.
        tzoffset = tzsign * ((tzoffset // 100) * 3600 + (tzoffset % 100) * 60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
def parsedate(data):
    """Convert a time string to a time tuple.

    Returns the first nine items of the tuple produced by parsedate_tz()
    (i.e. without the timezone offset), or whatever non-tuple value
    parsedate_tz() returned (None for unparsable input).
    """
    t = parsedate_tz(data)
    if isinstance(t, tuple):
        return t[:9]
    return t
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None when the zone is
    unknown, in which case the first eight fields are interpreted as
    local time.
    """
    offset = data[9]
    fields = data[:8]
    if offset is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(fields + (-1,))
    local_stamp = time.mktime(fields + (0,))
    return local_stamp - offset - time.timezone
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    `timeval' is a timestamp in seconds since the epoch; the current
    time is used when it is None.  Day and month names are always the
    English abbreviations the RFCs require: strftime()'s %a and %b follow
    the process locale and so cannot be used here.
    """
    if timeval is None:
        timeval = time.time()
    t = time.gmtime(timeval)
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[t[6]],
        t[2],
        ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[t[1] - 1],
        t[0], t[3], t[4], t[5])
915 # When used as script, run a small test program.
916 # The first command line argument must be a filename containing one
917 # message in RFC-822 format.
if __name__ == '__main__':
    import sys
    import os
    # Only fall back to (and require $HOME for) the default mailbox when
    # no filename was given on the command line.
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    f = open(path, 'r')
    try:
        m = Message(f)
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        if date:
            # Only touch the tuple once we know the Date header parsed.
            tz = date[-1]
            parts = ['ParsedDate:',
                     time.asctime(time.localtime(mktime_tz(date)))]
            if tz is not None:
                # Split the magnitude and re-attach the sign, so that
                # e.g. -19800 prints as -0530 rather than -0630.
                if tz < 0:
                    sign = '-'
                else:
                    sign = '+'
                hhmm, ss = divmod(abs(tz), 60)
                hh, mm = divmod(hhmm, 60)
                parts.append('%s%02d%02d' % (sign, hh, mm))
                if ss:
                    parts.append('.%02d' % ss)
            print(' '.join(parts))
        else:
            print('ParsedDate: %s' % (None,))
        m.rewindbody()
        n = 0
        while f.readline():
            n = n + 1
        print('Lines: %d' % n)
        print('-'*70)
        print('len = %d' % len(m))
        if m.has_key('Date'):
            print('Date = %s' % (m['Date'],))
        if m.has_key('X-Nonsense'):
            pass
        print('keys = %s' % (list(m.keys()),))
        print('values = %s' % (list(m.values()),))
        print('items = %s' % (list(m.items()),))
    finally:
        f.close()