Lib/rfc822.py

   1 """RFC-822 message manipulation class.
   2
   3 XXX This is only a very rough sketch of a full RFC-822 parser;
   4 in particular the tokenizing of addresses does not adhere to all the
   5 quoting rules.
   6
   7 Directions for use:
   8
   9 To create a Message object: first open a file, e.g.:
  10   fp = open(file, 'r')
  11 You can use any other legal way of getting an open file object, e.g. use
  12 sys.stdin or call os.popen().
  13 Then pass the open file object to the Message() constructor:
  14   m = Message(fp)
  15
  16 This class can work with any input object that supports a readline
  17 method.  If the input object has seek and tell capability, the
  18 rewindbody method will work; also illegal lines will be pushed back
  19 onto the input stream.  If the input object lacks seek but has an
  20 `unread' method that can push back a line of input, Message will use
  21 that to push back illegal lines.  Thus this class can be used to parse
  22 messages coming from a buffered stream.
  23
  24 The optional `seekable' argument is provided as a workaround for
  25 certain stdio libraries in which tell() discards buffered data before
  26 discovering that the lseek() system call doesn't work.  For maximum
  27 portability, you should set the seekable argument to zero to prevent
  28 that initial tell() call when passing in an unseekable object such as
  29 a file object created from a socket object.  If it is 1 on entry --
  30 which it is by default -- the tell() method of the open file object is
  31 called once; if this raises an exception, seekable is reset to 0.  For
  32 other nonzero values of seekable, this test is not made.
  33
  34 To get the text of a particular header there are several methods:
  35   str = m.getheader(name)
  36   str = m.getrawheader(name)
  37 where name is the name of the header, e.g. 'Subject'.
  38 The difference is that getheader() strips the leading and trailing
  39 whitespace, while getrawheader() doesn't.  Both functions retain
  40 embedded whitespace (including newlines) exactly as they are
  41 specified in the header, and leave the case of the text unchanged.
  42
  43 For addresses and address lists there are functions
  44   realname, mailaddress = m.getaddr(name) and
  45   list = m.getaddrlist(name)
  46 where the latter returns a list of (realname, mailaddr) tuples.
  47
  48 There is also a method
  49   time = m.getdate(name)
  50 which parses a Date-like field and returns a time-compatible tuple,
  51 i.e. a tuple such as returned by time.localtime() or accepted by
  52 time.mktime().
  53
  54 See the class definition for lower level access methods.
  55
  56 There are also some utility functions here.
  57 """
  58 # Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
  59
  60 import string
  61 import time
  62
  63
  64 _blanklines = ('\r\n', '\n')            # Optimization for islast()
  65
  66
  67 class Message:
  68     """Represents a single RFC-822-compliant message."""
  69
  70     def __init__(self, fp, seekable = 1):
  71         """Initialize the class instance and read the headers."""
  72         if seekable == 1:
  73             # Exercise tell() to make sure it works
  74             # (and then assume seek() works, too)
  75             try:
  76                 fp.tell()
  77             except:
  78                 seekable = 0
  79             else:
  80                 seekable = 1
  81         self.fp = fp
  82         self.seekable = seekable
  83         self.startofheaders = None
  84         self.startofbody = None
  85         #
  86         if self.seekable:
  87             try:
  88                 self.startofheaders = self.fp.tell()
  89             except IOError:
  90                 self.seekable = 0
  91         #
  92         self.readheaders()
  93         #
  94         if self.seekable:
  95             try:
  96                 self.startofbody = self.fp.tell()
  97             except IOError:
  98                 self.seekable = 0
  99
 100     def rewindbody(self):
 101         """Rewind the file to the start of the body (if seekable)."""
 102         if not self.seekable:
 103             raise IOError, "unseekable file"
 104         self.fp.seek(self.startofbody)
 105
 106     def readheaders(self):
 107         """Read header lines.
 108
 109         Read header lines up to the entirely blank line that
 110         terminates them.  The (normally blank) line that ends the
 111         headers is skipped, but not included in the returned list.
 112         If a non-header line ends the headers, (which is an error),
 113         an attempt is made to backspace over it; it is never
 114         included in the returned list.
 115
 116         The variable self.status is set to the empty string if all
 117         went well, otherwise it is an error message.
 118         The variable self.headers is a completely uninterpreted list
 119         of lines contained in the header (so printing them will
 120         reproduce the header exactly as it appears in the file).
 121         """
 122         self.dict = {}
 123         self.unixfrom = ''
 124         self.headers = list = []
 125         self.status = ''
 126         headerseen = ""
 127         firstline = 1
 128         startofline = unread = tell = None
 129         if hasattr(self.fp, 'unread'):
 130             unread = self.fp.unread
 131         elif self.seekable:
 132             tell = self.fp.tell
 133         while 1:
 134             if tell:
 135                 startofline = tell()
 136             line = self.fp.readline()
 137             if not line:
 138                 self.status = 'EOF in headers'
 139                 break
 140             # Skip unix From name time lines
 141             if firstline and line[:5] == 'From ':
 142                 self.unixfrom = self.unixfrom + line
 143                 continue
 144             firstline = 0
 145             if headerseen and line[0] in ' \t':
 146                 # It's a continuation line.
 147                 list.append(line)
 148                 x = (self.dict[headerseen] + "\n " + string.strip(line))
 149                 self.dict[headerseen] = string.strip(x)
 150                 continue
 151             elif self.iscomment(line):
 152                 # It's a comment.  Ignore it.
 153                 continue
 154             elif self.islast(line):
 155                 # Note! No pushback here!  The delimiter line gets eaten.
 156                 break
 157             headerseen = self.isheader(line)
 158             if headerseen:
 159                 # It's a legal header line, save it.
 160                 list.append(line)
 161                 self.dict[headerseen] = string.strip(line[len(headerseen)+1:])
 162                 continue
 163             else:
 164                 # It's not a header line; throw it back and stop here.
 165                 if not self.dict:
 166                     self.status = 'No headers'
 167                 else:
 168                     self.status = 'Non-header line where header expected'
 169                 # Try to undo the read.
 170                 if unread:
 171                     unread(line)
 172                 elif tell:
 173                     self.fp.seek(startofline)
 174                 else:
 175                     self.status = self.status + '; bad seek'
 176                 break
 177
 178     def isheader(self, line):
 179         """Determine whether a given line is a legal header.
 180
 181         This method should return the header name, suitably canonicalized.
 182         You may override this method in order to use Message parsing
 183         on tagged data in RFC822-like formats with special header formats.
 184         """
 185         i = string.find(line, ':')
 186         if i > 0:
 187             return string.lower(line[:i])
 188         else:
 189             return None
 190
 191     def islast(self, line):
 192         """Determine whether a line is a legal end of RFC-822 headers.
 193
 194         You may override this method if your application wants
 195         to bend the rules, e.g. to strip trailing whitespace,
 196         or to recognise MH template separators ('--------').
 197         For convenience (e.g. for code reading from sockets) a
 198         line consisting of \r\n also matches.
 199         """
 200         return line in _blanklines
 201
 202     def iscomment(self, line):
 203         """Determine whether a line should be skipped entirely.
 204
 205         You may override this method in order to use Message parsing
 206         on tagged data in RFC822-like formats that support embedded
 207         comments or free-text data.
 208         """
 209         return None
 210
 211     def getallmatchingheaders(self, name):
 212         """Find all header lines matching a given header name.
 213
 214         Look through the list of headers and find all lines
 215         matching a given header name (and their continuation
 216         lines).  A list of the lines is returned, without
 217         interpretation.  If the header does not occur, an
 218         empty list is returned.  If the header occurs multiple
 219         times, all occurrences are returned.  Case is not
 220         important in the header name.
 221         """
 222         name = string.lower(name) + ':'
 223         n = len(name)
 224         list = []
 225         hit = 0
 226         for line in self.headers:
 227             if string.lower(line[:n]) == name:
 228                 hit = 1
 229             elif line[:1] not in string.whitespace:
 230                 hit = 0
 231             if hit:
 232                 list.append(line)
 233         return list
 234
 235     def getfirstmatchingheader(self, name):
 236         """Get the first header line matching name.
 237
 238         This is similar to getallmatchingheaders, but it returns
 239         only the first matching header (and its continuation
 240         lines).
 241         """
 242         name = string.lower(name) + ':'
 243         n = len(name)
 244         list = []
 245         hit = 0
 246         for line in self.headers:
 247             if hit:
 248                 if line[:1] not in string.whitespace:
 249                     break
 250             elif string.lower(line[:n]) == name:
 251                 hit = 1
 252             if hit:
 253                 list.append(line)
 254         return list
 255
 256     def getrawheader(self, name):
 257         """A higher-level interface to getfirstmatchingheader().
 258
 259         Return a string containing the literal text of the
 260         header but with the keyword stripped.  All leading,
 261         trailing and embedded whitespace is kept in the
 262         string, however.
 263         Return None if the header does not occur.
 264         """
 265
 266         list = self.getfirstmatchingheader(name)
 267         if not list:
 268             return None
 269         list[0] = list[0][len(name) + 1:]
 270         return string.joinfields(list, '')
 271
 272     def getheader(self, name, default=None):
 273         """Get the header value for a name.
 274
 275         This is the normal interface: it returns a stripped
 276         version of the header value for a given header name,
 277         or None if it doesn't exist.  This uses the dictionary
 278         version which finds the *last* such header.
 279         """
 280         try:
 281             return self.dict[string.lower(name)]
 282         except KeyError:
 283             return default
 284     get = getheader
 285
 286     def getheaders(self, name):
 287         """Get all values for a header.
 288
 289         This returns a list of values for headers given more than once;
 290         each value in the result list is stripped in the same way as the
 291         result of getheader().  If the header is not given, return an
 292         empty list.
 293         """
 294         result = []
 295         current = ''
 296         have_header = 0
 297         for s in self.getallmatchingheaders(name):
 298             if s[0] in string.whitespace:
 299                 if current:
 300                     current = "%s\n %s" % (current, string.strip(s))
 301                 else:
 302                     current = string.strip(s)
 303             else:
 304                 if have_header:
 305                     result.append(current)
 306                 current = string.strip(s[string.find(s, ":") + 1:])
 307                 have_header = 1
 308         if have_header:
 309             result.append(current)
 310         return result
 311
 312     def getaddr(self, name):
 313         """Get a single address from a header, as a tuple.
 314
 315         An example return value:
 316         ('Guido van Rossum', 'guido@cwi.nl')
 317         """
 318         # New, by Ben Escoto
 319         alist = self.getaddrlist(name)
 320         if alist:
 321             return alist[0]
 322         else:
 323             return (None, None)
 324
 325     def getaddrlist(self, name):
 326         """Get a list of addresses from a header.
 327
 328         Retrieves a list of addresses from a header, where each address is a
 329         tuple as returned by getaddr().  Scans all named headers, so it works
 330         properly with multiple To: or Cc: headers for example.
 331
 332         """
 333         raw = []
 334         for h in self.getallmatchingheaders(name):
 335             if h[0] in ' \t':
 336                 raw.append(h)
 337             else:
 338                 if raw:
 339                     raw.append(', ')
 340                 i = string.find(h, ':')
 341                 if i > 0:
 342                     addr = h[i+1:]
 343                 raw.append(addr)
 344         alladdrs = string.join(raw, '')
 345         a = AddrlistClass(alladdrs)
 346         return a.getaddrlist()
 347
 348     def getdate(self, name):
 349         """Retrieve a date field from a header.
 350
 351         Retrieves a date field from the named header, returning
 352         a tuple compatible with time.mktime().
 353         """
 354         try:
 355             data = self[name]
 356         except KeyError:
 357             return None
 358         return parsedate(data)
 359
 360     def getdate_tz(self, name):
 361         """Retrieve a date field from a header as a 10-tuple.
 362
 363         The first 9 elements make up a tuple compatible with
 364         time.mktime(), and the 10th is the offset of the poster's
 365         time zone from GMT/UTC.
 366         """
 367         try:
 368             data = self[name]
 369         except KeyError:
 370             return None
 371         return parsedate_tz(data)
 372
 373
 374     # Access as a dictionary (only finds *last* header of each type):
 375
 376     def __len__(self):
 377         """Get the number of headers in a message."""
 378         return len(self.dict)
 379
 380     def __getitem__(self, name):
 381         """Get a specific header, as from a dictionary."""
 382         return self.dict[string.lower(name)]
 383
 384     def __setitem__(self, name, value):
 385         """Set the value of a header.
 386
 387         Note: This is not a perfect inversion of __getitem__, because
 388         any changed headers get stuck at the end of the raw-headers list
 389         rather than where the altered header was.
 390         """
 391         del self[name] # Won't fail if it doesn't exist
 392         self.dict[string.lower(name)] = value
 393         text = name + ": " + value
 394         lines = string.split(text, "\n")
 395         for line in lines:
 396             self.headers.append(line + "\n")
 397
 398     def __delitem__(self, name):
 399         """Delete all occurrences of a specific header, if it is present."""
 400         name = string.lower(name)
 401         if not self.dict.has_key(name):
 402             return
 403         del self.dict[name]
 404         name = name + ':'
 405         n = len(name)
 406         list = []
 407         hit = 0
 408         for i in range(len(self.headers)):
 409             line = self.headers[i]
 410             if string.lower(line[:n]) == name:
 411                 hit = 1
 412             elif line[:1] not in string.whitespace:
 413                 hit = 0
 414             if hit:
 415                 list.append(i)
 416         list.reverse()
 417         for i in list:
 418             del self.headers[i]
 419
 420     def has_key(self, name):
 421         """Determine whether a message contains the named header."""
 422         return self.dict.has_key(string.lower(name))
 423
 424     def keys(self):
 425         """Get all of a message's header field names."""
 426         return self.dict.keys()
 427
 428     def values(self):
 429         """Get all of a message's header field values."""
 430         return self.dict.values()
 431
 432     def items(self):
 433         """Get all of a message's headers.
 434
 435         Returns a list of name, value tuples.
 436         """
 437         return self.dict.items()
 438
 439     def __str__(self):
 440         str = ''
 441         for hdr in self.headers:
 442             str = str + hdr
 443         return str
 444
 445
 446 # Utility functions
 447 # -----------------
 448
 449 # XXX Should fix unquote() and quote() to be really conformant.
 450 # XXX The inverses of the parse functions may also be useful.
 451
 452
 453 def unquote(str):
 454     """Remove quotes from a string."""
 455     if len(str) > 1:
 456         if str[0] == '"' and str[-1:] == '"':
 457             return str[1:-1]
 458         if str[0] == '<' and str[-1:] == '>':
 459             return str[1:-1]
 460     return str
 461
 462
 463 def quote(str):
 464     """Add quotes around a string."""
 465     return '"%s"' % string.join(
 466     string.split(
 467     string.join(
 468     string.split(str, '\\'),
 469     '\\\\'),
 470     '"'),
 471     '\\"')
 472
 473
 474 def parseaddr(address):
 475     """Parse an address into a (realname, mailaddr) tuple."""
 476     a = AddrlistClass(address)
 477     list = a.getaddrlist()
 478     if not list:
 479         return (None, None)
 480     else:
 481         return list[0]
 482
 483
 484 class AddrlistClass:
 485     """Address parser class by Ben Escoto.
 486
 487     To understand what this class does, it helps to have a copy of
 488     RFC-822 in front of you.
 489
 490     Note: this class interface is deprecated and may be removed in the future.
 491     Use rfc822.AddressList instead.
 492     """
 493
 494     def __init__(self, field):
 495         """Initialize a new instance.
 496
 497         `field' is an unparsed address header field, containing
 498         one or more addresses.
 499         """
 500         self.specials = '()<>@,:;.\"[]'
 501         self.pos = 0
 502         self.LWS = ' \t'
 503         self.CR = '\r\n'
 504         self.atomends = self.specials + self.LWS + self.CR
 505         self.field = field
 506         self.commentlist = []
 507
 508     def gotonext(self):
 509         """Parse up to the start of the next address."""
 510         while self.pos < len(self.field):
 511             if self.field[self.pos] in self.LWS + '\n\r':
 512                 self.pos = self.pos + 1
 513             elif self.field[self.pos] == '(':
 514                 self.commentlist.append(self.getcomment())
 515             else: break
 516
 517     def getaddrlist(self):
 518         """Parse all addresses.
 519
 520         Returns a list containing all of the addresses.
 521         """
 522         ad = self.getaddress()
 523         if ad:
 524             return ad + self.getaddrlist()
 525         else: return []
 526
 527     def getaddress(self):
 528         """Parse the next address."""
 529         self.commentlist = []
 530         self.gotonext()
 531
 532         oldpos = self.pos
 533         oldcl = self.commentlist
 534         plist = self.getphraselist()
 535
 536         self.gotonext()
 537         returnlist = []
 538
 539         if self.pos >= len(self.field):
 540             # Bad email address technically, no domain.
 541             if plist:
 542                 returnlist = [(string.join(self.commentlist), plist[0])]
 543
 544         elif self.field[self.pos] in '.@':
 545             # email address is just an addrspec
 546             # this isn't very efficient since we start over
 547             self.pos = oldpos
 548             self.commentlist = oldcl
 549             addrspec = self.getaddrspec()
 550             returnlist = [(string.join(self.commentlist), addrspec)]
 551
 552         elif self.field[self.pos] == ':':
 553             # address is a group
 554             returnlist = []
 555
 556             fieldlen = len(self.field)
 557             self.pos = self.pos + 1
 558             while self.pos < len(self.field):
 559                 self.gotonext()
 560                 if self.pos < fieldlen and self.field[self.pos] == ';':
 561                     self.pos = self.pos + 1
 562                     break
 563                 returnlist = returnlist + self.getaddress()
 564
 565         elif self.field[self.pos] == '<':
 566             # Address is a phrase then a route addr
 567             routeaddr = self.getrouteaddr()
 568
 569             if self.commentlist:
 570                 returnlist = [(string.join(plist) + ' (' + \
 571                          string.join(self.commentlist) + ')', routeaddr)]
 572             else: returnlist = [(string.join(plist), routeaddr)]
 573
 574         else:
 575             if plist:
 576                 returnlist = [(string.join(self.commentlist), plist[0])]
 577             elif self.field[self.pos] in self.specials:
 578                 self.pos = self.pos + 1
 579
 580         self.gotonext()
 581         if self.pos < len(self.field) and self.field[self.pos] == ',':
 582             self.pos = self.pos + 1
 583         return returnlist
 584
 585     def getrouteaddr(self):
 586         """Parse a route address (Return-path value).
 587
 588         This method just skips all the route stuff and returns the addrspec.
 589         """
 590         if self.field[self.pos] != '<':
 591             return
 592
 593         expectroute = 0
 594         self.pos = self.pos + 1
 595         self.gotonext()
 596         adlist = None
 597         while self.pos < len(self.field):
 598             if expectroute:
 599                 self.getdomain()
 600                 expectroute = 0
 601             elif self.field[self.pos] == '>':
 602                 self.pos = self.pos + 1
 603                 break
 604             elif self.field[self.pos] == '@':
 605                 self.pos = self.pos + 1
 606                 expectroute = 1
 607             elif self.field[self.pos] == ':':
 608                 self.pos = self.pos + 1
 609                 expectaddrspec = 1
 610             else:
 611                 adlist = self.getaddrspec()
 612                 self.pos = self.pos + 1
 613                 break
 614             self.gotonext()
 615
 616         return adlist
 617
 618     def getaddrspec(self):
 619         """Parse an RFC-822 addr-spec."""
 620         aslist = []
 621
 622         self.gotonext()
 623         while self.pos < len(self.field):
 624             if self.field[self.pos] == '.':
 625                 aslist.append('.')
 626                 self.pos = self.pos + 1
 627             elif self.field[self.pos] == '"':
 628                 aslist.append('"%s"' % self.getquote())
 629             elif self.field[self.pos] in self.atomends:
 630                 break
 631             else: aslist.append(self.getatom())
 632             self.gotonext()
 633
 634         if self.pos >= len(self.field) or self.field[self.pos] != '@':
 635             return string.join(aslist, '')
 636
 637         aslist.append('@')
 638         self.pos = self.pos + 1
 639         self.gotonext()
 640         return string.join(aslist, '') + self.getdomain()
 641
 642     def getdomain(self):
 643         """Get the complete domain name from an address."""
 644         sdlist = []
 645         while self.pos < len(self.field):
 646             if self.field[self.pos] in self.LWS:
 647                 self.pos = self.pos + 1
 648             elif self.field[self.pos] == '(':
 649                 self.commentlist.append(self.getcomment())
 650             elif self.field[self.pos] == '[':
 651                 sdlist.append(self.getdomainliteral())
 652             elif self.field[self.pos] == '.':
 653                 self.pos = self.pos + 1
 654                 sdlist.append('.')
 655             elif self.field[self.pos] in self.atomends:
 656                 break
 657             else: sdlist.append(self.getatom())
 658         return string.join(sdlist, '')
 659
 660     def getdelimited(self, beginchar, endchars, allowcomments = 1):
 661         """Parse a header fragment delimited by special characters.
 662
 663         `beginchar' is the start character for the fragment.
 664         If self is not looking at an instance of `beginchar' then
 665         getdelimited returns the empty string.
 666
 667         `endchars' is a sequence of allowable end-delimiting characters.
 668         Parsing stops when one of these is encountered.
 669
 670         If `allowcomments' is non-zero, embedded RFC-822 comments
 671         are allowed within the parsed fragment.
 672         """
 673         if self.field[self.pos] != beginchar:
 674             return ''
 675
 676         slist = ['']
 677         quote = 0
 678         self.pos = self.pos + 1
 679         while self.pos < len(self.field):
 680             if quote == 1:
 681                 slist.append(self.field[self.pos])
 682                 quote = 0
 683             elif self.field[self.pos] in endchars:
 684                 self.pos = self.pos + 1
 685                 break
 686             elif allowcomments and self.field[self.pos] == '(':
 687                 slist.append(self.getcomment())
 688             elif self.field[self.pos] == '\\':
 689                 quote = 1
 690             else:
 691                 slist.append(self.field[self.pos])
 692             self.pos = self.pos + 1
 693
 694         return string.join(slist, '')
 695
 696     def getquote(self):
 697         """Get a quote-delimited fragment from self's field."""
 698         return self.getdelimited('"', '"\r', 0)
 699
 700     def getcomment(self):
 701         """Get a parenthesis-delimited fragment from self's field."""
 702         return self.getdelimited('(', ')\r', 1)
 703
 704     def getdomainliteral(self):
 705         """Parse an RFC-822 domain-literal."""
 706         return self.getdelimited('[', ']\r', 0)
 707
 708     def getatom(self):
 709         """Parse an RFC-822 atom."""
 710         atomlist = ['']
 711
 712         while self.pos < len(self.field):
 713             if self.field[self.pos] in self.atomends:
 714                 break
 715             else: atomlist.append(self.field[self.pos])
 716             self.pos = self.pos + 1
 717
 718         return string.join(atomlist, '')
 719
 720     def getphraselist(self):
 721         """Parse a sequence of RFC-822 phrases.
 722
 723         A phrase is a sequence of words, which are in turn either
 724         RFC-822 atoms or quoted-strings.  Phrases are canonicalized
 725         by squeezing all runs of continuous whitespace into one space.
 726         """
 727         plist = []
 728
 729         while self.pos < len(self.field):
 730             if self.field[self.pos] in self.LWS:
 731                 self.pos = self.pos + 1
 732             elif self.field[self.pos] == '"':
 733                 plist.append(self.getquote())
 734             elif self.field[self.pos] == '(':
 735                 self.commentlist.append(self.getcomment())
 736             elif self.field[self.pos] in self.atomends:
 737                 break
 738             else: plist.append(self.getatom())
 739
 740         return plist
 741
 742 class AddressList(AddrlistClass):
 743     """An AddressList encapsulates a list of parsed RFC822 addresses."""
 744     def __init__(self, field):
 745         AddrlistClass.__init__(self, field)
 746         if field:
 747             self.addresslist = self.getaddrlist()
 748         else:
 749             self.addresslist = []
 750
 751     def __len__(self):
 752         return len(self.addresslist)
 753
 754     def __str__(self):
 755         return string.joinfields(map(dump_address_pair, self.addresslist),", ")
 756
 757     def __add__(self, other):
 758         # Set union
 759         newaddr = AddressList(None)
 760         newaddr.addresslist = self.addresslist[:]
 761         for x in other.addresslist:
 762             if not x in self.addresslist:
 763                 newaddr.addresslist.append(x)
 764         return newaddr
 765
 766     def __sub__(self, other):
 767         # Set difference
 768         newaddr = AddressList(None)
 769         for x in self.addresslist:
 770             if not x in other.addresslist:
 771                 newaddr.addresslist.append(x)
 772         return newaddr
 773
 774     def __getitem__(self, index):
 775         # Make indexing, slices, and 'in' work
 776         return self.addresslist[index]
 777
 778 def dump_address_pair(pair):
 779     """Dump a (name, address) pair in a canonicalized form."""
 780     if pair[0]:
 781         return '"' + pair[0] + '" <' + pair[1] + '>'
 782     else:
 783         return pair[1]
 784
 785 # Parse a date field
 786
 787 _monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
 788                'aug', 'sep', 'oct', 'nov', 'dec',
 789                'january', 'february', 'march', 'april', 'may', 'june', 'july',
 790                'august', 'september', 'october', 'november', 'december']
 791 _daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
 792
 793 # The timezone table does not include the military time zones defined
 794 # in RFC822, other than Z.  According to RFC1123, the description in
 795 # RFC822 gets the signs wrong, so we can't rely on any such time
 796 # zones.  RFC1123 recommends that numeric timezone indicators be used
 797 # instead of timezone names.
 798
 799 _timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
 800               'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
 801               'EST': -500, 'EDT': -400,  # Eastern
 802               'CST': -600, 'CDT': -500,  # Central
 803               'MST': -700, 'MDT': -600,  # Mountain
 804               'PST': -800, 'PDT': -700   # Pacific
 805               }
 806
 807
 808 def parsedate_tz(data):
 809     """Convert a date string to a time tuple.
 810
 811     Accounts for military timezones.
 812     """
 813     data = string.split(data)
 814     if data[0][-1] in (',', '.') or string.lower(data[0]) in _daynames:
 815         # There's a dayname here. Skip it
 816         del data[0]
 817     if len(data) == 3: # RFC 850 date, deprecated
 818         stuff = string.split(data[0], '-')
 819         if len(stuff) == 3:
 820             data = stuff + data[1:]
 821     if len(data) == 4:
 822         s = data[3]
 823         i = string.find(s, '+')
 824         if i > 0:
 825             data[3:] = [s[:i], s[i+1:]]
 826         else:
 827             data.append('') # Dummy tz
 828     if len(data) < 5:
 829         return None
 830     data = data[:5]
 831     [dd, mm, yy, tm, tz] = data
 832     mm = string.lower(mm)
 833     if not mm in _monthnames:
 834         dd, mm = mm, string.lower(dd)
 835         if not mm in _monthnames:
 836             return None
 837     mm = _monthnames.index(mm)+1
 838     if mm > 12: mm = mm - 12
 839     if dd[-1] == ',':
 840         dd = dd[:-1]
 841     i = string.find(yy, ':')
 842     if i > 0:
 843         yy, tm = tm, yy
 844     if yy[-1] == ',':
 845         yy = yy[:-1]
 846     if yy[0] not in string.digits:
 847         yy, tz = tz, yy
 848     if tm[-1] == ',':
 849         tm = tm[:-1]
 850     tm = string.splitfields(tm, ':')
 851     if len(tm) == 2:
 852         [thh, tmm] = tm
 853         tss = '0'
 854     elif len(tm) == 3:
 855         [thh, tmm, tss] = tm
 856     else:
 857         return None
 858     try:
 859         yy = string.atoi(yy)
 860         dd = string.atoi(dd)
 861         thh = string.atoi(thh)
 862         tmm = string.atoi(tmm)
 863         tss = string.atoi(tss)
 864     except string.atoi_error:
 865         return None
 866     tzoffset=None
 867     tz=string.upper(tz)
 868     if _timezones.has_key(tz):
 869         tzoffset=_timezones[tz]
 870     else:
 871         try:
 872             tzoffset=string.atoi(tz)
 873         except string.atoi_error:
 874             pass
 875     # Convert a timezone offset into seconds ; -0500 -> -18000
 876     if tzoffset:
 877         if tzoffset < 0:
 878             tzsign = -1
 879             tzoffset = -tzoffset
 880         else:
 881             tzsign = 1
 882         tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60)
 883     tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
 884     return tuple
 885
 886
 887 def parsedate(data):
 888     """Convert a time string to a time tuple."""
 889     t=parsedate_tz(data)
 890     if type(t)==type( () ):
 891         return t[:9]
 892     else: return t
 893
 894
 895 def mktime_tz(data):
 896     """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
 897     if data[9] is None:
 898         # No zone info, so localtime is better assumption than GMT
 899         return time.mktime(data[:8] + (-1,))
 900     else:
 901         t = time.mktime(data[:8] + (0,))
 902         return t - data[9] - time.timezone
 903
 904 def formatdate(timeval=None):
 905     """Returns time format preferred for Internet standards.
 906
 907     Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 908     """
 909     if timeval is None:
 910         timeval = time.time()
 911     return "%s" % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
 912                                 time.gmtime(timeval))
 913
 914
 915 # When used as script, run a small test program.
 916 # The first command line argument must be a filename containing one
 917 # message in RFC-822 format.
 918
 919 if __name__ == '__main__':
 920     import sys, os
 921     file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
 922     if sys.argv[1:]: file = sys.argv[1]
 923     f = open(file, 'r')
 924     m = Message(f)
 925     print 'From:', m.getaddr('from')
 926     print 'To:', m.getaddrlist('to')
 927     print 'Subject:', m.getheader('subject')
 928     print 'Date:', m.getheader('date')
 929     date = m.getdate_tz('date')
 930     tz = date[-1]
 931     date = time.localtime(mktime_tz(date))
 932     if date:
 933         print 'ParsedDate:', time.asctime(date),
 934         hhmmss = tz
 935         hhmm, ss = divmod(hhmmss, 60)
 936         hh, mm = divmod(hhmm, 60)
 937         print "%+03d%02d" % (hh, mm),
 938         if ss: print ".%02d" % ss,
 939         print
 940     else:
 941         print 'ParsedDate:', None
 942     m.rewindbody()
 943     n = 0
 944     while f.readline():
 945         n = n + 1
 946     print 'Lines:', n
 947     print '-'*70
 948     print 'len =', len(m)
 949     if m.has_key('Date'): print 'Date =', m['Date']
 950     if m.has_key('X-Nonsense'): pass
 951     print 'keys =', m.keys()
 952     print 'values =', m.values()
 953     print 'items =', m.items()