Lib/rfc822.py

   1 """RFC-822 message manipulation class.
   2
   3 XXX This is only a very rough sketch of a full RFC-822 parser;
   4 in particular the tokenizing of addresses does not adhere to all the
   5 quoting rules.
   6
   7 Directions for use:
   8
   9 To create a Message object: first open a file, e.g.:
  10   fp = open(file, 'r')
  11 You can use any other legal way of getting an open file object, e.g. use
  12 sys.stdin or call os.popen().
  13 Then pass the open file object to the Message() constructor:
  14   m = Message(fp)
  15
  16 This class can work with any input object that supports a readline
  17 method.  If the input object has seek and tell capability, the
  18 rewindbody method will work; also illegal lines will be pushed back
  19 onto the input stream.  If the input object lacks seek but has an
  20 `unread' method that can push back a line of input, Message will use
  21 that to push back illegal lines.  Thus this class can be used to parse
  22 messages coming from a buffered stream.
  23
  24 The optional `seekable' argument is provided as a workaround for
  25 certain stdio libraries in which tell() discards buffered data before
  26 discovering that the lseek() system call doesn't work.  For maximum
  27 portability, you should set the seekable argument to zero to prevent
  28 that initial tell() call when passing in an unseekable object such as
  29 a file object created from a socket object.  If it is 1 on entry --
  30 which it is by default -- the tell() method of the open file object is
  31 called once; if this raises an exception, seekable is reset to 0.  For
  32 other nonzero values of seekable, this test is not made.
  33
  34 To get the text of a particular header there are several methods:
  35   str = m.getheader(name)
  36   str = m.getrawheader(name)
  37 where name is the name of the header, e.g. 'Subject'.
  38 The difference is that getheader() strips the leading and trailing
  39 whitespace, while getrawheader() doesn't.  Both functions retain
  40 embedded whitespace (including newlines) exactly as they are
  41 specified in the header, and leave the case of the text unchanged.
  42
  43 For addresses and address lists there are functions
  44   realname, mailaddress = m.getaddr(name) and
  45   list = m.getaddrlist(name)
  46 where the latter returns a list of (realname, mailaddr) tuples.
  47
  48 There is also a method
  49   time = m.getdate(name)
  50 which parses a Date-like field and returns a time-compatible tuple,
  51 i.e. a tuple such as returned by time.localtime() or accepted by
  52 time.mktime().
  53
  54 See the class definition for lower level access methods.
  55
  56 There are also some utility functions here.
  57 """
  58 # Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
  59
  60 import string
  61 import time
  62
  63
# The two spellings of an empty line (CRLF- and LF-terminated).  Kept as a
# module-level tuple so that Message.islast() is a plain membership test.
_blanklines = ('\r\n', '\n')            # Optimization for islast()
  65
  66
  67 class Message:
  68     """Represents a single RFC-822-compliant message."""
  69
  70     def __init__(self, fp, seekable = 1):
  71         """Initialize the class instance and read the headers."""
  72         if seekable == 1:
  73             # Exercise tell() to make sure it works
  74             # (and then assume seek() works, too)
  75             try:
  76                 fp.tell()
  77             except:
  78                 seekable = 0
  79             else:
  80                 seekable = 1
  81         self.fp = fp
  82         self.seekable = seekable
  83         self.startofheaders = None
  84         self.startofbody = None
  85         #
  86         if self.seekable:
  87             try:
  88                 self.startofheaders = self.fp.tell()
  89             except IOError:
  90                 self.seekable = 0
  91         #
  92         self.readheaders()
  93         #
  94         if self.seekable:
  95             try:
  96                 self.startofbody = self.fp.tell()
  97             except IOError:
  98                 self.seekable = 0
  99
 100     def rewindbody(self):
 101         """Rewind the file to the start of the body (if seekable)."""
 102         if not self.seekable:
 103             raise IOError, "unseekable file"
 104         self.fp.seek(self.startofbody)
 105
 106     def readheaders(self):
 107         """Read header lines.
 108
 109         Read header lines up to the entirely blank line that
 110         terminates them.  The (normally blank) line that ends the
 111         headers is skipped, but not included in the returned list.
 112         If a non-header line ends the headers, (which is an error),
 113         an attempt is made to backspace over it; it is never
 114         included in the returned list.
 115
 116         The variable self.status is set to the empty string if all
 117         went well, otherwise it is an error message.
 118         The variable self.headers is a completely uninterpreted list
 119         of lines contained in the header (so printing them will
 120         reproduce the header exactly as it appears in the file).
 121         """
 122         self.dict = {}
 123         self.unixfrom = ''
 124         self.headers = list = []
 125         self.status = ''
 126         headerseen = ""
 127         firstline = 1
 128         startofline = unread = tell = None
 129         if hasattr(self.fp, 'unread'):
 130             unread = self.fp.unread
 131         elif self.seekable:
 132             tell = self.fp.tell
 133         while 1:
 134             if tell:
 135                 startofline = tell()
 136             line = self.fp.readline()
 137             if not line:
 138                 self.status = 'EOF in headers'
 139                 break
 140             # Skip unix From name time lines
 141             if firstline and line[:5] == 'From ':
 142                 self.unixfrom = self.unixfrom + line
 143                 continue
 144             firstline = 0
 145             if headerseen and line[0] in ' \t':
 146                 # It's a continuation line.
 147                 list.append(line)
 148                 x = (self.dict[headerseen] + "\n " + string.strip(line))
 149                 self.dict[headerseen] = string.strip(x)
 150                 continue
 151             elif self.iscomment(line):
 152                 # It's a comment.  Ignore it.
 153                 continue
 154             elif self.islast(line):
 155                 # Note! No pushback here!  The delimiter line gets eaten.
 156                 break
 157             headerseen = self.isheader(line)
 158             if headerseen:
 159                 # It's a legal header line, save it.
 160                 list.append(line)
 161                 self.dict[headerseen] = string.strip(line[len(headerseen)+2:])
 162                 continue
 163             else:
 164                 # It's not a header line; throw it back and stop here.
 165                 if not self.dict:
 166                     self.status = 'No headers'
 167                 else:
 168                     self.status = 'Non-header line where header expected'
 169                 # Try to undo the read.
 170                 if unread:
 171                     unread(line)
 172                 elif tell:
 173                     self.fp.seek(startofline)
 174                 else:
 175                     self.status = self.status + '; bad seek'
 176                 break
 177
 178     def isheader(self, line):
 179         """Determine whether a given line is a legal header.
 180
 181         This method should return the header name, suitably canonicalized.
 182         You may override this method in order to use Message parsing
 183         on tagged data in RFC822-like formats with special header formats.
 184         """
 185         i = string.find(line, ':')
 186         if i > 0:
 187             return string.lower(line[:i])
 188         else:
 189             return None
 190
 191     def islast(self, line):
 192         """Determine whether a line is a legal end of RFC-822 headers.
 193
 194         You may override this method if your application wants
 195         to bend the rules, e.g. to strip trailing whitespace,
 196         or to recognise MH template separators ('--------').
 197         For convenience (e.g. for code reading from sockets) a
 198         line consisting of \r\n also matches.
 199         """
 200         return line in _blanklines
 201
 202     def iscomment(self, line):
 203         """Determine whether a line should be skipped entirely.
 204
 205         You may override this method in order to use Message parsing
 206         on tagged data in RFC822-like formats that support embedded
 207         comments or free-text data.
 208         """
 209         return None
 210
 211     def getallmatchingheaders(self, name):
 212         """Find all header lines matching a given header name.
 213
 214         Look through the list of headers and find all lines
 215         matching a given header name (and their continuation
 216         lines).  A list of the lines is returned, without
 217         interpretation.  If the header does not occur, an
 218         empty list is returned.  If the header occurs multiple
 219         times, all occurrences are returned.  Case is not
 220         important in the header name.
 221         """
 222         name = string.lower(name) + ':'
 223         n = len(name)
 224         list = []
 225         hit = 0
 226         for line in self.headers:
 227             if string.lower(line[:n]) == name:
 228                 hit = 1
 229             elif line[:1] not in string.whitespace:
 230                 hit = 0
 231             if hit:
 232                 list.append(line)
 233         return list
 234
 235     def getfirstmatchingheader(self, name):
 236         """Get the first header line matching name.
 237
 238         This is similar to getallmatchingheaders, but it returns
 239         only the first matching header (and its continuation
 240         lines).
 241         """
 242         name = string.lower(name) + ':'
 243         n = len(name)
 244         list = []
 245         hit = 0
 246         for line in self.headers:
 247             if hit:
 248                 if line[:1] not in string.whitespace:
 249                     break
 250             elif string.lower(line[:n]) == name:
 251                 hit = 1
 252             if hit:
 253                 list.append(line)
 254         return list
 255
 256     def getrawheader(self, name):
 257         """A higher-level interface to getfirstmatchingheader().
 258
 259         Return a string containing the literal text of the
 260         header but with the keyword stripped.  All leading,
 261         trailing and embedded whitespace is kept in the
 262         string, however.
 263         Return None if the header does not occur.
 264         """
 265
 266         list = self.getfirstmatchingheader(name)
 267         if not list:
 268             return None
 269         list[0] = list[0][len(name) + 1:]
 270         return string.joinfields(list, '')
 271
 272     def getheader(self, name, default=None):
 273         """Get the header value for a name.
 274
 275         This is the normal interface: it return a stripped
 276         version of the header value for a given header name,
 277         or None if it doesn't exist.  This uses the dictionary
 278         version which finds the *last* such header.
 279         """
 280         try:
 281             return self.dict[string.lower(name)]
 282         except KeyError:
 283             return default
 284     get = getheader
 285
 286     def getaddr(self, name):
 287         """Get a single address from a header, as a tuple.
 288
 289         An example return value:
 290         ('Guido van Rossum', 'guido@cwi.nl')
 291         """
 292         # New, by Ben Escoto
 293         alist = self.getaddrlist(name)
 294         if alist:
 295             return alist[0]
 296         else:
 297             return (None, None)
 298
 299     def getaddrlist(self, name):
 300         """Get a list of addresses from a header.
 301
 302         Retrieves a list of addresses from a header, where each address is a
 303         tuple as returned by getaddr().  Scans all named headers, so it works
 304         properly with multiple To: or Cc: headers for example.
 305
 306         """
 307         raw = []
 308         for h in self.getallmatchingheaders(name):
 309             if h[0] in ' \t':
 310                 raw.append(h)
 311             else:
 312                 if raw:
 313                     raw.append(', ')
 314                 i = string.find(h, ':')
 315                 if i > 0:
 316                     addr = h[i+1:]
 317                 raw.append(addr)
 318         alladdrs = string.join(raw, '')
 319         a = AddrlistClass(alladdrs)
 320         return a.getaddrlist()
 321
 322     def getdate(self, name):
 323         """Retrieve a date field from a header.
 324
 325         Retrieves a date field from the named header, returning
 326         a tuple compatible with time.mktime().
 327         """
 328         try:
 329             data = self[name]
 330         except KeyError:
 331             return None
 332         return parsedate(data)
 333
 334     def getdate_tz(self, name):
 335         """Retrieve a date field from a header as a 10-tuple.
 336
 337         The first 9 elements make up a tuple compatible with
 338         time.mktime(), and the 10th is the offset of the poster's
 339         time zone from GMT/UTC.
 340         """
 341         try:
 342             data = self[name]
 343         except KeyError:
 344             return None
 345         return parsedate_tz(data)
 346
 347
 348     # Access as a dictionary (only finds *last* header of each type):
 349
 350     def __len__(self):
 351         """Get the number of headers in a message."""
 352         return len(self.dict)
 353
 354     def __getitem__(self, name):
 355         """Get a specific header, as from a dictionary."""
 356         return self.dict[string.lower(name)]
 357
 358     def __setitem__(self, name, value):
 359         """Set the value of a header.
 360
 361         Note: This is not a perfect inversion of __getitem__, because
 362         any changed headers get stuck at the end of the raw-headers list
 363         rather than where the altered header was.
 364         """
 365         del self[name] # Won't fail if it doesn't exist
 366         self.dict[string.lower(name)] = value
 367         text = name + ": " + value
 368         lines = string.split(text, "\n")
 369         for line in lines:
 370             self.headers.append(line + "\n")
 371
 372     def __delitem__(self, name):
 373         """Delete all occurrences of a specific header, if it is present."""
 374         name = string.lower(name)
 375         if not self.dict.has_key(name):
 376             return
 377         del self.dict[name]
 378         name = name + ':'
 379         n = len(name)
 380         list = []
 381         hit = 0
 382         for i in range(len(self.headers)):
 383             line = self.headers[i]
 384             if string.lower(line[:n]) == name:
 385                 hit = 1
 386             elif line[:1] not in string.whitespace:
 387                 hit = 0
 388             if hit:
 389                 list.append(i)
 390         list.reverse()
 391         for i in list:
 392             del self.headers[i]
 393
 394     def has_key(self, name):
 395         """Determine whether a message contains the named header."""
 396         return self.dict.has_key(string.lower(name))
 397
 398     def keys(self):
 399         """Get all of a message's header field names."""
 400         return self.dict.keys()
 401
 402     def values(self):
 403         """Get all of a message's header field values."""
 404         return self.dict.values()
 405
 406     def items(self):
 407         """Get all of a message's headers.
 408
 409         Returns a list of name, value tuples.
 410         """
 411         return self.dict.items()
 412
 413     def __str__(self):
 414         str = ''
 415         for hdr in self.headers:
 416             str = str + hdr
 417         return str
 418
 419
 420 # Utility functions
 421 # -----------------
 422
 423 # XXX Should fix unquote() and quote() to be really conformant.
 424 # XXX The inverses of the parse functions may also be useful.
 425
 426
 427 def unquote(str):
 428     """Remove quotes from a string."""
 429     if len(str) > 1:
 430         if str[0] == '"' and str[-1:] == '"':
 431             return str[1:-1]
 432         if str[0] == '<' and str[-1:] == '>':
 433             return str[1:-1]
 434     return str
 435
 436
 437 def quote(str):
 438     """Add quotes around a string."""
 439     return '"%s"' % string.join(
 440     string.split(
 441     string.join(
 442     string.split(str, '\\'),
 443     '\\\\'),
 444     '"'),
 445     '\\"')
 446
 447
 448 def parseaddr(address):
 449     """Parse an address into a (realname, mailaddr) tuple."""
 450     a = AddrlistClass(address)
 451     list = a.getaddrlist()
 452     if not list:
 453         return (None, None)
 454     else:
 455         return list[0]
 456
 457
 458 class AddrlistClass:
 459     """Address parser class by Ben Escoto.
 460
 461     To understand what this class does, it helps to have a copy of
 462     RFC-822 in front of you.
 463
 464     Note: this class interface is deprecated and may be removed in the future.
 465     Use rfc822.AddressList instead.
 466     """
 467
 468     def __init__(self, field):
 469         """Initialize a new instance.
 470
 471         `field' is an unparsed address header field, containing
 472         one or more addresses.
 473         """
 474         self.specials = '()<>@,:;.\"[]'
 475         self.pos = 0
 476         self.LWS = ' \t'
 477         self.CR = '\r\n'
 478         self.atomends = self.specials + self.LWS + self.CR
 479         self.field = field
 480         self.commentlist = []
 481
 482     def gotonext(self):
 483         """Parse up to the start of the next address."""
 484         while self.pos < len(self.field):
 485             if self.field[self.pos] in self.LWS + '\n\r':
 486                 self.pos = self.pos + 1
 487             elif self.field[self.pos] == '(':
 488                 self.commentlist.append(self.getcomment())
 489             else: break
 490
 491     def getaddrlist(self):
 492         """Parse all addresses.
 493
 494         Returns a list containing all of the addresses.
 495         """
 496         ad = self.getaddress()
 497         if ad:
 498             return ad + self.getaddrlist()
 499         else: return []
 500
 501     def getaddress(self):
 502         """Parse the next address."""
 503         self.commentlist = []
 504         self.gotonext()
 505
 506         oldpos = self.pos
 507         oldcl = self.commentlist
 508         plist = self.getphraselist()
 509
 510         self.gotonext()
 511         returnlist = []
 512
 513         if self.pos >= len(self.field):
 514             # Bad email address technically, no domain.
 515             if plist:
 516                 returnlist = [(string.join(self.commentlist), plist[0])]
 517
 518         elif self.field[self.pos] in '.@':
 519             # email address is just an addrspec
 520             # this isn't very efficient since we start over
 521             self.pos = oldpos
 522             self.commentlist = oldcl
 523             addrspec = self.getaddrspec()
 524             returnlist = [(string.join(self.commentlist), addrspec)]
 525
 526         elif self.field[self.pos] == ':':
 527             # address is a group
 528             returnlist = []
 529
 530             self.pos = self.pos + 1
 531             while self.pos < len(self.field):
 532                 self.gotonext()
 533                 if self.field[self.pos] == ';':
 534                     self.pos = self.pos + 1
 535                     break
 536                 returnlist = returnlist + self.getaddress()
 537
 538         elif self.field[self.pos] == '<':
 539             # Address is a phrase then a route addr
 540             routeaddr = self.getrouteaddr()
 541
 542             if self.commentlist:
 543                 returnlist = [(string.join(plist) + ' (' + \
 544                          string.join(self.commentlist) + ')', routeaddr)]
 545             else: returnlist = [(string.join(plist), routeaddr)]
 546
 547         else:
 548             if plist:
 549                 returnlist = [(string.join(self.commentlist), plist[0])]
 550             elif self.field[self.pos] in self.specials:
 551                 self.pos = self.pos + 1
 552
 553         self.gotonext()
 554         if self.pos < len(self.field) and self.field[self.pos] == ',':
 555             self.pos = self.pos + 1
 556         return returnlist
 557
 558     def getrouteaddr(self):
 559         """Parse a route address (Return-path value).
 560
 561         This method just skips all the route stuff and returns the addrspec.
 562         """
 563         if self.field[self.pos] != '<':
 564             return
 565
 566         expectroute = 0
 567         self.pos = self.pos + 1
 568         self.gotonext()
 569         adlist = None
 570         while self.pos < len(self.field):
 571             if expectroute:
 572                 self.getdomain()
 573                 expectroute = 0
 574             elif self.field[self.pos] == '>':
 575                 self.pos = self.pos + 1
 576                 break
 577             elif self.field[self.pos] == '@':
 578                 self.pos = self.pos + 1
 579                 expectroute = 1
 580             elif self.field[self.pos] == ':':
 581                 self.pos = self.pos + 1
 582                 expectaddrspec = 1
 583             else:
 584                 adlist = self.getaddrspec()
 585                 self.pos = self.pos + 1
 586                 break
 587             self.gotonext()
 588
 589         return adlist
 590
 591     def getaddrspec(self):
 592         """Parse an RFC-822 addr-spec."""
 593         aslist = []
 594
 595         self.gotonext()
 596         while self.pos < len(self.field):
 597             if self.field[self.pos] == '.':
 598                 aslist.append('.')
 599                 self.pos = self.pos + 1
 600             elif self.field[self.pos] == '"':
 601                 aslist.append(self.getquote())
 602             elif self.field[self.pos] in self.atomends:
 603                 break
 604             else: aslist.append(self.getatom())
 605             self.gotonext()
 606
 607         if self.pos >= len(self.field) or self.field[self.pos] != '@':
 608             return string.join(aslist, '')
 609
 610         aslist.append('@')
 611         self.pos = self.pos + 1
 612         self.gotonext()
 613         return string.join(aslist, '') + self.getdomain()
 614
 615     def getdomain(self):
 616         """Get the complete domain name from an address."""
 617         sdlist = []
 618         while self.pos < len(self.field):
 619             if self.field[self.pos] in self.LWS:
 620                 self.pos = self.pos + 1
 621             elif self.field[self.pos] == '(':
 622                 self.commentlist.append(self.getcomment())
 623             elif self.field[self.pos] == '[':
 624                 sdlist.append(self.getdomainliteral())
 625             elif self.field[self.pos] == '.':
 626                 self.pos = self.pos + 1
 627                 sdlist.append('.')
 628             elif self.field[self.pos] in self.atomends:
 629                 break
 630             else: sdlist.append(self.getatom())
 631         return string.join(sdlist, '')
 632
 633     def getdelimited(self, beginchar, endchars, allowcomments = 1):
 634         """Parse a header fragment delimited by special characters.
 635
 636         `beginchar' is the start character for the fragment.
 637         If self is not looking at an instance of `beginchar' then
 638         getdelimited returns the empty string.
 639
 640         `endchars' is a sequence of allowable end-delimiting characters.
 641         Parsing stops when one of these is encountered.
 642
 643         If `allowcomments' is non-zero, embedded RFC-822 comments
 644         are allowed within the parsed fragment.
 645         """
 646         if self.field[self.pos] != beginchar:
 647             return ''
 648
 649         slist = ['']
 650         quote = 0
 651         self.pos = self.pos + 1
 652         while self.pos < len(self.field):
 653             if quote == 1:
 654                 slist.append(self.field[self.pos])
 655                 quote = 0
 656             elif self.field[self.pos] in endchars:
 657                 self.pos = self.pos + 1
 658                 break
 659             elif allowcomments and self.field[self.pos] == '(':
 660                 slist.append(self.getcomment())
 661             elif self.field[self.pos] == '\\':
 662                 quote = 1
 663             else:
 664                 slist.append(self.field[self.pos])
 665             self.pos = self.pos + 1
 666
 667         return string.join(slist, '')
 668
 669     def getquote(self):
 670         """Get a quote-delimited fragment from self's field."""
 671         return self.getdelimited('"', '"\r', 0)
 672
 673     def getcomment(self):
 674         """Get a parenthesis-delimited fragment from self's field."""
 675         return self.getdelimited('(', ')\r', 1)
 676
 677     def getdomainliteral(self):
 678         """Parse an RFC-822 domain-literal."""
 679         return self.getdelimited('[', ']\r', 0)
 680
 681     def getatom(self):
 682         """Parse an RFC-822 atom."""
 683         atomlist = ['']
 684
 685         while self.pos < len(self.field):
 686             if self.field[self.pos] in self.atomends:
 687                 break
 688             else: atomlist.append(self.field[self.pos])
 689             self.pos = self.pos + 1
 690
 691         return string.join(atomlist, '')
 692
 693     def getphraselist(self):
 694         """Parse a sequence of RFC-822 phrases.
 695
 696         A phrase is a sequence of words, which are in turn either
 697         RFC-822 atoms or quoted-strings.  Phrases are canonicalized
 698         by squeezing all runs of continuous whitespace into one space.
 699         """
 700         plist = []
 701
 702         while self.pos < len(self.field):
 703             if self.field[self.pos] in self.LWS:
 704                 self.pos = self.pos + 1
 705             elif self.field[self.pos] == '"':
 706                 plist.append(self.getquote())
 707             elif self.field[self.pos] == '(':
 708                 self.commentlist.append(self.getcomment())
 709             elif self.field[self.pos] in self.atomends:
 710                 break
 711             else: plist.append(self.getatom())
 712
 713         return plist
 714
 715 class AddressList(AddrlistClass):
 716     """An AddressList encapsulates a list of parsed RFC822 addresses."""
 717     def __init__(self, field):
 718         AddrlistClass.__init__(self, field)
 719         if field:
 720             self.addresslist = self.getaddrlist()
 721         else:
 722             self.addresslist = []
 723
 724     def __len__(self):
 725         return len(self.addresslist)
 726
 727     def __str__(self):
 728         return string.joinfields(map(dump_address_pair, self.addresslist),", ")
 729
 730     def __add__(self, other):
 731         # Set union
 732         newaddr = AddressList(None)
 733         newaddr.addresslist = self.addresslist[:]
 734         for x in other.addresslist:
 735             if not x in self.addresslist:
 736                 newaddr.addresslist.append(x)
 737         return newaddr
 738
 739     def __sub__(self, other):
 740         # Set difference
 741         newaddr = AddressList(None)
 742         for x in self.addresslist:
 743             if not x in other.addresslist:
 744                 newaddr.addresslist.append(x)
 745         return newaddr
 746
 747     def __getitem__(self, index):
 748         # Make indexing, slices, and 'in' work
 749         return self.addrlist[index]
 750
 751 def dump_address_pair(pair):
 752     """Dump a (name, address) pair in a canonicalized form."""
 753     if pair[0]:
 754         return '"' + pair[0] + '" <' + pair[1] + '>'
 755     else:
 756         return pair[1]
 757
 758 # Parse a date field
 759
 760 _monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
 761                'aug', 'sep', 'oct', 'nov', 'dec',
 762                'january', 'february', 'march', 'april', 'may', 'june', 'july',
 763                'august', 'september', 'october', 'november', 'december']
 764 _daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
 765
 766 # The timezone table does not include the military time zones defined
 767 # in RFC822, other than Z.  According to RFC1123, the description in
 768 # RFC822 gets the signs wrong, so we can't rely on any such time
 769 # zones.  RFC1123 recommends that numeric timezone indicators be used
 770 # instead of timezone names.
 771
 772 _timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
 773               'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
 774               'EST': -500, 'EDT': -400,  # Eastern
 775               'CST': -600, 'CDT': -500,  # Central
 776               'MST': -700, 'MDT': -600,  # Mountain
 777               'PST': -800, 'PDT': -700   # Pacific
 778               }
 779
 780
 781 def parsedate_tz(data):
 782     """Convert a date string to a time tuple.
 783
 784     Accounts for military timezones.
 785     """
 786     data = string.split(data)
 787     if data[0][-1] in (',', '.') or string.lower(data[0]) in _daynames:
 788         # There's a dayname here. Skip it
 789         del data[0]
 790     if len(data) == 3: # RFC 850 date, deprecated
 791         stuff = string.split(data[0], '-')
 792         if len(stuff) == 3:
 793             data = stuff + data[1:]
 794     if len(data) == 4:
 795         s = data[3]
 796         i = string.find(s, '+')
 797         if i > 0:
 798             data[3:] = [s[:i], s[i+1:]]
 799         else:
 800             data.append('') # Dummy tz
 801     if len(data) < 5:
 802         return None
 803     data = data[:5]
 804     [dd, mm, yy, tm, tz] = data
 805     mm = string.lower(mm)
 806     if not mm in _monthnames:
 807         dd, mm = mm, string.lower(dd)
 808         if not mm in _monthnames:
 809             return None
 810     mm = _monthnames.index(mm)+1
 811     if dd[-1] == ',':
 812         dd = dd[:-1]
 813     i = string.find(yy, ':')
 814     if i > 0:
 815         yy, tm = tm, yy
 816     if yy[-1] == ',':
 817         yy = yy[:-1]
 818     if yy[0] not in string.digits:
 819         yy, tz = tz, yy
 820     if tm[-1] == ',':
 821         tm = tm[:-1]
 822     tm = string.splitfields(tm, ':')
 823     if len(tm) == 2:
 824         [thh, tmm] = tm
 825         tss = '0'
 826     elif len(tm) == 3:
 827         [thh, tmm, tss] = tm
 828     else:
 829         return None
 830     try:
 831         yy = string.atoi(yy)
 832         dd = string.atoi(dd)
 833         thh = string.atoi(thh)
 834         tmm = string.atoi(tmm)
 835         tss = string.atoi(tss)
 836     except string.atoi_error:
 837         return None
 838     tzoffset=None
 839     tz=string.upper(tz)
 840     if _timezones.has_key(tz):
 841         tzoffset=_timezones[tz]
 842     else:
 843         try:
 844             tzoffset=string.atoi(tz)
 845         except string.atoi_error:
 846             pass
 847     # Convert a timezone offset into seconds ; -0500 -> -18000
 848     if tzoffset:
 849         if tzoffset < 0:
 850             tzsign = -1
 851             tzoffset = -tzoffset
 852         else:
 853             tzsign = 1
 854         tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60)
 855     tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
 856     return tuple
 857
 858
 859 def parsedate(data):
 860     """Convert a time string to a time tuple."""
 861     t=parsedate_tz(data)
 862     if type(t)==type( () ):
 863         return t[:9]
 864     else: return t
 865
 866
 867 def mktime_tz(data):
 868     """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
 869     if data[9] is None:
 870         # No zone info, so localtime is better assumption than GMT
 871         return time.mktime(data[:8] + (-1,))
 872     else:
 873         t = time.mktime(data[:8] + (0,))
 874         return t - data[9] - time.timezone
 875
 876
 877 # When used as script, run a small test program.
 878 # The first command line argument must be a filename containing one
 879 # message in RFC-822 format.
 880
 881 if __name__ == '__main__':
 882     import sys, os
 883     file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
 884     if sys.argv[1:]: file = sys.argv[1]
 885     f = open(file, 'r')
 886     m = Message(f)
 887     print 'From:', m.getaddr('from')
 888     print 'To:', m.getaddrlist('to')
 889     print 'Subject:', m.getheader('subject')
 890     print 'Date:', m.getheader('date')
 891     date = m.getdate_tz('date')
 892     if date:
 893         print 'ParsedDate:', time.asctime(date[:-1]),
 894         hhmmss = date[-1]
 895         hhmm, ss = divmod(hhmmss, 60)
 896         hh, mm = divmod(hhmm, 60)
 897         print "%+03d%02d" % (hh, mm),
 898         if ss: print ".%02d" % ss,
 899         print
 900     else:
 901         print 'ParsedDate:', None
 902     m.rewindbody()
 903     n = 0
 904     while f.readline():
 905         n = n + 1
 906     print 'Lines:', n
 907     print '-'*70
 908     print 'len =', len(m)
 909     if m.has_key('Date'): print 'Date =', m['Date']
 910     if m.has_key('X-Nonsense'): pass
 911     print 'keys =', m.keys()
 912     print 'values =', m.values()
 913     print 'items =', m.items()