Lib/email/Utils.py

   1 # Copyright (C) 2001,2002 Python Software Foundation
   2 # Author: barry@zope.com (Barry Warsaw)
   3
   4 """Miscellaneous utilities.
   5 """
   6
   7 import time
   8 import socket
   9 import re
  10 import random
  11 import os
  12 import warnings
  13 from cStringIO import StringIO
  14 from types import ListType
  15
  16 from email._parseaddr import quote
  17 from email._parseaddr import AddressList as _AddressList
  18 from email._parseaddr import mktime_tz
  19
  20 # We need wormarounds for bugs in these methods in older Pythons (see below)
  21 from email._parseaddr import parsedate as _parsedate
  22 from email._parseaddr import parsedate_tz as _parsedate_tz
  23
# Compatibility shim: evaluating the names True and False raises NameError on
# interpreters that do not define them, in which case integer stand-ins are
# bound at module level instead.
try:
    True, False
except NameError:
    True = 1
    False = 0
  29
  30 try:
  31     from quopri import decodestring as _qdecode
  32 except ImportError:
  33     # Python 2.1 doesn't have quopri.decodestring()
  34     def _qdecode(s):
  35         import quopri as _quopri
  36
  37         if not s:
  38             return s
  39         infp = StringIO(s)
  40         outfp = StringIO()
  41         _quopri.decode(infp, outfp)
  42         value = outfp.getvalue()
  43         if not s.endswith('\n') and value.endswith('\n'):
  44             return value[:-1]
  45         return value
  46
  47 import base64
  48
  49 # Intrapackage imports
  50 from email.Encoders import _bencode, _qencode
  51
# Separator and filler strings shared by the helpers in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'

# Any of these characters in a real name makes formataddr() wrap the name in
# double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() prefixes with a backslash inside a real name.
escapesre = re.compile(r'[][\\()"]')
  59
  60
  61 \f
  62 # Helpers
  63
def _identity(s):
    """Return s unchanged."""
    return s
  66
  67
  68 def _bdecode(s):
  69     # We can't quite use base64.encodestring() since it tacks on a "courtesy
  70     # newline".  Blech!
  71     if not s:
  72         return s
  73     value = base64.decodestring(s)
  74     if not s.endswith('\n') and value.endswith('\n'):
  75         return value[:-1]
  76     return value
  77
  78
  79 \f
  80 def fix_eols(s):
  81     """Replace all line-ending characters with \r\n."""
  82     # Fix newlines with no preceding carriage return
  83     s = re.sub(r'(?<!\r)\n', CRLF, s)
  84     # Fix carriage returns with no following newline
  85     s = re.sub(r'\r(?!\n)', CRLF, s)
  86     return s
  87
  88
  89 \f
  90 def formataddr(pair):
  91     """The inverse of parseaddr(), this takes a 2-tuple of the form
  92     (realname, email_address) and returns the string value suitable
  93     for an RFC 2822 From, To or Cc header.
  94
  95     If the first element of pair is false, then the second element is
  96     returned unmodified.
  97     """
  98     name, address = pair
  99     if name:
 100         quotes = ''
 101         if specialsre.search(name):
 102             quotes = '"'
 103         name = escapesre.sub(r'\\\g<0>', name)
 104         return '%s%s%s <%s>' % (quotes, name, quotes, address)
 105     return address
 106
 107 # For backwards compatibility
 108 def dump_address_pair(pair):
 109     warnings.warn('Use email.Utils.formataddr() instead',
 110                   DeprecationWarning, 2)
 111     return formataddr(pair)
 112
 113
 114 \f
 115 def getaddresses(fieldvalues):
 116     """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
 117     all = COMMASPACE.join(fieldvalues)
 118     a = _AddressList(all)
 119     return a.addresslist
 120
 121
 122 \f
# Pattern for one encoded-word of the form =?charset?encoding?atom?=, exposing
# the named groups "charset", "encoding" and "atom".  Matching ignores case.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
 132
 133
 134 def decode(s):
 135     """Return a decoded string according to RFC 2047, as a unicode string.
 136
 137     NOTE: This function is deprecated.  Use Header.decode_header() instead.
 138     """
 139     warnings.warn('Use Header.decode_header() instead.', DeprecationWarning, 2)
 140     # Intra-package import here to avoid circular import problems.
 141     from email.Header import decode_header
 142     L = decode_header(s)
 143     if not isinstance(L, ListType):
 144         # s wasn't decoded
 145         return s
 146
 147     rtn = []
 148     for atom, charset in L:
 149         if charset is None:
 150             rtn.append(atom)
 151         else:
 152             # Convert the string to Unicode using the given encoding.  Leave
 153             # Unicode conversion errors to strict.
 154             rtn.append(unicode(atom, charset))
 155     # Now that we've decoded everything, we just need to join all the parts
 156     # together into the final string.
 157     return UEMPTYSTRING.join(rtn)
 158
 159
 160 \f
 161 def encode(s, charset='iso-8859-1', encoding='q'):
 162     """Encode a string according to RFC 2047."""
 163     warnings.warn('Use Header.Header.encode() instead.', DeprecationWarning, 2)
 164     encoding = encoding.lower()
 165     if encoding == 'q':
 166         estr = _qencode(s)
 167     elif encoding == 'b':
 168         estr = _bencode(s)
 169     else:
 170         raise ValueError, 'Illegal encoding code: ' + encoding
 171     return '=?%s?%s?%s?=' % (charset.lower(), encoding, estr)
 172
 173
 174 \f
 175 def formatdate(timeval=None, localtime=False):
 176     """Returns a date string as specified by RFC 2822, e.g.:
 177
 178     Fri, 09 Nov 2001 01:08:47 -0000
 179
 180     Optional timeval if given is a floating point time value as accepted by
 181     gmtime() and localtime(), otherwise the current time is used.
 182
 183     Optional localtime is a flag that when True, interprets timeval, and
 184     returns a date relative to the local timezone instead of UTC, properly
 185     taking daylight savings time into account.
 186     """
 187     # Note: we cannot use strftime() because that honors the locale and RFC
 188     # 2822 requires that day and month names be the English abbreviations.
 189     if timeval is None:
 190         timeval = time.time()
 191     if localtime:
 192         now = time.localtime(timeval)
 193         # Calculate timezone offset, based on whether the local zone has
 194         # daylight savings time, and whether DST is in effect.
 195         if time.daylight and now[-1]:
 196             offset = time.altzone
 197         else:
 198             offset = time.timezone
 199         hours, minutes = divmod(abs(offset), 3600)
 200         # Remember offset is in seconds west of UTC, but the timezone is in
 201         # minutes east of UTC, so the signs differ.
 202         if offset > 0:
 203             sign = '-'
 204         else:
 205             sign = '+'
 206         zone = '%s%02d%02d' % (sign, hours, minutes / 60)
 207     else:
 208         now = time.gmtime(timeval)
 209         # Timezone offset is always -0000
 210         zone = '-0000'
 211     return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
 212         ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
 213         now[2],
 214         ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
 215          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
 216         now[0], now[3], now[4], now[5],
 217         zone)
 218
 219
 220 \f
 221 def make_msgid(idstring=None):
 222     """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
 223
 224     <20020201195627.33539.96671@nightshade.la.mastaler.com>
 225
 226     Optional idstring if given is a string used to strengthen the
 227     uniqueness of the message id.
 228     """
 229     timeval = time.time()
 230     utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
 231     pid = os.getpid()
 232     randint = random.randrange(100000)
 233     if idstring is None:
 234         idstring = ''
 235     else:
 236         idstring = '.' + idstring
 237     idhost = socket.getfqdn()
 238     msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost)
 239     return msgid
 240
 241
 242 \f
 243 # These functions are in the standalone mimelib version only because they've
 244 # subsequently been fixed in the latest Python versions.  We use this to worm
 245 # around broken older Pythons.
 246 def parsedate(data):
 247     if not data:
 248         return None
 249     return _parsedate(data)
 250
 251
 252 def parsedate_tz(data):
 253     if not data:
 254         return None
 255     return _parsedate_tz(data)
 256
 257
 258 def parseaddr(addr):
 259     addrs = _AddressList(addr).addresslist
 260     if not addrs:
 261         return '', ''
 262     return addrs[0]
 263
 264
 265 # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
 266 def unquote(str):
 267     """Remove quotes from a string."""
 268     if len(str) > 1:
 269         if str.startswith('"') and str.endswith('"'):
 270             return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
 271         if str.startswith('<') and str.endswith('>'):
 272             return str[1:-1]
 273     return str
 274
 275
 276 \f
 277 # RFC2231-related functions - parameter encoding and decoding
 278 def decode_rfc2231(s):
 279     """Decode string according to RFC 2231"""
 280     import urllib
 281     parts = s.split("'", 2)
 282     if len(parts) == 1:
 283         return None, None, s
 284     charset, language, s = parts
 285     return charset, language, urllib.unquote(s)
 286
 287
 288 def encode_rfc2231(s, charset=None, language=None):
 289     """Encode string according to RFC 2231.
 290
 291     If neither charset nor language is given, then s is returned as-is.  If
 292     charset is given but not language, the string is encoded using the empty
 293     string for language.
 294     """
 295     import urllib
 296     s = urllib.quote(s, safe='')
 297     if charset is None and language is None:
 298         return s
 299     if language is None:
 300         language = ''
 301     return "%s'%s'%s" % (charset, language, s)
 302
 303
# Matches a whole parameter name that ends in the '*' marker: NAME*, NAME*N or
# NAME*N* (N being decimal digits).  Group "name" captures NAME; group "num"
# captures N, or is None when no number is present.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
 305
 306 def decode_params(params):
 307     """Decode parameters list according to RFC 2231.
 308
 309     params is a sequence of 2-tuples containing (content type, string value).
 310     """
 311     new_params = []
 312     # maps parameter's name to a list of continuations
 313     rfc2231_params = {}
 314     # params is a sequence of 2-tuples containing (content_type, string value)
 315     name, value = params[0]
 316     new_params.append((name, value))
 317     # Cycle through each of the rest of the parameters.
 318     for name, value in params[1:]:
 319         value = unquote(value)
 320         mo = rfc2231_continuation.match(name)
 321         if mo:
 322             name, num = mo.group('name', 'num')
 323             if num is not None:
 324                 num = int(num)
 325             rfc2231_param1 = rfc2231_params.setdefault(name, [])
 326             rfc2231_param1.append((num, value))
 327         else:
 328             new_params.append((name, '"%s"' % quote(value)))
 329     if rfc2231_params:
 330         for name, continuations in rfc2231_params.items():
 331             value = []
 332             # Sort by number
 333             continuations.sort()
 334             # And now append all values in num order
 335             for num, continuation in continuations:
 336                 value.append(continuation)
 337             charset, language, value = decode_rfc2231(EMPTYSTRING.join(value))
 338             new_params.append(
 339                 (name, (charset, language, '"%s"' % quote(value))))
 340     return new_params