Lib/base64.py

   1 #! /usr/bin/env python
   2
   3 """RFC 3548: Base16, Base32, Base64 Data Encodings"""
   4
   5 # Modified 04-Oct-1995 by Jack Jansen to use binascii module
   6 # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
   7 # Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
   8
   9 import re
  10 import struct
  11 import binascii
  12
  13
  14 __all__ = [
  15     # Legacy interface exports traditional RFC 1521 Base64 encodings
  16     'encode', 'decode', 'encodestring', 'decodestring',
  17     # Generalized interface for other encodings
  18     'b64encode', 'b64decode', 'b32encode', 'b32decode',
  19     'b16encode', 'b16decode',
  20     # Standard Base64 encoding
  21     'standard_b64encode', 'standard_b64decode',
  22     # Some common Base64 alternatives.  As referenced by RFC 3458, see thread
  23     # starting at:
  24     #
  25     # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
  26     'urlsafe_b64encode', 'urlsafe_b64decode',
  27     ]
  28
  29
  30 bytes_types = (bytes, bytearray)  # Types acceptable as binary data
  31
  32
  33 def _translate(s, altchars):
  34     if not isinstance(s, bytes_types):
  35         raise TypeError("expected bytes, not %s" % s.__class__.__name__)
  36     translation = bytearray(range(256))
  37     for k, v in altchars.items():
  38         translation[ord(k)] = v[0]
  39     return s.translate(translation)
  40
  41
  42 \f
  43 # Base64 encoding/decoding uses binascii
  44
  45 def b64encode(s, altchars=None):
  46     """Encode a byte string using Base64.
  47
  48     s is the byte string to encode.  Optional altchars must be a byte
  49     string of length 2 which specifies an alternative alphabet for the
  50     '+' and '/' characters.  This allows an application to
  51     e.g. generate url or filesystem safe Base64 strings.
  52
  53     The encoded byte string is returned.
  54     """
  55     if not isinstance(s, bytes_types):
  56         raise TypeError("expected bytes, not %s" % s.__class__.__name__)
  57     # Strip off the trailing newline
  58     encoded = binascii.b2a_base64(s)[:-1]
  59     if altchars is not None:
  60         if not isinstance(altchars, bytes_types):
  61             altchars = TypeError("expected bytes, not %s"
  62                                  % altchars.__class__.__name__)
  63         assert len(altchars) == 2, repr(altchars)
  64         return _translate(encoded, {'+': altchars[0:1], '/': altchars[1:2]})
  65     return encoded
  66
  67
  68 def b64decode(s, altchars=None):
  69     """Decode a Base64 encoded byte string.
  70
  71     s is the byte string to decode.  Optional altchars must be a
  72     string of length 2 which specifies the alternative alphabet used
  73     instead of the '+' and '/' characters.
  74
  75     The decoded byte string is returned.  binascii.Error is raised if
  76     s were incorrectly padded or if there are non-alphabet characters
  77     present in the string.
  78     """
  79     if not isinstance(s, bytes_types):
  80         raise TypeError("expected bytes, not %s" % s.__class__.__name__)
  81     if altchars is not None:
  82         if not isinstance(altchars, bytes_types):
  83             raise TypeError("expected bytes, not %s"
  84                             % altchars.__class__.__name__)
  85         assert len(altchars) == 2, repr(altchars)
  86         s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'})
  87     return binascii.a2b_base64(s)
  88
  89
  90 def standard_b64encode(s):
  91     """Encode a byte string using the standard Base64 alphabet.
  92
  93     s is the byte string to encode.  The encoded byte string is returned.
  94     """
  95     return b64encode(s)
  96
  97 def standard_b64decode(s):
  98     """Decode a byte string encoded with the standard Base64 alphabet.
  99
 100     s is the byte string to decode.  The decoded byte string is
 101     returned.  binascii.Error is raised if the input is incorrectly
 102     padded or if there are non-alphabet characters present in the
 103     input.
 104     """
 105     return b64decode(s)
 106
 107 def urlsafe_b64encode(s):
 108     """Encode a byte string using a url-safe Base64 alphabet.
 109
 110     s is the byte string to encode.  The encoded byte string is
 111     returned.  The alphabet uses '-' instead of '+' and '_' instead of
 112     '/'.
 113     """
 114     return b64encode(s, b'-_')
 115
 116 def urlsafe_b64decode(s):
 117     """Decode a byte string encoded with the standard Base64 alphabet.
 118
 119     s is the byte string to decode.  The decoded byte string is
 120     returned.  binascii.Error is raised if the input is incorrectly
 121     padded or if there are non-alphabet characters present in the
 122     input.
 123
 124     The alphabet uses '-' instead of '+' and '_' instead of '/'.
 125     """
 126     return b64decode(s, b'-_')
 127
 128
 129 \f
 130 # Base32 encoding/decoding must be done in Python
 131 _b32alphabet = {
 132     0: b'A',  9: b'J', 18: b'S', 27: b'3',
 133     1: b'B', 10: b'K', 19: b'T', 28: b'4',
 134     2: b'C', 11: b'L', 20: b'U', 29: b'5',
 135     3: b'D', 12: b'M', 21: b'V', 30: b'6',
 136     4: b'E', 13: b'N', 22: b'W', 31: b'7',
 137     5: b'F', 14: b'O', 23: b'X',
 138     6: b'G', 15: b'P', 24: b'Y',
 139     7: b'H', 16: b'Q', 25: b'Z',
 140     8: b'I', 17: b'R', 26: b'2',
 141     }
 142
 143 _b32tab = [v[0] for k, v in sorted(_b32alphabet.items())]
 144 _b32rev = dict([(v[0], k) for k, v in _b32alphabet.items()])
 145
 146
 147 def b32encode(s):
 148     """Encode a byte string using Base32.
 149
 150     s is the byte string to encode.  The encoded byte string is returned.
 151     """
 152     if not isinstance(s, bytes_types):
 153         raise TypeError("expected bytes, not %s" % s.__class__.__name__)
 154     quanta, leftover = divmod(len(s), 5)
 155     # Pad the last quantum with zero bits if necessary
 156     if leftover:
 157         s = s + bytes(5 - leftover)  # Don't use += !
 158         quanta += 1
 159     encoded = bytes()
 160     for i in range(quanta):
 161         # c1 and c2 are 16 bits wide, c3 is 8 bits wide.  The intent of this
 162         # code is to process the 40 bits in units of 5 bits.  So we take the 1
 163         # leftover bit of c1 and tack it onto c2.  Then we take the 2 leftover
 164         # bits of c2 and tack them onto c3.  The shifts and masks are intended
 165         # to give us values of exactly 5 bits in width.
 166         c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5])
 167         c2 += (c1 & 1) << 16 # 17 bits wide
 168         c3 += (c2 & 3) << 8  # 10 bits wide
 169         encoded += bytes([_b32tab[c1 >> 11],         # bits 1 - 5
 170                           _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10
 171                           _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15
 172                           _b32tab[c2 >> 12],         # bits 16 - 20 (1 - 5)
 173                           _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10)
 174                           _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15)
 175                           _b32tab[c3 >> 5],          # bits 31 - 35 (1 - 5)
 176                           _b32tab[c3 & 0x1f],        # bits 36 - 40 (1 - 5)
 177                           ])
 178     # Adjust for any leftover partial quanta
 179     if leftover == 1:
 180         return encoded[:-6] + b'======'
 181     elif leftover == 2:
 182         return encoded[:-4] + b'===='
 183     elif leftover == 3:
 184         return encoded[:-3] + b'==='
 185     elif leftover == 4:
 186         return encoded[:-1] + b'='
 187     return encoded
 188
 189
 190 def b32decode(s, casefold=False, map01=None):
 191     """Decode a Base32 encoded byte string.
 192
 193     s is the byte string to decode.  Optional casefold is a flag
 194     specifying whether a lowercase alphabet is acceptable as input.
 195     For security purposes, the default is False.
 196
 197     RFC 3548 allows for optional mapping of the digit 0 (zero) to the
 198     letter O (oh), and for optional mapping of the digit 1 (one) to
 199     either the letter I (eye) or letter L (el).  The optional argument
 200     map01 when not None, specifies which letter the digit 1 should be
 201     mapped to (when map01 is not None, the digit 0 is always mapped to
 202     the letter O).  For security purposes the default is None, so that
 203     0 and 1 are not allowed in the input.
 204
 205     The decoded byte string is returned.  binascii.Error is raised if
 206     the input is incorrectly padded or if there are non-alphabet
 207     characters present in the input.
 208     """
 209     if not isinstance(s, bytes_types):
 210         raise TypeError("expected bytes, not %s" % s.__class__.__name__)
 211     quanta, leftover = divmod(len(s), 8)
 212     if leftover:
 213         raise binascii.Error('Incorrect padding')
 214     # Handle section 2.4 zero and one mapping.  The flag map01 will be either
 215     # False, or the character to map the digit 1 (one) to.  It should be
 216     # either L (el) or I (eye).
 217     if map01 is not None:
 218         if not isinstance(map01, bytes_types):
 219             raise TypeError("expected bytes, not %s" % map01.__class__.__name__)
 220         assert len(map01) == 1, repr(map01)
 221         s = _translate(s, {b'0': b'O', b'1': map01})
 222     if casefold:
 223         s = s.upper()
 224     # Strip off pad characters from the right.  We need to count the pad
 225     # characters because this will tell us how many null bytes to remove from
 226     # the end of the decoded string.
 227     padchars = 0
 228     mo = re.search('(?P<pad>[=]*)$', s)
 229     if mo:
 230         padchars = len(mo.group('pad'))
 231         if padchars > 0:
 232             s = s[:-padchars]
 233     # Now decode the full quanta
 234     parts = []
 235     acc = 0
 236     shift = 35
 237     for c in s:
 238         val = _b32rev.get(c)
 239         if val is None:
 240             raise TypeError('Non-base32 digit found')
 241         acc += _b32rev[c] << shift
 242         shift -= 5
 243         if shift < 0:
 244             parts.append(binascii.unhexlify('%010x' % acc))
 245             acc = 0
 246             shift = 35
 247     # Process the last, partial quanta
 248     last = binascii.unhexlify(bytes('%010x' % acc, "ascii"))
 249     if padchars == 0:
 250         last = b''                      # No characters
 251     elif padchars == 1:
 252         last = last[:-1]
 253     elif padchars == 3:
 254         last = last[:-2]
 255     elif padchars == 4:
 256         last = last[:-3]
 257     elif padchars == 6:
 258         last = last[:-4]
 259     else:
 260         raise binascii.Error('Incorrect padding')
 261     parts.append(last)
 262     return b''.join(parts)
 263
 264
 265 \f
 266 # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
 267 # lowercase.  The RFC also recommends against accepting input case
 268 # insensitively.
 269 def b16encode(s):
 270     """Encode a byte string using Base16.
 271
 272     s is the byte string to encode.  The encoded byte string is returned.
 273     """
 274     if not isinstance(s, bytes_types):
 275         raise TypeError("expected bytes, not %s" % s.__class__.__name__)
 276     return binascii.hexlify(s).upper()
 277
 278
 279 def b16decode(s, casefold=False):
 280     """Decode a Base16 encoded byte string.
 281
 282     s is the byte string to decode.  Optional casefold is a flag
 283     specifying whether a lowercase alphabet is acceptable as input.
 284     For security purposes, the default is False.
 285
 286     The decoded byte string is returned.  binascii.Error is raised if
 287     s were incorrectly padded or if there are non-alphabet characters
 288     present in the string.
 289     """
 290     if not isinstance(s, bytes_types):
 291         raise TypeError("expected bytes, not %s" % s.__class__.__name__)
 292     if casefold:
 293         s = s.upper()
 294     if re.search('[^0-9A-F]', s):
 295         raise binascii.Error('Non-base16 digit found')
 296     return binascii.unhexlify(s)
 297
 298
 299 \f
 300 # Legacy interface.  This code could be cleaned up since I don't believe
 301 # binascii has any line length limitations.  It just doesn't seem worth it
 302 # though.  The files should be opened in binary mode.
 303
 304 MAXLINESIZE = 76 # Excluding the CRLF
 305 MAXBINSIZE = (MAXLINESIZE//4)*3
 306
 307 def encode(input, output):
 308     """Encode a file; input and output are binary files."""
 309     while True:
 310         s = input.read(MAXBINSIZE)
 311         if not s:
 312             break
 313         while len(s) < MAXBINSIZE:
 314             ns = input.read(MAXBINSIZE-len(s))
 315             if not ns:
 316                 break
 317             s += ns
 318         line = binascii.b2a_base64(s)
 319         output.write(line)
 320
 321
 322 def decode(input, output):
 323     """Decode a file; input and output are binary files."""
 324     while True:
 325         line = input.readline()
 326         if not line:
 327             break
 328         s = binascii.a2b_base64(line)
 329         output.write(s)
 330
 331
 332 def encodestring(s):
 333     """Encode a string into multiple lines of base-64 data.
 334
 335     Argument and return value are bytes.
 336     """
 337     if not isinstance(s, bytes_types):
 338         raise TypeError("expected bytes, not %s" % s.__class__.__name__)
 339     pieces = []
 340     for i in range(0, len(s), MAXBINSIZE):
 341         chunk = s[i : i + MAXBINSIZE]
 342         pieces.append(binascii.b2a_base64(chunk))
 343     return b"".join(pieces)
 344
 345
 346 def decodestring(s):
 347     """Decode a string.
 348
 349     Argument and return value are bytes.
 350     """
 351     if not isinstance(s, bytes_types):
 352         raise TypeError("expected bytes, not %s" % s.__class__.__name__)
 353     return binascii.a2b_base64(s)
 354
 355
 356 \f
 357 # Usable as a script...
 358 def main():
 359     """Small main program"""
 360     import sys, getopt
 361     try:
 362         opts, args = getopt.getopt(sys.argv[1:], 'deut')
 363     except getopt.error as msg:
 364         sys.stdout = sys.stderr
 365         print(msg)
 366         print("""usage: %s [-d|-e|-u|-t] [file|-]
 367         -d, -u: decode
 368         -e: encode (default)
 369         -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0])
 370         sys.exit(2)
 371     func = encode
 372     for o, a in opts:
 373         if o == '-e': func = encode
 374         if o == '-d': func = decode
 375         if o == '-u': func = decode
 376         if o == '-t': test(); return
 377     if args and args[0] != '-':
 378         func(open(args[0], 'rb'), sys.stdout)
 379     else:
 380         func(sys.stdin, sys.stdout)
 381
 382
 383 def test():
 384     s0 = b"Aladdin:open sesame"
 385     print(repr(s0))
 386     s1 = encodestring(s0)
 387     print(repr(s1))
 388     s2 = decodestring(s1)
 389     print(repr(s2))
 390     assert s0 == s2
 391
 392
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()