Lib/mimetypes.py

   1 """Guess the MIME type of a file.
   2
   3 This module defines two useful functions:
   4
   5 guess_type(url) -- guess the MIME type and encoding of a URL.
   6
   7 guess_extension(type) -- guess the extension for a given MIME type.
   8
   9 It also contains the following, for tuning the behavior:
  10
  11 Data:
  12
  13 knownfiles -- list of files to parse
  14 inited -- flag set when init() has been called
  15 suffix_map -- dictionary mapping suffixes to suffixes
  16 encodings_map -- dictionary mapping suffixes to encodings
  17 types_map -- dictionary mapping suffixes to types
  18
  19 Functions:
  20
  21 init([files]) -- parse a list of files, default knownfiles
  22 read_mime_types(file) -- parse one file, return a dictionary or None
  23
  24 """
  25
  26 import posixpath
  27 import urllib
  28
# Names exported by "from mimetypes import *".
__all__ = ["guess_type","guess_extension","read_mime_types","init"]
  30
  31 knownfiles = [
  32     "/usr/local/etc/httpd/conf/mime.types",
  33     "/usr/local/lib/netscape/mime.types",
  34     "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
  35     "/usr/local/etc/mime.types",                # Apache 1.3
  36     ]
  37
  38 inited = 0
  39
  40 def guess_type(url):
  41     """Guess the type of a file based on its URL.
  42
  43     Return value is a tuple (type, encoding) where type is None if the
  44     type can't be guessed (no or unknown suffix) or a string of the
  45     form type/subtype, usable for a MIME Content-type header; and
  46     encoding is None for no encoding or the name of the program used
  47     to encode (e.g. compress or gzip).  The mappings are table
  48     driven.  Encoding suffixes are case sensitive; type suffixes are
  49     first tried case sensitive, then case insensitive.
  50
  51     The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
  52     to ".tar.gz".  (This is table-driven too, using the dictionary
  53     suffix_map).
  54
  55     """
  56     if not inited:
  57         init()
  58     scheme, url = urllib.splittype(url)
  59     if scheme == 'data':
  60         # syntax of data URLs:
  61         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
  62         # mediatype := [ type "/" subtype ] *( ";" parameter )
  63         # data      := *urlchar
  64         # parameter := attribute "=" value
  65         # type/subtype defaults to "text/plain"
  66         comma = url.find(',')
  67         if comma < 0:
  68             # bad data URL
  69             return None, None
  70         semi = url.find(';', 0, comma)
  71         if semi >= 0:
  72             type = url[:semi]
  73         else:
  74             type = url[:comma]
  75         if '=' in type or '/' not in type:
  76             type = 'text/plain'
  77         return type, None               # never compressed, so encoding is None
  78     base, ext = posixpath.splitext(url)
  79     while suffix_map.has_key(ext):
  80         base, ext = posixpath.splitext(base + suffix_map[ext])
  81     if encodings_map.has_key(ext):
  82         encoding = encodings_map[ext]
  83         base, ext = posixpath.splitext(base)
  84     else:
  85         encoding = None
  86     if types_map.has_key(ext):
  87         return types_map[ext], encoding
  88     elif types_map.has_key(ext.lower()):
  89         return types_map[ext.lower()], encoding
  90     else:
  91         return None, encoding
  92
  93 def guess_extension(type):
  94     """Guess the extension for a file based on its MIME type.
  95
  96     Return value is a string giving a filename extension, including the
  97     leading dot ('.').  The extension is not guaranteed to have been
  98     associated with any particular data stream, but would be mapped to the
  99     MIME type `type' by guess_type().  If no extension can be guessed for
 100     `type', None is returned.
 101     """
 102     global inited
 103     if not inited:
 104         init()
 105     type = type.lower()
 106     for ext, stype in types_map.items():
 107         if type == stype:
 108             return ext
 109     return None
 110
 111 def init(files=None):
 112     global inited
 113     for file in files or knownfiles:
 114         s = read_mime_types(file)
 115         if s:
 116             for key, value in s.items():
 117                 types_map[key] = value
 118     inited = 1
 119
 120 def read_mime_types(file):
 121     try:
 122         f = open(file)
 123     except IOError:
 124         return None
 125     map = {}
 126     while 1:
 127         line = f.readline()
 128         if not line: break
 129         words = line.split()
 130         for i in range(len(words)):
 131             if words[i][0] == '#':
 132                 del words[i:]
 133                 break
 134         if not words: continue
 135         type, suffixes = words[0], words[1:]
 136         for suff in suffixes:
 137             map['.'+suff] = type
 138     f.close()
 139     return map
 140
# Shorthand suffixes and the compound suffix each one stands for.
# guess_type() substitutes these (case-sensitively, and repeatedly)
# before it looks up encodings and types.
suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz': '.tar.gz',
}
 146
# Suffix -> encoding name, returned as the second element of
# guess_type()'s result.  Keys are matched case-sensitively.
encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
    }
 151
# Built-in suffix -> MIME type table.  init() adds the entries it reads
# from mime.types files to this dictionary, replacing the defaults
# below where the suffixes coincide.
types_map = {
    '.a': 'application/octet-stream',
    '.ai': 'application/postscript',
    '.aif': 'audio/x-aiff',
    '.aifc': 'audio/x-aiff',
    '.aiff': 'audio/x-aiff',
    '.au': 'audio/basic',
    '.avi': 'video/x-msvideo',
    '.bcpio': 'application/x-bcpio',
    '.bin': 'application/octet-stream',
    '.cdf': 'application/x-netcdf',
    '.cpio': 'application/x-cpio',
    '.csh': 'application/x-csh',
    '.dll': 'application/octet-stream',
    '.dvi': 'application/x-dvi',
    '.exe': 'application/octet-stream',
    '.eps': 'application/postscript',
    '.etx': 'text/x-setext',
    '.gif': 'image/gif',
    '.gtar': 'application/x-gtar',
    '.hdf': 'application/x-hdf',
    '.htm': 'text/html',
    '.html': 'text/html',
    '.ief': 'image/ief',
    '.jpe': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.jpg': 'image/jpeg',
    '.js': 'application/x-javascript',
    '.latex': 'application/x-latex',
    '.man': 'application/x-troff-man',
    '.me': 'application/x-troff-me',
    '.mif': 'application/x-mif',
    '.mov': 'video/quicktime',
    '.movie': 'video/x-sgi-movie',
    '.mpe': 'video/mpeg',
    '.mpeg': 'video/mpeg',
    '.mpg': 'video/mpeg',
    '.ms': 'application/x-troff-ms',
    '.nc': 'application/x-netcdf',
    '.o': 'application/octet-stream',
    '.obj': 'application/octet-stream',
    '.oda': 'application/oda',
    '.pbm': 'image/x-portable-bitmap',
    '.pdf': 'application/pdf',
    '.pgm': 'image/x-portable-graymap',
    '.pnm': 'image/x-portable-anymap',
    '.png': 'image/png',
    '.ppm': 'image/x-portable-pixmap',
    '.ps': 'application/postscript',
    '.py': 'text/x-python',
    '.pyc': 'application/x-python-code',
    '.pyo': 'application/x-python-code',
    '.qt': 'video/quicktime',
    '.ras': 'image/x-cmu-raster',
    '.rgb': 'image/x-rgb',
    '.rdf': 'application/xml',
    '.roff': 'application/x-troff',
    '.rtf': 'application/rtf',
    '.rtx': 'text/richtext',
    '.sgm': 'text/x-sgml',
    '.sgml': 'text/x-sgml',
    '.sh': 'application/x-sh',
    '.shar': 'application/x-shar',
    '.snd': 'audio/basic',
    '.so': 'application/octet-stream',
    '.src': 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc': 'application/x-sv4crc',
    '.t': 'application/x-troff',
    '.tar': 'application/x-tar',
    '.tcl': 'application/x-tcl',
    '.tex': 'application/x-tex',
    '.texi': 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif': 'image/tiff',
    '.tiff': 'image/tiff',
    '.tr': 'application/x-troff',
    '.tsv': 'text/tab-separated-values',
    '.txt': 'text/plain',
    '.ustar': 'application/x-ustar',
    '.wav': 'audio/x-wav',
    '.xbm': 'image/x-xbitmap',
    '.xml': 'text/xml',
    '.xsl': 'application/xml',
    '.xpm': 'image/x-xpixmap',
    '.xwd': 'image/x-xwindowdump',
    '.zip': 'application/zip',
    }
 240
 241 if __name__ == '__main__':
 242     import sys
 243     print guess_type(sys.argv[1])