"""Guess the MIME type of a file.

This module defines two useful functions:

guess_type(url, strict=1) -- guess the MIME type and encoding of a URL.

guess_extension(type, strict=1) -- guess the extension for a given MIME type.

It also contains the following, for tuning the behavior:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

init([files]) -- parse a list of files, default knownfiles
read_mime_types(file) -- parse one file, return a dictionary or None
"""
# Names exported by a star-import of this module.
__all__ = ["guess_type", "guess_extension", "read_mime_types", "init"]
# mime.types-format files that init() parses when it is not given an
# explicit list.  The httpd/conf path appears twice; the duplicate is
# kept so the list contents are unchanged.
knownfiles = [
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]
"""MIME-types datastore.

This datastore can handle information from mime.types-style files
and supports basic determination of MIME type from a filename or
URL, and can guess a reasonable extension given a MIME type.
"""
def __init__(self, filenames=()):
    """Set up a datastore seeded from the module-level tables.

    filenames -- optional sequence of mime.types-format pathnames to
                 load on top of the defaults.
    """
    # Work on copies so changes made through this instance do not
    # alter the module-level dictionaries.
    self.encodings_map = encodings_map.copy()
    self.suffix_map = suffix_map.copy()
    self.types_map = types_map.copy()
    self.common_types = common_types.copy()
    for name in filenames:
        # NOTE(review): loop body restored as a call to read(); confirm.
        self.read(name)
def guess_type(self, url, strict=1):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if
    the type can't be guessed (no or unknown suffix) or a string
    of the form type/subtype, usable for a MIME Content-type
    header; and encoding is None for no encoding or the name of
    the program used to encode (e.g. compress or gzip).  The
    mappings are table driven.  Encoding suffixes are case
    sensitive; type suffixes are first tried case sensitive, then
    case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all
    mapped to '.tar.gz'.  (This is table-driven too, using the
    dictionary suffix_map.)

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    scheme, url = urllib.splittype(url)
    if scheme == 'data':
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        # type/subtype defaults to "text/plain"
        comma = url.find(',')
        if comma < 0:
            # No "," separator at all: not a usable data URL.
            return None, None
        # The media type ends at the first ";" before the comma, or at
        # the comma itself when there are no parameters.
        semi = url.find(';', 0, comma)
        if semi >= 0:
            type = url[:semi]
        else:
            type = url[:comma]
        if '=' in type or '/' not in type:
            # Only parameters (or nothing) were given, so apply the default.
            type = 'text/plain'
        return type, None           # never compressed, so encoding is None
    base, ext = posixpath.splitext(url)
    # Expand shorthand suffixes first (e.g. .tgz -> .tar.gz).
    while ext in self.suffix_map:
        base, ext = posixpath.splitext(base + self.suffix_map[ext])
    if ext in self.encodings_map:
        encoding = self.encodings_map[ext]
        base, ext = posixpath.splitext(base)
    else:
        # Always bind encoding: every return below reports it.
        encoding = None
    types_map = self.types_map
    common_types = self.common_types
    if ext in types_map:
        return types_map[ext], encoding
    elif ext.lower() in types_map:
        return types_map[ext.lower()], encoding
    elif strict:
        # Strict mode never consults the non-standard table.
        return None, encoding
    elif ext in common_types:
        return common_types[ext], encoding
    elif ext.lower() in common_types:
        return common_types[ext.lower()], encoding
    else:
        return None, encoding
def guess_extension(self, type, strict=1):
    """Guess the extension for a file based on its MIME type.

    Return value is a string giving a filename extension,
    including the leading dot ('.').  The extension is not
    guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type `type' by
    guess_type().  If no extension can be guessed for `type', None
    is returned.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # The tables store lower-case type names.
    type = type.lower()
    for ext, stype in self.types_map.items():
        if type == stype:
            return ext
    if not strict:
        # Use this instance's table rather than the module-level one, so
        # lookups agree with what guess_type() on the same object sees.
        for ext, stype in self.common_types.items():
            if type == stype:
                return ext
    return None
def read(self, filename):
    """Read a single mime.types-format file, specified by pathname."""
    # NOTE(review): body restored from the docstring and the sibling
    # readfp(); confirm against the intended implementation.
    fp = open(filename)
    try:
        self.readfp(fp)
    finally:
        # Release the handle even if parsing fails.
        fp.close()
def readfp(self, fp):
    """Read a single mime.types-format file.

    fp -- an object with a readline() method.  Each non-blank line is
          "type suffix suffix ..."; a word starting with '#' begins a
          comment that runs to the end of the line.  Every suffix is
          stored in self.types_map with a leading dot.
    """
    table = self.types_map
    while 1:
        line = fp.readline()
        if not line:
            break
        words = line.split()
        # Drop everything from the first comment word onwards.  Words
        # produced by split() are never empty, so [0] is safe.
        for i in range(len(words)):
            if words[i][0] == '#':
                del words[i:]
                break
        if not words:
            continue
        type, suffixes = words[0], words[1:]
        for suff in suffixes:
            table['.' + suff] = type
def guess_type(url, strict=1):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    Optional `strict' argument when false adds a bunch of commonly found, but
    non-standard types.
    """
    # init() rebinds the global name guess_type to the datastore's bound
    # method, so the call below reaches the datastore instead of
    # recursing into this bootstrap stub.
    init()
    return guess_type(url, strict)
def guess_extension(type, strict=1):
    """Guess the extension for a file based on its MIME type.

    Return value is a string giving a filename extension, including the
    leading dot ('.').  The extension is not guaranteed to have been
    associated with any particular data stream, but would be mapped to the
    MIME type `type' by guess_type().  If no extension can be guessed for
    `type', None is returned.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # init() rebinds the global name guess_extension to the datastore's
    # bound method, so the call below reaches the datastore instead of
    # recursing into this bootstrap stub.
    init()
    return guess_extension(type, strict)
def init(files=None):
    """Build the shared datastore and publish it through module globals.

    files -- sequence of mime.types-format pathnames to parse; defaults
             to knownfiles.  Paths that are not regular files are
             skipped.

    Afterwards the module-level tables and the guess_type /
    guess_extension names refer to the freshly built datastore.
    """
    global guess_extension, guess_type
    global suffix_map, types_map, encodings_map, common_types
    global inited
    inited = 1
    # NOTE(review): the datastore class name is not visible in this part
    # of the file; MimeTypes is assumed -- confirm.
    db = MimeTypes()
    if files is None:
        files = knownfiles
    for path in files:
        if os.path.isfile(path):
            fp = open(path)
            try:
                db.readfp(fp)
            finally:
                # Do not leave the handle for the garbage collector.
                fp.close()
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map
    guess_extension = db.guess_extension
    guess_type = db.guess_type
    common_types = db.common_types
226 def read_mime_types(file):
# Before adding new types, make sure they are either registered with IANA, at
# http://www.isi.edu/in-notes/iana/assignments/media-types
# or extensions, i.e. using the x- prefix

# If you add to these, please keep them sorted!
#
# Each suffix appears once.  '.cdf' and '.xls' used to be listed twice;
# only the later entry of each pair ever took effect, so that is the one
# kept here.
types_map = {
    '.a'      : 'application/octet-stream',
    '.ai'     : 'application/postscript',
    '.aif'    : 'audio/x-aiff',
    '.aifc'   : 'audio/x-aiff',
    '.aiff'   : 'audio/x-aiff',
    '.au'     : 'audio/basic',
    '.avi'    : 'video/x-msvideo',
    '.bat'    : 'text/plain',
    '.bcpio'  : 'application/x-bcpio',
    '.bin'    : 'application/octet-stream',
    '.bmp'    : 'image/x-ms-bmp',
    '.cdf'    : 'application/x-netcdf',
    '.cpio'   : 'application/x-cpio',
    '.csh'    : 'application/x-csh',
    '.dll'    : 'application/octet-stream',
    '.doc'    : 'application/msword',
    '.dot'    : 'application/msword',
    '.dvi'    : 'application/x-dvi',
    '.eml'    : 'message/rfc822',
    '.eps'    : 'application/postscript',
    '.etx'    : 'text/x-setext',
    '.exe'    : 'application/octet-stream',
    '.gif'    : 'image/gif',
    '.gtar'   : 'application/x-gtar',
    '.hdf'    : 'application/x-hdf',
    '.htm'    : 'text/html',
    '.html'   : 'text/html',
    '.ief'    : 'image/ief',
    '.jpe'    : 'image/jpeg',
    '.jpeg'   : 'image/jpeg',
    '.jpg'    : 'image/jpeg',
    '.js'     : 'application/x-javascript',
    '.ksh'    : 'text/plain',
    '.latex'  : 'application/x-latex',
    '.m1v'    : 'video/mpeg',
    '.man'    : 'application/x-troff-man',
    '.me'     : 'application/x-troff-me',
    '.mht'    : 'message/rfc822',
    '.mhtml'  : 'message/rfc822',
    '.mif'    : 'application/x-mif',
    '.mov'    : 'video/quicktime',
    '.movie'  : 'video/x-sgi-movie',
    '.mp2'    : 'audio/mpeg',
    '.mp3'    : 'audio/mpeg',
    '.mpa'    : 'video/mpeg',
    '.mpe'    : 'video/mpeg',
    '.mpeg'   : 'video/mpeg',
    '.mpg'    : 'video/mpeg',
    '.ms'     : 'application/x-troff-ms',
    '.nc'     : 'application/x-netcdf',
    '.nws'    : 'message/rfc822',
    '.o'      : 'application/octet-stream',
    '.obj'    : 'application/octet-stream',
    '.oda'    : 'application/oda',
    '.p12'    : 'application/x-pkcs12',
    '.p7c'    : 'application/pkcs7-mime',
    '.pbm'    : 'image/x-portable-bitmap',
    '.pdf'    : 'application/pdf',
    '.pfx'    : 'application/x-pkcs12',
    '.pgm'    : 'image/x-portable-graymap',
    '.pl'     : 'text/plain',
    '.png'    : 'image/png',
    '.pnm'    : 'image/x-portable-anymap',
    '.pot'    : 'application/vnd.ms-powerpoint',
    '.ppa'    : 'application/vnd.ms-powerpoint',
    '.ppm'    : 'image/x-portable-pixmap',
    '.pps'    : 'application/vnd.ms-powerpoint',
    '.ppt'    : 'application/vnd.ms-powerpoint',
    '.ps'     : 'application/postscript',
    '.pwz'    : 'application/vnd.ms-powerpoint',
    '.py'     : 'text/x-python',
    '.pyc'    : 'application/x-python-code',
    '.pyo'    : 'application/x-python-code',
    '.qt'     : 'video/quicktime',
    '.ra'     : 'audio/x-pn-realaudio',
    '.ram'    : 'application/x-pn-realaudio',
    '.ras'    : 'image/x-cmu-raster',
    '.rdf'    : 'application/xml',
    '.rgb'    : 'image/x-rgb',
    '.roff'   : 'application/x-troff',
    '.rtx'    : 'text/richtext',
    '.sgm'    : 'text/x-sgml',
    '.sgml'   : 'text/x-sgml',
    '.sh'     : 'application/x-sh',
    '.shar'   : 'application/x-shar',
    '.snd'    : 'audio/basic',
    '.so'     : 'application/octet-stream',
    '.src'    : 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc' : 'application/x-sv4crc',
    '.t'      : 'application/x-troff',
    '.tar'    : 'application/x-tar',
    '.tcl'    : 'application/x-tcl',
    '.tex'    : 'application/x-tex',
    '.texi'   : 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif'    : 'image/tiff',
    '.tiff'   : 'image/tiff',
    '.tr'     : 'application/x-troff',
    '.tsv'    : 'text/tab-separated-values',
    '.txt'    : 'text/plain',
    '.ustar'  : 'application/x-ustar',
    '.vcf'    : 'text/x-vcard',
    '.wav'    : 'audio/x-wav',
    '.wiz'    : 'application/msword',
    '.xbm'    : 'image/x-xbitmap',
    '.xlb'    : 'application/vnd.ms-excel',
    '.xls'    : 'application/vnd.ms-excel',
    '.xpm'    : 'image/x-xpixmap',
    '.xsl'    : 'application/xml',
    '.xwd'    : 'image/x-xwindowdump',
    '.zip'    : 'application/zip',
    }
# These are non-standard types, commonly found in the wild.  They will only
# match if strict=0 flag is given to the API methods.

# Please sort these too
common_types = {
    '.jpg' : 'image/jpg',
    '.mid' : 'audio/midi',
    '.midi': 'audio/midi',
    '.pct' : 'image/pict',
    '.pic' : 'image/pict',
    '.pict': 'image/pict',
    '.rtf' : 'application/rtf',
    }
if __name__ == '__main__':
    # Command-line front end: guess a type (or, with -e, an extension)
    # for every argument.
    import sys
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search of some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        # Print the help text, then the optional message, and exit
        # with status `code`.
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hle',
                                   ['help', 'lenient', 'extension'])
    except getopt.error:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so the same code parses on Python 2 and 3.
        usage(1, sys.exc_info()[1])

    strict = 1
    extension = 0
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = 0
        elif opt in ('-e', '--extension'):
            extension = 1
    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if not guess:
                print("I don't know anything about type %s" % (gtype,))
            else:
                print(guess)
        else:
            guess, encoding = guess_type(gtype, strict)
            if not guess:
                print("I don't know anything about type %s" % (gtype,))
            else:
                print('type: %s encoding: %s' % (guess, encoding))