1 """Guess the MIME type of a file.
3 This module defines two useful functions:
5 guess_type(url) -- guess the MIME type and encoding of a URL.
7 guess_extension(type) -- guess the extension for a given MIME type.
9 It also contains the following, for tuning the behavior:
13 knownfiles -- list of files to parse
14 inited -- flag set when init() has been called
15 suffix_map -- dictionary mapping suffixes to suffixes
16 encodings_map -- dictionary mapping suffixes to encodings
17 types_map -- dictionary mapping suffixes to types
21 init([files]) -- parse a list of files, default knownfiles
22 read_mime_types(file) -- parse one file, return a dictionary or None
# Public API of this module: the names bound by a star-import.
29 __all__
= ["guess_type","guess_extension","read_mime_types","init"]
32 "/usr/local/etc/httpd/conf/mime.types",
33 "/usr/local/lib/netscape/mime.types",
34 "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
35 "/usr/local/etc/mime.types", # Apache 1.3
41 """Guess the type of a file based on its URL.
43 Return value is a tuple (type, encoding) where type is None if the
44 type can't be guessed (no or unknown suffix) or a string of the
45 form type/subtype, usable for a MIME Content-type header; and
46 encoding is None for no encoding or the name of the program used
47 to encode (e.g. compress or gzip). The mappings are table
48 driven. Encoding suffixes are case sensitive; type suffixes are
49 first tried case sensitive, then case insensitive.
51 The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
52 to ".tar.gz". (This is table-driven too, using the dictionary
58 scheme
, url
= urllib
.splittype(url
)
60 # syntax of data URLs:
61 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
62 # mediatype := [ type "/" subtype ] *( ";" parameter )
64 # parameter := attribute "=" value
65 # type/subtype defaults to "text/plain"
70 semi
= url
.find(';', 0, comma
)
75 if '=' in type or '/' not in type:
77 return type, None # never compressed, so encoding is None
78 base
, ext
= posixpath
.splitext(url
)
79 while suffix_map
.has_key(ext
):
80 base
, ext
= posixpath
.splitext(base
+ suffix_map
[ext
])
81 if encodings_map
.has_key(ext
):
82 encoding
= encodings_map
[ext
]
83 base
, ext
= posixpath
.splitext(base
)
86 if types_map
.has_key(ext
):
87 return types_map
[ext
], encoding
88 elif types_map
.has_key(ext
.lower()):
89 return types_map
[ext
.lower()], encoding
93 def guess_extension(type):
94 """Guess the extension for a file based on its MIME type.
96 Return value is a string giving a filename extension, including the
97 leading dot ('.'). The extension is not guaranteed to have been
98 associated with any particular data stream, but would be mapped to the
99 MIME type `type' by guess_type(). If no extension can be guessed for
100 `type', None is returned.
106 for ext
, stype
in types_map
.items():
111 def init(files
=None):
113 for file in files
or knownfiles
:
114 s
= read_mime_types(file)
116 for key
, value
in s
.items():
117 types_map
[key
] = value
120 def read_mime_types(file):
130 for i
in range(len(words
)):
131 if words
[i
][0] == '#':
134 if not words
: continue
135 type, suffixes
= words
[0], words
[1:]
136 for suff
in suffixes
:
153 '.a': 'application/octet-stream',
154 '.ai': 'application/postscript',
155 '.aif': 'audio/x-aiff',
156 '.aifc': 'audio/x-aiff',
157 '.aiff': 'audio/x-aiff',
158 '.au': 'audio/basic',
159 '.avi': 'video/x-msvideo',
160 '.bcpio': 'application/x-bcpio',
161 '.bin': 'application/octet-stream',
162 '.cdf': 'application/x-netcdf',
163 '.cpio': 'application/x-cpio',
164 '.csh': 'application/x-csh',
165 '.dll': 'application/octet-stream',
166 '.dvi': 'application/x-dvi',
167 '.exe': 'application/octet-stream',
168 '.eps': 'application/postscript',
169 '.etx': 'text/x-setext',
171 '.gtar': 'application/x-gtar',
172 '.hdf': 'application/x-hdf',
174 '.html': 'text/html',
176 '.jpe': 'image/jpeg',
177 '.jpeg': 'image/jpeg',
178 '.jpg': 'image/jpeg',
179 '.js': 'application/x-javascript',
180 '.latex': 'application/x-latex',
181 '.man': 'application/x-troff-man',
182 '.me': 'application/x-troff-me',
183 '.mif': 'application/x-mif',
184 '.mov': 'video/quicktime',
185 '.movie': 'video/x-sgi-movie',
186 '.mpe': 'video/mpeg',
187 '.mpeg': 'video/mpeg',
188 '.mpg': 'video/mpeg',
189 '.ms': 'application/x-troff-ms',
190 '.nc': 'application/x-netcdf',
191 '.o': 'application/octet-stream',
192 '.obj': 'application/octet-stream',
193 '.oda': 'application/oda',
194 '.pbm': 'image/x-portable-bitmap',
195 '.pdf': 'application/pdf',
196 '.pgm': 'image/x-portable-graymap',
197 '.pnm': 'image/x-portable-anymap',
199 '.ppm': 'image/x-portable-pixmap',
200 '.py': 'text/x-python',
201 '.pyc': 'application/x-python-code',
202 '.ps': 'application/postscript',
203 '.qt': 'video/quicktime',
204 '.ras': 'image/x-cmu-raster',
205 '.rgb': 'image/x-rgb',
206 '.rdf': 'application/xml',
207 '.roff': 'application/x-troff',
208 '.rtf': 'application/rtf',
209 '.rtx': 'text/richtext',
210 '.sgm': 'text/x-sgml',
211 '.sgml': 'text/x-sgml',
212 '.sh': 'application/x-sh',
213 '.shar': 'application/x-shar',
214 '.snd': 'audio/basic',
215 '.so': 'application/octet-stream',
216 '.src': 'application/x-wais-source',
217 '.sv4cpio': 'application/x-sv4cpio',
218 '.sv4crc': 'application/x-sv4crc',
219 '.t': 'application/x-troff',
220 '.tar': 'application/x-tar',
221 '.tcl': 'application/x-tcl',
222 '.tex': 'application/x-tex',
223 '.texi': 'application/x-texinfo',
224 '.texinfo': 'application/x-texinfo',
225 '.tif': 'image/tiff',
226 '.tiff': 'image/tiff',
227 '.tr': 'application/x-troff',
228 '.tsv': 'text/tab-separated-values',
229 '.txt': 'text/plain',
230 '.ustar': 'application/x-ustar',
231 '.wav': 'audio/x-wav',
232 '.xbm': 'image/x-xbitmap',
234 '.xsl': 'application/xml',
235 '.xpm': 'image/x-xpixmap',
236 '.xwd': 'image/x-xwindowdump',
237 '.zip': 'application/zip',
240 if __name__
== '__main__':
242 print guess_type(sys
.argv
[1])