1 """Guess the MIME type of a file.
3 This module defines two useful functions:
5 guess_type(url) -- guess the MIME type and encoding of a URL.
7 guess_extension(type) -- guess the extension for a given MIME type.
9 It also contains the following, for tuning the behavior:
13 knownfiles -- list of files to parse
14 inited -- flag set when init() has been called
15 suffix_map -- dictionary mapping shorthand suffixes to the suffixes they stand for (e.g. .tgz to .tar.gz)
16 encodings_map -- dictionary mapping suffixes to encodings
17 types_map -- dictionary mapping suffixes to types
21 init([files]) -- parse a list of files, default knownfiles
22 read_mime_types(file) -- parse one file, return a dictionary or None
31 "/usr/local/etc/httpd/conf/mime.types",
32 "/usr/local/lib/netscape/mime.types",
33 "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2
34 "/usr/local/etc/mime.types", # Apache 1.3
40 """Guess the type of a file based on its URL.
42 Return value is a tuple (type, encoding) where type is None if the
43 type can't be guessed (no or unknown suffix) or a string of the
44 form type/subtype, usable for a MIME Content-type header; and
45 encoding is None for no encoding or the name of the program used
46 to encode (e.g. compress or gzip). The mappings are table
47 driven. Encoding suffixes are case sensitive; type suffixes are
48 first tried case sensitive, then case insensitive.
50 The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
51 to ".tar.gz". (This is table-driven too, using the dictionary
57 scheme
, url
= urllib
.splittype(url
)
59 # syntax of data URLs:
60 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
61 # mediatype := [ type "/" subtype ] *( ";" parameter )
63 # parameter := attribute "=" value
64 # type/subtype defaults to "text/plain"
65 comma
= string
.find(url
, ',')
69 semi
= string
.find(url
, ';', 0, comma
)
74 if '=' in type or '/' not in type:
76 return type, None # never compressed, so encoding is None
77 base
, ext
= posixpath
.splitext(url
)
78 while suffix_map
.has_key(ext
):
79 base
, ext
= posixpath
.splitext(base
+ suffix_map
[ext
])
80 if encodings_map
.has_key(ext
):
81 encoding
= encodings_map
[ext
]
82 base
, ext
= posixpath
.splitext(base
)
85 if types_map
.has_key(ext
):
86 return types_map
[ext
], encoding
87 elif types_map
.has_key(string
.lower(ext
)):
88 return types_map
[string
.lower(ext
)], encoding
92 def guess_extension(type):
93 """Guess the extension for a file based on its MIME type.
95 Return value is a string giving a filename extension, including the
96 leading dot ('.'). The extension is not guaranteed to have been
97 associated with any particular data stream, but would be mapped to the
98 MIME type `type' by guess_type(). If no extension can be guessed for
99 `type', None is returned.
104 type = string
.lower(type)
105 for ext
, stype
in types_map
.items():
110 def init(files
=None):
112 for file in files
or knownfiles
:
113 s
= read_mime_types(file)
115 for key
, value
in s
.items():
116 types_map
[key
] = value
119 def read_mime_types(file):
128 words
= string
.split(line
)
129 for i
in range(len(words
)):
130 if words
[i
][0] == '#':
133 if not words
: continue
134 type, suffixes
= words
[0], words
[1:]
135 for suff
in suffixes
:
152 '.a': 'application/octet-stream',
153 '.ai': 'application/postscript',
154 '.aif': 'audio/x-aiff',
155 '.aifc': 'audio/x-aiff',
156 '.aiff': 'audio/x-aiff',
157 '.au': 'audio/basic',
158 '.avi': 'video/x-msvideo',
159 '.bcpio': 'application/x-bcpio',
160 '.bin': 'application/octet-stream',
161 '.cdf': 'application/x-netcdf',
162 '.cpio': 'application/x-cpio',
163 '.csh': 'application/x-csh',
164 '.dll': 'application/octet-stream',
165 '.dvi': 'application/x-dvi',
166 '.exe': 'application/octet-stream',
167 '.eps': 'application/postscript',
168 '.etx': 'text/x-setext',
170 '.gtar': 'application/x-gtar',
171 '.hdf': 'application/x-hdf',
173 '.html': 'text/html',
175 '.jpe': 'image/jpeg',
176 '.jpeg': 'image/jpeg',
177 '.jpg': 'image/jpeg',
178 '.latex': 'application/x-latex',
179 '.man': 'application/x-troff-man',
180 '.me': 'application/x-troff-me',
181 '.mif': 'application/x-mif',
182 '.mov': 'video/quicktime',
183 '.movie': 'video/x-sgi-movie',
184 '.mpe': 'video/mpeg',
185 '.mpeg': 'video/mpeg',
186 '.mpg': 'video/mpeg',
187 '.ms': 'application/x-troff-ms',
188 '.nc': 'application/x-netcdf',
189 '.o': 'application/octet-stream',
190 '.obj': 'application/octet-stream',
191 '.oda': 'application/oda',
192 '.pbm': 'image/x-portable-bitmap',
193 '.pdf': 'application/pdf',
194 '.pgm': 'image/x-portable-graymap',
195 '.pnm': 'image/x-portable-anymap',
197 '.ppm': 'image/x-portable-pixmap',
198 '.py': 'text/x-python',
199 '.pyc': 'application/x-python-code',
200 '.ps': 'application/postscript',
201 '.qt': 'video/quicktime',
202 '.ras': 'image/x-cmu-raster',
203 '.rgb': 'image/x-rgb',
204 '.rdf': 'application/xml',
205 '.roff': 'application/x-troff',
206 '.rtf': 'application/rtf',
207 '.rtx': 'text/richtext',
208 '.sgm': 'text/x-sgml',
209 '.sgml': 'text/x-sgml',
210 '.sh': 'application/x-sh',
211 '.shar': 'application/x-shar',
212 '.snd': 'audio/basic',
213 '.so': 'application/octet-stream',
214 '.src': 'application/x-wais-source',
215 '.sv4cpio': 'application/x-sv4cpio',
216 '.sv4crc': 'application/x-sv4crc',
217 '.t': 'application/x-troff',
218 '.tar': 'application/x-tar',
219 '.tcl': 'application/x-tcl',
220 '.tex': 'application/x-tex',
221 '.texi': 'application/x-texinfo',
222 '.texinfo': 'application/x-texinfo',
223 '.tif': 'image/tiff',
224 '.tiff': 'image/tiff',
225 '.tr': 'application/x-troff',
226 '.tsv': 'text/tab-separated-values',
227 '.txt': 'text/plain',
228 '.ustar': 'application/x-ustar',
229 '.wav': 'audio/x-wav',
230 '.xbm': 'image/x-xbitmap',
232 '.xsl': 'application/xml',
233 '.xpm': 'image/x-xpixmap',
234 '.xwd': 'image/x-xwindowdump',
235 '.zip': 'application/zip',