py-cvs-rel2_1 (Rev 1.2) merge
[python/dscho.git] / Lib /
1 """Guess the MIME type of a file.
3 This module defines two useful functions:
5 guess_type(url) -- guess the MIME type and encoding of a URL.
7 guess_extension(type) -- guess the extension for a given MIME type.
9 It also contains the following, for tuning the behavior:
11 Data:
13 knownfiles -- list of files to parse
14 inited -- flag set when init() has been called
15 suffix_map -- dictionary mapping suffixes to suffixes
16 encodings_map -- dictionary mapping suffixes to encodings
17 types_map -- dictionary mapping suffixes to types
19 Functions:
21 init([files]) -- parse a list of files, default knownfiles
22 read_mime_types(file) -- parse one file, return a dictionary or None
24 """
26 import posixpath
27 import urllib
# Public names exported by a star-import of this module.  Only the four
# functions are listed; the data tables (knownfiles, suffix_map,
# encodings_map, types_map) and the inited flag are not exported this way.
__all__ = ["guess_type","guess_extension","read_mime_types","init"]
# Default mime.types files consulted by init(), in order.  Files that
# cannot be opened are skipped; entries from later files override entries
# from earlier ones.  Each path is listed only once so that init() does not
# parse the same file twice.
knownfiles = [
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]

# Flag set to 1 once init() has run; the guess_*() functions call init()
# lazily while it is still 0.
inited = 0
def guess_type(url):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    URLs whose scheme is "data" are not looked up by suffix: the media
    type is read from the URL itself, defaulting to "text/plain", and the
    encoding is always None.  A data URL without a comma yields
    (None, None).

    """
    if not inited:
        init()
    scheme, url = urllib.splittype(url)
    if scheme == 'data':
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        # type/subtype defaults to "text/plain"
        comma = url.find(',')
        if comma < 0:
            # bad data URL
            return None, None
        # Only look for ';' in the header part, before the data.
        semi = url.find(';', 0, comma)
        if semi >= 0:
            mediatype = url[:semi]
        else:
            mediatype = url[:comma]
        # A leading "attr=value" parameter or a missing "/" means no
        # explicit type/subtype was given.
        if '=' in mediatype or '/' not in mediatype:
            mediatype = 'text/plain'
        return mediatype, None  # never compressed, so encoding is None
    base, ext = posixpath.splitext(url)
    # Expand shorthand suffixes (e.g. ".tgz" -> ".tar.gz") and re-split.
    while ext in suffix_map:
        base, ext = posixpath.splitext(base + suffix_map[ext])
    # Peel off one encoding suffix, if any, to expose the type suffix.
    if ext in encodings_map:
        encoding = encodings_map[ext]
        base, ext = posixpath.splitext(base)
    else:
        encoding = None
    # Exact-case match first, then fall back to the lower-cased suffix.
    if ext in types_map:
        return types_map[ext], encoding
    lowered = ext.lower()
    if lowered in types_map:
        return types_map[lowered], encoding
    return None, encoding
def guess_extension(type):
    """Guess the extension for a file based on its MIME type.

    Return value is a string giving a filename extension, including the
    leading dot ('.').  The extension is not guaranteed to have been
    associated with any particular data stream, but would be mapped to the
    MIME type `type' by guess_type().  If no extension can be guessed for
    `type', None is returned.

    The comparison is done on the lower-cased `type'.  When several
    extensions map to the same type, which one is returned depends on the
    iteration order of types_map.
    """
    # inited is only read here, so no "global" declaration is needed.
    if not inited:
        init()
    wanted = type.lower()
    for ext, stype in types_map.items():
        if wanted == stype:
            return ext
    return None
def init(files=None):
    """Load suffix-to-type mappings from mime.types files into types_map.

    files is a sequence of file names to parse; when it is omitted (or
    empty) the module-level knownfiles list is used instead.  Each file is
    parsed with read_mime_types(); files that yield nothing are skipped and
    entries from later files override earlier ones.  Sets the module-level
    inited flag to 1 when done.
    """
    global inited
    if files:
        sources = files
    else:
        sources = knownfiles
    for path in sources:
        parsed = read_mime_types(path)
        if not parsed:
            # Unreadable file (None) or a file with no usable entries.
            continue
        types_map.update(parsed)
    inited = 1
def read_mime_types(file):
    """Parse one mime.types-style file and return a suffix-to-type dictionary.

    file is the name of the file to read.  Each line has the form

        type/subtype  suffix1 suffix2 ...

    where anything from a word beginning with '#' to the end of the line is
    ignored.  Lines with no words are skipped; a line with a type but no
    suffixes contributes nothing.  Keys of the returned dictionary are the
    suffixes with a leading dot added.

    Returns None if the file cannot be opened.
    """
    try:
        f = open(file)
    except IOError:
        return None
    types = {}
    try:
        for line in f:
            fields = []
            for word in line.split():
                if word[0] == '#':
                    # Comment: drop this word and the rest of the line.
                    break
                fields.append(word)
            if not fields:
                continue
            mimetype, suffixes = fields[0], fields[1:]
            for suff in suffixes:
                types['.' + suff] = mimetype
    finally:
        # Close the file even if reading fails part-way through.
        f.close()
    return types
# Shorthand suffixes and the full suffix each one stands for.  guess_type()
# substitutes the expansion and splits the name again, so a ".tgz" file is
# treated as a gzip-encoded ".tar" file.
suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz': '.tar.gz',
}

# Suffixes that denote an encoding wrapped around the real content, mapped
# to the encoding name reported by guess_type().  Lookups are case
# sensitive (note the upper-case '.Z').
encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
}
# Built-in table mapping file suffixes (lower case, with the leading dot) to
# MIME types.  init() adds to and overrides these entries with whatever it
# reads from the mime.types files.
types_map = {
    '.a': 'application/octet-stream',
    '.ai': 'application/postscript',
    '.aif': 'audio/x-aiff',
    '.aifc': 'audio/x-aiff',
    '.aiff': 'audio/x-aiff',
    '.au': 'audio/basic',
    '.avi': 'video/x-msvideo',
    '.bcpio': 'application/x-bcpio',
    '.bin': 'application/octet-stream',
    '.cdf': 'application/x-netcdf',
    '.cpio': 'application/x-cpio',
    '.csh': 'application/x-csh',
    '.dll': 'application/octet-stream',
    '.dvi': 'application/x-dvi',
    '.exe': 'application/octet-stream',
    '.eps': 'application/postscript',
    '.etx': 'text/x-setext',
    '.gif': 'image/gif',
    '.gtar': 'application/x-gtar',
    '.hdf': 'application/x-hdf',
    '.htm': 'text/html',
    '.html': 'text/html',
    '.ief': 'image/ief',
    '.jpe': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.jpg': 'image/jpeg',
    '.js': 'application/x-javascript',
    '.latex': 'application/x-latex',
    '.man': 'application/x-troff-man',
    '.me': 'application/x-troff-me',
    '.mif': 'application/x-mif',
    '.mov': 'video/quicktime',
    '.movie': 'video/x-sgi-movie',
    '.mpe': 'video/mpeg',
    '.mpeg': 'video/mpeg',
    '.mpg': 'video/mpeg',
    '.ms': 'application/x-troff-ms',
    '.nc': 'application/x-netcdf',
    '.o': 'application/octet-stream',
    '.obj': 'application/octet-stream',
    '.oda': 'application/oda',
    '.pbm': 'image/x-portable-bitmap',
    '.pdf': 'application/pdf',
    '.pgm': 'image/x-portable-graymap',
    '.pnm': 'image/x-portable-anymap',
    '.png': 'image/png',
    '.ppm': 'image/x-portable-pixmap',
    '.ps': 'application/postscript',
    '.py': 'text/x-python',
    '.pyc': 'application/x-python-code',
    '.pyo': 'application/x-python-code',
    '.qt': 'video/quicktime',
    '.ras': 'image/x-cmu-raster',
    '.rgb': 'image/x-rgb',
    '.rdf': 'application/xml',
    '.roff': 'application/x-troff',
    '.rtf': 'application/rtf',
    '.rtx': 'text/richtext',
    '.sgm': 'text/x-sgml',
    '.sgml': 'text/x-sgml',
    '.sh': 'application/x-sh',
    '.shar': 'application/x-shar',
    '.snd': 'audio/basic',
    '.so': 'application/octet-stream',
    '.src': 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc': 'application/x-sv4crc',
    '.t': 'application/x-troff',
    '.tar': 'application/x-tar',
    '.tcl': 'application/x-tcl',
    '.tex': 'application/x-tex',
    '.texi': 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif': 'image/tiff',
    '.tiff': 'image/tiff',
    '.tr': 'application/x-troff',
    '.tsv': 'text/tab-separated-values',
    '.txt': 'text/plain',
    '.ustar': 'application/x-ustar',
    '.wav': 'audio/x-wav',
    '.xbm': 'image/x-xbitmap',
    '.xml': 'text/xml',
    '.xsl': 'application/xml',
    '.xpm': 'image/x-xpixmap',
    '.xwd': 'image/x-xwindowdump',
    '.zip': 'application/zip',
}
# Command-line use: print the (type, encoding) guess for the URL or file
# name given as the first argument.
if __name__ == '__main__':
    import sys
    # Parenthesised so the line is valid both as a Python 2 print statement
    # and as a Python 3 print() call; the output is the same tuple repr.
    print(guess_type(sys.argv[1]))