1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
10 #include "base/containers/hash_tables.h"
11 #include "base/lazy_instance.h"
12 #include "base/logging.h"
13 #include "base/stl_util.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/strings/string_split.h"
16 #include "base/strings/string_util.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "build/build_config.h"
19 #include "net/base/mime_util.h"
20 #include "net/base/platform_mime_util.h"
21 #include "net/http/http_util.h"
27 // Singleton utility class for mime types.
28 class MimeUtil
: public PlatformMimeUtil
{
30 bool GetMimeTypeFromExtension(const base::FilePath::StringType
& ext
,
31 std::string
* mime_type
) const;
33 bool GetMimeTypeFromFile(const base::FilePath
& file_path
,
34 std::string
* mime_type
) const;
36 bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType
& ext
,
37 std::string
* mime_type
) const;
39 bool MatchesMimeType(const std::string
&mime_type_pattern
,
40 const std::string
&mime_type
) const;
42 bool ParseMimeTypeWithoutParameter(const std::string
& type_string
,
43 std::string
* top_level_type
,
44 std::string
* subtype
) const;
46 bool IsValidTopLevelMimeType(const std::string
& type_string
) const;
49 friend struct base::DefaultLazyInstanceTraits
<MimeUtil
>;
53 bool GetMimeTypeFromExtensionHelper(const base::FilePath::StringType
& ext
,
54 bool include_platform_types
,
55 std::string
* mime_type
) const;
58 // This variable is Leaky because we need to access it from WorkerPool threads.
59 static base::LazyInstance
<MimeUtil
>::Leaky g_mime_util
=
60 LAZY_INSTANCE_INITIALIZER
;
62 static const MimeInfo primary_mappings
[] = {
63 { "text/html", "html,htm,shtml,shtm" },
64 { "text/css", "css" },
65 { "text/xml", "xml" },
66 { "image/gif", "gif" },
67 { "image/jpeg", "jpeg,jpg" },
68 { "image/webp", "webp" },
69 { "image/png", "png" },
70 { "video/mp4", "mp4,m4v" },
71 { "audio/x-m4a", "m4a" },
72 { "audio/mp3", "mp3" },
73 { "video/ogg", "ogv,ogm" },
74 { "audio/ogg", "ogg,oga,opus" },
75 { "video/webm", "webm" },
76 { "audio/webm", "webm" },
77 { "audio/wav", "wav" },
78 { "audio/flac", "flac" },
79 { "application/xhtml+xml", "xhtml,xht,xhtm" },
80 { "application/x-chrome-extension", "crx" },
81 { "multipart/related", "mhtml,mht" }
84 static const MimeInfo secondary_mappings
[] = {
85 { "application/octet-stream", "exe,com,bin" },
86 { "application/gzip", "gz" },
87 { "application/pdf", "pdf" },
88 { "application/postscript", "ps,eps,ai" },
89 { "application/javascript", "js" },
90 { "application/font-woff", "woff" },
91 { "image/bmp", "bmp" },
92 { "image/x-icon", "ico" },
93 { "image/vnd.microsoft.icon", "ico" },
94 { "image/jpeg", "jfif,pjpeg,pjp" },
95 { "image/tiff", "tiff,tif" },
96 { "image/x-xbitmap", "xbm" },
97 { "image/svg+xml", "svg,svgz" },
98 { "image/x-png", "png"},
99 { "message/rfc822", "eml" },
100 { "text/plain", "txt,text" },
101 { "text/html", "ehtml" },
102 { "application/rss+xml", "rss" },
103 { "application/rdf+xml", "rdf" },
104 { "text/xml", "xsl,xbl,xslt" },
105 { "application/vnd.mozilla.xul+xml", "xul" },
106 { "application/x-shockwave-flash", "swf,swl" },
107 { "application/pkcs7-mime", "p7m,p7c,p7z" },
108 { "application/pkcs7-signature", "p7s" },
109 { "application/x-mpegurl", "m3u8" },
112 const char* FindMimeType(const MimeInfo
* mappings
,
114 const std::string
& ext
) {
115 for (size_t i
= 0; i
< mappings_len
; ++i
) {
116 const char* extensions
= mappings
[i
].extensions
;
118 size_t end_pos
= strcspn(extensions
, ",");
119 // The length check is required to prevent the StringPiece below from
120 // including uninitialized memory if ext is longer than extensions.
121 if (end_pos
== ext
.size() &&
122 base::EqualsCaseInsensitiveASCII(
123 base::StringPiece(extensions
, ext
.size()), ext
))
124 return mappings
[i
].mime_type
;
125 extensions
+= end_pos
;
128 extensions
+= 1; // skip over comma
134 bool MimeUtil::GetMimeTypeFromExtension(const base::FilePath::StringType
& ext
,
135 string
* result
) const {
136 return GetMimeTypeFromExtensionHelper(ext
, true, result
);
139 bool MimeUtil::GetWellKnownMimeTypeFromExtension(
140 const base::FilePath::StringType
& ext
,
141 string
* result
) const {
142 return GetMimeTypeFromExtensionHelper(ext
, false, result
);
145 bool MimeUtil::GetMimeTypeFromFile(const base::FilePath
& file_path
,
146 string
* result
) const {
147 base::FilePath::StringType file_name_str
= file_path
.Extension();
148 if (file_name_str
.empty())
150 return GetMimeTypeFromExtension(file_name_str
.substr(1), result
);
153 bool MimeUtil::GetMimeTypeFromExtensionHelper(
154 const base::FilePath::StringType
& ext
,
155 bool include_platform_types
,
156 string
* result
) const {
157 // Avoids crash when unable to handle a long file path. See crbug.com/48733.
158 const unsigned kMaxFilePathSize
= 65536;
159 if (ext
.length() > kMaxFilePathSize
)
162 // Reject a string which contains null character.
163 base::FilePath::StringType::size_type nul_pos
=
164 ext
.find(FILE_PATH_LITERAL('\0'));
165 if (nul_pos
!= base::FilePath::StringType::npos
)
168 // We implement the same algorithm as Mozilla for mapping a file extension to
169 // a mime type. That is, we first check a hard-coded list (that cannot be
170 // overridden), and then if not found there, we defer to the system registry.
171 // Finally, we scan a secondary hard-coded list to catch types that we can
172 // deduce but that we also want to allow the OS to override.
174 base::FilePath
path_ext(ext
);
175 const string ext_narrow_str
= path_ext
.AsUTF8Unsafe();
176 const char* mime_type
= FindMimeType(
177 primary_mappings
, arraysize(primary_mappings
), ext_narrow_str
);
183 if (include_platform_types
&& GetPlatformMimeTypeFromExtension(ext
, result
))
186 mime_type
= FindMimeType(secondary_mappings
, arraysize(secondary_mappings
),
196 MimeUtil::MimeUtil() {
199 // Tests for MIME parameter equality. Each parameter in the |mime_type_pattern|
200 // must be matched by a parameter in the |mime_type|. If there are no
201 // parameters in the pattern, the match is a success.
203 // According rfc2045 keys of parameters are case-insensitive, while values may
204 // or may not be case-sensitive, but they are usually case-sensitive. So, this
205 // function matches values in *case-sensitive* manner, however note that this
206 // may produce some false negatives.
207 bool MatchesMimeTypeParameters(const std::string
& mime_type_pattern
,
208 const std::string
& mime_type
) {
209 typedef std::map
<std::string
, std::string
> StringPairMap
;
211 const std::string::size_type semicolon
= mime_type_pattern
.find(';');
212 const std::string::size_type test_semicolon
= mime_type
.find(';');
213 if (semicolon
!= std::string::npos
) {
214 if (test_semicolon
== std::string::npos
)
217 base::StringPairs pattern_parameters
;
218 base::SplitStringIntoKeyValuePairs(mime_type_pattern
.substr(semicolon
+ 1),
219 '=', ';', &pattern_parameters
);
220 base::StringPairs test_parameters
;
221 base::SplitStringIntoKeyValuePairs(mime_type
.substr(test_semicolon
+ 1),
222 '=', ';', &test_parameters
);
224 // Put the parameters to maps with the keys converted to lower case.
225 StringPairMap pattern_parameter_map
;
226 for (const auto& pair
: pattern_parameters
) {
227 pattern_parameter_map
[base::ToLowerASCII(pair
.first
)] = pair
.second
;
230 StringPairMap test_parameter_map
;
231 for (const auto& pair
: test_parameters
) {
232 test_parameter_map
[base::ToLowerASCII(pair
.first
)] = pair
.second
;
235 if (pattern_parameter_map
.size() > test_parameter_map
.size())
238 for (const auto& parameter_pair
: pattern_parameter_map
) {
239 const auto& test_parameter_pair_it
=
240 test_parameter_map
.find(parameter_pair
.first
);
241 if (test_parameter_pair_it
== test_parameter_map
.end())
243 if (parameter_pair
.second
!= test_parameter_pair_it
->second
)
251 // This comparison handles absolute maching and also basic
252 // wildcards. The plugin mime types could be:
257 // Also tests mime parameters -- all parameters in the pattern must be present
258 // in the tested type for a match to succeed.
259 bool MimeUtil::MatchesMimeType(const std::string
& mime_type_pattern
,
260 const std::string
& mime_type
) const {
261 if (mime_type_pattern
.empty())
264 std::string::size_type semicolon
= mime_type_pattern
.find(';');
265 const std::string
base_pattern(mime_type_pattern
.substr(0, semicolon
));
266 semicolon
= mime_type
.find(';');
267 const std::string
base_type(mime_type
.substr(0, semicolon
));
269 if (base_pattern
== "*" || base_pattern
== "*/*")
270 return MatchesMimeTypeParameters(mime_type_pattern
, mime_type
);
272 const std::string::size_type star
= base_pattern
.find('*');
273 if (star
== std::string::npos
) {
274 if (base::EqualsCaseInsensitiveASCII(base_pattern
, base_type
))
275 return MatchesMimeTypeParameters(mime_type_pattern
, mime_type
);
280 // Test length to prevent overlap between |left| and |right|.
281 if (base_type
.length() < base_pattern
.length() - 1)
284 base::StringPiece
base_pattern_piece(base_pattern
);
285 base::StringPiece
left(base_pattern_piece
.substr(0, star
));
286 base::StringPiece
right(base_pattern_piece
.substr(star
+ 1));
288 if (!base::StartsWith(base_type
, left
, base::CompareCase::INSENSITIVE_ASCII
))
291 if (!right
.empty() &&
292 !base::EndsWith(base_type
, right
, base::CompareCase::INSENSITIVE_ASCII
))
295 return MatchesMimeTypeParameters(mime_type_pattern
, mime_type
);
298 // See http://www.iana.org/assignments/media-types/media-types.xhtml
299 static const char* const legal_top_level_types
[] = {
311 bool MimeUtil::ParseMimeTypeWithoutParameter(
312 const std::string
& type_string
,
313 std::string
* top_level_type
,
314 std::string
* subtype
) const {
315 std::vector
<std::string
> components
= base::SplitString(
316 type_string
, "/", base::TRIM_WHITESPACE
, base::SPLIT_WANT_ALL
);
317 if (components
.size() != 2 ||
318 !HttpUtil::IsToken(components
[0]) ||
319 !HttpUtil::IsToken(components
[1]))
323 *top_level_type
= components
[0];
325 *subtype
= components
[1];
329 bool MimeUtil::IsValidTopLevelMimeType(const std::string
& type_string
) const {
330 std::string lower_type
= base::ToLowerASCII(type_string
);
331 for (size_t i
= 0; i
< arraysize(legal_top_level_types
); ++i
) {
332 if (lower_type
.compare(legal_top_level_types
[i
]) == 0)
336 return type_string
.size() > 2 &&
337 base::StartsWith(type_string
, "x-",
338 base::CompareCase::INSENSITIVE_ASCII
);
341 //----------------------------------------------------------------------------
342 // Wrappers for the singleton
343 //----------------------------------------------------------------------------
345 bool GetMimeTypeFromExtension(const base::FilePath::StringType
& ext
,
346 std::string
* mime_type
) {
347 return g_mime_util
.Get().GetMimeTypeFromExtension(ext
, mime_type
);
350 bool GetMimeTypeFromFile(const base::FilePath
& file_path
,
351 std::string
* mime_type
) {
352 return g_mime_util
.Get().GetMimeTypeFromFile(file_path
, mime_type
);
355 bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType
& ext
,
356 std::string
* mime_type
) {
357 return g_mime_util
.Get().GetWellKnownMimeTypeFromExtension(ext
, mime_type
);
360 bool GetPreferredExtensionForMimeType(const std::string
& mime_type
,
361 base::FilePath::StringType
* extension
) {
362 return g_mime_util
.Get().GetPreferredExtensionForMimeType(mime_type
,
366 bool MatchesMimeType(const std::string
& mime_type_pattern
,
367 const std::string
& mime_type
) {
368 return g_mime_util
.Get().MatchesMimeType(mime_type_pattern
, mime_type
);
371 bool ParseMimeTypeWithoutParameter(const std::string
& type_string
,
372 std::string
* top_level_type
,
373 std::string
* subtype
) {
374 return g_mime_util
.Get().ParseMimeTypeWithoutParameter(
375 type_string
, top_level_type
, subtype
);
378 bool IsValidTopLevelMimeType(const std::string
& type_string
) {
379 return g_mime_util
.Get().IsValidTopLevelMimeType(type_string
);
384 // From http://www.w3schools.com/media/media_mimeref.asp and
385 // http://plugindoc.mozdev.org/winmime.php
386 static const char* const kStandardImageTypes
[] = {
398 "image/vnd.microsoft.icon",
399 "image/x-cmu-raster",
402 "image/x-portable-anymap",
403 "image/x-portable-bitmap",
404 "image/x-portable-graymap",
405 "image/x-portable-pixmap",
409 "image/x-xwindowdump"
411 static const char* const kStandardAudioTypes
[] = {
428 "audio/vnd.rn-realaudio",
431 static const char* const kStandardVideoTypes
[] = {
// Associates a leading MIME type string with a table of concrete MIME types.
struct StandardType {
  const char* const leading_mime_type;  // Compared against e.g. "image/".
  const char* const* standard_types;    // Array of MIME type strings.
  size_t standard_types_len;            // Number of entries in the array.
};
453 static const StandardType kStandardTypes
[] = {
454 { "image/", kStandardImageTypes
, arraysize(kStandardImageTypes
) },
455 { "audio/", kStandardAudioTypes
, arraysize(kStandardAudioTypes
) },
456 { "video/", kStandardVideoTypes
, arraysize(kStandardVideoTypes
) },
460 void GetExtensionsFromHardCodedMappings(
461 const MimeInfo
* mappings
,
463 const std::string
& leading_mime_type
,
464 base::hash_set
<base::FilePath::StringType
>* extensions
) {
465 for (size_t i
= 0; i
< mappings_len
; ++i
) {
466 if (base::StartsWith(mappings
[i
].mime_type
, leading_mime_type
,
467 base::CompareCase::INSENSITIVE_ASCII
)) {
468 for (const base::StringPiece
& this_extension
: base::SplitStringPiece(
469 mappings
[i
].extensions
, ",", base::TRIM_WHITESPACE
,
470 base::SPLIT_WANT_ALL
)) {
472 extensions
->insert(base::UTF8ToUTF16(this_extension
));
474 extensions
->insert(this_extension
.as_string());
481 void GetExtensionsHelper(
482 const char* const* standard_types
,
483 size_t standard_types_len
,
484 const std::string
& leading_mime_type
,
485 base::hash_set
<base::FilePath::StringType
>* extensions
) {
486 for (size_t i
= 0; i
< standard_types_len
; ++i
) {
487 g_mime_util
.Get().GetPlatformExtensionsForMimeType(standard_types
[i
],
491 // Also look up the extensions from hard-coded mappings in case that some
492 // supported extensions are not registered in the system registry, like ogg.
493 GetExtensionsFromHardCodedMappings(primary_mappings
,
494 arraysize(primary_mappings
),
498 GetExtensionsFromHardCodedMappings(secondary_mappings
,
499 arraysize(secondary_mappings
),
504 // Note that the elements in the source set will be appended to the target
507 void HashSetToVector(base::hash_set
<T
>* source
, std::vector
<T
>* target
) {
508 size_t old_target_size
= target
->size();
509 target
->resize(old_target_size
+ source
->size());
511 for (typename
base::hash_set
<T
>::iterator iter
= source
->begin();
512 iter
!= source
->end(); ++iter
, ++i
)
513 (*target
)[old_target_size
+ i
] = *iter
;
518 void GetExtensionsForMimeType(
519 const std::string
& unsafe_mime_type
,
520 std::vector
<base::FilePath::StringType
>* extensions
) {
521 if (unsafe_mime_type
== "*/*" || unsafe_mime_type
== "*")
524 const std::string mime_type
= base::ToLowerASCII(unsafe_mime_type
);
525 base::hash_set
<base::FilePath::StringType
> unique_extensions
;
527 if (base::EndsWith(mime_type
, "/*", base::CompareCase::INSENSITIVE_ASCII
)) {
528 std::string leading_mime_type
= mime_type
.substr(0, mime_type
.length() - 1);
530 // Find the matching StandardType from within kStandardTypes, or fall
531 // through to the last (default) StandardType.
532 const StandardType
* type
= NULL
;
533 for (size_t i
= 0; i
< arraysize(kStandardTypes
); ++i
) {
534 type
= &(kStandardTypes
[i
]);
535 if (type
->leading_mime_type
&&
536 leading_mime_type
== type
->leading_mime_type
)
540 GetExtensionsHelper(type
->standard_types
,
541 type
->standard_types_len
,
545 g_mime_util
.Get().GetPlatformExtensionsForMimeType(mime_type
,
548 // Also look up the extensions from hard-coded mappings in case that some
549 // supported extensions are not registered in the system registry, like ogg.
550 GetExtensionsFromHardCodedMappings(primary_mappings
,
551 arraysize(primary_mappings
),
555 GetExtensionsFromHardCodedMappings(secondary_mappings
,
556 arraysize(secondary_mappings
),
561 HashSetToVector(&unique_extensions
, extensions
);
// Appends one multipart/form-data part to |post_data|.
//   value_name    - written as the part's form-data name.
//   value         - the part's body.
//   mime_boundary - boundary string; emitted with a leading "--".
//   content_type  - if non-empty, a Content-Type header is emitted for the
//                   part.
//   post_data     - buffer the part is appended to; must not be null.
// Every line is terminated with CRLF. No escaping is applied to any argument.
void AddMultipartValueForUpload(const std::string& value_name,
                                const std::string& value,
                                const std::string& mime_boundary,
                                const std::string& content_type,
                                std::string* post_data) {
  // First line is the boundary.
  post_data->append("--" + mime_boundary + "\r\n");
  // Next line is the Content-disposition.
  post_data->append("Content-Disposition: form-data; name=\"" +
                    value_name + "\"\r\n");
  if (!content_type.empty()) {
    // If Content-type is specified, the next line is that.
    post_data->append("Content-Type: " + content_type + "\r\n");
  }
  // Leave an empty line and append the value. This happens whether or not a
  // Content-Type header was written.
  post_data->append("\r\n" + value + "\r\n");
}
// Appends the closing delimiter of a multipart body to |post_data|: the
// boundary wrapped in "--" on both sides, followed by CRLF. |post_data| must
// not be null.
void AddMultipartFinalDelimiterForUpload(const std::string& mime_boundary,
                                         std::string* post_data) {
  post_data->append("--" + mime_boundary + "--\r\n");
}