1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/mime_sniffer.h"
7 #include "testing/gtest/include/gtest/gtest.h"
13 using ::testing::Range
;
14 using ::testing::Values
;
15 using ::net::SniffMimeType
; // It is shadowed by SniffMimeType(), below.
21 std::string type_hint
;
22 const char* mime_type
;
// Runs the sniffer over each of the `count` entries in `tests` and checks with
// EXPECT_EQ that the sniffed type equals the entry's expected `mime_type`.
// NOTE(review): the remaining arguments of the SniffMimeType call and the
// closing braces are not visible in this excerpt.
25 static void TestArray(SnifferTest
* tests
, size_t count
) {
26 std::string mime_type
;
28 for (size_t i
= 0; i
< count
; ++i
) {
29 SniffMimeType(tests
[i
].content
,
34 EXPECT_EQ(tests
[i
].mime_type
, mime_type
);
38 // TODO(evanm): convert other tests to use SniffMimeType instead of TestArray,
39 // so the error messages produced by test failures are more useful.
// String-based convenience overload for tests. Forwards to the five-argument
// SniffMimeType with content.data(), content.size(), GURL(url), the type hint
// and the address of a local output string.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the local `mime_type` is returned -- confirm.
40 static std::string
SniffMimeType(const std::string
& content
,
41 const std::string
& url
,
42 const std::string
& mime_type_hint
) {
43 std::string mime_type
;
44 SniffMimeType(content
.data(), content
.size(), GURL(url
),
45 mime_type_hint
, &mime_type
);
// Sniffs the first 0, 1 and 2 bytes of `buf` with an empty type hint and
// expects "text/plain", "text/plain" and "application/octet-stream"
// respectively.
// NOTE(review): the definitions of `buf` and `url` are not visible in this
// excerpt.
49 TEST(MimeSnifferTest
, BoundaryConditionsTest
) {
50 std::string mime_type
;
51 std::string type_hint
;
59 SniffMimeType(buf
, 0, url
, type_hint
, &mime_type
);
60 EXPECT_EQ("text/plain", mime_type
);
61 SniffMimeType(buf
, 1, url
, type_hint
, &mime_type
);
62 EXPECT_EQ("text/plain", mime_type
);
63 SniffMimeType(buf
, 2, url
, type_hint
, &mime_type
);
64 EXPECT_EQ("application/octet-stream", mime_type
);
// Table-driven test run through TestArray. Each row lists the content, its
// length (sizeof of the literal minus the terminating NUL), the URL, and then
// the type hint followed by the expected MIME type.
// NOTE(review): some rows' hint/expected fields are not visible in this
// excerpt. The two GIF rows take sizeof of a literal that differs from the
// content (\xAF vs \x1F); the lengths are equal, so the result is unaffected.
67 TEST(MimeSnifferTest
, BasicSniffingTest
) {
68 SnifferTest tests
[] = {
69 { "<!DOCTYPE html PUBLIC", sizeof("<!DOCTYPE html PUBLIC")-1,
70 "http://www.example.com/",
72 { "<HtMl><Body></body></htMl>", sizeof("<HtMl><Body></body></htMl>")-1,
73 "http://www.example.com/foo.gif",
74 "application/octet-stream", "application/octet-stream" },
75 { "GIF89a\x1F\x83\x94", sizeof("GIF89a\xAF\x83\x94")-1,
76 "http://www.example.com/foo",
77 "text/plain", "image/gif" },
78 { "Gif87a\x1F\x83\x94", sizeof("Gif87a\xAF\x83\x94")-1,
79 "http://www.example.com/foo?param=tt.gif",
80 "", "application/octet-stream" },
81 { "%!PS-Adobe-3.0", sizeof("%!PS-Adobe-3.0")-1,
82 "http://www.example.com/foo",
83 "text/plain", "text/plain" },
84 { "\x89" "PNG\x0D\x0A\x1A\x0A", sizeof("\x89" "PNG\x0D\x0A\x1A\x0A")-1,
85 "http://www.example.com/foo",
86 "application/octet-stream", "application/octet-stream" },
87 { "\xFF\xD8\xFF\x23\x49\xAF", sizeof("\xFF\xD8\xFF\x23\x49\xAF")-1,
88 "http://www.example.com/foo",
92 TestArray(tests
, arraysize(tests
));
// Table-driven test for content starting with the "Cr24" signature. The rows
// vary the URL scheme, path extension, query string and type hint; the
// visible expectations are "application/x-chrome-extension" for .crx paths
// and "application/octet-stream" for other paths, for a changed trailing byte
// and for a signature preceded by padding.
// NOTE(review): at least one row's URL is not visible in this excerpt.
95 TEST(MimeSnifferTest
, ChromeExtensionsTest
) {
96 SnifferTest tests
[] = {
98 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
99 "http://www.example.com/foo.crx",
100 "", "application/x-chrome-extension" },
101 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
102 "https://www.example.com/foo.crx",
103 "", "application/x-chrome-extension" },
104 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
105 "ftp://www.example.com/foo.crx",
106 "", "application/x-chrome-extension" },
108 // some other mimetypes that should get converted
109 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
110 "http://www.example.com/foo.crx",
111 "text/plain", "application/x-chrome-extension" },
112 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
113 "http://www.example.com/foo.crx",
114 "application/octet-stream", "application/x-chrome-extension" },
116 // success edge cases
117 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
118 "http://www.example.com/foo.crx?query=string",
119 "", "application/x-chrome-extension" },
120 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
121 "http://www.example.com/foo..crx",
122 "", "application/x-chrome-extension" },
124 // wrong file extension
125 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
126 "http://www.example.com/foo.bin",
127 "", "application/octet-stream" },
128 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
129 "http://www.example.com/foo.bin?monkey",
130 "", "application/octet-stream" },
131 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
133 "", "application/octet-stream" },
134 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
135 "http://www.example.com",
136 "", "application/octet-stream" },
137 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
138 "http://www.example.com/",
139 "", "application/octet-stream" },
140 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
141 "http://www.example.com/foo",
142 "", "application/octet-stream" },
143 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
144 "http://www.example.com/foocrx",
145 "", "application/octet-stream" },
146 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
147 "http://www.example.com/foo.crx.blech",
148 "", "application/octet-stream" },
151 { "Cr24\x02\x00\x00\x01", sizeof("Cr24\x02\x00\x00\x01")-1,
152 "http://www.example.com/foo.crx?monkey",
153 "", "application/octet-stream" },
154 { "PADDING_Cr24\x02\x00\x00\x00", sizeof("PADDING_Cr24\x02\x00\x00\x00")-1,
155 "http://www.example.com/foo.crx?monkey",
156 "", "application/octet-stream" },
159 TestArray(tests
, arraysize(tests
));
// Table-driven test run through TestArray. The rows cover whitespace-prefixed
// mixed-case HTML, a "BM" prefix, a short binary favicon, a shell script, a
// mail-like message and an XML declaration.
// NOTE(review): the hint/expected fields of several rows are not visible in
// this excerpt.
162 TEST(MimeSnifferTest
, MozillaCompatibleTest
) {
163 SnifferTest tests
[] = {
164 { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1,
165 "http://www.example.com/",
167 { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1,
168 "http://www.example.com/",
169 "text/plain", "text/plain" },
170 { "BMjlakdsfk", sizeof("BMjlakdsfk")-1,
171 "http://www.example.com/foo",
173 { "\x00\x00\x30\x00", sizeof("\x00\x00\x30\x00")-1,
174 "http://www.example.com/favicon.ico",
175 "", "application/octet-stream" },
176 { "#!/bin/sh\nls /\n", sizeof("#!/bin/sh\nls /\n")-1,
177 "http://www.example.com/foo",
179 { "From: Fred\nTo: Bob\n\nHi\n.\n",
180 sizeof("From: Fred\nTo: Bob\n\nHi\n.\n")-1,
181 "http://www.example.com/foo",
183 { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
184 sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1,
185 "http://www.example.com/foo",
187 { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
188 sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1,
189 "http://www.example.com/foo",
190 "application/octet-stream", "application/octet-stream" },
193 TestArray(tests
, arraysize(tests
));
// Table-driven test run through TestArray. Every row's content embeds an
// HTML <script> payload, either behind a "GIF87a" prefix or behind a single
// "a" line, and the rows vary the URL (plain, "?q=ttt.html", "#ttt.html",
// "foo.html").
// NOTE(review): the type hint and expected MIME type of each row are not
// visible in this excerpt.
196 TEST(MimeSnifferTest
, DontAllowPrivilegeEscalationTest
) {
197 SnifferTest tests
[] = {
198 { "GIF87a\n<html>\n<body>"
199 "<script>alert('haxorzed');\n</script>"
201 sizeof("GIF87a\n<html>\n<body>"
202 "<script>alert('haxorzed');\n</script>"
203 "</body></html>\n")-1,
204 "http://www.example.com/foo",
206 { "GIF87a\n<html>\n<body>"
207 "<script>alert('haxorzed');\n</script>"
209 sizeof("GIF87a\n<html>\n<body>"
210 "<script>alert('haxorzed');\n</script>"
211 "</body></html>\n")-1,
212 "http://www.example.com/foo?q=ttt.html",
214 { "GIF87a\n<html>\n<body>"
215 "<script>alert('haxorzed');\n</script>"
217 sizeof("GIF87a\n<html>\n<body>"
218 "<script>alert('haxorzed');\n</script>"
219 "</body></html>\n")-1,
220 "http://www.example.com/foo#ttt.html",
222 { "a\n<html>\n<body>"
223 "<script>alert('haxorzed');\n</script>"
225 sizeof("a\n<html>\n<body>"
226 "<script>alert('haxorzed');\n</script>"
227 "</body></html>\n")-1,
228 "http://www.example.com/foo",
230 { "a\n<html>\n<body>"
231 "<script>alert('haxorzed');\n</script>"
233 sizeof("a\n<html>\n<body>"
234 "<script>alert('haxorzed');\n</script>"
235 "</body></html>\n")-1,
236 "http://www.example.com/foo?q=ttt.html",
238 { "a\n<html>\n<body>"
239 "<script>alert('haxorzed');\n</script>"
241 sizeof("a\n<html>\n<body>"
242 "<script>alert('haxorzed');\n</script>"
243 "</body></html>\n")-1,
244 "http://www.example.com/foo#ttt.html",
246 { "a\n<html>\n<body>"
247 "<script>alert('haxorzed');\n</script>"
249 sizeof("a\n<html>\n<body>"
250 "<script>alert('haxorzed');\n</script>"
251 "</body></html>\n")-1,
252 "http://www.example.com/foo.html",
256 TestArray(tests
, arraysize(tests
));
// Table-driven test run through TestArray. The contents begin with the byte
// sequences EF BB BF or FE FF, followed by text or arbitrary bytes.
// NOTE(review): the type hint and expected MIME type of each row are not
// visible in this excerpt.
259 TEST(MimeSnifferTest
, UnicodeTest
) {
260 SnifferTest tests
[] = {
261 { "\xEF\xBB\xBF" "Hi there", sizeof("\xEF\xBB\xBF" "Hi there")-1,
262 "http://www.example.com/foo",
264 { "\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79",
265 sizeof("\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79")-1,
266 "http://www.example.com/foo",
268 { "\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9",
269 sizeof("\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9")-1,
270 "http://www.example.com/foo",
272 { "\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01",
273 sizeof("\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01")-1,
274 "http://www.example.com/foo",
278 TestArray(tests
, arraysize(tests
));
// Table-driven test run through TestArray for contents beginning with "CWS",
// "FWS" or "FLV", served from URLs with and without a .swf extension. Every
// expectation visible here is "application/octet-stream".
// NOTE(review): one row's hint/expected fields are not visible in this
// excerpt.
281 TEST(MimeSnifferTest
, FlashTest
) {
282 SnifferTest tests
[] = {
283 { "CWSdd\x00\xB3", sizeof("CWSdd\x00\xB3")-1,
284 "http://www.example.com/foo",
285 "", "application/octet-stream" },
286 { "FLVjdkl*(#)0sdj\x00", sizeof("FLVjdkl*(#)0sdj\x00")-1,
287 "http://www.example.com/foo?q=ttt.swf",
288 "", "application/octet-stream" },
289 { "FWS3$9\r\b\x00", sizeof("FWS3$9\r\b\x00")-1,
290 "http://www.example.com/foo#ttt.swf",
291 "", "application/octet-stream" },
292 { "FLVjdkl*(#)0sdj", sizeof("FLVjdkl*(#)0sdj")-1,
293 "http://www.example.com/foo.swf",
295 { "FLVjdkl*(#)0s\x01dj", sizeof("FLVjdkl*(#)0s\x01dj")-1,
296 "http://www.example.com/foo/bar.swf",
297 "", "application/octet-stream" },
298 { "FWS3$9\r\b\x1A", sizeof("FWS3$9\r\b\x1A")-1,
299 "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar",
300 "", "application/octet-stream" },
301 { "FWS3$9\r\x1C\b", sizeof("FWS3$9\r\x1C\b")-1,
302 "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar",
303 "text/plain", "application/octet-stream" },
306 TestArray(tests
, arraysize(tests
));
// Exercises the string-based SniffMimeType helper on XML-family content:
// Atom and RSS feeds, non-feed XML, and XHTML, each with different type
// hints. Every call passes an empty URL where the URL argument is visible.
// NOTE(review): the trailing arguments of several calls and the end of the
// kCNNRSS literal are not visible in this excerpt.
309 TEST(MimeSnifferTest
, XMLTest
) {
310 // An easy feed to identify.
311 EXPECT_EQ("application/atom+xml",
312 SniffMimeType("<?xml?><feed", std::string(), "text/xml"));
313 // Don't sniff out of plain text.
314 EXPECT_EQ("text/plain",
315 SniffMimeType("<?xml?><feed", std::string(), "text/plain"));
317 EXPECT_EQ("application/rss+xml",
319 "<?xml version='1.0'?>\r\n<rss", std::string(), "text/xml"));
321 // The top of CNN's RSS feed, which we'd like to recognize as RSS.
322 static const char kCNNRSS
[] =
323 "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
324 "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/rss2full.xsl\" "
325 "type=\"text/xsl\" media=\"screen\"?>"
326 "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/itemcontent.css\" "
327 "type=\"text/css\" media=\"screen\"?>"
328 "<rss xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" "
331 EXPECT_EQ("application/rss+xml",
332 SniffMimeType(kCNNRSS
, std::string(), "text/xml"));
333 EXPECT_EQ("text/plain", SniffMimeType(kCNNRSS
, std::string(), "text/plain"));
335 // Don't sniff random XML as something different.
336 EXPECT_EQ("text/xml",
337 SniffMimeType("<?xml?><notafeed", std::string(), "text/xml"));
338 // Don't sniff random plain-text as something different.
339 EXPECT_EQ("text/plain",
340 SniffMimeType("<?xml?><notafeed", std::string(), "text/plain"));
342 // Positive test for the two instances we upgrade to XHTML.
343 EXPECT_EQ("application/xhtml+xml",
344 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
347 EXPECT_EQ("application/xhtml+xml",
348 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
352 // Following our behavior with HTML, don't call other mime types XHTML.
353 EXPECT_EQ("text/plain",
354 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
357 EXPECT_EQ("application/rss+xml",
358 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
360 "application/rss+xml"));
362 // Don't sniff other HTML-looking bits as HTML.
363 EXPECT_EQ("text/xml",
364 SniffMimeType("<html><head>", std::string(), "text/xml"));
365 EXPECT_EQ("text/xml",
366 SniffMimeType("<foo><html xmlns=\"http://www.w3.org/1999/xhtml\">",
371 // Test content which is >= 1024 bytes, and includes no open angle bracket.
372 // http://code.google.com/p/chromium/issues/detail?id=3521
// Sniffs 1024 bytes of 'x' with an empty GURL and a "text/xml" hint, and
// expects the call to return true and the type to remain "text/xml".
// NOTE(review): the declaration of `content` is not visible in this excerpt.
373 TEST(MimeSnifferTest
, XMLTestLargeNoAngledBracket
) {
374 // Make a large input, with 1024 bytes of "x".
376 content
.resize(1024);
377 std::fill(content
.begin(), content
.end(), 'x');
379 // content.size() >= 1024 so the sniff is unambiguous.
380 std::string mime_type
;
381 EXPECT_TRUE(SniffMimeType(content
.data(), content
.size(), GURL(),
382 "text/xml", &mime_type
));
383 EXPECT_EQ("text/xml", mime_type
);
386 // Test content which is >= 1024 bytes, and includes a binary looking byte.
387 // http://code.google.com/p/chromium/issues/detail?id=15314
// Sniffs 1024 bytes of 'x' in which the byte at index 1000 is 0x01, using an
// empty GURL and a "text/plain" hint, and expects the call to return true
// with the type reported as "application/octet-stream".
// NOTE(review): the declaration of `content` is not visible in this excerpt.
388 TEST(MimeSnifferTest
, LooksBinary
) {
389 // Make a large input, with 1024 bytes of "x" and 1 byte of 0x01.
391 content
.resize(1024);
392 std::fill(content
.begin(), content
.end(), 'x');
393 content
[1000] = 0x01;
395 // content.size() >= 1024 so the sniff is unambiguous.
396 std::string mime_type
;
397 EXPECT_TRUE(SniffMimeType(content
.data(), content
.size(), GURL(),
398 "text/plain", &mime_type
));
399 EXPECT_EQ("application/octet-stream", mime_type
);
// Table-driven test run through TestArray for Microsoft Office detection.
// The first group expects "application/octet-stream" for .doc/.xls/.ppt URLs
// with an Office type hint. The second group pairs content starting with the
// D0 CF 11 E0 A1 B1 1A E1 or "PK\x03\x04" signatures with the same
// extensions and expects the matching Office MIME type.
// NOTE(review): the content literals of the first three rows and the type
// hints of the "PK" rows are not visible in this excerpt.
402 TEST(MimeSnifferTest
, OfficeTest
) {
403 SnifferTest tests
[] = {
404 // Check for URLs incorrectly reported as Microsoft Office files.
406 sizeof("Hi there")-1,
407 "http://www.example.com/foo.doc",
408 "application/msword", "application/octet-stream" },
410 sizeof("Hi there")-1,
411 "http://www.example.com/foo.xls",
412 "application/vnd.ms-excel", "application/octet-stream" },
414 sizeof("Hi there")-1,
415 "http://www.example.com/foo.ppt",
416 "application/vnd.ms-powerpoint", "application/octet-stream" },
417 // Check for Microsoft Office files incorrectly reported as text.
418 { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
419 sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
420 "http://www.example.com/foo.doc",
421 "text/plain", "application/msword" },
422 { "PK\x03\x04" "Hi there",
423 sizeof("PK\x03\x04" "Hi there")-1,
424 "http://www.example.com/foo.doc",
426 "application/vnd.openxmlformats-officedocument."
427 "wordprocessingml.document" },
428 { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
429 sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
430 "http://www.example.com/foo.xls",
431 "text/plain", "application/vnd.ms-excel" },
432 { "PK\x03\x04" "Hi there",
433 sizeof("PK\x03\x04" "Hi there")-1,
434 "http://www.example.com/foo.xls",
436 "application/vnd.openxmlformats-officedocument."
437 "spreadsheetml.sheet" },
438 { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
439 sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
440 "http://www.example.com/foo.ppt",
441 "text/plain", "application/vnd.ms-powerpoint" },
442 { "PK\x03\x04" "Hi there",
443 sizeof("PK\x03\x04" "Hi there")-1,
444 "http://www.example.com/foo.ppt",
446 "application/vnd.openxmlformats-officedocument."
447 "presentationml.presentation" },
450 TestArray(tests
, arraysize(tests
));
453 // TODO(thestig) Add more tests for other AV formats. Add another test case for
// Feeds 16-byte FLAC, ASF/WMA, MP4 and AAC headers to
// SniffMimeTypeFromLocalData and expects "audio/x-flac", "video/x-ms-asf",
// "video/mp4" and "audio/mpeg" respectively.
// NOTE(review): each call passes sizeof(k...TestData), which counts the
// string literal's terminating NUL, unlike the `sizeof(...)-1` pattern used
// by the table-driven tests -- confirm this is intended. The output argument
// of each call is not visible in this excerpt.
455 TEST(MimeSnifferTest
, AudioVideoTest
) {
456 std::string mime_type
;
457 const char kFlacTestData
[] =
458 "fLaC\x00\x00\x00\x22\x12\x00\x12\x00\x00\x00\x00\x00";
459 EXPECT_TRUE(SniffMimeTypeFromLocalData(kFlacTestData
,
460 sizeof(kFlacTestData
),
462 EXPECT_EQ("audio/x-flac", mime_type
);
465 const char kWMATestData
[] =
466 "\x30\x26\xb2\x75\x8e\x66\xcf\x11\xa6\xd9\x00\xaa\x00\x62\xce\x6c";
467 EXPECT_TRUE(SniffMimeTypeFromLocalData(kWMATestData
,
468 sizeof(kWMATestData
),
470 EXPECT_EQ("video/x-ms-asf", mime_type
);
473 // mp4a, m4b, m4p, and alac extension files which share the same container
475 const char kMP4TestData
[] =
476 "\x00\x00\x00\x20\x66\x74\x79\x70\x4d\x34\x41\x20\x00\x00\x00\x00";
477 EXPECT_TRUE(SniffMimeTypeFromLocalData(kMP4TestData
,
478 sizeof(kMP4TestData
),
480 EXPECT_EQ("video/mp4", mime_type
);
483 const char kAACTestData
[] =
484 "\xff\xf1\x50\x80\x02\x20\xb0\x23\x0a\x83\x20\x7d\x61\x90\x3e\xb1";
485 EXPECT_TRUE(SniffMimeTypeFromLocalData(kAACTestData
,
486 sizeof(kAACTestData
),
488 EXPECT_EQ("audio/mpeg", mime_type
);
492 // The tests need char parameters, but the ranges to test include 0xFF, and some
493 // platforms have signed chars and are noisy about it. Using an int parameter
494 // and casting it to char inside the test case solves both these problems.
495 class MimeSnifferBinaryTest
: public ::testing::TestWithParam
<int> {};
497 // From https://mimesniff.spec.whatwg.org/#binary-data-byte :
498 // A binary data byte is a byte in the range 0x00 to 0x08 (NUL to BS), the byte
499 // 0x0B (VT), a byte in the range 0x0E to 0x1A (SO to SUB), or a byte in the
500 // range 0x1C to 0x1F (FS to US).
501 TEST_P(MimeSnifferBinaryTest
, IsBinaryControlCode
) {
502 char param
= static_cast<char>(GetParam());
503 EXPECT_TRUE(LooksLikeBinary(¶m
, 1));
506 // ::testing::Range(a, b) generates a half-open range, i.e. "b" is not included.
// Four instantiations of MimeSnifferBinaryTest.
// NOTE(review): the value generator (third macro argument) of each
// instantiation is not visible in this excerpt; judging by the names they
// presumably cover three byte ranges and the single byte 0x0B -- confirm.
507 INSTANTIATE_TEST_CASE_P(MimeSnifferBinaryTestRange1
,
508 MimeSnifferBinaryTest
,
511 INSTANTIATE_TEST_CASE_P(MimeSnifferBinaryTestByte0x0B
,
512 MimeSnifferBinaryTest
,
515 INSTANTIATE_TEST_CASE_P(MimeSnifferBinaryTestRange2
,
516 MimeSnifferBinaryTest
,
519 INSTANTIATE_TEST_CASE_P(MimeSnifferBinaryTestRange3
,
520 MimeSnifferBinaryTest
,
523 class MimeSnifferPlainTextTest
: public ::testing::TestWithParam
<int> {};
525 TEST_P(MimeSnifferPlainTextTest
, NotBinaryControlCode
) {
526 char param
= static_cast<char>(GetParam());
527 EXPECT_FALSE(LooksLikeBinary(¶m
, 1));
530 INSTANTIATE_TEST_CASE_P(MimeSnifferPlainTextTestPlainTextControlCodes
,
531 MimeSnifferPlainTextTest
,
532 Values(0x09, 0x0A, 0x0C, 0x0D, 0x1B));
// Second instantiation of the plain-text fixture.
// NOTE(review): the value generator (third macro argument) is not visible in
// this excerpt; presumably it covers the bytes above the control-code range
// -- confirm.
534 INSTANTIATE_TEST_CASE_P(MimeSnifferPlainTextTestNotControlCodeRange
,
535 MimeSnifferPlainTextTest
,
538 class MimeSnifferControlCodesEdgeCaseTest
539 : public ::testing::TestWithParam
<const char*> {};
541 TEST_P(MimeSnifferControlCodesEdgeCaseTest
, EdgeCase
) {
542 const char* param
= GetParam();
543 EXPECT_TRUE(LooksLikeBinary(param
, strlen(param
)));
546 INSTANTIATE_TEST_CASE_P(MimeSnifferControlCodesEdgeCaseTest
,
547 MimeSnifferControlCodesEdgeCaseTest
,
548 Values("\x01__", // first byte is binary
549 "__\x03", // last byte is binary
550 "_\x02_" // a byte in the middle is binary