1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/basictypes.h"
6 #include "net/base/mime_sniffer.h"
7 #include "testing/gtest/include/gtest/gtest.h"
16 std::string type_hint
;
17 const char* mime_type
;
// Table-driven helper: walks the `count` entries of `tests`, calls
// SniffMimeType on each entry's `content`, and uses EXPECT_EQ to compare the
// entry's expected `mime_type` with the local `mime_type` string.
// NOTE(review): the remaining SniffMimeType arguments and the closing braces
// are not visible in this view; presumably &mime_type is the output argument.
20 static void TestArray(SnifferTest
* tests
, size_t count
) {
21 std::string mime_type
;
23 for (size_t i
= 0; i
< count
; ++i
) {
24 SniffMimeType(tests
[i
].content
,
// Expected value first, actual value second.
29 EXPECT_EQ(tests
[i
].mime_type
, mime_type
);
33 // TODO(evanm): convert other tests to use SniffMimeType instead of TestArray,
34 // so the error messages produced by test failures are more useful.
// Convenience overload for tests that takes std::string arguments.
// Forwards to the five-argument SniffMimeType with the content's data
// pointer and size, a GURL built from `url`, the `mime_type_hint`, and the
// address of a local `mime_type` string.
// NOTE(review): the return statement is not visible in this view; the function
// is declared to return std::string, presumably the local `mime_type`.
35 static std::string
SniffMimeType(const std::string
& content
,
36 const std::string
& url
,
37 const std::string
& mime_type_hint
) {
38 std::string mime_type
;
39 SniffMimeType(content
.data(), content
.size(), GURL(url
),
40 mime_type_hint
, &mime_type
);
// Sniffs the same buffer with content lengths 0, 1 and 2 and checks the
// resulting MIME type after each call.
// NOTE(review): the declarations of `buf` and `url` are not visible in this
// view, and `type_hint` is default-constructed unless assigned in those lines.
44 TEST(MimeSnifferTest
, BoundaryConditionsTest
) {
45 std::string mime_type
;
46 std::string type_hint
;
// Length 0: expected to sniff as text/plain.
54 SniffMimeType(buf
, 0, url
, type_hint
, &mime_type
);
55 EXPECT_EQ("text/plain", mime_type
);
// Length 1: still expected to sniff as text/plain.
56 SniffMimeType(buf
, 1, url
, type_hint
, &mime_type
);
57 EXPECT_EQ("text/plain", mime_type
);
// Length 2: expected to sniff as application/octet-stream.
58 SniffMimeType(buf
, 2, url
, type_hint
, &mime_type
);
59 EXPECT_EQ("application/octet-stream", mime_type
);
// Table-driven check of basic content signatures (HTML, GIF, PostScript, PNG,
// JPEG-looking bytes) handed to TestArray.
// Each row appears to be { content, content length, URL, type hint, expected
// MIME type } -- NOTE(review): the SnifferTest definition is only partly
// visible here, and some rows are missing their last line in this view.
62 TEST(MimeSnifferTest
, BasicSniffingTest
) {
63 SnifferTest tests
[] = {
64 { "<!DOCTYPE html PUBLIC", sizeof("<!DOCTYPE html PUBLIC")-1,
65 "http://www.example.com/",
67 { "<HtMl><Body></body></htMl>", sizeof("<HtMl><Body></body></htMl>")-1,
68 "http://www.example.com/foo.gif",
69 "application/octet-stream", "application/octet-stream" },
// In the next two rows the literal inside sizeof() uses \xAF where the
// content uses \x1F; both literals have the same length, so the computed
// size still matches the content.
70 { "GIF89a\x1F\x83\x94", sizeof("GIF89a\xAF\x83\x94")-1,
71 "http://www.example.com/foo",
72 "text/plain", "image/gif" },
73 { "Gif87a\x1F\x83\x94", sizeof("Gif87a\xAF\x83\x94")-1,
74 "http://www.example.com/foo?param=tt.gif",
75 "", "application/octet-stream" },
76 { "%!PS-Adobe-3.0", sizeof("%!PS-Adobe-3.0")-1,
77 "http://www.example.com/foo",
78 "text/plain", "text/plain" },
79 { "\x89" "PNG\x0D\x0A\x1A\x0A", sizeof("\x89" "PNG\x0D\x0A\x1A\x0A")-1,
80 "http://www.example.com/foo",
81 "application/octet-stream", "application/octet-stream" },
82 { "\xFF\xD8\xFF\x23\x49\xAF", sizeof("\xFF\xD8\xFF\x23\x49\xAF")-1,
83 "http://www.example.com/foo",
87 TestArray(tests
, arraysize(tests
));
// Table-driven check of when content starting with "Cr24\x02\x00\x00\x00" is
// reported as application/x-chrome-extension. In the visible rows that result
// is expected only for URLs whose path ends in ".crx" (a query string is
// allowed); other URLs and altered content expect application/octet-stream.
// NOTE(review): several original lines (including one row's URL) are missing
// from this view.
90 TEST(MimeSnifferTest
, ChromeExtensionsTest
) {
91 SnifferTest tests
[] = {
// Same content and path under http, https and ftp URLs.
93 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
94 "http://www.example.com/foo.crx",
95 "", "application/x-chrome-extension" },
96 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
97 "https://www.example.com/foo.crx",
98 "", "application/x-chrome-extension" },
99 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
100 "ftp://www.example.com/foo.crx",
101 "", "application/x-chrome-extension" },
103 // some other mimetypes that should get converted
104 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
105 "http://www.example.com/foo.crx",
106 "text/plain", "application/x-chrome-extension" },
107 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
108 "http://www.example.com/foo.crx",
109 "application/octet-stream", "application/x-chrome-extension" },
111 // success edge cases
112 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
113 "http://www.example.com/foo.crx?query=string",
114 "", "application/x-chrome-extension" },
115 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
116 "http://www.example.com/foo..crx",
117 "", "application/x-chrome-extension" },
119 // wrong file extension
120 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
121 "http://www.example.com/foo.bin",
122 "", "application/octet-stream" },
123 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
124 "http://www.example.com/foo.bin?monkey",
125 "", "application/octet-stream" },
126 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
128 "", "application/octet-stream" },
129 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
130 "http://www.example.com",
131 "", "application/octet-stream" },
132 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
133 "http://www.example.com/",
134 "", "application/octet-stream" },
135 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
136 "http://www.example.com/foo",
137 "", "application/octet-stream" },
138 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
139 "http://www.example.com/foocrx",
140 "", "application/octet-stream" },
141 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
142 "http://www.example.com/foo.crx.blech",
143 "", "application/octet-stream" },
// Content that differs from the usual bytes: last byte changed to \x01, or
// the usual bytes preceded by "PADDING_".
146 { "Cr24\x02\x00\x00\x01", sizeof("Cr24\x02\x00\x00\x01")-1,
147 "http://www.example.com/foo.crx?monkey",
148 "", "application/octet-stream" },
149 { "PADDING_Cr24\x02\x00\x00\x00", sizeof("PADDING_Cr24\x02\x00\x00\x00")-1,
150 "http://www.example.com/foo.crx?monkey",
151 "", "application/octet-stream" },
154 TestArray(tests
, arraysize(tests
));
// Table-driven cases handed to TestArray: HTML with leading whitespace,
// "BM"-prefixed bytes, NUL-containing bytes at a favicon.ico URL, a shell
// script, mail-style headers and XML declarations.
// NOTE(review): most rows are missing their type-hint / expected-type line in
// this view, so the expected results cannot be read from here.
157 TEST(MimeSnifferTest
, MozillaCompatibleTest
) {
158 SnifferTest tests
[] = {
159 { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1,
160 "http://www.example.com/",
162 { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1,
163 "http://www.example.com/",
164 "text/plain", "text/plain" },
165 { "BMjlakdsfk", sizeof("BMjlakdsfk")-1,
166 "http://www.example.com/foo",
168 { "\x00\x00\x30\x00", sizeof("\x00\x00\x30\x00")-1,
169 "http://www.example.com/favicon.ico",
170 "", "application/octet-stream" },
171 { "#!/bin/sh\nls /\n", sizeof("#!/bin/sh\nls /\n")-1,
172 "http://www.example.com/foo",
174 { "From: Fred\nTo: Bob\n\nHi\n.\n",
175 sizeof("From: Fred\nTo: Bob\n\nHi\n.\n")-1,
176 "http://www.example.com/foo",
178 { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
179 sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1,
180 "http://www.example.com/foo",
182 { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
183 sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1,
184 "http://www.example.com/foo",
185 "application/octet-stream", "application/octet-stream" },
188 TestArray(tests
, arraysize(tests
));
// Table-driven cases whose content embeds an HTML <script> block after either
// a "GIF87a\n" prefix or an "a\n" prefix, combined with URLs that carry
// ".html" in the query, the fragment, or the path.
// NOTE(review): the last string fragment of each content literal and every
// row's type-hint / expected-type line are missing from this view, so the
// expected results cannot be read from here.
191 TEST(MimeSnifferTest
, DontAllowPrivilegeEscalationTest
) {
192 SnifferTest tests
[] = {
193 { "GIF87a\n<html>\n<body>"
194 "<script>alert('haxorzed');\n</script>"
196 sizeof("GIF87a\n<html>\n<body>"
197 "<script>alert('haxorzed');\n</script>"
198 "</body></html>\n")-1,
199 "http://www.example.com/foo",
201 { "GIF87a\n<html>\n<body>"
202 "<script>alert('haxorzed');\n</script>"
204 sizeof("GIF87a\n<html>\n<body>"
205 "<script>alert('haxorzed');\n</script>"
206 "</body></html>\n")-1,
207 "http://www.example.com/foo?q=ttt.html",
209 { "GIF87a\n<html>\n<body>"
210 "<script>alert('haxorzed');\n</script>"
212 sizeof("GIF87a\n<html>\n<body>"
213 "<script>alert('haxorzed');\n</script>"
214 "</body></html>\n")-1,
215 "http://www.example.com/foo#ttt.html",
217 { "a\n<html>\n<body>"
218 "<script>alert('haxorzed');\n</script>"
220 sizeof("a\n<html>\n<body>"
221 "<script>alert('haxorzed');\n</script>"
222 "</body></html>\n")-1,
223 "http://www.example.com/foo",
225 { "a\n<html>\n<body>"
226 "<script>alert('haxorzed');\n</script>"
228 sizeof("a\n<html>\n<body>"
229 "<script>alert('haxorzed');\n</script>"
230 "</body></html>\n")-1,
231 "http://www.example.com/foo?q=ttt.html",
233 { "a\n<html>\n<body>"
234 "<script>alert('haxorzed');\n</script>"
236 sizeof("a\n<html>\n<body>"
237 "<script>alert('haxorzed');\n</script>"
238 "</body></html>\n")-1,
239 "http://www.example.com/foo#ttt.html",
241 { "a\n<html>\n<body>"
242 "<script>alert('haxorzed');\n</script>"
244 sizeof("a\n<html>\n<body>"
245 "<script>alert('haxorzed');\n</script>"
246 "</body></html>\n")-1,
247 "http://www.example.com/foo.html",
251 TestArray(tests
, arraysize(tests
));
// Table-driven cases whose content starts with the bytes EF BB BF (the UTF-8
// byte-order mark) or FE FF (the UTF-16 big-endian byte-order mark), handed
// to TestArray.
// NOTE(review): every row's type-hint / expected-type line is missing from
// this view, so the expected results cannot be read from here.
254 TEST(MimeSnifferTest
, UnicodeTest
) {
255 SnifferTest tests
[] = {
256 { "\xEF\xBB\xBF" "Hi there", sizeof("\xEF\xBB\xBF" "Hi there")-1,
257 "http://www.example.com/foo",
259 { "\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79",
260 sizeof("\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79")-1,
261 "http://www.example.com/foo",
263 { "\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9",
264 sizeof("\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9")-1,
265 "http://www.example.com/foo",
267 { "\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01",
268 sizeof("\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01")-1,
269 "http://www.example.com/foo",
273 TestArray(tests
, arraysize(tests
));
// Table-driven cases whose content starts with "CWS", "FLV" or "FWS", paired
// with URLs that mention ".swf" in the path, query or fragment (or not at
// all). Every expected type visible here is application/octet-stream.
// NOTE(review): one row (original line 289) is missing its type-hint /
// expected-type line in this view.
276 TEST(MimeSnifferTest
, FlashTest
) {
277 SnifferTest tests
[] = {
278 { "CWSdd\x00\xB3", sizeof("CWSdd\x00\xB3")-1,
279 "http://www.example.com/foo",
280 "", "application/octet-stream" },
281 { "FLVjdkl*(#)0sdj\x00", sizeof("FLVjdkl*(#)0sdj\x00")-1,
282 "http://www.example.com/foo?q=ttt.swf",
283 "", "application/octet-stream" },
284 { "FWS3$9\r\b\x00", sizeof("FWS3$9\r\b\x00")-1,
285 "http://www.example.com/foo#ttt.swf",
286 "", "application/octet-stream" },
287 { "FLVjdkl*(#)0sdj", sizeof("FLVjdkl*(#)0sdj")-1,
288 "http://www.example.com/foo.swf",
290 { "FLVjdkl*(#)0s\x01dj", sizeof("FLVjdkl*(#)0s\x01dj")-1,
291 "http://www.example.com/foo/bar.swf",
292 "", "application/octet-stream" },
293 { "FWS3$9\r\b\x1A", sizeof("FWS3$9\r\b\x1A")-1,
294 "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar",
295 "", "application/octet-stream" },
296 { "FWS3$9\r\x1C\b", sizeof("FWS3$9\r\x1C\b")-1,
297 "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar",
298 "text/plain", "application/octet-stream" },
301 TestArray(tests
, arraysize(tests
));
// Checks sniffing of XML-flavoured content (Atom, RSS, XHTML and generic
// XML). Each visible call passes content, an empty URL string and a type
// hint, and compares the returned string with the expected MIME type.
// NOTE(review): several calls are missing their trailing argument lines in
// this view, so their URL / hint arguments cannot be read from here.
304 TEST(MimeSnifferTest
, XMLTest
) {
305 // An easy feed to identify.
306 EXPECT_EQ("application/atom+xml",
307 SniffMimeType("<?xml?><feed", std::string(), "text/xml"));
308 // Don't sniff out of plain text.
309 EXPECT_EQ("text/plain",
310 SniffMimeType("<?xml?><feed", std::string(), "text/plain"));
312 EXPECT_EQ("application/rss+xml",
314 "<?xml version='1.0'?>\r\n<rss", std::string(), "text/xml"));
316 // The top of CNN's RSS feed, which we'd like to recognize as RSS.
317 static const char kCNNRSS
[] =
318 "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
319 "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/rss2full.xsl\" "
320 "type=\"text/xsl\" media=\"screen\"?>"
321 "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/itemcontent.css\" "
322 "type=\"text/css\" media=\"screen\"?>"
323 "<rss xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" "
// Same content, two hints: text/xml is upgraded to RSS, text/plain is kept.
326 EXPECT_EQ("application/rss+xml",
327 SniffMimeType(kCNNRSS
, std::string(), "text/xml"));
328 EXPECT_EQ("text/plain", SniffMimeType(kCNNRSS
, std::string(), "text/plain"));
330 // Don't sniff random XML as something different.
331 EXPECT_EQ("text/xml",
332 SniffMimeType("<?xml?><notafeed", std::string(), "text/xml"));
333 // Don't sniff random plain-text as something different.
334 EXPECT_EQ("text/plain",
335 SniffMimeType("<?xml?><notafeed", std::string(), "text/plain"));
337 // Positive test for the two instances we upgrade to XHTML.
338 EXPECT_EQ("application/xhtml+xml",
339 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
342 EXPECT_EQ("application/xhtml+xml",
343 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
347 // Following our behavior with HTML, don't call other mime types XHTML.
348 EXPECT_EQ("text/plain",
349 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
352 EXPECT_EQ("application/rss+xml",
353 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
355 "application/rss+xml"));
357 // Don't sniff other HTML-looking bits as HTML.
358 EXPECT_EQ("text/xml",
359 SniffMimeType("<html><head>", std::string(), "text/xml"));
360 EXPECT_EQ("text/xml",
361 SniffMimeType("<foo><html xmlns=\"http://www.w3.org/1999/xhtml\">",
366 // Test content which is >= 1024 bytes, and includes no open angle bracket.
367 // http://code.google.com/p/chromium/issues/detail?id=3521
// With a "text/xml" hint and a default-constructed GURL, the call is expected
// to return true and leave "text/xml" in mime_type.
// NOTE(review): the declaration of `content` is not visible in this view.
368 TEST(MimeSnifferTest
, XMLTestLargeNoAngledBracket
) {
369 // Make a large input, with 1024 bytes of "x".
371 content
.resize(1024);
372 std::fill(content
.begin(), content
.end(), 'x');
374 // content.size() >= 1024 so the sniff is unambiguous.
375 std::string mime_type
;
376 EXPECT_TRUE(SniffMimeType(content
.data(), content
.size(), GURL(),
377 "text/xml", &mime_type
));
378 EXPECT_EQ("text/xml", mime_type
);
381 // Test content which is >= 1024 bytes, and includes a binary looking byte.
382 // http://code.google.com/p/chromium/issues/detail?id=15314
// With a "text/plain" hint and a default-constructed GURL, the call is
// expected to return true and report application/octet-stream.
// NOTE(review): the declaration of `content` is not visible in this view.
383 TEST(MimeSnifferTest
, LooksBinary
) {
384 // Make a large input, with 1024 bytes of "x" and 1 byte of 0x01.
386 content
.resize(1024);
387 std::fill(content
.begin(), content
.end(), 'x');
// Overwrites one of the 1024 bytes (index 1000); the buffer stays 1024 bytes
// long.
388 content
[1000] = 0x01;
390 // content.size() >= 1024 so the sniff is unambiguous.
391 std::string mime_type
;
392 EXPECT_TRUE(SniffMimeType(content
.data(), content
.size(), GURL(),
393 "text/plain", &mime_type
));
394 EXPECT_EQ("application/octet-stream", mime_type
);
// Table-driven cases for Microsoft Office documents, handed to TestArray.
// The first group pairs Office type hints with an expected
// application/octet-stream; the second group uses content starting with
// D0 CF 11 E0 A1 B1 1A E1 or "PK\x03\x04" at .doc/.xls/.ppt URLs and expects
// an Office MIME type.
// NOTE(review): the content line of the first three rows and the type-hint
// line of the "PK" rows are missing from this view.
397 TEST(MimeSnifferTest
, OfficeTest
) {
398 SnifferTest tests
[] = {
399 // Check for URLs incorrectly reported as Microsoft Office files.
401 sizeof("Hi there")-1,
402 "http://www.example.com/foo.doc",
403 "application/msword", "application/octet-stream" },
405 sizeof("Hi there")-1,
406 "http://www.example.com/foo.xls",
407 "application/vnd.ms-excel", "application/octet-stream" },
409 sizeof("Hi there")-1,
410 "http://www.example.com/foo.ppt",
411 "application/vnd.ms-powerpoint", "application/octet-stream" },
412 // Check for Microsoft Office files incorrectly reported as text.
413 { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
414 sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
415 "http://www.example.com/foo.doc",
416 "text/plain", "application/msword" },
417 { "PK\x03\x04" "Hi there",
418 sizeof("PK\x03\x04" "Hi there")-1,
419 "http://www.example.com/foo.doc",
421 "application/vnd.openxmlformats-officedocument."
422 "wordprocessingml.document" },
423 { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
424 sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
425 "http://www.example.com/foo.xls",
426 "text/plain", "application/vnd.ms-excel" },
427 { "PK\x03\x04" "Hi there",
428 sizeof("PK\x03\x04" "Hi there")-1,
429 "http://www.example.com/foo.xls",
431 "application/vnd.openxmlformats-officedocument."
432 "spreadsheetml.sheet" },
433 { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
434 sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
435 "http://www.example.com/foo.ppt",
436 "text/plain", "application/vnd.ms-powerpoint" },
437 { "PK\x03\x04" "Hi there",
438 sizeof("PK\x03\x04" "Hi there")-1,
439 "http://www.example.com/foo.ppt",
441 "application/vnd.openxmlformats-officedocument."
442 "presentationml.presentation" },
445 TestArray(tests
, arraysize(tests
));
448 // TODO(thestig) Add more tests for other AV formats. Add another test case for
// Feeds short audio/video header samples to SniffMimeTypeFromLocalData and
// checks both that the call returns true and the resulting MIME type.
// Each call passes sizeof(array) without subtracting 1, so the length handed
// over includes the string literal's trailing NUL (unlike the table-driven
// tests in this file, which use sizeof(...)-1).
// NOTE(review): the last argument line of each call is not visible in this
// view; presumably &mime_type is passed as the output.
450 TEST(MimeSnifferTest
, AudioVideoTest
) {
451 std::string mime_type
;
// Sample starting with "fLaC"; expected audio/x-flac.
452 const char kFlacTestData
[] =
453 "fLaC\x00\x00\x00\x22\x12\x00\x12\x00\x00\x00\x00\x00";
454 EXPECT_TRUE(SniffMimeTypeFromLocalData(kFlacTestData
,
455 sizeof(kFlacTestData
),
457 EXPECT_EQ("audio/x-flac", mime_type
);
// WMA sample; expected video/x-ms-asf.
460 const char kWMATestData
[] =
461 "\x30\x26\xb2\x75\x8e\x66\xcf\x11\xa6\xd9\x00\xaa\x00\x62\xce\x6c";
462 EXPECT_TRUE(SniffMimeTypeFromLocalData(kWMATestData
,
463 sizeof(kWMATestData
),
465 EXPECT_EQ("video/x-ms-asf", mime_type
);
468 // mp4a, m4b, m4p, and alac extension files which share the same container
// Bytes 4-11 of this sample spell "ftypM4A " in ASCII; expected video/mp4.
470 const char kMP4TestData
[] =
471 "\x00\x00\x00\x20\x66\x74\x79\x70\x4d\x34\x41\x20\x00\x00\x00\x00";
472 EXPECT_TRUE(SniffMimeTypeFromLocalData(kMP4TestData
,
473 sizeof(kMP4TestData
),
475 EXPECT_EQ("video/mp4", mime_type
);
// AAC sample; expected audio/mpeg.
478 const char kAACTestData
[] =
479 "\xff\xf1\x50\x80\x02\x20\xb0\x23\x0a\x83\x20\x7d\x61\x90\x3e\xb1";
480 EXPECT_TRUE(SniffMimeTypeFromLocalData(kAACTestData
,
481 sizeof(kAACTestData
),
483 EXPECT_EQ("audio/mpeg", mime_type
);