Probably broke Win7 Tests (dbg)(6). http://build.chromium.org/p/chromium.win/builders...
[chromium-blink-merge.git] / net / base / mime_sniffer_unittest.cc
blobe4f2d5cf3a08cb6f5c5b7d27bad123a834345303
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/basictypes.h"
6 #include "net/base/mime_sniffer.h"
7 #include "testing/gtest/include/gtest/gtest.h"
8 #include "url/gurl.h"
10 namespace net {
// One row of a table-driven sniffing test: the bytes handed to the sniffer,
// the URL and type hint that accompany them, and the MIME type the sniffer is
// expected to report.
//
// Rows are brace-initialized positionally throughout this file, so the member
// order below must not change.
struct SnifferTest {
  const char* content;    // Bytes to sniff; may contain embedded NULs.
  size_t content_len;     // Number of bytes of |content| to sniff.
  std::string url;        // URL passed to the sniffer (wrapped in a GURL).
  std::string type_hint;  // MIME type hint passed to the sniffer; may be "".
  const char* mime_type;  // Expected sniffed MIME type.
};
20 static void TestArray(SnifferTest* tests, size_t count) {
21 std::string mime_type;
23 for (size_t i = 0; i < count; ++i) {
24 SniffMimeType(tests[i].content,
25 tests[i].content_len,
26 GURL(tests[i].url),
27 tests[i].type_hint,
28 &mime_type);
29 EXPECT_EQ(tests[i].mime_type, mime_type);
33 // TODO(evanm): convert other tests to use SniffMimeType instead of TestArray,
34 // so the error messages produced by test failures are more useful.
35 static std::string SniffMimeType(const std::string& content,
36 const std::string& url,
37 const std::string& mime_type_hint) {
38 std::string mime_type;
39 SniffMimeType(content.data(), content.size(), GURL(url),
40 mime_type_hint, &mime_type);
41 return mime_type;
44 TEST(MimeSnifferTest, BoundaryConditionsTest) {
45 std::string mime_type;
46 std::string type_hint;
48 char buf[] = {
49 'd', '\x1f', '\xFF'
52 GURL url;
54 SniffMimeType(buf, 0, url, type_hint, &mime_type);
55 EXPECT_EQ("text/plain", mime_type);
56 SniffMimeType(buf, 1, url, type_hint, &mime_type);
57 EXPECT_EQ("text/plain", mime_type);
58 SniffMimeType(buf, 2, url, type_hint, &mime_type);
59 EXPECT_EQ("application/octet-stream", mime_type);
62 TEST(MimeSnifferTest, BasicSniffingTest) {
63 SnifferTest tests[] = {
64 { "<!DOCTYPE html PUBLIC", sizeof("<!DOCTYPE html PUBLIC")-1,
65 "http://www.example.com/",
66 "", "text/html" },
67 { "<HtMl><Body></body></htMl>", sizeof("<HtMl><Body></body></htMl>")-1,
68 "http://www.example.com/foo.gif",
69 "application/octet-stream", "application/octet-stream" },
70 { "GIF89a\x1F\x83\x94", sizeof("GIF89a\xAF\x83\x94")-1,
71 "http://www.example.com/foo",
72 "text/plain", "image/gif" },
73 { "Gif87a\x1F\x83\x94", sizeof("Gif87a\xAF\x83\x94")-1,
74 "http://www.example.com/foo?param=tt.gif",
75 "", "application/octet-stream" },
76 { "%!PS-Adobe-3.0", sizeof("%!PS-Adobe-3.0")-1,
77 "http://www.example.com/foo",
78 "text/plain", "text/plain" },
79 { "\x89" "PNG\x0D\x0A\x1A\x0A", sizeof("\x89" "PNG\x0D\x0A\x1A\x0A")-1,
80 "http://www.example.com/foo",
81 "application/octet-stream", "application/octet-stream" },
82 { "\xFF\xD8\xFF\x23\x49\xAF", sizeof("\xFF\xD8\xFF\x23\x49\xAF")-1,
83 "http://www.example.com/foo",
84 "", "image/jpeg" },
87 TestArray(tests, arraysize(tests));
90 TEST(MimeSnifferTest, ChromeExtensionsTest) {
91 SnifferTest tests[] = {
92 // schemes
93 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
94 "http://www.example.com/foo.crx",
95 "", "application/x-chrome-extension" },
96 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
97 "https://www.example.com/foo.crx",
98 "", "application/x-chrome-extension" },
99 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
100 "ftp://www.example.com/foo.crx",
101 "", "application/x-chrome-extension" },
103 // some other mimetypes that should get converted
104 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
105 "http://www.example.com/foo.crx",
106 "text/plain", "application/x-chrome-extension" },
107 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
108 "http://www.example.com/foo.crx",
109 "application/octet-stream", "application/x-chrome-extension" },
111 // success edge cases
112 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
113 "http://www.example.com/foo.crx?query=string",
114 "", "application/x-chrome-extension" },
115 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
116 "http://www.example.com/foo..crx",
117 "", "application/x-chrome-extension" },
119 // wrong file extension
120 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
121 "http://www.example.com/foo.bin",
122 "", "application/octet-stream" },
123 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
124 "http://www.example.com/foo.bin?monkey",
125 "", "application/octet-stream" },
126 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
127 "invalid-url",
128 "", "application/octet-stream" },
129 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
130 "http://www.example.com",
131 "", "application/octet-stream" },
132 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
133 "http://www.example.com/",
134 "", "application/octet-stream" },
135 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
136 "http://www.example.com/foo",
137 "", "application/octet-stream" },
138 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
139 "http://www.example.com/foocrx",
140 "", "application/octet-stream" },
141 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
142 "http://www.example.com/foo.crx.blech",
143 "", "application/octet-stream" },
145 // wrong magic
146 { "Cr24\x02\x00\x00\x01", sizeof("Cr24\x02\x00\x00\x01")-1,
147 "http://www.example.com/foo.crx?monkey",
148 "", "application/octet-stream" },
149 { "PADDING_Cr24\x02\x00\x00\x00", sizeof("PADDING_Cr24\x02\x00\x00\x00")-1,
150 "http://www.example.com/foo.crx?monkey",
151 "", "application/octet-stream" },
154 TestArray(tests, arraysize(tests));
157 TEST(MimeSnifferTest, MozillaCompatibleTest) {
158 SnifferTest tests[] = {
159 { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1,
160 "http://www.example.com/",
161 "", "text/html" },
162 { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1,
163 "http://www.example.com/",
164 "text/plain", "text/plain" },
165 { "BMjlakdsfk", sizeof("BMjlakdsfk")-1,
166 "http://www.example.com/foo",
167 "", "image/bmp" },
168 { "\x00\x00\x30\x00", sizeof("\x00\x00\x30\x00")-1,
169 "http://www.example.com/favicon.ico",
170 "", "application/octet-stream" },
171 { "#!/bin/sh\nls /\n", sizeof("#!/bin/sh\nls /\n")-1,
172 "http://www.example.com/foo",
173 "", "text/plain" },
174 { "From: Fred\nTo: Bob\n\nHi\n.\n",
175 sizeof("From: Fred\nTo: Bob\n\nHi\n.\n")-1,
176 "http://www.example.com/foo",
177 "", "text/plain" },
178 { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
179 sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1,
180 "http://www.example.com/foo",
181 "", "text/xml" },
182 { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
183 sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1,
184 "http://www.example.com/foo",
185 "application/octet-stream", "application/octet-stream" },
188 TestArray(tests, arraysize(tests));
191 TEST(MimeSnifferTest, DontAllowPrivilegeEscalationTest) {
192 SnifferTest tests[] = {
193 { "GIF87a\n<html>\n<body>"
194 "<script>alert('haxorzed');\n</script>"
195 "</body></html>\n",
196 sizeof("GIF87a\n<html>\n<body>"
197 "<script>alert('haxorzed');\n</script>"
198 "</body></html>\n")-1,
199 "http://www.example.com/foo",
200 "", "image/gif" },
201 { "GIF87a\n<html>\n<body>"
202 "<script>alert('haxorzed');\n</script>"
203 "</body></html>\n",
204 sizeof("GIF87a\n<html>\n<body>"
205 "<script>alert('haxorzed');\n</script>"
206 "</body></html>\n")-1,
207 "http://www.example.com/foo?q=ttt.html",
208 "", "image/gif" },
209 { "GIF87a\n<html>\n<body>"
210 "<script>alert('haxorzed');\n</script>"
211 "</body></html>\n",
212 sizeof("GIF87a\n<html>\n<body>"
213 "<script>alert('haxorzed');\n</script>"
214 "</body></html>\n")-1,
215 "http://www.example.com/foo#ttt.html",
216 "", "image/gif" },
217 { "a\n<html>\n<body>"
218 "<script>alert('haxorzed');\n</script>"
219 "</body></html>\n",
220 sizeof("a\n<html>\n<body>"
221 "<script>alert('haxorzed');\n</script>"
222 "</body></html>\n")-1,
223 "http://www.example.com/foo",
224 "", "text/plain" },
225 { "a\n<html>\n<body>"
226 "<script>alert('haxorzed');\n</script>"
227 "</body></html>\n",
228 sizeof("a\n<html>\n<body>"
229 "<script>alert('haxorzed');\n</script>"
230 "</body></html>\n")-1,
231 "http://www.example.com/foo?q=ttt.html",
232 "", "text/plain" },
233 { "a\n<html>\n<body>"
234 "<script>alert('haxorzed');\n</script>"
235 "</body></html>\n",
236 sizeof("a\n<html>\n<body>"
237 "<script>alert('haxorzed');\n</script>"
238 "</body></html>\n")-1,
239 "http://www.example.com/foo#ttt.html",
240 "", "text/plain" },
241 { "a\n<html>\n<body>"
242 "<script>alert('haxorzed');\n</script>"
243 "</body></html>\n",
244 sizeof("a\n<html>\n<body>"
245 "<script>alert('haxorzed');\n</script>"
246 "</body></html>\n")-1,
247 "http://www.example.com/foo.html",
248 "", "text/plain" },
251 TestArray(tests, arraysize(tests));
254 TEST(MimeSnifferTest, UnicodeTest) {
255 SnifferTest tests[] = {
256 { "\xEF\xBB\xBF" "Hi there", sizeof("\xEF\xBB\xBF" "Hi there")-1,
257 "http://www.example.com/foo",
258 "", "text/plain" },
259 { "\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79",
260 sizeof("\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79")-1,
261 "http://www.example.com/foo",
262 "", "text/plain" },
263 { "\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9",
264 sizeof("\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9")-1,
265 "http://www.example.com/foo",
266 "", "text/plain" },
267 { "\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01",
268 sizeof("\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01")-1,
269 "http://www.example.com/foo",
270 "", "text/plain" },
273 TestArray(tests, arraysize(tests));
276 TEST(MimeSnifferTest, FlashTest) {
277 SnifferTest tests[] = {
278 { "CWSdd\x00\xB3", sizeof("CWSdd\x00\xB3")-1,
279 "http://www.example.com/foo",
280 "", "application/octet-stream" },
281 { "FLVjdkl*(#)0sdj\x00", sizeof("FLVjdkl*(#)0sdj\x00")-1,
282 "http://www.example.com/foo?q=ttt.swf",
283 "", "application/octet-stream" },
284 { "FWS3$9\r\b\x00", sizeof("FWS3$9\r\b\x00")-1,
285 "http://www.example.com/foo#ttt.swf",
286 "", "application/octet-stream" },
287 { "FLVjdkl*(#)0sdj", sizeof("FLVjdkl*(#)0sdj")-1,
288 "http://www.example.com/foo.swf",
289 "", "text/plain" },
290 { "FLVjdkl*(#)0s\x01dj", sizeof("FLVjdkl*(#)0s\x01dj")-1,
291 "http://www.example.com/foo/bar.swf",
292 "", "application/octet-stream" },
293 { "FWS3$9\r\b\x1A", sizeof("FWS3$9\r\b\x1A")-1,
294 "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar",
295 "", "application/octet-stream" },
296 { "FWS3$9\r\x1C\b", sizeof("FWS3$9\r\x1C\b")-1,
297 "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar",
298 "text/plain", "application/octet-stream" },
301 TestArray(tests, arraysize(tests));
304 TEST(MimeSnifferTest, XMLTest) {
305 // An easy feed to identify.
306 EXPECT_EQ("application/atom+xml",
307 SniffMimeType("<?xml?><feed", std::string(), "text/xml"));
308 // Don't sniff out of plain text.
309 EXPECT_EQ("text/plain",
310 SniffMimeType("<?xml?><feed", std::string(), "text/plain"));
311 // Simple RSS.
312 EXPECT_EQ("application/rss+xml",
313 SniffMimeType(
314 "<?xml version='1.0'?>\r\n<rss", std::string(), "text/xml"));
316 // The top of CNN's RSS feed, which we'd like to recognize as RSS.
317 static const char kCNNRSS[] =
318 "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
319 "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/rss2full.xsl\" "
320 "type=\"text/xsl\" media=\"screen\"?>"
321 "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/itemcontent.css\" "
322 "type=\"text/css\" media=\"screen\"?>"
323 "<rss xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" "
324 "version=\"2.0\">";
325 // CNN's RSS
326 EXPECT_EQ("application/rss+xml",
327 SniffMimeType(kCNNRSS, std::string(), "text/xml"));
328 EXPECT_EQ("text/plain", SniffMimeType(kCNNRSS, std::string(), "text/plain"));
330 // Don't sniff random XML as something different.
331 EXPECT_EQ("text/xml",
332 SniffMimeType("<?xml?><notafeed", std::string(), "text/xml"));
333 // Don't sniff random plain-text as something different.
334 EXPECT_EQ("text/plain",
335 SniffMimeType("<?xml?><notafeed", std::string(), "text/plain"));
337 // Positive test for the two instances we upgrade to XHTML.
338 EXPECT_EQ("application/xhtml+xml",
339 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
340 std::string(),
341 "text/xml"));
342 EXPECT_EQ("application/xhtml+xml",
343 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
344 std::string(),
345 "application/xml"));
347 // Following our behavior with HTML, don't call other mime types XHTML.
348 EXPECT_EQ("text/plain",
349 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
350 std::string(),
351 "text/plain"));
352 EXPECT_EQ("application/rss+xml",
353 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
354 std::string(),
355 "application/rss+xml"));
357 // Don't sniff other HTML-looking bits as HTML.
358 EXPECT_EQ("text/xml",
359 SniffMimeType("<html><head>", std::string(), "text/xml"));
360 EXPECT_EQ("text/xml",
361 SniffMimeType("<foo><html xmlns=\"http://www.w3.org/1999/xhtml\">",
362 std::string(),
363 "text/xml"));
366 // Test content which is >= 1024 bytes, and includes no open angle bracket.
367 // http://code.google.com/p/chromium/issues/detail?id=3521
368 TEST(MimeSnifferTest, XMLTestLargeNoAngledBracket) {
369 // Make a large input, with 1024 bytes of "x".
370 std::string content;
371 content.resize(1024);
372 std::fill(content.begin(), content.end(), 'x');
374 // content.size() >= 1024 so the sniff is unambiguous.
375 std::string mime_type;
376 EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(),
377 "text/xml", &mime_type));
378 EXPECT_EQ("text/xml", mime_type);
381 // Test content which is >= 1024 bytes, and includes a binary looking byte.
382 // http://code.google.com/p/chromium/issues/detail?id=15314
383 TEST(MimeSnifferTest, LooksBinary) {
384 // Make a large input, with 1024 bytes of "x" and 1 byte of 0x01.
385 std::string content;
386 content.resize(1024);
387 std::fill(content.begin(), content.end(), 'x');
388 content[1000] = 0x01;
390 // content.size() >= 1024 so the sniff is unambiguous.
391 std::string mime_type;
392 EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(),
393 "text/plain", &mime_type));
394 EXPECT_EQ("application/octet-stream", mime_type);
397 TEST(MimeSnifferTest, OfficeTest) {
398 SnifferTest tests[] = {
399 // Check for URLs incorrectly reported as Microsoft Office files.
400 { "Hi there",
401 sizeof("Hi there")-1,
402 "http://www.example.com/foo.doc",
403 "application/msword", "application/octet-stream" },
404 { "Hi there",
405 sizeof("Hi there")-1,
406 "http://www.example.com/foo.xls",
407 "application/vnd.ms-excel", "application/octet-stream" },
408 { "Hi there",
409 sizeof("Hi there")-1,
410 "http://www.example.com/foo.ppt",
411 "application/vnd.ms-powerpoint", "application/octet-stream" },
412 // Check for Microsoft Office files incorrectly reported as text.
413 { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
414 sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
415 "http://www.example.com/foo.doc",
416 "text/plain", "application/msword" },
417 { "PK\x03\x04" "Hi there",
418 sizeof("PK\x03\x04" "Hi there")-1,
419 "http://www.example.com/foo.doc",
420 "text/plain",
421 "application/vnd.openxmlformats-officedocument."
422 "wordprocessingml.document" },
423 { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
424 sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
425 "http://www.example.com/foo.xls",
426 "text/plain", "application/vnd.ms-excel" },
427 { "PK\x03\x04" "Hi there",
428 sizeof("PK\x03\x04" "Hi there")-1,
429 "http://www.example.com/foo.xls",
430 "text/plain",
431 "application/vnd.openxmlformats-officedocument."
432 "spreadsheetml.sheet" },
433 { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there",
434 sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1,
435 "http://www.example.com/foo.ppt",
436 "text/plain", "application/vnd.ms-powerpoint" },
437 { "PK\x03\x04" "Hi there",
438 sizeof("PK\x03\x04" "Hi there")-1,
439 "http://www.example.com/foo.ppt",
440 "text/plain",
441 "application/vnd.openxmlformats-officedocument."
442 "presentationml.presentation" },
445 TestArray(tests, arraysize(tests));
448 // TODO(thestig) Add more tests for other AV formats. Add another test case for
449 // RAW images.
450 TEST(MimeSnifferTest, AudioVideoTest) {
451 std::string mime_type;
452 const char kFlacTestData[] =
453 "fLaC\x00\x00\x00\x22\x12\x00\x12\x00\x00\x00\x00\x00";
454 EXPECT_TRUE(SniffMimeTypeFromLocalData(kFlacTestData,
455 sizeof(kFlacTestData),
456 &mime_type));
457 EXPECT_EQ("audio/x-flac", mime_type);
458 mime_type.clear();
460 const char kWMATestData[] =
461 "\x30\x26\xb2\x75\x8e\x66\xcf\x11\xa6\xd9\x00\xaa\x00\x62\xce\x6c";
462 EXPECT_TRUE(SniffMimeTypeFromLocalData(kWMATestData,
463 sizeof(kWMATestData),
464 &mime_type));
465 EXPECT_EQ("video/x-ms-asf", mime_type);
466 mime_type.clear();
468 // mp4a, m4b, m4p, and alac extension files which share the same container
469 // format.
470 const char kMP4TestData[] =
471 "\x00\x00\x00\x20\x66\x74\x79\x70\x4d\x34\x41\x20\x00\x00\x00\x00";
472 EXPECT_TRUE(SniffMimeTypeFromLocalData(kMP4TestData,
473 sizeof(kMP4TestData),
474 &mime_type));
475 EXPECT_EQ("video/mp4", mime_type);
476 mime_type.clear();
478 const char kAACTestData[] =
479 "\xff\xf1\x50\x80\x02\x20\xb0\x23\x0a\x83\x20\x7d\x61\x90\x3e\xb1";
480 EXPECT_TRUE(SniffMimeTypeFromLocalData(kAACTestData,
481 sizeof(kAACTestData),
482 &mime_type));
483 EXPECT_EQ("audio/mpeg", mime_type);
484 mime_type.clear();
487 } // namespace net