1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/basictypes.h"
6 #include "net/base/data_url.h"
7 #include "testing/gtest/include/gtest/gtest.h"
12 struct ParseTestData
{
15 const char* mime_type
;
22 TEST(DataURLTest
, Parse
) {
23 const ParseTestData tests
[] = {
42 { "data:;charset=,test",
48 { "data:TeXt/HtMl,<b>x</b>",
60 { "data:;base64,aGVsbG8gd29ybGQ=",
66 // Allow invalid mediatype for backward compatibility but set mime_type to
67 // "text/plain" instead of the invalid mediatype.
74 // When accepting an invalid mediatype, override charset with "US-ASCII"
75 { "data:foo;charset=UTF-8,boo",
81 // Invalid mediatype. Includes a slash but the type part is not a token.
82 { "data:f(oo/bar;baz=1;charset=kk,boo",
88 { "data:foo/bar;baz=1;charset=kk,boo",
94 { "data:foo/bar;charset=kk;baz=1,boo",
100 { "data:text/html,%3Chtml%3E%3Cbody%3E%3Cb%3Ehello%20world"
101 "%3C%2Fb%3E%3C%2Fbody%3E%3C%2Fhtml%3E",
105 "<html><body><b>hello world</b></body></html>" },
107 { "data:text/html,<html><body><b>hello world</b></body></html>",
111 "<html><body><b>hello world</b></body></html>" },
113 // The comma separating the media type from the data cannot be URL-escaped!
120 // invalid base64 content
121 { "data:;base64,aGVs_-_-",
127 // Spaces should be removed from non-text data URLs (spaces in text data URLs were already tested above).
129 { " bG8gd2 9ybGQ=",
142 // Other whitespace should also be removed from anything base-64 encoded.
143 { "data:;base64,aGVs bG8gd2 \n9ybGQ=",
149 // In base64 encoding, escaped whitespace should be stripped.
150 // (This test was taken from acid3)
152 { "data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207"
159 // Only unescaped whitespace should be stripped in non-base64.
161 { "data:img/png,A B %20 %0A C",
167 { "data:text/plain;charset=utf-8;base64,SGVsbMO2",
173 // Not sufficiently padded.
174 { "data:;base64,aGVsbG8gd29ybGQ",
180 // Bad encoding (truncated).
181 { "data:;base64,aGVsbG8gd29yb",
187 // BiDi control characters should be unescaped and preserved as is, and
188 // should not be replaced with % versions. In the below case, \xE2\x80\x8F
189 // is the RTL mark and the parsed text should preserve it as is.
191 "data:text/plain;charset=utf-8,\xE2\x80\x8Ftest",
197 // Same as above but with Arabic text after RTL mark.
199 "data:text/plain;charset=utf-8,"
200 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
204 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"},
206 // RTL mark encoded as %E2%80%8F should be unescaped too. Note that when
207 // wrapped in a GURL, this URL and the next effectively become the same as
208 // the previous two URLs.
210 "data:text/plain;charset=utf-8,%E2%80%8Ftest",
216 // Same as above but with Arabic text after RTL mark.
218 "data:text/plain;charset=utf-8,"
219 "%E2%80%8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
223 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"}
225 // TODO(darin): add more interesting tests
228 for (size_t i
= 0; i
< arraysize(tests
); ++i
) {
229 std::string mime_type
;
233 net::DataURL::Parse(GURL(tests
[i
].url
), &mime_type
, &charset
, &data
);
234 EXPECT_EQ(ok
, tests
[i
].is_valid
);
235 if (tests
[i
].is_valid
) {
236 EXPECT_EQ(tests
[i
].mime_type
, mime_type
);
237 EXPECT_EQ(tests
[i
].charset
, charset
);
238 EXPECT_EQ(tests
[i
].data
, data
);