1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/basictypes.h"
6 #include "net/base/data_url.h"
7 #include "testing/gtest/include/gtest/gtest.h"
14 struct ParseTestData
{
17 const char* mime_type
;
24 TEST(DataURLTest
, Parse
) {
25 const ParseTestData tests
[] = {
44 { "data:;charset=,test",
50 { "data:TeXt/HtMl,<b>x</b>",
62 { "data:;base64,aGVsbG8gd29ybGQ=",
68 // Allow invalid mediatype for backward compatibility but set mime_type to
69 // "text/plain" instead of the invalid mediatype.
76 // When accepting an invalid mediatype, override charset with "US-ASCII"
77 { "data:foo;charset=UTF-8,boo",
83 // Invalid mediatype. Includes a slash but the type part is not a token.
84 { "data:f(oo/bar;baz=1;charset=kk,boo",
90 { "data:foo/bar;baz=1;charset=kk,boo",
96 { "data:foo/bar;charset=kk;baz=1,boo",
102 { "data:text/html,%3Chtml%3E%3Cbody%3E%3Cb%3Ehello%20world"
103 "%3C%2Fb%3E%3C%2Fbody%3E%3C%2Fhtml%3E",
107 "<html><body><b>hello world</b></body></html>" },
109 { "data:text/html,<html><body><b>hello world</b></body></html>",
113 "<html><body><b>hello world</b></body></html>" },
115 // the comma cannot be url-escaped!
122 // invalid base64 content
123 { "data:;base64,aGVs_-_-",
129 // Spaces should be removed from non-text data URLs (we already tested
131 { " bG8gd2 9ybGQ=",
144 // Other whitespace should also be removed from anything base-64 encoded.
145 { "data:;base64,aGVs bG8gd2 \n9ybGQ=",
151 // In base64 encoding, escaped whitespace should be stripped.
152 // (This test was taken from acid3)
154 { "data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207"
161 // Only unescaped whitespace should be stripped in non-base64.
163 { "data:img/png,A B %20 %0A C",
169 { "data:text/plain;charset=utf-8;base64,SGVsbMO2",
175 // Not sufficiently padded.
176 { "data:;base64,aGVsbG8gd29ybGQ",
182 // Bad encoding (truncated).
183 { "data:;base64,aGVsbG8gd29yb",
189 // BiDi control characters should be unescaped and preserved as is, and
190 // should not be replaced with % versions. In the below case, \xE2\x80\x8F
191 // is the RTL mark and the parsed text should preserve it as is.
193 "data:text/plain;charset=utf-8,\xE2\x80\x8Ftest",
199 // Same as above but with Arabic text after RTL mark.
201 "data:text/plain;charset=utf-8,"
202 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
206 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"},
208 // RTL mark encoded as %E2%80%8F should be unescaped too. Note that when
209 // wrapped in a GURL, this URL and the next effectively become the same as
210 // the previous two URLs.
212 "data:text/plain;charset=utf-8,%E2%80%8Ftest",
218 // Same as above but with Arabic text after RTL mark.
220 "data:text/plain;charset=utf-8,"
221 "%E2%80%8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
225 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"}
227 // TODO(darin): add more interesting tests
230 for (size_t i
= 0; i
< arraysize(tests
); ++i
) {
231 std::string mime_type
;
234 bool ok
= DataURL::Parse(GURL(tests
[i
].url
), &mime_type
, &charset
, &data
);
235 EXPECT_EQ(ok
, tests
[i
].is_valid
);
236 if (tests
[i
].is_valid
) {
237 EXPECT_EQ(tests
[i
].mime_type
, mime_type
);
238 EXPECT_EQ(tests
[i
].charset
, charset
);
239 EXPECT_EQ(tests
[i
].data
, data
);