Update V8 to version 4.7.42.
[chromium-blink-merge.git] / net / base / data_url_unittest.cc
blobfb7dc76d9540b8ebbb964bf7a1ee560d81f3ea95
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/basictypes.h"
6 #include "net/base/data_url.h"
7 #include "testing/gtest/include/gtest/gtest.h"
8 #include "url/gurl.h"
10 namespace net {
12 namespace {
// One row of the table used by the DataURLTest.Parse test below: an input URL
// together with the results that parsing it is expected to produce.
14 struct ParseTestData {
// Input string; the test wraps it in a GURL before handing it to
// DataURL::Parse.
15 const char* url;
// Expected boolean result of DataURL::Parse for |url|.
16 bool is_valid;
// Expected MIME type. Only compared when |is_valid| is true.
17 const char* mime_type;
// Expected charset. Only compared when |is_valid| is true.
18 const char* charset;
// Expected decoded payload. Only compared when |is_valid| is true.
19 const char* data;
// NOTE(review): the closing "};" of this struct is not visible in this view
// (the embedded numbering jumps from 19 to 22) - confirm against upstream.
22 } // namespace
// Table-driven test for DataURL::Parse. Each entry's URL is wrapped in a GURL
// and parsed; the boolean result is compared with |is_valid|, and the parsed
// mime type, charset and data are compared with the table only for entries
// that are expected to be valid.
//
// NOTE(review): the embedded line numbers skip in several places inside the
// table (117->120, 124->127, 131->139, 184->187, and the lines that should
// open the last four entries), and the closing braces of the table, loop and
// test body are not visible. Some initializer lines therefore appear to be
// missing from this view - confirm against the upstream file.
24 TEST(DataURLTest, Parse) {
25 const ParseTestData tests[] = {
26 { "data:",
27 false,
28 "",
29 "",
30 "" },
32 { "data:,",
33 true,
34 "text/plain",
35 "US-ASCII",
36 "" },
38 { "data:;base64,",
39 true,
40 "text/plain",
41 "US-ASCII",
42 "" },
44 { "data:;charset=,test",
45 false,
46 "",
47 "",
48 "" },
50 { "data:TeXt/HtMl,<b>x</b>",
51 true,
52 "text/html",
53 "US-ASCII",
54 "<b>x</b>" },
56 { "data:,foo",
57 true,
58 "text/plain",
59 "US-ASCII",
60 "foo" },
62 { "data:;base64,aGVsbG8gd29ybGQ=",
63 true,
64 "text/plain",
65 "US-ASCII",
66 "hello world" },
68 // Allow invalid mediatype for backward compatibility but set mime_type to
69 // "text/plain" instead of the invalid mediatype.
70 { "data:foo,boo",
71 true,
72 "text/plain",
73 "US-ASCII",
74 "boo" },
76 // When accepting an invalid mediatype, override charset with "US-ASCII"
77 { "data:foo;charset=UTF-8,boo",
78 true,
79 "text/plain",
80 "US-ASCII",
81 "boo" },
83 // Invalid mediatype. Includes a slash but the type part is not a token.
84 { "data:f(oo/bar;baz=1;charset=kk,boo",
85 true,
86 "text/plain",
87 "US-ASCII",
88 "boo" },
90 { "data:foo/bar;baz=1;charset=kk,boo",
91 true,
92 "foo/bar",
93 "kk",
94 "boo" },
96 { "data:foo/bar;charset=kk;baz=1,boo",
97 true,
98 "foo/bar",
99 "kk",
100 "boo" },
102 { "data:text/html,%3Chtml%3E%3Cbody%3E%3Cb%3Ehello%20world"
103 "%3C%2Fb%3E%3C%2Fbody%3E%3C%2Fhtml%3E",
104 true,
105 "text/html",
106 "US-ASCII",
107 "<html><body><b>hello world</b></body></html>" },
109 { "data:text/html,<html><body><b>hello world</b></body></html>",
110 true,
111 "text/html",
112 "US-ASCII",
113 "<html><body><b>hello world</b></body></html>" },
115 // the comma cannot be url-escaped!
116 { "data:%2Cblah",
117 false,
120 "" },
122 // invalid base64 content
123 { "data:;base64,aGVs_-_-",
124 false,
127 "" },
129 // Spaces should be removed from non-text data URLs (we already tested
130 // spaces above).
// NOTE(review): the literal below has no "data:" scheme and the embedded
// numbering jumps from 131 to 139, so this looks like two table entries fused
// together by the extraction - verify against the upstream file.
131 { " bG8gd2 9ybGQ=",
139 true,
140 "text/plain",
141 "US-ASCII",
142 "hello world" },
144 // Other whitespace should also be removed from anything base-64 encoded.
145 { "data:;base64,aGVs bG8gd2 \n9ybGQ=",
146 true,
147 "text/plain",
148 "US-ASCII",
149 "hello world" },
151 // In base64 encoding, escaped whitespace should be stripped.
152 // (This test was taken from acid3)
153 // http://b/1054495
154 { "data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207"
155 "%20",
156 true,
157 "text/javascript",
158 "US-ASCII",
159 "d4 = 'four';" },
161 // Only unescaped whitespace should be stripped in non-base64.
162 // http://b/1157796
163 { "data:img/png,A B %20 %0A C",
164 true,
165 "img/png",
166 "US-ASCII",
167 "AB \nC" },
169 { "data:text/plain;charset=utf-8;base64,SGVsbMO2",
170 true,
171 "text/plain",
172 "utf-8",
173 "Hell\xC3\xB6" },
175 // Not sufficiently padded.
176 { "data:;base64,aGVsbG8gd29ybGQ",
177 true,
178 "text/plain",
179 "US-ASCII",
180 "hello world" },
182 // Bad encoding (truncated).
183 { "data:;base64,aGVsbG8gd29yb",
184 false,
187 "" },
189 // BiDi control characters should be unescaped and preserved as is, and
190 // should not be replaced with % versions. In the below case, \xE2\x80\x8F
191 // is the RTL mark and the parsed text should preserve it as is.
193 "data:text/plain;charset=utf-8,\xE2\x80\x8Ftest",
194 true,
195 "text/plain",
196 "utf-8",
197 "\xE2\x80\x8Ftest"},
199 // Same as above but with Arabic text after RTL mark.
201 "data:text/plain;charset=utf-8,"
202 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
203 true,
204 "text/plain",
205 "utf-8",
206 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"},
208 // RTL mark encoded as %E2%80%8F should be unescaped too. Note that when
209 // wrapped in a GURL, this URL and the next effectively become the same as
210 // the previous two URLs.
212 "data:text/plain;charset=utf-8,%E2%80%8Ftest",
213 true,
214 "text/plain",
215 "utf-8",
216 "\xE2\x80\x8Ftest"},
218 // Same as above but with Arabic text after RTL mark.
220 "data:text/plain;charset=utf-8,"
221 "%E2%80%8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
222 true,
223 "text/plain",
224 "utf-8",
225 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"}
227 // TODO(darin): add more interesting tests
// Run every table entry through the parser with fresh output strings.
230 for (size_t i = 0; i < arraysize(tests); ++i) {
231 std::string mime_type;
232 std::string charset;
233 std::string data;
234 bool ok = DataURL::Parse(GURL(tests[i].url), &mime_type, &charset, &data);
// NOTE(review): this check passes the observed value first, whereas the three
// checks below pass the table value first - consider aligning the order.
235 EXPECT_EQ(ok, tests[i].is_valid);
// The parsed outputs are only compared for entries expected to parse.
236 if (tests[i].is_valid) {
237 EXPECT_EQ(tests[i].mime_type, mime_type);
238 EXPECT_EQ(tests[i].charset, charset);
239 EXPECT_EQ(tests[i].data, data);
244 } // namespace net