net/base/data_url_unittest.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/basictypes.h"
   6 #include "net/base/data_url.h"
   7 #include "testing/gtest/include/gtest/gtest.h"
   8 #include "url/gurl.h"
   9
  10 namespace net {
  11
  12 namespace {
  13
  14 struct ParseTestData {
  15   const char* url;
  16   bool is_valid;
  17   const char* mime_type;
  18   const char* charset;
  19   const char* data;
  20 };
  21
  22 }  // namespace
  23
  24 TEST(DataURLTest, Parse) {
  25   const ParseTestData tests[] = {
  26     { "data:",
  27        false,
  28        "",
  29        "",
  30        "" },
  31
  32     { "data:,",
  33       true,
  34       "text/plain",
  35       "US-ASCII",
  36       "" },
  37
  38     { "data:;base64,",
  39       true,
  40       "text/plain",
  41       "US-ASCII",
  42       "" },
  43
  44     { "data:;charset=,test",
  45       false,
  46       "",
  47       "",
  48       "" },
  49
  50     { "data:TeXt/HtMl,<b>x</b>",
  51       true,
  52       "text/html",
  53       "US-ASCII",
  54       "<b>x</b>" },
  55
  56     { "data:,foo",
  57       true,
  58       "text/plain",
  59       "US-ASCII",
  60       "foo" },
  61
  62     { "data:;base64,aGVsbG8gd29ybGQ=",
  63       true,
  64       "text/plain",
  65       "US-ASCII",
  66       "hello world" },
  67
  68     // Allow invalid mediatype for backward compatibility but set mime_type to
  69     // "text/plain" instead of the invalid mediatype.
  70     { "data:foo,boo",
  71       true,
  72       "text/plain",
  73       "US-ASCII",
  74       "boo" },
  75
  76     // When accepting an invalid mediatype, override charset with "US-ASCII"
  77     { "data:foo;charset=UTF-8,boo",
  78       true,
  79       "text/plain",
  80       "US-ASCII",
  81       "boo" },
  82
  83     // Invalid mediatype. Includes a slash but the type part is not a token.
  84     { "data:f(oo/bar;baz=1;charset=kk,boo",
  85       true,
  86       "text/plain",
  87       "US-ASCII",
  88       "boo" },
  89
  90     { "data:foo/bar;baz=1;charset=kk,boo",
  91       true,
  92       "foo/bar",
  93       "kk",
  94       "boo" },
  95
  96     { "data:foo/bar;charset=kk;baz=1,boo",
  97       true,
  98       "foo/bar",
  99       "kk",
 100       "boo" },
 101
 102     { "data:text/html,%3Chtml%3E%3Cbody%3E%3Cb%3Ehello%20world"
 103           "%3C%2Fb%3E%3C%2Fbody%3E%3C%2Fhtml%3E",
 104       true,
 105       "text/html",
 106       "US-ASCII",
 107       "<html><body><b>hello world</b></body></html>" },
 108
 109     { "data:text/html,<html><body><b>hello world</b></body></html>",
 110       true,
 111       "text/html",
 112       "US-ASCII",
 113       "<html><body><b>hello world</b></body></html>" },
 114
 115     // the comma cannot be url-escaped!
 116     { "data:%2Cblah",
 117       false,
 118       "",
 119       "",
 120       "" },
 121
 122     // invalid base64 content
 123     { "data:;base64,aGVs_-_-",
 124       false,
 125       "",
 126       "",
 127       "" },
 128
 129     // Spaces should be removed from non-text data URLs (we already tested
 130     // spaces above).
 131     { "data:image/fractal,a b c d e f g",
 132       true,
 133       "image/fractal",
 134       "US-ASCII",
 135       "abcdefg" },
 136
 137     // Spaces should also be removed from anything base-64 encoded
 138     { "data:;base64,aGVs bG8gd2  9ybGQ=",
 139       true,
 140       "text/plain",
 141       "US-ASCII",
 142       "hello world" },
 143
 144     // Other whitespace should also be removed from anything base-64 encoded.
 145     { "data:;base64,aGVs bG8gd2  \n9ybGQ=",
 146       true,
 147       "text/plain",
 148       "US-ASCII",
 149       "hello world" },
 150
 151     // In base64 encoding, escaped whitespace should be stripped.
 152     // (This test was taken from acid3)
 153     // http://b/1054495
 154     { "data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207"
 155           "%20",
 156       true,
 157       "text/javascript",
 158       "US-ASCII",
 159       "d4 = 'four';" },
 160
 161     // Only unescaped whitespace should be stripped in non-base64.
 162     // http://b/1157796
 163     { "data:img/png,A  B  %20  %0A  C",
 164       true,
 165       "img/png",
 166       "US-ASCII",
 167       "AB \nC" },
 168
 169     { "data:text/plain;charset=utf-8;base64,SGVsbMO2",
 170       true,
 171       "text/plain",
 172       "utf-8",
 173       "Hell\xC3\xB6" },
 174
 175     // Not sufficiently padded.
 176     { "data:;base64,aGVsbG8gd29ybGQ",
 177       true,
 178       "text/plain",
 179       "US-ASCII",
 180       "hello world" },
 181
 182     // Bad encoding (truncated).
 183     { "data:;base64,aGVsbG8gd29yb",
 184       false,
 185       "",
 186       "",
 187       "" },
 188
 189     // BiDi control characters should be unescaped and preserved as is, and
 190     // should not be replaced with % versions. In the below case, \xE2\x80\x8F
 191     // is the RTL mark and the parsed text should preserve it as is.
 192     {
 193       "data:text/plain;charset=utf-8,\xE2\x80\x8Ftest",
 194       true,
 195       "text/plain",
 196       "utf-8",
 197       "\xE2\x80\x8Ftest"},
 198
 199     // Same as above but with Arabic text after RTL mark.
 200     {
 201       "data:text/plain;charset=utf-8,"
 202           "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
 203       true,
 204       "text/plain",
 205       "utf-8",
 206       "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"},
 207
 208     // RTL mark encoded as %E2%80%8F should be unescaped too. Note that when
 209     // wrapped in a GURL, this URL and the next effectively become the same as
 210     // the previous two URLs.
 211     {
 212       "data:text/plain;charset=utf-8,%E2%80%8Ftest",
 213       true,
 214       "text/plain",
 215       "utf-8",
 216       "\xE2\x80\x8Ftest"},
 217
 218     // Same as above but with Arabic text after RTL mark.
 219     {
 220       "data:text/plain;charset=utf-8,"
 221           "%E2%80%8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
 222       true,
 223       "text/plain",
 224       "utf-8",
 225       "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"}
 226
 227     // TODO(darin): add more interesting tests
 228   };
 229
 230   for (size_t i = 0; i < arraysize(tests); ++i) {
 231     std::string mime_type;
 232     std::string charset;
 233     std::string data;
 234     bool ok = DataURL::Parse(GURL(tests[i].url), &mime_type, &charset, &data);
 235     EXPECT_EQ(ok, tests[i].is_valid);
 236     if (tests[i].is_valid) {
 237       EXPECT_EQ(tests[i].mime_type, mime_type);
 238       EXPECT_EQ(tests[i].charset, charset);
 239       EXPECT_EQ(tests[i].data, data);
 240     }
 241   }
 242 }
 243
 244 }  // namespace net