third_party/WebKit/Source/wtf/text/WTFStringTest.cpp

   1 /*
   2  * Copyright (C) 2012 Apple Inc. All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions
   6  * are met:
   7  * 1. Redistributions of source code must retain the above copyright
   8  *    notice, this list of conditions and the following disclaimer.
   9  * 2. Redistributions in binary form must reproduce the above copyright
  10  *    notice, this list of conditions and the following disclaimer in the
  11  *    documentation and/or other materials provided with the distribution.
  12  *
  13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
  14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
  17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  23  * THE POSSIBILITY OF SUCH DAMAGE.
  24  */
  25
  26 #include "config.h"
  27 #include "wtf/text/WTFString.h"
  28
  29 #include "wtf/MathExtras.h"
  30 #include "wtf/text/CString.h"
  31 #include <gtest/gtest.h>
  32 #include <limits>
  33
  34 namespace WTF {
  35
  36 TEST(StringTest, CreationFromLiteral)
  37 {
  38     String stringFromLiteral("Explicit construction syntax");
  39     EXPECT_EQ(strlen("Explicit construction syntax"), stringFromLiteral.length());
  40     EXPECT_TRUE(stringFromLiteral == "Explicit construction syntax");
  41     EXPECT_TRUE(stringFromLiteral.is8Bit());
  42     EXPECT_TRUE(String("Explicit construction syntax") == stringFromLiteral);
  43 }
  44
  45 TEST(StringTest, ASCII)
  46 {
  47     CString output;
  48
  49     // Null String.
  50     output = String().ascii();
  51     EXPECT_STREQ("", output.data());
  52
  53     // Empty String.
  54     output = emptyString().ascii();
  55     EXPECT_STREQ("", output.data());
  56
  57     // Regular String.
  58     output = String("foobar").ascii();
  59     EXPECT_STREQ("foobar", output.data());
  60 }
  61
  62 namespace {
  63
  64 void testNumberToStringECMAScript(double number, const char* reference)
  65 {
  66     CString numberString = String::numberToStringECMAScript(number).latin1();
  67     EXPECT_STREQ(reference, numberString.data());
  68 }
  69
  70 } // anonymous namespace
  71
  72 TEST(StringTest, NumberToStringECMAScriptBoundaries)
  73 {
  74     typedef std::numeric_limits<double> Limits;
  75
  76     // Infinity.
  77     testNumberToStringECMAScript(Limits::infinity(), "Infinity");
  78     testNumberToStringECMAScript(-Limits::infinity(), "-Infinity");
  79
  80     // NaN.
  81     testNumberToStringECMAScript(-Limits::quiet_NaN(), "NaN");
  82
  83     // Zeros.
  84     testNumberToStringECMAScript(0, "0");
  85     testNumberToStringECMAScript(-0, "0");
  86
  87     // Min-Max.
  88     testNumberToStringECMAScript(Limits::min(), "2.2250738585072014e-308");
  89     testNumberToStringECMAScript(Limits::max(), "1.7976931348623157e+308");
  90 }
  91
  92 TEST(StringTest, NumberToStringECMAScriptRegularNumbers)
  93 {
  94     // Pi.
  95     testNumberToStringECMAScript(piDouble, "3.141592653589793");
  96     testNumberToStringECMAScript(piFloat, "3.1415927410125732");
  97     testNumberToStringECMAScript(piOverTwoDouble, "1.5707963267948966");
  98     testNumberToStringECMAScript(piOverTwoFloat, "1.5707963705062866");
  99     testNumberToStringECMAScript(piOverFourDouble, "0.7853981633974483");
 100     testNumberToStringECMAScript(piOverFourFloat, "0.7853981852531433");
 101
 102     // e.
 103     const double e = 2.71828182845904523536028747135266249775724709369995;
 104     testNumberToStringECMAScript(e, "2.718281828459045");
 105
 106     // c, speed of light in m/s.
 107     const double c = 299792458;
 108     testNumberToStringECMAScript(c, "299792458");
 109
 110     // Golen ratio.
 111     const double phi = 1.6180339887498948482;
 112     testNumberToStringECMAScript(phi, "1.618033988749895");
 113 }
 114
 115 TEST(StringTest, ReplaceWithLiteral)
 116 {
 117     // Cases for 8Bit source.
 118     String testString = "1224";
 119     EXPECT_TRUE(testString.is8Bit());
 120     testString.replaceWithLiteral('2', "");
 121     EXPECT_STREQ("14", testString.utf8().data());
 122
 123     testString = "1224";
 124     EXPECT_TRUE(testString.is8Bit());
 125     testString.replaceWithLiteral('2', "3");
 126     EXPECT_STREQ("1334", testString.utf8().data());
 127
 128     testString = "1224";
 129     EXPECT_TRUE(testString.is8Bit());
 130     testString.replaceWithLiteral('2', "555");
 131     EXPECT_STREQ("15555554", testString.utf8().data());
 132
 133     testString = "1224";
 134     EXPECT_TRUE(testString.is8Bit());
 135     testString.replaceWithLiteral('3', "NotFound");
 136     EXPECT_STREQ("1224", testString.utf8().data());
 137
 138     // Cases for 16Bit source.
 139     // U+00E9 (=0xC3 0xA9 in UTF-8) is e with accent.
 140     testString = String::fromUTF8("r\xC3\xA9sum\xC3\xA9");
 141     EXPECT_FALSE(testString.is8Bit());
 142     testString.replaceWithLiteral(UChar(0x00E9), "e");
 143     EXPECT_STREQ("resume", testString.utf8().data());
 144
 145     testString = String::fromUTF8("r\xC3\xA9sum\xC3\xA9");
 146     EXPECT_FALSE(testString.is8Bit());
 147     testString.replaceWithLiteral(UChar(0x00E9), "");
 148     EXPECT_STREQ("rsum", testString.utf8().data());
 149
 150     testString = String::fromUTF8("r\xC3\xA9sum\xC3\xA9");
 151     EXPECT_FALSE(testString.is8Bit());
 152     testString.replaceWithLiteral('3', "NotFound");
 153     EXPECT_STREQ("r\xC3\xA9sum\xC3\xA9", testString.utf8().data());
 154 }
 155
 156 TEST(StringTest, ComparisonOfSameStringVectors)
 157 {
 158     Vector<String> stringVector;
 159     stringVector.append("one");
 160     stringVector.append("two");
 161
 162     Vector<String> sameStringVector;
 163     sameStringVector.append("one");
 164     sameStringVector.append("two");
 165
 166     EXPECT_EQ(stringVector, sameStringVector);
 167 }
 168
 169 TEST(WTF, SimplifyWhiteSpace)
 170 {
 171     String extraSpaces("  Hello  world  ");
 172     EXPECT_EQ(String("Hello world"), extraSpaces.simplifyWhiteSpace());
 173     EXPECT_EQ(String("  Hello  world  "), extraSpaces.simplifyWhiteSpace(WTF::DoNotStripWhiteSpace));
 174
 175     String extraSpacesAndNewlines(" \nHello\n world\n ");
 176     EXPECT_EQ(String("Hello world"), extraSpacesAndNewlines.simplifyWhiteSpace());
 177     EXPECT_EQ(String("  Hello  world  "), extraSpacesAndNewlines.simplifyWhiteSpace(WTF::DoNotStripWhiteSpace));
 178
 179     String extraSpacesAndTabs(" \nHello\t world\t ");
 180     EXPECT_EQ(String("Hello world"), extraSpacesAndTabs.simplifyWhiteSpace());
 181     EXPECT_EQ(String("  Hello  world  "), extraSpacesAndTabs.simplifyWhiteSpace(WTF::DoNotStripWhiteSpace));
 182 }
 183
 184 struct CaseFoldingTestData {
 185     const char* sourceDescription;
 186     const char* source;
 187     const char** localeList;
 188     size_t localeListLength;
 189     const char* expected;
 190 };
 191
 192 // \xC4\xB0 = U+0130 (capital dotted I)
 193 // \xC4\xB1 = U+0131 (lowercase dotless I)
 194 const char* turkicInput = "Isi\xC4\xB0 \xC4\xB0s\xC4\xB1I";
 195 const char* greekInput = "\xCE\x9F\xCE\x94\xCE\x8C\xCE\xA3 \xCE\x9F\xCE\xB4\xCF\x8C\xCF\x82 \xCE\xA3\xCE\xBF \xCE\xA3\xCE\x9F o\xCE\xA3 \xCE\x9F\xCE\xA3 \xCF\x83 \xE1\xBC\x95\xCE\xBE";
 196 const char* lithuanianInput = "I \xC3\x8F J J\xCC\x88 \xC4\xAE \xC4\xAE\xCC\x88 \xC3\x8C \xC3\x8D \xC4\xA8 xi\xCC\x87\xCC\x88 xj\xCC\x87\xCC\x88 x\xC4\xAF\xCC\x87\xCC\x88 xi\xCC\x87\xCC\x80 xi\xCC\x87\xCC\x81 xi\xCC\x87\xCC\x83 XI X\xC3\x8F XJ XJ\xCC\x88 X\xC4\xAE X\xC4\xAE\xCC\x88";
 197
 198
 199 const char* turkicLocales[] = {
 200     "tr", "tr-TR", "tr_TR", "tr@foo=bar", "tr-US", "TR", "tr-tr", "tR",
 201     "az", "az-AZ", "az_AZ", "az@foo=bar", "az-US", "Az", "AZ-AZ", };
 202 const char* nonTurkicLocales[] = {
 203     "en", "en-US", "en_US", "en@foo=bar", "EN", "En",
 204     "ja", "el", "fil", "fi", "lt", };
 205 const char* greekLocales[] = {
 206     "el", "el-GR", "el_GR", "el@foo=bar", "el-US", "EL", "el-gr", "eL",
 207 };
 208 const char* nonGreekLocales[] = {
 209     "en", "en-US", "en_US", "en@foo=bar", "EN", "En",
 210     "ja", "tr", "az", "fil", "fi", "lt", };
 211 const char* lithuanianLocales[] = {
 212     "lt", "lt-LT", "lt_LT", "lt@foo=bar", "lt-US", "LT", "lt-lt", "lT",
 213 };
 214 // Should not have "tr" or "az" because "lt" and 'tr/az' rules conflict with each other.
 215 const char* nonLithuanianLocales[] = {
 216     "en", "en-US", "en_US", "en@foo=bar", "EN", "En", "ja", "fil", "fi", "el", };
 217
 218 TEST(StringTest, ToUpperLocale)
 219 {
 220     CaseFoldingTestData testDataList[] = {
 221         {
 222             "Turkic input",
 223             turkicInput,
 224             turkicLocales,
 225             sizeof(turkicLocales) / sizeof(const char*),
 226             "IS\xC4\xB0\xC4\xB0 \xC4\xB0SII",
 227         }, {
 228             "Turkic input",
 229             turkicInput,
 230             nonTurkicLocales,
 231             sizeof(nonTurkicLocales) / sizeof(const char*),
 232             "ISI\xC4\xB0 \xC4\xB0SII",
 233         }, {
 234             "Greek input",
 235             greekInput,
 236             greekLocales,
 237             sizeof(greekLocales) / sizeof(const char*),
 238             "\xCE\x9F\xCE\x94\xCE\x9F\xCE\xA3 \xCE\x9F\xCE\x94\xCE\x9F\xCE\xA3 \xCE\xA3\xCE\x9F \xCE\xA3\xCE\x9F \x4F\xCE\xA3 \xCE\x9F\xCE\xA3 \xCE\xA3 \xCE\x95\xCE\x9E",
 239         }, {
 240             "Greek input",
 241             greekInput,
 242             nonGreekLocales,
 243             sizeof(nonGreekLocales) / sizeof(const char*),
 244             "\xCE\x9F\xCE\x94\xCE\x8C\xCE\xA3 \xCE\x9F\xCE\x94\xCE\x8C\xCE\xA3 \xCE\xA3\xCE\x9F \xCE\xA3\xCE\x9F \x4F\xCE\xA3 \xCE\x9F\xCE\xA3 \xCE\xA3 \xE1\xBC\x9D\xCE\x9E",
 245         }, {
 246             "Lithuanian input",
 247             lithuanianInput,
 248             lithuanianLocales,
 249             sizeof(lithuanianLocales) / sizeof(const char*),
 250             "I \xC3\x8F J J\xCC\x88 \xC4\xAE \xC4\xAE\xCC\x88 \xC3\x8C \xC3\x8D \xC4\xA8 XI\xCC\x88 XJ\xCC\x88 X\xC4\xAE\xCC\x88 XI\xCC\x80 XI\xCC\x81 XI\xCC\x83 XI X\xC3\x8F XJ XJ\xCC\x88 X\xC4\xAE X\xC4\xAE\xCC\x88",
 251         }, {
 252             "Lithuanian input",
 253             lithuanianInput,
 254             nonLithuanianLocales,
 255             sizeof(nonLithuanianLocales) / sizeof(const char*),
 256             "I \xC3\x8F J J\xCC\x88 \xC4\xAE \xC4\xAE\xCC\x88 \xC3\x8C \xC3\x8D \xC4\xA8 XI\xCC\x87\xCC\x88 XJ\xCC\x87\xCC\x88 X\xC4\xAE\xCC\x87\xCC\x88 XI\xCC\x87\xCC\x80 XI\xCC\x87\xCC\x81 XI\xCC\x87\xCC\x83 XI X\xC3\x8F XJ XJ\xCC\x88 X\xC4\xAE X\xC4\xAE\xCC\x88",
 257         },
 258     };
 259
 260     for (size_t i = 0; i < sizeof(testDataList) / sizeof(testDataList[0]); ++i) {
 261         const char* expected = testDataList[i].expected;
 262         String source = String::fromUTF8(testDataList[i].source);
 263         for (size_t j = 0; j < testDataList[i].localeListLength; ++j) {
 264             const char* locale = testDataList[i].localeList[j];
 265             EXPECT_STREQ(expected, source.upper(locale).utf8().data()) << testDataList[i].sourceDescription << "; locale=" << locale;
 266         }
 267     }
 268 }
 269
 270 TEST(StringTest, ToLowerLocale)
 271 {
 272     CaseFoldingTestData testDataList[] = {
 273         {
 274             "Turkic input",
 275             turkicInput,
 276             turkicLocales,
 277             sizeof(turkicLocales) / sizeof(const char*),
 278             "\xC4\xB1sii is\xC4\xB1\xC4\xB1",
 279         }, {
 280             "Turkic input",
 281             turkicInput,
 282             nonTurkicLocales,
 283             sizeof(nonTurkicLocales) / sizeof(const char*),
 284             // U+0130 is lowercased to U+0069 followed by U+0307
 285             "isii\xCC\x87 i\xCC\x87s\xC4\xB1i",
 286         }, {
 287             "Greek input",
 288             greekInput,
 289             greekLocales,
 290             sizeof(greekLocales) / sizeof(const char*),
 291             "\xCE\xBF\xCE\xB4\xCF\x8C\xCF\x82 \xCE\xBF\xCE\xB4\xCF\x8C\xCF\x82 \xCF\x83\xCE\xBF \xCF\x83\xCE\xBF \x6F\xCF\x82 \xCE\xBF\xCF\x82 \xCF\x83 \xE1\xBC\x95\xCE\xBE",
 292         }, {
 293             "Greek input",
 294             greekInput,
 295             nonGreekLocales,
 296             sizeof(greekLocales) / sizeof(const char*),
 297             "\xCE\xBF\xCE\xB4\xCF\x8C\xCF\x82 \xCE\xBF\xCE\xB4\xCF\x8C\xCF\x82 \xCF\x83\xCE\xBF \xCF\x83\xCE\xBF \x6F\xCF\x82 \xCE\xBF\xCF\x82 \xCF\x83 \xE1\xBC\x95\xCE\xBE",
 298         }, {
 299             "Lithuanian input",
 300             lithuanianInput,
 301             lithuanianLocales,
 302             sizeof(lithuanianLocales) / sizeof(const char*),
 303             "i \xC3\xAF j j\xCC\x87\xCC\x88 \xC4\xAF \xC4\xAF\xCC\x87\xCC\x88 i\xCC\x87\xCC\x80 i\xCC\x87\xCC\x81 i\xCC\x87\xCC\x83 xi\xCC\x87\xCC\x88 xj\xCC\x87\xCC\x88 x\xC4\xAF\xCC\x87\xCC\x88 xi\xCC\x87\xCC\x80 xi\xCC\x87\xCC\x81 xi\xCC\x87\xCC\x83 xi x\xC3\xAF xj xj\xCC\x87\xCC\x88 x\xC4\xAF x\xC4\xAF\xCC\x87\xCC\x88",
 304         }, {
 305             "Lithuanian input",
 306             lithuanianInput,
 307             nonLithuanianLocales,
 308             sizeof(nonLithuanianLocales) / sizeof(const char*),
 309             "\x69 \xC3\xAF \x6A \x6A\xCC\x88 \xC4\xAF \xC4\xAF\xCC\x88 \xC3\xAC \xC3\xAD \xC4\xA9 \x78\x69\xCC\x87\xCC\x88 \x78\x6A\xCC\x87\xCC\x88 \x78\xC4\xAF\xCC\x87\xCC\x88 \x78\x69\xCC\x87\xCC\x80 \x78\x69\xCC\x87\xCC\x81 \x78\x69\xCC\x87\xCC\x83 \x78\x69 \x78\xC3\xAF \x78\x6A \x78\x6A\xCC\x88 \x78\xC4\xAF \x78\xC4\xAF\xCC\x88",
 310         },
 311     };
 312
 313     for (size_t i = 0; i < sizeof(testDataList) / sizeof(testDataList[0]); ++i) {
 314         const char* expected = testDataList[i].expected;
 315         String source = String::fromUTF8(testDataList[i].source);
 316         for (size_t j = 0; j < testDataList[i].localeListLength; ++j) {
 317             const char* locale = testDataList[i].localeList[j];
 318             EXPECT_STREQ(expected, source.lower(locale).utf8().data()) << testDataList[i].sourceDescription << "; locale=" << locale;
 319         }
 320     }
 321 }
 322
 323 TEST(WTF, StartsWithIgnoringASCIICase)
 324 {
 325     String allASCII("LINK");
 326     String allASCIILowerCase("link");
 327     EXPECT_TRUE(startsWithIgnoringASCIICase(allASCII, allASCIILowerCase));
 328     String allASCIIMixedCase("lInK");
 329     EXPECT_TRUE(startsWithIgnoringASCIICase(allASCII, allASCIIMixedCase));
 330     String allASCIIDifferent("foo");
 331     EXPECT_FALSE(startsWithIgnoringASCIICase(allASCII, allASCIIDifferent));
 332     String nonASCII = String::fromUTF8("LIN\xE2\x84\xAA");
 333     EXPECT_FALSE(startsWithIgnoringASCIICase(allASCII, nonASCII));
 334     EXPECT_TRUE(startsWithIgnoringASCIICase(allASCII, nonASCII.lower()));
 335
 336     EXPECT_FALSE(startsWithIgnoringASCIICase(nonASCII, allASCII));
 337     EXPECT_FALSE(startsWithIgnoringASCIICase(nonASCII, allASCIILowerCase));
 338     EXPECT_FALSE(startsWithIgnoringASCIICase(nonASCII, allASCIIMixedCase));
 339     EXPECT_FALSE(startsWithIgnoringASCIICase(nonASCII, allASCIIDifferent));
 340 }
 341
 342 TEST(StringTest, Lower)
 343 {
 344     EXPECT_STREQ("link", String("LINK").lower().ascii().data());
 345     EXPECT_STREQ("link", String("lInk").lower().ascii().data());
 346     EXPECT_STREQ("lin\xE1k", String("lIn\xC1k").lower().latin1().data());
 347     EXPECT_STREQ("link", String::fromUTF8("LIN\xE2\x84\xAA").lower().utf8().data());
 348 }
 349
 350 } // namespace WTF