base/i18n/case_conversion_unittest.cc

   1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/i18n/case_conversion.h"
   6 #include "base/i18n/rtl.h"
   7 #include "base/strings/utf_string_conversions.h"
   8 #include "base/test/icu_test_util.h"
   9 #include "testing/gtest/include/gtest/gtest.h"
  10 #include "third_party/icu/source/i18n/unicode/usearch.h"
  11
  12 namespace base {
  13 namespace i18n {
  14
  15 namespace {
  16
  17 const wchar_t kNonASCIIMixed[] =
  18     L"\xC4\xD6\xE4\xF6\x20\xCF\xEF\x20\xF7\x25"
  19     L"\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F07\x1F0F"
  20     L"\x20\x1E00\x1E01";
  21 const wchar_t kNonASCIILower[] =
  22     L"\xE4\xF6\xE4\xF6\x20\xEF\xEF"
  23     L"\x20\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F07"
  24     L"\x1F07\x20\x1E01\x1E01";
  25 const wchar_t kNonASCIIUpper[] =
  26     L"\xC4\xD6\xC4\xD6\x20\xCF\xCF"
  27     L"\x20\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F0F"
  28     L"\x1F0F\x20\x1E00\x1E00";
  29
  30 }  // namespace
  31
  32 // Test upper and lower case string conversion.
  33 TEST(CaseConversionTest, UpperLower) {
  34   const string16 mixed(ASCIIToUTF16("Text with UPPer & lowER casE."));
  35   const string16 expected_lower(ASCIIToUTF16("text with upper & lower case."));
  36   const string16 expected_upper(ASCIIToUTF16("TEXT WITH UPPER & LOWER CASE."));
  37
  38   string16 result = ToLower(mixed);
  39   EXPECT_EQ(expected_lower, result);
  40
  41   result = ToUpper(mixed);
  42   EXPECT_EQ(expected_upper, result);
  43 }
  44
  45 TEST(CaseConversionTest, NonASCII) {
  46   const string16 mixed(WideToUTF16(kNonASCIIMixed));
  47   const string16 expected_lower(WideToUTF16(kNonASCIILower));
  48   const string16 expected_upper(WideToUTF16(kNonASCIIUpper));
  49
  50   string16 result = ToLower(mixed);
  51   EXPECT_EQ(expected_lower, result);
  52
  53   result = ToUpper(mixed);
  54   EXPECT_EQ(expected_upper, result);
  55 }
  56
  57 TEST(CaseConversionTest, TurkishLocaleConversion) {
  58   const string16 mixed(WideToUTF16(L"\x49\x131"));
  59   const string16 expected_lower(WideToUTF16(L"\x69\x131"));
  60   const string16 expected_upper(WideToUTF16(L"\x49\x49"));
  61
  62   test::ScopedRestoreICUDefaultLocale restore_locale;
  63   i18n::SetICUDefaultLocale("en_US");
  64
  65   string16 result = ToLower(mixed);
  66   EXPECT_EQ(expected_lower, result);
  67
  68   result = ToUpper(mixed);
  69   EXPECT_EQ(expected_upper, result);
  70
  71   i18n::SetICUDefaultLocale("tr");
  72
  73   const string16 expected_lower_turkish(WideToUTF16(L"\x131\x131"));
  74   const string16 expected_upper_turkish(WideToUTF16(L"\x49\x49"));
  75
  76   result = ToLower(mixed);
  77   EXPECT_EQ(expected_lower_turkish, result);
  78
  79   result = ToUpper(mixed);
  80   EXPECT_EQ(expected_upper_turkish, result);
  81 }
  82
  83 TEST(CaseConversionTest, FoldCase) {
  84   // Simple ASCII, should lower-case.
  85   EXPECT_EQ(ASCIIToUTF16("hello, world"),
  86             FoldCase(ASCIIToUTF16("Hello, World")));
  87
  88   // Non-ASCII cases from above. They should all fold to the same result.
  89   EXPECT_EQ(FoldCase(WideToUTF16(kNonASCIIMixed)),
  90             FoldCase(WideToUTF16(kNonASCIILower)));
  91   EXPECT_EQ(FoldCase(WideToUTF16(kNonASCIIMixed)),
  92             FoldCase(WideToUTF16(kNonASCIIUpper)));
  93
  94   // Turkish cases from above. This is the lower-case expected result from the
  95   // US locale. It should be the same even when the current locale is Turkish.
  96   const string16 turkish(WideToUTF16(L"\x49\x131"));
  97   const string16 turkish_expected(WideToUTF16(L"\x69\x131"));
  98
  99   test::ScopedRestoreICUDefaultLocale restore_locale;
 100   i18n::SetICUDefaultLocale("en_US");
 101   EXPECT_EQ(turkish_expected, FoldCase(turkish));
 102
 103   i18n::SetICUDefaultLocale("tr");
 104   EXPECT_EQ(turkish_expected, FoldCase(turkish));
 105
 106   // Test a case that gets bigger when processed.
 107   // U+130 = LATIN CAPITAL LETTER I WITH DOT ABOVE gets folded to a lower case
 108   // "i" followed by U+307 COMBINING DOT ABOVE.
 109   EXPECT_EQ(WideToUTF16(L"i\u0307j"), FoldCase(WideToUTF16(L"\u0130j")));
 110
 111   // U+00DF (SHARP S) and U+1E9E (CAPIRAL SHARP S) are both folded to "ss".
 112   EXPECT_EQ(ASCIIToUTF16("ssss"), FoldCase(WideToUTF16(L"\u00DF\u1E9E")));
 113 }
 114
 115 }  // namespace i18n
 116 }  // namespace base
 117
 118
 119