base/strings/utf_string_conversions.cc

   1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/strings/utf_string_conversions.h"
   6
   7 #include "base/strings/string_piece.h"
   8 #include "base/strings/string_util.h"
   9 #include "base/strings/utf_string_conversion_utils.h"
  10
  11 namespace base {
  12
  13 namespace {
  14
  15 // Generalized Unicode converter -----------------------------------------------
  16
  17 // Converts the given source Unicode character type to the given destination
  18 // Unicode character type as a STL string. The given input buffer and size
  19 // determine the source, and the given output STL string will be replaced by
  20 // the result.
  21 template<typename SRC_CHAR, typename DEST_STRING>
  22 bool ConvertUnicode(const SRC_CHAR* src,
  23                     size_t src_len,
  24                     DEST_STRING* output) {
  25   // ICU requires 32-bit numbers.
  26   bool success = true;
  27   int32 src_len32 = static_cast<int32>(src_len);
  28   for (int32 i = 0; i < src_len32; i++) {
  29     uint32 code_point;
  30     if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
  31       WriteUnicodeCharacter(code_point, output);
  32     } else {
  33       WriteUnicodeCharacter(0xFFFD, output);
  34       success = false;
  35     }
  36   }
  37
  38   return success;
  39 }
  40
  41 }  // namespace
  42
  43 // UTF-8 <-> Wide --------------------------------------------------------------
  44
  45 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
  46   PrepareForUTF8Output(src, src_len, output);
  47   return ConvertUnicode(src, src_len, output);
  48 }
  49
  50 std::string WideToUTF8(const std::wstring& wide) {
  51   std::string ret;
  52   // Ignore the success flag of this call, it will do the best it can for
  53   // invalid input, which is what we want here.
  54   WideToUTF8(wide.data(), wide.length(), &ret);
  55   return ret;
  56 }
  57
  58 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
  59   PrepareForUTF16Or32Output(src, src_len, output);
  60   return ConvertUnicode(src, src_len, output);
  61 }
  62
  63 std::wstring UTF8ToWide(const StringPiece& utf8) {
  64   std::wstring ret;
  65   UTF8ToWide(utf8.data(), utf8.length(), &ret);
  66   return ret;
  67 }
  68
  69 // UTF-16 <-> Wide -------------------------------------------------------------
  70
  71 #if defined(WCHAR_T_IS_UTF16)
  72
  73 // When wide == UTF-16, then conversions are a NOP.
  74 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
  75   output->assign(src, src_len);
  76   return true;
  77 }
  78
  79 string16 WideToUTF16(const std::wstring& wide) {
  80   return wide;
  81 }
  82
  83 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
  84   output->assign(src, src_len);
  85   return true;
  86 }
  87
  88 std::wstring UTF16ToWide(const string16& utf16) {
  89   return utf16;
  90 }
  91
  92 #elif defined(WCHAR_T_IS_UTF32)
  93
  94 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
  95   output->clear();
  96   // Assume that normally we won't have any non-BMP characters so the counts
  97   // will be the same.
  98   output->reserve(src_len);
  99   return ConvertUnicode(src, src_len, output);
 100 }
 101
 102 string16 WideToUTF16(const std::wstring& wide) {
 103   string16 ret;
 104   WideToUTF16(wide.data(), wide.length(), &ret);
 105   return ret;
 106 }
 107
 108 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
 109   output->clear();
 110   // Assume that normally we won't have any non-BMP characters so the counts
 111   // will be the same.
 112   output->reserve(src_len);
 113   return ConvertUnicode(src, src_len, output);
 114 }
 115
 116 std::wstring UTF16ToWide(const string16& utf16) {
 117   std::wstring ret;
 118   UTF16ToWide(utf16.data(), utf16.length(), &ret);
 119   return ret;
 120 }
 121
 122 #endif  // defined(WCHAR_T_IS_UTF32)
 123
 124 // UTF16 <-> UTF8 --------------------------------------------------------------
 125
 126 #if defined(WCHAR_T_IS_UTF32)
 127
 128 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
 129   PrepareForUTF16Or32Output(src, src_len, output);
 130   return ConvertUnicode(src, src_len, output);
 131 }
 132
 133 string16 UTF8ToUTF16(const StringPiece& utf8) {
 134   string16 ret;
 135   // Ignore the success flag of this call, it will do the best it can for
 136   // invalid input, which is what we want here.
 137   UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
 138   return ret;
 139 }
 140
 141 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
 142   PrepareForUTF8Output(src, src_len, output);
 143   return ConvertUnicode(src, src_len, output);
 144 }
 145
 146 std::string UTF16ToUTF8(const string16& utf16) {
 147   std::string ret;
 148   // Ignore the success flag of this call, it will do the best it can for
 149   // invalid input, which is what we want here.
 150   UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
 151   return ret;
 152 }
 153
 154 #elif defined(WCHAR_T_IS_UTF16)
 155 // Easy case since we can use the "wide" versions we already wrote above.
 156
 157 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
 158   return UTF8ToWide(src, src_len, output);
 159 }
 160
 161 string16 UTF8ToUTF16(const StringPiece& utf8) {
 162   return UTF8ToWide(utf8);
 163 }
 164
 165 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
 166   return WideToUTF8(src, src_len, output);
 167 }
 168
 169 std::string UTF16ToUTF8(const string16& utf16) {
 170   return WideToUTF8(utf16);
 171 }
 172
 173 #endif
 174
 175 std::wstring ASCIIToWide(const StringPiece& ascii) {
 176   DCHECK(IsStringASCII(ascii)) << ascii;
 177   return std::wstring(ascii.begin(), ascii.end());
 178 }
 179
 180 string16 ASCIIToUTF16(const StringPiece& ascii) {
 181   DCHECK(IsStringASCII(ascii)) << ascii;
 182   return string16(ascii.begin(), ascii.end());
 183 }
 184
 185 std::string UTF16ToASCII(const string16& utf16) {
 186   DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16);
 187   return std::string(utf16.begin(), utf16.end());
 188 }
 189
 190 }  // namespace base