base/strings/sys_string_conversions_posix.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
#include "base/strings/sys_string_conversions.h"

#include <limits.h>
#include <string.h>
#include <wchar.h>

#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
  11
  12 namespace base {
  13
  14 std::string SysWideToUTF8(const std::wstring& wide) {
  15   // In theory this should be using the system-provided conversion rather
  16   // than our ICU, but this will do for now.
  17   return WideToUTF8(wide);
  18 }
  19 std::wstring SysUTF8ToWide(const StringPiece& utf8) {
  20   // In theory this should be using the system-provided conversion rather
  21   // than our ICU, but this will do for now.
  22   std::wstring out;
  23   UTF8ToWide(utf8.data(), utf8.size(), &out);
  24   return out;
  25 }
  26
  27 #if defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID)
  28 // TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb()
  29 // support and a better understanding of what calls these routines.
  30
  31 std::string SysWideToNativeMB(const std::wstring& wide) {
  32   return WideToUTF8(wide);
  33 }
  34
  35 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
  36   return SysUTF8ToWide(native_mb);
  37 }
  38
  39 #else
  40
  41 std::string SysWideToNativeMB(const std::wstring& wide) {
  42   mbstate_t ps;
  43
  44   // Calculate the number of multi-byte characters.  We walk through the string
  45   // without writing the output, counting the number of multi-byte characters.
  46   size_t num_out_chars = 0;
  47   memset(&ps, 0, sizeof(ps));
  48   for (size_t i = 0; i < wide.size(); ++i) {
  49     const wchar_t src = wide[i];
  50     // Use a temp buffer since calling wcrtomb with an output of NULL does not
  51     // calculate the output length.
  52     char buf[16];
  53     // Skip NULLs to avoid wcrtomb's special handling of them.
  54     size_t res = src ? wcrtomb(buf, src, &ps) : 0;
  55     switch (res) {
  56       // Handle any errors and return an empty string.
  57       case static_cast<size_t>(-1):
  58         return std::string();
  59         break;
  60       case 0:
  61         // We hit an embedded null byte, keep going.
  62         ++num_out_chars;
  63         break;
  64       default:
  65         num_out_chars += res;
  66         break;
  67     }
  68   }
  69
  70   if (num_out_chars == 0)
  71     return std::string();
  72
  73   std::string out;
  74   out.resize(num_out_chars);
  75
  76   // We walk the input string again, with |i| tracking the index of the
  77   // wide input, and |j| tracking the multi-byte output.
  78   memset(&ps, 0, sizeof(ps));
  79   for (size_t i = 0, j = 0; i < wide.size(); ++i) {
  80     const wchar_t src = wide[i];
  81     // We don't want wcrtomb to do its funkiness for embedded NULLs.
  82     size_t res = src ? wcrtomb(&out[j], src, &ps) : 0;
  83     switch (res) {
  84       // Handle any errors and return an empty string.
  85       case static_cast<size_t>(-1):
  86         return std::string();
  87         break;
  88       case 0:
  89         // We hit an embedded null byte, keep going.
  90         ++j;  // Output is already zeroed.
  91         break;
  92       default:
  93         j += res;
  94         break;
  95     }
  96   }
  97
  98   return out;
  99 }
 100
 101 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
 102   mbstate_t ps;
 103
 104   // Calculate the number of wide characters.  We walk through the string
 105   // without writing the output, counting the number of wide characters.
 106   size_t num_out_chars = 0;
 107   memset(&ps, 0, sizeof(ps));
 108   for (size_t i = 0; i < native_mb.size(); ) {
 109     const char* src = native_mb.data() + i;
 110     size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
 111     switch (res) {
 112       // Handle any errors and return an empty string.
 113       case static_cast<size_t>(-2):
 114       case static_cast<size_t>(-1):
 115         return std::wstring();
 116         break;
 117       case 0:
 118         // We hit an embedded null byte, keep going.
 119         i += 1;  // Fall through.
 120       default:
 121         i += res;
 122         ++num_out_chars;
 123         break;
 124     }
 125   }
 126
 127   if (num_out_chars == 0)
 128     return std::wstring();
 129
 130   std::wstring out;
 131   out.resize(num_out_chars);
 132
 133   memset(&ps, 0, sizeof(ps));  // Clear the shift state.
 134   // We walk the input string again, with |i| tracking the index of the
 135   // multi-byte input, and |j| tracking the wide output.
 136   for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
 137     const char* src = native_mb.data() + i;
 138     wchar_t* dst = &out[j];
 139     size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
 140     switch (res) {
 141       // Handle any errors and return an empty string.
 142       case static_cast<size_t>(-2):
 143       case static_cast<size_t>(-1):
 144         return std::wstring();
 145         break;
 146       case 0:
 147         i += 1;  // Skip null byte.
 148         break;
 149       default:
 150         i += res;
 151         break;
 152     }
 153   }
 154
 155   return out;
 156 }
 157
#endif  // defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID)
 159
 160 }  // namespace base