1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/utf_string_conversions.h"
7 #include "base/string_piece.h"
8 #include "base/string_util.h"
9 #include "base/utf_string_conversion_utils.h"
11 using base::PrepareForUTF8Output
;
12 using base::PrepareForUTF16Or32Output
;
13 using base::ReadUnicodeCharacter
;
14 using base::WriteUnicodeCharacter
;
18 // Generalized Unicode converter -----------------------------------------------
20 // Converts the given source Unicode character type to the given destination
21 // Unicode character type as a STL string. The given input buffer and size
22 // determine the source, and the given output STL string will be replaced by
24 template<typename SRC_CHAR
, typename DEST_STRING
>
25 bool ConvertUnicode(const SRC_CHAR
* src
,
27 DEST_STRING
* output
) {
28 // ICU requires 32-bit numbers.
30 int32 src_len32
= static_cast<int32
>(src_len
);
31 for (int32 i
= 0; i
< src_len32
; i
++) {
33 if (ReadUnicodeCharacter(src
, src_len32
, &i
, &code_point
)) {
34 WriteUnicodeCharacter(code_point
, output
);
36 WriteUnicodeCharacter(0xFFFD, output
);
46 // UTF-8 <-> Wide --------------------------------------------------------------
48 bool WideToUTF8(const wchar_t* src
, size_t src_len
, std::string
* output
) {
49 PrepareForUTF8Output(src
, src_len
, output
);
50 return ConvertUnicode(src
, src_len
, output
);
53 std::string
WideToUTF8(const std::wstring
& wide
) {
55 // Ignore the success flag of this call, it will do the best it can for
56 // invalid input, which is what we want here.
57 WideToUTF8(wide
.data(), wide
.length(), &ret
);
61 bool UTF8ToWide(const char* src
, size_t src_len
, std::wstring
* output
) {
62 PrepareForUTF16Or32Output(src
, src_len
, output
);
63 return ConvertUnicode(src
, src_len
, output
);
66 std::wstring
UTF8ToWide(const base::StringPiece
& utf8
) {
68 UTF8ToWide(utf8
.data(), utf8
.length(), &ret
);
72 // UTF-16 <-> Wide -------------------------------------------------------------
74 #if defined(WCHAR_T_IS_UTF16)
76 // When wide == UTF-16, then conversions are a NOP.
77 bool WideToUTF16(const wchar_t* src
, size_t src_len
, string16
* output
) {
78 output
->assign(src
, src_len
);
82 string16
WideToUTF16(const std::wstring
& wide
) {
86 bool UTF16ToWide(const char16
* src
, size_t src_len
, std::wstring
* output
) {
87 output
->assign(src
, src_len
);
91 std::wstring
UTF16ToWide(const string16
& utf16
) {
95 #elif defined(WCHAR_T_IS_UTF32)
97 bool WideToUTF16(const wchar_t* src
, size_t src_len
, string16
* output
) {
99 // Assume that normally we won't have any non-BMP characters so the counts
101 output
->reserve(src_len
);
102 return ConvertUnicode(src
, src_len
, output
);
105 string16
WideToUTF16(const std::wstring
& wide
) {
107 WideToUTF16(wide
.data(), wide
.length(), &ret
);
111 bool UTF16ToWide(const char16
* src
, size_t src_len
, std::wstring
* output
) {
113 // Assume that normally we won't have any non-BMP characters so the counts
115 output
->reserve(src_len
);
116 return ConvertUnicode(src
, src_len
, output
);
119 std::wstring
UTF16ToWide(const string16
& utf16
) {
121 UTF16ToWide(utf16
.data(), utf16
.length(), &ret
);
125 #endif // defined(WCHAR_T_IS_UTF32)
127 // UTF16 <-> UTF8 --------------------------------------------------------------
129 #if defined(WCHAR_T_IS_UTF32)
131 bool UTF8ToUTF16(const char* src
, size_t src_len
, string16
* output
) {
132 PrepareForUTF16Or32Output(src
, src_len
, output
);
133 return ConvertUnicode(src
, src_len
, output
);
136 string16
UTF8ToUTF16(const base::StringPiece
& utf8
) {
138 // Ignore the success flag of this call, it will do the best it can for
139 // invalid input, which is what we want here.
140 UTF8ToUTF16(utf8
.data(), utf8
.length(), &ret
);
144 bool UTF16ToUTF8(const char16
* src
, size_t src_len
, std::string
* output
) {
145 PrepareForUTF8Output(src
, src_len
, output
);
146 return ConvertUnicode(src
, src_len
, output
);
149 std::string
UTF16ToUTF8(const string16
& utf16
) {
151 // Ignore the success flag of this call, it will do the best it can for
152 // invalid input, which is what we want here.
153 UTF16ToUTF8(utf16
.data(), utf16
.length(), &ret
);
157 #elif defined(WCHAR_T_IS_UTF16)
158 // Easy case since we can use the "wide" versions we already wrote above.
160 bool UTF8ToUTF16(const char* src
, size_t src_len
, string16
* output
) {
161 return UTF8ToWide(src
, src_len
, output
);
164 string16
UTF8ToUTF16(const base::StringPiece
& utf8
) {
165 return UTF8ToWide(utf8
);
168 bool UTF16ToUTF8(const char16
* src
, size_t src_len
, std::string
* output
) {
169 return WideToUTF8(src
, src_len
, output
);
172 std::string
UTF16ToUTF8(const string16
& utf16
) {
173 return WideToUTF8(utf16
);
178 std::wstring
ASCIIToWide(const base::StringPiece
& ascii
) {
179 DCHECK(IsStringASCII(ascii
)) << ascii
;
180 return std::wstring(ascii
.begin(), ascii
.end());
183 string16
ASCIIToUTF16(const base::StringPiece
& ascii
) {
184 DCHECK(IsStringASCII(ascii
)) << ascii
;
185 return string16(ascii
.begin(), ascii
.end());