1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/utf_offset_string_conversions.h"
7 #include "base/string_piece.h"
8 #include "base/utf_string_conversion_utils.h"
10 using base::PrepareForUTF16Or32Output
;
11 using base::ReadUnicodeCharacter
;
12 using base::WriteUnicodeCharacter
;
14 // Generalized Unicode converter -----------------------------------------------
16 // Converts the given source Unicode character type to the given destination
17 // Unicode character type as a STL string. The given input buffer and size
18 // determine the source, and the given output STL string will be replaced by
// the converted text. (The end of the sentence above is cut off in this copy
// of the file; the wording here is reconstructed -- confirm against upstream.)
//
// |offset_for_adjustment| may be null. When it is non-null and holds an index
// below |src_len|, that index is tracked through the conversion and the
// tracked value is stored back through the pointer at the end; in every other
// non-null case std::wstring::npos is stored back instead.
//
// NOTE(review): several lines of this definition are not visible in this view
// (the |src_len| and |output| parameters, the |code_point| declaration, the
// `else` lines, the closing braces and the return statement), so the return
// value is deliberately left undocumented here.
20 template<typename SRC_CHAR
>
21 bool ConvertUnicode(const SRC_CHAR
* src
,
24 size_t* offset_for_adjustment
) {
// Start from the caller's offset only if it is usable (pointer non-null and
// index inside the source); npos means "there is nothing to adjust".
25 size_t output_offset
=
26 (offset_for_adjustment
&& *offset_for_adjustment
< src_len
) ?
27 *offset_for_adjustment
: std::wstring::npos
;
29 // ICU requires 32-bit numbers.
31 int32 src_len32
= static_cast<int32
>(src_len
);
32 for (int32 i
= 0; i
< src_len32
; i
++) {
// Remember where this character starts: |i| is passed by address to
// ReadUnicodeCharacter(), which may move it (see the NOTE further down).
34 size_t original_i
= i
;
35 size_t chars_written
= 0;
36 if (ReadUnicodeCharacter(src
, src_len32
, &i
, &code_point
)) {
37 chars_written
= WriteUnicodeCharacter(code_point
, output
);
// Writes 0xFFFD (U+FFFD) instead of a decoded code point. Presumably this is
// the branch taken when the read above fails; the `else` line itself is not
// visible in this view -- confirm.
39 chars_written
= WriteUnicodeCharacter(0xFFFD, output
);
// Only an offset that is still valid and lies past the start of the current
// character is affected by this character.
42 if ((output_offset
!= std::wstring::npos
) &&
43 (*offset_for_adjustment
> original_i
)) {
44 // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
45 // character read, not after it (so that incrementing it in the loop
46 // increment will place it at the right location), so we need to account
47 // for that in determining the amount that was read.
// The offset is after |original_i| but not after |i|: it points into the
// middle of the units consumed for this character, so it is invalidated.
48 if (*offset_for_adjustment
<= static_cast<size_t>(i
))
49 output_offset
= std::wstring::npos
;
// Shift the tracked offset by (units written) - (units consumed), where the
// units consumed are i - original_i + 1. Presumably the `else` arm of the
// test above; that line is not visible in this view -- confirm.
51 output_offset
+= chars_written
- (i
- original_i
+ 1);
// Report the adjusted offset (or npos) back to the caller, if one was given.
55 if (offset_for_adjustment
)
56 *offset_for_adjustment
= output_offset
;
60 // UTF-8 <-> Wide --------------------------------------------------------------
// Pointer/length form of the UTF-8 to wide conversion. Calls
// PrepareForUTF16Or32Output() with |src|, |src_len| and |output|, then hands
// the same arguments plus |offset_for_adjustment| to ConvertUnicode() and
// returns whatever ConvertUnicode() returns. |offset_for_adjustment| is only
// forwarded here; any update to it happens inside ConvertUnicode().
//
// NOTE(review): the |src_len| and |output| parameter lines and the closing
// brace of this definition are not visible in this view.
62 bool UTF8ToWideAndAdjustOffset(const char* src
,
65 size_t* offset_for_adjustment
) {
66 PrepareForUTF16Or32Output(src
, src_len
, output
);
67 return ConvertUnicode(src
, src_len
, output
, offset_for_adjustment
);
// Convenience form taking a base::StringPiece and returning a std::wstring.
// Forwards utf8.data(), utf8.length(), the address of |ret| and
// |offset_for_adjustment| to the pointer/length overload; the bool result of
// that call is not used in the visible code.
//
// NOTE(review): the declaration of |ret| and the return statement are not
// visible in this view; presumably |ret| is the std::wstring that gets
// returned -- confirm.
70 std::wstring
UTF8ToWideAndAdjustOffset(const base::StringPiece
& utf8
,
71 size_t* offset_for_adjustment
) {
73 UTF8ToWideAndAdjustOffset(utf8
.data(), utf8
.length(), &ret
,
74 offset_for_adjustment
);
78 // UTF-16 <-> Wide -------------------------------------------------------------
80 #if defined(WCHAR_T_IS_UTF16)
82 // When wide == UTF-16, then conversions are a NOP.
// Pointer/length form used when WCHAR_T_IS_UTF16 is defined. Assigns the
// |src_len| units starting at |src| to |output| as they are. The only offset
// handling in the visible code is a range check: a non-null
// |offset_for_adjustment| whose value is >= |src_len| is reset to
// std::wstring::npos.
//
// NOTE(review): the |src_len| and |output| parameter lines, the return
// statement and the closing brace are not visible in this view.
83 bool UTF16ToWideAndAdjustOffset(const char16
* src
,
86 size_t* offset_for_adjustment
) {
87 output
->assign(src
, src_len
);
88 if (offset_for_adjustment
&& (*offset_for_adjustment
>= src_len
))
89 *offset_for_adjustment
= std::wstring::npos
;
// string16 form used when WCHAR_T_IS_UTF16 is defined. The visible code only
// range-checks the offset: a non-null |offset_for_adjustment| whose value is
// >= utf16.length() is reset to std::wstring::npos.
//
// NOTE(review): the return statement is not visible in this view; presumably
// |utf16| is returned as-is, since the comment above this #if branch says the
// conversion is a NOP -- confirm.
93 std::wstring
UTF16ToWideAndAdjustOffset(const string16
& utf16
,
94 size_t* offset_for_adjustment
) {
95 if (offset_for_adjustment
&& (*offset_for_adjustment
>= utf16
.length()))
96 *offset_for_adjustment
= std::wstring::npos
;
100 #elif defined(WCHAR_T_IS_UTF32)
// Pointer/length form used when WCHAR_T_IS_UTF32 is defined. Reserves
// |src_len| characters in |output|, then hands |src|, |src_len|, |output| and
// |offset_for_adjustment| to ConvertUnicode() and returns its result.
// |offset_for_adjustment| is only forwarded here; any update to it happens
// inside ConvertUnicode().
//
// NOTE(review): the |src_len| parameter line, some body lines and the closing
// brace are not visible in this view.
102 bool UTF16ToWideAndAdjustOffset(const char16
* src
,
104 std::wstring
* output
,
105 size_t* offset_for_adjustment
) {
107 // Assume that normally we won't have any non-BMP characters so the counts
// (The rest of the sentence above is not visible in this view.) The call
// below reserves |src_len| characters in |output| on that assumption.
109 output
->reserve(src_len
);
110 return ConvertUnicode(src
, src_len
, output
, offset_for_adjustment
);
// string16 form used when WCHAR_T_IS_UTF32 is defined; returns a
// std::wstring. Forwards utf16.data(), utf16.length(), the address of |ret|
// and |offset_for_adjustment| to the pointer/length overload; the bool result
// of that call is not used in the visible code.
//
// NOTE(review): the declaration of |ret| and the return statement are not
// visible in this view; presumably |ret| is the std::wstring that gets
// returned -- confirm.
113 std::wstring
UTF16ToWideAndAdjustOffset(const string16
& utf16
,
114 size_t* offset_for_adjustment
) {
116 UTF16ToWideAndAdjustOffset(utf16
.data(), utf16
.length(), &ret
,
117 offset_for_adjustment
);
121 #endif // defined(WCHAR_T_IS_UTF32)