1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/utf_offset_string_conversions.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "base/strings/string_piece.h"
11 #include "base/strings/utf_string_conversion_utils.h"
15 // Converts the given source Unicode character type to the given destination
16 // Unicode character type as an STL string. The given input buffer and size
17 // determine the source, and the given output STL string will be replaced by
// the converted result.
// Shared conversion loop, templated on the source character type (SrcChar)
// and the destination string type (DestStdString).
//
// Walks |src| one character at a time with ReadUnicodeCharacter() and writes
// each decoded code point to |output| with WriteUnicodeCharacter(). A second
// write of U+FFFD (0xFFFD) is also visible; presumably it is the fallback for
// input that ReadUnicodeCharacter() rejects.
//
// When |offsets_for_adjustment| is non-null:
//   * every offset is first passed through LimitOffset<DestStdString>(src_len);
//   * for each character read, an OffsetAdjuster::Adjustment is recorded with
//     the starting source index, the number of source units consumed and the
//     number of output units written.
//
// NOTE(review): |src_len| and |code_point| are used below, but their
// declarations, the opener of the else-branch, the closing braces and the
// return statement are not visible in this listing. Confirm against the
// complete file before relying on this description.
19 template<typename SrcChar
, typename DestStdString
>
20 bool ConvertUnicode(const SrcChar
* src
,
22 DestStdString
* output
,
23 std::vector
<size_t>* offsets_for_adjustment
) {
// Apply LimitOffset to every caller-supplied offset before converting.
24 if (offsets_for_adjustment
) {
25 std::for_each(offsets_for_adjustment
->begin(),
26 offsets_for_adjustment
->end(),
27 LimitOffset
<DestStdString
>(src_len
));
30 // ICU requires 32-bit numbers.
// The adjuster is constructed with the (possibly null) offsets vector and
// receives one Adjustment per character in the loop below.
32 OffsetAdjuster
offset_adjuster(offsets_for_adjustment
);
33 int32 src_len32
= static_cast<int32
>(src_len
);
34 for (int32 i
= 0; i
< src_len32
; i
++) {
// Remember where this character started; ReadUnicodeCharacter() receives &i
// and may advance it.
36 size_t original_i
= i
;
37 size_t chars_written
= 0;
38 if (ReadUnicodeCharacter(src
, src_len32
, &i
, &code_point
)) {
39 chars_written
= WriteUnicodeCharacter(code_point
, output
);
// Presumably the failure path (the "else" line is missing from this listing):
// write U+FFFD, the Unicode replacement character, instead.
41 chars_written
= WriteUnicodeCharacter(0xFFFD, output
);
44 if (offsets_for_adjustment
) {
45 // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
46 // character read, not after it (so that incrementing it in the loop
47 // increment will place it at the right location), so we need to account
48 // for that in determining the amount that was read.
49 offset_adjuster
.Add(OffsetAdjuster::Adjustment(original_i
,
50 i
- original_i
+ 1, chars_written
));
// Single-offset UTF-8 -> UTF-16 conversion over a raw character buffer.
//
// If |offset_for_adjustment| is non-null, its value is copied into a local
// vector, that vector is handed to ConvertUnicode(), and element 0 is written
// back through the pointer afterwards. If it is null, the vector stays empty
// and nothing is written back.
//
// NOTE(review): the |src_len| and |output| parameters used below, and the
// statement that returns |ret|, are not visible in this listing.
56 bool UTF8ToUTF16AndAdjustOffset(const char* src
,
59 size_t* offset_for_adjustment
) {
60 std::vector
<size_t> offsets
;
61 if (offset_for_adjustment
)
62 offsets
.push_back(*offset_for_adjustment
);
// Prepare |output| for the conversion, then run the shared conversion loop.
63 PrepareForUTF16Or32Output(src
, src_len
, output
);
64 bool ret
= ConvertUnicode(src
, src_len
, output
, &offsets
);
// Copy the (possibly adjusted) offset back to the caller.
65 if (offset_for_adjustment
)
66 *offset_for_adjustment
= offsets
[0];
// Multi-offset UTF-8 -> UTF-16 conversion over a raw character buffer.
//
// Calls PrepareForUTF16Or32Output() on |output| and then returns the result
// of ConvertUnicode(), passing |offsets_for_adjustment| straight through.
//
// NOTE(review): the |src_len| and |output| parameters used below are not
// visible in this listing.
70 bool UTF8ToUTF16AndAdjustOffsets(const char* src
,
73 std::vector
<size_t>* offsets_for_adjustment
) {
74 PrepareForUTF16Or32Output(src
, src_len
, output
);
75 return ConvertUnicode(src
, src_len
, output
, offsets_for_adjustment
);
// StringPiece overload of the single-offset UTF-8 -> UTF-16 conversion.
//
// If |offset_for_adjustment| is non-null, its value is copied into a local
// vector before the conversion and element 0 of that vector is written back
// afterwards. The conversion itself is delegated to
// UTF8ToUTF16AndAdjustOffsets() on utf8.data()/utf8.length().
//
// NOTE(review): the declaration of |result|, the final argument of the
// delegated call (presumably &offsets) and the return statement are not
// visible in this listing.
78 string16
UTF8ToUTF16AndAdjustOffset(const base::StringPiece
& utf8
,
79 size_t* offset_for_adjustment
) {
80 std::vector
<size_t> offsets
;
81 if (offset_for_adjustment
)
82 offsets
.push_back(*offset_for_adjustment
);
84 UTF8ToUTF16AndAdjustOffsets(utf8
.data(), utf8
.length(), &result
,
// Copy the (possibly adjusted) offset back to the caller.
86 if (offset_for_adjustment
)
87 *offset_for_adjustment
= offsets
[0];
// StringPiece overload of the multi-offset UTF-8 -> UTF-16 conversion.
//
// Delegates to the buffer-based UTF8ToUTF16AndAdjustOffsets() with
// utf8.data()/utf8.length(), a pointer to |result| and the caller's
// |offsets_for_adjustment|. The boolean returned by that call is not used in
// the visible code.
//
// NOTE(review): the declaration of |result| and the return statement are not
// visible in this listing.
91 string16
UTF8ToUTF16AndAdjustOffsets(
92 const base::StringPiece
& utf8
,
93 std::vector
<size_t>* offsets_for_adjustment
) {
95 UTF8ToUTF16AndAdjustOffsets(utf8
.data(), utf8
.length(), &result
,
96 offsets_for_adjustment
);
// Single-offset UTF-16 -> UTF-8 conversion.
//
// If |offset_for_adjustment| is non-null, its value is copied into a local
// vector, UTF16ToUTF8AndAdjustOffsets() is called with that vector, and
// element 0 is written back through the pointer afterwards. If it is null,
// the vector stays empty and nothing is written back.
//
// NOTE(review): the statement that returns |result| is not visible in this
// listing.
100 std::string
UTF16ToUTF8AndAdjustOffset(
101 const base::StringPiece16
& utf16
,
102 size_t* offset_for_adjustment
) {
103 std::vector
<size_t> offsets
;
104 if (offset_for_adjustment
)
105 offsets
.push_back(*offset_for_adjustment
);
106 std::string result
= UTF16ToUTF8AndAdjustOffsets(utf16
, &offsets
);
// Copy the (possibly adjusted) offset back to the caller.
107 if (offset_for_adjustment
)
108 *offset_for_adjustment
= offsets
[0];
// Multi-offset UTF-16 -> UTF-8 conversion.
//
// Calls PrepareForUTF8Output() on |result| and then ConvertUnicode() with
// utf16.data()/utf16.length() and the caller's |offsets_for_adjustment|.
// The boolean returned by ConvertUnicode() is not used in the visible code.
//
// NOTE(review): the declaration of |result| and the return statement are not
// visible in this listing.
112 std::string
UTF16ToUTF8AndAdjustOffsets(
113 const base::StringPiece16
& utf16
,
114 std::vector
<size_t>* offsets_for_adjustment
) {
116 PrepareForUTF8Output(utf16
.data(), utf16
.length(), &result
);
117 ConvertUnicode(utf16
.data(), utf16
.length(), &result
, offsets_for_adjustment
);
// Constructs an Adjustment by copying each argument into the member of the
// same name. ConvertUnicode() passes, in order: the source index where a
// character started, the number of source units read for it, and the number
// of output units written for it.
121 OffsetAdjuster::Adjustment::Adjustment(size_t original_offset
,
122 size_t original_length
,
123 size_t output_length
)
124 : original_offset(original_offset
),
125 original_length(original_length
),
126 output_length(output_length
) {
// Stores the caller's offsets vector pointer in |offsets_for_adjustment_|.
// The pointer may be null: ConvertUnicode() passes it through unchecked and
// the destructor tests it for null.
129 OffsetAdjuster::OffsetAdjuster(std::vector
<size_t>* offsets_for_adjustment
)
130 : offsets_for_adjustment_(offsets_for_adjustment
) {
// Destructor: checks whether there is no offsets vector or no recorded
// adjustments, then iterates over every element of |offsets_for_adjustment_|.
//
// NOTE(review): the statement guarded by the first condition (presumably an
// early return) and the loop body (presumably a call to AdjustOffset(i)) are
// not visible in this listing. Confirm against the complete file.
133 OffsetAdjuster::~OffsetAdjuster() {
134 if (!offsets_for_adjustment_
|| adjustments_
.empty())
136 for (std::vector
<size_t>::iterator
i(offsets_for_adjustment_
->begin());
137 i
!= offsets_for_adjustment_
->end(); ++i
)
// Appends |adjustment| to |adjustments_|. AdjustOffset() later walks that
// list from begin() to end(), i.e. in insertion order.
141 void OffsetAdjuster::Add(const Adjustment
& adjustment
) {
142 adjustments_
.push_back(adjustment
);
// Rewrites the single offset referenced by |offset| using the recorded
// adjustments.
//
// Visible logic, per adjustment in |adjustments_| order:
//   * if the offset equals the adjustment's original_offset and the
//     adjustment produced no output (output_length == 0), the offset is set
//     to string16::npos;
//   * the offset is compared against original_offset (<=);
//   * if the offset is smaller than original_offset + original_length, it is
//     set to string16::npos;
//   * otherwise (original_length - output_length) is added to a running
//     total.
// Finally the running total is subtracted from the offset.
//
// NOTE(review): the statements that follow several of the guards (presumably
// "return" after the npos checks and assignments, and "break" after the <=
// test) and the closing braces are not visible in this listing. Confirm
// against the complete file.
145 void OffsetAdjuster::AdjustOffset(std::vector
<size_t>::iterator offset
) {
// Guard for an offset that is already npos (guarded statement not visible).
146 if (*offset
== string16::npos
)
148 size_t adjustment
= 0;
149 for (std::vector
<Adjustment
>::const_iterator i
= adjustments_
.begin();
150 i
!= adjustments_
.end(); ++i
) {
// The offset sits exactly at a source position that produced no output.
151 if (*offset
== i
->original_offset
&& i
->output_length
== 0) {
152 *offset
= string16::npos
;
// The offset is at or before this adjustment (guarded statement not visible).
155 if (*offset
<= i
->original_offset
)
// The offset is below the end of this adjustment's source range.
157 if (*offset
< (i
->original_offset
+ i
->original_length
)) {
158 *offset
= string16::npos
;
// Accumulate the difference between source units read and output units
// written.
161 adjustment
+= (i
->original_length
- i
->output_length
);
163 *offset
-= adjustment
;