1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/utf_offset_string_conversions.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "base/string_piece.h"
11 #include "base/utf_string_conversion_utils.h"
13 using base::PrepareForUTF16Or32Output
;
14 using base::PrepareForUTF8Output
;
15 using base::ReadUnicodeCharacter
;
16 using base::WriteUnicodeCharacter
;
18 // Converts the given source Unicode character type to the given destination
19 // Unicode character type as a STL string. The given input buffer and size
20 // determine the source, and the given output STL string will be replaced by
// NOTE(review): the sentence above is cut off in this listing; presumably it
// ends with "the result of the conversion" -- confirm against the full file.
//
// When |offsets_for_adjustment| is non-NULL, every offset in it is first
// passed through LimitOffset<DestStdString>(src_len), and one
// OffsetAdjuster::Adjustment is recorded per source character read so that
// the offsets can be remapped from source positions to output positions.
// NOTE(review): the |src_len| parameter declaration, the |code_point|
// declaration, the `else` line, the return statement and the closing braces
// are not visible in this listing; only the visible statements are described.
22 template<typename SrcChar
, typename DestStdString
>
23 bool ConvertUnicode(const SrcChar
* src
,
25 DestStdString
* output
,
26 std::vector
<size_t>* offsets_for_adjustment
) {
// Pre-process the caller's offsets against the source length.
// NOTE(review): LimitOffset is defined elsewhere; presumably it invalidates
// offsets that lie beyond |src_len| -- confirm.
27 if (offsets_for_adjustment
) {
28 std::for_each(offsets_for_adjustment
->begin(),
29 offsets_for_adjustment
->end(),
30 LimitOffset
<DestStdString
>(src_len
));
33 // ICU requires 32-bit numbers.
// |offset_adjuster| collects the per-character adjustments added below.
35 OffsetAdjuster
offset_adjuster(offsets_for_adjustment
);
// Narrow the length to int32 because the reader below takes 32-bit indices.
36 int32 src_len32
= static_cast<int32
>(src_len
);
37 for (int32 i
= 0; i
< src_len32
; i
++) {
// Remember where this character started; ReadUnicodeCharacter() advances |i|.
39 size_t original_i
= i
;
40 size_t chars_written
= 0;
// Successfully decoded characters are re-encoded into |output|.
41 if (ReadUnicodeCharacter(src
, src_len32
, &i
, &code_point
)) {
42 chars_written
= WriteUnicodeCharacter(code_point
, output
);
// Writes U+FFFD (the Unicode replacement character) instead.
// NOTE(review): presumably this is the failure (`else`) branch of the read
// above; the `else` line itself is missing from this listing -- confirm.
44 chars_written
= WriteUnicodeCharacter(0xFFFD, output
);
47 if (offsets_for_adjustment
) {
48 // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
49 // character read, not after it (so that incrementing it in the loop
50 // increment will place it at the right location), so we need to account
51 // for that in determining the amount that was read.
// Record (source start, source units consumed, output units written).
52 offset_adjuster
.Add(OffsetAdjuster::Adjustment(original_i
,
53 i
- original_i
+ 1, chars_written
));
// Single-offset variant of the UTF-8 -> UTF-16 conversion: wraps the optional
// |offset_for_adjustment| in a temporary one-element vector, runs
// ConvertUnicode() with it, and copies the adjusted value back to the caller.
// NOTE(review): the |src_len| and |output| parameter declarations and the
// final return are not visible in this listing; presumably |ret| is returned.
59 bool UTF8ToUTF16AndAdjustOffset(const char* src
,
62 size_t* offset_for_adjustment
) {
63 std::vector
<size_t> offsets
;
// Only seed the vector when the caller actually supplied an offset.
64 if (offset_for_adjustment
)
65 offsets
.push_back(*offset_for_adjustment
);
66 PrepareForUTF16Or32Output(src
, src_len
, output
);
67 bool ret
= ConvertUnicode(src
, src_len
, output
, &offsets
);
// Hand the (possibly adjusted) offset back through the out-parameter.
68 if (offset_for_adjustment
)
69 *offset_for_adjustment
= offsets
[0];
// Multi-offset variant of the UTF-8 -> UTF-16 conversion: calls
// PrepareForUTF16Or32Output() on |output| and then returns the result of
// ConvertUnicode(), which receives |offsets_for_adjustment| unchanged.
// NOTE(review): the |src_len| and |output| parameter declarations are not
// visible in this listing.
73 bool UTF8ToUTF16AndAdjustOffsets(const char* src
,
76 std::vector
<size_t>* offsets_for_adjustment
) {
77 PrepareForUTF16Or32Output(src
, src_len
, output
);
78 return ConvertUnicode(src
, src_len
, output
, offsets_for_adjustment
);
// StringPiece convenience overload for a single offset: wraps the optional
// |offset_for_adjustment| in a temporary vector, converts |utf8| through the
// pointer/length UTF8ToUTF16AndAdjustOffsets() overload, and copies the
// adjusted offset back to the caller.
// NOTE(review): the declaration of |result|, the last argument of the
// conversion call and the return statement are missing from this listing;
// presumably |&offsets| is passed and |result| is returned -- confirm.
81 string16
UTF8ToUTF16AndAdjustOffset(const base::StringPiece
& utf8
,
82 size_t* offset_for_adjustment
) {
83 std::vector
<size_t> offsets
;
// Only seed the vector when the caller actually supplied an offset.
84 if (offset_for_adjustment
)
85 offsets
.push_back(*offset_for_adjustment
);
87 UTF8ToUTF16AndAdjustOffsets(utf8
.data(), utf8
.length(), &result
,
// Hand the (possibly adjusted) offset back through the out-parameter.
89 if (offset_for_adjustment
)
90 *offset_for_adjustment
= offsets
[0];
// StringPiece convenience overload for multiple offsets: forwards |utf8|'s
// data pointer and length, plus |offsets_for_adjustment|, to the
// pointer/length UTF8ToUTF16AndAdjustOffsets() overload, writing into |result|.
// NOTE(review): the declaration of |result| and the return statement are
// missing from this listing; presumably |result| is a local string16 that is
// returned -- confirm.
94 string16
UTF8ToUTF16AndAdjustOffsets(
95 const base::StringPiece
& utf8
,
96 std::vector
<size_t>* offsets_for_adjustment
) {
98 UTF8ToUTF16AndAdjustOffsets(utf8
.data(), utf8
.length(), &result
,
99 offsets_for_adjustment
);
// Single-offset variant of the UTF-16 -> UTF-8 conversion: wraps the optional
// |offset_for_adjustment| in a temporary one-element vector, delegates to
// UTF16ToUTF8AndAdjustOffsets(), and copies the adjusted offset back.
// NOTE(review): the return statement is missing from this listing;
// presumably |result| is returned -- confirm.
103 std::string
UTF16ToUTF8AndAdjustOffset(
104 const base::StringPiece16
& utf16
,
105 size_t* offset_for_adjustment
) {
106 std::vector
<size_t> offsets
;
// Only seed the vector when the caller actually supplied an offset.
107 if (offset_for_adjustment
)
108 offsets
.push_back(*offset_for_adjustment
);
109 std::string result
= UTF16ToUTF8AndAdjustOffsets(utf16
, &offsets
);
// Hand the (possibly adjusted) offset back through the out-parameter.
110 if (offset_for_adjustment
)
111 *offset_for_adjustment
= offsets
[0];
// Multi-offset variant of the UTF-16 -> UTF-8 conversion: calls
// PrepareForUTF8Output() on |result| and then runs ConvertUnicode() over
// |utf16|, passing |offsets_for_adjustment| through unchanged.
// The boolean returned by ConvertUnicode() is not used by the visible call.
// NOTE(review): the declaration of |result| and the return statement are
// missing from this listing; presumably |result| is a local std::string that
// is returned -- confirm.
115 std::string
UTF16ToUTF8AndAdjustOffsets(
116 const base::StringPiece16
& utf16
,
117 std::vector
<size_t>* offsets_for_adjustment
) {
119 PrepareForUTF8Output(utf16
.data(), utf16
.length(), &result
);
120 ConvertUnicode(utf16
.data(), utf16
.length(), &result
, offsets_for_adjustment
);
// Constructs an Adjustment by copying each argument into the member of the
// same name. ConvertUnicode() in this file passes the source start index, the
// number of source units consumed, and the number of output units written.
124 OffsetAdjuster::Adjustment::Adjustment(size_t original_offset
,
125 size_t original_length
,
126 size_t output_length
)
127 : original_offset(original_offset
),
128 original_length(original_length
),
129 output_length(output_length
) {
// Stores the caller's offsets pointer in |offsets_for_adjustment_|; the
// vector is not copied. The pointer may be NULL (the destructor checks it).
132 OffsetAdjuster::OffsetAdjuster(std::vector
<size_t>* offsets_for_adjustment
)
133 : offsets_for_adjustment_(offsets_for_adjustment
) {
// On destruction, walks every offset in |offsets_for_adjustment_| unless the
// pointer is NULL or no adjustments were recorded.
// NOTE(review): the statement guarded by the `if` and the loop body are
// missing from this listing; presumably the `if` returns early and the loop
// calls AdjustOffset(i) on each element -- confirm against the full file.
136 OffsetAdjuster::~OffsetAdjuster() {
137 if (!offsets_for_adjustment_
|| adjustments_
.empty())
139 for (std::vector
<size_t>::iterator
i(offsets_for_adjustment_
->begin());
140 i
!= offsets_for_adjustment_
->end(); ++i
)
// Appends |adjustment| to |adjustments_|. Nothing is applied to the offsets
// here; AdjustOffset() reads |adjustments_| later.
144 void OffsetAdjuster::Add(const Adjustment
& adjustment
) {
145 adjustments_
.push_back(adjustment
);
// Remaps the single offset referenced by |offset| using the recorded
// |adjustments_|: the offset is either set to string16::npos or reduced by
// the accumulated (original_length - output_length) of the adjustments that
// were walked.
// NOTE(review): several control-flow lines (the statements after the `if`
// conditions and the closing braces) are missing from this listing; the
// presumed `return`/`break` statements are called out below -- confirm.
148 void OffsetAdjuster::AdjustOffset(std::vector
<size_t>::iterator offset
) {
// An offset that is already npos has nothing to adjust.
// NOTE(review): presumably followed by an early `return`.
149 if (*offset
== string16::npos
)
151 size_t adjustment
= 0;
// Walk the adjustments in the order they were added. ConvertUnicode() in this
// file adds them in increasing source-offset order.
152 for (std::vector
<Adjustment
>::const_iterator i
= adjustments_
.begin();
153 i
!= adjustments_
.end(); ++i
) {
// The offset sits exactly on a source character that produced no output.
// NOTE(review): presumably followed by `return` after the assignment.
154 if (*offset
== i
->original_offset
&& i
->output_length
== 0) {
155 *offset
= string16::npos
;
// The offset lies at or before this adjustment's start.
// NOTE(review): presumably this `break`s out of the loop.
158 if (*offset
<= i
->original_offset
)
// The offset falls strictly inside this adjustment's source range.
// NOTE(review): presumably followed by `return` after the assignment.
160 if (*offset
< (i
->original_offset
+ i
->original_length
)) {
161 *offset
= string16::npos
;
// Accumulate how much this adjustment changed the length.
// NOTE(review): |adjustment| is a size_t, so this relies on unsigned
// wrap-around when output_length exceeds original_length -- confirm intent.
164 adjustment
+= (i
->original_length
- i
->output_length
);
// Apply the accumulated adjustment to the offset.
166 *offset
-= adjustment
;