Roll src/third_party/WebKit d9c6159:8139f33 (svn 201974:201975)
[chromium-blink-merge.git] / base / strings / utf_offset_string_conversions.cc
blobc2270bfce2ff3f468e5280f3a0bad9b8a88f9d99
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/utf_offset_string_conversions.h"
7 #include <algorithm>
9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h"
11 #include "base/strings/string_piece.h"
12 #include "base/strings/utf_string_conversion_utils.h"
14 namespace base {
16 OffsetAdjuster::Adjustment::Adjustment(size_t original_offset,
17 size_t original_length,
18 size_t output_length)
19 : original_offset(original_offset),
20 original_length(original_length),
21 output_length(output_length) {
24 // static
25 void OffsetAdjuster::AdjustOffsets(
26 const Adjustments& adjustments,
27 std::vector<size_t>* offsets_for_adjustment) {
28 if (!offsets_for_adjustment || adjustments.empty())
29 return;
30 for (std::vector<size_t>::iterator i(offsets_for_adjustment->begin());
31 i != offsets_for_adjustment->end(); ++i)
32 AdjustOffset(adjustments, &(*i));
35 // static
36 void OffsetAdjuster::AdjustOffset(const Adjustments& adjustments,
37 size_t* offset) {
38 if (*offset == string16::npos)
39 return;
40 int adjustment = 0;
41 for (Adjustments::const_iterator i = adjustments.begin();
42 i != adjustments.end(); ++i) {
43 if (*offset <= i->original_offset)
44 break;
45 if (*offset < (i->original_offset + i->original_length)) {
46 *offset = string16::npos;
47 return;
49 adjustment += static_cast<int>(i->original_length - i->output_length);
51 *offset -= adjustment;
54 // static
55 void OffsetAdjuster::UnadjustOffsets(
56 const Adjustments& adjustments,
57 std::vector<size_t>* offsets_for_unadjustment) {
58 if (!offsets_for_unadjustment || adjustments.empty())
59 return;
60 for (std::vector<size_t>::iterator i(offsets_for_unadjustment->begin());
61 i != offsets_for_unadjustment->end(); ++i)
62 UnadjustOffset(adjustments, &(*i));
65 // static
66 void OffsetAdjuster::UnadjustOffset(const Adjustments& adjustments,
67 size_t* offset) {
68 if (*offset == string16::npos)
69 return;
70 int adjustment = 0;
71 for (Adjustments::const_iterator i = adjustments.begin();
72 i != adjustments.end(); ++i) {
73 if (*offset + adjustment <= i->original_offset)
74 break;
75 adjustment += static_cast<int>(i->original_length - i->output_length);
76 if ((*offset + adjustment) <
77 (i->original_offset + i->original_length)) {
78 *offset = string16::npos;
79 return;
82 *offset += adjustment;
// Merges |first_adjustments| into |*adjustments_on_adjusted_string|, in place.
// Entries already in |adjustments_on_adjusted_string| have their
// |original_offset| increased by the running |shift| (and, when they overlap
// an entry of |first_adjustments|, their |original_length| increased), while
// entries of |first_adjustments| are inserted unmodified.
//
// |adjustments_on_adjusted_string| is dereferenced unconditionally, so it
// must not be null. The function returns nothing; the merged list is the
// only output.
85 // static
86 void OffsetAdjuster::MergeSequentialAdjustments(
87 const Adjustments& first_adjustments,
88 Adjustments* adjustments_on_adjusted_string) {
89 Adjustments::iterator adjusted_iter = adjustments_on_adjusted_string->begin();
90 Adjustments::const_iterator first_iter = first_adjustments.begin();
91 // Simultaneously iterate over all |adjustments_on_adjusted_string| and
92 // |first_adjustments|, adding adjustments to or correcting the adjustments
93 // in |adjustments_on_adjusted_string| as we go. |shift| keeps track of the
94 // current number of characters collapsed by |first_adjustments| up to this
95 // point. |currently_collapsing| keeps track of the number of characters
96 // collapsed by |first_adjustments| into the current |adjusted_iter|'s
97 // length. These are characters that will change |shift| as soon as we're
98 // done processing the current |adjusted_iter|; they are not yet reflected in
99 // |shift|.
100 size_t shift = 0;
101 size_t currently_collapsing = 0;
// The loop runs until every entry of |adjustments_on_adjusted_string| has
// been visited; leftover |first_adjustments| are handled after the loop.
102 while (adjusted_iter != adjustments_on_adjusted_string->end()) {
103 if ((first_iter == first_adjustments.end()) ||
104 ((adjusted_iter->original_offset + shift +
105 adjusted_iter->original_length) <= first_iter->original_offset)) {
106 // Entire |adjusted_iter| (accounting for its shift and including its
107 // whole original length) comes before |first_iter|.
109 // Correct the offset at |adjusted_iter| and move onto the next
110 // adjustment that needs revising.
// Order matters: the offset is corrected with the old |shift| before the
// pending |currently_collapsing| amount is folded into |shift|.
111 adjusted_iter->original_offset += shift;
112 shift += currently_collapsing;
113 currently_collapsing = 0;
114 ++adjusted_iter;
115 } else if ((adjusted_iter->original_offset + shift) >
116 first_iter->original_offset) {
117 // |first_iter| comes before the |adjusted_iter| (as adjusted by |shift|).
119 // It's not possible for the adjustments to overlap. (It shouldn't
120 // be possible that we have an |adjusted_iter->original_offset| that,
121 // when adjusted by the computed |shift|, is in the middle of
122 // |first_iter|'s output's length. After all, that would mean the
123 // current adjustment_on_adjusted_string somehow points to an offset
124 // that was supposed to have been eliminated by the first set of
125 // adjustments.)
126 DCHECK_LE(first_iter->original_offset + first_iter->output_length,
127 adjusted_iter->original_offset + shift);
129 // Add the |first_iter| adjustment to the full set of adjustments while
130 // making sure |adjusted_iter| continues pointing to the same element.
131 // We do this by inserting the |first_iter| adjustment right before
132 // |adjusted_iter|, then incrementing |adjusted_iter| so it points to
133 // the following element.
// |adjusted_iter| is re-seated from insert()'s return value instead of
// reusing the iterator that was passed in.
134 shift += first_iter->original_length - first_iter->output_length;
135 adjusted_iter = adjustments_on_adjusted_string->insert(
136 adjusted_iter, *first_iter);
137 ++adjusted_iter;
138 ++first_iter;
139 } else {
140 // The first adjustment adjusted something that then got further adjusted
141 // by the second set of adjustments. In other words, |first_iter| points
142 // to something in the range covered by |adjusted_iter|'s length (after
143 // accounting for |shift|). Precisely,
144 // adjusted_iter->original_offset + shift
145 // <=
146 // first_iter->original_offset
147 // <=
148 // adjusted_iter->original_offset + shift +
149 // adjusted_iter->original_length
151 // Modify the current |adjusted_iter| to include whatever collapsing
152 // happened in |first_iter|, then advance to the next |first_adjustments|
153 // because we dealt with the current one.
// |collapse| is computed in signed arithmetic so that an expanding
// adjustment (output longer than original) shows up as a non-positive value
// for the DCHECK below.
154 const int collapse = static_cast<int>(first_iter->original_length) -
155 static_cast<int>(first_iter->output_length);
156 // This function does not know how to deal with a string that expands and
157 // then gets modified, only strings that collapse and then get modified.
158 DCHECK_GT(collapse, 0);
159 adjusted_iter->original_length += collapse;
160 currently_collapsing += collapse;
161 ++first_iter;
// The loop only exits via the first branch, which resets
// |currently_collapsing| to zero, so nothing should be pending here.
164 DCHECK_EQ(0u, currently_collapsing);
165 if (first_iter != first_adjustments.end()) {
166 // Only first adjustments are left. These do not need to be modified.
167 // (Their offsets are already correct with respect to the original string.)
168 // Append them all.
169 DCHECK(adjusted_iter == adjustments_on_adjusted_string->end());
170 adjustments_on_adjusted_string->insert(
171 adjustments_on_adjusted_string->end(), first_iter,
172 first_adjustments.end());
176 // Converts the given source Unicode character type to the given destination
177 // Unicode character type as a STL string. The given input buffer and size
178 // determine the source, and the given output STL string will be replaced by
179 // the result. If non-NULL, |adjustments| is set to reflect the all the
180 // alterations to the string that are not one-character-to-one-character.
181 // It will always be sorted by increasing offset.
182 template<typename SrcChar, typename DestStdString>
183 bool ConvertUnicode(const SrcChar* src,
184 size_t src_len,
185 DestStdString* output,
186 OffsetAdjuster::Adjustments* adjustments) {
187 if (adjustments)
188 adjustments->clear();
189 // ICU requires 32-bit numbers.
190 bool success = true;
191 int32 src_len32 = static_cast<int32>(src_len);
192 for (int32 i = 0; i < src_len32; i++) {
193 uint32 code_point;
194 size_t original_i = i;
195 size_t chars_written = 0;
196 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
197 chars_written = WriteUnicodeCharacter(code_point, output);
198 } else {
199 chars_written = WriteUnicodeCharacter(0xFFFD, output);
200 success = false;
203 // Only bother writing an adjustment if this modification changed the
204 // length of this character.
205 // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
206 // character read, not after it (so that incrementing it in the loop
207 // increment will place it at the right location), so we need to account
208 // for that in determining the amount that was read.
209 if (adjustments && ((i - original_i + 1) != chars_written)) {
210 adjustments->push_back(OffsetAdjuster::Adjustment(
211 original_i, i - original_i + 1, chars_written));
214 return success;
217 bool UTF8ToUTF16WithAdjustments(
218 const char* src,
219 size_t src_len,
220 string16* output,
221 base::OffsetAdjuster::Adjustments* adjustments) {
222 PrepareForUTF16Or32Output(src, src_len, output);
223 return ConvertUnicode(src, src_len, output, adjustments);
226 string16 UTF8ToUTF16WithAdjustments(
227 const base::StringPiece& utf8,
228 base::OffsetAdjuster::Adjustments* adjustments) {
229 string16 result;
230 UTF8ToUTF16WithAdjustments(utf8.data(), utf8.length(), &result, adjustments);
231 return result;
234 string16 UTF8ToUTF16AndAdjustOffsets(
235 const base::StringPiece& utf8,
236 std::vector<size_t>* offsets_for_adjustment) {
237 std::for_each(offsets_for_adjustment->begin(),
238 offsets_for_adjustment->end(),
239 LimitOffset<base::StringPiece>(utf8.length()));
240 OffsetAdjuster::Adjustments adjustments;
241 string16 result = UTF8ToUTF16WithAdjustments(utf8, &adjustments);
242 OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);
243 return result;
246 std::string UTF16ToUTF8AndAdjustOffsets(
247 const base::StringPiece16& utf16,
248 std::vector<size_t>* offsets_for_adjustment) {
249 std::for_each(offsets_for_adjustment->begin(),
250 offsets_for_adjustment->end(),
251 LimitOffset<base::StringPiece16>(utf16.length()));
252 std::string result;
253 PrepareForUTF8Output(utf16.data(), utf16.length(), &result);
254 OffsetAdjuster::Adjustments adjustments;
255 ConvertUnicode(utf16.data(), utf16.length(), &result, &adjustments);
256 OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);
257 return result;
260 } // namespace base