Adding WebView support to telemetry scripts.
[chromium-blink-merge.git] / base / strings / utf_offset_string_conversions.cc
blob bb402e4d24e11244cb85982d0e75d3c0d1d528c4
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/utf_offset_string_conversions.h"
7 #include <algorithm>
9 #include "base/memory/scoped_ptr.h"
10 #include "base/strings/string_piece.h"
11 #include "base/strings/utf_string_conversion_utils.h"
13 namespace base {
15 // Converts the given source Unicode character type to the given destination
16 // Unicode character type as a STL string. The given input buffer and size
17 // determine the source, and the given output STL string will be replaced by
18 // the result.
19 template<typename SrcChar, typename DestStdString>
20 bool ConvertUnicode(const SrcChar* src,
21 size_t src_len,
22 DestStdString* output,
23 std::vector<size_t>* offsets_for_adjustment) {
24 if (offsets_for_adjustment) {
25 std::for_each(offsets_for_adjustment->begin(),
26 offsets_for_adjustment->end(),
27 LimitOffset<DestStdString>(src_len));
30 // ICU requires 32-bit numbers.
31 bool success = true;
32 OffsetAdjuster offset_adjuster(offsets_for_adjustment);
33 int32 src_len32 = static_cast<int32>(src_len);
34 for (int32 i = 0; i < src_len32; i++) {
35 uint32 code_point;
36 size_t original_i = i;
37 size_t chars_written = 0;
38 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
39 chars_written = WriteUnicodeCharacter(code_point, output);
40 } else {
41 chars_written = WriteUnicodeCharacter(0xFFFD, output);
42 success = false;
44 if (offsets_for_adjustment) {
45 // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
46 // character read, not after it (so that incrementing it in the loop
47 // increment will place it at the right location), so we need to account
48 // for that in determining the amount that was read.
49 offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i,
50 i - original_i + 1, chars_written));
53 return success;
56 bool UTF8ToUTF16AndAdjustOffset(const char* src,
57 size_t src_len,
58 string16* output,
59 size_t* offset_for_adjustment) {
60 std::vector<size_t> offsets;
61 if (offset_for_adjustment)
62 offsets.push_back(*offset_for_adjustment);
63 PrepareForUTF16Or32Output(src, src_len, output);
64 bool ret = ConvertUnicode(src, src_len, output, &offsets);
65 if (offset_for_adjustment)
66 *offset_for_adjustment = offsets[0];
67 return ret;
70 bool UTF8ToUTF16AndAdjustOffsets(const char* src,
71 size_t src_len,
72 string16* output,
73 std::vector<size_t>* offsets_for_adjustment) {
74 PrepareForUTF16Or32Output(src, src_len, output);
75 return ConvertUnicode(src, src_len, output, offsets_for_adjustment);
78 string16 UTF8ToUTF16AndAdjustOffset(const base::StringPiece& utf8,
79 size_t* offset_for_adjustment) {
80 std::vector<size_t> offsets;
81 if (offset_for_adjustment)
82 offsets.push_back(*offset_for_adjustment);
83 string16 result;
84 UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result,
85 &offsets);
86 if (offset_for_adjustment)
87 *offset_for_adjustment = offsets[0];
88 return result;
91 string16 UTF8ToUTF16AndAdjustOffsets(
92 const base::StringPiece& utf8,
93 std::vector<size_t>* offsets_for_adjustment) {
94 string16 result;
95 UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result,
96 offsets_for_adjustment);
97 return result;
100 std::string UTF16ToUTF8AndAdjustOffset(
101 const base::StringPiece16& utf16,
102 size_t* offset_for_adjustment) {
103 std::vector<size_t> offsets;
104 if (offset_for_adjustment)
105 offsets.push_back(*offset_for_adjustment);
106 std::string result = UTF16ToUTF8AndAdjustOffsets(utf16, &offsets);
107 if (offset_for_adjustment)
108 *offset_for_adjustment = offsets[0];
109 return result;
112 std::string UTF16ToUTF8AndAdjustOffsets(
113 const base::StringPiece16& utf16,
114 std::vector<size_t>* offsets_for_adjustment) {
115 std::string result;
116 PrepareForUTF8Output(utf16.data(), utf16.length(), &result);
117 ConvertUnicode(utf16.data(), utf16.length(), &result, offsets_for_adjustment);
118 return result;
121 OffsetAdjuster::Adjustment::Adjustment(size_t original_offset,
122 size_t original_length,
123 size_t output_length)
124 : original_offset(original_offset),
125 original_length(original_length),
126 output_length(output_length) {
129 OffsetAdjuster::OffsetAdjuster(std::vector<size_t>* offsets_for_adjustment)
130 : offsets_for_adjustment_(offsets_for_adjustment) {
133 OffsetAdjuster::~OffsetAdjuster() {
134 if (!offsets_for_adjustment_ || adjustments_.empty())
135 return;
136 for (std::vector<size_t>::iterator i(offsets_for_adjustment_->begin());
137 i != offsets_for_adjustment_->end(); ++i)
138 AdjustOffset(i);
141 void OffsetAdjuster::Add(const Adjustment& adjustment) {
142 adjustments_.push_back(adjustment);
145 void OffsetAdjuster::AdjustOffset(std::vector<size_t>::iterator offset) {
146 if (*offset == string16::npos)
147 return;
148 size_t adjustment = 0;
149 for (std::vector<Adjustment>::const_iterator i = adjustments_.begin();
150 i != adjustments_.end(); ++i) {
151 if (*offset == i->original_offset && i->output_length == 0) {
152 *offset = string16::npos;
153 return;
155 if (*offset <= i->original_offset)
156 break;
157 if (*offset < (i->original_offset + i->original_length)) {
158 *offset = string16::npos;
159 return;
161 adjustment += (i->original_length - i->output_length);
163 *offset -= adjustment;
166 } // namespace base