1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
6 #define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
11 #include "base/base_export.h"
12 #include "base/strings/string16.h"
13 #include "base/strings/string_piece.h"
17 // A helper class and associated data structures to adjust offsets into a
18 // string in response to various adjustments one might do to that string
19 // (e.g., eliminating a range). For details on offsets, see the comments by
20 // the AdjustOffsets() function below.
21 class BASE_EXPORT OffsetAdjuster
{
23 struct BASE_EXPORT Adjustment
{
24 Adjustment(size_t original_offset
,
25 size_t original_length
,
26 size_t output_length
);
28 size_t original_offset
;
29 size_t original_length
;
32 typedef std::vector
<Adjustment
> Adjustments
;
34 // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments
35 // recorded in |adjustments|.
37 // Offsets represent insertion/selection points between characters: if |src|
38 // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the
39 // end of the string. Valid input offsets range from 0 to |src_len|. On
40 // exit, each offset will have been modified to point at the same logical
41 // position in the output string. If an offset cannot be successfully
42 // adjusted (e.g., because it points into the middle of a multibyte sequence),
43 // it will be set to string16::npos.
44 static void AdjustOffsets(const Adjustments
& adjustments
,
45 std::vector
<size_t>* offsets_for_adjustment
);
47 // Adjusts the single |offset| to reflect the adjustments recorded in
// |adjustments|.
49 static void AdjustOffset(const Adjustments
& adjustments
,
52 // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse
53 // of the adjustments recorded in |adjustments|. In other words, the offsets
54 // provided represent offsets into an adjusted string and the caller wants
55 // to know the offsets they correspond to in the original string. If an
56 // offset cannot be successfully unadjusted (e.g., because it points into
57 // the middle of a multibyte sequence), it will be set to string16::npos.
58 static void UnadjustOffsets(const Adjustments
& adjustments
,
59 std::vector
<size_t>* offsets_for_unadjustment
);
61 // Adjusts the single |offset| to reflect the reverse of the adjustments
62 // recorded in |adjustments|.
63 static void UnadjustOffset(const Adjustments
& adjustments
,
66 // Combines two sequential sets of adjustments, storing the combined revised
67 // adjustments in |adjustments_on_adjusted_string|. That is, suppose a
68 // string was altered in some way, with the alterations recorded as
69 // adjustments in |first_adjustments|. Then suppose the resulting string is
70 // further altered, with the alterations recorded as adjustments stored in
71 // |adjustments_on_adjusted_string|, with the offsets recorded in these
72 // adjustments being with respect to the intermediate string. This function
73 // combines the two sets of adjustments into one, storing the result in
74 // |adjustments_on_adjusted_string|, whose offsets are correct with respect
75 // to the original string.
77 // Assumes both parameters are sorted by increasing offset.
79 // WARNING: Only supports |first_adjustments| that involve collapsing ranges
80 // of text, not expanding ranges.
81 static void MergeSequentialAdjustments(
82 const Adjustments
& first_adjustments
,
83 Adjustments
* adjustments_on_adjusted_string
);
86 // Like the conversions in utf_string_conversions.h, but also fills in an
87 // |adjustments| parameter that reflects the alterations done to the string.
89 BASE_EXPORT
bool UTF8ToUTF16WithAdjustments(
93 base::OffsetAdjuster::Adjustments
* adjustments
);
// Overload that takes the UTF-8 input as a base::StringPiece and returns a
// string16 by value. |adjustments| is the out-parameter described in the
// comment above. NOTE(review): the implementation is not visible here, so
// behavior on invalid input should be confirmed in the .cc file.
94 BASE_EXPORT string16
UTF8ToUTF16WithAdjustments(
95 const base::StringPiece
& utf8
,
96 base::OffsetAdjuster::Adjustments
* adjustments
);
97 // As above, but instead internally examines the adjustments and applies them
98 // to |offsets_for_adjustment|. See comments by AdjustOffsets().
99 BASE_EXPORT string16
UTF8ToUTF16AndAdjustOffsets(
100 const base::StringPiece
& utf8
,
101 std::vector
<size_t>* offsets_for_adjustment
);
// Counterpart of UTF8ToUTF16AndAdjustOffsets() for the opposite direction:
// takes |utf16| as a base::StringPiece16 and returns a std::string.
// NOTE(review): presumably converts UTF-16 to UTF-8 and updates the entries
// of |offsets_for_adjustment| in place to refer to the returned string (see
// comments by AdjustOffsets()) -- inferred from the name and the parallel
// signature; confirm against the implementation.
103 BASE_EXPORT
std::string
UTF16ToUTF8AndAdjustOffsets(
104 const base::StringPiece16
& utf16
,
105 std::vector
<size_t>* offsets_for_adjustment
);
107 // Limiting function callable by std::for_each which will replace any value
108 // which is greater than |limit| with npos. Typically this is called with a
109 // string length to clamp offsets into the string to [0, length] (as opposed to
110 // [0, length); see comments above).
111 template <typename T
>
113 explicit LimitOffset(size_t limit
)
116 void operator()(size_t& offset
) {
126 #endif // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_