1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/string_split.h"
7 #include "base/logging.h"
8 #include "base/strings/string_util.h"
9 #include "base/third_party/icu/icu_utf.h"
15 // PieceToOutputType converts a StringPiece as needed to a given output type,
16 // which is either the same type of StringPiece (a NOP) or the corresponding
17 // non-piece string type.
19 // The default converter is a NOP, it works when the OutputType is the
20 // correct StringPiece.
21 template<typename Str
, typename OutputType
>
22 OutputType
PieceToOutputType(BasicStringPiece
<Str
> piece
) {
25 template<> // Convert StringPiece to std::string
26 std::string PieceToOutputType
<std::string
, std::string
>(StringPiece piece
) {
27 return piece
.as_string();
29 template<> // Convert StringPiece16 to string16.
30 string16 PieceToOutputType
<string16
, string16
>(StringPiece16 piece
) {
31 return piece
.as_string();
34 // Returns either the ASCII or UTF-16 whitespace.
35 template<typename Str
> BasicStringPiece
<Str
> WhitespaceForType();
36 template<> StringPiece16 WhitespaceForType
<string16
>() {
37 return kWhitespaceUTF16
;
39 template<> StringPiece WhitespaceForType
<std::string
>() {
40 return kWhitespaceASCII
;
43 // Optimize the single-character case to call find() on the string instead,
44 // since this is the common case and can be made faster. This could have been
45 // done with template specialization too, but would have been less clear.
47 // There is no corresponding FindFirstNotOf because StringPiece already
48 // implements these different versions that do the optimized searching.
49 size_t FindFirstOf(StringPiece piece
, char c
, size_t pos
) {
50 return piece
.find(c
, pos
);
52 size_t FindFirstOf(StringPiece16 piece
, char16 c
, size_t pos
) {
53 return piece
.find(c
, pos
);
55 size_t FindFirstOf(StringPiece piece
, StringPiece one_of
, size_t pos
) {
56 return piece
.find_first_of(one_of
, pos
);
58 size_t FindFirstOf(StringPiece16 piece
, StringPiece16 one_of
, size_t pos
) {
59 return piece
.find_first_of(one_of
, pos
);
62 // General string splitter template. Can take 8- or 16-bit input, can produce
63 // the corresponding string or StringPiece output, and can take single- or
64 // multiple-character delimiters.
66 // DelimiterType is either a character (Str::value_type) or a string piece of
67 // multiple characters (BasicStringPiece<Str>). StringPiece has a version of
68 // find for both of these cases, and the single-character version is the most
69 // common and can be implemented faster, which is why this is a template.
70 template<typename Str
, typename OutputStringType
, typename DelimiterType
>
71 static std::vector
<OutputStringType
> SplitStringT(
72 BasicStringPiece
<Str
> str
,
73 DelimiterType delimiter
,
74 WhitespaceHandling whitespace
,
75 SplitResult result_type
) {
76 std::vector
<OutputStringType
> result
;
81 while (start
!= Str::npos
) {
82 size_t end
= FindFirstOf(str
, delimiter
, start
);
84 BasicStringPiece
<Str
> piece
;
85 if (end
== Str::npos
) {
86 piece
= str
.substr(start
);
89 piece
= str
.substr(start
, end
- start
);
93 if (whitespace
== TRIM_WHITESPACE
)
94 piece
= TrimString(piece
, WhitespaceForType
<Str
>(), TRIM_ALL
);
96 if (result_type
== SPLIT_WANT_ALL
|| !piece
.empty())
97 result
.push_back(PieceToOutputType
<Str
, OutputStringType
>(piece
));
102 bool AppendStringKeyValue(StringPiece input
,
104 StringPairs
* result
) {
105 // Always append a new item regardless of success (it might be empty). The
106 // below code will copy the strings directly into the result pair.
107 result
->resize(result
->size() + 1);
108 auto& result_pair
= result
->back();
110 // Find the delimiter.
111 size_t end_key_pos
= input
.find_first_of(delimiter
);
112 if (end_key_pos
== std::string::npos
) {
113 DVLOG(1) << "cannot find delimiter in: " << input
;
114 return false; // No delimiter.
116 input
.substr(0, end_key_pos
).CopyToString(&result_pair
.first
);
118 // Find the value string.
119 StringPiece remains
= input
.substr(end_key_pos
, input
.size() - end_key_pos
);
120 size_t begin_value_pos
= remains
.find_first_not_of(delimiter
);
121 if (begin_value_pos
== StringPiece::npos
) {
122 DVLOG(1) << "cannot parse value from input: " << input
;
123 return false; // No value.
125 remains
.substr(begin_value_pos
, remains
.size() - begin_value_pos
)
126 .CopyToString(&result_pair
.second
);
131 template <typename Str
>
132 void SplitStringUsingSubstrT(BasicStringPiece
<Str
> input
,
133 BasicStringPiece
<Str
> delimiter
,
134 std::vector
<Str
>* result
) {
135 using Piece
= BasicStringPiece
<Str
>;
136 using size_type
= typename
Piece::size_type
;
139 size_type begin_index
= 0;
141 size_type end_index
= input
.find(delimiter
, begin_index
);
142 if (end_index
== Piece::npos
) {
143 // No delimiter, use the rest of the string.
144 Piece term
= TrimString(input
.substr(begin_index
),
145 WhitespaceForType
<Str
>(), TRIM_ALL
);
146 result
->push_back(term
.as_string());
149 Piece term
= TrimString(input
.substr(begin_index
, end_index
- begin_index
),
150 WhitespaceForType
<Str
>(), TRIM_ALL
);
151 result
->push_back(term
.as_string());
152 begin_index
= end_index
+ delimiter
.size();
158 std::vector
<std::string
> SplitString(StringPiece input
,
159 StringPiece separators
,
160 WhitespaceHandling whitespace
,
161 SplitResult result_type
) {
162 if (separators
.size() == 1) {
163 return SplitStringT
<std::string
, std::string
, char>(
164 input
, separators
[0], whitespace
, result_type
);
166 return SplitStringT
<std::string
, std::string
, StringPiece
>(
167 input
, separators
, whitespace
, result_type
);
170 std::vector
<string16
> SplitString(StringPiece16 input
,
171 StringPiece16 separators
,
172 WhitespaceHandling whitespace
,
173 SplitResult result_type
) {
174 if (separators
.size() == 1) {
175 return SplitStringT
<string16
, string16
, char16
>(
176 input
, separators
[0], whitespace
, result_type
);
178 return SplitStringT
<string16
, string16
, StringPiece16
>(
179 input
, separators
, whitespace
, result_type
);
182 std::vector
<StringPiece
> SplitStringPiece(StringPiece input
,
183 StringPiece separators
,
184 WhitespaceHandling whitespace
,
185 SplitResult result_type
) {
186 if (separators
.size() == 1) {
187 return SplitStringT
<std::string
, StringPiece
, char>(
188 input
, separators
[0], whitespace
, result_type
);
190 return SplitStringT
<std::string
, StringPiece
, StringPiece
>(
191 input
, separators
, whitespace
, result_type
);
194 std::vector
<StringPiece16
> SplitStringPiece(StringPiece16 input
,
195 StringPiece16 separators
,
196 WhitespaceHandling whitespace
,
197 SplitResult result_type
) {
198 if (separators
.size() == 1) {
199 return SplitStringT
<string16
, StringPiece16
, char16
>(
200 input
, separators
[0], whitespace
, result_type
);
202 return SplitStringT
<string16
, StringPiece16
, StringPiece16
>(
203 input
, separators
, whitespace
, result_type
);
206 bool SplitStringIntoKeyValuePairs(StringPiece input
,
207 char key_value_delimiter
,
208 char key_value_pair_delimiter
,
209 StringPairs
* key_value_pairs
) {
210 key_value_pairs
->clear();
212 std::vector
<StringPiece
> pairs
= SplitStringPiece(
213 input
, std::string(1, key_value_pair_delimiter
),
214 TRIM_WHITESPACE
, SPLIT_WANT_NONEMPTY
);
215 key_value_pairs
->reserve(pairs
.size());
218 for (const StringPiece
& pair
: pairs
) {
219 if (!AppendStringKeyValue(pair
, key_value_delimiter
, key_value_pairs
)) {
220 // Don't return here, to allow for pairs without associated
221 // value or key; just record that the split failed.
228 void SplitStringUsingSubstr(StringPiece16 input
,
229 StringPiece16 delimiter
,
230 std::vector
<string16
>* result
) {
231 SplitStringUsingSubstrT(input
, delimiter
, result
);
234 void SplitStringUsingSubstr(StringPiece input
,
235 StringPiece delimiter
,
236 std::vector
<std::string
>* result
) {
237 SplitStringUsingSubstrT(input
, delimiter
, result
);