// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "base/strings/string_split.h"
7 #include "base/logging.h"
8 #include "base/strings/string_util.h"
9 #include "base/strings/utf_string_conversions.h"
10 #include "base/third_party/icu/icu_utf.h"
14 template<typename STR
>
15 static void SplitStringT(const STR
& str
,
16 const typename
STR::value_type s
,
18 std::vector
<STR
>* r
) {
21 size_t c
= str
.size();
22 for (size_t i
= 0; i
<= c
; ++i
) {
23 if (i
== c
|| str
[i
] == s
) {
24 STR
tmp(str
, last
, i
- last
);
26 TrimWhitespace(tmp
, TRIM_ALL
, &tmp
);
27 // Avoid converting an empty or all-whitespace source string into a vector
28 // of one empty string.
29 if (i
!= c
|| !r
->empty() || !tmp
.empty())
36 void SplitString(const string16
& str
,
38 std::vector
<string16
>* r
) {
39 DCHECK(CBU16_IS_SINGLE(c
));
40 SplitStringT(str
, c
, true, r
);
43 void SplitString(const std::string
& str
,
45 std::vector
<std::string
>* r
) {
50 SplitStringT(str
, c
, true, r
);
53 bool SplitStringIntoKeyValues(
54 const std::string
& line
,
55 char key_value_delimiter
,
56 std::string
* key
, std::vector
<std::string
>* values
) {
60 // Find the key string.
61 size_t end_key_pos
= line
.find_first_of(key_value_delimiter
);
62 if (end_key_pos
== std::string::npos
) {
63 DVLOG(1) << "cannot parse key from line: " << line
;
64 return false; // no key
66 key
->assign(line
, 0, end_key_pos
);
68 // Find the values string.
69 std::string
remains(line
, end_key_pos
, line
.size() - end_key_pos
);
70 size_t begin_values_pos
= remains
.find_first_not_of(key_value_delimiter
);
71 if (begin_values_pos
== std::string::npos
) {
72 DVLOG(1) << "cannot parse value from line: " << line
;
73 return false; // no value
75 std::string
values_string(remains
, begin_values_pos
,
76 remains
.size() - begin_values_pos
);
78 // Construct the values vector.
79 values
->push_back(values_string
);
83 bool SplitStringIntoKeyValuePairs(const std::string
& line
,
84 char key_value_delimiter
,
85 char key_value_pair_delimiter
,
86 StringPairs
* key_value_pairs
) {
87 key_value_pairs
->clear();
89 std::vector
<std::string
> pairs
;
90 SplitString(line
, key_value_pair_delimiter
, &pairs
);
93 for (size_t i
= 0; i
< pairs
.size(); ++i
) {
94 // Empty pair. SplitStringIntoKeyValues is more strict about an empty pair
95 // line, so continue with the next pair.
100 std::vector
<std::string
> value
;
101 if (!SplitStringIntoKeyValues(pairs
[i
],
104 // Don't return here, to allow for keys without associated
105 // values; just record that our split failed.
108 DCHECK_LE(value
.size(), 1U);
109 key_value_pairs
->push_back(
110 make_pair(key
, value
.empty() ? std::string() : value
[0]));
115 template <typename STR
>
116 static void SplitStringUsingSubstrT(const STR
& str
,
118 std::vector
<STR
>* r
) {
120 typename
STR::size_type begin_index
= 0;
122 const typename
STR::size_type end_index
= str
.find(s
, begin_index
);
123 if (end_index
== STR::npos
) {
124 const STR term
= str
.substr(begin_index
);
126 TrimWhitespace(term
, TRIM_ALL
, &tmp
);
130 const STR term
= str
.substr(begin_index
, end_index
- begin_index
);
132 TrimWhitespace(term
, TRIM_ALL
, &tmp
);
134 begin_index
= end_index
+ s
.size();
138 void SplitStringUsingSubstr(const string16
& str
,
140 std::vector
<string16
>* r
) {
141 SplitStringUsingSubstrT(str
, s
, r
);
144 void SplitStringUsingSubstr(const std::string
& str
,
145 const std::string
& s
,
146 std::vector
<std::string
>* r
) {
147 SplitStringUsingSubstrT(str
, s
, r
);
150 void SplitStringDontTrim(const string16
& str
,
152 std::vector
<string16
>* r
) {
153 DCHECK(CBU16_IS_SINGLE(c
));
154 SplitStringT(str
, c
, false, r
);
157 void SplitStringDontTrim(const std::string
& str
,
159 std::vector
<std::string
>* r
) {
160 DCHECK(IsStringUTF8(str
));
165 SplitStringT(str
, c
, false, r
);
// Splits |str| into maximal runs of non-whitespace characters and stores them,
// in order, in |*result| (which is cleared first). Runs of consecutive
// whitespace act as a single separator, and leading or trailing whitespace
// never produces an empty piece. An empty or all-whitespace |str| yields an
// empty vector.
template<typename STR>
void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
  result->clear();
  const size_t length = str.length();
  if (!length)
    return;

  bool last_was_ws = false;      // True while scanning a run of whitespace.
  size_t last_non_ws_start = 0;  // Start index of the current word.
  for (size_t i = 0; i < length; ++i) {
    switch (str[i]) {
      // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR.
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
        if (!last_was_ws) {
          // A word just ended here. At i == 0 there is no word before the
          // whitespace, so nothing is emitted.
          if (i > 0) {
            result->push_back(
                str.substr(last_non_ws_start, i - last_non_ws_start));
          }
          last_was_ws = true;
        }
        break;

      default:  // Not a space character.
        if (last_was_ws) {
          last_was_ws = false;
          last_non_ws_start = i;
        }
        break;
    }
  }
  // Emit the final word when the string does not end in whitespace.
  if (!last_was_ws) {
    result->push_back(
        str.substr(last_non_ws_start, length - last_non_ws_start));
  }
}
209 void SplitStringAlongWhitespace(const string16
& str
,
210 std::vector
<string16
>* result
) {
211 SplitStringAlongWhitespaceT(str
, result
);
214 void SplitStringAlongWhitespace(const std::string
& str
,
215 std::vector
<std::string
>* result
) {
216 SplitStringAlongWhitespaceT(str
, result
);