1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef BASE_STRINGS_STRING_SPLIT_H_
6 #define BASE_STRINGS_STRING_SPLIT_H_
12 #include "base/base_export.h"
13 #include "base/strings/string16.h"
14 #include "base/strings/string_piece.h"
18 enum WhitespaceHandling
{
24 // Strictly return all results.
26 // If the input is ",," and the separator is ',' this will return a
27 // vector of three empty strings.
30 // Only nonempty results will be added to the results. Multiple separators
31 // will be coalesced. Separators at the beginning and end of the input will
32 // be ignored. With TRIM_WHITESPACE, whitespace-only results will be dropped.
34 // If the input is ",," and the separator is ',', this will return an empty vector.
39 // Split the given string on ANY of the given separators, returning copies of the result.
42 // To split on either commas or semicolons, keeping all whitespace:
44 // std::vector<std::string> tokens = base::SplitString(
45 // input, ",;", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
// std::string flavor: the tokens are returned as a std::vector<std::string>.
// |separators| is the set of separator characters, |whitespace| selects the
// whitespace handling and |result_type| selects which results are kept.
// NOTE(review): the usage example passes the input string as a first argument,
// but no such parameter is visible in this declaration - confirm that a
// parameter line has not been lost.
46 BASE_EXPORT
std::vector
<std::string
> SplitString(
48 StringPiece separators
,
49 WhitespaceHandling whitespace
,
50 SplitResult result_type
);
// string16 flavor: takes the separators as a StringPiece16 and returns the
// tokens as a std::vector<string16>.
// NOTE(review): as declared here there is no parameter for the string being
// split - confirm that a parameter line has not been lost.
51 BASE_EXPORT
std::vector
<string16
> SplitString(
53 StringPiece16 separators
,
54 WhitespaceHandling whitespace
,
55 SplitResult result_type
);
57 // Like SplitString above except it returns a vector of StringPieces which
58 // reference the original buffer without copying. Although you have to be
59 // careful to keep the original string unmodified, this provides an efficient
60 // way to iterate through tokens in a string.
62 // To iterate through all whitespace-separated tokens in an input string:
64 // for (const auto& cur :
65 // base::SplitStringPiece(input, base::kWhitespaceASCII,
66 // base::KEEP_WHITESPACE,
67 // base::SPLIT_WANT_NONEMPTY)) { ... }
// StringPiece flavor: returns a std::vector<StringPiece>. |separators|,
// |whitespace| and |result_type| have the same types as in SplitString.
// NOTE(review): the usage example passes |input| as a first argument, but no
// such parameter is visible in this declaration - confirm that a parameter
// line has not been lost.
69 BASE_EXPORT
std::vector
<StringPiece
> SplitStringPiece(
71 StringPiece separators
,
72 WhitespaceHandling whitespace
,
73 SplitResult result_type
);
// StringPiece16 flavor: takes the separators as a StringPiece16 and returns a
// std::vector<StringPiece16>.
// NOTE(review): as declared here there is no parameter for the string being
// split - confirm that a parameter line has not been lost.
74 BASE_EXPORT
std::vector
<StringPiece16
> SplitStringPiece(
76 StringPiece16 separators
,
77 WhitespaceHandling whitespace
,
78 SplitResult result_type
);
// A vector of (std::string, std::string) pairs. It is the type that
// SplitStringIntoKeyValuePairs takes (by pointer) as |key_value_pairs|.
80 using StringPairs
= std::vector
<std::pair
<std::string
, std::string
>>;
82 // Splits |line| into key value pairs according to the given delimiters and
83 // removes whitespace leading each key and trailing each value. Returns true
84 // only if each pair has a non-empty key and value. |key_value_pairs| will
85 // include ("","") pairs for entries without |key_value_delimiter|.
// Parameters:
//   line                     - the text to split.
//   key_value_delimiter      - presumably the character between a key and its
//                              value (inferred from the name - confirm).
//   key_value_pair_delimiter - presumably the character between consecutive
//                              pairs (inferred from the name - confirm).
//   key_value_pairs          - pointer to the StringPairs holding the pairs.
// The bool return value reports whether every pair had a non-empty key and
// value.
86 BASE_EXPORT
bool SplitStringIntoKeyValuePairs(const std::string
& line
,
87 char key_value_delimiter
,
88 char key_value_pair_delimiter
,
89 StringPairs
* key_value_pairs
);
91 // Similar to SplitString, but use a substring delimiter instead of a list of
92 // characters that are all possible delimiters.
94 // TODO(brettw) this should probably be changed and expanded to provide a
95 // mirror of the SplitString[Piece] API above, just with the different
96 // delimiter handling.
// string16 flavor. |str| is the string to split; |r| is presumably the vector
// that receives the pieces (confirm against the implementation).
// NOTE(review): the std::string overload takes a delimiter |s| between |str|
// and |r|; no such parameter is visible here - confirm that a parameter line
// has not been lost.
97 BASE_EXPORT
void SplitStringUsingSubstr(const string16
& str
,
99 std::vector
<string16
>* r
);
// std::string flavor. |str| is the string to split and |s| is presumably the
// substring delimiter; |r| is presumably the vector that receives the pieces
// (roles inferred from the comment and names - confirm).
100 BASE_EXPORT
void SplitStringUsingSubstr(const std::string
& str
,
101 const std::string
& s
,
102 std::vector
<std::string
>* r
);
104 // -----------------------------------------------------------------------------
105 // Backwards-compat wrappers
107 // New code should use one of the more general variants above.
108 // TODO(brettw) remove these and convert to the versions above.
110 // Splits |str| into a vector of strings delimited by |c|, placing the results
111 // in |r|. If several instances of |c| are contiguous, or if |str| begins with
112 // or ends with |c|, then an empty string is inserted.
114 // Every substring is trimmed of any leading or trailing white space.
115 // NOTE: |c| must be in BMP (Basic Multilingual Plane)
// string16 flavor of the backwards-compat wrapper.
// NOTE(review): the comment above documents a delimiter |c|, but only |str|
// and |r| are visible in this declaration - confirm that a parameter line has
// not been lost.
116 BASE_EXPORT
void SplitString(const string16
& str
,
118 std::vector
<string16
>* r
);
120 // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
121 // the trailing byte of a multi-byte character can be in the ASCII range.
122 // UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
123 // Note: |c| must be in the ASCII range.
// std::string flavor of the backwards-compat wrapper.
// NOTE(review): the comment above requires |c| to be in the ASCII range, but
// only |str| and |r| are visible in this declaration - confirm that a
// parameter line has not been lost.
124 BASE_EXPORT
void SplitString(const std::string
& str
,
126 std::vector
<std::string
>* r
);
128 // The same as SplitString, but don't trim white space.
129 // NOTE: |c| must be in BMP (Basic Multilingual Plane)
// StringPiece16 flavor; the output vector |r| holds string16 values.
// NOTE(review): the comment above documents a delimiter |c|, but only |str|
// and |r| are visible in this declaration - confirm that a parameter line has
// not been lost.
130 BASE_EXPORT
void SplitStringDontTrim(StringPiece16 str
,
132 std::vector
<string16
>* r
);
133 // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
134 // the trailing byte of a multi-byte character can be in the ASCII range.
135 // UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
136 // Note: |c| must be in the ASCII range.
// StringPiece flavor; the output vector |result| holds std::string values.
// NOTE(review): the comment above requires |c| to be in the ASCII range, but
// only |str| and |result| are visible in this declaration - confirm that a
// parameter line has not been lost.
137 BASE_EXPORT
void SplitStringDontTrim(StringPiece str
,
139 std::vector
<std::string
>* result
);
141 // WARNING: this uses whitespace as defined by the HTML5 spec (ASCII whitespace).
144 // The difference between this and calling SplitString with the whitespace
145 // characters as separators is the treatment of the first element when the
146 // string starts with whitespace.
148 // Input SplitString SplitStringAlongWhitespace
149 // --------------------------------------------------------
// string16 flavor. |str| is the string to split; |result| is presumably the
// vector that receives the tokens (confirm against the implementation).
151 BASE_EXPORT
void SplitStringAlongWhitespace(const string16
& str
,
152 std::vector
<string16
>* result
);
// std::string flavor. |str| is the string to split; |result| is presumably
// the vector that receives the tokens (confirm against the implementation).
153 BASE_EXPORT
void SplitStringAlongWhitespace(const std::string
& str
,
154 std::vector
<std::string
>* result
);
158 #endif // BASE_STRINGS_STRING_SPLIT_H_