Add ICU message format support
[chromium-blink-merge.git] / base / strings / string_split.h
blobdc4108d327ff2a4653bd58090bfcdc19995150b4
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef BASE_STRINGS_STRING_SPLIT_H_
6 #define BASE_STRINGS_STRING_SPLIT_H_
8 #include <string>
9 #include <utility>
10 #include <vector>
12 #include "base/base_export.h"
13 #include "base/strings/string16.h"
14 #include "base/strings/string_piece.h"
16 namespace base {
// Selects whether the splitting functions declared below leave each result
// as-is (KEEP_WHITESPACE) or strip whitespace from it (TRIM_WHITESPACE).
// NOTE(review): exactly which characters count as whitespace is not stated in
// this header -- confirm against the implementation.
18 enum WhitespaceHandling {
19 KEEP_WHITESPACE,
20 TRIM_WHITESPACE,
// Selects whether empty pieces are kept in the output of the splitting
// functions declared below.
23 enum SplitResult {
24 // Strictly return all results, including empty ones.
26 // If the input is ",," and the separator is ',' this will return a
27 // vector of three empty strings.
28 SPLIT_WANT_ALL,
30 // Only nonempty pieces will be added to the results. Multiple separators
31 // will be coalesced. Separators at the beginning and end of the input will
32 // be ignored. With TRIM_WHITESPACE, whitespace-only results will be dropped.
34 // If the input is ",," and the separator is ',', this will return an empty
35 // vector.
36 SPLIT_WANT_NONEMPTY,
39 // Split the given string on ANY of the given separators, returning copies of
40 // the result.
// |separators| is treated as a set of characters: the input is split at each
// occurrence of any one of them. |whitespace| and |result_type| select the
// whitespace and empty-result handling (see WhitespaceHandling and
// SplitResult). Overloads are provided for std::string and string16 results.
42 // To split on either commas or semicolons, keeping all whitespace:
44 // std::vector<std::string> tokens = base::SplitString(
45 // input, ",;", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
46 BASE_EXPORT std::vector<std::string> SplitString(
47 StringPiece input,
48 StringPiece separators,
49 WhitespaceHandling whitespace,
50 SplitResult result_type);
51 BASE_EXPORT std::vector<string16> SplitString(
52 StringPiece16 input,
53 StringPiece16 separators,
54 WhitespaceHandling whitespace,
55 SplitResult result_type);
57 // Like SplitString above except it returns a vector of StringPieces which
58 // reference the original buffer without copying. Although you have to be
59 // careful to keep the original string alive and unmodified while the returned
60 // pieces are in use, this provides an efficient way to iterate through tokens
// in a string.
62 // To iterate through all whitespace-separated tokens in an input string:
64 // for (const auto& cur :
65 // base::SplitStringPiece(input, base::kWhitespaceASCII,
66 // base::KEEP_WHITESPACE,
67 // base::SPLIT_WANT_NONEMPTY)) {
68 // ...
// }
69 BASE_EXPORT std::vector<StringPiece> SplitStringPiece(
70 StringPiece input,
71 StringPiece separators,
72 WhitespaceHandling whitespace,
73 SplitResult result_type);
74 BASE_EXPORT std::vector<StringPiece16> SplitStringPiece(
75 StringPiece16 input,
76 StringPiece16 separators,
77 WhitespaceHandling whitespace,
78 SplitResult result_type);
// A list of (key, value) string pairs; this is the output type of
// SplitStringIntoKeyValuePairs() below.
80 using StringPairs = std::vector<std::pair<std::string, std::string>>;
82 // Splits |line| into key value pairs according to the given delimiters and
83 // removes whitespace leading each key and trailing each value. Returns true
84 // only if each pair has a non-empty key and value. |key_value_pairs| will
85 // include ("","") pairs for entries without |key_value_delimiter|.
// NOTE(review): going by the parameter names, |key_value_delimiter| separates
// a key from its value and |key_value_pair_delimiter| separates one pair from
// the next -- confirm against the implementation.
86 BASE_EXPORT bool SplitStringIntoKeyValuePairs(const std::string& line,
87 char key_value_delimiter,
88 char key_value_pair_delimiter,
89 StringPairs* key_value_pairs);
91 // Similar to SplitString, but uses a substring delimiter instead of a list of
92 // characters that are all possible delimiters.
// NOTE(review): presumably |str| is the input, |s| is the delimiter substring
// and |r| receives the resulting pieces -- the short parameter names do not
// say so explicitly; confirm against the implementation.
94 // TODO(brettw) this should probably be changed and expanded to provide a
95 // mirror of the SplitString[Piece] API above, just with the different
96 // delimiter handling.
97 BASE_EXPORT void SplitStringUsingSubstr(const string16& str,
98 const string16& s,
99 std::vector<string16>* r);
100 BASE_EXPORT void SplitStringUsingSubstr(const std::string& str,
101 const std::string& s,
102 std::vector<std::string>* r);
104 // -----------------------------------------------------------------------------
105 // Backwards-compat wrappers
107 // New code should use one of the more general variants above.
108 // TODO(brettw) remove these and convert to the versions above.
110 // Splits |str| into a vector of strings delimited by |c|, placing the results
111 // in |r|. If several instances of |c| are contiguous, or if |str| begins with
112 // or ends with |c|, then an empty string is inserted.
114 // Every substring is trimmed of any leading or trailing white space.
115 // NOTE: |c| must be in BMP (Basic Multilingual Plane)
// NOTE(review): this looks like the single-separator equivalent of calling the
// general SplitString with TRIM_WHITESPACE and SPLIT_WANT_ALL -- confirm edge
// cases (e.g. empty input) before converting callers.
116 BASE_EXPORT void SplitString(const string16& str,
117 char16 c,
118 std::vector<string16>* r);
// std::string overload of the function above.
120 // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
121 // the trailing byte of a multi-byte character can be in the ASCII range.
122 // UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
123 // Note: |c| must be in the ASCII range.
124 BASE_EXPORT void SplitString(const std::string& str,
125 char c,
126 std::vector<std::string>* r);
128 // The same as SplitString, but doesn't trim white space from the results.
129 // NOTE: |c| must be in BMP (Basic Multilingual Plane)
// NOTE(review): this looks like the single-separator equivalent of calling the
// general SplitString with KEEP_WHITESPACE and SPLIT_WANT_ALL -- confirm edge
// cases (e.g. empty input) before converting callers.
130 BASE_EXPORT void SplitStringDontTrim(StringPiece16 str,
131 char16 c,
132 std::vector<string16>* r);
// std::string overload of the function above.
133 // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which
134 // the trailing byte of a multi-byte character can be in the ASCII range.
135 // UTF-8, and other single/multi-byte ASCII-compatible encodings are OK.
136 // Note: |c| must be in the ASCII range.
137 BASE_EXPORT void SplitStringDontTrim(StringPiece str,
138 char c,
139 std::vector<std::string>* result);
// Splits |str| at whitespace, placing the pieces in |result|.
141 // WARNING: this uses whitespace as defined by the HTML5 spec (ASCII whitespace
142 // only).
144 // The difference between this and calling SplitString with the whitespace
145 // characters as separators is the treatment of the first element when the
146 // string starts with whitespace.
148 //   Input        SplitString     SplitStringAlongWhitespace
149 //   --------------------------------------------------------
150 //   " a "        "", "a"         "a"
151 BASE_EXPORT void SplitStringAlongWhitespace(const string16& str,
152 std::vector<string16>* result);
153 BASE_EXPORT void SplitStringAlongWhitespace(const std::string& str,
154 std::vector<std::string>* result);
156 } // namespace base
158 #endif // BASE_STRINGS_STRING_SPLIT_H_