Include all dupe types (even when value is zero) in scan stats.
[chromium-blink-merge.git] / base / i18n / char_iterator.h
blob8174ef48f2262bab216448b02c8fb641d385522a
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef BASE_I18N_CHAR_ITERATOR_H_
6 #define BASE_I18N_CHAR_ITERATOR_H_
8 #include <string>
10 #include "base/basictypes.h"
11 #include "base/i18n/base_i18n_export.h"
12 #include "base/strings/string16.h"
14 // The CharIterator classes iterate through the characters in UTF8 and
15 // UTF16 strings. Example usage:
17 // UTF8CharIterator iter(&str);
18 // while (!iter.end()) {
19 // VLOG(1) << iter.get();
20 // iter.Advance();
21 // }
// When OS_WIN is defined, uint8_t is declared here as an alias for
// unsigned char; UTF8CharIterator below stores its string pointer as
// const uint8_t*.
// NOTE(review): presumably a workaround for Windows toolchains that lacked
// <stdint.h> -- confirm whether it is still required.
23 #if defined(OS_WIN)
24 typedef unsigned char uint8_t;
25 #endif
27 namespace base {
28 namespace i18n {
30 class BASE_I18N_EXPORT UTF8CharIterator {
31 public:
32 // Requires |str| to live as long as the UTF8CharIterator does.
33 explicit UTF8CharIterator(const std::string* str);
34 ~UTF8CharIterator();
36 // Return the starting array index of the current character within the
37 // string.
38 int32 array_pos() const { return array_pos_; }
40 // Return the logical index of the current character, independent of the
41 // number of bytes each character takes.
42 int32 char_pos() const { return char_pos_; }
44 // Return the current char.
45 int32 get() const { return char_; }
47 // Returns true if we're at the end of the string.
48 bool end() const { return array_pos_ == len_; }
50 // Advance to the next actual character. Returns false if we're at the
51 // end of the string.
52 bool Advance();
54 private:
55 // The string we're iterating over.
56 const uint8_t* str_;
58 // The length of the encoded string.
59 int32 len_;
61 // Array index.
62 int32 array_pos_;
64 // The next array index.
65 int32 next_pos_;
67 // Character index.
68 int32 char_pos_;
70 // The current character.
71 int32 char_;
73 DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator);
76 class BASE_I18N_EXPORT UTF16CharIterator {
77 public:
78 // Requires |str| to live as long as the UTF16CharIterator does.
79 explicit UTF16CharIterator(const string16* str);
80 UTF16CharIterator(const char16* str, size_t str_len);
81 ~UTF16CharIterator();
83 // Return the starting array index of the current character within the
84 // string.
85 int32 array_pos() const { return array_pos_; }
87 // Return the logical index of the current character, independent of the
88 // number of codewords each character takes.
89 int32 char_pos() const { return char_pos_; }
91 // Return the current char.
92 int32 get() const { return char_; }
94 // Returns true if we're at the end of the string.
95 bool end() const { return array_pos_ == len_; }
97 // Advance to the next actual character. Returns false if we're at the
98 // end of the string.
99 bool Advance();
101 private:
102 // Fills in the current character we found and advances to the next
103 // character, updating all flags as necessary.
104 void ReadChar();
106 // The string we're iterating over.
107 const char16* str_;
109 // The length of the encoded string.
110 int32 len_;
112 // Array index.
113 int32 array_pos_;
115 // The next array index.
116 int32 next_pos_;
118 // Character index.
119 int32 char_pos_;
121 // The current character.
122 int32 char_;
124 DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator);
127 } // namespace i18n
128 } // namespace base
130 #endif // BASE_I18N_CHAR_ITERATOR_H_