// Tokenize a string into an array of strings. The delimiter can be either
// a set of chars (as a C string) or a Boost regular expression.
// Provides a read-only random access iterator. The array is not
// sortable, or otherwise writeable.
// Each token in the input string is only extracted when it is asked
// for. On construction, the positions of the token delimiters are
// found and stored in a std::vector.
// Written by C. Ashley Sanders, a.sanders@man.ac.uk, University of
// Manchester, 16/7/2004.
14 # ifndef TOKENARRAY_HH_
15 # define TOKENARRAY_HH_
20 # include <boost/regex.hpp>
24 template<class CharT
= char>
27 typedef tokenarray
<CharT
> _Myt
;
28 typedef typename
std::basic_string
<CharT
> String
;
30 typedef typename
boost::basic_regex
<CharT
> RegExp
;
32 typedef typename
String::size_type string_size_type
;
33 typedef std::vector
<std::pair
<string_size_type
, string_size_type
> > tarray
;
34 typedef typename
tarray::size_type size_type
;
36 class const_iterator
{
38 // This class could derive from std::iterator rather than have all
39 // the typedefs below. However, neither of the two versions of the
40 // STL I have seem to cut the mustard when it comes to std::iterator.
41 typedef std::random_access_iterator_tag iterator_category
;
42 typedef String value_type
;
43 typedef typename
tarray::size_type difference_type
;
44 typedef String
* pointer
;
45 typedef String
& reference
;
48 const tokenarray
<CharT
> *pvs
;
51 const_iterator () : pvs(0), i(0) {}
52 const_iterator (const const_iterator
&s
)
53 : pvs(s
.pvs
), i(s
.i
) {};
54 const_iterator (const tokenarray
<CharT
> *ps
, difference_type d
)
56 const_iterator
& operator= (const const_iterator
&s
) {
64 const_iterator
& operator++ () { *this += 1; return *this; };
65 const const_iterator
operator++ (int) {
66 const_iterator
s (*this);
70 const_iterator
& operator+= (difference_type d
) { i
+= d
; return *this; }
71 const_iterator
operator+ (difference_type d
) const {
72 const_iterator
s (*this);
75 const_iterator
& operator-- () { *this -= 1; return *this; };
76 const const_iterator
operator-- (int) {
77 const_iterator
s (*this);
81 const_iterator
& operator-= (difference_type d
) { i
-= d
; return *this; }
82 const_iterator
operator- (difference_type d
) const {
83 const_iterator
s (*this);
86 difference_type
operator- (const const_iterator
&s
) const {
89 bool operator== (const const_iterator
&s
) const {
92 bool operator< (const const_iterator
&s
) const {
95 bool operator<= (const const_iterator
&s
) const {
98 bool operator> (const const_iterator
&s
) const {
101 bool operator>= (const const_iterator
&s
) const {
104 bool operator!= (const const_iterator
&s
) const {
107 const value_type
operator[] (difference_type d
) const {
110 const value_type
operator* () const {
122 tokenarray (const String
&s
, const CharT
*pc
) : st(s
) {
123 string_size_type pos
;
124 for (pos
= 0; (pos
= st
.find_first_of (pc
, pos
)) != String::npos
; ++pos
)
125 vsz
.push_back (std::make_pair (pos
, pos
+1));
127 # ifndef ASH_NO_REGEX
128 tokenarray (const String
&s
, const RegExp
&re
) : st(s
) {
129 boost::match_results
<typename
String::const_iterator
> match
;
130 typename
String::const_iterator pc
= st
.begin (), pcEnd
= st
.end ();
131 string_size_type szOffset
= 0;
132 while (boost::regex_search (pc
, pcEnd
, match
, re
))
134 vsz
.push_back (std::make_pair (szOffset
+ match
.position (),
135 szOffset
+ match
.position () + match
.length ()));
136 pc
= match
[0].second
;
137 szOffset
+= match
.position() + match
.length();
140 # endif // ASH_NO_REGEX
141 tokenarray (const tokenarray
& s
) : vsz(s
.vsz
), st(s
.st
) {};
142 tokenarray
& operator= (const tokenarray
&s
) {
147 void swap (tokenarray
&s
) {
151 String
data () const { return st
; }
152 void data (const String
&s
, const CharT
*pc
) {
153 tokenarray
tmp (s
, pc
);
156 size_type
size () const {
157 return st
.empty () ? 0 : vsz
.size () + 1;
159 const String
operator[] (size_type i
) const {
160 // Note: no bounds checking on vsz.
161 string_size_type start
= (i
) ? vsz
[i
-1].second
: 0;
162 string_size_type len
= (i
< size ()-1) ? vsz
[i
].first
- start
: String::npos
;
163 return st
.substr (start
, len
);
165 const_iterator
begin () const {
166 return const_iterator (this, 0);
168 const_iterator
end () const {
169 return const_iterator (this, size ());
175 # endif // TOKENARRAY_HH_