1 /*=========================================================================
3 Program: Insight Segmentation & Registration Toolkit
4 Module: $RCSfile: cmRegularExpression.h,v $
6 Date: $Date: 2002-02-25 23:14:01 $
7 Version: $Revision: 1.7 $
9 Copyright (c) 2002 Insight Consortium. All rights reserved.
10 See ITKCopyright.txt or http://www.itk.org/HTML/Copyright.htm for details.
12 This software is distributed WITHOUT ANY WARRANTY; without even
13 the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 PURPOSE. See the above copyright notices for more information.
16 =========================================================================*/
17 // Original Copyright notice:
18 // Copyright (C) 1991 Texas Instruments Incorporated.
20 // Permission is granted to any individual or institution to use, copy, modify,
21 // and distribute this software, provided that this complete copyright and
22 // permission notice is maintained, intact, in all copies and supporting
25 // Texas Instruments Incorporated provides this software "as is" without
26 // express or implied warranty.
28 // Created: MNF 06/13/89 Initial Design and Implementation
29 // Updated: LGO 08/09/89 Inherit from Generic
30 // Updated: MBN 09/07/89 Added conditional exception handling
31 // Updated: MBN 12/15/89 Sprinkled "const" qualifiers all over the place!
32 // Updated: DLS 03/22/91 New lite version
35 #ifndef cmRegularExpression_h
36 #define cmRegularExpression_h
38 #include "cmStandardIncludes.h"
/** Number of match slots recorded per search.  Slot 0 describes the whole
 *  match; the remaining slots describe parenthesized sub-expressions. */
const int NSUBEXP = 10;
42 /** \class cmRegularExpression
43 * \brief Implements pattern matching with regular expressions.
45 * This is the header file for the regular expression class. An object of
46 * this class contains a regular expression, in a special "compiled" format.
47 * This compiled format consists of several slots all kept as the objects
48 * private data. The cmRegularExpression class provides a convenient way to
49 * represent regular expressions. It makes it easy to search for the same
50 * regular expression in many different strings without having to compile a
51 * string to regular expression format more than necessary.
53 * This class implements pattern matching via regular expressions.
54 * A regular expression allows a programmer to specify complex
55 * patterns that can be searched for and matched against the
56 * character string of a string object. In its simplest form, a
57 * regular expression is a sequence of characters used to
58 * search for exact character matches. However, many times the
59 * exact sequence to be found is not known, or only a match at
60 * the beginning or end of a string is desired. The cmRegularExpression regu-
61 * lar expression class implements regular expression pattern
62 * matching as is found and implemented in many UNIX commands
65 * Example: The perl code
67 * $filename =~ m"([a-z]+)\.cc";
70 * Is written as follows in C++
72 * cmRegularExpression re("([a-z]+)\\.cc");
74 * cerr << re.match(1);
77 * The regular expression class provides a convenient mechanism
78 * for specifying and manipulating regular expressions. The
79 * regular expression object allows specification of such pat-
80 * terns by using the following regular expression metacharac-
83 * ^ Matches at beginning of a line
85 * $ Matches at end of a line
87 * . Matches any single character
89 * [ ] Matches any character(s) inside the brackets
91 * [^ ] Matches any character(s) not inside the brackets
93 * - Matches any character in range on either side of a dash
95 * * Matches preceding pattern zero or more times
97 * + Matches preceding pattern one or more times
99 * ? Matches preceding pattern zero or once only
101 * () Saves a matched expression and uses it in a later match
103 * Note that more than one of these metacharacters can be used
104 * in a single regular expression in order to create complex
105 * search patterns. For example, the pattern [^ab1-9] says to
106 * match any character sequence that does not begin with the
107 * characters "ab" followed by numbers in the series one
110 * There are three constructors for cmRegularExpression. One just creates an
111 * empty cmRegularExpression object. Another creates a cmRegularExpression
112 * object and initializes it with a regular expression that is given in the
113 * form of a char*. The third takes a reference to a cmRegularExpression
114 * object as an argument and creates an object initialized with the
115 * information from the given cmRegularExpression object.
117 * The find member function finds the first occurrence of the regular
118 * expression of that object in the string given to find as an argument. Find
119 * returns a boolean, and if true, mutates the private data appropriately.
120 * Find sets pointers to the beginning and end of the thing last found; they
121 * are pointers into the actual string that was searched. The start and end
122 * member functions return indices into the searched string that correspond
123 * to the beginning and end pointers respectively. The compile member
124 * function takes a char* and puts the compiled version of the char* argument
125 * into the object's private data fields. The == and != operators only check
126 * to see if the compiled regular expression is the same, and the
127 * deep_equal function also checks to see if the start and end pointers are
128 * the same. The is_valid function returns false if program is set to NULL,
129 * (i.e. there is no valid compiled expression). The set_invalid function sets
130 * the program to NULL (Warning: this deletes the compiled expression). The
131 * following examples may help clarify regular expression usage:
133 * * The regular expression "^hello" matches a "hello" only at the
134 * beginning of a line. It would match "hello there" but not "hi,
137 * * The regular expression "long$" matches a "long" only at the end
138 * of a line. It would match "so long\0", but not "long ago".
140 * * The regular expression "t..t..g" will match anything that has a
141 * "t" then any two characters, another "t", any two characters and
142 * then a "g". It will match "testing", or "test again" but would
143 * not match "toasting"
145 * * The regular expression "[1-9ab]" matches any number one through
146 * nine, and the characters "a" and "b". It would match "hello 1"
147 * or "begin", but would not match "no-match".
149 * * The regular expression "[^1-9ab]" matches any character that is
150 * not a number one through nine, or an "a" or "b". It would NOT
151 * match "hello 1" or "begin", but would match "no-match".
153 * * The regular expression "br* " matches something that begins with
154 * a "b", is followed by zero or more "r"s, and ends in a space. It
155 * would match "brrrrr ", and "b ", but would not match "brrh ".
157 * * The regular expression "br+ " matches something that begins with
158 * a "b", is followed by one or more "r"s, and ends in a space. It
159 * would match "brrrrr ", and "br ", but would not match "b " or
162 * * The regular expression "br? " matches something that begins with
163 * a "b", is followed by zero or one "r"s, and ends in a space. It
164 * would match "br ", and "b ", but would not match "brrrr " or
167 * * The regular expression "(..p)b" matches something ending with pb
168 * and beginning with whatever the two characters before the first p
169 * encountered in the line were. It would find "repb" in "rep drepa
170 * qrepb". The regular expression "(..p)a" would find "repa qrepb"
171 * in "rep drepa qrepb"
173 * * The regular expression "d(..p)" matches something ending with p,
174 * beginning with d, and having two characters in between that are
175 * the same as the two characters before the first p encountered in
176 * the line. It would match "drepa qrepb" in "rep drepa qrepb".
179 class cmRegularExpression
183 * Instantiate cmRegularExpression with program=NULL.
185 inline cmRegularExpression ();
188 * Instantiate cmRegularExpression with compiled char*.
190 inline cmRegularExpression (char const*);
193 * Instantiate cmRegularExpression as a copy of another regular expression.
195 cmRegularExpression (cmRegularExpression
const&);
200 inline ~cmRegularExpression();
203 * Compile a regular expression into internal code
204 * for later pattern matching.
206 void compile (char const*);
209 * Matches the regular expression to the given string.
210 * Returns true if found, and sets start and end indexes accordingly.
212 bool find (char const*);
215 * Matches the regular expression to the given std string.
216 * Returns true if found, and sets start and end indexes accordingly.
218 bool find (std::string
const&);
221 * Index to start of first find.
223 inline std::string::size_type
start() const;
226 * Index to end of first find.
228 inline std::string::size_type
end() const;
231 * Returns true if two regular expressions have the same
232 * compiled program for pattern matching.
234 bool operator== (cmRegularExpression
const&) const;
237 * Returns true if two regular expressions have different
238 * compiled program for pattern matching.
240 inline bool operator!= (cmRegularExpression
const&) const;
243 * Returns true if have the same compiled regular expressions
244 * and the same start and end pointers.
246 bool deep_equal (cmRegularExpression
const&) const;
249 * True if the compiled regexp is valid.
251 inline bool is_valid() const;
254 * Marks the regular expression as invalid.
256 inline void set_invalid();
262 std::string::size_type
start(int n
) const;
263 std::string::size_type
end(int n
) const;
264 std::string
match(int n
) const;
267 const char* startp
[NSUBEXP
];
268 const char* endp
[NSUBEXP
];
269 char regstart
; // Internal use only
270 char reganch
; // Internal use only
271 const char* regmust
; // Internal use only
272 int regmlen
; // Internal use only
275 const char* searchstring
;
279 * Create an empty regular expression.
281 inline cmRegularExpression::cmRegularExpression ()
283 this->program
= NULL
;
287 * Creates a regular expression from string s, and
290 inline cmRegularExpression::cmRegularExpression (const char* s
)
292 this->program
= NULL
;
297 * Destroys and frees space allocated for the regular expression.
299 inline cmRegularExpression::~cmRegularExpression ()
302 delete [] this->program
;
307 * Set the start position for the regular expression.
309 inline std::string::size_type
cmRegularExpression::start () const
311 return(this->startp
[0] - searchstring
);
316 * Returns the start/end index of the last item found.
318 inline std::string::size_type
cmRegularExpression::end () const
320 return(this->endp
[0] - searchstring
);
324 * Returns true if two regular expressions have different
325 * compiled program for pattern matching.
327 inline bool cmRegularExpression::operator!= (const cmRegularExpression
& r
) const
329 return(!(*this == r
));
333 * Returns true if a valid regular expression is compiled
334 * and ready for pattern matching.
336 inline bool cmRegularExpression::is_valid () const
338 return (this->program
!= NULL
);
342 inline void cmRegularExpression::set_invalid ()
345 delete [] this->program
;
347 this->program
= NULL
;
351 * Return start index of nth submatch. start(0) is the start of the full match.
353 inline std::string::size_type
cmRegularExpression::start(int n
) const
355 return this->startp
[n
] - searchstring
;
360 * Return end index of nth submatch. end(0) is the end of the full match.
362 inline std::string::size_type
cmRegularExpression::end(int n
) const
364 return this->endp
[n
] - searchstring
;
368 * Return nth submatch as a string.
370 inline std::string
cmRegularExpression::match(int n
) const
372 return std::string(this->startp
[n
], this->endp
[n
] - this->startp
[n
]);
375 #endif // cmRegularExpression_h