Resync
[CMakeLuaTailorHgBridge.git] / CMakeLua / Source / kwsys / RegularExpression.hxx.in
blob2aa8a9aeedee67c17300770f3282867ecce3b752
1 /*=========================================================================
3 Program: KWSys - Kitware System Library
4 Module: $RCSfile: RegularExpression.hxx.in,v $
6 Copyright (c) Kitware, Inc., Insight Consortium. All rights reserved.
7 See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
9 This software is distributed WITHOUT ANY WARRANTY; without even
10 the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
11 PURPOSE. See the above copyright notices for more information.
13 =========================================================================*/
14 // Original Copyright notice:
15 // Copyright (C) 1991 Texas Instruments Incorporated.
17 // Permission is granted to any individual or institution to use, copy, modify,
18 // and distribute this software, provided that this complete copyright and
19 // permission notice is maintained, intact, in all copies and supporting
20 // documentation.
22 // Texas Instruments Incorporated provides this software "as is" without
23 // express or implied warranty.
25 // Created: MNF 06/13/89 Initial Design and Implementation
26 // Updated: LGO 08/09/89 Inherit from Generic
27 // Updated: MBN 09/07/89 Added conditional exception handling
28 // Updated: MBN 12/15/89 Sprinkled "const" qualifiers all over the place!
29 // Updated: DLS 03/22/91 New lite version
32 #ifndef @KWSYS_NAMESPACE@_RegularExpression_hxx
33 #define @KWSYS_NAMESPACE@_RegularExpression_hxx
35 #include <@KWSYS_NAMESPACE@/Configure.h>
36 #include <@KWSYS_NAMESPACE@/Configure.hxx>
38 #include <@KWSYS_NAMESPACE@/stl/string>
40 /* Define this macro temporarily to keep the code readable. */
41 #if !defined (KWSYS_NAMESPACE) && !@KWSYS_NAMESPACE@_NAME_IS_KWSYS
42 # define kwsys_stl @KWSYS_NAMESPACE@_stl
43 #endif
45 namespace @KWSYS_NAMESPACE@
48 /** \class RegularExpression
49 * \brief Implements pattern matching with regular expressions.
51 * This is the header file for the regular expression class. An object of
52 * this class contains a regular expression, in a special "compiled" format.
53 * This compiled format consists of several slots all kept as the objects
54 * private data. The RegularExpression class provides a convenient way to
55 * represent regular expressions. It makes it easy to search for the same
56 * regular expression in many different strings without having to compile a
57 * string to regular expression format more than necessary.
59 * This class implements pattern matching via regular expressions.
60 * A regular expression allows a programmer to specify complex
61 * patterns that can be searched for and matched against the
62 * character string of a string object. In its simplest form, a
63 * regular expression is a sequence of characters used to
64 * search for exact character matches. However, many times the
65 * exact sequence to be found is not known, or only a match at
66 * the beginning or end of a string is desired. The RegularExpression
67 * class implements regular expression pattern
68 * matching as is found and implemented in many UNIX commands
69 * and utilities.
71 * Example: The perl code
73 * $filename =~ m"([a-z]+)\.cc";
74 * print $1;
76 * Is written as follows in C++
78 * RegularExpression re("([a-z]+)\\.cc");
79 * re.find(filename);
80 * cerr << re.match(1);
83 * The regular expression class provides a convenient mechanism
84 * for specifying and manipulating regular expressions. The
85 * regular expression object allows specification of such pat-
86 * terns by using the following regular expression metacharac-
87 * ters:
89 * ^ Matches at beginning of a line
91 * $ Matches at end of a line
93 * . Matches any single character
95 * [ ] Matches any character(s) inside the brackets
97 * [^ ] Matches any character(s) not inside the brackets
99 * - Matches any character in range on either side of a dash
101 * * Matches preceding pattern zero or more times
103 * + Matches preceding pattern one or more times
105 * ? Matches preceding pattern zero or once only
107 * () Saves a matched expression and uses it in a later match
109 * Note that more than one of these metacharacters can be used
110 * in a single regular expression in order to create complex
111 * search patterns. For example, the pattern [^ab1-9] says to
112 * match any single character that is not one of the
113 * characters "a" or "b" or a digit in the series one
114 * through nine.
116 * There are three constructors for RegularExpression. One just creates an
117 * empty RegularExpression object. Another creates a RegularExpression
118 * object and initializes it with a regular expression that is given in the
119 * form of a char*. The third takes a reference to a RegularExpression
120 * object as an argument and creates an object initialized with the
121 * information from the given RegularExpression object.
123 * The find member function finds the first occurrence of the regular
124 * expression of that object in the string given to find as an argument. Find
125 * returns a boolean, and if true, mutates the private data appropriately.
126 * Find sets pointers to the beginning and end of the thing last found; they
127 * are pointers into the actual string that was searched. The start and end
128 * member functions return indices into the searched string that correspond
129 * to the beginning and end pointers respectively. The compile member
130 * function takes a char* and puts the compiled version of the char* argument
131 * into the object's private data fields. The == and != operators only check
132 * to see if the compiled regular expression is the same, and the
133 * deep_equal function also checks to see if the start and end pointers are
134 * the same. The is_valid function returns false if program is set to NULL,
135 * (i.e. there is no valid compiled expression). The set_invalid function sets
136 * the program to NULL (Warning: this deletes the compiled expression). The
137 * following examples may help clarify regular expression usage:
139 * * The regular expression "^hello" matches a "hello" only at the
140 * beginning of a line. It would match "hello there" but not "hi,
141 * hello there".
143 * * The regular expression "long$" matches a "long" only at the end
144 * of a line. It would match "so long\0", but not "long ago".
146 * * The regular expression "t..t..g" will match anything that has a
147 * "t" then any two characters, another "t", any two characters and
148 * then a "g". It will match "testing", or "test again" but would
149 * not match "toasting"
151 * * The regular expression "[1-9ab]" matches any number one through
152 * nine, and the characters "a" and "b". It would match "hello 1"
153 * or "begin", but would not match "nothing".
155 * * The regular expression "[^1-9ab]" matches any character that is
156 * not a number one through nine, or an "a" or "b". It would NOT
157 * match "1ab9" or "b2", but would match "hello 1" or "nothing".
159 * * The regular expression "br* " matches something that begins with
160 * a "b", is followed by zero or more "r"s, and ends in a space. It
161 * would match "brrrrr ", and "b ", but would not match "brrh ".
163 * * The regular expression "br+ " matches something that begins with
164 * a "b", is followed by one or more "r"s, and ends in a space. It
165 * would match "brrrrr ", and "br ", but would not match "b " or
166 * "brrh ".
168 * * The regular expression "br? " matches something that begins with
169 * a "b", is followed by zero or one "r"s, and ends in a space. It
170 * would match "br ", and "b ", but would not match "brrrr " or
171 * "brrh ".
173 * * The regular expression "(..p)b" matches something ending with pb
174 * and beginning with whatever the two characters before the first p
175 * encountered in the line were. It would find "repb" in "rep drepa
176 * qrepb". The regular expression "(..p)a" would find "repa qrepb"
177 * in "rep drepa qrepb"
179 * * The regular expression "d(..p)" matches something ending with p,
180 * beginning with d, and having two characters in between that are
181 * the same as the two characters before the first p encountered in
182 * the line. It would match "drepa qrepb" in "rep drepa qrepb".
185 class @KWSYS_NAMESPACE@_EXPORT RegularExpression
187 public:
189 * Instantiate RegularExpression with program=NULL.
191 inline RegularExpression ();
194 * Instantiate RegularExpression with compiled char*.
196 inline RegularExpression (char const*);
199 * Instantiate RegularExpression as a copy of another regular expression.
201 RegularExpression (RegularExpression const&);
204 * Destructor.
206 inline ~RegularExpression();
209 * Compile a regular expression into internal code
210 * for later pattern matching.
212 bool compile (char const*);
215 * Matches the regular expression to the given string.
216 * Returns true if found, and sets start and end indexes accordingly.
218 bool find (char const*);
221 * Matches the regular expression to the given std string.
222 * Returns true if found, and sets start and end indexes accordingly.
224 bool find (kwsys_stl::string const&);
227 * Index to start of first find.
229 inline kwsys_stl::string::size_type start() const;
232 * Index to end of first find.
234 inline kwsys_stl::string::size_type end() const;
237 * Copy the given regular expression.
239 RegularExpression& operator= (const RegularExpression& rxp);
242 * Returns true if two regular expressions have the same
243 * compiled program for pattern matching.
245 bool operator== (RegularExpression const&) const;
248 * Returns true if two regular expressions have different
249 * compiled program for pattern matching.
251 inline bool operator!= (RegularExpression const&) const;
254 * Returns true if have the same compiled regular expressions
255 * and the same start and end pointers.
257 bool deep_equal (RegularExpression const&) const;
260 * True if the compiled regexp is valid.
262 inline bool is_valid() const;
265 * Marks the regular expression as invalid.
267 inline void set_invalid();
270 * Destructor.
272 // awf added
273 kwsys_stl::string::size_type start(int n) const;
274 kwsys_stl::string::size_type end(int n) const;
275 kwsys_stl::string match(int n) const;
277 enum { NSUBEXP = 10 };
278 private:
279 const char* startp[NSUBEXP];
280 const char* endp[NSUBEXP];
281 char regstart; // Internal use only
282 char reganch; // Internal use only
283 const char* regmust; // Internal use only
284 kwsys_stl::string::size_type regmlen; // Internal use only
285 char* program;
286 int progsize;
287 const char* searchstring;
291 * Create an empty regular expression.
293 inline RegularExpression::RegularExpression ()
295 this->program = 0;
299 * Creates a regular expression from string s, and
300 * compiles s.
302 inline RegularExpression::RegularExpression (const char* s)
304 this->program = 0;
305 if ( s )
307 this->compile(s);
312 * Destroys and frees space allocated for the regular expression.
314 inline RegularExpression::~RegularExpression ()
316 //#ifndef WIN32
317 delete [] this->program;
318 //#endif
322 * Set the start position for the regular expression.
324 inline kwsys_stl::string::size_type RegularExpression::start () const
326 return static_cast<kwsys_stl::string::size_type>(
327 this->startp[0] - searchstring);
332 * Returns the start/end index of the last item found.
334 inline kwsys_stl::string::size_type RegularExpression::end () const
336 return static_cast<kwsys_stl::string::size_type>(
337 this->endp[0] - searchstring);
341 * Returns true if two regular expressions have different
342 * compiled program for pattern matching.
344 inline bool RegularExpression::operator!= (const RegularExpression& r) const
346 return(!(*this == r));
350 * Returns true if a valid regular expression is compiled
351 * and ready for pattern matching.
353 inline bool RegularExpression::is_valid () const
355 return (this->program != 0);
359 inline void RegularExpression::set_invalid ()
361 //#ifndef WIN32
362 delete [] this->program;
363 //#endif
364 this->program = 0;
368 * Return start index of nth submatch. start(0) is the start of the full match.
370 inline kwsys_stl::string::size_type RegularExpression::start(int n) const
372 return static_cast<kwsys_stl::string::size_type>(
373 this->startp[n] - searchstring);
378 * Return end index of nth submatch. end(0) is the end of the full match.
380 inline kwsys_stl::string::size_type RegularExpression::end(int n) const
382 return static_cast<kwsys_stl::string::size_type>(
383 this->endp[n] - searchstring);
387 * Return nth submatch as a string.
389 inline kwsys_stl::string RegularExpression::match(int n) const
391 if (this->startp[n]==0)
393 return kwsys_stl::string("");
395 else
397 return kwsys_stl::string(this->startp[n],
398 static_cast<kwsys_stl::string::size_type>(
399 this->endp[n] - this->startp[n]));
403 } // namespace @KWSYS_NAMESPACE@
405 /* Undefine temporary macro. */
406 #if !defined (KWSYS_NAMESPACE) && !@KWSYS_NAMESPACE@_NAME_IS_KWSYS
407 # undef kwsys_stl
408 #endif
410 #endif