Update NEWS for 1.6.22
[pkg-k5-afs_openafs.git] / src / WINNT / afsapplib / regexp.cpp
blob5eb62f5387c984c6527dc22914f25bc2ce6a9513
/*
 * Copyright 2000, International Business Machines Corporation and others.
 * All Rights Reserved.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */
10 extern "C" {
11 #include <afs/param.h>
12 #include <afs/stds.h>
15 #include <windows.h>
16 #include <WINNT/regexp.h>
/*
 * DEFINITIONS ________________________________________________________________
 *
 */
// Markers used in the compiled form of an expression (m_achCompiled).
// Each compiled element starts with one of these markers; some are
// followed by operands, as noted below.
//
// markREPEAT is not an element of its own: it is OR'd into the marker
// of the preceding element when a "*" follows it. The repeatable
// markers all have even values, so the low bit is free for that flag.
//
#define markREPEAT       TEXT('\x01')   // flag: element may repeat ("*")
#define markCHARACTER    TEXT('\x02')   // literal; followed by the character
#define markANYCHAR      TEXT('\x04')   // "." or "?": any one character
#define markCHARSET      TEXT('\x06')   // "[...]"; followed by length, members
#define markNONCHARSET   TEXT('\x08')   // "[^...]"; followed by length, members
#define markREFERENCE    TEXT('\x0A')   // "\N"; followed by the group index
#define markLPAREN       TEXT('\xFC')   // "\("; followed by the group index
#define markRPAREN       TEXT('\xFD')   // "\)"; followed by the group index
#define markENDLINE      TEXT('\xFE')   // "$" as the last character
#define markENDPATTERN   TEXT('\xFF')   // terminates the compiled expression
/*
 * CLASS ROUTINES _____________________________________________________________
 *
 */
41 REGEXP::REGEXP (void)
43 m_fMatchFromStart = FALSE;
44 m_achCompiled[0] = TEXT('\0');
47 REGEXP::REGEXP (LPCTSTR pszExpr)
49 m_fMatchFromStart = FALSE;
50 m_achCompiled[0] = TEXT('\0');
51 SetExpression (pszExpr);
54 REGEXP::~REGEXP (void)
56 ; // nothing really to do here
59 BOOL REGEXP::SetExpression (LPCTSTR pszExpr)
61 return Compile (pszExpr);
64 BOOL REGEXP::Matches (LPCTSTR pszExpr, LPCTSTR pszString)
66 REGEXP Expr (pszExpr);
67 return Expr.Matches (pszString);
70 BOOL REGEXP::fIsRegExp (void)
72 if (m_fMatchFromStart) // started with "^"?
73 return TRUE; // it's a regexp.
75 for (LPCTSTR pch = m_achCompiled; (*pch) && (*pch != markENDPATTERN); pch += 2)
77 if (*pch != markCHARACTER)
78 return TRUE;
81 return FALSE; // just a string of characters
84 BOOL REGEXP::fIsRegExp (LPCTSTR pszExpr)
86 REGEXP Expr (pszExpr);
87 return Expr.fIsRegExp();
/*
 * REGEXP _____________________________________________________________________
 *
 */
96 BOOL REGEXP::Compile (LPCTSTR pszExpr)
98 BYTE aParens[ nCOMPILED_PARENS_MAX ];
99 PBYTE pParen = &aParens[0];
100 LPTSTR pchLastEx = NULL;
101 int nParens = 0;
103 // Erase any previous compiled expression
105 LPTSTR pchCompiled = m_achCompiled;
106 *pchCompiled = TEXT('\0');
107 m_fMatchFromStart = FALSE;
109 if (!pszExpr || !*pszExpr)
111 SetLastError (ERROR_INVALID_PARAMETER);
112 return FALSE;
115 // See if the expression starts with a "^"
117 if ((m_fMatchFromStart = (*pszExpr == TEXT('^'))) == TRUE)
118 ++pszExpr;
120 // Start stripping characters from the expression
122 BOOL rc;
123 for (rc = TRUE; rc; )
125 TCHAR ch;
127 if ((sizeof(TCHAR)*(pchCompiled - m_achCompiled)) > sizeof(m_achCompiled))
129 SetLastError (ERROR_META_EXPANSION_TOO_LONG);
130 rc = FALSE;
131 break;
134 if ((ch = *pszExpr++) == TEXT('\0'))
136 // We finally hit the end of this expression.
138 if (pParen != &aParens[0])
140 SetLastError (ERROR_BAD_FORMAT); // unmatched "\("
141 rc = FALSE;
143 break;
146 if (ch != TEXT('*'))
147 pchLastEx = pchCompiled;
149 switch (ch)
151 case TEXT('.'):
152 case TEXT('?'):
153 *pchCompiled++ = markANYCHAR;
154 break;
156 case TEXT('*'):
157 if ((pchLastEx == NULL) || (*pchLastEx == markLPAREN) || (*pchLastEx == markRPAREN))
159 *pchCompiled++ = markCHARACTER;
160 *pchCompiled++ = ch;
162 else // record that we can repeat the last expression
164 *pchLastEx |= markREPEAT;
166 break;
168 case TEXT('$'):
169 if (*pszExpr != TEXT('\0'))
171 *pchCompiled++ = markCHARACTER;
172 *pchCompiled++ = ch;
174 else // record that we should match end-of-line
176 *pchCompiled++ = markENDLINE;
178 break;
180 case TEXT('['):
181 if ((ch = *pszExpr++) == '^')
183 *pchCompiled++ = markNONCHARSET;
184 ch = *pszExpr++;
186 else
188 *pchCompiled++ = markCHARSET;
191 *pchCompiled++ = 1; // length; this is pchLastEx[1]
193 do {
194 if (ch == TEXT('\0'))
196 SetLastError (ERROR_BAD_FORMAT); // unmatched "\("
197 rc = FALSE;
198 break;
201 if ((ch == TEXT('-')) && (*pchCompiled != pchLastEx[2]))
203 if ((ch = *pszExpr++) == TEXT(']'))
205 *pchCompiled++ = TEXT('-');
206 pchLastEx[1]++;
207 break;
209 while ((BYTE)pchCompiled[-1] < (BYTE)ch)
211 *pchCompiled = pchCompiled[-1] + 1;
212 pchCompiled++;
213 pchLastEx[1]++;
214 if ((sizeof(TCHAR)*(pchCompiled - m_achCompiled)) > sizeof(m_achCompiled))
216 SetLastError (ERROR_META_EXPANSION_TOO_LONG);
217 rc = FALSE;
218 break;
222 else
224 *pchCompiled++ = ch;
225 pchLastEx[1]++;
227 if ((sizeof(TCHAR)*(pchCompiled - m_achCompiled)) > sizeof(m_achCompiled))
229 SetLastError (ERROR_META_EXPANSION_TOO_LONG);
230 rc = FALSE;
231 break;
235 } while ((ch = *pszExpr++) != TEXT(']'));
236 break;
238 case TEXT('\\'):
239 if ((ch = *pszExpr++) == TEXT('('))
241 if (nParens >= nCOMPILED_PARENS_MAX)
243 SetLastError (ERROR_META_EXPANSION_TOO_LONG);
244 rc = FALSE;
245 break;
247 *pParen++ = nParens;
248 *pchCompiled++ = markLPAREN;
249 *pchCompiled++ = nParens++;
251 else if (ch == TEXT(')'))
253 if (pParen == &aParens[0])
255 SetLastError (ERROR_BAD_FORMAT);
256 rc = FALSE;
257 break;
259 *pchCompiled++ = markRPAREN;
260 *pchCompiled++ = *--pParen;
262 else if ((ch >= TEXT('1')) && (ch < (TEXT('1') + nCOMPILED_PARENS_MAX)))
264 *pchCompiled++ = markREFERENCE;
265 *pchCompiled++ = ch - '1';
267 else
269 *pchCompiled++ = markCHARACTER;
270 *pchCompiled++ = ch;
272 break;
274 default:
275 *pchCompiled++ = markCHARACTER;
276 *pchCompiled++ = ch;
277 break;
281 *pchCompiled++ = markENDPATTERN;
282 *pchCompiled++ = 0;
283 return rc;
287 BOOL REGEXP::Matches (LPCTSTR pszString)
289 if (!pszString)
290 return FALSE;
292 // Prepare a place to store information about \( and \) pairs
294 LPCTSTR aParenStarts[ nCOMPILED_PARENS_MAX ];
295 LPCTSTR aParenEnds[ nCOMPILED_PARENS_MAX ];
297 for (size_t ii = 0; ii < nCOMPILED_PARENS_MAX; ii++)
299 aParenStarts[ii] = NULL;
300 aParenEnds[ii] = NULL;
303 // If the expression starts with "^", we can do a quick pattern-match...
305 if (m_fMatchFromStart)
307 return MatchSubset (pszString, m_achCompiled, aParenStarts, aParenEnds);
310 // Otherwise, we have to work a little harder. If the expression
311 // at least starts with a recognized character, we can scan for that
312 // as the start of a pattern...
314 LPTSTR pchCompiled = m_achCompiled;
315 if (*pchCompiled == markCHARACTER)
317 TCHAR chStart = pchCompiled[1];
318 do {
319 if (*pszString != chStart)
320 continue;
321 if (MatchSubset (pszString, pchCompiled, aParenStarts, aParenEnds))
322 return TRUE;
323 } while (*pszString++);
325 return FALSE;
328 // If the expression starts with something weird, we'll have to test
329 // against every character in the string.
331 do {
332 if (MatchSubset (pszString, pchCompiled, aParenStarts, aParenEnds))
333 return TRUE;
334 } while (*pszString++);
336 return FALSE;
340 BOOL REGEXP::MatchSubset (LPCTSTR pszString, LPCTSTR pchCompiled, LPCTSTR *aParenStarts, LPCTSTR *aParenEnds)
342 LPCTSTR pchStartOfEx;
343 int ii;
344 int cchPattern;
346 while (1)
347 switch (*pchCompiled++)
349 case markCHARACTER:
350 if (*pchCompiled++ == *pszString++)
351 continue;
352 return FALSE;
354 case markANYCHAR:
355 if (*pszString++)
356 continue;
357 return FALSE;
359 case markENDLINE:
360 if (*pszString == TEXT('\0'))
361 continue;
362 return FALSE;
364 case markENDPATTERN:
365 return TRUE;
367 case markCHARSET:
368 if (fIsInCharSet (pchCompiled, *pszString++, TRUE))
370 pchCompiled += *pchCompiled;
371 continue;
373 return FALSE;
375 case markNONCHARSET:
376 if (fIsInCharSet (pchCompiled, *pszString++, FALSE))
378 pchCompiled += *pchCompiled;
379 continue;
381 return FALSE;
383 case markLPAREN:
384 aParenStarts[*pchCompiled++] = pszString;
385 continue;
387 case markRPAREN:
388 aParenEnds[*pchCompiled++] = pszString;
389 continue;
391 case markREFERENCE:
392 if (aParenEnds[ii = *pchCompiled++] == 0)
393 return FALSE; // reference to invalid \(\) pair
394 if (CompareParen (ii, pszString, aParenStarts, aParenEnds))
396 pszString += aParenEnds[ii] - aParenStarts[ii];
397 continue;
399 return FALSE;
401 case markREFERENCE|markREPEAT:
402 if (aParenEnds[ii = *pchCompiled++] == 0)
403 return FALSE; // reference to invalid \(\) pair
404 pchStartOfEx = pszString;
405 cchPattern = aParenEnds[ii] - aParenStarts[ii];
406 while (CompareParen (ii, pszString, aParenStarts, aParenEnds))
407 pszString += cchPattern;
408 while (pszString >= pchStartOfEx)
410 if (MatchSubset (pszString, pchCompiled, aParenStarts, aParenEnds))
411 return TRUE;
412 pszString -= cchPattern;
414 continue;
416 case markANYCHAR|markREPEAT:
417 pchStartOfEx = pszString;
418 while (*pszString++)
420 goto star;
422 case markCHARACTER|markREPEAT:
423 pchStartOfEx = pszString;
424 while (*pszString++ == *pchCompiled)
426 pchCompiled++;
427 goto star;
429 case markCHARSET|markREPEAT:
430 case markNONCHARSET|markREPEAT:
431 pchStartOfEx = pszString;
432 while (fIsInCharSet (pchCompiled, *pszString++, (pchCompiled[-1] == (markCHARSET|markREPEAT))))
434 pchCompiled += *pchCompiled;
435 goto star;
437 star:
438 do {
439 pszString--;
440 if (MatchSubset (pszString, pchCompiled, aParenStarts, aParenEnds))
441 return TRUE;
442 } while (pszString > pchStartOfEx);
443 return FALSE;
445 default:
446 return FALSE; // damaged compiled string
451 BOOL REGEXP::CompareParen (int ii, LPCTSTR pszString, LPCTSTR *aParenStarts, LPCTSTR *aParenEnds)
453 LPCTSTR pchInParen = aParenStarts[ii];
454 while (*pchInParen++ == *pszString++)
455 if (pchInParen >= aParenEnds[ii])
456 return TRUE;
457 return FALSE;
461 BOOL REGEXP::fIsInCharSet (LPCTSTR pszCharSet, TCHAR chTest, int fInclusive)
463 if (chTest == 0)
464 return FALSE;
465 for (int n = (int)(*pszCharSet++); --n; )
467 if (*pszCharSet++ == chTest)
468 return fInclusive;
470 return !fInclusive;