4 * Copyright (c) 2000-2003, Darren Hiebert
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 * This module contains functions for applying regular expression matching.
11 * The code for utilizing the Gnu regex package with regard to processing the
12 * regex option and checking for regex matches was adapted from routines in
19 #include "general.h" /* must always come first */
26 # ifdef HAVE_SYS_TYPES_H
27 # include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
44 /* Back-references \0 through \9 */
45 #define BACK_REFERENCE_COUNT 10
47 #if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
51 #define REGEX_NAME "Regex"
56 #if defined (POSIX_REGEX)
/* Identifies which member of a pattern's `u' field is in use. */
enum pType {
	PTRN_TAG,       /* a match produces a tag; data lives in u.tag */
	PTRN_CALLBACK   /* a match invokes a function; data lives in u.callback */
};
76 regexCallback function
;
84 regexPattern
*patterns
;
92 static boolean regexBroken
= FALSE
;
94 /* Array of pattern sets, indexed by language */
95 static patternSet
* Sets
= NULL
;
96 static int SetUpper
= -1; /* upper language index in list */
99 * FUNCTION DEFINITIONS
102 static void clearPatternSet (const langType language
)
104 if (language
< SetUpper
)
106 patternSet
* const set
= Sets
+ language
;
108 for (i
= 0 ; i
< set
->count
; ++i
)
110 #if defined (POSIX_REGEX)
111 regfree (set
->patterns
[i
].pattern
);
113 eFree (set
->patterns
[i
].pattern
);
114 set
->patterns
[i
].pattern
= NULL
;
116 if (set
->patterns
[i
].type
== PTRN_TAG
)
118 eFree (set
->patterns
[i
].u
.tag
.name_pattern
);
119 set
->patterns
[i
].u
.tag
.name_pattern
= NULL
;
122 if (set
->patterns
!= NULL
)
123 eFree (set
->patterns
);
124 set
->patterns
= NULL
;
130 * Regex pseudo-parser
133 static void makeRegexTag (
134 const vString
* const name
, const struct sKind
* const kind
)
139 Assert (name
!= NULL
&& vStringLength (name
) > 0);
140 Assert (kind
!= NULL
);
141 initTagEntry (&e
, vStringValue (name
));
142 e
.kind
= kind
->letter
;
143 e
.kindName
= kind
->name
;
149 * Regex pattern definition
152 /* Take a string like "/blah/" and turn it into "blah", making sure
153 * that the first and last characters are the same, and handling
154 * quoted separator characters. Actually, stops on the occurrence of
155 * an unquoted separator. Also turns "\t" into a Tab character.
156 * Returns pointer to terminating separator. Works in place. Null
157 * terminates name string.
159 static char* scanSeparators (char* name
)
163 boolean quoted
= FALSE
;
165 for (++name
; *name
!= '\0' ; ++name
)
171 else if (*name
== 't')
175 /* Something else is quoted, so preserve the quote. */
181 else if (*name
== '\\')
183 else if (*name
== sep
)
194 /* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
195 * character is whatever the first character of `regexp' is), by breaking it
196 * up into null terminated strings, removing the separators, and expanding
197 * '\t' into tabs. When complete, `regexp' points to the line matching
198 * pattern, a pointer to the name matching pattern is written to `name', a
199 * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
200 * to the trailing flags is written to `flags'. If the pattern is not in the
201 * correct format, a false value is returned.
203 static boolean
parseTagRegex (
204 char* const regexp
, char** const name
,
205 char** const kinds
, char** const flags
)
207 boolean result
= FALSE
;
208 const int separator
= (unsigned char) regexp
[0];
210 *name
= scanSeparators (regexp
);
212 error (WARNING
, "empty regexp");
213 else if (**name
!= separator
)
214 error (WARNING
, "%s: incomplete regexp", regexp
);
217 char* const third
= scanSeparators (*name
);
219 error (WARNING
, "%s: regexp missing name pattern", regexp
);
220 if ((*name
) [strlen (*name
) - 1] == '\\')
221 error (WARNING
, "error in name pattern: \"%s\"", *name
);
222 if (*third
!= separator
)
223 error (WARNING
, "%s: regexp missing final separator", regexp
);
226 char* const fourth
= scanSeparators (third
);
227 if (*fourth
== separator
)
230 scanSeparators (fourth
);
244 static void addCompiledTagPattern (
245 const langType language
, regex_t
* const pattern
,
246 char* const name
, const char kind
, char* const kindName
,
247 char *const description
)
251 if (language
> SetUpper
)
254 Sets
= xRealloc (Sets
, (language
+ 1), patternSet
);
255 for (i
= SetUpper
+ 1 ; i
<= language
; ++i
)
257 Sets
[i
].patterns
= NULL
;
262 set
= Sets
+ language
;
263 set
->patterns
= xRealloc (set
->patterns
, (set
->count
+ 1), regexPattern
);
264 ptrn
= &set
->patterns
[set
->count
];
267 ptrn
->pattern
= pattern
;
268 ptrn
->type
= PTRN_TAG
;
269 ptrn
->u
.tag
.name_pattern
= name
;
270 ptrn
->u
.tag
.kind
.enabled
= TRUE
;
271 ptrn
->u
.tag
.kind
.letter
= kind
;
272 ptrn
->u
.tag
.kind
.name
= kindName
;
273 ptrn
->u
.tag
.kind
.description
= description
;
276 static void addCompiledCallbackPattern (
277 const langType language
, regex_t
* const pattern
,
278 const regexCallback callback
)
282 if (language
> SetUpper
)
285 Sets
= xRealloc (Sets
, (language
+ 1), patternSet
);
286 for (i
= SetUpper
+ 1 ; i
<= language
; ++i
)
288 Sets
[i
].patterns
= NULL
;
293 set
= Sets
+ language
;
294 set
->patterns
= xRealloc (set
->patterns
, (set
->count
+ 1), regexPattern
);
295 ptrn
= &set
->patterns
[set
->count
];
298 ptrn
->pattern
= pattern
;
299 ptrn
->type
= PTRN_CALLBACK
;
300 ptrn
->u
.callback
.function
= callback
;
303 #if defined (POSIX_REGEX)
305 static regex_t
* compileRegex (const char* const regexp
, const char* const flags
)
307 int cflags
= REG_EXTENDED
| REG_NEWLINE
;
308 regex_t
*result
= NULL
;
311 for (i
= 0 ; flags
!= NULL
&& flags
[i
] != '\0' ; ++i
)
313 switch ((int) flags
[i
])
315 case 'b': cflags
&= ~REG_EXTENDED
; break;
316 case 'e': cflags
|= REG_EXTENDED
; break;
317 case 'i': cflags
|= REG_ICASE
; break;
318 default: error (WARNING
, "unknown regex flag: '%c'", *flags
); break;
321 result
= xMalloc (1, regex_t
);
322 errcode
= regcomp (result
, regexp
, cflags
);
326 regerror (errcode
, result
, errmsg
, 256);
327 error (WARNING
, "%s", errmsg
);
337 static void parseKinds (
338 const char* const kinds
, char* const kind
, char** const kindName
,
344 if (kinds
== NULL
|| kinds
[0] == '\0')
347 *kindName
= eStrdup ("regex");
349 else if (kinds
[0] != '\0')
351 const char* k
= kinds
;
352 if (k
[0] != ',' && (k
[1] == ',' || k
[1] == '\0'))
359 *kindName
= eStrdup ("regex");
362 const char *const comma
= strchr (k
, ',');
364 *kindName
= eStrdup (k
);
367 *kindName
= (char*) eMalloc (comma
- k
+ 1);
368 strncpy (*kindName
, k
, comma
- k
);
369 (*kindName
) [comma
- k
] = '\0';
372 *description
= eStrdup (k
);
/*  Prints a one-line summary of the kind attached to tag pattern pat [i]:
 *  the indentation string (used only when `indent' is true), the kind letter
 *  ('?' when the letter is NUL), the kind description (or the kind name when
 *  no description is set), and " [off]" when the kind is disabled.
 *  pat [i] must be a PTRN_TAG pattern.
 */
378 static void printRegexKind (const regexPattern
*pat
, unsigned int i
, boolean indent
)
380 const struct sKind
*const kind
= &pat
[i
].u
.tag
.kind
;
381 const char *const indentation
= indent
? " " : "";
382 Assert (pat
[i
].type
== PTRN_TAG
);
383 printf ("%s%c %s %s\n", indentation
,
384 kind
->letter
!= '\0' ? kind
->letter
: '?',
385 kind
->description
!= NULL
? kind
->description
: kind
->name
,
386 kind
->enabled
? "" : " [off]");
389 static void processLanguageRegex (const langType language
,
390 const char* const parameter
)
392 if (parameter
== NULL
|| parameter
[0] == '\0')
393 clearPatternSet (language
);
394 else if (parameter
[0] != '@')
395 addLanguageRegex (language
, parameter
);
396 else if (! doesFileExist (parameter
+ 1))
397 error (WARNING
, "cannot open regex file");
400 const char* regexfile
= parameter
+ 1;
401 FILE* const fp
= fopen (regexfile
, "r");
403 error (WARNING
| PERROR
, regexfile
);
406 vString
* const regex
= vStringNew ();
407 while (readLine (regex
, fp
))
408 addLanguageRegex (language
, vStringValue (regex
));
410 vStringDelete (regex
);
416 * Regex pattern matching
419 #if defined (POSIX_REGEX)
421 static vString
* substitute (
422 const char* const in
, const char* out
,
423 const int nmatch
, const regmatch_t
* const pmatch
)
425 vString
* result
= vStringNew ();
427 for (p
= out
; *p
!= '\0' ; p
++)
429 if (*p
== '\\' && isdigit ((int) *++p
))
431 const int dig
= *p
- '0';
432 if (0 < dig
&& dig
< nmatch
&& pmatch
[dig
].rm_so
!= -1)
434 const int diglen
= pmatch
[dig
].rm_eo
- pmatch
[dig
].rm_so
;
435 vStringNCatS (result
, in
+ pmatch
[dig
].rm_so
, diglen
);
438 else if (*p
!= '\n' && *p
!= '\r')
439 vStringPut (result
, *p
);
441 vStringTerminate (result
);
445 static void matchTagPattern (const vString
* const line
,
446 const regexPattern
* const patbuf
,
447 const regmatch_t
* const pmatch
)
449 vString
*const name
= substitute (vStringValue (line
),
450 patbuf
->u
.tag
.name_pattern
, BACK_REFERENCE_COUNT
, pmatch
);
451 vStringStripLeading (name
);
452 vStringStripTrailing (name
);
453 if (vStringLength (name
) > 0)
454 makeRegexTag (name
, &patbuf
->u
.tag
.kind
);
456 error (WARNING
, "%s:%ld: null expansion of name pattern \"%s\"",
457 getInputFileName (), getInputLineNumber (),
458 patbuf
->u
.tag
.name_pattern
);
459 vStringDelete (name
);
462 static void matchCallbackPattern (
463 const vString
* const line
, const regexPattern
* const patbuf
,
464 const regmatch_t
* const pmatch
)
466 regexMatch matches
[BACK_REFERENCE_COUNT
];
467 unsigned int count
= 0;
469 for (i
= 0 ; i
< BACK_REFERENCE_COUNT
&& pmatch
[i
].rm_so
!= -1 ; ++i
)
471 matches
[i
].start
= pmatch
[i
].rm_so
;
472 matches
[i
].length
= pmatch
[i
].rm_eo
- pmatch
[i
].rm_so
;
475 patbuf
->u
.callback
.function (vStringValue (line
), matches
, count
);
478 static boolean
matchRegexPattern (const vString
* const line
,
479 const regexPattern
* const patbuf
)
481 boolean result
= FALSE
;
482 regmatch_t pmatch
[BACK_REFERENCE_COUNT
];
483 const int match
= regexec (patbuf
->pattern
, vStringValue (line
),
484 BACK_REFERENCE_COUNT
, pmatch
, 0);
488 if (patbuf
->type
== PTRN_TAG
)
489 matchTagPattern (line
, patbuf
, pmatch
);
490 else if (patbuf
->type
== PTRN_CALLBACK
)
491 matchCallbackPattern (line
, patbuf
, pmatch
);
494 Assert ("invalid pattern type" == NULL
);
503 /* PUBLIC INTERFACE */
505 /* Match against all patterns for specified language. Returns true if at least
506 * one pattern matched.
508 extern boolean
matchRegex (const vString
* const line
, const langType language
)
510 boolean result
= FALSE
;
511 if (language
!= LANG_IGNORE
&& language
<= SetUpper
&&
512 Sets
[language
].count
> 0)
514 const patternSet
* const set
= Sets
+ language
;
516 for (i
= 0 ; i
< set
->count
; ++i
)
517 if (matchRegexPattern (line
, set
->patterns
+ i
))
523 extern void findRegexTags (void)
525 /* merely read all lines of the file */
526 while (fileReadLine () != NULL
)
530 #endif /* HAVE_REGEX */
532 extern void addTagRegex (
533 const langType language __unused__
,
534 const char* const regex __unused__
,
535 const char* const name __unused__
,
536 const char* const kinds __unused__
,
537 const char* const flags __unused__
)
540 Assert (regex
!= NULL
);
541 Assert (name
!= NULL
);
544 regex_t
* const cp
= compileRegex (regex
, flags
);
550 parseKinds (kinds
, &kind
, &kindName
, &description
);
551 addCompiledTagPattern (language
, cp
, eStrdup (name
),
552 kind
, kindName
, description
);
558 extern void addCallbackRegex (
559 const langType language __unused__
,
560 const char* const regex __unused__
,
561 const char* const flags __unused__
,
562 const regexCallback callback __unused__
)
565 Assert (regex
!= NULL
);
568 regex_t
* const cp
= compileRegex (regex
, flags
);
570 addCompiledCallbackPattern (language
, cp
, callback
);
575 extern void addLanguageRegex (
576 const langType language __unused__
, const char* const regex __unused__
)
581 char *const regex_pat
= eStrdup (regex
);
582 char *name
, *kinds
, *flags
;
583 if (parseTagRegex (regex_pat
, &name
, &kinds
, &flags
))
585 addTagRegex (language
, regex_pat
, name
, kinds
, flags
);
593 * Regex option parsing
596 extern boolean
processRegexOption (const char *const option
,
597 const char *const parameter __unused__
)
599 boolean handled
= FALSE
;
600 const char* const dash
= strchr (option
, '-');
601 if (dash
!= NULL
&& strncmp (option
, "regex", dash
- option
) == 0)
605 language
= getNamedLanguage (dash
+ 1);
606 if (language
== LANG_IGNORE
)
607 error (WARNING
, "unknown language in --%s option", option
);
609 processLanguageRegex (language
, parameter
);
611 error (WARNING
, "regex support not available; required for --%s option",
619 extern void disableRegexKinds (const langType language __unused__
)
622 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
624 patternSet
* const set
= Sets
+ language
;
626 for (i
= 0 ; i
< set
->count
; ++i
)
627 if (set
->patterns
[i
].type
== PTRN_TAG
)
628 set
->patterns
[i
].u
.tag
.kind
.enabled
= FALSE
;
633 extern boolean
enableRegexKind (
634 const langType language __unused__
,
635 const int kind __unused__
, const boolean mode __unused__
)
637 boolean result
= FALSE
;
639 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
641 patternSet
* const set
= Sets
+ language
;
643 for (i
= 0 ; i
< set
->count
; ++i
)
644 if (set
->patterns
[i
].type
== PTRN_TAG
&&
645 set
->patterns
[i
].u
.tag
.kind
.letter
== kind
)
647 set
->patterns
[i
].u
.tag
.kind
.enabled
= mode
;
655 extern void printRegexKinds (const langType language __unused__
, boolean indent
)
658 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
660 patternSet
* const set
= Sets
+ language
;
662 for (i
= 0 ; i
< set
->count
; ++i
)
663 if (set
->patterns
[i
].type
== PTRN_TAG
)
664 printRegexKind (set
->patterns
, i
, indent
);
669 extern void freeRegexResources (void)
673 for (i
= 0 ; i
<= SetUpper
; ++i
)
682 /* Check for broken regcomp() on Cygwin */
extern void checkRegex (void)
{
#if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
	/* Compile a trivial probe pattern. A regcomp() that rejects it is
	 * considered unusable and regex support is switched off.
	 */
	regex_t patbuf;
	if (regcomp (&patbuf, "/hello/", 0) != 0)
	{
		error (WARNING, "Disabling broken regex");
		regexBroken = TRUE;
	}
	else
	{
		/* The probe compiled; release it so it does not leak. */
		regfree (&patbuf);
	}
#endif
}
696 /* vi:set tabstop=4 shiftwidth=4: */