2 * $Id: lregex.c 747 2009-11-06 02:33:37Z dhiebert $
4 * Copyright (c) 2000-2003, Darren Hiebert
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 * This module contains functions for applying regular expression matching.
11 * The code for utilizing the Gnu regex package with regard to processing the
12 * regex option and checking for regex matches was adapted from routines in
19 #include "general.h" /* must always come first */
26 # ifdef HAVE_SYS_TYPES_H
27 # include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
44 /* Back-references \0 through \9 */
45 #define BACK_REFERENCE_COUNT 10
47 #if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
51 #define REGEX_NAME "Regex"
56 #if defined (POSIX_REGEX)
/* Identifies which member of a regexPattern's union `u' is in use: `tag' for
 * PTRN_TAG entries, `callback' for PTRN_CALLBACK entries. */
enum pType {
	PTRN_TAG,
	PTRN_CALLBACK
};
76 regexCallback function
;
84 regexPattern
*patterns
;
92 static boolean regexBroken
= FALSE
;
94 /* Array of pattern sets, indexed by language */
95 static patternSet
* Sets
= NULL
;
96 static int SetUpper
= -1; /* upper language index in list */
99 * FUNCTION DEFINITIONS
102 static void clearPatternSet (const langType language
)
104 if (language
<= SetUpper
)
106 patternSet
* const set
= Sets
+ language
;
108 for (i
= 0 ; i
< set
->count
; ++i
)
110 regexPattern
*p
= &set
->patterns
[i
];
111 #if defined (POSIX_REGEX)
112 regfree (p
->pattern
);
117 if (p
->type
== PTRN_TAG
)
119 eFree (p
->u
.tag
.name_pattern
);
120 p
->u
.tag
.name_pattern
= NULL
;
121 eFree (p
->u
.tag
.kind
.name
);
122 p
->u
.tag
.kind
.name
= NULL
;
123 if (p
->u
.tag
.kind
.description
!= NULL
)
125 eFree (p
->u
.tag
.kind
.description
);
126 p
->u
.tag
.kind
.description
= NULL
;
130 if (set
->patterns
!= NULL
)
131 eFree (set
->patterns
);
132 set
->patterns
= NULL
;
138 * Regex pseudo-parser
141 static void makeRegexTag (
142 const vString
* const name
, const struct sKind
* const kind
)
147 Assert (name
!= NULL
&& vStringLength (name
) > 0);
148 Assert (kind
!= NULL
);
149 initTagEntry (&e
, vStringValue (name
));
150 e
.kind
= kind
->letter
;
151 e
.kindName
= kind
->name
;
157 * Regex pattern definition
/* Take a string like "/blah/" and turn it into "blah", making sure
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also turns "\t" into a Tab character.
 * Returns pointer to terminating separator, or to the terminating null
 * if no unquoted separator was found.  Works in place.  Null
 * terminates name string.
 *
 * An empty string has no separator character at all; it is returned
 * untouched instead of scanning beyond its terminator.  A lone backslash
 * at the very end of the string is dropped.
 */
static char* scanSeparators (char* name)
{
	const char sep = name [0];
	char *copyto = name;

	/* Nothing to scan: stepping over the "separator" would leave the buffer. */
	if (sep == '\0')
		return name;

	for (++name ; *name != '\0' ; ++name)
	{
		if (*name == '\\')
		{
			/* Look at the quoted character; stop if the string ends here. */
			++name;
			if (*name == '\0')
				break;
			if (*name == sep)
				*copyto++ = sep;
			else if (*name == 't')
				*copyto++ = '\t';
			else
			{
				/* Something else is quoted, so preserve the quote. */
				*copyto++ = '\\';
				*copyto++ = *name;
			}
		}
		else if (*name == sep)
			break;
		else
			*copyto++ = *name;
	}
	*copyto = '\0';
	return name;
}
202 /* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
203 * character is whatever the first character of `regexp' is), by breaking it
204 * up into null terminated strings, removing the separators, and expanding
205 * '\t' into tabs. When complete, `regexp' points to the line matching
206 * pattern, a pointer to the name matching pattern is written to `name', a
207 * pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
208 * to the trailing flags is written to `flags'. If the pattern is not in the
209 * correct format, a false value is returned.
211 static boolean
parseTagRegex (
212 char* const regexp
, char** const name
,
213 char** const kinds
, char** const flags
)
215 boolean result
= FALSE
;
216 const int separator
= (unsigned char) regexp
[0];
218 *name
= scanSeparators (regexp
);
220 error (WARNING
, "empty regexp");
221 else if (**name
!= separator
)
222 error (WARNING
, "%s: incomplete regexp", regexp
);
225 char* const third
= scanSeparators (*name
);
227 error (WARNING
, "%s: regexp missing name pattern", regexp
);
228 if ((*name
) [strlen (*name
) - 1] == '\\')
229 error (WARNING
, "error in name pattern: \"%s\"", *name
);
230 if (*third
!= separator
)
231 error (WARNING
, "%s: regexp missing final separator", regexp
);
234 char* const fourth
= scanSeparators (third
);
235 if (*fourth
== separator
)
238 scanSeparators (fourth
);
252 static void addCompiledTagPattern (
253 const langType language
, regex_t
* const pattern
,
254 char* const name
, const char kind
, char* const kindName
,
255 char *const description
)
259 if (language
> SetUpper
)
262 Sets
= xRealloc (Sets
, (language
+ 1), patternSet
);
263 for (i
= SetUpper
+ 1 ; i
<= language
; ++i
)
265 Sets
[i
].patterns
= NULL
;
270 set
= Sets
+ language
;
271 set
->patterns
= xRealloc (set
->patterns
, (set
->count
+ 1), regexPattern
);
272 ptrn
= &set
->patterns
[set
->count
];
275 ptrn
->pattern
= pattern
;
276 ptrn
->type
= PTRN_TAG
;
277 ptrn
->u
.tag
.name_pattern
= name
;
278 ptrn
->u
.tag
.kind
.enabled
= TRUE
;
279 ptrn
->u
.tag
.kind
.letter
= kind
;
280 ptrn
->u
.tag
.kind
.name
= kindName
;
281 ptrn
->u
.tag
.kind
.description
= description
;
284 static void addCompiledCallbackPattern (
285 const langType language
, regex_t
* const pattern
,
286 const regexCallback callback
)
290 if (language
> SetUpper
)
293 Sets
= xRealloc (Sets
, (language
+ 1), patternSet
);
294 for (i
= SetUpper
+ 1 ; i
<= language
; ++i
)
296 Sets
[i
].patterns
= NULL
;
301 set
= Sets
+ language
;
302 set
->patterns
= xRealloc (set
->patterns
, (set
->count
+ 1), regexPattern
);
303 ptrn
= &set
->patterns
[set
->count
];
306 ptrn
->pattern
= pattern
;
307 ptrn
->type
= PTRN_CALLBACK
;
308 ptrn
->u
.callback
.function
= callback
;
311 #if defined (POSIX_REGEX)
313 static regex_t
* compileRegex (const char* const regexp
, const char* const flags
)
315 int cflags
= REG_EXTENDED
| REG_NEWLINE
;
316 regex_t
*result
= NULL
;
319 for (i
= 0 ; flags
!= NULL
&& flags
[i
] != '\0' ; ++i
)
321 switch ((int) flags
[i
])
323 case 'b': cflags
&= ~REG_EXTENDED
; break;
324 case 'e': cflags
|= REG_EXTENDED
; break;
325 case 'i': cflags
|= REG_ICASE
; break;
326 default: error (WARNING
, "unknown regex flag: '%c'", *flags
); break;
329 result
= xMalloc (1, regex_t
);
330 errcode
= regcomp (result
, regexp
, cflags
);
334 regerror (errcode
, result
, errmsg
, 256);
335 error (WARNING
, "regcomp %s: %s", regexp
, errmsg
);
/* Splits a kind specification of the form "[k][,name[,description]]".
 * `*kind' receives the one-letter kind, defaulting to 'r' when `kinds' does
 * not begin with a single letter followed by a comma or the end of the
 * string.  `*kindName' receives a newly allocated name, defaulting to
 * "regex".  `*description' receives a newly allocated description, or NULL
 * when none is given. */
static void parseKinds (
		const char* const kinds, char* const kind, char** const kindName,
		char **description)
{
	const char *rest;
	const char *comma;
	size_t nameLength;

	*kind = 'r';
	*kindName = NULL;
	*description = NULL;

	if (kinds == NULL  ||  kinds [0] == '\0')
	{
		*kindName = eStrdup ("regex");
		return;
	}

	/* A leading single letter, if present, is the kind letter. */
	rest = kinds;
	if (rest [0] != ','  &&  (rest [1] == ','  ||  rest [1] == '\0'))
		*kind = *rest++;
	if (*rest == ',')
		++rest;

	if (rest [0] == '\0')
	{
		*kindName = eStrdup ("regex");
		return;
	}

	comma = strchr (rest, ',');
	if (comma == NULL)
	{
		*kindName = eStrdup (rest);
		return;
	}

	/* Name runs up to the comma; whatever follows is the description. */
	nameLength = (size_t) (comma - rest);
	*kindName = (char*) eMalloc (nameLength + 1);
	memcpy (*kindName, rest, nameLength);
	(*kindName) [nameLength] = '\0';
	if (comma [1] != '\0')
		*description = eStrdup (comma + 1);
}
386 static void printRegexKind (const regexPattern
*pat
, unsigned int i
, boolean indent
)
388 const struct sKind
*const kind
= &pat
[i
].u
.tag
.kind
;
389 const char *const indentation
= indent
? " " : "";
390 Assert (pat
[i
].type
== PTRN_TAG
);
391 printf ("%s%c %s %s\n", indentation
,
392 kind
->letter
!= '\0' ? kind
->letter
: '?',
393 kind
->description
!= NULL
? kind
->description
: kind
->name
,
394 kind
->enabled
? "" : " [off]");
397 static void processLanguageRegex (const langType language
,
398 const char* const parameter
)
400 if (parameter
== NULL
|| parameter
[0] == '\0')
401 clearPatternSet (language
);
402 else if (parameter
[0] != '@')
403 addLanguageRegex (language
, parameter
);
404 else if (! doesFileExist (parameter
+ 1))
405 error (WARNING
, "cannot open regex file");
408 const char* regexfile
= parameter
+ 1;
409 FILE* const fp
= fopen (regexfile
, "r");
411 error (WARNING
| PERROR
, "%s", regexfile
);
414 vString
* const regex
= vStringNew ();
415 while (readLine (regex
, fp
))
416 addLanguageRegex (language
, vStringValue (regex
));
418 vStringDelete (regex
);
424 * Regex pattern matching
427 #if defined (POSIX_REGEX)
429 static vString
* substitute (
430 const char* const in
, const char* out
,
431 const int nmatch
, const regmatch_t
* const pmatch
)
433 vString
* result
= vStringNew ();
435 for (p
= out
; *p
!= '\0' ; p
++)
437 if (*p
== '\\' && isdigit ((int) *++p
))
439 const int dig
= *p
- '0';
440 if (0 < dig
&& dig
< nmatch
&& pmatch
[dig
].rm_so
!= -1)
442 const int diglen
= pmatch
[dig
].rm_eo
- pmatch
[dig
].rm_so
;
443 vStringNCatS (result
, in
+ pmatch
[dig
].rm_so
, diglen
);
446 else if (*p
!= '\n' && *p
!= '\r')
447 vStringPut (result
, *p
);
449 vStringTerminate (result
);
453 static void matchTagPattern (const vString
* const line
,
454 const regexPattern
* const patbuf
,
455 const regmatch_t
* const pmatch
)
457 vString
*const name
= substitute (vStringValue (line
),
458 patbuf
->u
.tag
.name_pattern
, BACK_REFERENCE_COUNT
, pmatch
);
459 vStringStripLeading (name
);
460 vStringStripTrailing (name
);
461 if (vStringLength (name
) > 0)
462 makeRegexTag (name
, &patbuf
->u
.tag
.kind
);
464 error (WARNING
, "%s:%ld: null expansion of name pattern \"%s\"",
465 getInputFileName (), getInputLineNumber (),
466 patbuf
->u
.tag
.name_pattern
);
467 vStringDelete (name
);
470 static void matchCallbackPattern (
471 const vString
* const line
, const regexPattern
* const patbuf
,
472 const regmatch_t
* const pmatch
)
474 regexMatch matches
[BACK_REFERENCE_COUNT
];
475 unsigned int count
= 0;
477 for (i
= 0 ; i
< BACK_REFERENCE_COUNT
&& pmatch
[i
].rm_so
!= -1 ; ++i
)
479 matches
[i
].start
= pmatch
[i
].rm_so
;
480 matches
[i
].length
= pmatch
[i
].rm_eo
- pmatch
[i
].rm_so
;
483 patbuf
->u
.callback
.function (vStringValue (line
), matches
, count
);
486 static boolean
matchRegexPattern (const vString
* const line
,
487 const regexPattern
* const patbuf
)
489 boolean result
= FALSE
;
490 regmatch_t pmatch
[BACK_REFERENCE_COUNT
];
491 const int match
= regexec (patbuf
->pattern
, vStringValue (line
),
492 BACK_REFERENCE_COUNT
, pmatch
, 0);
496 if (patbuf
->type
== PTRN_TAG
)
497 matchTagPattern (line
, patbuf
, pmatch
);
498 else if (patbuf
->type
== PTRN_CALLBACK
)
499 matchCallbackPattern (line
, patbuf
, pmatch
);
502 Assert ("invalid pattern type" == NULL
);
511 /* PUBLIC INTERFACE */
513 /* Match against all patterns for specified language. Returns true if at least
514 * on pattern matched.
516 extern boolean
matchRegex (const vString
* const line
, const langType language
)
518 boolean result
= FALSE
;
519 if (language
!= LANG_IGNORE
&& language
<= SetUpper
&&
520 Sets
[language
].count
> 0)
522 const patternSet
* const set
= Sets
+ language
;
524 for (i
= 0 ; i
< set
->count
; ++i
)
525 if (matchRegexPattern (line
, set
->patterns
+ i
))
/* Consumes the input file line by line until fileReadLine() returns NULL;
 * the lines themselves are not examined here. */
extern void findRegexTags (void)
{
	for (;;)
	{
		if (fileReadLine () == NULL)
			break;
	}
}
538 #endif /* HAVE_REGEX */
540 extern void addTagRegex (
541 const langType language __unused__
,
542 const char* const regex __unused__
,
543 const char* const name __unused__
,
544 const char* const kinds __unused__
,
545 const char* const flags __unused__
)
548 Assert (regex
!= NULL
);
549 Assert (name
!= NULL
);
552 regex_t
* const cp
= compileRegex (regex
, flags
);
558 parseKinds (kinds
, &kind
, &kindName
, &description
);
559 addCompiledTagPattern (language
, cp
, eStrdup (name
),
560 kind
, kindName
, description
);
566 extern void addCallbackRegex (
567 const langType language __unused__
,
568 const char* const regex __unused__
,
569 const char* const flags __unused__
,
570 const regexCallback callback __unused__
)
573 Assert (regex
!= NULL
);
576 regex_t
* const cp
= compileRegex (regex
, flags
);
578 addCompiledCallbackPattern (language
, cp
, callback
);
583 extern void addLanguageRegex (
584 const langType language __unused__
, const char* const regex __unused__
)
589 char *const regex_pat
= eStrdup (regex
);
590 char *name
, *kinds
, *flags
;
591 if (parseTagRegex (regex_pat
, &name
, &kinds
, &flags
))
593 addTagRegex (language
, regex_pat
, name
, kinds
, flags
);
601 * Regex option parsing
604 extern boolean
processRegexOption (const char *const option
,
605 const char *const parameter __unused__
)
607 boolean handled
= FALSE
;
608 const char* const dash
= strchr (option
, '-');
609 if (dash
!= NULL
&& strncmp (option
, "regex", dash
- option
) == 0)
613 language
= getNamedLanguage (dash
+ 1);
614 if (language
== LANG_IGNORE
)
615 error (WARNING
, "unknown language \"%s\" in --%s option", (dash
+ 1), option
);
617 processLanguageRegex (language
, parameter
);
619 error (WARNING
, "regex support not available; required for --%s option",
627 extern void disableRegexKinds (const langType language __unused__
)
630 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
632 patternSet
* const set
= Sets
+ language
;
634 for (i
= 0 ; i
< set
->count
; ++i
)
635 if (set
->patterns
[i
].type
== PTRN_TAG
)
636 set
->patterns
[i
].u
.tag
.kind
.enabled
= FALSE
;
641 extern boolean
enableRegexKind (
642 const langType language __unused__
,
643 const int kind __unused__
, const boolean mode __unused__
)
645 boolean result
= FALSE
;
647 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
649 patternSet
* const set
= Sets
+ language
;
651 for (i
= 0 ; i
< set
->count
; ++i
)
652 if (set
->patterns
[i
].type
== PTRN_TAG
&&
653 set
->patterns
[i
].u
.tag
.kind
.letter
== kind
)
655 set
->patterns
[i
].u
.tag
.kind
.enabled
= mode
;
663 extern void printRegexKinds (const langType language __unused__
, boolean indent __unused__
)
666 if (language
<= SetUpper
&& Sets
[language
].count
> 0)
668 patternSet
* const set
= Sets
+ language
;
670 for (i
= 0 ; i
< set
->count
; ++i
)
671 if (set
->patterns
[i
].type
== PTRN_TAG
)
672 printRegexKind (set
->patterns
, i
, indent
);
/* Releases the patterns of every language, then the Sets array itself, and
 * returns the module to its initial state with no sets allocated. */
extern void freeRegexResources (void)
{
#ifdef HAVE_REGEX
	int lang = 0;

	while (lang <= SetUpper)
	{
		clearPatternSet (lang);
		++lang;
	}
	if (Sets != NULL)
		eFree (Sets);
	Sets = NULL;
	SetUpper = -1;
#endif
}
/* Check for broken regcomp() on Cygwin.  A trial expression is compiled;
 * if that fails, a warning is issued and regex support is disabled.  On
 * success the trial expression is released again. */
extern void checkRegex (void)
{
#if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
	regex_t patbuf;
	if (regcomp (&patbuf, "/hello/", 0) != 0)
	{
		error (WARNING, "Disabling broken regex");
		regexBroken = TRUE;
	}
	else
	{
		/* The probe succeeded; do not leak the compiled expression. */
		regfree (&patbuf);
	}
#endif
}
704 /* vi:set tabstop=4 shiftwidth=4: */