No empty .Rs/.Re
[netbsd-mini2440.git] / external / ibm-public / postfix / dist / src / util / dict_pcre.c
blob099a2c1fd801b2a5832600dcbc0fd8b1ac707acd
1 /* $NetBSD$ */
3 /*++
4 /* NAME
5 /* dict_pcre 3
6 /* SUMMARY
7 /* dictionary manager interface to PCRE regular expression library
8 /* SYNOPSIS
9 /* #include <dict_pcre.h>
11 /* DICT *dict_pcre_open(name, dummy, dict_flags)
12 /* const char *name;
13 /* int dummy;
14 /* int dict_flags;
15 /* DESCRIPTION
16 /* dict_pcre_open() opens the named file and compiles the contained
17 /* regular expressions. The result object can be used to match strings
18 /* against the table.
19 /* SEE ALSO
20 /* dict(3) generic dictionary manager
21 /* AUTHOR(S)
22 /* Andrew McNamara
23 /* andrewm@connect.com.au
24 /* connect.com.au Pty. Ltd.
25 /* Level 3, 213 Miller St
26 /* North Sydney, NSW, Australia
28 /* Wietse Venema
29 /* IBM T.J. Watson Research
30 /* P.O. Box 704
31 /* Yorktown Heights, NY 10598, USA
32 /*--*/
34 #include "sys_defs.h"
36 #ifdef HAS_PCRE
38 /* System library. */
40 #include <stdio.h> /* sprintf() prototype */
41 #include <stdlib.h>
42 #include <unistd.h>
43 #include <string.h>
44 #include <ctype.h>
46 #ifdef STRCASECMP_IN_STRINGS_H
47 #include <strings.h>
48 #endif
50 /* Utility library. */
52 #include "mymalloc.h"
53 #include "msg.h"
54 #include "safe.h"
55 #include "vstream.h"
56 #include "vstring.h"
57 #include "stringops.h"
58 #include "readlline.h"
59 #include "dict.h"
60 #include "dict_pcre.h"
61 #include "mac_parse.h"
62 #include "pcre.h"
/*
 * Rule opcodes. Support for IF/ENDIF based on an idea by Bert Driehuis.
 */
#define DICT_PCRE_OP_MATCH    1        /* Match this regexp */
#define DICT_PCRE_OP_IF       2        /* Increase if/endif nesting on match */
#define DICT_PCRE_OP_ENDIF    3        /* Decrease if/endif nesting on match */

/*
 * Max strings captured by regexp - essentially the max number of (..)
 * groups. The offsets array handed to the matcher is sized as three times
 * this value.
 */
#define PCRE_MAX_CAPTURE      99
77 * Regular expression before and after compilation.
79 typedef struct {
80 char *regexp; /* regular expression */
81 int options; /* options */
82 int match; /* positive or negative match */
83 } DICT_PCRE_REGEXP;
85 typedef struct {
86 pcre *pattern; /* the compiled pattern */
87 pcre_extra *hints; /* hints to speed pattern execution */
88 } DICT_PCRE_ENGINE;
91 * Compiled generic rule, and subclasses that derive from it.
93 typedef struct DICT_PCRE_RULE {
94 int op; /* DICT_PCRE_OP_MATCH/IF/ENDIF */
95 int nesting; /* level of IF/ENDIF nesting */
96 int lineno; /* source file line number */
97 struct DICT_PCRE_RULE *next; /* next rule in dict */
98 } DICT_PCRE_RULE;
100 typedef struct {
101 DICT_PCRE_RULE rule; /* generic part */
102 pcre *pattern; /* compiled pattern */
103 pcre_extra *hints; /* hints to speed pattern execution */
104 char *replacement; /* replacement string */
105 int match; /* positive or negative match */
106 size_t max_sub; /* largest $number in replacement */
107 } DICT_PCRE_MATCH_RULE;
109 typedef struct {
110 DICT_PCRE_RULE rule; /* generic members */
111 pcre *pattern; /* compiled pattern */
112 pcre_extra *hints; /* hints to speed pattern execution */
113 int match; /* positive or negative match */
114 } DICT_PCRE_IF_RULE;
117 * PCRE map.
119 typedef struct {
120 DICT dict; /* generic members */
121 DICT_PCRE_RULE *head;
122 VSTRING *expansion_buf; /* lookup result */
123 } DICT_PCRE;
125 static int dict_pcre_init = 0; /* flag need to init pcre library */
128 * Context for $number expansion callback.
130 typedef struct {
131 DICT_PCRE *dict_pcre; /* the dictionary handle */
132 DICT_PCRE_MATCH_RULE *match_rule; /* the rule we matched */
133 const char *lookup_string; /* string against which we match */
134 int offsets[PCRE_MAX_CAPTURE * 3]; /* Cut substrings */
135 int matches; /* Count of cuts */
136 } DICT_PCRE_EXPAND_CONTEXT;
139 * Context for $number pre-scan callback.
141 typedef struct {
142 const char *mapname; /* name of regexp map */
143 int lineno; /* where in file */
144 size_t max_sub; /* Largest $n seen */
145 char *literal; /* constant result, $$ -> $ */
146 } DICT_PCRE_PRESCAN_CONTEXT;
/*
 * Compatibility: supply MAC_PARSE_OK when the macro parser header does not
 * define it.
 */
#ifndef MAC_PARSE_OK
#define MAC_PARSE_OK 0
#endif

/*
 * Macros to make dense code more accessible.
 */
#define NULL_STARTOFFSET        (0)
#define NULL_EXEC_OPTIONS       (0)
#define NULL_OVECTOR            ((int *) 0)
#define NULL_OVECTOR_LENGTH     (0)
163 /* dict_pcre_expand - replace $number with matched text */
165 static int dict_pcre_expand(int type, VSTRING *buf, char *ptr)
167 DICT_PCRE_EXPAND_CONTEXT *ctxt = (DICT_PCRE_EXPAND_CONTEXT *) ptr;
168 DICT_PCRE_MATCH_RULE *match_rule = ctxt->match_rule;
169 DICT_PCRE *dict_pcre = ctxt->dict_pcre;
170 const char *pp;
171 int n;
172 int ret;
175 * Replace $0-${99} with strings cut from matched text.
177 if (type == MAC_PARSE_VARNAME) {
178 n = atoi(vstring_str(buf));
179 ret = pcre_get_substring(ctxt->lookup_string, ctxt->offsets,
180 ctxt->matches, n, &pp);
181 if (ret < 0) {
182 if (ret == PCRE_ERROR_NOSUBSTRING)
183 return (MAC_PARSE_UNDEF);
184 else
185 msg_fatal("pcre map %s, line %d: pcre_get_substring error: %d",
186 dict_pcre->dict.name, match_rule->rule.lineno, ret);
188 if (*pp == 0) {
189 myfree((char *) pp);
190 return (MAC_PARSE_UNDEF);
192 vstring_strcat(dict_pcre->expansion_buf, pp);
193 myfree((char *) pp);
194 return (MAC_PARSE_OK);
198 * Straight text - duplicate with no substitution.
200 else {
201 vstring_strcat(dict_pcre->expansion_buf, vstring_str(buf));
202 return (MAC_PARSE_OK);
206 /* dict_pcre_exec_error - report matching error */
208 static void dict_pcre_exec_error(const char *mapname, int lineno, int errval)
210 switch (errval) {
211 case 0:
212 msg_warn("pcre map %s, line %d: too many (...)",
213 mapname, lineno);
214 return;
215 case PCRE_ERROR_NULL:
216 case PCRE_ERROR_BADOPTION:
217 msg_fatal("pcre map %s, line %d: bad args to re_exec",
218 mapname, lineno);
219 case PCRE_ERROR_BADMAGIC:
220 case PCRE_ERROR_UNKNOWN_NODE:
221 msg_fatal("pcre map %s, line %d: corrupt compiled regexp",
222 mapname, lineno);
223 #ifdef PCRE_ERROR_NOMEMORY
224 case PCRE_ERROR_NOMEMORY:
225 msg_fatal("pcre map %s, line %d: out of memory",
226 mapname, lineno);
227 #endif
228 #ifdef PCRE_ERROR_MATCHLIMIT
229 case PCRE_ERROR_MATCHLIMIT:
230 msg_fatal("pcre map %s, line %d: matched text exceeds buffer limit",
231 mapname, lineno);
232 #endif
233 #ifdef PCRE_ERROR_BADUTF8
234 case PCRE_ERROR_BADUTF8:
235 msg_fatal("pcre map %s, line %d: bad UTF-8 sequence in search string",
236 mapname, lineno);
237 #endif
238 #ifdef PCRE_ERROR_BADUTF8_OFFSET
239 case PCRE_ERROR_BADUTF8_OFFSET:
240 msg_fatal("pcre map %s, line %d: bad UTF-8 start offset in search string",
241 mapname, lineno);
242 #endif
243 default:
244 msg_fatal("pcre map %s, line %d: unknown re_exec error: %d",
245 mapname, lineno, errval);
249 /* dict_pcre_lookup - match string and perform optional substitution */
251 static const char *dict_pcre_lookup(DICT *dict, const char *lookup_string)
253 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
254 DICT_PCRE_RULE *rule;
255 DICT_PCRE_IF_RULE *if_rule;
256 DICT_PCRE_MATCH_RULE *match_rule;
257 int lookup_len = strlen(lookup_string);
258 DICT_PCRE_EXPAND_CONTEXT ctxt;
259 int nesting = 0;
261 dict_errno = 0;
263 if (msg_verbose)
264 msg_info("dict_pcre_lookup: %s: %s", dict->name, lookup_string);
267 * Optionally fold the key.
269 if (dict->flags & DICT_FLAG_FOLD_MUL) {
270 if (dict->fold_buf == 0)
271 dict->fold_buf = vstring_alloc(10);
272 vstring_strcpy(dict->fold_buf, lookup_string);
273 lookup_string = lowercase(vstring_str(dict->fold_buf));
275 for (rule = dict_pcre->head; rule; rule = rule->next) {
278 * Skip rules inside failed IF/ENDIF.
280 if (nesting < rule->nesting)
281 continue;
283 switch (rule->op) {
286 * Search for a matching expression.
288 case DICT_PCRE_OP_MATCH:
289 match_rule = (DICT_PCRE_MATCH_RULE *) rule;
290 ctxt.matches = pcre_exec(match_rule->pattern, match_rule->hints,
291 lookup_string, lookup_len,
292 NULL_STARTOFFSET, NULL_EXEC_OPTIONS,
293 ctxt.offsets, PCRE_MAX_CAPTURE * 3);
295 if (ctxt.matches > 0) {
296 if (!match_rule->match)
297 continue; /* Negative rule matched */
298 } else if (ctxt.matches == PCRE_ERROR_NOMATCH) {
299 if (match_rule->match)
300 continue; /* Positive rule did not
301 * match */
302 } else {
303 dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches);
304 continue; /* pcre_exec failed */
308 * Skip $number substitutions when the replacement text contains
309 * no $number strings, as learned during the compile time
310 * pre-scan. The pre-scan already replaced $$ by $.
312 if (match_rule->max_sub == 0)
313 return match_rule->replacement;
316 * We've got a match. Perform substitution on replacement string.
318 if (dict_pcre->expansion_buf == 0)
319 dict_pcre->expansion_buf = vstring_alloc(10);
320 VSTRING_RESET(dict_pcre->expansion_buf);
321 ctxt.dict_pcre = dict_pcre;
322 ctxt.match_rule = match_rule;
323 ctxt.lookup_string = lookup_string;
325 if (mac_parse(match_rule->replacement, dict_pcre_expand,
326 (char *) &ctxt) & MAC_PARSE_ERROR)
327 msg_fatal("pcre map %s, line %d: bad replacement syntax",
328 dict->name, rule->lineno);
330 VSTRING_TERMINATE(dict_pcre->expansion_buf);
331 return (vstring_str(dict_pcre->expansion_buf));
334 * Conditional. XXX We provide space for matched substring info
335 * because PCRE uses part of it as workspace for backtracking.
336 * PCRE will allocate memory if it runs out of backtracking
337 * storage.
339 case DICT_PCRE_OP_IF:
340 if_rule = (DICT_PCRE_IF_RULE *) rule;
341 ctxt.matches = pcre_exec(if_rule->pattern, if_rule->hints,
342 lookup_string, lookup_len,
343 NULL_STARTOFFSET, NULL_EXEC_OPTIONS,
344 ctxt.offsets, PCRE_MAX_CAPTURE * 3);
346 if (ctxt.matches > 0) {
347 if (!if_rule->match)
348 continue; /* Negative rule matched */
349 } else if (ctxt.matches == PCRE_ERROR_NOMATCH) {
350 if (if_rule->match)
351 continue; /* Positive rule did not
352 * match */
353 } else {
354 dict_pcre_exec_error(dict->name, rule->lineno, ctxt.matches);
355 continue; /* pcre_exec failed */
357 nesting++;
358 continue;
361 * ENDIF after successful IF.
363 case DICT_PCRE_OP_ENDIF:
364 nesting--;
365 continue;
367 default:
368 msg_panic("dict_pcre_lookup: impossible operation %d", rule->op);
371 return (0);
374 /* dict_pcre_close - close pcre dictionary */
376 static void dict_pcre_close(DICT *dict)
378 DICT_PCRE *dict_pcre = (DICT_PCRE *) dict;
379 DICT_PCRE_RULE *rule;
380 DICT_PCRE_RULE *next;
381 DICT_PCRE_MATCH_RULE *match_rule;
382 DICT_PCRE_IF_RULE *if_rule;
384 for (rule = dict_pcre->head; rule; rule = next) {
385 next = rule->next;
386 switch (rule->op) {
387 case DICT_PCRE_OP_MATCH:
388 match_rule = (DICT_PCRE_MATCH_RULE *) rule;
389 if (match_rule->pattern)
390 myfree((char *) match_rule->pattern);
391 if (match_rule->hints)
392 myfree((char *) match_rule->hints);
393 if (match_rule->replacement)
394 myfree((char *) match_rule->replacement);
395 break;
396 case DICT_PCRE_OP_IF:
397 if_rule = (DICT_PCRE_IF_RULE *) rule;
398 if (if_rule->pattern)
399 myfree((char *) if_rule->pattern);
400 if (if_rule->hints)
401 myfree((char *) if_rule->hints);
402 break;
403 case DICT_PCRE_OP_ENDIF:
404 break;
405 default:
406 msg_panic("dict_pcre_close: unknown operation %d", rule->op);
408 myfree((char *) rule);
410 if (dict_pcre->expansion_buf)
411 vstring_free(dict_pcre->expansion_buf);
412 if (dict->fold_buf)
413 vstring_free(dict->fold_buf);
414 dict_free(dict);
417 /* dict_pcre_get_pattern - extract pattern from rule */
419 static int dict_pcre_get_pattern(const char *mapname, int lineno, char **bufp,
420 DICT_PCRE_REGEXP *pattern)
422 char *p = *bufp;
423 char re_delimiter;
426 * Process negation operators.
428 pattern->match = 1;
429 while (*p == '!') {
430 pattern->match = !pattern->match;
431 p++;
435 * Grr...aceful handling of whitespace after '!'.
437 while (*p && ISSPACE(*p))
438 p++;
439 if (*p == 0) {
440 msg_warn("pcre map %s, line %d: no regexp: skipping this rule",
441 mapname, lineno);
442 return (0);
444 re_delimiter = *p++;
445 pattern->regexp = p;
448 * Search for second delimiter, handling backslash escape.
450 while (*p) {
451 if (*p == '\\') {
452 ++p;
453 if (*p == 0)
454 break;
455 } else if (*p == re_delimiter)
456 break;
457 ++p;
460 if (!*p) {
461 msg_warn("pcre map %s, line %d: no closing regexp delimiter \"%c\": "
462 "ignoring this rule", mapname, lineno, re_delimiter);
463 return (0);
465 *p++ = 0; /* Null term the regexp */
468 * Parse any regexp options.
470 pattern->options = PCRE_CASELESS | PCRE_DOTALL;
471 while (*p && !ISSPACE(*p)) {
472 switch (*p) {
473 case 'i':
474 pattern->options ^= PCRE_CASELESS;
475 break;
476 case 'm':
477 pattern->options ^= PCRE_MULTILINE;
478 break;
479 case 's':
480 pattern->options ^= PCRE_DOTALL;
481 break;
482 case 'x':
483 pattern->options ^= PCRE_EXTENDED;
484 break;
485 case 'A':
486 pattern->options ^= PCRE_ANCHORED;
487 break;
488 case 'E':
489 pattern->options ^= PCRE_DOLLAR_ENDONLY;
490 break;
491 case 'U':
492 pattern->options ^= PCRE_UNGREEDY;
493 break;
494 case 'X':
495 pattern->options ^= PCRE_EXTRA;
496 break;
497 default:
498 msg_warn("pcre map %s, line %d: unknown regexp option \"%c\": "
499 "skipping this rule", mapname, lineno, *p);
500 return (0);
502 ++p;
504 *bufp = p;
505 return (1);
508 /* dict_pcre_prescan - sanity check $number instances in replacement text */
510 static int dict_pcre_prescan(int type, VSTRING *buf, char *context)
512 DICT_PCRE_PRESCAN_CONTEXT *ctxt = (DICT_PCRE_PRESCAN_CONTEXT *) context;
513 size_t n;
516 * Keep a copy of literal text (with $$ already replaced by $) if and
517 * only if the replacement text contains no $number expression. This way
518 * we can avoid having to scan the replacement text at lookup time.
520 if (type == MAC_PARSE_VARNAME) {
521 if (ctxt->literal) {
522 myfree(ctxt->literal);
523 ctxt->literal = 0;
525 if (!alldig(vstring_str(buf))) {
526 msg_warn("pcre map %s, line %d: non-numeric replacement index \"%s\"",
527 ctxt->mapname, ctxt->lineno, vstring_str(buf));
528 return (MAC_PARSE_ERROR);
530 n = atoi(vstring_str(buf));
531 if (n < 1) {
532 msg_warn("pcre map %s, line %d: out of range replacement index \"%s\"",
533 ctxt->mapname, ctxt->lineno, vstring_str(buf));
534 return (MAC_PARSE_ERROR);
536 if (n > ctxt->max_sub)
537 ctxt->max_sub = n;
538 } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
539 if (ctxt->literal)
540 msg_panic("pcre map %s, line %d: multiple literals but no $number",
541 ctxt->mapname, ctxt->lineno);
542 ctxt->literal = mystrdup(vstring_str(buf));
544 return (MAC_PARSE_OK);
547 /* dict_pcre_compile - compile pattern */
549 static int dict_pcre_compile(const char *mapname, int lineno,
550 DICT_PCRE_REGEXP *pattern,
551 DICT_PCRE_ENGINE *engine)
553 const char *error;
554 int errptr;
556 engine->pattern = pcre_compile(pattern->regexp, pattern->options,
557 &error, &errptr, NULL);
558 if (engine->pattern == 0) {
559 msg_warn("pcre map %s, line %d: error in regex at offset %d: %s",
560 mapname, lineno, errptr, error);
561 return (0);
563 engine->hints = pcre_study(engine->pattern, 0, &error);
564 if (error != 0) {
565 msg_warn("pcre map %s, line %d: error while studying regex: %s",
566 mapname, lineno, error);
567 myfree((char *) engine->pattern);
568 return (0);
570 return (1);
573 /* dict_pcre_rule_alloc - fill in a generic rule structure */
575 static DICT_PCRE_RULE *dict_pcre_rule_alloc(int op, int nesting,
576 int lineno,
577 size_t size)
579 DICT_PCRE_RULE *rule;
581 rule = (DICT_PCRE_RULE *) mymalloc(size);
582 rule->op = op;
583 rule->nesting = nesting;
584 rule->lineno = lineno;
585 rule->next = 0;
587 return (rule);
590 /* dict_pcre_parse_rule - parse and compile one rule */
592 static DICT_PCRE_RULE *dict_pcre_parse_rule(const char *mapname, int lineno,
593 char *line, int nesting,
594 int dict_flags)
596 char *p;
597 int actual_sub;
599 p = line;
602 * An ordinary match rule takes one pattern and replacement text.
604 if (!ISALNUM(*p)) {
605 DICT_PCRE_REGEXP regexp;
606 DICT_PCRE_ENGINE engine;
607 DICT_PCRE_PRESCAN_CONTEXT prescan_context;
608 DICT_PCRE_MATCH_RULE *match_rule;
611 * Get the pattern string and options.
613 if (dict_pcre_get_pattern(mapname, lineno, &p, &regexp) == 0)
614 return (0);
617 * Get the replacement text.
619 while (*p && ISSPACE(*p))
620 ++p;
621 if (!*p)
622 msg_warn("%s, line %d: no replacement text: using empty string",
623 mapname, lineno);
626 * Sanity check the $number instances in the replacement text.
628 prescan_context.mapname = mapname;
629 prescan_context.lineno = lineno;
630 prescan_context.max_sub = 0;
631 prescan_context.literal = 0;
634 * The optimizer will eliminate code duplication and/or dead code.
636 #define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
637 if (prescan_context.literal) \
638 myfree(prescan_context.literal); \
639 return (rval); \
640 } while (0)
642 if (mac_parse(p, dict_pcre_prescan, (char *) &prescan_context)
643 & MAC_PARSE_ERROR) {
644 msg_warn("pcre map %s, line %d: bad replacement syntax: "
645 "skipping this rule", mapname, lineno);
646 CREATE_MATCHOP_ERROR_RETURN(0);
650 * Substring replacement not possible with negative regexps.
652 if (prescan_context.max_sub > 0 && regexp.match == 0) {
653 msg_warn("pcre map %s, line %d: $number found in negative match "
654 "replacement text: skipping this rule", mapname, lineno);
655 CREATE_MATCHOP_ERROR_RETURN(0);
657 if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) {
658 msg_warn("pcre map %s, line %d: "
659 "regular expression substitution is not allowed: "
660 "skipping this rule", mapname, lineno);
661 CREATE_MATCHOP_ERROR_RETURN(0);
665 * Compile the pattern.
667 if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
668 CREATE_MATCHOP_ERROR_RETURN(0);
669 #ifdef PCRE_INFO_CAPTURECOUNT
670 if (pcre_fullinfo(engine.pattern, engine.hints,
671 PCRE_INFO_CAPTURECOUNT,
672 (void *) &actual_sub) != 0)
673 msg_panic("pcre map %s, line %d: pcre_fullinfo failed",
674 mapname, lineno);
675 if (prescan_context.max_sub > actual_sub) {
676 msg_warn("pcre map %s, line %d: out of range replacement index \"%d\": "
677 "skipping this rule", mapname, lineno,
678 (int) prescan_context.max_sub);
679 if (engine.pattern)
680 myfree((char *) engine.pattern);
681 if (engine.hints)
682 myfree((char *) engine.hints);
683 CREATE_MATCHOP_ERROR_RETURN(0);
685 #endif
688 * Save the result.
690 match_rule = (DICT_PCRE_MATCH_RULE *)
691 dict_pcre_rule_alloc(DICT_PCRE_OP_MATCH, nesting, lineno,
692 sizeof(DICT_PCRE_MATCH_RULE));
693 match_rule->match = regexp.match;
694 match_rule->max_sub = prescan_context.max_sub;
695 if (prescan_context.literal)
696 match_rule->replacement = prescan_context.literal;
697 else
698 match_rule->replacement = mystrdup(p);
699 match_rule->pattern = engine.pattern;
700 match_rule->hints = engine.hints;
701 return ((DICT_PCRE_RULE *) match_rule);
705 * The IF operator takes one pattern but no replacement text.
707 else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
708 DICT_PCRE_REGEXP regexp;
709 DICT_PCRE_ENGINE engine;
710 DICT_PCRE_IF_RULE *if_rule;
712 p += 2;
715 * Get the pattern.
717 while (*p && ISSPACE(*p))
718 p++;
719 if (!dict_pcre_get_pattern(mapname, lineno, &p, &regexp))
720 return (0);
723 * Warn about out-of-place text.
725 while (*p && ISSPACE(*p))
726 ++p;
727 if (*p) {
728 msg_warn("pcre map %s, line %d: ignoring extra text after "
729 "IF statement: \"%s\"", mapname, lineno, p);
730 msg_warn("pcre map %s, line %d: do not prepend whitespace"
731 " to statements between IF and ENDIF", mapname, lineno);
735 * Compile the pattern.
737 if (dict_pcre_compile(mapname, lineno, &regexp, &engine) == 0)
738 return (0);
741 * Save the result.
743 if_rule = (DICT_PCRE_IF_RULE *)
744 dict_pcre_rule_alloc(DICT_PCRE_OP_IF, nesting, lineno,
745 sizeof(DICT_PCRE_IF_RULE));
746 if_rule->match = regexp.match;
747 if_rule->pattern = engine.pattern;
748 if_rule->hints = engine.hints;
749 return ((DICT_PCRE_RULE *) if_rule);
753 * The ENDIF operator takes no patterns and no replacement text.
755 else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
756 DICT_PCRE_RULE *rule;
758 p += 5;
761 * Warn about out-of-place ENDIFs.
763 if (nesting == 0) {
764 msg_warn("pcre map %s, line %d: ignoring ENDIF without matching IF",
765 mapname, lineno);
766 return (0);
770 * Warn about out-of-place text.
772 while (*p && ISSPACE(*p))
773 ++p;
774 if (*p)
775 msg_warn("pcre map %s, line %d: ignoring extra text after ENDIF",
776 mapname, lineno);
779 * Save the result.
781 rule = dict_pcre_rule_alloc(DICT_PCRE_OP_ENDIF, nesting, lineno,
782 sizeof(DICT_PCRE_RULE));
783 return (rule);
787 * Unrecognized input.
789 else {
790 msg_warn("pcre map %s, line %d: ignoring unrecognized request",
791 mapname, lineno);
792 return (0);
796 /* dict_pcre_open - load and compile a file containing regular expressions */
798 DICT *dict_pcre_open(const char *mapname, int unused_flags, int dict_flags)
800 DICT_PCRE *dict_pcre;
801 VSTREAM *map_fp;
802 VSTRING *line_buffer;
803 DICT_PCRE_RULE *last_rule = 0;
804 DICT_PCRE_RULE *rule;
805 int lineno = 0;
806 int nesting = 0;
807 char *p;
809 line_buffer = vstring_alloc(100);
811 dict_pcre = (DICT_PCRE *) dict_alloc(DICT_TYPE_PCRE, mapname,
812 sizeof(*dict_pcre));
813 dict_pcre->dict.lookup = dict_pcre_lookup;
814 dict_pcre->dict.close = dict_pcre_close;
815 dict_pcre->dict.flags = dict_flags | DICT_FLAG_PATTERN;
816 if (dict_flags & DICT_FLAG_FOLD_MUL)
817 dict_pcre->dict.fold_buf = vstring_alloc(10);
818 dict_pcre->head = 0;
819 dict_pcre->expansion_buf = 0;
821 if (dict_pcre_init == 0) {
822 pcre_malloc = (void *(*) (size_t)) mymalloc;
823 pcre_free = (void (*) (void *)) myfree;
824 dict_pcre_init = 1;
828 * Parse the pcre table.
830 if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0)
831 msg_fatal("open %s: %m", mapname);
833 while (readlline(line_buffer, map_fp, &lineno)) {
834 p = vstring_str(line_buffer);
835 trimblanks(p, 0)[0] = 0; /* Trim space at end */
836 if (*p == 0)
837 continue;
838 rule = dict_pcre_parse_rule(mapname, lineno, p, nesting, dict_flags);
839 if (rule == 0)
840 continue;
841 if (rule->op == DICT_PCRE_OP_IF) {
842 nesting++;
843 } else if (rule->op == DICT_PCRE_OP_ENDIF) {
844 nesting--;
846 if (last_rule == 0)
847 dict_pcre->head = rule;
848 else
849 last_rule->next = rule;
850 last_rule = rule;
853 if (nesting)
854 msg_warn("pcre map %s, line %d: more IFs than ENDIFs",
855 mapname, lineno);
857 vstring_free(line_buffer);
858 vstream_fclose(map_fp);
860 return (DICT_DEBUG (&dict_pcre->dict));
863 #endif /* HAS_PCRE */