The trunk is 4.5.x now
[findutils.git] / lib / regexprops.c
blobe6e89ed4f412bca527df541b3c211929d7eb1fab
1 /* regexprops.c -- document the properties of the regular expressions
2 understood by gnulib.
4 Copyright 2005, 2007 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 The output of this program is included in the GNU findutils source
23 distribution. The copying conditions for that file are generated
24 by the copying() function below.
27 /* Written by James Youngman, <jay@gnu.org>. */
29 #include <config.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <unistd.h>
35 #include <errno.h>
37 #include "regex.h"
38 #include "regextype.h"
41 /* Name this program was run with; main() sets it from argv[0]. */
42 char *program_name;
/* Write the string S to standard output exactly as given.

   ESCAPE lets callers say whether S is ordinary text (non-zero) or raw
   markup (zero).  It is accepted but not acted upon: nothing is escaped.  */
static void
output(const char *s, int escape)
{
  fputs(s, stdout);
  (void) escape;        /* deliberately unused */
}
/* Emit a single line break. */
static void
newline(void)
{
  output("\n", 0);
}
/* Emit S as ordinary document text (passed to output() with the escape
   flag set).  */
static void
content(const char *s)
{
  output(s, 1);
}
/* Emit S verbatim (passed to output() with the escape flag clear).  */
static void
literal(const char *s)
{
  output(s, 0);
}
/* Emit the markup directive S verbatim (passed to output() with the
   escape flag clear).  */
static void
directive(const char *s)
{
  output(s, 0);
}
/* Emit S as one comment line: "@c ", then S verbatim, then a line
   break.  */
static void
comment(const char *s)
{
  directive("@c ");
  literal(s);
  newline();
}
/* Start a new list item labelled S.  The "@item S" line is preceded and
   followed by a line break.  */
static void
enum_item(const char *s)
{
  newline();
  directive("@item ");
  literal(s);
  newline();
}
/* Open the subsection describing the regular expression type NAME.

   Emits a blank line, then an "@node" line and an "@subsection" line,
   both titled "NAME regular expression syntax" (NAME is wrapped in
   @samp{} in the subsection title only).

   NEXT, PREV and UP are accepted but not used.  */
static void
begin_subsection(const char *name,
                 const char *next,
                 const char *prev,
                 const char *up)
{
  static const char title_suffix[] = " regular expression syntax";

  (void) next;
  (void) prev;
  (void) up;

  newline();

  /* Node line. */
  directive("@node ");
  content(name);
  content(title_suffix);
  newline();

  /* Section heading. */
  directive("@subsection ");
  output("@samp{", 0);
  content(name);
  output("}", 0);
  content(title_suffix);
  newline();
}
/* Open a table whose item labels are formatted with MARKUP (for
   example "@samp").  The "@table MARKUP" line is preceded and followed
   by a line break.  */
static void
begintable_markup(char const *markup)
{
  newline();
  directive("@table ");
  literal(markup);
  newline();
}
/* Close the table opened by begintable_markup().  The "@end table" line
   is preceded and followed by a line break.

   Declared with a (void) parameter list so that the definition is a
   real prototype and stray arguments are diagnosed.  */
static void
endtable(void)
{
  newline();
  directive("@end table");
  newline();
}
/* Open an enumerated list.  The "@enumerate" line is preceded and
   followed by a line break.

   Declared with a (void) parameter list so that the definition is a
   real prototype and stray arguments are diagnosed.  */
static void
beginenum(void)
{
  newline();
  directive("@enumerate");
  newline();
}
/* Close the enumerated list opened by beginenum().  The
   "@end enumerate" line is preceded and followed by a line break.

   Declared with a (void) parameter list so that the definition is a
   real prototype and stray arguments are diagnosed.  */
static void
endenum(void)
{
  newline();
  directive("@end enumerate");
  newline();
}
/* Start a new paragraph by emitting two consecutive line breaks.

   Declared with a (void) parameter list so that the definition is a
   real prototype and stray arguments are diagnosed.  */
static void
newpara(void)
{
  content("\n\n");
}
146 static void
147 describe_regex_syntax(int options)
149 newpara();
150 content("The character @samp{.} matches any single character");
151 if ( (options & RE_DOT_NEWLINE) == 0 )
153 content(" except newline");
155 if (options & RE_DOT_NOT_NULL)
157 if ( (options & RE_DOT_NEWLINE) == 0 )
158 content(" and");
159 else
160 content(" except");
162 content(" the null character");
164 content(". ");
165 newpara();
167 if (!(options & RE_LIMITED_OPS))
169 begintable_markup("@samp");
170 if (options & RE_BK_PLUS_QM)
172 enum_item("\\+");
173 content("indicates that the regular expression should match one"
174 " or more occurrences of the previous atom or regexp. ");
175 enum_item("\\?");
176 content("indicates that the regular expression should match zero"
177 " or one occurrence of the previous atom or regexp. ");
178 enum_item("+ and ? ");
179 content("match themselves. ");
181 else
183 enum_item("+");
184 content("indicates that the regular expression should match one"
185 " or more occurrences of the previous atom or regexp. ");
186 enum_item("?");
187 content("indicates that the regular expression should match zero"
188 " or one occurrence of the previous atom or regexp. ");
189 enum_item("\\+");
190 literal("matches a @samp{+}");
191 enum_item("\\?");
192 literal("matches a @samp{?}. ");
194 endtable();
197 newpara();
199 content("Bracket expressions are used to match ranges of characters. ");
200 literal("Bracket expressions where the range is backward, for example @samp{[z-a]}, are ");
201 if (options & RE_NO_EMPTY_RANGES)
202 content("invalid");
203 else
204 content("ignored");
205 content(". ");
207 if (options & RE_BACKSLASH_ESCAPE_IN_LISTS)
208 literal("Within square brackets, @samp{\\} can be used to quote "
209 "the following character. ");
210 else
211 literal("Within square brackets, @samp{\\} is taken literally. ");
213 if (options & RE_CHAR_CLASSES)
214 content("Character classes are supported; for example "
215 "@samp{[[:digit:]]} will match a single decimal digit. ");
216 else
217 literal("Character classes are not supported, so for example "
218 "you would need to use @samp{[0-9]} "
219 "instead of @samp{[[:digit:]]}. ");
221 if (options & RE_HAT_LISTS_NOT_NEWLINE)
223 literal("Non-matching lists @samp{[^@dots{}]} do not ever match newline. ");
225 newpara();
226 if (options & RE_NO_GNU_OPS)
228 content("GNU extensions are not supported and so "
229 "@samp{\\w}, @samp{\\W}, @samp{\\<}, @samp{\\>}, @samp{\\b}, @samp{\\B}, @samp{\\`}, and @samp{\\'} "
230 "match "
231 "@samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively. ");
233 else
235 content("GNU extensions are supported:");
236 beginenum();
237 enum_item("@samp{\\w} matches a character within a word");
238 enum_item("@samp{\\W} matches a character which is not within a word");
239 enum_item("@samp{\\<} matches the beginning of a word");
240 enum_item("@samp{\\>} matches the end of a word");
241 enum_item("@samp{\\b} matches a word boundary");
242 enum_item("@samp{\\B} matches characters which are not a word boundary");
243 enum_item("@samp{\\`} matches the beginning of the whole input");
244 enum_item("@samp{\\'} matches the end of the whole input");
245 endenum();
248 newpara();
251 if (options & RE_NO_BK_PARENS)
253 literal("Grouping is performed with parentheses @samp{()}. ");
255 if (options & RE_UNMATCHED_RIGHT_PAREN_ORD)
256 literal("An unmatched @samp{)} matches just itself. ");
258 else
260 literal("Grouping is performed with backslashes followed by parentheses @samp{\\(}, @samp{\\)}. ");
263 if (options & RE_NO_BK_REFS)
265 content("A backslash followed by a digit matches that digit. ");
267 else
269 literal("A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number. For example @samp{\\2} matches the second group expression. The order of group expressions is determined by the position of their opening parenthesis ");
270 if (options & RE_NO_BK_PARENS)
271 literal("@samp{(}");
272 else
273 literal("@samp{\\(}");
274 content(". ");
278 newpara();
279 if (!(options & RE_LIMITED_OPS))
281 if (options & RE_NO_BK_VBAR)
282 literal("The alternation operator is @samp{|}. ");
283 else
284 literal("The alternation operator is @samp{\\|}. ");
286 newpara();
288 if (options & RE_CONTEXT_INDEP_ANCHORS)
290 literal("The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets. Within brackets, @samp{^} can be used to invert the membership of the character class being specified. ");
292 else
294 literal("The character @samp{^} only represents the beginning of a string when it appears:");
295 beginenum();
296 enum_item("\nAt the beginning of a regular expression");
297 enum_item("After an open-group, signified by ");
298 if (options & RE_NO_BK_PARENS)
300 literal("@samp{(}");
302 else
304 literal("@samp{\\(}");
306 newline();
307 if (!(options & RE_LIMITED_OPS))
309 if (options & RE_NEWLINE_ALT)
310 enum_item("After a newline");
312 if (options & RE_NO_BK_VBAR )
313 enum_item("After the alternation operator @samp{|}");
314 else
315 enum_item("After the alternation operator @samp{\\|}");
317 endenum();
319 newpara();
320 literal("The character @samp{$} only represents the end of a string when it appears:");
321 beginenum();
322 enum_item("At the end of a regular expression");
323 enum_item("Before a close-group, signified by ");
324 if (options & RE_NO_BK_PARENS)
326 literal("@samp{)}");
328 else
330 literal("@samp{\\)}");
332 if (!(options & RE_LIMITED_OPS))
334 if (options & RE_NEWLINE_ALT)
335 enum_item("Before a newline");
337 if (options & RE_NO_BK_VBAR)
338 enum_item("Before the alternation operator @samp{|}");
339 else
340 enum_item("Before the alternation operator @samp{\\|}");
342 endenum();
344 newpara();
345 if (!(options & RE_LIMITED_OPS) )
347 if ((options & RE_CONTEXT_INDEP_OPS)
348 && !(options & RE_CONTEXT_INVALID_OPS))
350 literal("The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression. ");
352 else
354 if (options & RE_BK_PLUS_QM)
355 literal("@samp{\\*}, @samp{\\+} and @samp{\\?} ");
356 else
357 literal("@samp{*}, @samp{+} and @samp{?} ");
359 if (options & RE_CONTEXT_INVALID_OPS)
361 content("are special at any point in a regular expression except the following places, where they are not allowed:");
363 else
365 content("are special at any point in a regular expression except:");
368 beginenum();
369 enum_item("At the beginning of a regular expression");
370 enum_item("After an open-group, signified by ");
371 if (options & RE_NO_BK_PARENS)
373 literal("@samp{(}");
375 else
377 literal("@samp{\\(}");
379 if (!(options & RE_LIMITED_OPS))
381 if (options & RE_NEWLINE_ALT)
382 enum_item("After a newline");
384 if (options & RE_NO_BK_VBAR)
385 enum_item("After the alternation operator @samp{|}");
386 else
387 enum_item("After the alternation operator @samp{\\|}");
389 endenum();
394 newpara();
395 if (options & RE_INTERVALS)
397 if (options & RE_NO_BK_BRACES)
399 literal("Intervals are specified by @samp{@{} and @samp{@}}. ");
400 if (options & RE_INVALID_INTERVAL_ORD)
402 literal("Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\\@{1}");
404 else
406 literal("Invalid intervals such as @samp{a@{1z} are not accepted. ");
409 else
411 literal("Intervals are specified by @samp{\\@{} and @samp{\\@}}. ");
412 if (options & RE_INVALID_INTERVAL_ORD)
414 literal("Invalid intervals are treated as literals, for example @samp{a\\@{1} is treated as @samp{a@{1}");
416 else
418 literal("Invalid intervals such as @samp{a\\@{1z} are not accepted. ");
424 newpara();
425 if (options & RE_NO_POSIX_BACKTRACKING)
427 content("Matching succeeds as soon as the whole pattern is matched, meaning that the result may not be the longest possible match. ");
429 else
431 content("The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups. ");
433 newpara();
/* Emit the copying conditions for the generated document.  Each string
   in copy_para is written as its own "@c" comment line via comment(),
   in order, stopping at the NULL terminator.

   NOTE(review): the embedded line numbers in this listing skip 443 and
   450, so the array may contain entries that are not visible here.
   Confirm against the original file before editing the array.  */
437 static void copying(void)
439 static const char *copy_para[]=
441 "Copyright (C) 1994, 1996, 1998, 2000, 2001, 2003, 2004, 2005, 2006, 2007"
442 ,"Free Software Foundation, Inc."
444 ,"Permission is granted to copy, distribute and/or modify this document"
445 ,"under the terms of the GNU Free Documentation License, Version 1.2 or"
446 ,"any later version published by the Free Software Foundation; with no"
447 ,"Invariant Sections, with no Front-Cover Texts, and with no Back-Cover"
448 ,"Texts. A copy of the license is included in the ``GNU Free"
449 ,"Documentation License'' file as part of this distribution."
/* NULL marks the end of the list for the loop below. */
451 ,NULL
453 const char **s = copy_para;
/* Walk the array, emitting one comment line per entry. */
454 while (*s)
455 comment(*s++);
/* Return non-zero when regular expression type number IX should be
   left out for CONTEXT, that is, when the context mask reported by
   get_regex_type_context() for IX has no bit in common with CONTEXT.
   Return zero otherwise.  */
static int
ignore(int ix, const unsigned int context)
{
  return (get_regex_type_context(ix) & context) == 0;
}
/* Emit the "@menu" block listing every regular expression type that is
   not ignored for CONTEXT.  Each entry has the form
   "* NAME regular expression syntax::".

   Types are visited by index from zero until get_regex_type_name()
   returns a null pointer.  The per-type flags are not needed to build
   the menu, so they are no longer fetched into an unused variable.  */
static void
menu(unsigned int context)
{
  int i;
  const char *name;

  output("@menu\n", 0);
  for (i = 0; (name = get_regex_type_name(i)) != NULL; ++i)
    {
      if (ignore(i, context))
        continue;

      output("* ", 0);
      output(name, 0);
      content(" regular expression syntax");
      output("::", 0);
      newline();
    }
  output("@end menu\n", 0);
}
/* Return the name of the first regular expression type at index IX or
   later that is not ignored for CONTEXT.  Return the empty string when
   there is none; a null pointer is never returned.

   The name fetched by the loop condition is reused directly.  It is
   already known to be non-null there, so the second lookup and its
   NULL test were unreachable.  */
static const char *
get_next(unsigned int ix, unsigned int context)
{
  const char *name;

  for (; (name = get_regex_type_name(ix)) != NULL; ++ix)
    {
      if (!ignore(ix, context))
        return name;
    }
  return "";
}
/* Write the whole document for one context.

   CONTEXTNAME is the human-readable context name; it appears in a
   leading "@c" comment and in diagnostics.  CONTEXT is the context bit
   mask used to select regular expression types.  UP is passed through
   to begin_subsection().

   Output order: the copying conditions, a comment naming the context,
   the menu, then one subsection per type that is not ignored for
   CONTEXT.  A type for which get_regex_type_synonym() returns a
   non-negative index is described as a synonym for that type; any other
   type gets a full syntax description.  Ignored types are reported on
   standard error.  */
static void
describe_all(const char *contextname,
             unsigned int context,
             const char *up)
{
  const char *name, *previous;
  int options;
  int i, parent;

  copying();
  newline();
  literal("@c this regular expression description is for: ");
  literal(contextname);
  newline();
  newline();
  menu(context);

  previous = "";

  for (i=0;
       options = get_regex_type_flags(i),
       (name = get_regex_type_name(i)) != NULL;
       ++i)
    {
      if (ignore(i, context))
        {
          fprintf(stderr,
                  "Skipping regexp type %s for context %s\n",
                  name, contextname);
          /* PREVIOUS is left alone, so it keeps naming the last type
             that was actually described.  */
          continue;
        }

      /* get_next() returns "" rather than a null pointer when there is
         no following type, so its result can be passed on directly.  */
      begin_subsection(name, get_next(i+1, context), previous, up);

      parent = get_regex_type_synonym(i);
      if (parent >= 0)
        {
          content("This is a synonym for ");
          content(get_regex_type_name(parent));
          content(".");
        }
      else
        {
          describe_regex_syntax(options);
        }
      previous = name;
    }
}
564 int main (int argc, char *argv[])
566 const char *up = "";
567 unsigned int context = CONTEXT_ALL;
568 const char *contextname = "all";
569 program_name = argv[0];
571 if (argc > 1)
573 up = argv[1];
575 if (argc > 2)
577 contextname = argv[2];
578 if (0 == strcmp(contextname, "findutils"))
579 context = CONTEXT_FINDUTILS;
580 else if (0 == strcmp(contextname, "generic"))
581 context = CONTEXT_GENERIC;
582 else if (0 == strcmp(contextname, "all"))
583 context = CONTEXT_ALL;
584 else
586 fprintf(stderr, "Unexpected context %s",
587 contextname);
588 return 1;
592 describe_all(contextname, context, up);
593 return 0;