1 /***********************************************************************
3 * This software is part of the ast package *
4 * Copyright (c) 1995-2009 AT&T Knowledge Ventures *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Knowledge Ventures *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
13 * Information and Software Systems Research *
17 * Glenn Fowler <gsf@research.att.com> *
19 ***********************************************************************/
/*
 * Option/help description string for grep and its variants. grep_main
 * passes it to optget(argv, usage) to drive command line parsing.
 *
 * NOTE(review): the DIAGNOSTICS entry spells "inverts" as "invertes", and
 * the CAVEATS entry has no closing "]" in this listing (its continuation
 * line appears to be missing) -- confirm against the full source before
 * touching the text, since it is user-visible output.
 */
22 static const char usage
[] =
23 "[-?\n@(#)$Id: grep (AT&T Research) 2006-06-14 $\n]"
25 "[+NAME?grep - search lines in files for matching patterns]"
26 "[+DESCRIPTION?The \bgrep\b commands search the named input files"
27 " for lines containing a match for the given \apatterns\a."
28 " Matching lines are printed by default. The standard input is searched"
29 " if no files are given or when the file \b-\b is specified.]"
30 "[+?There are six variants of \bgrep\b, each one using a different form of"
31 " \apattern\a, controlled either by option or the command path"
32 " base name. Details of each variant may be found in \bregex\b(3).]"
34 " [+grep?The default basic regular expressions (no alternations.)]"
35 " [+egrep?Extended regular expressions (alternations, one or more.)]"
36 " [+pgrep?\bperl\b(1) regular expressions (lenient extended.)]"
37 " [+xgrep?Augmented regular expressions (conjunction, negation.)]"
38 " [+fgrep?Fixed string expressions.]"
39 " [+agrep?Approximate regular expressions (not implemented.)]"
41 "[G:basic-regexp?\bgrep\b mode (default): basic regular expression \apatterns\a.]"
42 "[E:extended-regexp?\begrep\b mode: extended regular expression \apatterns\a.]"
43 "[X:augmented-regexp?\bxgrep\b mode: augmented regular expression \apatterns\a.]"
44 "[P:perl-regexp?\bpgrep\b mode: \bperl\b(1) regular expression \apatterns\a.]"
45 "[F:fixed-string?\bfgrep\b mode: fixed string \apatterns\a.]"
46 "[A:approximate-regexp?\bagrep\b mode: approximate regular expression \apatterns\a (not implemented.)]"
48 "[C:context?Set the matched line context \abefore\a and \aafter\a count."
49 " By default only matched lines are printed.]:?"
50 " [before[,after]]:=2,2]"
51 "[c:count?Only print a matching line count for each file.]"
52 "[e:expression|pattern|regexp?Specify a matching \apattern\a. More than one"
53 " \apattern\a implies alternation. If this option is specified"
54 " then the command line \apattern\a must be omitted.]:"
56 "[f:file?Each line in \apattern-file\a is a \apattern\a, placed into a single"
57 " alternating expression.]:"
59 "[H:filename|with-filename?Prefix each matched line with the containing file name.]"
60 "[h:no-filename?Suppress containing file name prefix for each matched line.]"
61 "[i:ignore-case?Ignore case when matching.]"
62 "[l:files-with-matches?Only print file names with at least one match.]"
63 "[L:files-without-matches?Only print file names with no matches.]"
64 "[b:highlight?Highlight matches using the ansi terminal bold sequence.]"
65 "[v:invert-match|revert-match?Invert the \apattern\a match sense.]"
66 "[m:label?All patterns must be of the form \alabel\a:\apattern\a. Match and"
67 " count output will be prefixed by the corresponding \alabel\a:.]"
68 "[O:lenient?Enable lenient \apattern\a interpretation. This is the default.]"
69 "[x:line-match|line-regexp?Force \apatterns\a to match complete lines.]"
70 "[n:number|line-number?Prefix each matched line with its line number.]"
71 "[N:name?Set the standard input file name prefix to"
72 " \aname\a.]:[name:=empty]"
73 "[q:quiet|silent?Do not print matching lines.]"
74 "[S:strict?Enable strict \apattern\a interpretation with diagnostics.]"
75 "[s:suppress|no-messages?Suppress error and warning messages.]"
76 "[t:total?Only print a single matching line count for all files.]"
77 "[T:test?Enable implementation specific tests.]:"
79 "[w:word-match|word-regexp?Force \apatterns\a to match complete words.]"
80 "[a?Ignored for GNU compatibility.]"
82 "\n[ pattern ] [ file ... ]\n"
84 "[+DIAGNOSTICS?Exit status 0 if matches were found, 1 if no matches were found,"
85 " where \b-v\b invertes the exit status. Exit status 2 for other"
86 " errors that are accompanied by a message on the standard error.]"
87 "[+SEE ALSO?\bed\b(1), \bsed\b(1), \bperl\b(1), \bregex\b(3)]"
88 "[+CAVEATS?Some expressions of necessity require exponential space"
90 "[+BUGS?Some expressions may use sub-optimal algorithms. For example,"
91 " don't use this implementation to compute primes.]"
105 * snarfed from Doug McIlroy's C++ version
107 * this grep is based on the Posix re package.
108 * unfortunately it has to have a nonstandard interface.
109 * 1. fgrep does not have usual operators. REG_LITERAL
111 * 2. grep allows null expressions, hence REG_NULL.
112 * 3. it may be possible to combine the multiple
113 * patterns of grep into single patterns. important
114 * special cases are handled by regcomb().
115 * 4. anchoring by -x has to be done separately from
116 * compilation (remember that fgrep has no ^ or $ operator),
117 * hence REG_LEFT|REG_RIGHT. (An honest, but slow alternative:
118 * run regexec with REG_NOSUB off and nmatch=1 and check
119 * whether the match is full length)
/*
 * One node of a singly linked List_t. The same node type holds raw
 * pattern / pattern-file strings (filled in by addstring with strcpy) and
 * compiled expressions (filled in by addre with regcomp into `re`).
 *
 * string[1] is only the first byte of the text: both allocators pass an
 * additional byte count as the last newof() argument -- presumably extra
 * trailing storage appended to the struct; TODO confirm newof semantics.
 */
122 typedef struct Item_s
/* list item - sue me for waste */
124 struct Item_s
* next
; /* next in list */
125 regex_t re
; /* compiled re */
126 Sfulong_t hits
; /* labeled pattern matches */
127 Sfulong_t total
; /* total hits */
128 char string
[1]; /* string value */
/*
 * Head and tail pointers of a singly linked list of Item_t nodes.
 * addre sets head and tail to the first item and later appends with
 * `p->tail = p->tail->next = x`, so tail always names the last node.
 */
131 typedef struct List_s
/* generic list */
133 Item_t
* head
; /* list head */
134 Item_t
* tail
; /* list tail */
/*
 * All per-invocation state for one grep run. grep_main clears the whole
 * object with memset() before option parsing, so every field starts at 0.
 *
 * NOTE(review): base/size/noshare are accessed elsewhere in this file as
 * state->buffer.base / .size / .noshare, so they presumably sit inside a
 * nested `buffer` struct whose opening and closing lines are not visible
 * in this listing -- confirm against the full source.
 */
137 typedef struct State_s
/* program state */
141 char* base
; /* sfsetbuf buffer */
142 size_t size
; /* sfsetbuf size */
143 int noshare
; /* turn off SF_SHARE */
146 List_t file
; /* pattern file list */
147 List_t pattern
; /* pattern list */
148 List_t re
; /* re list */
150 regmatch_t posvec
[1]; /* match position vector */
151 regmatch_t
* pos
; /* match position pointer */
152 int posnum
; /* number of match positions */
154 int any
; /* if any pattern hit */
155 int list
; /* list files with hits */
156 int notfound
; /* some input file not found */
157 int options
; /* regex options */
159 Sfulong_t hits
; /* total matched pattern count */
161 unsigned char byline
; /* multiple pattern line by line*/
162 unsigned char count
; /* count number of hits */
163 unsigned char label
; /* all patterns labeled */
164 unsigned char match
; /* match sense */
165 unsigned char query
; /* return status but no output */
166 unsigned char number
; /* line numbers */
167 unsigned char prefix
; /* print file prefix */
168 unsigned char suppress
; /* no unopenable file messages */
169 unsigned char words
; /* word matches only */
/*
 * addre: compile the pattern text s and add it to the expression list p.
 *
 * NOTE(review): several body lines (declarations, braces, branch heads)
 * are missing from this listing; the notes below cover only the visible
 * statements.
 *
 *  - strchr(s, ':') locates the label/pattern separator; when it is
 *    absent the call fails with "label:pattern expected" (presumably
 *    this only runs in label mode -- the guard is not visible).
 *  - a new Item_t is allocated with c additional bytes and c bytes are
 *    copied from b into x->string.
 *  - an sfio string stream t is opened to build a rewritten pattern; the
 *    error text calls it a "word pattern", so this is presumably the -w
 *    wrapping, with the REG_AUGMENTED tests choosing the syntax.
 *  - the final text is compiled with state->options|REG_MULTIPLE and a
 *    nonzero regcomp() result is handed to regfatal().
 *  - the first item becomes both head and tail of p; later items are
 *    appended when state->label is set or regcomb() returns nonzero.
 */
173 addre(State_s
*state
, List_t
* p
, char* s
)
/* split label:pattern at the first ':' */
183 if (!(s
= strchr(s
, ':')))
184 error(3, "%s: label:pattern expected", b
);
/* allocate the list item with c bytes of extra room for the string */
190 if (!(x
= newof(0, Item_t
, 1, c
)))
191 error(ERROR_SYSTEM
|3, "out of space (pattern `%s')", b
);
193 memcpy(x
->string
, b
, c
);
/* temporary string stream used to assemble the rewritten pattern */
196 if (!(t
= sfstropen()))
197 error(ERROR_SYSTEM
|3, "out of space (word pattern `%s')", s
);
198 if (!(state
->options
& REG_AUGMENTED
))
202 if (!(state
->options
& REG_AUGMENTED
))
205 if (!(s
= sfstruse(t
)))
206 error(ERROR_SYSTEM
|3, "out of space");
/* compile; any regcomp failure is fatal */
210 if (c
= regcomp(&x
->re
, s
, state
->options
|REG_MULTIPLE
))
211 regfatal(&x
->re
, 3, c
);
/* first expression in the list */
216 p
->head
= p
->tail
= x
;
217 if (state
->number
|| !regrecord(&x
->re
))
/* otherwise try to fold into the tail expression, else append */
220 else if (state
->label
|| regcomb(&p
->tail
->re
, &x
->re
))
222 p
->tail
= p
->tail
->next
= x
;
223 if (!state
->byline
&& (state
->number
|| !state
->label
|| !regrecord(&x
->re
)))
/*
 * addstring: allocate an Item_t holding a copy of s, for list p.
 *
 * The allocation asks for strlen(s) bytes beyond the struct; together
 * with the one byte already declared as string[1] that leaves room for
 * the terminating NUL written by strcpy (assuming the last newof()
 * argument is an extra byte count -- TODO confirm).
 * Allocation failure is a level 3 error reported with ERROR_SYSTEM.
 *
 * NOTE(review): the statements that link x into p are not visible in
 * this listing.
 */
231 addstring(State_s
*state
, List_t
* p
, char* s
)
235 if (!(x
= newof(0, Item_t
, 1, strlen(s
))))
236 error(ERROR_SYSTEM
|3, "out of space (string `%s')", s
);
237 strcpy(x
->string
, s
);
/*
 * compile: turn every collected pattern into compiled expressions on
 * state->re.
 *
 * Patterns on state->pattern are compiled first, one addre() call per
 * item. Each item on state->file is then opened with sfopen(); lines read
 * from it are passed to addre() as further patterns. error_info.file and
 * error_info.line are saved before a pattern file is read and restored
 * afterwards.
 *
 * Two read loops are visible: one reserves a whole buffer with
 * sfreserve(..., SF_LOCKR) and scans backwards for the last newline, the
 * other reads line by line with sfgetr(). The condition that selects
 * between them is not visible in this listing.
 *
 * The trailing error(3, "no pattern") is presumably reached only when no
 * expression was produced -- the guard is not visible.
 */
246 compile(State_s
*state
)
/* patterns given directly (pattern list) */
256 for (x
= state
->pattern
.head
; x
; x
= x
->next
)
257 addre(state
, &state
->re
, x
->string
);
/* patterns read from pattern files */
258 for (x
= state
->file
.head
; x
; x
= x
->next
)
261 if (!(f
= sfopen(NiL
, s
, "r")))
262 error(ERROR_SYSTEM
|4, "%s: cannot open", s
);
/* remember the caller's diagnostic context */
265 file
= error_info
.file
;
267 line
= error_info
.line
;
/* bulk path: take whatever sfio has buffered */
269 while (s
= (char*)sfreserve(f
, SF_UNBOUND
, SF_LOCKR
))
271 if (!(n
= sfvalue(f
)))
273 if (s
[n
- 1] != '\n')
/* back up to the last newline in the reserved data */
275 for (t
= s
+ n
; t
> s
&& *--t
!= '\n'; t
--);
284 addre(state
, &state
->re
, s
);
/* line path: a terminated line, or a final unterminated one */
288 while ((s
= sfgetr(f
, '\n', 1)) || (s
= sfgetr(f
, '\n', -1)))
291 addre(state
, &state
->re
, s
);
/* restore the caller's diagnostic context */
293 error_info
.file
= file
;
294 error_info
.line
= line
;
299 error(3, "no pattern");
/*
 * highlight: write the n bytes at s to sp with the range [so, eo)
 * bracketed by the terminal sequences ESC[1m (bold) and ESC[0m (normal).
 *
 * The visible writes emit: bold, s[so..eo), normal, s[eo..n).
 *
 * NOTE(review): original line 308 is missing from this listing; it
 * presumably writes the leading s[0..so) bytes -- confirm against the
 * full source. No sfwrite() return value is checked in the visible code.
 */
303 highlight(Sfio_t
* sp
, const char* s
, int n
, int so
, int eo
)
/* escape sequences are raw byte arrays, deliberately not NUL terminated */
305 static const char bold
[] = {CC_esc
,'[','1','m'};
306 static const char normal
[] = {CC_esc
,'[','0','m'};
309 sfwrite(sp
, bold
, sizeof(bold
));
310 sfwrite(sp
, s
+ so
, eo
- so
);
311 sfwrite(sp
, normal
, sizeof(normal
));
312 sfwrite(sp
, s
+ eo
, n
- eo
);
/*
 * record: per-record callback handed to regrexec() by execute.
 *
 * handle points at a record_handle carrying the State_s and the Item_t
 * whose expression is being run (execute builds it as { state, x }).
 * s/len describe the matched record; output uses len + 1 bytes, so len
 * presumably excludes the record delimiter -- TODO confirm.
 *
 * Visible output steps: optional "file:" prefix from error_info.file,
 * optional "label:" prefix from item->string, then the record itself,
 * either highlighted around state->pos[0] or written verbatim. The
 * conditions guarding each step, and the handling when query/list is
 * set, are not visible in this listing.
 */
322 record(void* handle
, const char* s
, size_t len
)
/* unpack the callback context */
324 record_handle
*r_x
= (record_handle
*)handle
;
325 State_s
*state
= r_x
->state
;
326 Item_t
*item
= r_x
->item
;
329 if (state
->query
|| state
->list
)
334 sfprintf(sfstdout
, "%s:", error_info
.file
);
336 sfprintf(sfstdout
, "%s:", item
->string
);
338 highlight(sfstdout
, s
, len
+ 1, state
->pos
[0].rm_so
, state
->pos
[0].rm_eo
);
340 sfwrite(sfstdout
, s
, len
+ 1);
/*
 * execute: search one already-open input stream and print the matches
 * and/or the per-file summary. name is used for output prefixes and in
 * diagnostics; it is installed as error_info.file while the stream is
 * read, and the previous file/line are restored before the summary.
 *
 * NOTE(review): many body lines (braces, branch conditions, loop heads,
 * counters) are absent from this listing, so the notes here describe
 * only what the visible statements do.
 *
 * Visible structure:
 *  - a line-at-a-time path: sfgetr() + regnexec() per expression, with
 *    file / line-number / label prefixes and optional highlighting of
 *    state->pos[0];
 *  - a buffered path: sfreserve() + regrexec() with record() as the
 *    callback, using the static `span` buffer where a line does not end
 *    inside the reserved data;
 *  - a summary section printing counts ("%I*u") or file names depending
 *    on state->count and state->list.
 */
346 execute(State_s
*state
, Sfio_t
* input
, char* name
)
/* optional stream tuning; grep_main sets these from its test options */
357 if (state
->buffer
.noshare
)
358 sfset(input
, SF_SHARE
, 0);
359 if (state
->buffer
.size
)
360 sfsetbuf(input
, state
->buffer
.base
, state
->buffer
.size
);
/* switch the diagnostic context to this input */
363 file
= error_info
.file
;
364 error_info
.file
= name
;
365 line
= error_info
.line
;
/* line path: a newline terminated line, else a final unterminated one */
372 if (s
= sfgetr(input
, '\n', 0))
373 len
= sfvalue(input
) - 1;
374 else if (s
= sfgetr(input
, '\n', -1))
376 len
= sfvalue(input
);
378 #if _you_like_the_noise
379 error(1, "newline appended");
/* EISDIR is deliberately not reported as a read error */
384 if (sferror(input
) && errno
!= EISDIR
)
385 error(ERROR_SYSTEM
|2, "read error");
/* regnexec() returning 0 is a match */
391 if (!(result
= regnexec(&x
->re
, s
, len
, state
->posnum
, state
->pos
, 0)))
396 if (state
->query
|| state
->list
)
401 sfprintf(sfstdout
, "%s:", name
);
403 sfprintf(sfstdout
, "%d:", error_info
.line
);
404 sfprintf(sfstdout
, "%s:", x
->string
);
406 highlight(sfstdout
, s
, len
+ 1, state
->pos
[0].rm_so
, state
->pos
[0].rm_eo
);
408 sfwrite(sfstdout
, s
, len
+ 1);
/* anything other than "no match" is a fatal regex error */
411 else if (result
!= REG_NOMATCH
)
412 regfatal(&x
->re
, 3, result
);
413 } while (x
= x
->next
);
/* unlabeled case: x != 0 after the loop is compared with the match sense */
414 if (!state
->label
&& (x
!= 0) == state
->match
)
417 if (state
->query
|| state
->list
)
422 sfprintf(sfstdout
, "%s:", name
);
424 sfprintf(sfstdout
, "%d:", error_info
.line
);
426 highlight(sfstdout
, s
, len
+ 1, state
->pos
[0].rm_so
, state
->pos
[0].rm_eo
);
428 sfwrite(sfstdout
, s
, len
+ 1);
/* span persists across calls (static) and is grown on demand */
439 static char* span
= 0;
440 static size_t spansize
= 0;
450 len
= 2 * (e
- s
) + t
- span
+ 1;
451 len
= roundof(len
, SF_BUFSIZE
);
/*
 * NOTE(review): "%lu" is paired with `len + e - s`; confirm the argument
 * really has type unsigned long on all targets.
 */
456 if (!(span
= newof(span
, char, spansize
, 0)))
457 error(ERROR_SYSTEM
|3, "%s: line longer than %lu characters", name
, len
+ e
- s
);
463 if (!(s
= sfreserve(input
, SF_UNBOUND
, 0)) || (len
= sfvalue(input
)) <= 0)
465 if ((sfvalue(input
) || sferror(input
)) && errno
!= EISDIR
)
466 error(ERROR_SYSTEM
|2, "%s: read error", name
);
/* no newline in the newly reserved data: keep accumulating */
469 else if (!(e
= memchr(s
, '\n', len
)))
474 len
= (e
- s
) + t
- span
;
475 len
= roundof(len
, SF_BUFSIZE
);
480 if (!(span
= newof(span
, char, spansize
, 0)))
481 error(ERROR_SYSTEM
|3, "%s: line longer than %lu characters", name
, len
+ e
- s
);
/* run every expression over the data collected in span */
496 record_handle r_x
= { state
, x
};
497 if ((result
= regrexec(&x
->re
, span
, t
- span
, state
->posnum
, state
->pos
, state
->options
, '\n', (void*)&r_x
, record
)) < 0)
499 if (result
&& result
!= REG_NOMATCH
)
500 regfatal(&x
->re
, 3, result
);
501 } while (x
= x
->next
);
507 if (!(s
= sfreserve(input
, SF_UNBOUND
, 0)))
509 if ((sfvalue(input
) || sferror(input
)) && errno
!= EISDIR
)
510 error(ERROR_SYSTEM
|2, "%s: read error", name
);
513 if ((len
= sfvalue(input
)) <= 0)
/* run every expression directly over the reserved buffer */
524 record_handle r_x
= { state
, x
};
525 if ((result
= regrexec(&x
->re
, s
, t
- s
, state
->posnum
, state
->pos
, state
->options
, '\n', (void*)&r_x
, record
)) < 0)
527 if (result
&& result
!= REG_NOMATCH
)
528 regfatal(&x
->re
, 3, result
);
529 } while (x
= x
->next
);
/* restore the caller's diagnostic context before summarizing */
536 error_info
.file
= file
;
537 error_info
.line
= line
;
/* summary for the unlabeled line-by-line case uses the local hit count */
538 if (state
->byline
&& !state
->label
)
540 if (hits
&& state
->list
>= 0)
548 if (state
->count
& 2)
553 sfprintf(sfstdout
, "%s:", name
);
554 sfprintf(sfstdout
, "%I*u\n", sizeof(hits
), hits
);
558 else if ((hits
!= 0) == (state
->list
> 0))
562 sfprintf(sfstdout
, "%s\n", name
);
/* otherwise summarize per expression from x->hits */
571 if (x
->hits
&& state
->list
>= 0)
583 if (state
->count
& 2)
586 state
->hits
+= x
->hits
;
591 sfprintf(sfstdout
, "%s:", name
);
593 sfprintf(sfstdout
, "%s:", x
->string
);
594 sfprintf(sfstdout
, "%I*u\n", sizeof(x
->hits
), x
->hits
);
598 else if ((x
->hits
!= 0) == (state
->list
> 0))
603 sfprintf(sfstdout
, "%s:%s\n", name
, x
->string
);
605 sfprintf(sfstdout
, "%s\n", name
);
609 } while (x
= x
->next
);
/*
 * grep_main: shared entry point behind the b_grep / b_egrep / b_fgrep /
 * b_pgrep / b_xgrep wrappers.
 *
 * Visible flow: zero the state, set the default regex options, adjust
 * them from the CONFORMANCE setting and (presumably) the base name of
 * argv[0], parse options with optget(argv, usage), reject incompatible
 * combinations, take the pattern from the first operand when neither a
 * pattern nor a pattern file was given, then run execute() on standard
 * input or on each operand opened with sfopen().
 *
 * Returns 2 when some input file was not found and query mode is off;
 * otherwise 0 if any pattern hit and 1 if none.
 *
 * NOTE(review): the switch/case labels and many braces are missing from
 * this listing, so the option letter behind each statement cannot be
 * confirmed from here.
 */
615 int grep_main(int argc
, char** argv
, void *context
)
622 memset(&state
, 0, sizeof(state
));
/* defaults: first match only, no subexpression positions, null re allowed */
626 state
.options
= REG_FIRST
|REG_NOSUB
|REG_NULL
;
/* lenient unless CONFORMANCE is exactly "standard" */
628 if (strcmp(astconf("CONFORMANCE", NiL
, NiL
), "standard"))
629 state
.options
|= REG_LENIENT
;
/* the command base name presumably picks the variant (tests not visible) */
630 if (s
= strrchr(argv
[0], '/'))
639 state
.options
|= REG_EXTENDED
;
644 state
.options
|= REG_LITERAL
;
649 state
.options
|= REG_EXTENDED
|REG_LENIENT
;
654 state
.options
|= REG_AUGMENTED
;
/* option loop; optget() returns 0 when the options are exhausted */
661 while (c
= optget(argv
, usage
))
665 state
.options
|= REG_EXTENDED
;
668 state
.options
|= REG_LITERAL
;
671 state
.options
&= ~(REG_AUGMENTED
|REG_EXTENDED
);
674 state
.prefix
= opt_info
.num
;
677 state
.list
= -opt_info
.num
;
683 state
.options
|= REG_LENIENT
;
686 state
.options
|= REG_EXTENDED
|REG_LENIENT
;
689 state
.options
&= ~REG_LENIENT
;
/* test option parsing: a numeric buffer size, optionally with a buffer */
698 state
.buffer
.size
= strton(s
, &s
, NiL
, 1);
699 if (c
== 'b' && !(state
.buffer
.base
= newof(0, char, state
.buffer
.size
, 0)))
700 error(ERROR_SYSTEM
|3, "out of space [test buffer]");
702 error(3, "%s: invalid characters after test", s
);
705 state
.options
|= REG_FIRST
;
708 state
.options
|= REG_LEFT
;
711 state
.buffer
.noshare
= 1;
714 state
.options
|= REG_RIGHT
;
717 error(3, "%s: unknown test", s
);
722 state
.options
|= REG_AUGMENTED
;
/* match positions are needed, so drop the first-match/no-sub shortcuts */
727 state
.options
&= ~(REG_FIRST
|REG_NOSUB
);
733 addstring(&state
, &state
.pattern
, opt_info
.arg
);
736 addstring(&state
, &state
.file
, opt_info
.arg
);
742 state
.options
|= REG_ICASE
;
745 state
.list
= opt_info
.num
;
757 state
.suppress
= opt_info
.num
;
/* match sense and REG_INVERT are kept in step */
763 if (state
.match
= !opt_info
.num
)
764 state
.options
&= ~REG_INVERT
;
766 state
.options
|= REG_INVERT
;
772 state
.options
|= REG_LEFT
|REG_RIGHT
;
775 error(ERROR_USAGE
|4, "%s", opt_info
.arg
);
778 error(2, "%s", opt_info
.arg
);
781 error(3, "%s: not implemented", opt_info
.name
);
/* skip past the parsed options */
784 argv
+= opt_info
.index
;
/*
 * NOTE(review): REG_EXTENDED is also set by the extended-regexp option
 * above, so this check can fire for a combination the message does not
 * mention -- confirm the intended wording.
 */
785 if ((state
.options
& REG_LITERAL
) && (state
.options
& (REG_AUGMENTED
|REG_EXTENDED
)))
786 error(3, "-F and -A or -P or -X are incompatible");
/*
 * NOTE(review): ERROR_SYSTEM looks unintended here -- this is an option
 * conflict, not a failed system call, and the sibling check above uses a
 * plain level 3 error. Confirm before changing.
 */
787 if ((state
.options
& REG_LITERAL
) && state
.words
)
788 error(ERROR_SYSTEM
|3, "-F and -w are incompatible");
/* no pattern option and no pattern file: the first operand is the pattern */
789 if (!state
.file
.head
&& !state
.pattern
.head
)
792 error(3, "no pattern");
793 addstring(&state
, &state
.pattern
, *argv
++);
/* positions were requested, but these modes never print them */
795 if (!(state
.options
& (REG_FIRST
|REG_NOSUB
)))
797 if (state
.count
|| state
.list
|| state
.query
|| (state
.options
& REG_INVERT
))
798 state
.options
|= REG_FIRST
|REG_NOSUB
;
801 state
.pos
= state
.posvec
;
802 state
.posnum
= elementsof(state
.posvec
);
/* standard input; h presumably holds the stdin name prefix -- TODO confirm */
808 state
.prefix
= h
? 1 : 0;
809 execute(&state
, sfstdin
, h
);
813 if (state
.prefix
> 1)
/* named file operands */
819 if (f
= sfopen(NiL
, s
, "r"))
821 execute(&state
, f
, s
);
823 if (state
.query
&& state
.any
)
830 error(ERROR_SYSTEM
|2, "%s: cannot open", s
);
/* grand totals across all inputs (count bit 2) */
834 if ((state
.count
& 2) && !state
.query
&& !state
.list
)
843 sfprintf(sfstdout
, "%s:%I*u\n", x
->string
, sizeof(x
->total
), x
->total
);
844 } while (x
= x
->next
);
847 sfprintf(sfstdout
, "%I*u\n", sizeof(state
.hits
), state
.hits
);
/* 2: missing input (unless query mode); else 0 on any hit, 1 on none */
849 return (state
.notfound
&& !state
.query
) ? 2 : !state
.any
;
/*
 * b_egrep: entry point for the egrep command name. Forwards all three
 * arguments to grep_main unchanged and returns its result.
 */
853 int b_egrep(int argc
, char** argv
, void *context
)
855 return grep_main(argc
, argv
, context
);
/*
 * b_grep: entry point for the grep command name. Forwards all three
 * arguments to grep_main unchanged and returns its result.
 */
858 int b_grep(int argc
, char** argv
, void *context
)
860 return grep_main(argc
, argv
, context
);
/*
 * b_fgrep: entry point for the fgrep command name. Forwards all three
 * arguments to grep_main unchanged and returns its result.
 */
863 int b_fgrep(int argc
, char** argv
, void *context
)
865 return grep_main(argc
, argv
, context
);
/*
 * b_pgrep: entry point for the pgrep command name. Forwards all three
 * arguments to grep_main unchanged and returns its result.
 */
868 int b_pgrep(int argc
, char** argv
, void *context
)
870 return grep_main(argc
, argv
, context
);
/*
 * b_xgrep: entry point for the xgrep command name. Forwards all three
 * arguments to grep_main unchanged and returns its result.
 */
873 int b_xgrep(int argc
, char** argv
, void *context
)
875 return grep_main(argc
, argv
, context
);