1 /* $OpenBSD: util.c,v 1.48 2014/05/20 01:25:23 guenther Exp $ */
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/types.h>
46 * Process a file line by line...
49 static int linesqueued
;
50 static int procline(str_t
*l
, int);
51 static int grep_search(fastgrep_t
*, unsigned char *, size_t,
54 static int grep_cmp(const unsigned char *, const unsigned char *, size_t);
55 static void grep_revstr(unsigned char *, int);
59 grep_tree(char **argv
)
67 fts_flags
= FTS_PHYSICAL
| FTS_NOSTAT
| FTS_NOCHDIR
;
69 if (!(fts
= fts_open(argv
, fts_flags
, NULL
)))
71 while ((p
= fts_read(fts
)) != NULL
) {
72 switch (p
->fts_info
) {
79 warn("%s", p
->fts_path
);
85 c
+= procfile(p
->fts_path
);
96 procfile(const char *fn
)
100 int c
, t
, z
, nottext
;
103 fn
= "(standard input)";
104 f
= grep_fdopen(STDIN_FILENO
, "r");
106 f
= grep_open(fn
, "r");
115 nottext
= grep_bin_file(f
);
116 if (nottext
&& binbehave
== BIN_FILE_SKIP
) {
130 for (c
= 0; c
== 0 || !(lflag
|| qflag
); ) {
131 ln
.off
+= ln
.len
+ 1;
132 if ((ln
.dat
= grep_fgetln(f
, &ln
.len
)) == NULL
)
134 if (ln
.len
> 0 && ln
.dat
[ln
.len
- 1] == '\n')
140 if ((t
= procline(&ln
, nottext
)) == 0 && Bflag
> 0 && z
== 0) {
152 printf("%s:", ln
.file
);
159 if (c
&& !cflag
&& !lflag
&& !Lflag
&&
160 binbehave
== BIN_FILE_BIN
&& nottext
&& !qflag
)
161 printf("Binary file %s matches\n", fn
);
168 * Process an individual line in a file. Return non-zero if it matches.
/*
 * isword(x): non-zero when x is a "word" character, i.e. an alphanumeric
 * (per isalnum() in the current locale) or an underscore.
 *
 * The argument is converted to unsigned char as a whole before it reaches
 * isalnum(), so plain (possibly negative) char values and expression
 * arguments are both classified safely.
 *
 * NOTE: x is evaluated twice; do not pass expressions with side effects.
 */
#define isword(x) (isalnum((unsigned char)(x)) || (x) == '_')
174 procline(str_t
*l
, int nottext
)
180 /* size_t will be converted to regoff_t. ssize_t is guaranteed to fit
182 if (l
->len
> SSIZE_MAX
) {
183 errx(2, "Line is too big to process");
193 for (i
= 0; i
< patterns
; i
++) {
196 if (fg_pattern
[i
].pattern
) {
197 r
= grep_search(&fg_pattern
[i
],
198 (unsigned char *)l
->dat
+ offset
, l
->len
- offset
,
200 pmatch
.rm_so
+= offset
;
201 pmatch
.rm_eo
+= offset
;
203 pmatch
.rm_so
= offset
;
204 pmatch
.rm_eo
= l
->len
;
205 r
= regexec(&r_pattern
[i
], l
->dat
, 1, &pmatch
, eflags
);
207 if (r
== 0 && xflag
) {
208 if (pmatch
.rm_so
!= 0 || pmatch
.rm_eo
!= l
->len
)
213 if (oflag
&& pmatch
.rm_so
!= pmatch
.rm_eo
)
224 if (c
&& binbehave
== BIN_FILE_BIN
&& nottext
)
225 return c
; /* Binary file */
227 if ((tail
> 0 || c
) && !cflag
&& !qflag
) {
229 if (first
> 0 && tail
== 0 && (Bflag
< linesqueued
) &&
237 printline(l
, ':', oflag
? &pmatch
: NULL
);
239 printline(l
, '-', oflag
? &pmatch
: NULL
);
243 if (oflag
&& !matchall
) {
244 offset
= pmatch
.rm_eo
;
252 fgrepcomp(fastgrep_t
*fg
, const unsigned char *pat
)
257 fg
->patternLen
= strlen((const char *)pat
);
261 fg
->reversedSearch
= 0;
264 * Make a copy and upper case it for later if in -i mode,
265 * else just copy the pointer.
268 fg
->pattern
= grep_malloc(fg
->patternLen
+ 1);
269 for (i
= 0; i
< fg
->patternLen
; i
++)
270 fg
->pattern
[i
] = toupper(pat
[i
]);
271 fg
->pattern
[fg
->patternLen
] = '\0';
273 fg
->pattern
= __UNCONST(pat
); /* really const */
275 /* Preprocess pattern. */
276 for (i
= 0; i
<= UCHAR_MAX
; i
++)
277 fg
->qsBc
[i
] = fg
->patternLen
;
278 for (i
= 1; i
< fg
->patternLen
; i
++) {
279 fg
->qsBc
[fg
->pattern
[i
]] = fg
->patternLen
- i
;
281 * If case is ignored, make the jump apply to both upper and
282 * lower cased characters. As the pattern is stored in upper
283 * case, apply the same to the lower case equivalents.
286 fg
->qsBc
[tolower(fg
->pattern
[i
])] = fg
->patternLen
- i
;
292 * Returns: -1 on failure, 0 on success
295 fastcomp(fastgrep_t
*fg
, const char *pat
)
305 int firstHalfDot
= -1;
306 int firstLastHalfDot
= -1;
310 fg
->patternLen
= strlen(pat
);
314 fg
->reversedSearch
= 0;
316 /* Remove end-of-line character ('$'). */
317 if (fg
->patternLen
> 0 && pat
[fg
->patternLen
- 1] == '$') {
323 /* Remove beginning-of-line character ('^'). */
330 /* Remove enclosing [[:<:]] and [[:>:]] (word match). */
332 /* basic re's use \( \), extended re's ( ) */
333 int extra
= Eflag
? 1 : 2;
334 fg
->patternLen
-= 14 + 2 * extra
;
335 fg
->wmatch
= 7 + extra
;
336 } else if (fg
->patternLen
>= 14 &&
337 strncmp(pat
+ fg
->bol
, "[[:<:]]", 7) == 0 &&
338 strncmp(pat
+ fg
->bol
+ fg
->patternLen
- 7, "[[:>:]]", 7) == 0) {
339 fg
->patternLen
-= 14;
344 * Copy pattern minus '^' and '$' characters as well as word
345 * match character classes at the beginning and ending of the
346 * string respectively.
348 fg
->pattern
= grep_malloc(fg
->patternLen
+ 1);
349 memcpy(fg
->pattern
, pat
+ bol
+ fg
->wmatch
, fg
->patternLen
);
350 fg
->pattern
[fg
->patternLen
] = '\0';
352 /* Look for ways to cheat...er...avoid the full regex engine. */
353 for (i
= 0; i
< fg
->patternLen
; i
++)
355 switch (fg
->pattern
[i
]) {
358 if (i
< fg
->patternLen
/ 2) {
359 if (firstHalfDot
< 0)
360 /* Closest dot to the beginning */
363 /* Closest dot to the end of the pattern. */
365 if (firstLastHalfDot
< 0)
366 firstLastHalfDot
= i
;
371 /* Special in BRE if preceded by '\\' */
375 /* Not special in BRE. */
381 /* Free memory and let others know this is empty. */
388 fg
->pattern
[i
] = toupper(fg
->pattern
[i
]);
394 * Determine if a reverse search would be faster based on the placement
397 if ((!(lflag
|| cflag
)) && ((!(bol
|| eol
)) &&
398 ((lastHalfDot
) && ((firstHalfDot
< 0) ||
399 ((fg
->patternLen
- (lastHalfDot
+ 1)) < firstHalfDot
))))) {
400 fg
->reversedSearch
= 1;
401 hasDot
= fg
->patternLen
- (firstHalfDot
< 0 ?
402 firstLastHalfDot
: firstHalfDot
) - 1;
403 grep_revstr(fg
->pattern
, fg
->patternLen
);
407 * Normal Quick Search would require a shift based on the position the
408 * next character after the comparison is within the pattern. With
409 * wildcards, the position of the last dot affects the maximum shift
411 * The closer to the end the wild card is the slower the search. A
412 * reverse version of this algorithm would be useful for wildcards near
413 * the end of the string.
425 /* Adjust the shift based on location of the last dot ('.'). */
426 shiftPatternLen
= fg
->patternLen
- hasDot
;
428 /* Preprocess pattern. */
429 for (i
= 0; i
<= UCHAR_MAX
; i
++)
430 fg
->qsBc
[i
] = shiftPatternLen
;
431 for (i
= hasDot
+ 1; i
< fg
->patternLen
; i
++) {
432 fg
->qsBc
[fg
->pattern
[i
]] = fg
->patternLen
- i
;
434 * If case is ignored, make the jump apply to both upper and
435 * lower cased characters. As the pattern is stored in upper
436 * case, apply the same to the lower case equivalents.
439 fg
->qsBc
[tolower(fg
->pattern
[i
])] = fg
->patternLen
- i
;
443 * Put pattern back to normal after pre-processing to allow for easy
446 if (fg
->reversedSearch
)
447 grep_revstr(fg
->pattern
, fg
->patternLen
);
454 * Word boundaries using regular expressions are defined as the point
455 * of transition from a non-word char to a word char, or vice versa.
456 * This means that grep -w +a and grep -w a+ never match anything,
457 * because they lack a starting or ending transition, but grep -w a+b
458 * does match a line containing a+b.
/*
 * wmatch(d, l, s, e): non-zero when the span [s, e) inside buffer d of
 * length l is delimited like a whole word:
 *   - the character before the span (if s != 0) is not a word character,
 *   - the character after the span (if e != l) is not a word character,
 *   - the span is non-empty (e > s), and
 *   - its first and last characters are themselves word characters.
 *
 * d[s - 1] is only read when s != 0 and d[e] only when e != l, thanks to
 * short-circuit evaluation. Relies on the isword() macro defined in this file.
 *
 * NOTE: every argument is evaluated more than once; do not pass
 * expressions with side effects.
 */
#define wmatch(d, l, s, e) \
	(((s) == 0 || !isword((d)[(s) - 1])) && \
	 ((e) == (l) || !isword((d)[(e)])) && \
	 (e) > (s) && isword((d)[(s)]) && isword((d)[(e) - 1]))
465 grep_search(fastgrep_t
*fg
, unsigned char *data
, size_t dataLen
,
472 int rtrnVal
= REG_NOMATCH
;
477 /* No point in going farther if we do not have enough data. */
478 if (dataLen
< (size_t)fg
->patternLen
)
481 /* Only try once at the beginning or ending of the line. */
482 if (fg
->bol
|| fg
->eol
) {
483 /* Simple text comparison. */
484 /* Verify data is >= pattern length before searching on it. */
485 if (dataLen
>= (size_t)fg
->patternLen
) {
486 /* Determine where in data to start search at. */
488 j
= dataLen
- fg
->patternLen
;
491 if (!((fg
->bol
&& fg
->eol
) &&
492 (dataLen
!= (size_t)fg
->patternLen
)))
493 if (grep_cmp(fg
->pattern
, data
+ j
,
494 fg
->patternLen
) == -1) {
496 pmatch
->rm_eo
= j
+ fg
->patternLen
;
497 if (!fg
->wmatch
|| wmatch(data
, dataLen
,
498 pmatch
->rm_so
, pmatch
->rm_eo
))
502 } else if (fg
->reversedSearch
) {
503 /* Quick Search algorithm. */
506 if (grep_cmp(fg
->pattern
, data
+ j
- fg
->patternLen
,
507 fg
->patternLen
) == -1) {
508 pmatch
->rm_so
= j
- fg
->patternLen
;
510 if (!fg
->wmatch
|| wmatch(data
, dataLen
,
511 pmatch
->rm_so
, pmatch
->rm_eo
)) {
516 /* Shift if within bounds, otherwise, we are done. */
517 if (j
== fg
->patternLen
)
519 j
-= fg
->qsBc
[(unsigned char)data
[j
- fg
->patternLen
- 1]];
520 } while (j
>= fg
->patternLen
);
522 /* Quick Search algorithm. */
525 if (grep_cmp(fg
->pattern
, data
+ j
, fg
->patternLen
) == -1) {
527 pmatch
->rm_eo
= j
+ fg
->patternLen
;
528 if (fg
->patternLen
== 0 || !fg
->wmatch
||
529 wmatch(data
, dataLen
, pmatch
->rm_so
,
536 /* Shift if within bounds, otherwise, we are done. */
537 if (j
+ fg
->patternLen
== dataLen
)
540 j
+= fg
->qsBc
[(unsigned char)data
[j
+ fg
->patternLen
]];
541 } while (j
<= (dataLen
- fg
->patternLen
));
550 grep_malloc(size_t size
)
554 if ((ptr
= malloc(size
)) == NULL
)
560 grep_calloc(size_t nmemb
, size_t size
)
564 if ((ptr
= calloc(nmemb
, size
)) == NULL
)
570 grep_realloc(void *ptr
, size_t size
)
572 if ((ptr
= realloc(ptr
, size
)) == NULL
)
579 * Returns: i >= 0 on failure (position that it failed)
583 grep_cmp(const unsigned char *pat
, const unsigned char *data
, size_t len
)
587 for (i
= 0; i
< len
; i
++) {
588 if (((pat
[i
] == data
[i
]) || (!Fflag
&& pat
[i
] == '.'))
589 || (iflag
&& pat
[i
] == toupper(data
[i
])))
598 grep_revstr(unsigned char *str
, int len
)
603 for (i
= 0; i
< len
/ 2; i
++) {
605 str
[i
] = str
[len
- i
- 1];
606 str
[len
- i
- 1] = c
;
612 printline(str_t
*line
, int sep
, regmatch_t
*pmatch
)
618 fputs(line
->file
, stdout
);
624 printf("%d", line
->line_no
);
630 printf("%lld", (long long)line
->off
);
636 fwrite(line
->dat
+ pmatch
->rm_so
,
637 pmatch
->rm_eo
- pmatch
->rm_so
, 1, stdout
);
639 fwrite(line
->dat
, line
->len
, 1, stdout
);