coverity appeasement - redundant check
[minix.git] / commands / grep / util.c
blobbbfff156925d163de43ea638b4cea2dee4258b9b
1 /* $OpenBSD: util.c,v 1.35 2007/09/02 15:19:32 deraadt Exp $ */
3 /*-
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
29 #include <sys/types.h>
30 #include <sys/stat.h>
32 #include <ctype.h>
33 #include <err.h>
34 #include <errno.h>
35 #include <fts.h>
36 #include <regex.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <unistd.h>
42 #include "grep.h"
45 * Process a file line by line...
48 static int linesqueued;
49 static int procline(str_t *l, int);
50 static int grep_search(fastgrep_t *, unsigned char *, size_t, regmatch_t *pmatch);
51 static int grep_cmp(const unsigned char *, const unsigned char *, size_t);
52 static void grep_revstr(unsigned char *, int);
54 int
55 grep_tree(char **argv)
57 FTS *fts;
58 FTSENT *p;
59 int c, fts_flags;
61 c = fts_flags = 0;
63 if (Hflag)
64 fts_flags = FTS_COMFOLLOW;
65 if (Pflag)
66 fts_flags = FTS_PHYSICAL;
67 if (Sflag)
68 fts_flags = FTS_LOGICAL;
70 fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
72 if (!(fts = fts_open(argv, fts_flags, NULL)))
73 err(2, NULL);
74 while ((p = fts_read(fts)) != NULL) {
75 switch (p->fts_info) {
76 case FTS_DNR:
77 /* FALL THROUGH */
78 case FTS_ERR:
79 errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
80 break;
81 case FTS_D:
82 case FTS_DP:
83 break;
84 default:
85 c += procfile(p->fts_path);
86 break;
89 if (errno)
90 err(2, "fts_read");
92 return c;
95 int
96 procfile(char *fn)
98 str_t ln;
99 file_t *f;
100 int c, t, z, nottext;
102 if (fn == NULL) {
103 fn = "(standard input)";
104 f = grep_fdopen(STDIN_FILENO, "r");
105 } else {
106 f = grep_open(fn, "r");
108 if (f == NULL) {
109 if (!sflag)
110 warn("%s", fn);
111 return 0;
114 nottext = grep_bin_file(f);
115 if (nottext && binbehave == BIN_FILE_SKIP) {
116 grep_close(f);
117 return 0;
120 ln.file = fn;
121 ln.line_no = 0;
122 ln.len = 0;
123 linesqueued = 0;
124 tail = 0;
125 ln.off = -1;
127 if (Bflag > 0)
128 initqueue();
129 for (c = 0; c == 0 || !(lflag || qflag); ) {
130 ln.off += ln.len + 1;
131 if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL)
132 break;
133 if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
134 --ln.len;
135 ln.line_no++;
137 z = tail;
139 if ((t = procline(&ln, nottext)) == 0 && Bflag > 0 && z == 0) {
140 enqueue(&ln);
141 linesqueued++;
143 c += t;
145 if (Bflag > 0)
146 clearqueue();
147 grep_close(f);
149 if (cflag) {
150 if (!hflag)
151 printf("%s:", ln.file);
152 printf("%u\n", c);
154 if (lflag && c != 0)
155 printf("%s\n", fn);
156 if (Lflag && c == 0)
157 printf("%s\n", fn);
158 if (c && !cflag && !lflag && !Lflag &&
159 binbehave == BIN_FILE_BIN && nottext && !qflag)
160 printf("Binary file %s matches\n", fn);
162 return c;
167 * Process an individual line in a file. Return non-zero if it matches.
170 #define isword(x) (isalnum(x) || (x) == '_')
172 static int
173 procline(str_t *l, int nottext)
175 regmatch_t pmatch;
176 int c, i, r;
178 if (matchall) {
179 c = !vflag;
180 goto print;
183 for (c = i = 0; i < patterns; i++) {
184 if (fg_pattern[i].pattern) {
185 r = grep_search(&fg_pattern[i], (unsigned char *)l->dat,
186 l->len, &pmatch);
187 } else {
188 pmatch.rm_so = 0;
189 pmatch.rm_eo = l->len;
190 r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags);
192 if (r == 0 && xflag) {
193 if (pmatch.rm_so != 0 || pmatch.rm_eo != l->len)
194 r = REG_NOMATCH;
196 if (r == 0) {
197 c++;
198 break;
201 if (vflag)
202 c = !c;
204 print:
205 if (c && binbehave == BIN_FILE_BIN && nottext)
206 return c; /* Binary file */
208 if ((tail > 0 || c) && !cflag && !qflag) {
209 if (c) {
210 if (first > 0 && tail == 0 && (Bflag < linesqueued) &&
211 (Aflag || Bflag))
212 printf("--\n");
213 first = 1;
214 tail = Aflag;
215 if (Bflag > 0)
216 printqueue();
217 linesqueued = 0;
218 printline(l, ':');
219 } else {
220 printline(l, '-');
221 tail--;
224 return c;
227 void
228 fgrepcomp(fastgrep_t *fg, const char *pattern)
230 int i;
232 /* Initialize. */
233 fg->patternLen = strlen(pattern);
234 fg->bol = 0;
235 fg->eol = 0;
236 fg->wmatch = wflag;
237 fg->reversedSearch = 0;
240 * Make a copy and upper case it for later if in -i mode,
241 * else just copy the pointer.
243 if (iflag) {
244 fg->pattern = grep_malloc(fg->patternLen + 1);
245 for (i = 0; i < fg->patternLen; i++)
246 fg->pattern[i] = toupper(pattern[i]);
247 fg->pattern[fg->patternLen] = '\0';
248 } else
249 fg->pattern = (unsigned char *)pattern; /* really const */
251 /* Preprocess pattern. */
252 for (i = 0; i <= UCHAR_MAX; i++)
253 fg->qsBc[i] = fg->patternLen;
254 for (i = 1; i < fg->patternLen; i++) {
255 fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
257 * If case is ignored, make the jump apply to both upper and
258 * lower cased characters. As the pattern is stored in upper
259 * case, apply the same to the lower case equivalents.
261 if (iflag)
262 fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
267 * Returns: -1 on failure, 0 on success
270 fastcomp(fastgrep_t *fg, const char *pattern)
272 int i;
273 int bol = 0;
274 int eol = 0;
275 int shiftPatternLen;
276 int hasDot = 0;
277 int firstHalfDot = -1;
278 int firstLastHalfDot = -1;
279 int lastHalfDot = 0;
281 /* Initialize. */
282 fg->patternLen = strlen(pattern);
283 fg->bol = 0;
284 fg->eol = 0;
285 fg->wmatch = 0;
286 fg->reversedSearch = 0;
288 /* Remove end-of-line character ('$'). */
289 if (pattern[fg->patternLen - 1] == '$') {
290 eol++;
291 fg->eol = 1;
292 fg->patternLen--;
295 /* Remove beginning-of-line character ('^'). */
296 if (pattern[0] == '^') {
297 bol++;
298 fg->bol = 1;
299 fg->patternLen--;
302 /* Remove enclosing [[:<:]] and [[:>:]] (word match). */
303 if (wflag) {
304 /* basic re's use \( \), extended re's ( ) */
305 int extra = Eflag ? 1 : 2;
306 fg->patternLen -= 14 + 2 * extra;
307 fg->wmatch = 7 + extra;
308 } else if (fg->patternLen >= 14 &&
309 strncmp(pattern + fg->bol, "[[:<:]]", 7) == 0 &&
310 strncmp(pattern + fg->bol + fg->patternLen - 7, "[[:>:]]", 7) == 0) {
311 fg->patternLen -= 14;
312 fg->wmatch = 7;
316 * Copy pattern minus '^' and '$' characters as well as word
317 * match character classes at the beginning and ending of the
318 * string respectively.
320 fg->pattern = grep_malloc(fg->patternLen + 1);
321 memcpy(fg->pattern, pattern + bol + fg->wmatch, fg->patternLen);
322 fg->pattern[fg->patternLen] = '\0';
324 /* Look for ways to cheat...er...avoid the full regex engine. */
325 for (i = 0; i < fg->patternLen; i++)
327 /* Can still cheat? */
328 if ((isalnum(fg->pattern[i])) || isspace(fg->pattern[i]) ||
329 (fg->pattern[i] == '_') || (fg->pattern[i] == ',') ||
330 (fg->pattern[i] == '=') || (fg->pattern[i] == '-') ||
331 (fg->pattern[i] == ':') || (fg->pattern[i] == '/')) {
332 /* As long as it is good, upper case it for later. */
333 if (iflag)
334 fg->pattern[i] = toupper(fg->pattern[i]);
335 } else if (fg->pattern[i] == '.') {
336 hasDot = i;
337 if (i < fg->patternLen / 2) {
338 if (firstHalfDot < 0)
339 /* Closest dot to the beginning */
340 firstHalfDot = i;
341 } else {
342 /* Closest dot to the end of the pattern. */
343 lastHalfDot = i;
344 if (firstLastHalfDot < 0)
345 firstLastHalfDot = i;
347 } else {
348 /* Free memory and let others know this is empty. */
349 free(fg->pattern);
350 fg->pattern = NULL;
351 return (-1);
356 * Determine if a reverse search would be faster based on the placement
357 * of the dots.
359 if ((!(lflag || cflag)) && ((!(bol || eol)) &&
360 ((lastHalfDot) && ((firstHalfDot < 0) ||
361 ((fg->patternLen - (lastHalfDot + 1)) < firstHalfDot))))) {
362 fg->reversedSearch = 1;
363 hasDot = fg->patternLen - (firstHalfDot < 0 ?
364 firstLastHalfDot : firstHalfDot) - 1;
365 grep_revstr(fg->pattern, fg->patternLen);
369 * Normal Quick Search would require a shift based on the position the
370 * next character after the comparison is within the pattern. With
371 * wildcards, the position of the last dot effects the maximum shift
372 * distance.
373 * The closer to the end the wild card is the slower the search. A
374 * reverse version of this algorithm would be useful for wildcards near
375 * the end of the string.
377 * Examples:
378 * Pattern Max shift
379 * ------- ---------
380 * this 5
381 * .his 4
382 * t.is 3
383 * th.s 2
384 * thi. 1
387 /* Adjust the shift based on location of the last dot ('.'). */
388 shiftPatternLen = fg->patternLen - hasDot;
390 /* Preprocess pattern. */
391 for (i = 0; i <= UCHAR_MAX; i++)
392 fg->qsBc[i] = shiftPatternLen;
393 for (i = hasDot + 1; i < fg->patternLen; i++) {
394 fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
396 * If case is ignored, make the jump apply to both upper and
397 * lower cased characters. As the pattern is stored in upper
398 * case, apply the same to the lower case equivalents.
400 if (iflag)
401 fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
405 * Put pattern back to normal after pre-processing to allow for easy
406 * comparisons later.
408 if (fg->reversedSearch)
409 grep_revstr(fg->pattern, fg->patternLen);
411 return (0);
415 * Word boundaries using regular expressions are defined as the point
416 * of transition from a non-word char to a word char, or vice versa.
417 * This means that grep -w +a and grep -w a+ never match anything,
418 * because they lack a starting or ending transition, but grep -w a+b
419 * does match a line containing a+b.
421 #define wmatch(d, l, s, e) \
422 ((s == 0 || !isword(d[s-1])) && (e == l || !isword(d[e])) && \
423 e > s && isword(d[s]) && isword(d[e-1]))
425 static int
426 grep_search(fastgrep_t *fg, unsigned char *data, size_t dataLen, regmatch_t *pmatch)
428 int j;
429 int rtrnVal = REG_NOMATCH;
431 pmatch->rm_so = -1;
432 pmatch->rm_eo = -1;
434 /* No point in going farther if we do not have enough data. */
435 if (dataLen < fg->patternLen)
436 return (rtrnVal);
438 /* Only try once at the beginning or ending of the line. */
439 if (fg->bol || fg->eol) {
440 /* Simple text comparison. */
441 /* Verify data is >= pattern length before searching on it. */
442 if (dataLen >= fg->patternLen) {
443 /* Determine where in data to start search at. */
444 if (fg->eol)
445 j = dataLen - fg->patternLen;
446 else
447 j = 0;
448 if (!((fg->bol && fg->eol) && (dataLen != fg->patternLen)))
449 if (grep_cmp(fg->pattern, data + j,
450 fg->patternLen) == -1) {
451 pmatch->rm_so = j;
452 pmatch->rm_eo = j + fg->patternLen;
453 if (!fg->wmatch || wmatch(data, dataLen,
454 pmatch->rm_so, pmatch->rm_eo))
455 rtrnVal = 0;
458 } else if (fg->reversedSearch) {
459 /* Quick Search algorithm. */
460 j = dataLen;
461 do {
462 if (grep_cmp(fg->pattern, data + j - fg->patternLen,
463 fg->patternLen) == -1) {
464 pmatch->rm_so = j - fg->patternLen;
465 pmatch->rm_eo = j;
466 if (!fg->wmatch || wmatch(data, dataLen,
467 pmatch->rm_so, pmatch->rm_eo)) {
468 rtrnVal = 0;
469 break;
472 /* Shift if within bounds, otherwise, we are done. */
473 if (j == fg->patternLen)
474 break;
475 j -= fg->qsBc[data[j - fg->patternLen - 1]];
476 } while (j >= fg->patternLen);
477 } else {
478 /* Quick Search algorithm. */
479 j = 0;
480 do {
481 if (grep_cmp(fg->pattern, data + j, fg->patternLen) == -1) {
482 pmatch->rm_so = j;
483 pmatch->rm_eo = j + fg->patternLen;
484 if (fg->patternLen == 0 || !fg->wmatch ||
485 wmatch(data, dataLen, pmatch->rm_so,
486 pmatch->rm_eo)) {
487 rtrnVal = 0;
488 break;
492 /* Shift if within bounds, otherwise, we are done. */
493 if (j + fg->patternLen == dataLen)
494 break;
495 else
496 j += fg->qsBc[data[j + fg->patternLen]];
497 } while (j <= (dataLen - fg->patternLen));
500 return (rtrnVal);
504 void *
505 grep_malloc(size_t size)
507 void *ptr;
509 if ((ptr = malloc(size)) == NULL)
510 err(2, "malloc");
511 return ptr;
514 void *
515 grep_calloc(size_t nmemb, size_t size)
517 void *ptr;
519 if ((ptr = calloc(nmemb, size)) == NULL && (size > 0) && (nmemb > 0))
520 err(2, "calloc");
521 return ptr;
524 void *
525 grep_realloc(void *ptr, size_t size)
527 if ((ptr = realloc(ptr, size)) == NULL)
528 err(2, "realloc");
529 return ptr;
533 * Returns: i >= 0 on failure (position that it failed)
534 * -1 on success
536 static int
537 grep_cmp(const unsigned char *pattern, const unsigned char *data, size_t len)
539 int i;
541 for (i = 0; i < len; i++) {
542 if (((pattern[i] == data[i]) || (!Fflag && pattern[i] == '.'))
543 || (iflag && pattern[i] == toupper(data[i])))
544 continue;
545 return (i);
548 return (-1);
551 static void
552 grep_revstr(unsigned char *str, int len)
554 int i;
555 char c;
557 for (i = 0; i < len / 2; i++) {
558 c = str[i];
559 str[i] = str[len - i - 1];
560 str[len - i - 1] = c;
564 void
565 printline(str_t *line, int sep)
567 int n;
569 n = 0;
570 if (!hflag) {
571 fputs(line->file, stdout);
572 ++n;
574 if (nflag) {
575 if (n)
576 putchar(sep);
577 printf("%d", line->line_no);
578 ++n;
580 if (bflag) {
581 if (n)
582 putchar(sep);
583 #ifndef __minix
584 printf("%lld", (long long)line->off);
585 #else
586 printf("%ld", (long)line->off);
587 #endif
588 ++n;
590 if (n)
591 putchar(sep);
592 fwrite(line->dat, line->len, 1, stdout);
593 putchar('\n');