8354 sync regcomp(3C) with upstream (fix make catalog)
[unleashed/tickless.git] / usr / src / cmd / fgrep / fgrep.c
blob31724cda400259665c241508537277d4db2a9e9f
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
30 /* Copyright (c) 1987, 1988 Microsoft Corporation */
31 /* All Rights Reserved */
34 * Copyright 2013 Damian Bogel. All rights reserved.
38 * fgrep -- print all lines containing any of a set of keywords
40 * status returns:
41 * 0 - ok, and some matches
42 * 1 - ok, but no matches
43 * 2 - some error
46 #include <stdio.h>
47 #include <ctype.h>
48 #include <sys/types.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <locale.h>
52 #include <libintl.h>
53 #include <euc.h>
54 #include <sys/stat.h>
55 #include <fcntl.h>
57 #include <getwidth.h>
59 eucwidth_t WW;
60 #define WIDTH1 WW._eucw1
61 #define WIDTH2 WW._eucw2
62 #define WIDTH3 WW._eucw3
63 #define MULTI_BYTE WW._multibyte
64 #define GETONE(lc, p) \
65 cw = ISASCII(lc = (unsigned char)*p++) ? 1 : \
66 (ISSET2(lc) ? WIDTH2 : \
67 (ISSET3(lc) ? WIDTH3 : WIDTH1)); \
68 if (--cw > --ccount) { \
69 cw -= ccount; \
70 while (ccount--) \
71 lc = (lc << 7) | ((*p++) & 0177); \
72 if (p >= &buf[fw_lBufsiz + BUFSIZ]) { \
73 if (nlp == buf) { \
74 /* Increase the buffer size */ \
75 fw_lBufsiz += BUFSIZ; \
76 if ((buf = realloc(buf, \
77 fw_lBufsiz + BUFSIZ)) == NULL) { \
78 exit(2); /* out of memory */ \
79 } \
80 nlp = buf; \
81 p = &buf[fw_lBufsiz]; \
82 } else { \
83 /* shift the buffer contents down */ \
84 (void) memmove(buf, nlp, \
85 &buf[fw_lBufsiz + BUFSIZ] - nlp);\
86 p -= nlp - buf; \
87 nlp = buf; \
88 } \
89 } \
90 if (p > &buf[fw_lBufsiz]) { \
91 if ((ccount = fread(p, sizeof (char), \
92 &buf[fw_lBufsiz + BUFSIZ] - p, fptr))\
93 <= 0) break; \
94 } else if ((ccount = fread(p, \
95 sizeof (char), BUFSIZ, fptr)) <= 0) \
96 break; \
97 blkno += (long long)ccount; \
98 } \
99 ccount -= cw; \
100 while (cw--) \
101 lc = (lc << 7) | ((*p++) & 0177)
104 * The same() macro and letter() function were inserted to allow for
105 * the -i option work for the multi-byte environment.
107 wchar_t letter();
108 #define same(a, b) \
109 (a == b || iflag && (!MULTI_BYTE || ISASCII(a)) && (a ^ b) == ' ' && \
110 letter(a) == letter(b))
112 #define STDIN_FILENAME gettext("(standard input)")
114 #define QSIZE 400
115 struct words {
116 wchar_t inp;
117 char out;
118 struct words *nst;
119 struct words *link;
120 struct words *fail;
121 } *w = NULL, *smax, *q;
123 FILE *fptr;
124 long long lnum;
125 int bflag, cflag, lflag, fflag, nflag, vflag, xflag, eflag, qflag;
126 int Hflag, hflag, iflag;
127 int retcode = 0;
128 int nfile;
129 long long blkno;
130 int nsucc;
131 long long tln;
132 FILE *wordf;
133 char *argptr;
134 off_t input_size = 0;
136 void execute(char *);
137 void cgotofn(void);
138 void overflo(void);
139 void cfail(void);
141 static long fw_lBufsiz = 0;
144 main(int argc, char **argv)
146 int c;
147 int errflg = 0;
148 struct stat file_stat;
150 (void) setlocale(LC_ALL, "");
151 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
152 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
153 #endif
154 (void) textdomain(TEXT_DOMAIN);
156 while ((c = getopt(argc, argv, "Hhybcie:f:lnvxqs")) != EOF)
157 switch (c) {
159 case 'q':
160 case 's': /* Solaris: legacy option */
161 qflag++;
162 continue;
163 case 'H':
164 Hflag++;
165 hflag = 0;
166 continue;
167 case 'h':
168 hflag++;
169 Hflag = 0;
170 continue;
171 case 'b':
172 bflag++;
173 continue;
175 case 'i':
176 case 'y':
177 iflag++;
178 continue;
180 case 'c':
181 cflag++;
182 continue;
184 case 'e':
185 eflag++;
186 argptr = optarg;
187 input_size = strlen(argptr);
188 continue;
190 case 'f':
191 fflag++;
192 wordf = fopen(optarg, "r");
193 if (wordf == NULL) {
194 (void) fprintf(stderr,
195 gettext("fgrep: can't open %s\n"),
196 optarg);
197 exit(2);
200 if (fstat(fileno(wordf), &file_stat) == 0) {
201 input_size = file_stat.st_size;
202 } else {
203 (void) fprintf(stderr,
204 gettext("fgrep: can't fstat %s\n"),
205 optarg);
206 exit(2);
209 continue;
211 case 'l':
212 lflag++;
213 continue;
215 case 'n':
216 nflag++;
217 continue;
219 case 'v':
220 vflag++;
221 continue;
223 case 'x':
224 xflag++;
225 continue;
227 case '?':
228 errflg++;
231 argc -= optind;
232 if (errflg || ((argc <= 0) && !fflag && !eflag)) {
233 (void) printf(gettext("usage: fgrep [ -bcHhilnqsvx ] "
234 "[ -e exp ] [ -f file ] [ strings ] [ file ] ...\n"));
235 exit(2);
237 if (!eflag && !fflag) {
238 argptr = argv[optind];
239 input_size = strlen(argptr);
240 input_size++;
241 optind++;
242 argc--;
246 * Normally we need one struct words for each letter in the pattern
247 * plus one terminating struct words with outp = 1, but when -x option
248 * is specified we require one more struct words for `\n` character so we
249 * calculate the input_size as below. We add extra 1 because
250 * (input_size/2) rounds off odd numbers
253 if (xflag) {
254 input_size = input_size + (input_size/2) + 1;
257 input_size++;
259 w = (struct words *)calloc(input_size, sizeof (struct words));
260 if (w == NULL) {
261 (void) fprintf(stderr,
262 gettext("fgrep: could not allocate "
263 "memory for wordlist\n"));
264 exit(2);
267 getwidth(&WW);
268 if ((WIDTH1 == 0) && (WIDTH2 == 0) &&
269 (WIDTH3 == 0)) {
271 * If non EUC-based locale,
272 * assume WIDTH1 is 1.
274 WIDTH1 = 1;
276 WIDTH2++;
277 WIDTH3++;
279 cgotofn();
280 cfail();
281 nfile = argc;
282 argv = &argv[optind];
283 if (argc <= 0) {
284 execute((char *)NULL);
285 } else
286 while (--argc >= 0) {
287 execute(*argv);
288 argv++;
291 if (w != NULL) {
292 free(w);
295 return (retcode != 0 ? retcode : nsucc == 0);
298 void
299 execute(char *file)
301 char *p;
302 struct words *c;
303 int ccount;
304 static char *buf = NULL;
305 int failed;
306 char *nlp;
307 wchar_t lc;
308 int cw;
310 if (buf == NULL) {
311 fw_lBufsiz = BUFSIZ;
312 if ((buf = malloc(fw_lBufsiz + BUFSIZ)) == NULL) {
313 exit(2); /* out of memory */
317 if (file) {
318 if ((fptr = fopen(file, "r")) == NULL) {
319 (void) fprintf(stderr,
320 gettext("fgrep: can't open %s\n"), file);
321 retcode = 2;
322 return;
324 } else {
325 fptr = stdin;
326 file = STDIN_FILENAME;
328 ccount = 0;
329 failed = 0;
330 lnum = 1;
331 tln = 0;
332 blkno = 0;
333 p = buf;
334 nlp = p;
335 c = w;
336 for (;;) {
337 if (c == 0)
338 break;
339 if (ccount <= 0) {
340 if (p >= &buf[fw_lBufsiz + BUFSIZ]) {
341 if (nlp == buf) {
342 /* increase the buffer size */
343 fw_lBufsiz += BUFSIZ;
344 if ((buf = realloc(buf,
345 fw_lBufsiz + BUFSIZ)) == NULL) {
346 exit(2); /* out of memory */
348 nlp = buf;
349 p = &buf[fw_lBufsiz];
350 } else {
351 /* shift the buffer down */
352 (void) memmove(buf, nlp,
353 &buf[fw_lBufsiz + BUFSIZ]
354 - nlp);
355 p -= nlp - buf;
356 nlp = buf;
360 if (p > &buf[fw_lBufsiz]) {
361 if ((ccount = fread(p, sizeof (char),
362 &buf[fw_lBufsiz + BUFSIZ] - p, fptr))
363 <= 0)
364 break;
365 } else if ((ccount = fread(p, sizeof (char),
366 BUFSIZ, fptr)) <= 0)
367 break;
368 blkno += (long long)ccount;
370 GETONE(lc, p);
371 nstate:
372 if (same(c->inp, lc)) {
373 c = c->nst;
374 } else if (c->link != 0) {
375 c = c->link;
376 goto nstate;
377 } else {
378 c = c->fail;
379 failed = 1;
380 if (c == 0) {
381 c = w;
382 istate:
383 if (same(c->inp, lc)) {
384 c = c->nst;
385 } else if (c->link != 0) {
386 c = c->link;
387 goto istate;
389 } else
390 goto nstate;
393 if (c == 0)
394 break;
396 if (c->out) {
397 while (lc != '\n') {
398 if (ccount <= 0) {
399 if (p == &buf[fw_lBufsiz + BUFSIZ]) {
400 if (nlp == buf) {
401 /* increase buffer size */
402 fw_lBufsiz += BUFSIZ;
403 if ((buf = realloc(buf, fw_lBufsiz + BUFSIZ)) == NULL) {
404 exit(2); /* out of memory */
406 nlp = buf;
407 p = &buf[fw_lBufsiz];
408 } else {
409 /* shift buffer down */
410 (void) memmove(buf, nlp, &buf[fw_lBufsiz + BUFSIZ] - nlp);
411 p -= nlp - buf;
412 nlp = buf;
415 if (p > &buf[fw_lBufsiz]) {
416 if ((ccount = fread(p, sizeof (char),
417 &buf[fw_lBufsiz + BUFSIZ] - p, fptr)) <= 0) break;
418 } else if ((ccount = fread(p, sizeof (char), BUFSIZ,
419 fptr)) <= 0) break;
420 blkno += (long long)ccount;
422 GETONE(lc, p);
424 if ((vflag && (failed == 0 || xflag == 0)) ||
425 (vflag == 0 && xflag && failed))
426 goto nomatch;
427 succeed:
428 nsucc = 1;
429 if (lflag || qflag) {
430 if (!qflag)
431 (void) printf("%s\n", file);
432 (void) fclose(fptr);
433 return;
435 if (cflag) {
436 tln++;
437 } else {
438 if (Hflag || (nfile > 1 && !hflag))
439 (void) printf("%s:", file);
440 if (bflag)
441 (void) printf("%lld:",
442 (blkno - (long long)(ccount-1))
443 / BUFSIZ);
444 if (nflag)
445 (void) printf("%lld:", lnum);
446 if (p <= nlp) {
447 while (nlp < &buf[fw_lBufsiz + BUFSIZ])
448 (void) putchar(*nlp++);
449 nlp = buf;
451 while (nlp < p)
452 (void) putchar(*nlp++);
454 nomatch:
455 lnum++;
456 nlp = p;
457 c = w;
458 failed = 0;
459 continue;
461 if (lc == '\n')
462 if (vflag)
463 goto succeed;
464 else {
465 lnum++;
466 nlp = p;
467 c = w;
468 failed = 0;
471 (void) fclose(fptr);
472 if (cflag && !qflag) {
473 if (Hflag || (nfile > 1 && !hflag))
474 (void) printf("%s:", file);
475 (void) printf("%lld\n", tln);
480 wchar_t
481 getargc(void)
483 /* appends a newline to shell quoted argument list so */
484 /* the list looks like it came from an ed style file */
485 wchar_t c;
486 int cw;
487 int b;
488 static int endflg;
491 if (wordf) {
492 if ((b = getc(wordf)) == EOF)
493 return (EOF);
494 cw = ISASCII(c = (wchar_t)b) ? 1 :
495 (ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
496 while (--cw) {
497 if ((b = getc(wordf)) == EOF)
498 return (EOF);
499 c = (c << 7) | (b & 0177);
501 return (iflag ? letter(c) : c);
504 if (endflg)
505 return (EOF);
508 cw = ISASCII(c = (unsigned char)*argptr++) ? 1 :
509 (ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
511 while (--cw)
512 c = (c << 7) | ((*argptr++) & 0177);
513 if (c == '\0') {
514 endflg++;
515 return ('\n');
518 return (iflag ? letter(c) : c);
523 void
524 cgotofn(void)
526 int c;
527 struct words *s;
529 s = smax = w;
530 nword:
531 for (;;) {
532 c = getargc();
533 if (c == EOF)
534 return;
535 if (c == 0)
536 goto enter;
537 if (c == '\n') {
538 if (xflag) {
539 for (;;) {
540 if (s->inp == c) {
541 s = s->nst;
542 break;
544 if (s->inp == 0)
545 goto nenter;
546 if (s->link == 0) {
547 if (smax >= &w[input_size -1])
548 overflo();
549 s->link = ++smax;
550 s = smax;
551 goto nenter;
553 s = s->link;
556 s->out = 1;
557 s = w;
558 } else {
559 loop:
560 if (s->inp == c) {
561 s = s->nst;
562 continue;
564 if (s->inp == 0)
565 goto enter;
566 if (s->link == 0) {
567 if (smax >= &w[input_size -1])
568 overflo();
569 s->link = ++smax;
570 s = smax;
571 goto enter;
573 s = s->link;
574 goto loop;
578 enter:
579 do {
580 s->inp = c;
581 if (smax >= &w[input_size -1])
582 overflo();
583 s->nst = ++smax;
584 s = smax;
585 } while ((c = getargc()) != '\n' && c != EOF);
586 if (xflag) {
587 nenter:
588 s->inp = '\n';
589 if (smax >= &w[input_size -1])
590 overflo();
591 s->nst = ++smax;
593 smax->out = 1;
594 s = w;
595 if (c != EOF)
596 goto nword;
600 * This function is an unexpected condition, since input_size should have been
601 * calculated correctly before hand.
604 void
605 overflo(void)
607 (void) fprintf(stderr, gettext("fgrep: wordlist too large\n"));
608 exit(2);
611 void
612 cfail(void)
614 int qsize = QSIZE;
615 struct words **queue = NULL;
618 * front and rear are pointers used to traverse the global words
619 * structure "w" which contains the data of input pattern file
621 struct words **front, **rear;
622 struct words *state;
623 unsigned long frontoffset = 0, rearoffset = 0;
624 char c;
625 struct words *s;
626 s = w;
627 if ((queue = (struct words **)calloc(qsize, sizeof (struct words *)))
628 == NULL) {
629 perror("fgrep");
630 exit(2);
632 front = rear = queue;
633 init:
634 if ((s->inp) != 0) {
635 *rear++ = s->nst;
637 * Reallocates the queue if the number of distinct starting
638 * character of patterns exceeds the qsize value
640 if (rear >= &queue[qsize - 1]) {
641 frontoffset = front - queue;
642 rearoffset = rear - queue;
643 qsize += QSIZE;
644 if ((queue = (struct words **)realloc(queue,
645 qsize * sizeof (struct words *))) == NULL) {
646 perror("fgrep");
647 exit(2);
649 front = queue + frontoffset;
650 rear = queue + rearoffset;
653 if ((s = s->link) != 0) {
654 goto init;
657 while (rear != front) {
658 s = *front++;
659 cloop:
660 if ((c = s->inp) != 0) {
661 *rear++ = (q = s->nst);
663 * Reallocate the queue if the rear pointer reaches the end
664 * queue
666 if (rear >= &queue[qsize - 1]) {
667 frontoffset = front - queue;
668 rearoffset = rear - queue;
669 qsize += QSIZE;
670 if ((queue = (struct words **)realloc(queue,
671 qsize * sizeof (struct words *))) == NULL) {
672 perror("fgrep");
673 exit(2);
675 front = queue + frontoffset;
676 rear = queue + rearoffset;
678 state = s->fail;
679 floop:
680 if (state == 0)
681 state = w;
682 if (state->inp == c) {
683 qloop:
684 q->fail = state->nst;
685 if ((state->nst)->out == 1)
686 q->out = 1;
687 if ((q = q->link) != 0)
688 goto qloop;
689 } else if ((state = state->link) != 0)
690 goto floop;
692 if ((s = s->link) != 0)
693 goto cloop;
697 wchar_t
698 letter(wchar_t c)
700 if (c >= 'a' && c <= 'z')
701 return (c);
702 if (c >= 'A' && c <= 'Z')
703 return (c + 'a' - 'A');
704 return (c);