dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / usr / src / lib / libcmd / common / cut.c
blobabafdc5070548ae739addfe7c6b265300dcec65e
1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1992-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * *
20 ***********************************************************************/
21 #pragma prototyped
23 * David Korn
24 * AT&T Bell Laboratories
26 * cut fields or columns from a file
29 static const char usage[] =
30 "[-?\n@(#)$Id: cut (AT&T Research) 2009-12-04 $\n]"
31 USAGE_LICENSE
32 "[+NAME?cut - cut out selected columns or fields of each line of a file]"
33 "[+DESCRIPTION?\bcut\b bytes, characters, or character-delimited fields "
34 "from one or more files, contatenating them on standard output.]"
35 "[+?The option argument \alist\a is a comma-separated or blank-separated "
36 "list of positive numbers and ranges. Ranges can be of three "
37 "forms. The first is two positive integers separated by a hyphen "
38 "(\alow\a\b-\b\ahigh\a), which represents all fields from \alow\a to "
39 "\ahigh\a. The second is a positive number preceded by a hyphen "
40 "(\b-\b\ahigh\a), which represents all fields from field \b1\b to "
41 "\ahigh\a. The last is a positive number followed by a hyphen "
42 "(\alow\a\b-\b), which represents all fields from \alow\a to the "
43 "last field, inclusive. Elements in the \alist\a can be repeated, "
44 "can overlap, and can appear in any order. The order of the "
45 "output is that of the input.]"
46 "[+?One and only one of \b-b\b, \b-c\b, or \b-f\b must be specified.]"
47 "[+?If no \afile\a is given, or if the \afile\a is \b-\b, \bcut\b "
48 "cuts from standard input. The start of the file is defined "
49 "as the current offset.]"
50 "[b:bytes]:[list?\bcut\b based on a list of byte counts.]"
51 "[c:characters]:[list?\bcut\b based on a list of character counts.]"
52 "[d:delimiter]:[delim?The field character for the \b-f\b option is set "
53 "to \adelim\a. The default is the \btab\b character.]"
54 "[f:fields]:[list?\bcut\b based on fields separated by the delimiter "
55 "character specified with the \b-d\b optiion.]"
56 "[n!:split?Split multibyte characters selected by the \b-b\b option.]"
57 "[R|r:reclen]#[reclen?If \areclen\a > 0, the input will be read as fixed length "
58 "records of length \areclen\a when used with the \b-b\b or \b-c\b "
59 "option.]"
60 "[s:suppress|only-delimited?Suppress lines with no delimiter characters, "
61 "when used with the \b-f\b option. By default, lines with no "
62 "delimiters will be passsed in untouched.]"
63 "[D:line-delimeter|output-delimiter]:[ldelim?The line delimiter character for "
64 "the \b-f\b option is set to \aldelim\a. The default is the "
65 "\bnewline\b character.]"
66 "[N!:newline?Output new-lines at end of each record when used "
67 "with the \b-b\b or \b-c\b option.]"
68 "\n"
69 "\n[file ...]\n"
70 "\n"
71 "[+EXIT STATUS?]{"
72 "[+0?All files processed successfully.]"
73 "[+>0?One or more files failed to open or could not be read.]"
74 "}"
75 "[+SEE ALSO?\bpaste\b(1), \bgrep\b(1)]"
78 #include <cmd.h>
79 #include <ctype.h>
/*
 * One delimiter (field or line) as supplied on the command line.
 */
typedef struct Delim_s
{
	char*		str;	/* the option argument text */
	int		len;	/* byte length of the delimiter character */
	int		chr;	/* the delimiter character value */
} Delim_t;
88 typedef struct Cut_s
90 int mb;
91 int eob;
92 int cflag;
93 int nosplit;
94 int sflag;
95 int nlflag;
96 int reclen;
97 Delim_t wdelim;
98 Delim_t ldelim;
99 unsigned char space[UCHAR_MAX+1];
100 int list[2]; /* NOTE: must be last member */
101 } Cut_t;
/* list terminator; HUGE-1 is used as the count of an open-ended range */
#define HUGE		INT_MAX
/* size handed to sftmp(); parenthesized so it is safe inside expressions */
#define BLOCK		(8*1024)

/* mode bits collected by b_cut() and decoded by cutinit() */
#define C_BYTES		1
#define C_CHARS		2
#define C_FIELDS	4
#define C_SUPRESS	8
#define C_NOSPLIT	16
#define C_NONEWLINE	32

/* byte classes stored in Cut_t.space[] */
#define SP_LINE		1
#define SP_WORD		2
#define SP_WIDE		3

/* decode one multibyte character of at most n bytes at p into w */
#define mb2wc(w,p,n)	(*ast.mb_towc)(&(w),(char*)(p),(n))
/*
 * qsort() comparison callback: order two records by their first int.
 *
 * Each argument points at an array of ints of which only element 0
 * takes part in the comparison.  Returns -1, 0 or 1.  The pointers are
 * read through const-qualified types so the const on the parameters
 * is not cast away.
 */
static int
mycomp(register const void* a, register const void* b)
{
	const int	left = *(const int*)a;
	const int	right = *(const int*)b;

	if (left < right)
		return -1;
	if (left > right)
		return 1;
	return 0;
}
132 static Cut_t*
133 cutinit(int mode, char* str, Delim_t* wdelim, Delim_t* ldelim, size_t reclen)
135 register int* lp;
136 register int c;
137 register int n = 0;
138 register int range = 0;
139 register char* cp = str;
140 Cut_t* cut;
142 if (!(cut = (Cut_t*)stakalloc(sizeof(Cut_t) + strlen(cp) * sizeof(int))))
143 error(ERROR_exit(1), "out of space");
144 if (cut->mb = mbwide())
146 memset(cut->space, 0, sizeof(cut->space) / 2);
147 memset(cut->space + sizeof(cut->space) / 2, SP_WIDE, sizeof(cut->space) / 2);
149 else
150 memset(cut->space, 0, sizeof(cut->space));
151 cut->wdelim = *wdelim;
152 if (wdelim->len == 1)
153 cut->space[wdelim->chr] = SP_WORD;
154 cut->ldelim = *ldelim;
155 cut->eob = (ldelim->len == 1) ? ldelim->chr : 0;
156 cut->space[cut->eob] = SP_LINE;
157 cut->cflag = (mode&C_CHARS) && cut->mb;
158 cut->nosplit = (mode&(C_BYTES|C_NOSPLIT)) == (C_BYTES|C_NOSPLIT) && cut->mb;
159 cut->sflag = (mode&C_SUPRESS) != 0;
160 cut->nlflag = (mode&C_NONEWLINE) != 0;
161 cut->reclen = reclen;
162 lp = cut->list;
163 for (;;)
164 switch(c = *cp++)
166 case ' ':
167 case '\t':
168 while(*cp==' ' || *cp=='\t')
169 cp++;
170 /*FALLTHROUGH*/
171 case 0:
172 case ',':
173 if(range)
175 --range;
176 if((n = (n ? (n-range) : (HUGE-1))) < 0)
177 error(ERROR_exit(1),"invalid range for c/f option");
178 *lp++ = range;
179 *lp++ = n;
181 else
183 *lp++ = --n;
184 *lp++ = 1;
186 if(c==0)
188 register int *dp;
189 *lp = HUGE;
190 n = 1 + (lp-cut->list)/2;
191 qsort(lp=cut->list,n,2*sizeof(*lp),mycomp);
192 /* eliminate overlapping regions */
193 for(n=0,range= -2,dp=lp; *lp!=HUGE; lp+=2)
195 if(lp[0] <= range)
197 if(lp[1]==HUGE)
199 dp[-1] = HUGE;
200 break;
202 if((c = lp[0]+lp[1]-range)>0)
204 range += c;
205 dp[-1] += c;
208 else
210 range = *dp++ = lp[0];
211 if(lp[1]==HUGE)
213 *dp++ = HUGE;
214 break;
216 range += (*dp++ = lp[1]);
219 *dp = HUGE;
220 lp = cut->list;
221 /* convert ranges into gaps */
222 for(n=0; *lp!=HUGE; lp+=2)
224 c = *lp;
225 *lp -= n;
226 n = c+lp[1];
228 return cut;
230 n = range = 0;
231 break;
233 case '-':
234 if(range)
235 error(ERROR_exit(1),"bad list for c/f option");
236 range = n?n:1;
237 n = 0;
238 break;
240 default:
241 if(!isdigit(c))
242 error(ERROR_exit(1),"bad list for c/f option");
243 n = 10*n + (c-'0');
244 break;
246 /* NOTREACHED */
250 * cut each line of file <fdin> and put results to <fdout> using list <list>
253 static void
254 cutcols(Cut_t* cut, Sfio_t* fdin, Sfio_t* fdout)
256 register int c;
257 register int len;
258 register int ncol = 0;
259 register const int* lp = cut->list;
260 register char* bp;
261 register int skip; /* non-zero for don't copy */
262 int must;
263 char* ep;
264 const char* xx;
266 for (;;)
268 if (len = cut->reclen)
269 bp = sfreserve(fdin, len, -1);
270 else
271 bp = sfgetr(fdin, '\n', 0);
272 if (!bp && !(bp = sfgetr(fdin, 0, SF_LASTR)))
273 break;
274 len = sfvalue(fdin);
275 ep = bp + len;
276 xx = 0;
277 if (!(ncol = skip = *(lp = cut->list)))
278 ncol = *++lp;
279 must = 1;
282 if (cut->nosplit)
284 register const char* s = bp;
285 register int w = len < ncol ? len : ncol;
286 register int z;
288 while (w > 0)
290 if (!(*s & 0x80))
291 z = 1;
292 else if ((z = mblen(s, w)) <= 0)
294 if (s == bp && xx)
296 w += s - xx;
297 bp = (char*)(s = xx);
298 xx = 0;
299 continue;
301 xx = s;
302 if (skip)
303 s += w;
304 w = 0;
305 break;
307 s += z;
308 w -= z;
310 c = s - bp;
311 ncol = !w && ncol >= len;
313 else if (cut->cflag)
315 register const char* s = bp;
316 register int w = len;
317 register int z;
319 while (w > 0 && ncol > 0)
321 ncol--;
322 if (!(*s & 0x80) || (z = mblen(s, w)) <= 0)
323 z = 1;
324 s += z;
325 w -= z;
328 c = s - bp;
329 ncol = !w && (ncol || !skip);
331 else
333 if ((c = ncol) > len)
334 c = len;
335 else if (c == len && !skip)
336 ncol++;
337 ncol -= c;
339 if (!skip && c)
341 if (sfwrite(fdout, (char*)bp, c) < 0)
342 return;
343 must = 0;
345 bp += c;
346 if (ncol)
347 break;
348 len -= c;
349 ncol = *++lp;
350 skip = !skip;
351 } while (ncol != HUGE);
352 if (!cut->nlflag && (skip || must || cut->reclen))
354 if (cut->ldelim.len > 1)
355 sfwrite(fdout, cut->ldelim.str, cut->ldelim.len);
356 else
357 sfputc(fdout, cut->ldelim.chr);
363 * cut each line of file <fdin> and put results to <fdout> using list <list>
364 * stream <fdin> must be line buffered
/*
 * Field mode worker (-f).
 *
 * Input is taken from <fdin> one buffer at a time with sfreserve().
 * Each byte is classified through cut->space[] (0, SP_WORD, SP_LINE or
 * SP_WIDE) and the fields selected by cut->list are written to <fdout>.
 * `inword' is set when a line runs past the end of the current buffer
 * so that the next buffer continues the same line.  While such a line
 * has not yet shown a delimiter and sflag is off, its text is saved in
 * the temporary stream <fdtmp> so it can still be written out whole.
 * The code after the `failed' label runs on both normal completion and
 * write/read failure; it only closes <fdtmp>.
 *
 * NOTE(review): the listing this block was taken from has lost its
 * brace-only lines; the statements below are kept exactly as found.
 */
367 static void
368 cutfields(Cut_t* cut, Sfio_t* fdin, Sfio_t* fdout)
370 register unsigned char *sp = cut->space; /* byte class table */
371 register unsigned char *cp; /* scan position */
372 register unsigned char *wp; /* position of the delimiter just found */
373 register int c, nfields;
374 register const int *lp = cut->list;
375 register unsigned char *copy; /* start of pending output, 0 while skipping */
376 register int nodelim, empty, inword=0;
377 register unsigned char *ep; /* last byte of the current buffer */
378 unsigned char *bp, *first; /* buffer start; start of current line piece */
379 int lastchar; /* original value of *ep */
380 wchar_t w;
381 Sfio_t *fdtmp = 0; /* created on demand by sftmp(BLOCK) */
382 long offset = 0; /* bytes currently saved in fdtmp */
383 unsigned char mb[8]; /* assembles a multibyte char split across buffers */
384 /* process each buffer */
385 while ((bp = (unsigned char*)sfreserve(fdin, SF_UNBOUND, -1)) && (c = sfvalue(fdin)) > 0)
387 cp = bp;
388 ep = cp + --c;
/* remember the last byte and overwrite it with eob if it differs */
389 if((lastchar = cp[c]) != cut->eob)
390 *ep = cut->eob;
391 /* process each line in the buffer */
392 while (cp <= ep)
394 first = cp;
/* new line: reset per-line state and load the first list count */
395 if (!inword)
397 nodelim = empty = 1;
398 copy = cp;
/* a non-zero first entry is a skip count, so nothing is copied yet */
399 if (nfields = *(lp = cut->list))
400 copy = 0;
401 else
402 nfields = *++lp;
/* continuing a line from the previous buffer */
404 else if (copy)
405 copy = cp;
406 inword = 0;
409 /* skip over non-delimiter characters */
410 if (cut->mb)
411 for (;;)
413 switch (c = sp[*(unsigned char*)cp++])
415 case 0:
416 continue;
417 case SP_WIDE:
418 wp = --cp;
419 while ((c = mb2wc(w, cp, ep - cp)) <= 0)
421 /* mb char possibly spanning buffer boundary -- fun stuff */
422 if ((ep - cp) < mbmax())
424 int i;
425 int j;
426 int k;
428 if (lastchar != cut->eob)
430 *ep = lastchar;
431 if ((c = mb2wc(w, cp, ep - cp)) > 0)
432 break;
/* flush what is pending before the buffer is replaced */
434 if (copy)
436 empty = 0;
437 if ((c = cp - copy) > 0 && sfwrite(fdout, (char*)copy, c) < 0)
438 goto failed;
/* keep the leading bytes of the character, then fetch the next buffer */
440 for (i = 0; i <= (ep - cp); i++)
441 mb[i] = cp[i];
442 if (!(bp = (unsigned char*)sfreserve(fdin, SF_UNBOUND, -1)) || (c = sfvalue(fdin)) <= 0)
443 goto failed;
444 cp = bp;
445 ep = cp + --c;
446 if ((lastchar = cp[c]) != cut->eob)
447 *ep = cut->eob;
/* append bytes from the new buffer until mb[] holds mbmax() bytes */
448 j = i;
449 k = 0;
450 while (j < mbmax())
451 mb[j++] = cp[k++];
452 if ((c = mb2wc(w, (char*)mb, j)) <= 0)
454 c = i;
455 w = 0;
/* step past the part of the character that lives in the new buffer */
457 first = bp = cp += c - i;
458 if (copy)
460 copy = bp;
461 if (w == cut->ldelim.chr)
462 lastchar = cut->ldelim.chr;
463 else if (w != cut->wdelim.chr)
465 empty = 0;
466 if (sfwrite(fdout, (char*)mb, c) < 0)
467 goto failed;
470 c = 0;
472 else
/* undecodable here: take the single byte as the character */
474 w = *cp;
475 c = 1;
477 break;
479 cp += c;
480 c = w;
/* map the decoded character onto the SP_* classes */
481 if (c == cut->wdelim.chr)
483 c = SP_WORD;
484 break;
486 if (c == cut->ldelim.chr)
488 c = SP_LINE;
489 break;
491 continue;
492 default:
493 wp = cp - 1;
494 break;
496 break;
498 else
500 while (!(c = sp[*cp++]));
501 wp = cp - 1;
503 /* check for end-of-line */
504 if (c == SP_LINE)
506 if (cp <= ep)
507 break;
508 if (lastchar == cut->ldelim.chr)
509 break;
510 /* restore cut->last character */
511 if (lastchar != cut->eob)
512 *ep = lastchar;
/* the line continues in the next buffer */
513 inword++;
514 if (!sp[lastchar])
515 break;
/* a field delimiter was seen: count it against the current list entry */
517 nodelim = 0;
518 if (--nfields > 0)
519 continue;
520 nfields = *++lp;
521 if (copy)
523 empty = 0;
524 if ((c = wp - copy) > 0 && sfwrite(fdout, (char*)copy, c) < 0)
525 goto failed;
526 copy = 0;
528 else
529 /* set to delimiter unless the first field */
530 copy = empty ? cp : wp;
531 } while (!inword);
/* the line ended inside this buffer */
532 if (!inword)
534 if (!copy)
536 if (nodelim)
/* no delimiter on the line: emit it whole unless sflag is set */
538 if (!cut->sflag)
540 if (offset)
/* first write the part saved from earlier buffers */
542 sfseek(fdtmp,(Sfoff_t)0,SEEK_SET);
543 sfmove(fdtmp,fdout,offset,-1);
545 copy = first;
548 else
/* NOTE(review): writes a literal '\n', not cut->ldelim -- confirm intended */
549 sfputc(fdout,'\n');
/* discard anything saved for this line */
551 if (offset)
552 sfseek(fdtmp,offset=0,SEEK_SET);
554 if (copy && (c=cp-copy)>0 && (!nodelim || !cut->sflag) && sfwrite(fdout,(char*)copy,c)< 0)
555 goto failed;
557 /* see whether to save in tmp file */
558 if(inword && nodelim && !cut->sflag && (c=cp-first)>0)
560 /* copy line to tmpfile in case no fields */
561 if(!fdtmp)
562 fdtmp = sftmp(BLOCK);
563 sfwrite(fdtmp,(char*)first,c);
564 offset +=c;
567 failed:
568 if(fdtmp)
569 sfclose(fdtmp);
573 b_cut(int argc, char** argv, void* context)
575 register char* cp = 0;
576 register Sfio_t* fp;
577 char* s;
578 int n;
579 Cut_t* cut;
580 int mode = 0;
581 Delim_t wdelim;
582 Delim_t ldelim;
583 size_t reclen = 0;
585 cmdinit(argc, argv, context, ERROR_CATALOG, 0);
586 wdelim.chr = '\t';
587 ldelim.chr = '\n';
588 wdelim.len = ldelim.len = 1;
589 for (;;)
591 switch (n = optget(argv, usage))
593 case 0:
594 break;
595 case 'b':
596 case 'c':
597 if(mode&C_FIELDS)
599 error(2, "f option already specified");
600 continue;
602 cp = opt_info.arg;
603 if(n=='b')
604 mode |= C_BYTES;
605 else
606 mode |= C_CHARS;
607 continue;
608 case 'D':
609 ldelim.str = opt_info.arg;
610 if (mbwide())
612 s = opt_info.arg;
613 ldelim.chr = mbchar(s);
614 if ((n = s - opt_info.arg) > 1)
616 ldelim.len = n;
617 continue;
620 ldelim.chr = *(unsigned char*)opt_info.arg;
621 ldelim.len = 1;
622 continue;
623 case 'd':
624 wdelim.str = opt_info.arg;
625 if (mbwide())
627 s = opt_info.arg;
628 wdelim.chr = mbchar(s);
629 if ((n = s - opt_info.arg) > 1)
631 wdelim.len = n;
632 continue;
635 wdelim.chr = *(unsigned char*)opt_info.arg;
636 wdelim.len = 1;
637 continue;
638 case 'f':
639 if(mode&(C_CHARS|C_BYTES))
641 error(2, "c option already specified");
642 continue;
644 cp = opt_info.arg;
645 mode |= C_FIELDS;
646 continue;
647 case 'n':
648 mode |= C_NOSPLIT;
649 continue;
650 case 'N':
651 mode |= C_NONEWLINE;
652 continue;
653 case 'R':
654 case 'r':
655 if(opt_info.num>0)
656 reclen = opt_info.num;
657 continue;
658 case 's':
659 mode |= C_SUPRESS;
660 continue;
661 case ':':
662 error(2, "%s", opt_info.arg);
663 break;
664 case '?':
665 error(ERROR_usage(2), "%s", opt_info.arg);
666 break;
668 break;
670 argv += opt_info.index;
671 if (error_info.errors)
672 error(ERROR_usage(2), "%s",optusage(NiL));
673 if(!cp)
675 error(2, "b, c or f option must be specified");
676 error(ERROR_usage(2), "%s", optusage(NiL));
678 if(!*cp)
679 error(3, "non-empty b, c or f option must be specified");
680 if((mode & (C_FIELDS|C_SUPRESS)) == C_SUPRESS)
681 error(3, "s option requires f option");
682 cut = cutinit(mode, cp, &wdelim, &ldelim, reclen);
683 if(cp = *argv)
684 argv++;
687 if(!cp || streq(cp,"-"))
688 fp = sfstdin;
689 else if(!(fp = sfopen(NiL,cp,"r")))
691 error(ERROR_system(0),"%s: cannot open",cp);
692 continue;
694 if(mode&C_FIELDS)
695 cutfields(cut,fp,sfstdout);
696 else
697 cutcols(cut,fp,sfstdout);
698 if(fp!=sfstdin)
699 sfclose(fp);
700 } while(cp = *argv++);
701 if (sfsync(sfstdout))
702 error(ERROR_system(0), "write error");
703 return error_info.errors != 0;