1 /* vi: set sw=4 ts=4: */
3 * cut.c - minimalist version of cut
5 * Copyright (C) 1999,2000,2001 by Lineo, inc.
6 * Written by Mark Whitley <markw@codepoet.org>
7 * debloated by Bernhard Fischer
9 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
14 /* This is a NOEXEC applet. Be very careful! */
/* Option letters accepted by the applet; cut_main hands this string to
 * getopt32(). A letter followed by ':' takes an argument (b, c, f, d),
 * the others (s, n) are plain flags -- presumably standard getopt notation. */
18 static const char optstring
[] = "b:c:f:d:sn";
/* Bit masks tested against option_mask32 after option parsing.
 * NOTE(review): the bit positions appear to follow the order of the letters
 * in optstring (b, c, f, d, s); no mask is defined here for 'n'. */
19 #define CUT_OPT_BYTE_FLGS (1<<0)
20 #define CUT_OPT_CHAR_FLGS (1<<1)
21 #define CUT_OPT_FIELDS_FLGS (1<<2)
22 #define CUT_OPT_DELIM_FLGS (1<<3)
23 #define CUT_OPT_SUPPRESS_FLGS (1<<4)
/* Delimiter consulted by cut_file when it is not cutting by bytes/chars:
 * a value of '\n' selects the cut-by-lines path, any other value is the
 * separator used to split a line into fields. */
25 static char delim
= '\t'; /* delimiter, default is tab */
38 /* growable array holding the parsed position lists: cut_main appends one
 * entry per comma-separated list (growing the array with xrealloc) and
 * then sorts the array with qsort using cmpfunc */
39 static struct cut_list
*cut_lists
;
40 static unsigned int nlists
; /* number of elements in cut_lists */
/* qsort() comparison callback for struct cut_list elements; cut_main uses it
 * to sort cut_lists. The result is a's startpos minus b's startpos, so
 * entries end up ordered by ascending startpos.
 * NOTE(review): relies on the subtraction not overflowing -- confirm the
 * type and range of startpos against the struct definition (not visible
 * here). */
43 static int cmpfunc(const void *a
, const void *b
)
45 return (((struct cut_list
*) a
)->startpos
-
46 ((struct cut_list
*) b
)->startpos
);
50 static void cut_file(FILE * file
)
53 unsigned int linenum
= 0; /* keep these zero-based to be consistent */
55 /* go through every line in the file */
56 while ((line
= xmalloc_getline(file
)) != NULL
) {
58 /* set up a list so we can keep track of what's been printed */
59 char * printed
= xzalloc(strlen(line
) * sizeof(char));
60 char * orig_line
= line
;
61 unsigned int cl_pos
= 0;
64 /* cut based on chars/bytes XXX: only works when sizeof(char) == byte */
65 if (option_mask32
& (CUT_OPT_CHAR_FLGS
| CUT_OPT_BYTE_FLGS
)) {
66 /* print the chars specified in each cut list */
67 for (; cl_pos
< nlists
; cl_pos
++) {
68 spos
= cut_lists
[cl_pos
].startpos
;
69 while (spos
< strlen(line
)) {
75 if (spos
> cut_lists
[cl_pos
].endpos
76 || cut_lists
[cl_pos
].endpos
== NON_RANGE
)
80 } else if (delim
== '\n') { /* cut by lines */
81 spos
= cut_lists
[cl_pos
].startpos
;
83 /* get out if we have no more lists to process or if the lines
84 * are lower than what we're interested in */
85 if (linenum
< spos
|| cl_pos
>= nlists
)
88 /* if the line we're looking for is lower than the one we were
89 * passed, it means we displayed it already, so move on */
90 while (spos
< linenum
) {
92 /* go to the next list if we're at the end of this one */
93 if (spos
> cut_lists
[cl_pos
].endpos
94 || cut_lists
[cl_pos
].endpos
== NON_RANGE
) {
96 /* get out if there are no more lists to process */
99 spos
= cut_lists
[cl_pos
].startpos
;
100 /* get out if the current line is lower than the one
101 * we just became interested in */
107 /* If we made it here, it means we've found the line we're
108 * looking for, so print it */
111 } else { /* cut by fields */
112 int ndelim
= -1; /* zero-based / one-based problem */
113 int nfields_printed
= 0;
115 const char delimiter
[2] = { delim
, 0 };
117 /* does this line contain any delimiters? */
118 if (strchr(line
, delim
) == NULL
) {
119 if (!(option_mask32
& CUT_OPT_SUPPRESS_FLGS
))
124 /* process each list on this line, for as long as we've got
125 * a line to process */
126 for (; cl_pos
< nlists
&& line
; cl_pos
++) {
127 spos
= cut_lists
[cl_pos
].startpos
;
129 /* find the field we're looking for */
130 while (line
&& ndelim
< spos
) {
131 field
= strsep(&line
, delimiter
);
135 /* we found it, and it hasn't been printed yet */
136 if (field
&& ndelim
== spos
&& !printed
[ndelim
]) {
137 /* if this isn't our first time through, we need to
138 * print the delimiter after the last field that was
140 if (nfields_printed
> 0)
142 fputs(field
, stdout
);
143 printed
[ndelim
] = 'X';
144 nfields_printed
++; /* shouldn't overflow.. */
149 /* keep going as long as we have a line to work with,
150 * this is a list, and we're not at the end of that
152 } while (spos
<= cut_lists
[cl_pos
].endpos
&& line
153 && cut_lists
[cl_pos
].endpos
!= NON_RANGE
);
156 /* if we printed anything at all, we need to finish it with a
157 * newline because we were handed a chomped line */
/* Shared tail for error messages: cut_main passes it as the %s argument of
 * the "a delimiter may be specified%s" message, and presumably of the
 * similar "suppressing non-delimited lines makes sense%s" message. */
166 static const char _op_on_field
[] = " only when operating on fields";
/* Forward declaration of the applet entry point defined directly below. */
168 int cut_main(int argc
, char **argv
);
169 int cut_main(int argc
, char **argv
)
173 opt_complementary
= "b--bcf:c--bcf:f--bcf";
174 getopt32(argc
, argv
, optstring
, &sopt
, &sopt
, &sopt
, <ok
);
177 if (!(option_mask32
& (CUT_OPT_BYTE_FLGS
| CUT_OPT_CHAR_FLGS
| CUT_OPT_FIELDS_FLGS
)))
178 bb_error_msg_and_die("expected a list of bytes, characters, or fields");
179 if (option_mask32
& BB_GETOPT_ERROR
)
180 bb_error_msg_and_die("only one type of list may be specified");
182 if (option_mask32
& CUT_OPT_DELIM_FLGS
) {
183 if (strlen(ltok
) > 1) {
184 bb_error_msg_and_die("the delimiter must be a single character");
189 /* non-field (char or byte) cutting has some special handling */
190 if (!(option_mask32
& CUT_OPT_FIELDS_FLGS
)) {
191 if (option_mask32
& CUT_OPT_SUPPRESS_FLGS
) {
193 ("suppressing non-delimited lines makes sense%s",
198 ("a delimiter may be specified%s", _op_on_field
);
203 * parse list and put values into startpos and endpos.
204 * valid list formats: N, N-, N-M, -M
205 * more than one list can be separated by commas
211 /* take apart the lists, one by one (they are separated with commas) */
212 while ((ltok
= strsep(&sopt
, ",")) != NULL
) {
214 /* it's actually legal to pass an empty list */
215 if (strlen(ltok
) == 0)
218 /* get the start pos */
219 ntok
= strsep(<ok
, "-");
222 ("internal error: ntok is null for start pos!?\n");
223 } else if (strlen(ntok
) == 0) {
227 /* account for the fact that arrays are zero based, while
228 * the user expects the first char on the line to be char #1 */
233 /* get the end pos */
234 ntok
= strsep(<ok
, "-");
237 } else if (strlen(ntok
) == 0) {
241 /* if the user specified an end position of 0, that means "til the
245 e
--; /* again, arrays are zero based, lines are 1 based */
250 /* if there's something left to tokenize, the user passed
253 bb_error_msg_and_die("invalid byte or field list");
255 /* add the new list */
256 cut_lists
= xrealloc(cut_lists
, sizeof(struct cut_list
) * (++nlists
));
257 cut_lists
[nlists
-1].startpos
= s
;
258 cut_lists
[nlists
-1].endpos
= e
;
261 /* make sure we got some cut positions out of all that */
263 bb_error_msg_and_die("missing list of positions");
265 /* now that the lists are parsed, we need to sort them to make life
266 * easier on us when it comes time to print the chars / fields / lines
268 qsort(cut_lists
, nlists
, sizeof(struct cut_list
), cmpfunc
);
271 /* argv[0..argc-1] should be names of file to process. If no
272 * files were specified or '-' was specified, take input from stdin.
273 * Otherwise, we process all the files specified. */
274 if (argv
[0] == NULL
|| LONE_DASH(argv
[0])) {
280 file
= fopen_or_warn(argv
[0], "r");
287 if (ENABLE_FEATURE_CLEAN_UP
)