1 /* cut - remove parts of lines of files
2 Copyright (C) 1997-2016 Free Software Foundation, Inc.
3 Copyright (C) 1984 David M. Ihnat
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 /* Written by David Ihnat. */
20 /* POSIX changes, bug fixes, long-named options, and cleanup
21 by David MacKenzie <djm@gnu.ai.mit.edu>.
23 Rewrite cut_fields and cut_bytes -- Jim Meyering. */
30 #include <sys/types.h>
35 #include "getndelim2.h"
39 #include "set-fields.h"
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "cut"
/* Comma-separated author list, each name passed through proper_name.
   It is handed, together with PROGRAM_NAME, to case_GETOPT_VERSION_CHAR
   in main.  */
#define AUTHORS \
  proper_name ("David M. Ihnat"), \
  proper_name ("David MacKenzie"), \
  proper_name ("Jim Meyering")
/* Report MESSAGE through error () and then call usage (EXIT_FAILURE).
   The body is wrapped in do { ... } while (0) so that the macro expands
   to exactly one statement: main uses it as the unbraced body of `if',
   where a bare statement pair would run usage () unconditionally.  */
#define FATAL_ERROR(Message)						\
  do									\
    {									\
      error (0, 0, (Message));						\
      usage (EXIT_FAILURE);						\
    }									\
  while (0)
/* Pointer inside RP.  When checking if a byte or field is selected
   by a finite range, we check if it is between CURRENT_RP.LO
   and CURRENT_RP.HI.  If the byte or field index is greater than
   CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair.  */
static struct field_range_pair *current_rp;
/* This buffer is used to support the semantics of the -s option
   (or lack of same) when the specified field list includes (does
   not include) the first field.  In both of those cases, the entire
   first field must be read into this buffer to determine whether it
   is followed by a delimiter or a newline before any of it may be
   output.  Otherwise, cut_fields can do the job without using this
   buffer.  */
static char *field_1_buffer;

/* The number of bytes allocated for FIELD_1_BUFFER.  */
static size_t field_1_bufsize;
/* Which kind of list main has been asked to build.  */
enum operating_mode
  {
    /* No list has been specified yet; main starts in this state and
       rejects a second list option once it has been left.  */
    undefined_mode,

    /* Output characters that are in the given bytes.  */
    byte_mode,

    /* Output the given delimiter-separated fields.  */
    field_mode
  };

/* The mode currently in effect; cut_stream dispatches on it.  */
static enum operating_mode operating_mode;
/* If true do not output lines containing no delimiter characters.
   Otherwise, all such lines are printed.  This option is valid only
   with field mode.  */
static bool suppress_non_delimited;

/* If true, print all bytes, characters, or fields _except_
   those that were specified.  */
static bool complement;

/* The delimiter character for field mode.  */
static unsigned char delim;

/* The delimiter for each line/record.  */
static unsigned char line_delim = '\n';

/* True if the --output-delimiter=STRING option was specified.  */
static bool output_delimiter_specified;

/* The length of output_delimiter_string.  */
static size_t output_delimiter_length;

/* The output field separator string.  Defaults to the 1-character
   string consisting of the input delimiter.  */
static char *output_delimiter_string;

/* True if we have ever read standard input.  */
static bool have_read_stdin;
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
enum
{
  OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
  COMPLEMENT_OPTION
};
125 static struct option
const longopts
[] =
127 {"bytes", required_argument
, NULL
, 'b'},
128 {"characters", required_argument
, NULL
, 'c'},
129 {"fields", required_argument
, NULL
, 'f'},
130 {"delimiter", required_argument
, NULL
, 'd'},
131 {"only-delimited", no_argument
, NULL
, 's'},
132 {"output-delimiter", required_argument
, NULL
, OUTPUT_DELIMITER_OPTION
},
133 {"complement", no_argument
, NULL
, COMPLEMENT_OPTION
},
134 {"zero-terminated", no_argument
, NULL
, 'z'},
135 {GETOPT_HELP_OPTION_DECL
},
136 {GETOPT_VERSION_OPTION_DECL
},
143 if (status
!= EXIT_SUCCESS
)
148 Usage: %s OPTION... [FILE]...\n\
152 Print selected parts of lines from each FILE to standard output.\n\
156 emit_mandatory_arg_note ();
159 -b, --bytes=LIST select only these bytes\n\
160 -c, --characters=LIST select only these characters\n\
161 -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
164 -f, --fields=LIST select only these fields; also print any line\n\
165 that contains no delimiter character, unless\n\
166 the -s option is specified\n\
170 --complement complement the set of selected bytes, characters\n\
174 -s, --only-delimited do not print lines not containing delimiters\n\
175 --output-delimiter=STRING use STRING as the output delimiter\n\
176 the default is to use the input delimiter\n\
179 -z, --zero-terminated line delimiter is NUL, not newline\n\
181 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
182 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
185 Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
186 range, or many ranges separated by commas. Selected input is written\n\
187 in the same order that it is read, and is written exactly once.\n\
190 Each range is one of:\n\
192 N N'th byte, character or field, counted from 1\n\
193 N- from N'th byte, character or field, to end of line\n\
194 N-M from N'th to M'th (included) byte, character or field\n\
195 -M from first to M'th (included) byte, character or field\n\
197 emit_ancillary_info (PROGRAM_NAME
);
203 /* Increment *ITEM_IDX (i.e., a field or byte index),
204 and if required CURRENT_RP. */
207 next_item (size_t *item_idx
)
210 if ((*item_idx
) > current_rp
->hi
)
214 /* Return nonzero if the K'th field or byte is printable. */
219 return current_rp
->lo
<= k
;
222 /* Return nonzero if K'th byte is the beginning of a range. */
225 is_range_start_index (size_t k
)
227 return k
== current_rp
->lo
;
230 /* Read from stream STREAM, printing to standard output any selected bytes. */
233 cut_bytes (FILE *stream
)
235 size_t byte_idx
; /* Number of bytes in the line so far. */
236 /* Whether to begin printing delimiters between ranges for the current line.
237 Set after we've begun printing data corresponding to the first range. */
238 bool print_delimiter
;
241 print_delimiter
= false;
245 int c
; /* Each character from the file. */
253 print_delimiter
= false;
259 putchar (line_delim
);
264 next_item (&byte_idx
);
265 if (print_kth (byte_idx
))
267 if (output_delimiter_specified
)
269 if (print_delimiter
&& is_range_start_index (byte_idx
))
271 fwrite (output_delimiter_string
, sizeof (char),
272 output_delimiter_length
, stdout
);
274 print_delimiter
= true;
283 /* Read from stream STREAM, printing to standard output any selected fields. */
286 cut_fields (FILE *stream
)
289 size_t field_idx
= 1;
290 bool found_any_selected_field
= false;
291 bool buffer_first_field
;
302 /* To support the semantics of the -s flag, we may have to buffer
303 all of the first field to determine whether it is 'delimited.'
304 But that is unnecessary if all non-delimited lines must be printed
305 and the first field has been selected, or if non-delimited lines
306 must be suppressed and the first field has *not* been selected.
307 That is because a non-delimited line has exactly one field. */
308 buffer_first_field
= (suppress_non_delimited
^ !print_kth (1));
312 if (field_idx
== 1 && buffer_first_field
)
317 len
= getndelim2 (&field_1_buffer
, &field_1_bufsize
, 0,
318 GETNLINE_NO_LIMIT
, delim
, line_delim
, stream
);
321 free (field_1_buffer
);
322 field_1_buffer
= NULL
;
323 if (ferror (stream
) || feof (stream
))
329 assert (n_bytes
!= 0);
333 /* If the first field extends to the end of line (it is not
334 delimited) and we are printing all non-delimited lines,
336 if (to_uchar (field_1_buffer
[n_bytes
- 1]) != delim
)
338 if (suppress_non_delimited
)
344 fwrite (field_1_buffer
, sizeof (char), n_bytes
, stdout
);
345 /* Make sure the output line is newline terminated. */
346 if (field_1_buffer
[n_bytes
- 1] != line_delim
)
347 putchar (line_delim
);
354 /* Print the field, but not the trailing delimiter. */
355 fwrite (field_1_buffer
, sizeof (char), n_bytes
- 1, stdout
);
357 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
358 if (delim
== line_delim
)
360 int last_c
= getc (stream
);
363 ungetc (last_c
, stream
);
364 found_any_selected_field
= true;
368 found_any_selected_field
= true;
370 next_item (&field_idx
);
375 if (print_kth (field_idx
))
377 if (found_any_selected_field
)
379 fwrite (output_delimiter_string
, sizeof (char),
380 output_delimiter_length
, stdout
);
382 found_any_selected_field
= true;
384 while ((c
= getc (stream
)) != delim
&& c
!= line_delim
&& c
!= EOF
)
392 while ((c
= getc (stream
)) != delim
&& c
!= line_delim
&& c
!= EOF
)
398 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
399 if (delim
== line_delim
&& c
== delim
)
401 int last_c
= getc (stream
);
403 ungetc (last_c
, stream
);
409 next_item (&field_idx
);
410 else if (c
== line_delim
|| c
== EOF
)
412 if (found_any_selected_field
413 || !(suppress_non_delimited
&& field_idx
== 1))
415 if (c
== line_delim
|| prev_c
!= line_delim
416 || delim
== line_delim
)
417 putchar (line_delim
);
423 found_any_selected_field
= false;
429 cut_stream (FILE *stream
)
431 if (operating_mode
== byte_mode
)
437 /* Process file FILE to standard output.
438 Return true if successful. */
441 cut_file (char const *file
)
445 if (STREQ (file
, "-"))
447 have_read_stdin
= true;
452 stream
= fopen (file
, "r");
455 error (0, errno
, "%s", quotef (file
));
460 fadvise (stream
, FADVISE_SEQUENTIAL
);
466 error (0, errno
, "%s", quotef (file
));
469 if (STREQ (file
, "-"))
470 clearerr (stream
); /* Also clear EOF. */
471 else if (fclose (stream
) == EOF
)
473 error (0, errno
, "%s", quotef (file
));
480 main (int argc
, char **argv
)
484 bool delim_specified
= false;
485 char *spec_list_string
IF_LINT ( = NULL
);
487 initialize_main (&argc
, &argv
);
488 set_program_name (argv
[0]);
489 setlocale (LC_ALL
, "");
490 bindtextdomain (PACKAGE
, LOCALEDIR
);
491 textdomain (PACKAGE
);
493 atexit (close_stdout
);
495 operating_mode
= undefined_mode
;
497 /* By default, all non-delimited lines are printed. */
498 suppress_non_delimited
= false;
501 have_read_stdin
= false;
503 while ((optc
= getopt_long (argc
, argv
, "b:c:d:f:nsz", longopts
, NULL
)) != -1)
509 /* Build the byte list. */
510 if (operating_mode
!= undefined_mode
)
511 FATAL_ERROR (_("only one type of list may be specified"));
512 operating_mode
= byte_mode
;
513 spec_list_string
= optarg
;
517 /* Build the field list. */
518 if (operating_mode
!= undefined_mode
)
519 FATAL_ERROR (_("only one type of list may be specified"));
520 operating_mode
= field_mode
;
521 spec_list_string
= optarg
;
526 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
527 if (optarg
[0] != '\0' && optarg
[1] != '\0')
528 FATAL_ERROR (_("the delimiter must be a single character"));
530 delim_specified
= true;
533 case OUTPUT_DELIMITER_OPTION
:
534 output_delimiter_specified
= true;
535 /* Interpret --output-delimiter='' to mean
536 'use the NUL byte as the delimiter.' */
537 output_delimiter_length
= (optarg
[0] == '\0'
538 ? 1 : strlen (optarg
));
539 output_delimiter_string
= xstrdup (optarg
);
546 suppress_non_delimited
= true;
553 case COMPLEMENT_OPTION
:
557 case_GETOPT_HELP_CHAR
;
559 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
562 usage (EXIT_FAILURE
);
566 if (operating_mode
== undefined_mode
)
567 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
569 if (delim_specified
&& operating_mode
!= field_mode
)
570 FATAL_ERROR (_("an input delimiter may be specified only\
571 when operating on fields"));
573 if (suppress_non_delimited
&& operating_mode
!= field_mode
)
574 FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
575 \tonly when operating on fields"));
577 set_fields (spec_list_string
,
578 ( (operating_mode
== field_mode
) ? 0 : SETFLD_ERRMSG_USE_POS
)
579 | (complement
? SETFLD_COMPLEMENT
: 0) );
581 if (!delim_specified
)
584 if (output_delimiter_string
== NULL
)
586 static char dummy
[2];
589 output_delimiter_string
= dummy
;
590 output_delimiter_length
= 1;
596 for (ok
= true; optind
< argc
; optind
++)
597 ok
&= cut_file (argv
[optind
]);
600 if (have_read_stdin
&& fclose (stdin
) == EOF
)
602 error (0, errno
, "-");
606 IF_LINT (reset_fields ());
608 return ok
? EXIT_SUCCESS
: EXIT_FAILURE
;