1 /* cut - remove parts of lines of files
2 Copyright (C) 1997-2024 Free Software Foundation, Inc.
3 Copyright (C) 1984 David M. Ihnat
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 /* Written by David Ihnat. */
20 /* POSIX changes, bug fixes, long-named options, and cleanup
21 by David MacKenzie <djm@gnu.ai.mit.edu>.
23 Rewrite cut_fields and cut_bytes -- Jim Meyering. */
29 #include <sys/types.h>
34 #include "getndelim2.h"
36 #include "set-fields.h"
38 /* The official name of this program (e.g., no 'g' prefix). */
39 #define PROGRAM_NAME "cut"
42 proper_name ("David M. Ihnat"), \
43 proper_name ("David MacKenzie"), \
44 proper_name ("Jim Meyering")
46 #define FATAL_ERROR(Message) \
49 error (0, 0, (Message)); \
50 usage (EXIT_FAILURE); \
55 /* Pointer inside RP. When checking if a byte or field is selected
56 by a finite range, we check if it is between CURRENT_RP.LO
57 and CURRENT_RP.HI. If the byte or field index is greater than
58 CURRENT_RP.HI then we advance CURRENT_RP to point to the next range pair. */
59 static struct field_range_pair
*current_rp
;
61 /* This buffer is used to support the semantics of the -s option
62 (or lack of same) when the specified field list includes (does
63 not include) the first field. In both of those cases, the entire
64 first field must be read into this buffer to determine whether it
65 is followed by a delimiter or a newline before any of it may be
66 output. Otherwise, cut_fields can do the job without using this buffer. */
68 static char *field_1_buffer
;
70 /* The number of bytes allocated for FIELD_1_BUFFER. */
71 static size_t field_1_bufsize
;
73 /* If true, do not output lines containing no delimiter characters.
74 Otherwise, all such lines are printed. This option is valid only when operating on fields. */
76 static bool suppress_non_delimited
;
78 /* If true, print all bytes, characters, or fields _except_
79 those that were specified. */
80 static bool complement
;
82 /* The delimiter character for field mode. */
83 static unsigned char delim
;
85 /* The delimiter for each line/record. */
86 static unsigned char line_delim
= '\n';
88 /* The length of output_delimiter_string. */
89 static size_t output_delimiter_length
;
91 /* The output field separator string. Defaults to the 1-character
92 string consisting of the input delimiter. */
93 static char *output_delimiter_string
;
95 /* The output delimiter string contents, if the default. */
96 static char output_delimiter_default
[1];
98 /* True if we have ever read standard input. */
99 static bool have_read_stdin
;
101 /* For long options that have no equivalent short option, use a
102 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
105 OUTPUT_DELIMITER_OPTION
= CHAR_MAX
+ 1,
/* Long-option table handed to getopt_long in main. Each entry maps a
   long name to the value getopt_long reports for it: either the
   matching short-option character, or one of the pseudo-option
   constants (OUTPUT_DELIMITER_OPTION, COMPLEMENT_OPTION) for options
   that have no short form. The all-null entry ends the table. */
109 static struct option
const longopts
[] =
111 {"bytes", required_argument
, nullptr, 'b'},
112 {"characters", required_argument
, nullptr, 'c'},
113 {"fields", required_argument
, nullptr, 'f'},
114 {"delimiter", required_argument
, nullptr, 'd'},
115 {"only-delimited", no_argument
, nullptr, 's'},
116 {"output-delimiter", required_argument
, nullptr, OUTPUT_DELIMITER_OPTION
},
117 {"complement", no_argument
, nullptr, COMPLEMENT_OPTION
},
118 {"zero-terminated", no_argument
, nullptr, 'z'},
119 {GETOPT_HELP_OPTION_DECL
},
120 {GETOPT_VERSION_OPTION_DECL
},
121 {nullptr, 0, nullptr, 0}
127 if (status
!= EXIT_SUCCESS
)
132 Usage: %s OPTION... [FILE]...\n\
136 Print selected parts of lines from each FILE to standard output.\n\
140 emit_mandatory_arg_note ();
143 -b, --bytes=LIST select only these bytes\n\
144 -c, --characters=LIST select only these characters\n\
145 -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
148 -f, --fields=LIST select only these fields; also print any line\n\
149 that contains no delimiter character, unless\n\
150 the -s option is specified\n\
154 --complement complement the set of selected bytes, characters\n\
158 -s, --only-delimited do not print lines not containing delimiters\n\
159 --output-delimiter=STRING use STRING as the output delimiter\n\
160 the default is to use the input delimiter\n\
163 -z, --zero-terminated line delimiter is NUL, not newline\n\
165 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
166 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
169 Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
170 range, or many ranges separated by commas. Selected input is written\n\
171 in the same order that it is read, and is written exactly once.\n\
174 Each range is one of:\n\
176 N N'th byte, character or field, counted from 1\n\
177 N- from N'th byte, character or field, to end of line\n\
178 N-M from N'th to M'th (included) byte, character or field\n\
179 -M from first to M'th (included) byte, character or field\n\
181 emit_ancillary_info (PROGRAM_NAME
);
187 /* Increment *ITEM_IDX (i.e., a field or byte index),
188 and if required CURRENT_RP. The index is compared with the HI bound
   of the range pair that CURRENT_RP points to; an index beyond that
   bound is the condition under which CURRENT_RP has to move on to the
   next range pair. */
191 next_item (uintmax_t *item_idx
)
/* Has the index gone past the upper bound of the current range? */
194 if ((*item_idx
) > current_rp
->hi
)
198 /* Return nonzero if the K'th field or byte is printable.
   Only the LO bound of the current range pair is tested here.
   NOTE(review): this appears to rely on next_item having already
   moved CURRENT_RP past every range whose HI is below K, so that
   LO <= K is enough to mean K lies inside the range -- confirm
   against the callers. */
201 print_kth (uintmax_t k
)
203 return current_rp
->lo
<= k
;
206 /* Return nonzero if K'th byte is the beginning of a range.
   That is, K equals the LO bound of the range pair CURRENT_RP points
   to. cut_bytes uses this to decide where an output delimiter is
   written between two selected ranges. */
209 is_range_start_index (uintmax_t k
)
211 return k
== current_rp
->lo
;
214 /* Read from stream STREAM, printing to standard output any selected bytes. */
/* Selection is driven by the file-scope CURRENT_RP through next_item
   and print_kth. When an explicit output delimiter is in effect, it is
   written in front of the first byte of a selected range, but only once
   PRINT_DELIMITER has been set by an earlier selected byte. */
217 cut_bytes (FILE *stream
)
219 uintmax_t byte_idx
; /* Number of bytes in the line so far. */
220 /* Whether to begin printing delimiters between ranges for the current line.
221 Set after we've begun printing data corresponding to the first range. */
222 bool print_delimiter
;
225 print_delimiter
= false;
229 int c
; /* Each character from the file. */
/* Reset so that no delimiter precedes the first range printed next. */
238 print_delimiter
= false;
/* A negative putchar result means the line terminator was not written. */
245 if (putchar (line_delim
) < 0)
/* Advance the byte index (and CURRENT_RP if needed) before testing it. */
252 next_item (&byte_idx
);
253 if (print_kth (byte_idx
))
/* The separator logic is skipped entirely while the output delimiter
   is still the built-in default buffer. */
255 if (output_delimiter_string
!= output_delimiter_default
)
/* Separate this range from the previously printed one. */
257 if (print_delimiter
&& is_range_start_index (byte_idx
))
/* A short count from fwrite means the delimiter was not fully written. */
259 if (fwrite (output_delimiter_string
, sizeof (char),
260 output_delimiter_length
, stdout
)
261 != output_delimiter_length
)
/* From now on, later range starts on this line get a delimiter. */
264 print_delimiter
= true;
274 /* Read from stream STREAM, printing to standard output any selected fields. */
277 cut_fields (FILE *stream
)
279 int c
; /* Each character from the file. */
280 uintmax_t field_idx
= 1;
281 bool found_any_selected_field
= false;
282 bool buffer_first_field
;
293 /* To support the semantics of the -s flag, we may have to buffer
294 all of the first field to determine whether it is 'delimited.'
295 But that is unnecessary if all non-delimited lines must be printed
296 and the first field has been selected, or if non-delimited lines
297 must be suppressed and the first field has *not* been selected.
298 That is because a non-delimited line has exactly one field. */
299 buffer_first_field
= (suppress_non_delimited
^ !print_kth (1));
303 if (field_idx
== 1 && buffer_first_field
)
308 len
= getndelim2 (&field_1_buffer
, &field_1_bufsize
, 0,
309 GETNLINE_NO_LIMIT
, delim
, line_delim
, stream
);
312 free (field_1_buffer
);
313 field_1_buffer
= nullptr;
314 if (ferror (stream
) || feof (stream
))
320 affirm (n_bytes
!= 0);
324 /* If the first field extends to the end of line (it is not
325 delimited) and we are printing all non-delimited lines, print this one. */
327 if (to_uchar (field_1_buffer
[n_bytes
- 1]) != delim
)
329 if (suppress_non_delimited
)
335 if (fwrite (field_1_buffer
, sizeof (char), n_bytes
, stdout
)
338 /* Make sure the output line is newline terminated. */
339 if (field_1_buffer
[n_bytes
- 1] != line_delim
)
341 if (putchar (line_delim
) < 0)
351 /* Print the field, but not the trailing delimiter. */
352 if (fwrite (field_1_buffer
, sizeof (char), n_bytes
- 1, stdout
)
356 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
357 if (delim
== line_delim
)
359 int last_c
= getc (stream
);
362 ungetc (last_c
, stream
);
363 found_any_selected_field
= true;
368 found_any_selected_field
= true;
371 next_item (&field_idx
);
376 if (print_kth (field_idx
))
378 if (found_any_selected_field
)
380 if (fwrite (output_delimiter_string
, sizeof (char),
381 output_delimiter_length
, stdout
)
382 != output_delimiter_length
)
385 found_any_selected_field
= true;
387 while ((c
= getc (stream
)) != delim
&& c
!= line_delim
&& c
!= EOF
)
396 while ((c
= getc (stream
)) != delim
&& c
!= line_delim
&& c
!= EOF
)
400 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
401 if (delim
== line_delim
&& c
== delim
)
403 int last_c
= getc (stream
);
405 ungetc (last_c
, stream
);
411 next_item (&field_idx
);
412 else if (c
== line_delim
|| c
== EOF
)
414 if (found_any_selected_field
415 || !(suppress_non_delimited
&& field_idx
== 1))
417 /* Make sure the output line is newline terminated. */
418 if (c
== line_delim
|| prev_c
!= line_delim
419 || delim
== line_delim
)
421 if (putchar (line_delim
) < 0)
428 /* Start processing the next input line. */
431 found_any_selected_field
= false;
436 /* Process file FILE to standard output, using CUT_STREAM.
437 Return true if successful.
   A FILE consisting of a single dash is handled specially: it sets
   HAVE_READ_STDIN and the stream is not closed here, only its error and
   EOF indicators are cleared (main closes stdin once, after all
   operands). Any other FILE is opened with fopen in read mode and
   closed with fclose; failures are reported through error with the
   quoted file name. */
440 cut_file (char const *file
, void (*cut_stream
) (FILE *))
444 if (STREQ (file
, "-"))
/* Remember that standard input was used so that main can close it. */
446 have_read_stdin
= true;
448 assume (stream
); /* Pacify GCC bug#109613. */
452 stream
= fopen (file
, "r");
/* Open failure: report errno against the quoted file name. */
453 if (stream
== nullptr)
455 error (0, errno
, "%s", quotef (file
));
/* Presumably an access-pattern hint: the stream is read front to back. */
460 fadvise (stream
, FADVISE_SEQUENTIAL
);
465 if (!ferror (stream
))
/* Standard input is not closed here; just reset its indicators. */
467 if (STREQ (file
, "-"))
468 clearerr (stream
); /* Also clear EOF. */
469 else if (fclose (stream
) == EOF
)
/* Report the saved error code ERR against the quoted file name. */
473 error (0, err
, "%s", quotef (file
));
/* Program entry point. Parse the command-line options with
   getopt_long, hand the single LIST argument to set_fields, pick
   cut_bytes or cut_fields according to BYTE_MODE, and run cut_file on
   standard input (a single dash) or on each remaining ARGV operand.
   The exit status is EXIT_SUCCESS when OK is still true at the end,
   EXIT_FAILURE otherwise. */
480 main (int argc
, char **argv
)
484 bool delim_specified
= false;
485 bool byte_mode
= false;
486 char *spec_list_string
= nullptr;
488 initialize_main (&argc
, &argv
);
489 set_program_name (argv
[0]);
490 setlocale (LC_ALL
, "");
491 bindtextdomain (PACKAGE
, LOCALEDIR
);
492 textdomain (PACKAGE
);
/* Register close_stdout to run at program exit. */
494 atexit (close_stdout
);
496 /* By default, all non-delimited lines are printed. */
497 suppress_non_delimited
= false;
500 have_read_stdin
= false;
502 while ((optc
= getopt_long (argc
, argv
, "b:c:d:f:nsz", longopts
, nullptr))
509 /* Build the byte list. */
513 /* Build the field list. */
/* Only one list option is accepted; a second one is a fatal usage error. */
514 if (spec_list_string
)
515 FATAL_ERROR (_("only one list may be specified"));
516 spec_list_string
= optarg
;
521 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
522 if (optarg
[0] != '\0' && optarg
[1] != '\0')
523 FATAL_ERROR (_("the delimiter must be a single character"));
525 delim_specified
= true;
528 case OUTPUT_DELIMITER_OPTION
:
529 /* Interpret --output-delimiter='' to mean
530 'use the NUL byte as the delimiter.' */
531 output_delimiter_length
= (optarg
[0] == '\0'
532 ? 1 : strlen (optarg
));
/* The argument is used in place, not copied. */
533 output_delimiter_string
= optarg
;
540 suppress_non_delimited
= true;
547 case COMPLEMENT_OPTION
:
551 case_GETOPT_HELP_CHAR
;
552 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
554 usage (EXIT_FAILURE
);
/* A list of bytes, characters, or fields is mandatory. */
558 if (!spec_list_string
)
559 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
564 FATAL_ERROR (_("an input delimiter may be specified only\
565 when operating on fields"));
567 if (suppress_non_delimited
)
568 FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
569 \tonly when operating on fields"));
/* Parse the list. The flag names suggest that byte mode selects
   position-style diagnostics and that the complement flag inverts the
   selection -- see set-fields.h to confirm. */
572 set_fields (spec_list_string
,
573 ((byte_mode
? SETFLD_ERRMSG_USE_POS
: 0)
574 | (complement
? SETFLD_COMPLEMENT
: 0)));
576 if (!delim_specified
)
/* No explicit output delimiter: fall back to a one-byte string that
   holds the input delimiter. */
579 if (output_delimiter_string
== nullptr)
581 output_delimiter_default
[0] = delim
;
582 output_delimiter_string
= output_delimiter_default
;
583 output_delimiter_length
= 1;
/* Select the per-stream worker according to the mode. */
586 void (*cut_stream
) (FILE *) = byte_mode
? cut_bytes
: cut_fields
;
588 ok
= cut_file ("-", cut_stream
);
/* Process every remaining operand; OK stays true only if all succeed. */
590 for (ok
= true; optind
< argc
; optind
++)
591 ok
&= cut_file (argv
[optind
], cut_stream
);
/* Standard input is closed once, here, rather than in cut_file. */
594 if (have_read_stdin
&& fclose (stdin
) == EOF
)
596 error (0, errno
, "-");
600 return ok
? EXIT_SUCCESS
: EXIT_FAILURE
;