tests: add fold(1) test for --bytes option
[coreutils.git] / src / cat.c
blobb7aa853af386e3bbac95ec0c188b16a0251c064b
/* cat -- concatenate files and print on the standard output.
   Copyright (C) 1988-2024 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Differences from the Unix cat:
   * Always unbuffered, -u is ignored.
   * Usually much faster than other versions of cat, the difference
   is especially apparent when using the -v option.

   By tege@sics.se, Torbjörn Granlund, advised by rms, Richard Stallman.  */

24 #include <config.h>
26 #include <stdio.h>
27 #include <getopt.h>
28 #include <sys/types.h>
30 #if HAVE_STROPTS_H
31 # include <stropts.h>
32 #endif
33 #include <sys/ioctl.h>
35 #include "system.h"
36 #include "alignalloc.h"
37 #include "ioblksize.h"
38 #include "fadvise.h"
39 #include "full-write.h"
40 #include "safe-read.h"
41 #include "xbinary-io.h"
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "cat"

/* Author list handed to the version-printing code.  The first author is
   given both as plain ASCII and as UTF-8 (octal escapes for 'ö').  */
#define AUTHORS \
  proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
  proper_name ("Richard M. Stallman")

/* Name of input file.  May be "-".  */
static char const *infile;

/* Descriptor on which input file is open.  */
static int input_desc;

/* Buffer for line numbers.
   An 11 digit counter may overflow within an hour on a P2/466,
   an 18 digit counter needs about 1000y.
   Layout: 18 right-aligned digit positions (blank padded, initially
   a single '0'), then a TAB, then the terminating NUL.  */
#define LINE_COUNTER_BUF_LEN 20
static char line_buf[LINE_COUNTER_BUF_LEN] =
  {
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '0',
    '\t', '\0'
  };

/* Position in 'line_buf' where printing starts.  This will not change
   unless the number of lines is larger than 999999.  */
static char *line_num_print = line_buf + LINE_COUNTER_BUF_LEN - 8;

/* Position of the first digit in 'line_buf'.  */
static char *line_num_start = line_buf + LINE_COUNTER_BUF_LEN - 3;

/* Position of the last digit in 'line_buf'.  */
static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3;

/* Preserves the 'cat' function's local 'newlines' between invocations.  */
static int newlines2 = 0;

/* Whether there is a pending CR to process.  */
static bool pending_cr = false;

83 void
84 usage (int status)
86 if (status != EXIT_SUCCESS)
87 emit_try_help ();
88 else
90 printf (_("\
91 Usage: %s [OPTION]... [FILE]...\n\
92 "),
93 program_name);
94 fputs (_("\
95 Concatenate FILE(s) to standard output.\n\
96 "), stdout);
98 emit_stdin_note ();
100 fputs (_("\
102 -A, --show-all equivalent to -vET\n\
103 -b, --number-nonblank number nonempty output lines, overrides -n\n\
104 -e equivalent to -vE\n\
105 -E, --show-ends display $ at end of each line\n\
106 -n, --number number all output lines\n\
107 -s, --squeeze-blank suppress repeated empty output lines\n\
108 "), stdout);
109 fputs (_("\
110 -t equivalent to -vT\n\
111 -T, --show-tabs display TAB characters as ^I\n\
112 -u (ignored)\n\
113 -v, --show-nonprinting use ^ and M- notation, except for LFD and TAB\n\
114 "), stdout);
115 fputs (HELP_OPTION_DESCRIPTION, stdout);
116 fputs (VERSION_OPTION_DESCRIPTION, stdout);
117 printf (_("\
119 Examples:\n\
120 %s f - g Output f's contents, then standard input, then g's contents.\n\
121 %s Copy standard input to standard output.\n\
123 program_name, program_name);
124 emit_ancillary_info (PROGRAM_NAME);
126 exit (status);
129 /* Compute the next line number. */
131 static void
132 next_line_num (void)
134 char *endp = line_num_end;
137 if ((*endp)++ < '9')
138 return;
139 *endp-- = '0';
141 while (endp >= line_num_start);
143 if (line_num_start > line_buf)
144 *--line_num_start = '1';
145 else
146 *line_buf = '>';
147 if (line_num_start < line_num_print)
148 line_num_print--;
151 /* Plain cat. Copy the file behind 'input_desc' to STDOUT_FILENO.
152 BUF (of size BUFSIZE) is the I/O buffer, used by reads and writes.
153 Return true if successful. */
155 static bool
156 simple_cat (char *buf, idx_t bufsize)
158 /* Loop until the end of the file. */
160 while (true)
162 /* Read a block of input. */
164 ptrdiff_t n_read = safe_read (input_desc, buf, bufsize);
165 if (n_read < 0)
167 error (0, errno, "%s", quotef (infile));
168 return false;
171 /* End of this file? */
173 if (n_read == 0)
174 return true;
176 /* Write this block out. */
178 if (full_write (STDOUT_FILENO, buf, n_read) != n_read)
179 write_error ();
183 /* Write any pending output to STDOUT_FILENO.
184 Pending is defined to be the *BPOUT - OUTBUF bytes starting at OUTBUF.
185 Then set *BPOUT to OUTPUT if it's not already that value. */
187 static inline void
188 write_pending (char *outbuf, char **bpout)
190 idx_t n_write = *bpout - outbuf;
191 if (0 < n_write)
193 if (full_write (STDOUT_FILENO, outbuf, n_write) != n_write)
194 write_error ();
195 *bpout = outbuf;
199 /* Copy the file behind 'input_desc' to STDOUT_FILENO.
200 Use INBUF and read INSIZE with each call,
201 and OUTBUF and write OUTSIZE with each call.
202 (The buffers are a bit larger than the I/O sizes.)
203 The remaining boolean args say what 'cat' options to use.
205 Return true if successful.
206 Called if any option more than -u was specified.
208 A newline character is always put at the end of the buffer, to make
209 an explicit test for buffer end unnecessary. */
211 static bool
212 cat (char *inbuf, idx_t insize, char *outbuf, idx_t outsize,
213 bool show_nonprinting, bool show_tabs, bool number, bool number_nonblank,
214 bool show_ends, bool squeeze_blank)
216 /* Last character read from the input buffer. */
217 unsigned char ch;
219 /* Determines how many consecutive newlines there have been in the
220 input. 0 newlines makes NEWLINES -1, 1 newline makes NEWLINES 1,
221 etc. Initially 0 to indicate that we are at the beginning of a
222 new line. The "state" of the procedure is determined by
223 NEWLINES. */
224 int newlines = newlines2;
226 #ifdef FIONREAD
227 /* If nonzero, use the FIONREAD ioctl, as an optimization.
228 (On Ultrix, it is not supported on NFS file systems.) */
229 bool use_fionread = true;
230 #endif
232 /* The inbuf pointers are initialized so that BPIN > EOB, and thereby input
233 is read immediately. */
235 /* Pointer to the first non-valid byte in the input buffer, i.e., the
236 current end of the buffer. */
237 char *eob = inbuf;
239 /* Pointer to the next character in the input buffer. */
240 char *bpin = eob + 1;
242 /* Pointer to the position where the next character shall be written. */
243 char *bpout = outbuf;
245 while (true)
249 /* Write if there are at least OUTSIZE bytes in OUTBUF. */
251 if (outbuf + outsize <= bpout)
253 char *wp = outbuf;
254 idx_t remaining_bytes;
257 if (full_write (STDOUT_FILENO, wp, outsize) != outsize)
258 write_error ();
259 wp += outsize;
260 remaining_bytes = bpout - wp;
262 while (outsize <= remaining_bytes);
264 /* Move the remaining bytes to the beginning of the
265 buffer. */
267 memmove (outbuf, wp, remaining_bytes);
268 bpout = outbuf + remaining_bytes;
271 /* Is INBUF empty? */
273 if (bpin > eob)
275 bool input_pending = false;
276 #ifdef FIONREAD
277 int n_to_read = 0;
279 /* Is there any input to read immediately?
280 If not, we are about to wait,
281 so write all buffered output before waiting. */
283 if (use_fionread
284 && ioctl (input_desc, FIONREAD, &n_to_read) < 0)
286 /* Ultrix returns EOPNOTSUPP on NFS;
287 HP-UX returns ENOTTY on pipes.
288 SunOS returns EINVAL and
289 More/BSD returns ENODEV on special files
290 like /dev/null.
291 Irix-5 returns ENOSYS on pipes. */
292 if (errno == EOPNOTSUPP || errno == ENOTTY
293 || errno == EINVAL || errno == ENODEV
294 || errno == ENOSYS)
295 use_fionread = false;
296 else
298 error (0, errno, _("cannot do ioctl on %s"),
299 quoteaf (infile));
300 newlines2 = newlines;
301 return false;
304 if (n_to_read != 0)
305 input_pending = true;
306 #endif
308 if (!input_pending)
309 write_pending (outbuf, &bpout);
311 /* Read more input into INBUF. */
313 ptrdiff_t n_read = safe_read (input_desc, inbuf, insize);
314 if (n_read < 0)
316 error (0, errno, "%s", quotef (infile));
317 write_pending (outbuf, &bpout);
318 newlines2 = newlines;
319 return false;
321 if (n_read == 0)
323 write_pending (outbuf, &bpout);
324 newlines2 = newlines;
325 return true;
328 /* Update the pointers and insert a sentinel at the buffer
329 end. */
331 bpin = inbuf;
332 eob = bpin + n_read;
333 *eob = '\n';
335 else
337 /* It was a real (not a sentinel) newline. */
339 /* Was the last line empty?
340 (i.e., have two or more consecutive newlines been read?) */
342 if (++newlines > 0)
344 if (newlines >= 2)
346 /* Limit this to 2 here. Otherwise, with lots of
347 consecutive newlines, the counter could wrap
348 around at INT_MAX. */
349 newlines = 2;
351 /* Are multiple adjacent empty lines to be substituted
352 by single ditto (-s), and this was the second empty
353 line? */
354 if (squeeze_blank)
356 ch = *bpin++;
357 continue;
361 /* Are line numbers to be written at empty lines (-n)? */
363 if (number && !number_nonblank)
365 next_line_num ();
366 bpout = stpcpy (bpout, line_num_print);
370 /* Output a currency symbol if requested (-e). */
371 if (show_ends)
373 if (pending_cr)
375 *bpout++ = '^';
376 *bpout++ = 'M';
377 pending_cr = false;
379 *bpout++ = '$';
382 /* Output the newline. */
384 *bpout++ = '\n';
386 ch = *bpin++;
388 while (ch == '\n');
390 /* Here CH cannot contain a newline character. */
392 if (pending_cr)
394 *bpout++ = '\r';
395 pending_cr = false;
398 /* Are we at the beginning of a line, and line numbers are requested? */
400 if (newlines >= 0 && number)
402 next_line_num ();
403 bpout = stpcpy (bpout, line_num_print);
406 /* The loops below continue until a newline character is found,
407 which means that the buffer is empty or that a proper newline
408 has been found. */
410 /* If quoting, i.e., at least one of -v, -e, or -t specified,
411 scan for chars that need conversion. */
412 if (show_nonprinting)
414 while (true)
416 if (ch >= 32)
418 if (ch < 127)
419 *bpout++ = ch;
420 else if (ch == 127)
422 *bpout++ = '^';
423 *bpout++ = '?';
425 else
427 *bpout++ = 'M';
428 *bpout++ = '-';
429 if (ch >= 128 + 32)
431 if (ch < 128 + 127)
432 *bpout++ = ch - 128;
433 else
435 *bpout++ = '^';
436 *bpout++ = '?';
439 else
441 *bpout++ = '^';
442 *bpout++ = ch - 128 + 64;
446 else if (ch == '\t' && !show_tabs)
447 *bpout++ = '\t';
448 else if (ch == '\n')
450 newlines = -1;
451 break;
453 else
455 *bpout++ = '^';
456 *bpout++ = ch + 64;
459 ch = *bpin++;
462 else
464 /* Not quoting, neither of -v, -e, or -t specified. */
465 while (true)
467 if (ch == '\t' && show_tabs)
469 *bpout++ = '^';
470 *bpout++ = ch + 64;
472 else if (ch != '\n')
474 if (ch == '\r' && *bpin == '\n' && show_ends)
476 if (bpin == eob)
477 pending_cr = true;
478 else
480 *bpout++ = '^';
481 *bpout++ = 'M';
484 else
485 *bpout++ = ch;
487 else
489 newlines = -1;
490 break;
493 ch = *bpin++;
499 /* Copy data from input to output using copy_file_range if possible.
500 Return 1 if successful, 0 if ordinary read+write should be tried,
501 -1 if a serious problem has been diagnosed. */
503 static int
504 copy_cat (void)
506 /* Copy at most COPY_MAX bytes at a time; this is min
507 (SSIZE_MAX, SIZE_MAX) truncated to a value that is
508 surely aligned well. */
509 ssize_t copy_max = MIN (SSIZE_MAX, SIZE_MAX) >> 30 << 30;
511 /* copy_file_range does not support some cases, and it
512 incorrectly returns 0 when reading from the proc file
513 system on the Linux kernel through at least 5.6.19 (2020),
514 so fall back on read+write if the copy_file_range is
515 unsupported or the input file seems empty. */
517 for (bool some_copied = false; ; some_copied = true)
518 switch (copy_file_range (input_desc, nullptr, STDOUT_FILENO, nullptr,
519 copy_max, 0))
521 case 0:
522 return some_copied;
524 case -1:
525 if (errno == ENOSYS || is_ENOTSUP (errno) || errno == EINVAL
526 || errno == EBADF || errno == EXDEV || errno == ETXTBSY
527 || errno == EPERM)
528 return 0;
529 error (0, errno, "%s", quotef (infile));
530 return -1;
536 main (int argc, char **argv)
538 /* Nonzero if we have ever read standard input. */
539 bool have_read_stdin = false;
541 struct stat stat_buf;
543 /* Variables that are set according to the specified options. */
544 bool number = false;
545 bool number_nonblank = false;
546 bool squeeze_blank = false;
547 bool show_ends = false;
548 bool show_nonprinting = false;
549 bool show_tabs = false;
550 int file_open_mode = O_RDONLY;
552 static struct option const long_options[] =
554 {"number-nonblank", no_argument, nullptr, 'b'},
555 {"number", no_argument, nullptr, 'n'},
556 {"squeeze-blank", no_argument, nullptr, 's'},
557 {"show-nonprinting", no_argument, nullptr, 'v'},
558 {"show-ends", no_argument, nullptr, 'E'},
559 {"show-tabs", no_argument, nullptr, 'T'},
560 {"show-all", no_argument, nullptr, 'A'},
561 {GETOPT_HELP_OPTION_DECL},
562 {GETOPT_VERSION_OPTION_DECL},
563 {nullptr, 0, nullptr, 0}
566 initialize_main (&argc, &argv);
567 set_program_name (argv[0]);
568 setlocale (LC_ALL, "");
569 bindtextdomain (PACKAGE, LOCALEDIR);
570 textdomain (PACKAGE);
572 /* Arrange to close stdout if we exit via the
573 case_GETOPT_HELP_CHAR or case_GETOPT_VERSION_CHAR code.
574 Normally STDOUT_FILENO is used rather than stdout, so
575 close_stdout does nothing. */
576 atexit (close_stdout);
578 /* Parse command line options. */
580 int c;
581 while ((c = getopt_long (argc, argv, "benstuvAET", long_options, nullptr))
582 != -1)
584 switch (c)
586 case 'b':
587 number = true;
588 number_nonblank = true;
589 break;
591 case 'e':
592 show_ends = true;
593 show_nonprinting = true;
594 break;
596 case 'n':
597 number = true;
598 break;
600 case 's':
601 squeeze_blank = true;
602 break;
604 case 't':
605 show_tabs = true;
606 show_nonprinting = true;
607 break;
609 case 'u':
610 /* We provide the -u feature unconditionally. */
611 break;
613 case 'v':
614 show_nonprinting = true;
615 break;
617 case 'A':
618 show_nonprinting = true;
619 show_ends = true;
620 show_tabs = true;
621 break;
623 case 'E':
624 show_ends = true;
625 break;
627 case 'T':
628 show_tabs = true;
629 break;
631 case_GETOPT_HELP_CHAR;
633 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
635 default:
636 usage (EXIT_FAILURE);
640 /* Get device, i-node number, and optimal blocksize of output. */
642 if (fstat (STDOUT_FILENO, &stat_buf) < 0)
643 error (EXIT_FAILURE, errno, _("standard output"));
645 /* Optimal size of i/o operations of output. */
646 idx_t outsize = io_blksize (&stat_buf);
648 /* Device, I-node number and lazily-acquired flags of the output. */
649 dev_t out_dev = stat_buf.st_dev;
650 ino_t out_ino = stat_buf.st_ino;
651 int out_flags = -2;
653 /* True if the output is a regular file. */
654 bool out_isreg = S_ISREG (stat_buf.st_mode) != 0;
656 if (! (number || show_ends || squeeze_blank))
658 file_open_mode |= O_BINARY;
659 xset_binary_mode (STDOUT_FILENO, O_BINARY);
662 /* Main loop. */
664 infile = "-";
665 int argind = optind;
666 bool ok = true;
667 idx_t page_size = getpagesize ();
671 if (argind < argc)
672 infile = argv[argind];
674 bool reading_stdin = STREQ (infile, "-");
675 if (reading_stdin)
677 have_read_stdin = true;
678 input_desc = STDIN_FILENO;
679 if (file_open_mode & O_BINARY)
680 xset_binary_mode (STDIN_FILENO, O_BINARY);
682 else
684 input_desc = open (infile, file_open_mode);
685 if (input_desc < 0)
687 error (0, errno, "%s", quotef (infile));
688 ok = false;
689 continue;
693 if (fstat (input_desc, &stat_buf) < 0)
695 error (0, errno, "%s", quotef (infile));
696 ok = false;
697 goto contin;
700 /* Optimal size of i/o operations of input. */
701 idx_t insize = io_blksize (&stat_buf);
703 fdadvise (input_desc, 0, 0, FADVISE_SEQUENTIAL);
705 /* Don't copy a file to itself if that would merely exhaust the
706 output device. It's better to catch this error earlier
707 rather than later. */
709 if (stat_buf.st_dev == out_dev && stat_buf.st_ino == out_ino)
711 if (out_flags < -1)
712 out_flags = fcntl (STDOUT_FILENO, F_GETFL);
713 bool exhausting = 0 <= out_flags && out_flags & O_APPEND;
714 if (!exhausting)
716 off_t in_pos = lseek (input_desc, 0, SEEK_CUR);
717 if (0 <= in_pos)
718 exhausting = in_pos < lseek (STDOUT_FILENO, 0, SEEK_CUR);
720 if (exhausting)
722 error (0, 0, _("%s: input file is output file"), quotef (infile));
723 ok = false;
724 goto contin;
728 /* Pointer to the input buffer. */
729 char *inbuf;
731 /* Select which version of 'cat' to use. If any format-oriented
732 options were given use 'cat'; if not, use 'copy_cat' if it
733 works, 'simple_cat' otherwise. */
735 if (! (number || show_ends || show_nonprinting
736 || show_tabs || squeeze_blank))
738 int copy_cat_status =
739 out_isreg && S_ISREG (stat_buf.st_mode) ? copy_cat () : 0;
740 if (copy_cat_status != 0)
742 inbuf = nullptr;
743 ok &= 0 < copy_cat_status;
745 else
747 insize = MAX (insize, outsize);
748 inbuf = xalignalloc (page_size, insize);
749 ok &= simple_cat (inbuf, insize);
752 else
754 /* Allocate, with an extra byte for a newline sentinel. */
755 inbuf = xalignalloc (page_size, insize + 1);
757 /* Why are
758 (OUTSIZE - 1 + INSIZE * 4 + LINE_COUNTER_BUF_LEN)
759 bytes allocated for the output buffer?
761 A test whether output needs to be written is done when the input
762 buffer empties or when a newline appears in the input. After
763 output is written, at most (OUTSIZE - 1) bytes will remain in the
764 buffer. Now INSIZE bytes of input is read. Each input character
765 may grow by a factor of 4 (by the prepending of M-^). If all
766 characters do, and no newlines appear in this block of input, we
767 will have at most (OUTSIZE - 1 + INSIZE * 4) bytes in the buffer.
768 If the last character in the preceding block of input was a
769 newline, a line number may be written (according to the given
770 options) as the first thing in the output buffer. (Done after the
771 new input is read, but before processing of the input begins.)
772 A line number requires seldom more than LINE_COUNTER_BUF_LEN
773 positions.
775 Align the output buffer to a page size boundary, for efficiency
776 on some paging implementations. */
778 idx_t bufsize;
779 if (ckd_mul (&bufsize, insize, 4)
780 || ckd_add (&bufsize, bufsize, outsize)
781 || ckd_add (&bufsize, bufsize, LINE_COUNTER_BUF_LEN - 1))
782 xalloc_die ();
783 char *outbuf = xalignalloc (page_size, bufsize);
785 ok &= cat (inbuf, insize, outbuf, outsize, show_nonprinting,
786 show_tabs, number, number_nonblank, show_ends,
787 squeeze_blank);
789 alignfree (outbuf);
792 alignfree (inbuf);
794 contin:
795 if (!reading_stdin && close (input_desc) < 0)
797 error (0, errno, "%s", quotef (infile));
798 ok = false;
801 while (++argind < argc);
803 if (pending_cr)
805 if (full_write (STDOUT_FILENO, "\r", 1) != 1)
806 write_error ();
809 if (have_read_stdin && close (STDIN_FILENO) < 0)
810 error (EXIT_FAILURE, errno, _("closing standard input"));
812 return ok ? EXIT_SUCCESS : EXIT_FAILURE;