doc: remove 'proposed' in regard to $'' descriptions
[coreutils.git] / src / wc.c
blob21ffa74d9faedca3e500fccbf84dd3cc244abd4c
/* wc - print the number of lines, words, and bytes in files
   Copyright (C) 1985-2024 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by Paul Rubin, phr@ocf.berkeley.edu
   and David MacKenzie, djm@gnu.ai.mit.edu. */
20 #include <config.h>
22 #include <ctype.h>
23 #include <stdio.h>
24 #include <getopt.h>
25 #include <sys/types.h>
26 #include <uchar.h>
28 #include <argmatch.h>
29 #include <argv-iter.h>
30 #include <fadvise.h>
31 #include <physmem.h>
32 #include <readtokens0.h>
33 #include <stat-size.h>
34 #include <xbinary-io.h>
36 #include "system.h"
37 #include "ioblksize.h"
38 #include "wc.h"
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "wc"

/* Author list handed to the --version handler in main.  */
#define AUTHORS \
  proper_name ("Paul Rubin"), \
  proper_name ("David MacKenzie")
47 static bool wc_isprint[UCHAR_MAX + 1];
48 static bool wc_isspace[UCHAR_MAX + 1];
50 static bool debug;
52 /* Cumulative number of lines, words, chars and bytes in all files so far.
53 max_line_length is the maximum over all files processed so far. */
54 static uintmax_t total_lines;
55 static uintmax_t total_words;
56 static uintmax_t total_chars;
57 static uintmax_t total_bytes;
58 static bool total_lines_overflow;
59 static bool total_words_overflow;
60 static bool total_chars_overflow;
61 static bool total_bytes_overflow;
62 static intmax_t max_line_length;
64 /* Which counts to print. */
65 static bool print_lines, print_words, print_chars, print_bytes;
66 static bool print_linelength;
68 /* The print width of each count. */
69 static int number_width;
71 /* True if we have ever read the standard input. */
72 static bool have_read_stdin;
74 /* Used to determine if file size can be determined without reading. */
75 static idx_t page_size;
77 /* Enable to _not_ treat non breaking space as a word separator. */
78 static bool posixly_correct;
/* The result of calling fstat or stat on a file descriptor or file.  */
struct fstatus
{
  /* If positive, fstat or stat has not been called yet.  Otherwise,
     this is the value returned from fstat or stat.  */
  int failed;

  /* If FAILED is zero, this is the file's status.  */
  struct stat st;
};
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
enum
{
  DEBUG_PROGRAM_OPTION = CHAR_MAX + 1,
  FILES0_FROM_OPTION,
  TOTAL_OPTION,
};
100 static struct option const longopts[] =
102 {"bytes", no_argument, nullptr, 'c'},
103 {"chars", no_argument, nullptr, 'm'},
104 {"lines", no_argument, nullptr, 'l'},
105 {"words", no_argument, nullptr, 'w'},
106 {"debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION},
107 {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION},
108 {"max-line-length", no_argument, nullptr, 'L'},
109 {"total", required_argument, nullptr, TOTAL_OPTION},
110 {GETOPT_HELP_OPTION_DECL},
111 {GETOPT_VERSION_OPTION_DECL},
112 {nullptr, 0, nullptr, 0}
115 enum total_type
117 total_auto, /* 0: default or --total=auto */
118 total_always, /* 1: --total=always */
119 total_only, /* 2: --total=only */
120 total_never /* 3: --total=never */
122 static char const *const total_args[] =
124 "auto", "always", "only", "never", nullptr
126 static enum total_type const total_types[] =
128 total_auto, total_always, total_only, total_never
130 ARGMATCH_VERIFY (total_args, total_types);
131 static enum total_type total_mode = total_auto;
#ifdef USE_AVX2_WC_LINECOUNT
/* Return true if __builtin_cpu_supports reports AVX2 for the running
   CPU.  When --debug is in effect, also emit a diagnostic saying
   which way the check went.  */
static bool
avx2_supported (void)
{
  bool avx_enabled = 0 < __builtin_cpu_supports ("avx2");

  if (debug)
    error (0, 0, (avx_enabled
                  ? _("using avx2 hardware support")
                  : _("avx2 support not detected")));

  return avx_enabled;
}
#endif
148 void
149 usage (int status)
151 if (status != EXIT_SUCCESS)
152 emit_try_help ();
153 else
155 printf (_("\
156 Usage: %s [OPTION]... [FILE]...\n\
157 or: %s [OPTION]... --files0-from=F\n\
159 program_name, program_name);
160 fputs (_("\
161 Print newline, word, and byte counts for each FILE, and a total line if\n\
162 more than one FILE is specified. A word is a nonempty sequence of non white\n\
163 space delimited by white space characters or by start or end of input.\n\
164 "), stdout);
166 emit_stdin_note ();
168 fputs (_("\
170 The options below may be used to select which counts are printed, always in\n\
171 the following order: newline, word, character, byte, maximum line length.\n\
172 -c, --bytes print the byte counts\n\
173 -m, --chars print the character counts\n\
174 -l, --lines print the newline counts\n\
175 "), stdout);
176 fputs (_("\
177 --files0-from=F read input from the files specified by\n\
178 NUL-terminated names in file F;\n\
179 If F is - then read names from standard input\n\
180 -L, --max-line-length print the maximum display width\n\
181 -w, --words print the word counts\n\
182 "), stdout);
183 fputs (_("\
184 --total=WHEN when to print a line with total counts;\n\
185 WHEN can be: auto, always, only, never\n\
186 "), stdout);
187 fputs (HELP_OPTION_DESCRIPTION, stdout);
188 fputs (VERSION_OPTION_DESCRIPTION, stdout);
189 emit_ancillary_info (PROGRAM_NAME);
191 exit (status);
194 /* Return non zero if a non breaking space. */
195 ATTRIBUTE_PURE
196 static int
197 iswnbspace (wint_t wc)
199 return ! posixly_correct
200 && (wc == 0x00A0 || wc == 0x2007
201 || wc == 0x202F || wc == 0x2060);
204 /* FILE is the name of the file (or null for standard input)
205 associated with the specified counters. */
206 static void
207 write_counts (uintmax_t lines,
208 uintmax_t words,
209 uintmax_t chars,
210 uintmax_t bytes,
211 intmax_t linelength,
212 char const *file)
214 static char const format_sp_int[] = " %*s";
215 char const *format_int = format_sp_int + 1;
216 char buf[MAX (INT_BUFSIZE_BOUND (intmax_t),
217 INT_BUFSIZE_BOUND (uintmax_t))];
219 if (print_lines)
221 printf (format_int, number_width, umaxtostr (lines, buf));
222 format_int = format_sp_int;
224 if (print_words)
226 printf (format_int, number_width, umaxtostr (words, buf));
227 format_int = format_sp_int;
229 if (print_chars)
231 printf (format_int, number_width, umaxtostr (chars, buf));
232 format_int = format_sp_int;
234 if (print_bytes)
236 printf (format_int, number_width, umaxtostr (bytes, buf));
237 format_int = format_sp_int;
239 if (print_linelength)
240 printf (format_int, number_width, imaxtostr (linelength, buf));
241 if (file)
242 printf (" %s", strchr (file, '\n') ? quotef (file) : file);
243 putchar ('\n');
246 /* Read FD and return a summary. */
247 static struct wc_lines
248 wc_lines (int fd)
250 #ifdef USE_AVX2_WC_LINECOUNT
251 static signed char use_avx2;
252 if (!use_avx2)
253 use_avx2 = avx2_supported () ? 1 : -1;
254 if (0 < use_avx2)
255 return wc_lines_avx2 (fd);
256 #endif
258 intmax_t lines = 0, bytes = 0;
259 bool long_lines = false;
261 while (true)
263 char buf[IO_BUFSIZE + 1];
264 ssize_t bytes_read = read (fd, buf, IO_BUFSIZE);
265 if (bytes_read <= 0)
266 return (struct wc_lines) { bytes_read == 0 ? 0 : errno, lines, bytes };
268 bytes += bytes_read;
269 char *end = buf + bytes_read;
270 idx_t buflines = 0;
272 if (! long_lines)
274 /* Avoid function call overhead for shorter lines. */
275 for (char *p = buf; p < end; p++)
276 buflines += *p == '\n';
278 else
280 /* rawmemchr is more efficient with longer lines. */
281 *end = '\n';
282 for (char *p = buf; (p = rawmemchr (p, '\n')) < end; p++)
283 buflines++;
286 /* If the average line length in the block is >= 15, then use
287 memchr for the next block, where system specific optimizations
288 may outweigh function call overhead.
289 FIXME: This line length was determined in 2015, on both
290 x86_64 and ppc64, but it's worth re-evaluating in future with
291 newer compilers, CPUs, or memchr() implementations etc. */
292 long_lines = 15 * buflines <= bytes_read;
293 lines += buflines;
297 /* Count words. FILE_X is the name of the file (or null for standard
298 input) that is open on descriptor FD. *FSTATUS is its status.
299 CURRENT_POS is the current file offset if known, negative if unknown.
300 Return true if successful. */
301 static bool
302 wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
304 int err = 0;
305 char buf[IO_BUFSIZE + 1];
306 intmax_t lines, words, chars, bytes, linelength;
307 bool count_bytes, count_chars, count_complicated;
308 char const *file = file_x ? file_x : _("standard input");
310 lines = words = chars = bytes = linelength = 0;
312 /* If in the current locale, chars are equivalent to bytes, we prefer
313 counting bytes, because that's easier. */
314 if (MB_CUR_MAX > 1)
316 count_bytes = print_bytes;
317 count_chars = print_chars;
319 else
321 count_bytes = print_bytes || print_chars;
322 count_chars = false;
324 count_complicated = print_words || print_linelength;
326 /* Advise the kernel of our access pattern only if we will read(). */
327 if (!count_bytes || count_chars || print_lines || count_complicated)
328 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
330 /* When counting only bytes, save some line- and word-counting
331 overhead. If FD is a 'regular' Unix file, using lseek is enough
332 to get its 'size' in bytes. Otherwise, read blocks of IO_BUFSIZE
333 bytes at a time until EOF. Note that the 'size' (number of bytes)
334 that wc reports is smaller than stats.st_size when the file is not
335 positioned at its beginning. That's why the lseek calls below are
336 necessary. For example the command
337 '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
338 should make wc report '0' bytes. */
340 if (count_bytes && !count_chars && !print_lines && !count_complicated)
342 bool skip_read = false;
344 if (0 < fstatus->failed)
345 fstatus->failed = fstat (fd, &fstatus->st);
347 /* For sized files, seek to one st_blksize before EOF rather than to EOF.
348 This works better for files in proc-like file systems where
349 the size is only approximate. */
350 if (! fstatus->failed && usable_st_size (&fstatus->st)
351 && 0 <= fstatus->st.st_size)
353 off_t end_pos = fstatus->st.st_size;
354 if (current_pos < 0)
355 current_pos = lseek (fd, 0, SEEK_CUR);
357 if (end_pos % page_size)
359 /* We only need special handling of /proc and /sys files etc.
360 when they're a multiple of PAGE_SIZE. In the common case
361 for files with st_size not a multiple of PAGE_SIZE,
362 it's more efficient and accurate to use st_size.
364 Be careful here. The current position may actually be
365 beyond the end of the file. As in the example above. */
367 bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
368 if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR))
369 skip_read = true;
370 else
371 bytes = 0;
373 else
375 off_t hi_pos = (end_pos
376 - end_pos % (STP_BLKSIZE (&fstatus->st) + 1));
377 if (0 <= current_pos && current_pos < hi_pos
378 && 0 <= lseek (fd, hi_pos, SEEK_CUR))
379 bytes = hi_pos - current_pos;
383 if (! skip_read)
385 fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
386 for (ssize_t bytes_read;
387 (bytes_read = read (fd, buf, IO_BUFSIZE));
388 bytes += bytes_read)
389 if (bytes_read < 0)
391 err = errno;
392 break;
396 else if (!count_chars && !count_complicated)
398 /* Use a separate loop when counting only lines or lines and bytes --
399 but not chars or words. */
400 struct wc_lines w = wc_lines (fd);
401 err = w.err;
402 lines = w.lines;
403 bytes = w.bytes;
405 else if (MB_CUR_MAX > 1)
407 bool in_word = false;
408 intmax_t linepos = 0;
409 mbstate_t state; mbszero (&state);
410 bool in_shift = false;
411 idx_t prev = 0; /* Number of bytes carried over from previous round. */
413 for (ssize_t bytes_read;
414 ((bytes_read = read (fd, buf + prev, IO_BUFSIZE - prev))
415 || prev);
418 if (bytes_read < 0)
420 err = errno;
421 break;
424 bytes += bytes_read;
425 char const *p = buf;
426 char const *plim = p + prev + bytes_read;
429 char32_t wide_char;
430 idx_t charbytes;
431 bool single_byte;
433 if (!in_shift && 0 <= *p && *p < 0x80)
435 /* Handle most ASCII characters quickly, without calling
436 mbrtoc32. */
437 charbytes = 1;
438 wide_char = *p;
439 single_byte = true;
441 else
443 idx_t scanbytes = plim - (p + prev);
444 size_t n = mbrtoc32 (&wide_char, p + prev, scanbytes, &state);
445 prev = 0;
447 if (scanbytes < n)
449 if (n == (size_t) -2 && plim - p < IO_BUFSIZE
450 && bytes_read)
452 /* An incomplete character that is not ridiculously
453 long and there may be more input. Move the bytes
454 to buffer start and prepare to read more data. */
455 prev = plim - p;
456 memmove (buf, p, prev);
457 in_shift = true;
458 break;
461 /* Remember that we read a byte, but don't complain
462 about the error. Because of the decoding error,
463 this is a considered to be byte but not a
464 character (that is, chars is not incremented). */
465 p++;
466 mbszero (&state);
467 in_shift = false;
469 /* Treat encoding errors as non white space.
470 POSIX says a word is "a non-zero-length string of
471 characters delimited by white space". This is
472 wrong in some sense, as the string can be delimited
473 by start or end of input, and it is unclear what it
474 means when the input contains encoding errors.
475 Since encoding errors are not white space,
476 treat them that way here. */
477 words += !in_word;
478 in_word = true;
479 continue;
482 charbytes = n + !n;
483 single_byte = charbytes == !in_shift;
484 in_shift = !mbsinit (&state);
487 switch (wide_char)
489 case '\n':
490 lines++;
491 FALLTHROUGH;
492 case '\r':
493 case '\f':
494 if (linepos > linelength)
495 linelength = linepos;
496 linepos = 0;
497 in_word = false;
498 break;
500 case '\t':
501 linepos += 8 - (linepos % 8);
502 in_word = false;
503 break;
505 case ' ':
506 linepos++;
507 FALLTHROUGH;
508 case '\v':
509 in_word = false;
510 break;
512 default:;
513 bool in_word2;
514 if (single_byte)
516 linepos += wc_isprint[wide_char];
517 in_word2 = !wc_isspace[wide_char];
519 else
521 /* c32width can be expensive on macOS for example,
522 so avoid if not needed. */
523 if (print_linelength)
525 int width = c32width (wide_char);
526 if (width > 0)
527 linepos += width;
529 in_word2 = ! iswspace (wide_char)
530 && ! iswnbspace (wide_char);
533 /* Count words by counting word starts, i.e., each
534 white space character (or the start of input)
535 followed by non white space. */
536 words += !in_word & in_word2;
537 in_word = in_word2;
538 break;
541 p += charbytes;
542 chars++;
544 while (p < plim);
546 if (linepos > linelength)
547 linelength = linepos;
549 else
551 bool in_word = false;
552 intmax_t linepos = 0;
554 for (ssize_t bytes_read; (bytes_read = read (fd, buf, IO_BUFSIZE)); )
556 if (bytes_read < 0)
558 err = errno;
559 break;
562 bytes += bytes_read;
563 char const *p = buf;
566 unsigned char c = *p++;
567 switch (c)
569 case '\n':
570 lines++;
571 FALLTHROUGH;
572 case '\r':
573 case '\f':
574 if (linepos > linelength)
575 linelength = linepos;
576 linepos = 0;
577 in_word = false;
578 break;
580 case '\t':
581 linepos += 8 - (linepos % 8);
582 in_word = false;
583 break;
585 case ' ':
586 linepos++;
587 FALLTHROUGH;
588 case '\v':
589 in_word = false;
590 break;
592 default:
593 linepos += wc_isprint[c];
594 bool in_word2 = !wc_isspace[c];
595 words += !in_word & in_word2;
596 in_word = in_word2;
597 break;
600 while (--bytes_read);
602 if (linepos > linelength)
603 linelength = linepos;
606 if (count_chars < print_chars)
607 chars = bytes;
609 if (total_mode != total_only)
610 write_counts (lines, words, chars, bytes, linelength, file_x);
612 total_lines_overflow |= ckd_add (&total_lines, total_lines, lines);
613 total_words_overflow |= ckd_add (&total_words, total_words, words);
614 total_chars_overflow |= ckd_add (&total_chars, total_chars, chars);
615 total_bytes_overflow |= ckd_add (&total_bytes, total_bytes, bytes);
617 if (linelength > max_line_length)
618 max_line_length = linelength;
620 if (err)
621 error (0, err, "%s", quotef (file));
622 return !err;
625 static bool
626 wc_file (char const *file, struct fstatus *fstatus)
628 if (! file || STREQ (file, "-"))
630 have_read_stdin = true;
631 xset_binary_mode (STDIN_FILENO, O_BINARY);
632 return wc (STDIN_FILENO, file, fstatus, -1);
634 else
636 int fd = open (file, O_RDONLY | O_BINARY);
637 if (fd == -1)
639 error (0, errno, "%s", quotef (file));
640 return false;
642 else
644 bool ok = wc (fd, file, fstatus, 0);
645 if (close (fd) != 0)
647 error (0, errno, "%s", quotef (file));
648 return false;
650 return ok;
655 /* Return the file status for the NFILES files addressed by FILE.
656 Optimize the case where only one number is printed, for just one
657 file; in that case we can use a print width of 1, so we don't need
658 to stat the file. Handle the case of (nfiles == 0) in the same way;
659 that happens when we don't know how long the list of file names will be. */
661 static struct fstatus *
662 get_input_fstatus (idx_t nfiles, char *const *file)
664 struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
666 if (nfiles == 0
667 || (nfiles == 1
668 && ((print_lines + print_words + print_chars
669 + print_bytes + print_linelength)
670 == 1)))
671 fstatus[0].failed = 1;
672 else
674 for (idx_t i = 0; i < nfiles; i++)
675 fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
676 ? fstat (STDIN_FILENO, &fstatus[i].st)
677 : stat (file[i], &fstatus[i].st));
680 return fstatus;
683 /* Return a print width suitable for the NFILES files whose status is
684 recorded in FSTATUS. Optimize the same special case that
685 get_input_fstatus optimizes. */
687 ATTRIBUTE_PURE
688 static int
689 compute_number_width (idx_t nfiles, struct fstatus const *fstatus)
691 int width = 1;
693 if (0 < nfiles && fstatus[0].failed <= 0)
695 int minimum_width = 1;
696 uintmax_t regular_total = 0;
698 for (idx_t i = 0; i < nfiles; i++)
699 if (! fstatus[i].failed)
701 if (!S_ISREG (fstatus[i].st.st_mode))
702 minimum_width = 7;
703 else if (ckd_add (&regular_total, regular_total,
704 fstatus[i].st.st_size))
706 regular_total = UINTMAX_MAX;
707 break;
711 for (; 10 <= regular_total; regular_total /= 10)
712 width++;
713 if (width < minimum_width)
714 width = minimum_width;
717 return width;
722 main (int argc, char **argv)
724 int optc;
725 idx_t nfiles;
726 char **files;
727 char *files_from = nullptr;
728 struct fstatus *fstatus;
729 struct Tokens tok;
731 initialize_main (&argc, &argv);
732 set_program_name (argv[0]);
733 setlocale (LC_ALL, "");
734 bindtextdomain (PACKAGE, LOCALEDIR);
735 textdomain (PACKAGE);
737 atexit (close_stdout);
739 page_size = getpagesize ();
740 /* Line buffer stdout to ensure lines are written atomically and immediately
741 so that processes running in parallel do not intersperse their output. */
742 setvbuf (stdout, nullptr, _IOLBF, 0);
744 posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
746 print_lines = print_words = print_chars = print_bytes = false;
747 print_linelength = false;
748 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
750 while ((optc = getopt_long (argc, argv, "clLmw", longopts, nullptr)) != -1)
751 switch (optc)
753 case 'c':
754 print_bytes = true;
755 break;
757 case 'm':
758 print_chars = true;
759 break;
761 case 'l':
762 print_lines = true;
763 break;
765 case 'w':
766 print_words = true;
767 break;
769 case 'L':
770 print_linelength = true;
771 break;
773 case DEBUG_PROGRAM_OPTION:
774 debug = true;
775 break;
777 case FILES0_FROM_OPTION:
778 files_from = optarg;
779 break;
781 case TOTAL_OPTION:
782 total_mode = XARGMATCH ("--total", optarg, total_args, total_types);
783 break;
785 case_GETOPT_HELP_CHAR;
787 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
789 default:
790 usage (EXIT_FAILURE);
793 if (! (print_lines || print_words || print_chars || print_bytes
794 || print_linelength))
795 print_lines = print_words = print_bytes = true;
797 if (print_linelength)
798 for (int i = 0; i <= UCHAR_MAX; i++)
799 wc_isprint[i] = !!isprint (i);
800 if (print_words)
801 for (int i = 0; i <= UCHAR_MAX; i++)
802 wc_isspace[i] = isspace (i) || iswnbspace (btoc32 (i));
804 bool read_tokens = false;
805 struct argv_iterator *ai;
806 if (files_from)
808 FILE *stream;
810 /* When using --files0-from=F, you may not specify any files
811 on the command-line. */
812 if (optind < argc)
814 error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
815 fprintf (stderr, "%s\n",
816 _("file operands cannot be combined with --files0-from"));
817 usage (EXIT_FAILURE);
820 if (STREQ (files_from, "-"))
821 stream = stdin;
822 else
824 stream = fopen (files_from, "r");
825 if (stream == nullptr)
826 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
827 quoteaf (files_from));
830 /* Read the file list into RAM if we can detect its size and that
831 size is reasonable. Otherwise, we'll read a name at a time. */
832 struct stat st;
833 if (fstat (fileno (stream), &st) == 0
834 && S_ISREG (st.st_mode)
835 && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
837 read_tokens = true;
838 readtokens0_init (&tok);
839 if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
840 error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
841 quoteaf (files_from));
842 files = tok.tok;
843 nfiles = tok.n_tok;
844 ai = argv_iter_init_argv (files);
846 else
848 files = nullptr;
849 nfiles = 0;
850 ai = argv_iter_init_stream (stream);
853 else
855 static char *stdin_only[] = { nullptr };
856 files = (optind < argc ? argv + optind : stdin_only);
857 nfiles = (optind < argc ? argc - optind : 1);
858 ai = argv_iter_init_argv (files);
861 if (!ai)
862 xalloc_die ();
864 fstatus = get_input_fstatus (nfiles, files);
865 if (total_mode == total_only)
866 number_width = 1; /* No extra padding, since no alignment requirement. */
867 else
868 number_width = compute_number_width (nfiles, fstatus);
870 bool ok = true;
871 enum argv_iter_err ai_err;
872 char *file_name;
873 for (int i = 0; (file_name = argv_iter (ai, &ai_err)); i++)
875 bool skip_file = false;
876 if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
878 /* Give a better diagnostic in an unusual case:
879 printf - | wc --files0-from=- */
880 error (0, 0, _("when reading file names from stdin, "
881 "no file name of %s allowed"),
882 quoteaf (file_name));
883 skip_file = true;
886 if (!file_name[0])
888 /* Diagnose a zero-length file name. When it's one
889 among many, knowing the record number may help.
890 FIXME: currently print the record number only with
891 --files0-from=FILE. Maybe do it for argv, too? */
892 if (files_from == nullptr)
893 error (0, 0, "%s", _("invalid zero-length file name"));
894 else
896 /* Using the standard 'filename:line-number:' prefix here is
897 not totally appropriate, since NUL is the separator, not NL,
898 but it might be better than nothing. */
899 error (0, 0, "%s:%zu: %s", quotef (files_from),
900 argv_iter_n_args (ai), _("invalid zero-length file name"));
902 skip_file = true;
905 if (skip_file)
906 ok = false;
907 else
908 ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
910 if (! nfiles)
911 fstatus[0].failed = 1;
913 switch (ai_err)
915 case AI_ERR_EOF:
916 break;
918 case AI_ERR_READ:
919 error (0, errno, _("%s: read error"), quotef (files_from));
920 ok = false;
921 break;
923 case AI_ERR_MEM:
924 xalloc_die ();
926 default:
927 unreachable ();
930 /* No arguments on the command line is fine. That means read from stdin.
931 However, no arguments on the --files0-from input stream is an error
932 means don't read anything. */
933 if (ok && !files_from && argv_iter_n_args (ai) == 0)
934 ok &= wc_file (nullptr, &fstatus[0]);
936 if (read_tokens)
937 readtokens0_free (&tok);
939 if (total_mode != total_never
940 && (total_mode != total_auto || 1 < argv_iter_n_args (ai)))
942 if (total_lines_overflow)
944 total_lines = UINTMAX_MAX;
945 error (0, EOVERFLOW, _("total lines"));
946 ok = false;
948 if (total_words_overflow)
950 total_words = UINTMAX_MAX;
951 error (0, EOVERFLOW, _("total words"));
952 ok = false;
954 if (total_chars_overflow)
956 total_chars = UINTMAX_MAX;
957 error (0, EOVERFLOW, _("total characters"));
958 ok = false;
960 if (total_bytes_overflow)
962 total_bytes = UINTMAX_MAX;
963 error (0, EOVERFLOW, _("total bytes"));
964 ok = false;
967 write_counts (total_lines, total_words, total_chars, total_bytes,
968 max_line_length,
969 total_mode != total_only ? _("total") : nullptr);
972 argv_iter_free (ai);
974 free (fstatus);
976 if (have_read_stdin && close (STDIN_FILENO) != 0)
977 error (EXIT_FAILURE, errno, "-");
979 return ok ? EXIT_SUCCESS : EXIT_FAILURE;