Also check for the log_user method, to avoid
[coreutils.git] / src / wc.c
blob733d32d85e5a8f36748aa54c34897b3750d6c406
/* wc - print the number of bytes, words, and lines in files
   Copyright (C) 85, 91, 1995-2002 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

/* Written by Paul Rubin, phr@ocf.berkeley.edu
   and David MacKenzie, djm@gnu.ai.mit.edu.  */
21 #include <config.h>
23 #include <stdio.h>
24 #include <getopt.h>
25 #include <sys/types.h>
27 /* Get mbstate_t, mbrtowc(), wcwidth(). */
28 #if HAVE_WCHAR_H
29 # include <wchar.h>
30 #endif
32 /* Get iswprint(), iswspace(). */
33 #if HAVE_WCTYPE_H
34 # include <wctype.h>
35 #endif
/* Fallbacks used when neither a macro nor the configure result
   (HAVE_ISWPRINT / HAVE_ISWSPACE) supplies these classifiers:
   every wide character counts as printable, and a wide character is
   white space only if it fits in an unsigned char and the single-byte
   ISSPACE test accepts it.  */
#if !defined iswprint && !HAVE_ISWPRINT
# define iswprint(wc) 1
#endif
#if !defined iswspace && !HAVE_ISWSPACE
# define iswspace(wc) \
    ((wc) == (unsigned char) (wc) && ISSPACE ((unsigned char) (wc)))
#endif
44 /* Include this after wctype.h so that we `#undef' ISPRINT
45 (from Solaris's euc.h, from widec.h, from wctype.h) before
46 redefining and using it. */
47 #include "system.h"
49 #include "closeout.h"
50 #include "error.h"
51 #include "inttostr.h"
52 #include "safe-read.h"
54 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
55 #if HAVE_MBRTOWC && defined mbstate_t
56 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
57 #endif
59 #ifndef HAVE_DECL_WCWIDTH
60 "this configure-time declaration test was not run"
61 #endif
62 #if !HAVE_DECL_WCWIDTH
63 extern int wcwidth ();
64 #endif
66 /* If wcwidth() doesn't exist, assume all printable characters have
67 width 1. */
68 #if !defined wcwidth && !HAVE_WCWIDTH
69 # define wcwidth(wc) ((wc) == 0 ? 0 : iswprint (wc) ? 1 : -1)
70 #endif
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "wc"

#define AUTHORS N_ ("Paul Rubin and David MacKenzie")

/* Size of atomic reads.  */
#define BUFFER_SIZE (16 * 1024)

/* The name this program was run with.  */
char *program_name;

/* Cumulative number of lines, words, chars and bytes in all files so far.
   max_line_length is the maximum over all files processed so far.  */
static uintmax_t total_lines;
static uintmax_t total_words;
static uintmax_t total_chars;
static uintmax_t total_bytes;
static uintmax_t max_line_length;

/* Which counts to print.  */
static int print_lines, print_words, print_chars, print_bytes;
static int print_linelength;

/* Nonzero if we have ever read the standard input.  */
static int have_read_stdin;

/* The error code to return to the system.  */
static int exit_status;

/* If nonzero, do not line up columns but instead separate numbers by
   a single space as specified in Single Unix Specification and POSIX.  */
static int posixly_correct;
105 static struct option const longopts[] =
107 {"bytes", no_argument, NULL, 'c'},
108 {"chars", no_argument, NULL, 'm'},
109 {"lines", no_argument, NULL, 'l'},
110 {"words", no_argument, NULL, 'w'},
111 {"max-line-length", no_argument, NULL, 'L'},
112 {GETOPT_HELP_OPTION_DECL},
113 {GETOPT_VERSION_OPTION_DECL},
114 {NULL, 0, NULL, 0}
117 void
118 usage (int status)
120 if (status != 0)
121 fprintf (stderr, _("Try `%s --help' for more information.\n"),
122 program_name);
123 else
125 printf (_("\
126 Usage: %s [OPTION]... [FILE]...\n\
128 program_name);
129 fputs (_("\
130 Print byte, word, and newline counts for each FILE, and a total line if\n\
131 more than one FILE is specified. With no FILE, or when FILE is -,\n\
132 read standard input.\n\
133 -c, --bytes print the byte counts\n\
134 -m, --chars print the character counts\n\
135 -l, --lines print the newline counts\n\
136 "), stdout);
137 fputs (_("\
138 -L, --max-line-length print the length of the longest line\n\
139 -w, --words print the word counts\n\
140 "), stdout);
141 fputs (HELP_OPTION_DESCRIPTION, stdout);
142 fputs (VERSION_OPTION_DESCRIPTION, stdout);
143 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
145 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
148 static void
149 write_counts (uintmax_t lines,
150 uintmax_t words,
151 uintmax_t chars,
152 uintmax_t bytes,
153 uintmax_t linelength,
154 const char *file)
156 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
157 char const *space = "";
158 char const *format_int = (posixly_correct ? "%s" : "%7s");
159 char const *format_sp_int = (posixly_correct ? "%s%s" : "%s%7s");
161 if (print_lines)
163 printf (format_int, umaxtostr (lines, buf));
164 space = " ";
166 if (print_words)
168 printf (format_sp_int, space, umaxtostr (words, buf));
169 space = " ";
171 if (print_chars)
173 printf (format_sp_int, space, umaxtostr (chars, buf));
174 space = " ";
176 if (print_bytes)
178 printf (format_sp_int, space, umaxtostr (bytes, buf));
179 space = " ";
181 if (print_linelength)
183 printf (format_sp_int, space, umaxtostr (linelength, buf));
185 if (*file)
186 printf (" %s", file);
187 putchar ('\n');
190 static void
191 wc (int fd, const char *file)
193 char buf[BUFFER_SIZE + 1];
194 size_t bytes_read;
195 uintmax_t lines, words, chars, bytes, linelength;
196 int count_bytes, count_chars, count_complicated;
198 lines = words = chars = bytes = linelength = 0;
200 /* If in the current locale, chars are equivalent to bytes, we prefer
201 counting bytes, because that's easier. */
202 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
203 if (MB_CUR_MAX > 1)
205 count_bytes = print_bytes;
206 count_chars = print_chars;
208 else
209 #endif
211 count_bytes = print_bytes + print_chars;
212 count_chars = 0;
214 count_complicated = print_words + print_linelength;
216 /* We need binary input, since `wc' relies on `lseek' and byte counts. */
217 SET_BINARY (fd);
219 /* When counting only bytes, save some line- and word-counting
220 overhead. If FD is a `regular' Unix file, using lseek is enough
221 to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE
222 bytes at a time until EOF. Note that the `size' (number of bytes)
223 that wc reports is smaller than stats.st_size when the file is not
224 positioned at its beginning. That's why the lseek calls below are
225 necessary. For example the command
226 `(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
227 should make wc report `0' bytes. */
229 if (count_bytes && !count_chars && !print_lines && !count_complicated)
231 off_t current_pos, end_pos;
232 struct stat stats;
234 if (fstat (fd, &stats) == 0 && S_ISREG (stats.st_mode)
235 && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1
236 && (end_pos = lseek (fd, (off_t) 0, SEEK_END)) != -1)
238 off_t diff;
239 /* Be careful here. The current position may actually be
240 beyond the end of the file. As in the example above. */
241 bytes = (diff = end_pos - current_pos) < 0 ? 0 : diff;
243 else
245 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
247 if (bytes_read == SAFE_READ_ERROR)
249 error (0, errno, "%s", file);
250 exit_status = 1;
251 break;
253 bytes += bytes_read;
257 else if (!count_chars && !count_complicated)
259 /* Use a separate loop when counting only lines or lines and bytes --
260 but not chars or words. */
261 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
263 register char *p = buf;
265 if (bytes_read == SAFE_READ_ERROR)
267 error (0, errno, "%s", file);
268 exit_status = 1;
269 break;
272 while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
274 ++p;
275 ++lines;
277 bytes += bytes_read;
280 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
281 # define SUPPORT_OLD_MBRTOWC 1
282 else if (MB_CUR_MAX > 1)
284 int in_word = 0;
285 uintmax_t linepos = 0;
286 mbstate_t state;
287 uintmax_t last_error_line = 0;
288 int last_error_errno = 0;
289 # if SUPPORT_OLD_MBRTOWC
290 /* Back-up the state before each multibyte character conversion and
291 move the last incomplete character of the buffer to the front
292 of the buffer. This is needed because we don't know whether
293 the `mbrtowc' function updates the state when it returns -2, -
294 this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
295 ANSI C, glibc-2.1 and Solaris 2.7 behaviour. We don't have an
296 autoconf test for this, yet. */
297 size_t prev = 0; /* number of bytes carried over from previous round */
298 # else
299 const size_t prev = 0;
300 # endif
302 memset (&state, 0, sizeof (mbstate_t));
303 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
305 const char *p;
306 # if SUPPORT_OLD_MBRTOWC
307 mbstate_t backup_state;
308 # endif
309 if (bytes_read == SAFE_READ_ERROR)
311 error (0, errno, "%s", file);
312 exit_status = 1;
313 break;
316 bytes += bytes_read;
317 p = buf;
318 bytes_read += prev;
321 wchar_t wide_char;
322 size_t n;
324 # if SUPPORT_OLD_MBRTOWC
325 backup_state = state;
326 # endif
327 n = mbrtowc (&wide_char, p, bytes_read, &state);
328 if (n == (size_t) -2)
330 # if SUPPORT_OLD_MBRTOWC
331 state = backup_state;
332 # endif
333 break;
335 if (n == (size_t) -1)
337 /* Signal repeated errors only once per line. */
338 if (!(lines + 1 == last_error_line
339 && errno == last_error_errno))
341 char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)];
342 last_error_line = lines + 1;
343 last_error_errno = errno;
344 error (0, errno, "%s:%s", file,
345 umaxtostr (last_error_line, line_number_buf));
347 p++;
348 bytes_read--;
350 else
352 if (n == 0)
354 wide_char = 0;
355 n = 1;
357 p += n;
358 bytes_read -= n;
359 chars++;
360 switch (wide_char)
362 case '\n':
363 lines++;
364 /* Fall through. */
365 case '\r':
366 case '\f':
367 if (linepos > linelength)
368 linelength = linepos;
369 linepos = 0;
370 goto mb_word_separator;
371 case '\t':
372 linepos += 8 - (linepos % 8);
373 goto mb_word_separator;
374 case ' ':
375 linepos++;
376 /* Fall through. */
377 case '\v':
378 mb_word_separator:
379 if (in_word)
381 in_word = 0;
382 words++;
384 break;
385 default:
386 if (iswprint (wide_char))
388 int width = wcwidth (wide_char);
389 if (width > 0)
390 linepos += width;
391 if (iswspace (wide_char))
392 goto mb_word_separator;
393 in_word = 1;
395 break;
399 while (bytes_read > 0);
401 # if SUPPORT_OLD_MBRTOWC
402 if (bytes_read > 0)
404 if (bytes_read == BUFFER_SIZE)
406 /* Encountered a very long redundant shift sequence. */
407 p++;
408 bytes_read--;
410 memmove (buf, p, bytes_read);
412 prev = bytes_read;
413 # endif
415 if (linepos > linelength)
416 linelength = linepos;
417 if (in_word)
418 words++;
420 #endif
421 else
423 int in_word = 0;
424 uintmax_t linepos = 0;
426 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
428 const char *p = buf;
429 if (bytes_read == SAFE_READ_ERROR)
431 error (0, errno, "%s", file);
432 exit_status = 1;
433 break;
436 bytes += bytes_read;
439 switch (*p++)
441 case '\n':
442 lines++;
443 /* Fall through. */
444 case '\r':
445 case '\f':
446 if (linepos > linelength)
447 linelength = linepos;
448 linepos = 0;
449 goto word_separator;
450 case '\t':
451 linepos += 8 - (linepos % 8);
452 goto word_separator;
453 case ' ':
454 linepos++;
455 /* Fall through. */
456 case '\v':
457 word_separator:
458 if (in_word)
460 in_word = 0;
461 words++;
463 break;
464 default:
465 if (ISPRINT ((unsigned char) p[-1]))
467 linepos++;
468 if (ISSPACE ((unsigned char) p[-1]))
469 goto word_separator;
470 in_word = 1;
472 break;
475 while (--bytes_read);
477 if (linepos > linelength)
478 linelength = linepos;
479 if (in_word)
480 words++;
483 if (count_chars < print_chars)
484 chars = bytes;
486 write_counts (lines, words, chars, bytes, linelength, file);
487 total_lines += lines;
488 total_words += words;
489 total_chars += chars;
490 total_bytes += bytes;
491 if (linelength > max_line_length)
492 max_line_length = linelength;
495 static void
496 wc_file (const char *file)
498 if (STREQ (file, "-"))
500 have_read_stdin = 1;
501 wc (0, file);
503 else
505 int fd = open (file, O_RDONLY);
506 if (fd == -1)
508 error (0, errno, "%s", file);
509 exit_status = 1;
510 return;
512 wc (fd, file);
513 if (close (fd))
515 error (0, errno, "%s", file);
516 exit_status = 1;
522 main (int argc, char **argv)
524 int optc;
525 int nfiles;
527 program_name = argv[0];
528 setlocale (LC_ALL, "");
529 bindtextdomain (PACKAGE, LOCALEDIR);
530 textdomain (PACKAGE);
532 atexit (close_stdout);
534 exit_status = 0;
535 posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
536 print_lines = print_words = print_chars = print_bytes = print_linelength = 0;
537 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
539 while ((optc = getopt_long (argc, argv, "clLmw", longopts, NULL)) != -1)
540 switch (optc)
542 case 0:
543 break;
545 case 'c':
546 print_bytes = 1;
547 break;
549 case 'm':
550 print_chars = 1;
551 break;
553 case 'l':
554 print_lines = 1;
555 break;
557 case 'w':
558 print_words = 1;
559 break;
561 case 'L':
562 print_linelength = 1;
563 break;
565 case_GETOPT_HELP_CHAR;
567 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
569 default:
570 usage (EXIT_FAILURE);
573 if (print_lines + print_words + print_chars + print_bytes + print_linelength
574 == 0)
575 print_lines = print_words = print_bytes = 1;
577 nfiles = argc - optind;
579 if (nfiles == 0)
581 have_read_stdin = 1;
582 wc (0, "");
584 else
586 for (; optind < argc; ++optind)
587 wc_file (argv[optind]);
589 if (nfiles > 1)
590 write_counts (total_lines, total_words, total_chars, total_bytes,
591 max_line_length, _("total"));
594 if (have_read_stdin && close (STDIN_FILENO) != 0)
595 error (EXIT_FAILURE, errno, "-");
597 exit (exit_status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);