/* Retrieved from the coreutils.git web view: src/wc.c,
   blob 062a7dd057074a7020f433a18e932bdb470132c4.
   Commit log message: "*** empty log message ***"  */
/* wc - print the number of bytes, words, and lines in files
   Copyright (C) 85, 91, 1995-2002 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

/* Written by Paul Rubin, phr@ocf.berkeley.edu
   and David MacKenzie, djm@gnu.ai.mit.edu. */
21 #include <config.h>
22 #if HAVE_INTTYPES_H
23 # include <inttypes.h>
24 #endif
26 #include <stdio.h>
27 #include <getopt.h>
28 #include <sys/types.h>
30 /* Get mbstate_t, mbrtowc(), wcwidth(). */
31 #if HAVE_WCHAR_H
32 # include <wchar.h>
33 #endif
35 /* Get iswprint(), iswspace(). */
36 #if HAVE_WCTYPE_H
37 # include <wctype.h>
38 #endif
39 #if !defined iswprint && !HAVE_ISWPRINT
40 # define iswprint(wc) 1
41 #endif
42 #if !defined iswspace && !HAVE_ISWSPACE
43 # define iswspace(wc) \
44 ((wc) == (unsigned char) (wc) && ISSPACE ((unsigned char) (wc)))
45 #endif
47 /* Include this after wctype.h so that we `#undef' ISPRINT
48 (from Solaris's euc.h, from widec.h, from wctype.h) before
49 redefining and using it. */
50 #include "system.h"
52 #include "closeout.h"
53 #include "error.h"
54 #include "human.h"
55 #include "safe-read.h"
57 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
58 #if HAVE_MBRTOWC && defined mbstate_t
59 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
60 #endif
62 #ifndef HAVE_DECL_WCWIDTH
63 "this configure-time declaration test was not run"
64 #endif
65 #if !HAVE_DECL_WCWIDTH
66 extern int wcwidth ();
67 #endif
69 /* If wcwidth() doesn't exist, assume all printable characters have
70 width 1. */
71 #if !defined wcwidth && !HAVE_WCWIDTH
72 # define wcwidth(wc) ((wc) == 0 ? 0 : iswprint (wc) ? 1 : -1)
73 #endif
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "wc"

#define AUTHORS N_ ("Paul Rubin and David MacKenzie")

/* Size of atomic reads.  */
#define BUFFER_SIZE (16 * 1024)

/* The name this program was run with.  */
char *program_name;

/* Cumulative number of lines, words, chars and bytes in all files so far.
   max_line_length is the maximum over all files processed so far.  */
static uintmax_t total_lines;
static uintmax_t total_words;
static uintmax_t total_chars;
static uintmax_t total_bytes;
static uintmax_t max_line_length;

/* Which counts to print.  */
static int print_lines, print_words, print_chars, print_bytes;
static int print_linelength;

/* Nonzero if we have ever read the standard input.  */
static int have_read_stdin;

/* The error code to return to the system.  */
static int exit_status;

/* If nonzero, do not line up columns but instead separate numbers by
   a single space as specified in Single Unix Specification and POSIX.  */
static int posixly_correct;
108 static struct option const longopts[] =
110 {"bytes", no_argument, NULL, 'c'},
111 {"chars", no_argument, NULL, 'm'},
112 {"lines", no_argument, NULL, 'l'},
113 {"words", no_argument, NULL, 'w'},
114 {"max-line-length", no_argument, NULL, 'L'},
115 {GETOPT_HELP_OPTION_DECL},
116 {GETOPT_VERSION_OPTION_DECL},
117 {NULL, 0, NULL, 0}
120 void
121 usage (int status)
123 if (status != 0)
124 fprintf (stderr, _("Try `%s --help' for more information.\n"),
125 program_name);
126 else
128 printf (_("\
129 Usage: %s [OPTION]... [FILE]...\n\
131 program_name);
132 fputs (_("\
133 Print byte, word, and newline counts for each FILE, and a total line if\n\
134 more than one FILE is specified. With no FILE, or when FILE is -,\n\
135 read standard input.\n\
136 -c, --bytes print the byte counts\n\
137 -m, --chars print the character counts\n\
138 -l, --lines print the newline counts\n\
139 "), stdout);
140 fputs (_("\
141 -L, --max-line-length print the length of the longest line\n\
142 -w, --words print the word counts\n\
143 "), stdout);
144 fputs (HELP_OPTION_DESCRIPTION, stdout);
145 fputs (VERSION_OPTION_DESCRIPTION, stdout);
146 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
148 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
151 static void
152 write_counts (uintmax_t lines,
153 uintmax_t words,
154 uintmax_t chars,
155 uintmax_t bytes,
156 uintmax_t linelength,
157 const char *file)
159 char buf[LONGEST_HUMAN_READABLE + 1];
160 char const *space = "";
161 char const *format_int = (posixly_correct ? "%s" : "%7s");
162 char const *format_sp_int = (posixly_correct ? "%s%s" : "%s%7s");
164 if (print_lines)
166 printf (format_int, human_readable (lines, buf, 1, 1));
167 space = " ";
169 if (print_words)
171 printf (format_sp_int, space, human_readable (words, buf, 1, 1));
172 space = " ";
174 if (print_chars)
176 printf (format_sp_int, space, human_readable (chars, buf, 1, 1));
177 space = " ";
179 if (print_bytes)
181 printf (format_sp_int, space, human_readable (bytes, buf, 1, 1));
182 space = " ";
184 if (print_linelength)
186 printf (format_sp_int, space, human_readable (linelength, buf, 1, 1));
188 if (*file)
189 printf (" %s", file);
190 putchar ('\n');
193 static void
194 wc (int fd, const char *file)
196 char buf[BUFFER_SIZE + 1];
197 ssize_t bytes_read;
198 uintmax_t lines, words, chars, bytes, linelength;
199 int count_bytes, count_chars, count_complicated;
201 lines = words = chars = bytes = linelength = 0;
203 /* If in the current locale, chars are equivalent to bytes, we prefer
204 counting bytes, because that's easier. */
205 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
206 if (MB_CUR_MAX > 1)
208 count_bytes = print_bytes;
209 count_chars = print_chars;
211 else
212 #endif
214 count_bytes = print_bytes + print_chars;
215 count_chars = 0;
217 count_complicated = print_words + print_linelength;
219 /* We need binary input, since `wc' relies on `lseek' and byte counts. */
220 SET_BINARY (fd);
222 /* When counting only bytes, save some line- and word-counting
223 overhead. If FD is a `regular' Unix file, using lseek is enough
224 to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE
225 bytes at a time until EOF. Note that the `size' (number of bytes)
226 that wc reports is smaller than stats.st_size when the file is not
227 positioned at its beginning. That's why the lseek calls below are
228 necessary. For example the command
229 `(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
230 should make wc report `0' bytes. */
232 if (count_bytes && !count_chars && !print_lines && !count_complicated)
234 off_t current_pos, end_pos;
235 struct stat stats;
237 if (fstat (fd, &stats) == 0 && S_ISREG (stats.st_mode)
238 && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1
239 && (end_pos = lseek (fd, (off_t) 0, SEEK_END)) != -1)
241 off_t diff;
242 /* Be careful here. The current position may actually be
243 beyond the end of the file. As in the example above. */
244 bytes = (diff = end_pos - current_pos) < 0 ? 0 : diff;
246 else
248 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
250 bytes += bytes_read;
252 if (bytes_read < 0)
254 error (0, errno, "%s", file);
255 exit_status = 1;
259 else if (!count_chars && !count_complicated)
261 /* Use a separate loop when counting only lines or lines and bytes --
262 but not chars or words. */
263 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
265 register char *p = buf;
267 while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
269 ++p;
270 ++lines;
272 bytes += bytes_read;
274 if (bytes_read < 0)
276 error (0, errno, "%s", file);
277 exit_status = 1;
280 #if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
281 # define SUPPORT_OLD_MBRTOWC 1
282 else if (MB_CUR_MAX > 1)
284 int in_word = 0;
285 uintmax_t linepos = 0;
286 mbstate_t state;
287 uintmax_t last_error_line = 0;
288 int last_error_errno = 0;
289 # if SUPPORT_OLD_MBRTOWC
290 /* Back-up the state before each multibyte character conversion and
291 move the last incomplete character of the buffer to the front
292 of the buffer. This is needed because we don't know whether
293 the `mbrtowc' function updates the state when it returns -2, -
294 this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
295 ANSI C, glibc-2.1 and Solaris 2.7 behaviour. We don't have an
296 autoconf test for this, yet. */
297 int prev = 0; /* number of bytes carried over from previous round */
298 # else
299 const int prev = 0;
300 # endif
302 memset (&state, 0, sizeof (mbstate_t));
303 while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
305 const char *p;
306 # if SUPPORT_OLD_MBRTOWC
307 mbstate_t backup_state;
308 # endif
310 bytes += bytes_read;
311 p = buf;
312 bytes_read += prev;
315 wchar_t wide_char;
316 size_t n;
318 # if SUPPORT_OLD_MBRTOWC
319 backup_state = state;
320 # endif
321 n = mbrtowc (&wide_char, p, bytes_read, &state);
322 if (n == (size_t) -2)
324 # if SUPPORT_OLD_MBRTOWC
325 state = backup_state;
326 # endif
327 break;
329 if (n == (size_t) -1)
331 /* Signal repeated errors only once per line. */
332 if (!(lines + 1 == last_error_line
333 && errno == last_error_errno))
335 char hr_buf[LONGEST_HUMAN_READABLE + 1];
336 last_error_line = lines + 1;
337 last_error_errno = errno;
338 error (0, errno, "%s:%s", file,
339 human_readable (lines + 1, hr_buf, 1, 1));
341 p++;
342 bytes_read--;
344 else
346 if (n == 0)
348 wide_char = 0;
349 n = 1;
351 p += n;
352 bytes_read -= n;
353 chars++;
354 switch (wide_char)
356 case '\n':
357 lines++;
358 /* Fall through. */
359 case '\r':
360 case '\f':
361 if (linepos > linelength)
362 linelength = linepos;
363 linepos = 0;
364 goto mb_word_separator;
365 case '\t':
366 linepos += 8 - (linepos % 8);
367 goto mb_word_separator;
368 case ' ':
369 linepos++;
370 /* Fall through. */
371 case '\v':
372 mb_word_separator:
373 if (in_word)
375 in_word = 0;
376 words++;
378 break;
379 default:
380 if (iswprint (wide_char))
382 int width = wcwidth (wide_char);
383 if (width > 0)
384 linepos += width;
385 if (iswspace (wide_char))
386 goto mb_word_separator;
387 in_word = 1;
389 break;
393 while (bytes_read > 0);
395 # if SUPPORT_OLD_MBRTOWC
396 if (bytes_read > 0)
398 if (bytes_read == BUFFER_SIZE)
400 /* Encountered a very long redundant shift sequence. */
401 p++;
402 bytes_read--;
404 memmove (buf, p, bytes_read);
406 prev = bytes_read;
407 # endif
409 if (bytes_read < 0)
411 error (0, errno, "%s", file);
412 exit_status = 1;
414 if (linepos > linelength)
415 linelength = linepos;
416 if (in_word)
417 words++;
419 #endif
420 else
422 int in_word = 0;
423 uintmax_t linepos = 0;
425 while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
427 const char *p = buf;
429 bytes += bytes_read;
432 switch (*p++)
434 case '\n':
435 lines++;
436 /* Fall through. */
437 case '\r':
438 case '\f':
439 if (linepos > linelength)
440 linelength = linepos;
441 linepos = 0;
442 goto word_separator;
443 case '\t':
444 linepos += 8 - (linepos % 8);
445 goto word_separator;
446 case ' ':
447 linepos++;
448 /* Fall through. */
449 case '\v':
450 word_separator:
451 if (in_word)
453 in_word = 0;
454 words++;
456 break;
457 default:
458 if (ISPRINT ((unsigned char) p[-1]))
460 linepos++;
461 if (ISSPACE ((unsigned char) p[-1]))
462 goto word_separator;
463 in_word = 1;
465 break;
468 while (--bytes_read);
470 if (bytes_read < 0)
472 error (0, errno, "%s", file);
473 exit_status = 1;
475 if (linepos > linelength)
476 linelength = linepos;
477 if (in_word)
478 words++;
481 if (count_chars < print_chars)
482 chars = bytes;
484 write_counts (lines, words, chars, bytes, linelength, file);
485 total_lines += lines;
486 total_words += words;
487 total_chars += chars;
488 total_bytes += bytes;
489 if (linelength > max_line_length)
490 max_line_length = linelength;
493 static void
494 wc_file (const char *file)
496 if (STREQ (file, "-"))
498 have_read_stdin = 1;
499 wc (0, file);
501 else
503 int fd = open (file, O_RDONLY);
504 if (fd == -1)
506 error (0, errno, "%s", file);
507 exit_status = 1;
508 return;
510 wc (fd, file);
511 if (close (fd))
513 error (0, errno, "%s", file);
514 exit_status = 1;
520 main (int argc, char **argv)
522 int optc;
523 int nfiles;
525 program_name = argv[0];
526 setlocale (LC_ALL, "");
527 bindtextdomain (PACKAGE, LOCALEDIR);
528 textdomain (PACKAGE);
530 atexit (close_stdout);
532 exit_status = 0;
533 posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
534 print_lines = print_words = print_chars = print_bytes = print_linelength = 0;
535 total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
537 while ((optc = getopt_long (argc, argv, "clLmw", longopts, NULL)) != -1)
538 switch (optc)
540 case 0:
541 break;
543 case 'c':
544 print_bytes = 1;
545 break;
547 case 'm':
548 print_chars = 1;
549 break;
551 case 'l':
552 print_lines = 1;
553 break;
555 case 'w':
556 print_words = 1;
557 break;
559 case 'L':
560 print_linelength = 1;
561 break;
563 case_GETOPT_HELP_CHAR;
565 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
567 default:
568 usage (1);
571 if (print_lines + print_words + print_chars + print_bytes + print_linelength
572 == 0)
573 print_lines = print_words = print_bytes = 1;
575 nfiles = argc - optind;
577 if (nfiles == 0)
579 have_read_stdin = 1;
580 wc (0, "");
582 else
584 for (; optind < argc; ++optind)
585 wc_file (argv[optind]);
587 if (nfiles > 1)
588 write_counts (total_lines, total_words, total_chars, total_bytes,
589 max_line_length, _("total"));
592 if (have_read_stdin && close (0))
593 error (EXIT_FAILURE, errno, "-");
595 exit (exit_status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);