Sync with manuals from netbsd-8 branch.
[minix3.git] / usr.bin / wc / wc.c
blob602a4508da810c5ed5ad14f40391a6e4d0f0553f
1 /* $NetBSD: wc.c,v 1.35 2011/09/16 15:39:30 joerg Exp $ */
3 /*
4 * Copyright (c) 1980, 1987, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\
35 The Regents of the University of California. All rights reserved.");
36 #endif /* not lint */
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95";
41 #else
42 __RCSID("$NetBSD: wc.c,v 1.35 2011/09/16 15:39:30 joerg Exp $");
43 #endif
44 #endif /* not lint */
46 /* wc line, word, char count and optionally longest line. */
48 #include <sys/param.h>
49 #include <sys/file.h>
50 #include <sys/stat.h>
52 #include <ctype.h>
53 #include <fcntl.h>
54 #include <err.h>
55 #include <errno.h>
56 #include <locale.h>
57 #include <stdbool.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <unistd.h>
62 #include <wchar.h>
63 #include <wctype.h>
65 #ifdef NO_QUAD
66 typedef u_long wc_count_t;
67 # define WCFMT " %7lu"
68 # define WCCAST unsigned long
69 #else
70 typedef u_quad_t wc_count_t;
71 # define WCFMT " %7llu"
72 # define WCCAST unsigned long long
73 #endif
75 static wc_count_t tlinect, twordct, tcharct, tlongest;
76 static bool doline, doword, dobyte, dochar, dolongest;
77 static int rval = 0;
79 static void cnt(const char *);
80 static void print_counts(wc_count_t, wc_count_t, wc_count_t, wc_count_t,
81 const char *);
82 __dead static void usage(void);
83 static size_t do_mb(wchar_t *, const char *, size_t, mbstate_t *,
84 size_t *, const char *);
86 int
87 main(int argc, char *argv[])
89 int ch;
91 setlocale(LC_ALL, "");
93 while ((ch = getopt(argc, argv, "lwcmL")) != -1)
94 switch (ch) {
95 case 'l':
96 doline = true;
97 break;
98 case 'w':
99 doword = true;
100 break;
101 case 'm':
102 dochar = true;
103 dobyte = 0;
104 break;
105 case 'c':
106 dochar = 0;
107 dobyte = true;
108 break;
109 case 'L':
110 dolongest = true;
111 break;
112 case '?':
113 default:
114 usage();
116 argv += optind;
117 argc -= optind;
119 /* Wc's flags are on by default. */
120 if (!(doline || doword || dobyte || dochar || dolongest))
121 doline = doword = dobyte = true;
123 if (*argv == NULL) {
124 cnt(NULL);
125 } else {
126 bool dototal = (argc > 1);
128 do {
129 cnt(*argv);
130 } while(*++argv);
132 if (dototal) {
133 print_counts(tlinect, twordct, tcharct, tlongest,
134 "total");
138 exit(rval);
141 static size_t
142 do_mb(wchar_t *wc, const char *p, size_t len, mbstate_t *st,
143 size_t *retcnt, const char *file)
145 size_t r;
146 size_t c = 0;
148 do {
149 r = mbrtowc(wc, p, len, st);
150 if (r == (size_t)-1) {
151 warnx("%s: invalid byte sequence", file);
152 rval = 1;
154 /* XXX skip 1 byte */
155 len--;
156 p++;
157 memset(st, 0, sizeof(*st));
158 continue;
159 } else if (r == (size_t)-2)
160 break;
161 else if (r == 0)
162 r = 1;
163 c++;
164 if (wc)
165 wc++;
166 len -= r;
167 p += r;
168 } while (len > 0);
170 *retcnt = c;
172 return (r);
175 static void
176 cnt(const char *file)
178 u_char buf[MAXBSIZE];
179 wchar_t wbuf[MAXBSIZE];
180 struct stat sb;
181 wc_count_t charct, linect, wordct, longest;
182 mbstate_t st;
183 u_char *C;
184 wchar_t *WC;
185 const char *name; /* filename or <stdin> */
186 size_t r = 0;
187 int fd, len = 0;
189 linect = wordct = charct = longest = 0;
190 if (file != NULL) {
191 if ((fd = open(file, O_RDONLY, 0)) < 0) {
192 warn("%s", file);
193 rval = 1;
194 return;
196 name = file;
197 } else {
198 fd = STDIN_FILENO;
199 name = "<stdin>";
202 if (dochar || doword || dolongest)
203 (void)memset(&st, 0, sizeof(st));
205 if (!(doword || dolongest)) {
207 * line counting is split out because it's a lot
208 * faster to get lines than to get words, since
209 * the word count requires some logic.
211 if (doline || dochar) {
212 while ((len = read(fd, buf, MAXBSIZE)) > 0) {
213 if (dochar) {
214 size_t wlen;
216 r = do_mb(0, (char *)buf, (size_t)len,
217 &st, &wlen, name);
218 charct += wlen;
219 } else if (dobyte)
220 charct += len;
221 if (doline) {
222 for (C = buf; len--; ++C) {
223 if (*C == '\n')
224 ++linect;
231 * if all we need is the number of characters and
232 * it's a directory or a regular or linked file, just
233 * stat the puppy. We avoid testing for it not being
234 * a special device in case someone adds a new type
235 * of inode.
237 else if (dobyte) {
238 if (fstat(fd, &sb)) {
239 warn("%s", name);
240 rval = 1;
241 } else {
242 if (S_ISREG(sb.st_mode) ||
243 S_ISLNK(sb.st_mode) ||
244 S_ISDIR(sb.st_mode)) {
245 charct = sb.st_size;
246 } else {
247 while ((len =
248 read(fd, buf, MAXBSIZE)) > 0)
249 charct += len;
253 } else {
254 /* do it the hard way... */
255 wc_count_t linelen;
256 bool gotsp;
258 linelen = 0;
259 gotsp = true;
260 while ((len = read(fd, buf, MAXBSIZE)) > 0) {
261 size_t wlen;
263 r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen,
264 name);
265 if (dochar) {
266 charct += wlen;
267 } else if (dobyte) {
268 charct += len;
270 for (WC = wbuf; wlen--; ++WC) {
271 if (iswspace(*WC)) {
272 gotsp = true;
273 if (*WC == L'\n') {
274 ++linect;
275 if (linelen > longest)
276 longest = linelen;
277 linelen = 0;
278 } else {
279 linelen++;
281 } else {
283 * This line implements the POSIX
284 * spec, i.e. a word is a "maximal
285 * string of characters delimited by
286 * whitespace." Notice nothing was
287 * said about a character being
288 * printing or non-printing.
290 if (gotsp) {
291 gotsp = false;
292 ++wordct;
295 linelen++;
301 if (len == -1) {
302 warn("%s", name);
303 rval = 1;
305 if (dochar && r == (size_t)-2) {
306 warnx("%s: incomplete multibyte character", name);
307 rval = 1;
310 print_counts(linect, wordct, charct, longest, file);
313 * don't bother checkint doline, doword, or dobyte --- speeds
314 * up the common case
316 tlinect += linect;
317 twordct += wordct;
318 tcharct += charct;
319 if (dolongest && longest > tlongest)
320 tlongest = longest;
322 if (close(fd)) {
323 warn("%s", name);
324 rval = 1;
328 static void
329 print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars,
330 wc_count_t longest, const char *name)
333 if (doline)
334 (void)printf(WCFMT, (WCCAST)lines);
335 if (doword)
336 (void)printf(WCFMT, (WCCAST)words);
337 if (dobyte || dochar)
338 (void)printf(WCFMT, (WCCAST)chars);
339 if (dolongest)
340 (void)printf(WCFMT, (WCCAST)longest);
342 if (name != NULL)
343 (void)printf(" %s\n", name);
344 else
345 (void)putchar('\n');
/* Write the usage synopsis to standard error and exit with status 1. */
static void
usage(void)
{
	/* The message has no conversions, so it can be written as-is. */
	(void)fputs("usage: wc [-c | -m] [-Llw] [file ...]\n", stderr);
	exit(1);
}