1 /* $NetBSD: wc.c,v 1.35 2011/09/16 15:39:30 joerg Exp $ */
4 * Copyright (c) 1980, 1987, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include <sys/cdefs.h>
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\
35 The Regents of the University of California. All rights reserved.");
40 static char sccsid
[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95";
42 __RCSID("$NetBSD: wc.c,v 1.35 2011/09/16 15:39:30 joerg Exp $");
46 /* wc: count lines, words and characters, and optionally report the length of the longest line. */
48 #include <sys/param.h>
66 typedef u_long wc_count_t
;
67 # define WCFMT " %7lu"
68 # define WCCAST unsigned long
70 typedef u_quad_t wc_count_t
;
71 # define WCFMT " %7llu"
72 # define WCCAST unsigned long long
75 static wc_count_t tlinect
, twordct
, tcharct
, tlongest
;
76 static bool doline
, doword
, dobyte
, dochar
, dolongest
;
79 static void cnt(const char *);
80 static void print_counts(wc_count_t
, wc_count_t
, wc_count_t
, wc_count_t
,
82 __dead
static void usage(void);
83 static size_t do_mb(wchar_t *, const char *, size_t, mbstate_t *,
84 size_t *, const char *);
87 main(int argc
, char *argv
[])
91 setlocale(LC_ALL
, "");
93 while ((ch
= getopt(argc
, argv
, "lwcmL")) != -1)
119 /* If no flags were given, default to counting lines, words and bytes. */
120 if (!(doline
|| doword
|| dobyte
|| dochar
|| dolongest
))
121 doline
= doword
= dobyte
= true;
126 bool dototal
= (argc
> 1);
133 print_counts(tlinect
, twordct
, tcharct
, tlongest
,
142 do_mb(wchar_t *wc
, const char *p
, size_t len
, mbstate_t *st
,
143 size_t *retcnt
, const char *file
)
149 r
= mbrtowc(wc
, p
, len
, st
);
150 if (r
== (size_t)-1) {
151 warnx("%s: invalid byte sequence", file
);
154 /* XXX skip 1 byte */
157 memset(st
, 0, sizeof(*st
));
159 } else if (r
== (size_t)-2)
176 cnt(const char *file
)
178 u_char buf
[MAXBSIZE
];
179 wchar_t wbuf
[MAXBSIZE
];
181 wc_count_t charct
, linect
, wordct
, longest
;
185 const char *name
; /* filename or <stdin> */
189 linect
= wordct
= charct
= longest
= 0;
191 if ((fd
= open(file
, O_RDONLY
, 0)) < 0) {
202 if (dochar
|| doword
|| dolongest
)
203 (void)memset(&st
, 0, sizeof(st
));
205 if (!(doword
|| dolongest
)) {
207 * line counting is split out because it's a lot
208 * faster to get lines than to get words, since
209 * the word count requires some logic.
211 if (doline
|| dochar
) {
212 while ((len
= read(fd
, buf
, MAXBSIZE
)) > 0) {
216 r
= do_mb(0, (char *)buf
, (size_t)len
,
222 for (C
= buf
; len
--; ++C
) {
231 * if all we need is the number of characters and
232 * it's a directory or a regular or linked file, just
233 * stat the puppy. We avoid testing for it not being
234 * a special device in case someone adds a new type
238 if (fstat(fd
, &sb
)) {
242 if (S_ISREG(sb
.st_mode
) ||
243 S_ISLNK(sb
.st_mode
) ||
244 S_ISDIR(sb
.st_mode
)) {
248 read(fd
, buf
, MAXBSIZE
)) > 0)
254 /* do it the hard way... */
260 while ((len
= read(fd
, buf
, MAXBSIZE
)) > 0) {
263 r
= do_mb(wbuf
, (char *)buf
, (size_t)len
, &st
, &wlen
,
270 for (WC
= wbuf
; wlen
--; ++WC
) {
275 if (linelen
> longest
)
283 * This line implements the POSIX
284 * spec, i.e. a word is a "maximal
285 * string of characters delimited by
286 * whitespace." Notice nothing was
287 * said about a character being
288 * printing or non-printing.
305 if (dochar
&& r
== (size_t)-2) {
306 warnx("%s: incomplete multibyte character", name
);
310 print_counts(linect
, wordct
, charct
, longest
, file
);
313 * don't bother checking doline, doword, or dobyte --- speeds
319 if (dolongest
&& longest
> tlongest
)
329 print_counts(wc_count_t lines
, wc_count_t words
, wc_count_t chars
,
330 wc_count_t longest
, const char *name
)
334 (void)printf(WCFMT
, (WCCAST
)lines
);
336 (void)printf(WCFMT
, (WCCAST
)words
);
337 if (dobyte
|| dochar
)
338 (void)printf(WCFMT
, (WCCAST
)chars
);
340 (void)printf(WCFMT
, (WCCAST
)longest
);
343 (void)printf(" %s\n", name
);
352 (void)fprintf(stderr
, "usage: wc [-c | -m] [-Llw] [file ...]\n");