4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
37 #include <sys/param.h>
41 * fmt -- format the concatenation of input files or standard input
42 * onto standard output. Designed for use with Mail ~|
44 * Syntax: fmt [ -width | -w width ] [ -cs ] [ name ... ]
45 * Author: Kurt Shoens (UCB) 12/7/78
48 #define NOSTR ((wchar_t *)0) /* Null wide-string pointer (for lint); outp holds this when outbuf is empty */
49 #define MAXLINES 100 /* maximum mail header lines to verify; also the capacity of hdrbuf */
51 wchar_t outbuf
[BUFSIZ
]; /* Current partial output line image, including prefixed blanks */
52 wchar_t *outp
; /* Next available space in outbuf; NOSTR when nothing is buffered */
53 int filler
; /* Filler amount in outbuf */
54 char sobuf
[BUFSIZ
]; /* Buffer handed to setbuf() for stdout */
56 int pfx
; /* Current leading blank count */
57 int width
= 72; /* Width that we will not exceed; option parsing rejects <= 0 or >= BUFSIZ-2 */
58 int nojoin
= 0; /* split lines only, don't join short ones */
59 int errs
= 0; /* Current number of errors */
/*
 * Crown margin state machine. c_none is the first enumerator, so it is
 * the value crown_state starts with (no initializer is given). The -c
 * option sets c_reset, and a blank input line sets c_reset again whenever
 * the state is not c_none.
 */
61 enum crown_type
{c_none
, c_reset
, c_head
, c_lead
, c_fixup
, c_body
};
62 enum crown_type crown_state
; /* Crown margin state */
63 int crown_head
; /* The header offset; used as an index into outbuf */
64 int crown_body
; /* The body offset */
65 /* currently-known initial strings found in mail headers */
66 wchar_t *headnames
[] = {
67 L
"Apparently-To", L
"Bcc", L
"bcc", L
"Cc", L
"cc", L
"Confirmed-By",
68 L
"Content", L
"content-length", L
"From", L
"Date", L
"id",
69 L
"Message-I", L
"MIME-Version", L
"Precedence", L
"Return-Path",
70 L
"Received", L
"Reply-To", L
"Status", L
"Subject", L
"To", L
"X-IMAP",
71 L
"X-Lines", L
"X-Sender", L
"X-Sun", L
"X-Status", L
"X-UID",
75 off
, /* mail header processing is off */
76 not_in_hdr
, /* not currently processing a mail header */
77 in_hdr
, /* currently filling hdrbuf with potential hdr lines */
78 flush_hdr
, /* flush hdrbuf; not a header, no special processing */
79 do_hdr
/* process hdrbuf as a mail header */
81 /* current state of hdrbuf; reset to not_in_hdr once hdrbuf has been output */
82 enum hdr_type hdr_state
= not_in_hdr
;
84 wchar_t *hdrbuf
[MAXLINES
]; /* potential mail header lines; each entry is malloc'ed by fill_hdrbuf */
85 int h_lines
; /* index of the next hdrbuf line to save; also the current line count */
/*
 * Pointer through which the line splitter is called. csplit and msplit,
 * declared below, have the matching signature.
 */
87 void (*(split
))(wchar_t []);
/* Declared extern; no definition is visible in this part of the file. */
88 extern int scrwidth(wchar_t);
89 extern boolean_t
is_headline(const char *); /* takes a char string, not wchar_t */
/* Forward declarations of the file-local helpers. */
92 static void fill_hdrbuf(wchar_t []);
93 static void header_chk(void);
94 static void process_hdrbuf(void);
95 static void leadin(void);
96 static void tabulate(wchar_t []);
97 static void oflush(void);
98 static void pack(wchar_t []);
99 static void msplit(wchar_t []);
100 static void csplit(wchar_t []);
101 static void _wckind_init(void);
102 static void prefix(wchar_t []);
103 static void fmt(FILE *);
104 static int setopt(char *);
105 int _wckind(wchar_t);
108 * Drive the whole formatter by managing input files. Also,
109 * cause initialization of the output stuff and flush it out
114 main(int argc
, char **argv
)
122 setbuf(stdout
, sobuf
);
123 setlocale(LC_ALL
, "");
124 locale
= setlocale(LC_CTYPE
, "");
125 if (strcmp(locale
, "C") == 0) {
143 if ((fi
= fopen(cp
, "r")) == NULL
) {
159 * Read up characters from the passed input file, forming lines,
160 * doing ^H processing, expanding tabs, stripping trailing blanks,
161 * and sending each line down for analysis.
167 wchar_t linebuf
[BUFSIZ
], canonb
[BUFSIZ
];
171 char cbuf
[BUFSIZ
]; /* stores wchar_t string as char string */
176 * Collect a line, doing ^H processing.
177 * Leave tabs for now.
181 while (c
!= L
'\n' && c
!= EOF
&& cp
-linebuf
< BUFSIZ
-1) {
188 if (!(iswprint(c
)) && c
!= L
'\t') {
198 * Toss anything remaining on the input line.
201 while (c
!= L
'\n' && c
!= EOF
)
204 * Expand tabs on the way to canonb.
213 if (cp2
-canonb
< BUFSIZ
-1)
218 if (cp2
-canonb
< BUFSIZ
-1)
221 } while ((col
& 07) != 0);
225 * Swipe trailing blanks from the line.
228 for (cp2
--; cp2
>= canonb
&& *cp2
== L
' '; cp2
--) {
232 /* special processing to look for mail header lines */
237 /* look for an initial mail header line */
238 /* skip initial blanks */
239 for (cp
= canonb
; *cp
== L
' '; cp
++) {
242 * Need to convert string from wchar_t to char,
243 * since this is what is_headline() expects. Since we
244 * only want to make sure cp points to a "From" line
245 * of the email, we don't have to alloc
246 * BUFSIZ * MB_LEN_MAX to cbuf.
248 wcstombs(cbuf
, cp
, (BUFSIZ
- 1));
249 if (is_headline(cbuf
) == B_TRUE
) {
253 /* no mail header line; process normally */
258 /* already saw 1st mail header line; look for more */
259 if (canonb
[0] == L
'\0') {
261 * blank line means end of mail header;
262 * verify current mail header buffer
263 * then process it accordingly
267 /* now process the current blank line */
271 * not a blank line--save this line as
272 * a potential mail header line
281 * end of this file--make sure we process the stuff in
282 * hdrbuf before we're finished
284 if (hdr_state
== in_hdr
) {
291 * Take a line devoid of tabs and other garbage and determine its
292 * blank prefix. If the indent changes, call for a linebreak.
293 * If the input line is blank, echo the blank line on the output.
294 * Finally, if the line minus the prefix is a mail header, try to keep
295 * it on a line by itself.
299 prefix(wchar_t line
[])
303 int nosplit
= 0; /* flag set if line should not be split */
305 if (line
[0] == L
'\0') {
308 if (crown_state
!= c_none
)
309 crown_state
= c_reset
;
312 for (cp
= line
; *cp
== L
' '; cp
++) {
317 * The following horrible expression attempts to avoid linebreaks
318 * when the indent changes due to a paragraph.
321 if (crown_state
== c_none
&& np
!= pfx
&& (np
> pfx
|| abs(pfx
-np
) > 8))
324 * if this is a mail header line, don't split it; flush previous
325 * line, if any, so we don't join this line to it
327 if (hdr_state
== do_hdr
) {
331 /* flush previous line so we don't join this one to it */
334 /* nroff-type lines starting with '.' are not split nor joined */
335 if (!nosplit
&& (nosplit
= (*cp
== L
'.')))
338 switch (crown_state
) {
341 crown_state
= c_head
;
345 crown_state
= c_body
;
349 crown_state
= c_body
;
354 wscpy(s
, &outbuf
[crown_head
]);
361 /* put whole input line onto outbuf and print it out */
366 * split puts current line onto outbuf, but splits it
367 * at word boundaries, if it exceeds desired length
372 * flush current line so next lines, if any,
373 * won't join to this one
379 * Split up the passed line into output "words" which are
380 * maximal strings of non-blanks with the blank separation
381 * attached at the end. Pass these words along to the output
386 csplit(wchar_t line
[])
389 wchar_t word
[BUFSIZ
];
390 static const wchar_t *srchlist
= (const wchar_t *) L
".:!?";
397 * Collect a 'word,' allowing it to contain escaped
401 while (*cp
&& !(iswspace(*cp
))) {
402 if (*cp
== '\\' && iswspace(cp
[1]))
408 * Guarantee a space at end of line.
409 * Two spaces after end of sentence punctuation.
414 if (wschr(srchlist
, cp
[-1]) != NULL
)
417 while (iswspace(*cp
))
425 msplit(wchar_t line
[])
427 wchar_t *cp
, *cp2
, prev
;
428 wchar_t word
[BUFSIZ
];
429 static const wchar_t *srchlist
= (const wchar_t *) L
".:!?";
437 * Collect a 'word,' allowing it to contain escaped
444 if (_wckind(*cp
) != _wckind(prev
))
445 if (wcsetno(*cp
) != 0 || wcsetno(prev
) != 0)
447 if (*cp
== '\\' && iswspace(cp
[1]))
454 * Guarantee a space at end of line.
455 * Two spaces after end of sentence punctuation.
460 if (wschr(srchlist
, cp
[-1]) != NULL
)
463 while (iswspace(*cp
))
472 * Build up line images from the words passed in. Prefix
473 * each line with correct number of blanks. The buffer "outbuf"
474 * contains the current partial line image, including prefixed blanks.
475 * "outp" points to the next available space therein. When outp is NOSTR,
476 * nothing has been placed there yet. At the bottom of this whole mess,
477 * leading tabs are reinserted.
481 * Pack a word onto the output line. If this is the beginning of
482 * the line, push on the appropriately-sized string of blanks first.
483 * If the word won't fit on the current line, flush and begin a new
484 * line. If the word is too long to fit all by itself on a line,
485 * just give it its own and hope for the best.
500 for (cp
= word
; *cp
; *outp
++ = *cp
++) {
508 for (cp
= word
; *cp
; *outp
++ = *cp
++) {
513 * If there is anything on the current output line, send it on
514 * its way. Set outp to NOSTR to indicate the absence of the current
529 * Take the passed line buffer, insert leading tabs where possible, and
530 * output on standard output (finally).
534 tabulate(wchar_t line
[])
540 /* Toss trailing blanks in the output line */
541 cp
= line
+ wslen(line
) - 1;
542 while (cp
>= line
&& *cp
== L
' ')
545 /* Count the leading blank space and tabulate */
546 for (cp
= line
; *cp
== L
' '; cp
++) {
560 putwc(*cp
++, stdout
);
565 * Initialize the output line with the appropriate number of
576 switch (crown_state
) {
579 crown_state
= c_lead
;
585 crown_state
= c_fixup
;
597 for (b
= 0, cp
= outbuf
; b
< l
; b
++)
603 * Is s1 a prefix of s2?
607 ispref(wchar_t *s1
, wchar_t *s2
)
610 while (*s1
!= L
'\0' && *s2
!= L
'\0')
617 * Set an input option
626 if (cp
[1] == 'c' && cp
[2] == '\0') {
627 crown_state
= c_reset
;
630 if (cp
[1] == 's' && cp
[2] == '\0') {
634 if (cp
[1] == 'w' && cp
[2] == '\0') {
644 if (width
<= 0 || width
>= BUFSIZ
-2) {
645 fprintf(stderr
, "fmt: bad width: %d\n", width
);
/*
 * printf-style template for the per-locale word-resolution library; %s is
 * filled in with the current LC_CTYPE locale name.
 * NOTE(review): the path is built with sprintf() into a MAXPATHLEN + 1
 * buffer with no length check -- consider snprintf().
 */
652 #define LIB_WDRESOLVE "/usr/lib/locale/%s/LC_CTYPE/wdresolve.so"
653 #define WCHKIND "_wdchkind_" /* symbol looked up with dlsym() */
655 static int _wckind_c_locale(wchar_t);
/* Classifier called by _wckind(); starts as, and falls back to, the C-locale version. */
657 static int (*__wckind
)(wchar_t) = _wckind_c_locale
;
/* Handle returned by dlopen() for the locale library; NULL until one is opened. */
658 static void *dlhandle
= NULL
;
665 char path
[MAXPATHLEN
+ 1];
668 if (dlhandle
!= NULL
) {
669 (void) dlclose(dlhandle
);
673 locale
= setlocale(LC_CTYPE
, NULL
);
674 if (strcmp(locale
, "C") == 0)
677 (void) sprintf(path
, LIB_WDRESOLVE
, locale
);
679 if ((dlhandle
= dlopen(path
, RTLD_LAZY
)) != NULL
) {
680 __wckind
= (int (*)(wchar_t))dlsym(dlhandle
, WCHKIND
);
681 if (__wckind
!= NULL
)
683 (void) dlclose(dlhandle
);
688 __wckind
= _wckind_c_locale
;
695 return (*__wckind
) (wc
);
700 _wckind_c_locale(wchar_t wc
)
705 * DEPEND_ON_ANSIC: the L notation for wide characters is new in
706 * ANSI C; a K&R compiler won't work.
709 ret
= (iswalnum(wc
) || wc
== L
'_') ? 0 : 1;
711 ret
= wcsetno(wc
) + 1;
718 * Called when done looking for a set of mail header lines.
719 * Either a blank line was seen, or EOF was reached.
721 * Verifies if current hdrbuf of potential mail header lines
722 * is really a mail header. A mail header must be at least 2
723 * lines and more than half of them must start with one of the
724 * known mail header strings in headnames.
726 * header_chk sets hdr_state to do_hdr if hdrbuf contained a valid
727 * mail header. Otherwise, it sets hdr_state to flush_hdr.
729 * h_lines = hdrbuf index for next line to be saved;
730 * also indicates current # of lines in potential header
735 wchar_t *cp
; /* ptr to current char of line */
736 wchar_t **hp
; /* ptr to current entry in headnames, */
737 /* the table of known mail header strings */
740 * number of lines in hdrbuf that look
741 * like mail header lines (start with
742 * a known mail header prefix)
745 /* header must have at least 2 lines (h_lines > 1) */
747 hdr_state
= flush_hdr
;
751 * go through each line in hdrbuf and see how many
752 * look like mail header lines
754 for (l
= 0; l
< h_lines
; l
++) {
755 /* skip initial blanks */
756 for (cp
= hdrbuf
[l
]; *cp
== L
' '; cp
++) {
758 for (hp
= &headnames
[0]; *hp
!= (wchar_t *)0; hp
++)
759 if (ispref(*hp
, cp
)) {
765 * if over half match, we'll assume this is a header;
766 * set hdr_state to indicate whether to treat
767 * these lines as mail header (do_hdr) or not (flush_hdr)
769 if (hdrcount
> h_lines
/ 2)
772 hdr_state
= flush_hdr
;
777 * Save given input line into next element of hdrbuf,
778 * as a potential mail header line, to be processed later
779 * once we decide whether or not the contents of hdrbuf is
780 * really a mail header, via header_chk().
782 * Does not allow hdrbuf to exceed MAXLINES lines.
783 * Dynamically allocates space for each line. If we are unable
784 * to allocate space for the current string, stop special mail
785 * header preservation at this point and continue formatting
789 fill_hdrbuf(wchar_t line
[])
791 wchar_t *cp
; /* pointer to characters in input line */
792 int i
; /* index into characters of a hdrbuf line */
794 if (h_lines
>= MAXLINES
) {
796 * if we run over MAXLINES potential mail header
797 * lines, stop checking--this is most likely NOT a
798 * mail header; flush out the hdrbuf, then process
799 * the current 'line' normally.
801 hdr_state
= flush_hdr
;
806 hdrbuf
[h_lines
] = (wchar_t *)malloc(sizeof (wchar_t) *
808 if (hdrbuf
[h_lines
] == NULL
) {
810 fprintf(stderr
, "fmt: unable to do mail header preservation\n");
813 * Can't process mail header; flush current contents
814 * of mail header and continue with no more mail
818 /* hdrbuf is empty; process this line normally */
821 hdr_state
= flush_hdr
;
822 for (i
= 0; i
< h_lines
; i
++) {
831 /* save this line as a potential mail header line */
832 for (i
= 0, cp
= line
; (hdrbuf
[h_lines
][i
] = *cp
) != L
'\0'; i
++, cp
++) {
839 * Outputs the lines currently stored in hdrbuf, according
840 * to the current hdr_state value, assumed to be either do_hdr
842 * This should be called after doing a header_chk() to verify
843 * the hdrbuf and set the hdr_state flag.
850 for (i
= 0; i
< h_lines
; i
++) {
854 hdr_state
= not_in_hdr
;