Expand PMF_FN_* macros.
[netbsd-mini2440.git] / external / ibm-public / postfix / dist / src / global / header_token.c
blobf8e50b4318a6c4013d0563cb339d967ab07ba166
1 /* $NetBSD$ */
3 /*++
4 /* NAME
5 /* header_token 3
6 /* SUMMARY
7 /* mail header parser
8 /* SYNOPSIS
9 /* #include <header_token.h>
11 /* typedef struct {
12 /* .in +4
13 /* int type;
14 /* const char *u.value;
15 /* /* ... */
16 /* .in
17 /* } HEADER_TOKEN;
19 /* ssize_t header_token(token, token_len, token_buffer, ptr,
20 /* specials, terminator)
21 /* HEADER_TOKEN *token;
22 /* ssize_t token_len;
23 /* VSTRING *token_buffer;
24 /* const char **ptr;
25 /* const char *specials;
26 /* int terminator;
27 /* DESCRIPTION
28 /* This module parses a mail header value (text after field-name:)
29 /* into tokens. The parser understands RFC 822 linear white space,
30 /* quoted-string, comment, control characters, and a set of
31 /* user-specified special characters.
33 /* A result token type is one of the following:
34 /* .IP HEADER_TOK_QSTRING
35 /* Quoted string as per RFC 822.
36 /* .IP HEADER_TOK_TOKEN
37 /* Token as per RFC 822, and the special characters supplied by the
38 /* caller.
39 /* .IP other
40 /* The value of a control character or special character.
41 /* .PP
42 /* header_token() tokenizes the input and stops after a user-specified
43 /* terminator (ignoring all tokens that exceed the capacity of
44 /* the result storage), or when it runs out of space for the result.
45 /* The terminator is not stored. The result value is the number of
46 /* tokens stored, or -1 when the input was exhausted before any tokens
47 /* were found.
49 /* Arguments:
50 /* .IP token
51 /* Result array of HEADER_TOKEN structures. Token string values
52 /* are pointers to null-terminated substrings in the token_buffer.
53 /* .IP token_len
54 /* Length of the array of HEADER_TOKEN structures.
55 /* .IP token_buffer
56 /* Storage for result token string values.
57 /* .IP ptr
58 /* Input/output read position. The input is a null-terminated string.
59 /* .IP specials
60 /* Special characters according to the relevant RFC, or a
61 /* null pointer (default to the RFC 822 special characters).
62 /* This must include the optional terminator if one is specified.
63 /* .IP terminator
64 /* The special character to stop after, or zero.
65 /* BUGS
66 /* Eight-bit characters are not given special treatment.
67 /* SEE ALSO
68 /* RFC 822 (ARPA Internet Text Messages)
69 /* DIAGNOSTICS
70 /* Fatal errors: memory allocation problem.
71 /* LICENSE
72 /* .ad
73 /* .fi
74 /* The Secure Mailer license must be distributed with this software.
75 /* AUTHOR(S)
76 /* Wietse Venema
77 /* IBM T.J. Watson Research
78 /* P.O. Box 704
79 /* Yorktown Heights, NY 10598, USA
80 /*--*/
82 /* System library. */
84 #include <sys_defs.h>
85 #include <string.h>
86 #include <ctype.h>
88 /* Utility library. */
90 #include <msg.h>
91 #include <vstring.h>
93 /* Global library. */
95 #include <lex_822.h>
96 #include <header_token.h>
98 /* Application-specific. */
101 * Silly little macros.
103 #define STR(x) vstring_str(x)
104 #define LEN(x) VSTRING_LEN(x)
105 #define CU_CHAR_PTR(x) ((const unsigned char *) (x))
107 /* header_token - parse out the next item in a message header */
109 ssize_t header_token(HEADER_TOKEN *token, ssize_t token_len,
110 VSTRING *token_buffer, const char **ptr,
111 const char *user_specials, int user_terminator)
113 ssize_t comment_level;
114 const unsigned char *cp;
115 ssize_t len;
116 int ch;
117 ssize_t tok_count;
118 ssize_t n;
121 * Initialize.
123 VSTRING_RESET(token_buffer);
124 cp = CU_CHAR_PTR(*ptr);
125 tok_count = 0;
126 if (user_specials == 0)
127 user_specials = LEX_822_SPECIALS;
130 * Main parsing loop.
132 * XXX What was the reason to continue parsing when user_terminator is
133 * specified? Perhaps this was needed at some intermediate stage of
134 * development?
136 while ((ch = *cp) != 0 && (user_terminator != 0 || tok_count < token_len)) {
137 cp++;
140 * Skip RFC 822 linear white space.
142 if (IS_SPACE_TAB_CR_LF(ch))
143 continue;
146 * Terminator.
148 if (ch == user_terminator)
149 break;
152 * Skip RFC 822 comment.
154 if (ch == '(') {
155 comment_level = 1;
156 while ((ch = *cp) != 0) {
157 cp++;
158 if (ch == '(') { /* comments can nest! */
159 comment_level++;
160 } else if (ch == ')') {
161 if (--comment_level == 0)
162 break;
163 } else if (ch == '\\') {
164 if ((ch = *cp) == 0)
165 break;
166 cp++;
169 continue;
173 * Copy quoted text according to RFC 822.
175 if (ch == '"') {
176 if (tok_count < token_len) {
177 token[tok_count].u.offset = LEN(token_buffer);
178 token[tok_count].type = HEADER_TOK_QSTRING;
180 while ((ch = *cp) != 0) {
181 cp++;
182 if (ch == '"')
183 break;
184 if (ch == '\n') { /* unfold */
185 if (tok_count < token_len) {
186 len = LEN(token_buffer);
187 while (len > 0
188 && IS_SPACE_TAB_CR_LF(STR(token_buffer)[len - 1]))
189 len--;
190 if (len < LEN(token_buffer))
191 vstring_truncate(token_buffer, len);
193 continue;
195 if (ch == '\\') {
196 if ((ch = *cp) == 0)
197 break;
198 cp++;
200 if (tok_count < token_len)
201 VSTRING_ADDCH(token_buffer, ch);
203 if (tok_count < token_len) {
204 VSTRING_ADDCH(token_buffer, 0);
205 tok_count++;
207 continue;
211 * Control, or special.
213 if (strchr(user_specials, ch) || ISCNTRL(ch)) {
214 if (tok_count < token_len) {
215 token[tok_count].u.offset = LEN(token_buffer);
216 token[tok_count].type = ch;
217 VSTRING_ADDCH(token_buffer, ch);
218 VSTRING_ADDCH(token_buffer, 0);
219 tok_count++;
221 continue;
225 * Token.
227 else {
228 if (tok_count < token_len) {
229 token[tok_count].u.offset = LEN(token_buffer);
230 token[tok_count].type = HEADER_TOK_TOKEN;
231 VSTRING_ADDCH(token_buffer, ch);
233 while ((ch = *cp) != 0 && !IS_SPACE_TAB_CR_LF(ch)
234 && !ISCNTRL(ch) && !strchr(user_specials, ch)) {
235 cp++;
236 if (tok_count < token_len)
237 VSTRING_ADDCH(token_buffer, ch);
239 if (tok_count < token_len) {
240 VSTRING_ADDCH(token_buffer, 0);
241 tok_count++;
243 continue;
248 * Ignore a zero-length item after the last terminator.
250 if (tok_count == 0 && ch == 0)
251 return (-1);
254 * Finalize. Fill in the string pointer array, now that the token buffer
255 * is no longer dynamically reallocated as it grows.
257 *ptr = (const char *) cp;
258 for (n = 0; n < tok_count; n++)
259 token[n].u.value = STR(token_buffer) + token[n].u.offset;
261 if (msg_verbose)
262 msg_info("header_token: %s %s %s",
263 tok_count > 0 ? token[0].u.value : "",
264 tok_count > 1 ? token[1].u.value : "",
265 tok_count > 2 ? token[2].u.value : "");
267 return (tok_count);