9 /* #include <header_token.h>
14 /* const char *u.value;
19 /* ssize_t header_token(token, token_len, token_buffer, ptr,
20 /* specials, terminator)
21 /* HEADER_TOKEN *token;
23 /* VSTRING *token_buffer;
25 /* const char *specials;
28 /* This module parses a mail header value (text after field-name:)
29 /* into tokens. The parser understands RFC 822 linear white space,
30 /* quoted-string, comment, control characters, and a set of
31 /* user-specified special characters.
33 /* A result token type is one of the following:
34 /* .IP HEADER_TOK_QSTRING
35 /* Quoted string as per RFC 822.
36 /* .IP HEADER_TOK_TOKEN
37 /* Token as per RFC 822, and the special characters supplied by the
40 /* The value of a control character or special character.
42 /* header_token() tokenizes the input and stops after a user-specified
43 /* terminator (ignoring all tokens that exceed the capacity of
44 /* the result storage), or when it runs out of space for the result.
45 /* The terminator is not stored. The result value is the number of
46 /* tokens stored, or -1 when the input was exhausted before any tokens
51 /* Result array of HEADER_TOKEN structures. Token string values
52 /* are pointers to null-terminated substrings in the token_buffer.
54 /* Length of the array of HEADER_TOKEN structures.
56 /* Storage for result token string values.
58 /* Input/output read position. The input is a null-terminated string.
60 /* Special characters according to the relevant RFC, or a
61 /* null pointer (default to the RFC 822 special characters).
62 /* This must include the optional terminator if one is specified.
64 /* The special character to stop after, or zero.
66 /* Eight-bit characters are not given special treatment.
68 /* RFC 822 (ARPA Internet Text Messages)
70 /* Fatal errors: memory allocation problem.
74 /* The Secure Mailer license must be distributed with this software.
77 /* IBM T.J. Watson Research
79 /* Yorktown Heights, NY 10598, USA
88 /* Utility library. */
96 #include <header_token.h>
98 /* Application-specific. */
101 * Silly little macros.
/* STR(x) expands to vstring_str(x); used below as the base address that stored token offsets are added to. */
103 #define STR(x) vstring_str(x)
/* LEN(x) expands to VSTRING_LEN(x); used below to record where a token starts in token_buffer. */
104 #define LEN(x) VSTRING_LEN(x)
/* CU_CHAR_PTR(x) casts x to (const unsigned char *); applied to the input read position before scanning. */
105 #define CU_CHAR_PTR(x) ((const unsigned char *) (x))
107 /* header_token - parse out the next item in a message header */
109 ssize_t
header_token(HEADER_TOKEN
*token
, ssize_t token_len
,
110 VSTRING
*token_buffer
, const char **ptr
,
111 const char *user_specials
, int user_terminator
)
113 ssize_t comment_level
;
114 const unsigned char *cp
;
123 VSTRING_RESET(token_buffer
);
124 cp
= CU_CHAR_PTR(*ptr
);
126 if (user_specials
== 0)
127 user_specials
= LEX_822_SPECIALS
;
132 * XXX What was the reason to continue parsing when user_terminator is
133 * specified? Perhaps this was needed at some intermediate stage of
136 while ((ch
= *cp
) != 0 && (user_terminator
!= 0 || tok_count
< token_len
)) {
140 * Skip RFC 822 linear white space.
142 if (IS_SPACE_TAB_CR_LF(ch
))
148 if (ch
== user_terminator
)
152 * Skip RFC 822 comment.
156 while ((ch
= *cp
) != 0) {
158 if (ch
== '(') { /* comments can nest! */
160 } else if (ch
== ')') {
161 if (--comment_level
== 0)
163 } else if (ch
== '\\') {
173 * Copy quoted text according to RFC 822.
176 if (tok_count
< token_len
) {
177 token
[tok_count
].u
.offset
= LEN(token_buffer
);
178 token
[tok_count
].type
= HEADER_TOK_QSTRING
;
180 while ((ch
= *cp
) != 0) {
184 if (ch
== '\n') { /* unfold */
185 if (tok_count
< token_len
) {
186 len
= LEN(token_buffer
);
188 && IS_SPACE_TAB_CR_LF(STR(token_buffer
)[len
- 1]))
190 if (len
< LEN(token_buffer
))
191 vstring_truncate(token_buffer
, len
);
200 if (tok_count
< token_len
)
201 VSTRING_ADDCH(token_buffer
, ch
);
203 if (tok_count
< token_len
) {
204 VSTRING_ADDCH(token_buffer
, 0);
211 * Control, or special.
213 if (strchr(user_specials
, ch
) || ISCNTRL(ch
)) {
214 if (tok_count
< token_len
) {
215 token
[tok_count
].u
.offset
= LEN(token_buffer
);
216 token
[tok_count
].type
= ch
;
217 VSTRING_ADDCH(token_buffer
, ch
);
218 VSTRING_ADDCH(token_buffer
, 0);
228 if (tok_count
< token_len
) {
229 token
[tok_count
].u
.offset
= LEN(token_buffer
);
230 token
[tok_count
].type
= HEADER_TOK_TOKEN
;
231 VSTRING_ADDCH(token_buffer
, ch
);
233 while ((ch
= *cp
) != 0 && !IS_SPACE_TAB_CR_LF(ch
)
234 && !ISCNTRL(ch
) && !strchr(user_specials
, ch
)) {
236 if (tok_count
< token_len
)
237 VSTRING_ADDCH(token_buffer
, ch
);
239 if (tok_count
< token_len
) {
240 VSTRING_ADDCH(token_buffer
, 0);
248 * Ignore a zero-length item after the last terminator.
250 if (tok_count
== 0 && ch
== 0)
254 * Finalize. Fill in the string pointer array, now that the token buffer
255 * is no longer dynamically reallocated as it grows.
257 *ptr
= (const char *) cp
;
258 for (n
= 0; n
< tok_count
; n
++)
259 token
[n
].u
.value
= STR(token_buffer
) + token
[n
].u
.offset
;
262 msg_info("header_token: %s %s %s",
263 tok_count
> 0 ? token
[0].u
.value
: "",
264 tok_count
> 1 ? token
[1].u
.value
: "",
265 tok_count
> 2 ? token
[2].u
.value
: "");