7 /* MIME parser state machine
9 /* #include <mime_state.h>
11 /* MIME_STATE *mime_state_alloc(flags, head_out, head_end,
12 /* body_out, body_end,
13 /* err_print, context)
15 /* void (*head_out)(void *ptr, int header_class,
16 /* const HEADER_OPTS *header_info,
17 /* VSTRING *buf, off_t offset);
18 /* void (*head_end)(void *ptr);
19 /* void (*body_out)(void *ptr, int rec_type,
20 /* const char *buf, ssize_t len,
22 /* void (*body_end)(void *ptr);
23 /* void (*err_print)(void *ptr, int err_flag, const char *text)
26 /* int mime_state_update(state, rec_type, buf, len)
32 /* MIME_STATE *mime_state_free(state)
35 /* const char *mime_state_error(error_code)
40 /* const int code; /* internal error code */
41 /* const char *dsn; /* RFC 3463 */
42 /* const char *text; /* descriptive text */
44 /* } MIME_STATE_DETAIL;
46 /* const MIME_STATE_DETAIL *mime_state_detail(error_code)
49 /* This module implements a one-pass MIME processor with optional
50 /* 8-bit to quoted-printable conversion.
52 /* In order to fend off denial of service attacks, message headers
53 /* are truncated at or above var_header_limit bytes, message boundary
54 /* strings are truncated at var_mime_bound_len bytes, and the multipart
55 /* nesting level is limited to var_mime_maxdepth levels.
57 /* mime_state_alloc() creates a MIME state machine. The machine
58 /* is delivered in its initial state, expecting content type
59 /* text/plain, 7-bit data.
61 /* mime_state_update() updates the MIME state machine according
62 /* to the input record type and the record content.
63 /* The result value is the bit-wise OR of zero or more of the following:
64 /* .IP MIME_ERR_TRUNC_HEADER
65 /* A message header was longer than var_header_limit bytes.
66 /* .IP MIME_ERR_NESTING
67 /* The MIME structure was nested more than var_mime_maxdepth levels.
68 /* .IP MIME_ERR_8BIT_IN_HEADER
69 /* A message header contains 8-bit data. This is always illegal.
70 /* .IP MIME_ERR_8BIT_IN_7BIT_BODY
71 /* A MIME header specifies (or defaults to) 7-bit content, but the
72 /* corresponding message body or body parts contain 8-bit content.
73 /* .IP MIME_ERR_ENCODING_DOMAIN
74 /* An entity of type "message" or "multipart" specifies the wrong
75 /* content transfer encoding domain, or specifies a transformation
76 /* (quoted-printable, base64) instead of a domain (7bit, 8bit,
79 /* mime_state_free() releases storage for a MIME state machine,
80 /* and conveniently returns a null pointer.
82 /* mime_state_error() returns a string representation for the
83 /* specified error code. When multiple errors are specified it
84 /* reports what it deems the most serious one.
86 /* mime_state_detail() returns a table entry with error
87 /* information for the specified error code. When multiple
88 /* errors are specified it reports what it deems the most
93 /* The output routine for body lines. It receives unmodified input
94 /* records, or the result of 8-bit -> 7-bit conversion.
96 /* A null pointer, or a pointer to a routine that is called after
97 /* the last input record is processed.
99 /* Buffer with the content of a logical or physical message record.
101 /* Caller context that is passed on to the head_out and body_out
104 /* The content encoding: MIME_ENC_7BIT or MIME_ENC_8BIT.
106 /* Null pointer, or pointer to a function that is called with
107 /* arguments: the application context, the error type, and the
108 /* offending input. Only one instance per error type is reported.
110 /* Special processing options. Specify the bit-wise OR of zero or
111 /* more of the following:
113 /* .IP MIME_OPT_DISABLE_MIME
114 /* Pay no attention to Content-* message headers, and switch to
115 /* message body state at the end of the primary message headers.
116 /* .IP MIME_OPT_REPORT_TRUNC_HEADER
117 /* Report errors that set the MIME_ERR_TRUNC_HEADER error flag
119 /* .IP MIME_OPT_REPORT_8BIT_IN_HEADER
120 /* Report errors that set the MIME_ERR_8BIT_IN_HEADER error
121 /* flag (see above). This rarely stops legitimate mail.
122 /* .IP MIME_OPT_REPORT_8BIT_IN_7BIT_BODY
123 /* Report errors that set the MIME_ERR_8BIT_IN_7BIT_BODY error
124 /* flag (see above). This currently breaks Majordomo mail that is
125 /* forwarded for approval, because Majordomo does not propagate
126 /* MIME type information from the enclosed message to the message
127 /* headers of the request for approval.
128 /* .IP MIME_OPT_REPORT_ENCODING_DOMAIN
129 /* Report errors that set the MIME_ERR_ENCODING_DOMAIN error
131 /* .IP MIME_OPT_REPORT_NESTING
132 /* Report errors that set the MIME_ERR_NESTING error flag
134 /* .IP MIME_OPT_RECURSE_ALL_MESSAGE
135 /* Recurse into message/anything types other than message/rfc822.
136 /* This feature can detect "bad" information in headers of
137 /* message/partial and message/external-body types. It must
138 /* not be used with 8-bit -> 7-bit MIME transformations.
139 /* .IP MIME_OPT_DOWNGRADE
140 /* Transform content that claims to be 8-bit into quoted-printable.
141 /* Where appropriate, update Content-Transfer-Encoding: message
145 /* For convenience, MIME_OPT_NONE requests no special processing.
147 /* Specifies where a message header is located.
149 /* .IP MIME_HDR_PRIMARY
150 /* In the primary message header section.
151 /* .IP MIME_HDR_MULTIPART
152 /* In the header section after a multipart boundary string.
153 /* .IP MIME_HDR_NESTED
154 /* At the start of a nested (e.g., message/rfc822) message.
157 /* For convenience, the macros MIME_HDR_FIRST and MIME_HDR_LAST
158 /* specify the range of MIME_HDR_MUMBLE macros.
160 /* To find out if something is a MIME header at the beginning
161 /* of an RFC 822 message or an attached message, look at the
162 /* header_info argument.
164 /* Null pointer or information about the message header, see
167 /* The output routine that is invoked for outputting a message header.
168 /* A multi-line header is passed as one chunk of text with embedded
170 /* It is the responsibility of the output routine to break the text
171 /* at embedded newlines, and to break up long text between newlines
172 /* into multiple output records.
173 /* Note: an output routine is explicitly allowed to modify the text.
175 /* A null pointer, or a pointer to a routine that is called after
176 /* the last message header in the first header block is processed.
178 /* Length of non-VSTRING input buffer.
180 /* The offset in bytes from the start of the current block of message
181 /* headers or body lines. Line boundaries are counted as one byte.
183 /* The input record type as defined in rec_type(3h). State is
184 /* updated for text records (REC_TYPE_NORM or REC_TYPE_CONT).
185 /* Some input records are stored internally in order to reconstruct
186 /* multi-line input. Upon receipt of any non-text record type, all
187 /* stored input is flushed and the state is set to "body".
189 /* MIME parser state created with mime_state_alloc().
191 /* NOTE: when the end of headers is reached, mime_state_update()
192 /* may execute up to three call-backs before returning to the
193 /* caller: head_out(), head_end(), and body_out() or body_end().
194 /* As long as call-backs return no result, it is up to the
195 /* call-back routines to check if a previous call-back experienced
198 /* Different mail user agents treat malformed message boundary
199 /* strings in different ways. The Postfix MIME processor cannot
200 /* be bug-compatible with everything.
202 /* This module will not glue together multipart boundary strings that
203 /* span multiple input records.
205 /* This module will not glue together RFC 2231 formatted (boundary)
206 /* parameter values. RFC 2231 claims compatibility with existing
207 /* MIME processors. Splitting boundary strings is not backwards
210 /* The "8-bit data inside 7-bit body" test is myopic. It is not aware
211 /* of any enclosing (message or multipart) encoding information.
213 /* If the input ends in data other than a hard line break, this module
214 /* will add a hard line break of its own. No line break is added to
217 /* This code recognizes the obsolete form "headername :" but will
218 /* normalize it to the canonical form "headername:". Leaving the
219 /* obsolete form alone would cause too much trouble with existing code
220 /* that expects only the normalized form.
222 /* msg(3) diagnostics interface
223 /* header_opts(3) header information lookup
224 /* RFC 822 (ARPA Internet Text Messages)
225 /* RFC 2045 (MIME: Format of internet message bodies)
226 /* RFC 2046 (MIME: Media types)
228 /* Fatal errors: memory allocation problem.
232 /* The Secure Mailer license must be distributed with this software.
236 /* This code was implemented from scratch after reading the RFC
237 /* documents. This was a relatively straightforward effort with
238 /* few if any surprises. Victor Duchovni of Morgan Stanley shared
239 /* his experiences with ambiguities in real-life MIME implementations.
240 /* Liviu Daia of the Romanian Academy shared his insights in some
241 /* of the darker corners.
244 /* IBM T.J. Watson Research
246 /* Yorktown Heights, NY 10598, USA
249 /* System library. */
251 #include <sys_defs.h>
256 #ifdef STRCASECMP_IN_STRINGS_H
260 /* Utility library. */
262 #include <mymalloc.h>
266 /* Global library. */
268 #include <rec_type.h>
269 #include <is_header.h>
270 #include <header_opts.h>
271 #include <mail_params.h>
272 #include <header_token.h>
274 #include <mime_state.h>
276 /* Application-specific. */
279 * Mime parser stack element for multipart content.
281 typedef struct MIME_STACK
{
282 int def_ctype
; /* default content type */
283 int def_stype
; /* default content subtype */
284 char *boundary
; /* boundary string */
285 ssize_t bound_len
; /* boundary length */
286 struct MIME_STACK
*next
; /* linkage */
292 #define MIME_MAX_TOKEN 3 /* tokens per attribute */
299 int curr_state
; /* header/body state */
300 int curr_ctype
; /* last or default content type */
301 int curr_stype
; /* last or default content subtype */
302 int curr_encoding
; /* last or default content encoding */
303 int curr_domain
; /* last or default encoding unit */
304 VSTRING
*output_buffer
; /* headers, quoted-printable body */
305 int prev_rec_type
; /* previous input record type */
306 int nesting_level
; /* safety */
307 MIME_STACK
*stack
; /* for composite types */
308 HEADER_TOKEN token
[MIME_MAX_TOKEN
]; /* header token array */
309 VSTRING
*token_buffer
; /* header parser scratch buffer */
310 int err_flags
; /* processing errors */
311 off_t head_offset
; /* offset in header block */
312 off_t body_offset
; /* offset in body block */
317 int static_flags
; /* static processing options */
318 MIME_STATE_HEAD_OUT head_out
; /* header output routine */
319 MIME_STATE_ANY_END head_end
; /* end of primary header routine */
320 MIME_STATE_BODY_OUT body_out
; /* body output routine */
321 MIME_STATE_ANY_END body_end
; /* end of body output routine */
322 MIME_STATE_ERR_PRINT err_print
; /* error report */
323 void *app_context
; /* application context */
327 * Content types and subtypes that we care about, either because we have to,
328 * or because we want to filter out broken MIME messages.
330 #define MIME_CTYPE_OTHER 0
331 #define MIME_CTYPE_TEXT 1
332 #define MIME_CTYPE_MESSAGE 2
333 #define MIME_CTYPE_MULTIPART 3
335 #define MIME_STYPE_OTHER 0
336 #define MIME_STYPE_PLAIN 1
337 #define MIME_STYPE_RFC822 2
338 #define MIME_STYPE_PARTIAL 3
339 #define MIME_STYPE_EXTERN_BODY 4
342 * MIME parser states. We steal from the public interface.
344 #define MIME_STATE_PRIMARY MIME_HDR_PRIMARY /* primary headers */
345 #define MIME_STATE_MULTIPART MIME_HDR_MULTIPART /* after --boundary */
346 #define MIME_STATE_NESTED MIME_HDR_NESTED /* message/rfc822 */
347 #define MIME_STATE_BODY (MIME_HDR_NESTED + 1)
349 #define SET_MIME_STATE(ptr, state, ctype, stype, encoding, domain) do { \
350 (ptr)->curr_state = (state); \
351 (ptr)->curr_ctype = (ctype); \
352 (ptr)->curr_stype = (stype); \
353 (ptr)->curr_encoding = (encoding); \
354 (ptr)->curr_domain = (domain); \
355 if ((state) == MIME_STATE_BODY) \
356 (ptr)->body_offset = 0; \
358 (ptr)->head_offset = 0; \
361 #define SET_CURR_STATE(ptr, state) do { \
362 (ptr)->curr_state = (state); \
363 if ((state) == MIME_STATE_BODY) \
364 (ptr)->body_offset = 0; \
366 (ptr)->head_offset = 0; \
370 * MIME encodings and domains. We intentionally use the same codes for
371 * encodings and domains, so that we can easily find out whether a content
372 * transfer encoding header specifies a domain or whether it specifies
373 * domain+encoding, which is illegal for multipart/any and message/any.
375 typedef struct MIME_ENCODING
{
376 const char *name
; /* external representation */
377 int encoding
; /* internal representation */
378 int domain
; /* subset of encoding */
381 #define MIME_ENC_QP 1 /* encoding + domain */
382 #define MIME_ENC_BASE64 2 /* encoding + domain */
383 /* These are defined in mime_state.h as part of the external interface. */
384 #ifndef MIME_ENC_7BIT
385 #define MIME_ENC_7BIT 7 /* domain only */
386 #define MIME_ENC_8BIT 8 /* domain only */
387 #define MIME_ENC_BINARY 9 /* domain only */
390 static const MIME_ENCODING mime_encoding_map
[] = { /* RFC 2045 */
391 "7bit", MIME_ENC_7BIT
, MIME_ENC_7BIT
, /* domain */
392 "8bit", MIME_ENC_8BIT
, MIME_ENC_8BIT
, /* domain */
393 "binary", MIME_ENC_BINARY
, MIME_ENC_BINARY
, /* domain */
394 "base64", MIME_ENC_BASE64
, MIME_ENC_7BIT
, /* encoding */
395 "quoted-printable", MIME_ENC_QP
, MIME_ENC_7BIT
, /* encoding */
400 * Silly Little Macros.
402 #define STR(x) vstring_str(x)
403 #define LEN(x) VSTRING_LEN(x)
404 #define END(x) vstring_end(x)
405 #define CU_CHAR_PTR(x) ((const unsigned char *) (x))
407 #define REPORT_ERROR_LEN(state, err_type, text, len) do { \
408 if ((state->err_flags & err_type) == 0) { \
409 if (state->err_print != 0) \
410 state->err_print(state->app_context, err_type, text, len); \
411 state->err_flags |= err_type; \
415 #define REPORT_ERROR(state, err_type, text) do { \
416 const char *_text = text; \
417 ssize_t _len = strlen(text); \
418 REPORT_ERROR_LEN(state, err_type, _text, _len); \
421 #define REPORT_ERROR_BUF(state, err_type, buf) \
422 REPORT_ERROR_LEN(state, err_type, STR(buf), LEN(buf))
426 * Outputs and state changes are interleaved, so we must maintain separate
427 * offsets for header and body segments.
429 #define HEAD_OUT(ptr, info, len) do { \
430 if ((ptr)->head_out) { \
431 (ptr)->head_out((ptr)->app_context, (ptr)->curr_state, \
432 (info), (ptr)->output_buffer, (ptr)->head_offset); \
433 (ptr)->head_offset += (len) + 1; \
437 #define BODY_OUT(ptr, rec_type, text, len) do { \
438 if ((ptr)->body_out) { \
439 (ptr)->body_out((ptr)->app_context, (rec_type), \
440 (text), (len), (ptr)->body_offset); \
441 (ptr)->body_offset += (len) + 1; \
445 /* mime_state_push - push boundary onto stack */
447 static void mime_state_push(MIME_STATE
*state
, int def_ctype
, int def_stype
,
448 const char *boundary
)
453 * RFC 2046 mandates that a boundary string be up to 70 characters long.
454 * Some MTAs, including Postfix, include the fully-qualified MTA name
455 * which can be longer, so we are willing to handle boundary strings that
456 * exceed the RFC specification. We allow for message headers of up to
457 * var_header_limit characters. In order to avoid denial of service, we
458 * have to impose a configurable limit on the amount of text that we are
459 * willing to store as a boundary string. Despite this truncation we
460 * will still correctly detect all intermediate boundaries and all the
461 * message headers that follow those boundaries.
463 state
->nesting_level
+= 1;
464 stack
= (MIME_STACK
*) mymalloc(sizeof(*stack
));
465 stack
->def_ctype
= def_ctype
;
466 stack
->def_stype
= def_stype
;
467 if ((stack
->bound_len
= strlen(boundary
)) > var_mime_bound_len
)
468 stack
->bound_len
= var_mime_bound_len
;
469 stack
->boundary
= mystrndup(boundary
, stack
->bound_len
);
470 stack
->next
= state
->stack
;
471 state
->stack
= stack
;
473 msg_info("PUSH boundary %s", stack
->boundary
);
476 /* mime_state_pop - pop boundary from stack */
478 static void mime_state_pop(MIME_STATE
*state
)
482 if ((stack
= state
->stack
) == 0)
483 msg_panic("mime_state_pop: there is no stack");
485 msg_info("POP boundary %s", stack
->boundary
);
486 state
->nesting_level
-= 1;
487 state
->stack
= stack
->next
;
488 myfree(stack
->boundary
);
489 myfree((char *) stack
);
492 /* mime_state_alloc - create MIME state machine */
494 MIME_STATE
*mime_state_alloc(int flags
,
495 MIME_STATE_HEAD_OUT head_out
,
496 MIME_STATE_ANY_END head_end
,
497 MIME_STATE_BODY_OUT body_out
,
498 MIME_STATE_ANY_END body_end
,
499 MIME_STATE_ERR_PRINT err_print
,
504 state
= (MIME_STATE
*) mymalloc(sizeof(*state
));
506 /* Volatile members. */
507 state
->err_flags
= 0;
508 state
->body_offset
= 0; /* XXX */
509 SET_MIME_STATE(state
, MIME_STATE_PRIMARY
,
510 MIME_CTYPE_TEXT
, MIME_STYPE_PLAIN
,
511 MIME_ENC_7BIT
, MIME_ENC_7BIT
);
512 state
->output_buffer
= vstring_alloc(100);
513 state
->prev_rec_type
= 0;
515 state
->token_buffer
= vstring_alloc(1);
517 /* Static members. */
518 state
->static_flags
= flags
;
519 state
->head_out
= head_out
;
520 state
->head_end
= head_end
;
521 state
->body_out
= body_out
;
522 state
->body_end
= body_end
;
523 state
->err_print
= err_print
;
524 state
->app_context
= context
;
528 /* mime_state_free - destroy MIME state machine */
530 MIME_STATE
*mime_state_free(MIME_STATE
*state
)
532 vstring_free(state
->output_buffer
);
534 mime_state_pop(state
);
535 if (state
->token_buffer
)
536 vstring_free(state
->token_buffer
);
537 myfree((char *) state
);
541 /* mime_state_content_type - process content-type header */
543 static void mime_state_content_type(MIME_STATE
*state
,
544 const HEADER_OPTS
*header_info
)
551 #define TOKEN_MATCH(tok, text) \
552 ((tok).type == HEADER_TOK_TOKEN && strcasecmp((tok).u.value, (text)) == 0)
554 #define RFC2045_TSPECIALS "()<>@,;:\\\"/[]?="
556 #define PARSE_CONTENT_TYPE_HEADER(state, ptr) \
557 header_token(state->token, MIME_MAX_TOKEN, \
558 state->token_buffer, ptr, RFC2045_TSPECIALS, ';')
560 cp
= STR(state
->output_buffer
) + strlen(header_info
->name
) + 1;
561 if ((tok_count
= PARSE_CONTENT_TYPE_HEADER(state
, &cp
)) > 0) {
564 * text/whatever. Right now we don't really care if it is plain or
565 * not, but we may want to recognize subtypes later, and then this
566 * code can serve as an example.
568 if (TOKEN_MATCH(state
->token
[0], "text")) {
569 state
->curr_ctype
= MIME_CTYPE_TEXT
;
571 && state
->token
[1].type
== '/'
572 && TOKEN_MATCH(state
->token
[2], "plain"))
573 state
->curr_stype
= MIME_STYPE_PLAIN
;
575 state
->curr_stype
= MIME_STYPE_OTHER
;
580 * message/whatever body parts start with another block of message
581 * headers that we may want to look at. The partial and external-body
582 * subtypes cannot be subjected to 8-bit -> 7-bit conversion, so we
583 * must properly recognize them.
585 if (TOKEN_MATCH(state
->token
[0], "message")) {
586 state
->curr_ctype
= MIME_CTYPE_MESSAGE
;
587 state
->curr_stype
= MIME_STYPE_OTHER
;
589 && state
->token
[1].type
== '/') {
590 if (TOKEN_MATCH(state
->token
[2], "rfc822"))
591 state
->curr_stype
= MIME_STYPE_RFC822
;
592 else if (TOKEN_MATCH(state
->token
[2], "partial"))
593 state
->curr_stype
= MIME_STYPE_PARTIAL
;
594 else if (TOKEN_MATCH(state
->token
[2], "external-body"))
595 state
->curr_stype
= MIME_STYPE_EXTERN_BODY
;
601 * multipart/digest has default content type message/rfc822,
602 * multipart/whatever has default content type text/plain.
604 if (TOKEN_MATCH(state
->token
[0], "multipart")) {
605 state
->curr_ctype
= MIME_CTYPE_MULTIPART
;
607 && state
->token
[1].type
== '/'
608 && TOKEN_MATCH(state
->token
[2], "digest")) {
609 def_ctype
= MIME_CTYPE_MESSAGE
;
610 def_stype
= MIME_STYPE_RFC822
;
612 def_ctype
= MIME_CTYPE_TEXT
;
613 def_stype
= MIME_STYPE_PLAIN
;
617 * Yes, this is supposed to capture multiple boundary strings,
618 * which are illegal and which could be used to hide content in
619 * an implementation dependent manner. The code below allows us
620 * to find embedded message headers as long as the sender uses
621 * only one of these same-level boundary strings.
623 * Yes, this is supposed to ignore the boundary value type.
625 while ((tok_count
= PARSE_CONTENT_TYPE_HEADER(state
, &cp
)) >= 0) {
627 && TOKEN_MATCH(state
->token
[0], "boundary")
628 && state
->token
[1].type
== '=') {
629 if (state
->nesting_level
> var_mime_maxdepth
) {
630 if (state
->static_flags
& MIME_OPT_REPORT_NESTING
)
631 REPORT_ERROR_BUF(state
, MIME_ERR_NESTING
,
632 state
->output_buffer
);
634 mime_state_push(state
, def_ctype
, def_stype
,
635 state
->token
[2].u
.value
);
647 state
->curr_ctype
= MIME_CTYPE_OTHER
;
652 /* mime_state_content_encoding - process content-transfer-encoding header */
654 static void mime_state_content_encoding(MIME_STATE
*state
,
655 const HEADER_OPTS
*header_info
)
658 const MIME_ENCODING
*cmp
;
660 #define PARSE_CONTENT_ENCODING_HEADER(state, ptr) \
661 header_token(state->token, 1, state->token_buffer, ptr, (char *) 0, 0)
664 * Do content-transfer-encoding header. Never set the encoding domain to
665 * something other than 7bit, 8bit or binary, even if we don't recognize
668 cp
= STR(state
->output_buffer
) + strlen(header_info
->name
) + 1;
669 if (PARSE_CONTENT_ENCODING_HEADER(state
, &cp
) > 0
670 && state
->token
[0].type
== HEADER_TOK_TOKEN
) {
671 for (cmp
= mime_encoding_map
; cmp
->name
!= 0; cmp
++) {
672 if (strcasecmp(state
->token
[0].u
.value
, cmp
->name
) == 0) {
673 state
->curr_encoding
= cmp
->encoding
;
674 state
->curr_domain
= cmp
->domain
;
681 /* mime_state_enc_name - encoding to printable form */
683 static const char *mime_state_enc_name(int encoding
)
685 const MIME_ENCODING
*cmp
;
687 for (cmp
= mime_encoding_map
; cmp
->name
!= 0; cmp
++)
688 if (encoding
== cmp
->encoding
)
693 /* mime_state_downgrade - convert 8-bit data to quoted-printable */
695 static void mime_state_downgrade(MIME_STATE
*state
, int rec_type
,
696 const char *text
, ssize_t len
)
698 static char hexchars
[] = "0123456789ABCDEF";
699 const unsigned char *cp
;
702 #define QP_ENCODE(buffer, ch) { \
703 VSTRING_ADDCH(buffer, '='); \
704 VSTRING_ADDCH(buffer, hexchars[(ch >> 4) & 0xff]); \
705 VSTRING_ADDCH(buffer, hexchars[ch & 0xf]); \
709 * Insert a soft line break when the output reaches a critical length
710 * before we reach a hard line break.
712 for (cp
= CU_CHAR_PTR(text
); cp
< CU_CHAR_PTR(text
+ len
); cp
++) {
713 /* Critical length before hard line break. */
714 if (LEN(state
->output_buffer
) > 72) {
715 VSTRING_ADDCH(state
->output_buffer
, '=');
716 VSTRING_TERMINATE(state
->output_buffer
);
717 BODY_OUT(state
, REC_TYPE_NORM
,
718 STR(state
->output_buffer
),
719 LEN(state
->output_buffer
));
720 VSTRING_RESET(state
->output_buffer
);
722 /* Append the next character. */
724 if ((ch
< 32 && ch
!= '\t') || ch
== '=' || ch
> 126) {
725 QP_ENCODE(state
->output_buffer
, ch
);
727 VSTRING_ADDCH(state
->output_buffer
, ch
);
732 * Flush output after a hard line break (i.e. the end of a REC_TYPE_NORM
733 * record). Fix trailing whitespace as per the RFC: in the worst case,
734 * the output length will grow from 73 characters to 75 characters.
736 if (rec_type
== REC_TYPE_NORM
) {
737 if (LEN(state
->output_buffer
) > 0
738 && ((ch
= END(state
->output_buffer
)[-1]) == ' ' || ch
== '\t')) {
739 vstring_truncate(state
->output_buffer
,
740 LEN(state
->output_buffer
) - 1);
741 QP_ENCODE(state
->output_buffer
, ch
);
743 VSTRING_TERMINATE(state
->output_buffer
);
744 BODY_OUT(state
, REC_TYPE_NORM
,
745 STR(state
->output_buffer
),
746 LEN(state
->output_buffer
));
747 VSTRING_RESET(state
->output_buffer
);
751 /* mime_state_update - update MIME state machine */
753 int mime_state_update(MIME_STATE
*state
, int rec_type
,
754 const char *text
, ssize_t len
)
756 int input_is_text
= (rec_type
== REC_TYPE_NORM
757 || rec_type
== REC_TYPE_CONT
);
759 const HEADER_OPTS
*header_info
;
760 const unsigned char *cp
;
762 #define SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type) do { \
763 state->prev_rec_type = rec_type; \
764 return (state->err_flags); \
768 * Be sure to flush any partial output line that might still be buffered
769 * up before taking any other "end of input" actions.
771 if (!input_is_text
&& state
->prev_rec_type
== REC_TYPE_CONT
)
772 mime_state_update(state
, REC_TYPE_NORM
, "", 0);
775 * This message state machine is kept simple for the sake of robustness.
776 * Standards evolve over time, and we want to be able to correctly
777 * process messages that are not yet defined. This state machine knows
778 * about headers and bodies, understands that multipart/whatever has
779 * multiple body parts with a header and body, and that message/whatever
780 * has message headers at the start of a body part.
782 switch (state
->curr_state
) {
785 * First, deal with header information that we have accumulated from
786 * previous input records. Discard text that does not fit in a header
787 * buffer. Our limit is quite generous; Sendmail will refuse mail
788 * with only 32kbyte in all the message headers combined.
790 case MIME_STATE_PRIMARY
:
791 case MIME_STATE_MULTIPART
:
792 case MIME_STATE_NESTED
:
793 if (LEN(state
->output_buffer
) > 0) {
795 if (state
->prev_rec_type
== REC_TYPE_CONT
) {
796 if (LEN(state
->output_buffer
) < var_header_limit
) {
797 vstring_strncat(state
->output_buffer
, text
, len
);
799 if (state
->static_flags
& MIME_OPT_REPORT_TRUNC_HEADER
)
800 REPORT_ERROR_BUF(state
, MIME_ERR_TRUNC_HEADER
,
801 state
->output_buffer
);
803 SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state
, rec_type
);
805 if (IS_SPACE_TAB(*text
)) {
806 if (LEN(state
->output_buffer
) < var_header_limit
) {
807 vstring_strcat(state
->output_buffer
, "\n");
808 vstring_strncat(state
->output_buffer
, text
, len
);
810 if (state
->static_flags
& MIME_OPT_REPORT_TRUNC_HEADER
)
811 REPORT_ERROR_BUF(state
, MIME_ERR_TRUNC_HEADER
,
812 state
->output_buffer
);
814 SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state
, rec_type
);
819 * The input is (the beginning of) another message header, or is
820 * not a message header, or is not even a text record. With no
821 * more input to append to this saved header, do output
822 * processing and reset the saved header buffer. Hold on to the
823 * content transfer encoding header if we have to do a 8->7
824 * transformation, because the proper information depends on the
825 * content type header: message and multipart require a domain,
826 * leaf entities have either a transformation or a domain.
828 if (LEN(state
->output_buffer
) > 0) {
829 header_info
= header_opts_find(STR(state
->output_buffer
));
830 if (!(state
->static_flags
& MIME_OPT_DISABLE_MIME
)
831 && header_info
!= 0) {
832 if (header_info
->type
== HDR_CONTENT_TYPE
)
833 mime_state_content_type(state
, header_info
);
834 if (header_info
->type
== HDR_CONTENT_TRANSFER_ENCODING
)
835 mime_state_content_encoding(state
, header_info
);
837 if ((state
->static_flags
& MIME_OPT_REPORT_8BIT_IN_HEADER
) != 0
838 && (state
->err_flags
& MIME_ERR_8BIT_IN_HEADER
) == 0) {
839 for (cp
= CU_CHAR_PTR(STR(state
->output_buffer
));
840 cp
< CU_CHAR_PTR(END(state
->output_buffer
)); cp
++)
842 REPORT_ERROR_BUF(state
, MIME_ERR_8BIT_IN_HEADER
,
843 state
->output_buffer
);
847 /* Output routine is explicitly allowed to change the data. */
849 || header_info
->type
!= HDR_CONTENT_TRANSFER_ENCODING
850 || (state
->static_flags
& MIME_OPT_DOWNGRADE
) == 0
851 || state
->curr_domain
== MIME_ENC_7BIT
)
852 HEAD_OUT(state
, header_info
, len
);
853 state
->prev_rec_type
= 0;
854 VSTRING_RESET(state
->output_buffer
);
859 * With past header information moved out of the way, proceed with a
866 * See if this input is (the beginning of) a message header.
868 * Normalize obsolete "name space colon" syntax to "name colon".
869 * Things would be too confusing otherwise.
871 * Don't assume that the input is null terminated.
873 if ((header_len
= is_header_buf(text
, len
)) > 0) {
874 vstring_strncpy(state
->output_buffer
, text
, header_len
);
875 for (text
+= header_len
, len
-= header_len
;
876 len
> 0 && IS_SPACE_TAB(*text
);
879 vstring_strncat(state
->output_buffer
, text
, len
);
880 SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state
, rec_type
);
885 * This input terminates a block of message headers. When converting
886 * 8-bit to 7-bit mail, this is the right place to emit the correct
887 * content-transfer-encoding header. With message or multipart we
888 * specify 7bit, with leaf entities we specify quoted-printable.
890 * We're not going to convert non-text data into base 64. If they send
891 * arbitrary binary data as 8-bit text, then the data is already
892 * broken beyond recovery, because the Postfix SMTP server sanitizes
893 * record boundaries, treating broken record boundaries as CRLF.
895 * Clear the output buffer, we will need it for storage of the
898 if ((state
->static_flags
& MIME_OPT_DOWNGRADE
)
899 && state
->curr_domain
!= MIME_ENC_7BIT
) {
900 if (state
->curr_ctype
== MIME_CTYPE_MESSAGE
901 || state
->curr_ctype
== MIME_CTYPE_MULTIPART
)
902 cp
= CU_CHAR_PTR("7bit");
904 cp
= CU_CHAR_PTR("quoted-printable");
905 vstring_sprintf(state
->output_buffer
,
906 "Content-Transfer-Encoding: %s", cp
);
907 HEAD_OUT(state
, (HEADER_OPTS
*) 0, len
);
908 VSTRING_RESET(state
->output_buffer
);
912 * This input terminates a block of message headers. Call the
913 * optional header end routine at the end of the first header block.
915 if (state
->curr_state
== MIME_STATE_PRIMARY
&& state
->head_end
)
916 state
->head_end(state
->app_context
);
919 * This is the right place to check if the sender specified an
920 * appropriate identity encoding (7bit, 8bit, binary) for multipart
923 if (state
->static_flags
& MIME_OPT_REPORT_ENCODING_DOMAIN
) {
924 if (state
->curr_ctype
== MIME_CTYPE_MESSAGE
) {
925 if (state
->curr_stype
== MIME_STYPE_PARTIAL
926 || state
->curr_stype
== MIME_STYPE_EXTERN_BODY
) {
927 if (state
->curr_domain
!= MIME_ENC_7BIT
)
928 REPORT_ERROR(state
, MIME_ERR_ENCODING_DOMAIN
,
929 mime_state_enc_name(state
->curr_encoding
));
931 if (state
->curr_encoding
!= state
->curr_domain
)
932 REPORT_ERROR(state
, MIME_ERR_ENCODING_DOMAIN
,
933 mime_state_enc_name(state
->curr_encoding
));
935 } else if (state
->curr_ctype
== MIME_CTYPE_MULTIPART
) {
936 if (state
->curr_encoding
!= state
->curr_domain
)
937 REPORT_ERROR(state
, MIME_ERR_ENCODING_DOMAIN
,
938 mime_state_enc_name(state
->curr_encoding
));
943 * Find out if the next body starts with its own message headers. In
944 * aggressive mode, examine headers of partial and external-body
945 * messages. Otherwise, treat such headers as part of the "body". Set
946 * the proper encoding information for the multipart prolog.
948 * XXX We parse headers inside message/* content even when the encoding
949 * is invalid (encoding != domain). With base64 we won't recognize
950 * any headers, and with quoted-printable we won't recognize MIME
951 * boundary strings, but the MIME processor will still resynchronize
952 * when it runs into the higher-level boundary string at the end of
953 * the message/* content. Although we will treat some headers as body
954 * text, we will still do a better job than if we were treating the
955 * entire message/* content as body text.
957 * XXX This changes state to MIME_STATE_NESTED and then outputs a body
958 * line, so that the body offset is not properly reset.
960 * Don't assume that the input is null terminated.
964 state
->body_offset
= 0; /* XXX */
965 if (state
->curr_ctype
== MIME_CTYPE_MESSAGE
) {
966 if (state
->curr_stype
== MIME_STYPE_RFC822
967 || (state
->static_flags
& MIME_OPT_RECURSE_ALL_MESSAGE
))
968 SET_MIME_STATE(state
, MIME_STATE_NESTED
,
969 MIME_CTYPE_TEXT
, MIME_STYPE_PLAIN
,
970 MIME_ENC_7BIT
, MIME_ENC_7BIT
);
972 SET_CURR_STATE(state
, MIME_STATE_BODY
);
973 } else if (state
->curr_ctype
== MIME_CTYPE_MULTIPART
) {
974 SET_MIME_STATE(state
, MIME_STATE_BODY
,
975 MIME_CTYPE_OTHER
, MIME_STYPE_OTHER
,
976 MIME_ENC_7BIT
, MIME_ENC_7BIT
);
978 SET_CURR_STATE(state
, MIME_STATE_BODY
);
983 * Invalid input. Force output of one blank line and jump to the
984 * body state, leaving all other state alone.
986 * We don't break legitimate mail by inserting a blank line
987 * separator between primary headers and a non-empty body. Many
988 * MTA's don't even record the presence or absence of this
989 * separator, nor does the Milter protocol pass it on to Milter
992 * XXX We don't insert a blank line separator into attachments, to
993 * avoid breaking digital signatures. Postfix shall not do a
994 * worse mail delivery job than MTAs that can't even parse MIME.
995 * We switch to body state anyway, to avoid treating body text as
996 * header text, and mis-interpreting or truncating it. The code
997 * below for initial From_ lines is for educational purposes.
999 * Sites concerned about MIME evasion can use a MIME normalizer.
1000 * Postfix has a different mission.
1004 msg_info("garbage in %s header",
1005 state
->curr_state
== MIME_STATE_MULTIPART
? "multipart" :
1006 state
->curr_state
== MIME_STATE_PRIMARY
? "primary" :
1007 state
->curr_state
== MIME_STATE_NESTED
? "nested" :
1009 switch (state
->curr_state
) {
1010 case MIME_STATE_PRIMARY
:
1011 BODY_OUT(state
, REC_TYPE_NORM
, "", 0);
1012 SET_CURR_STATE(state
, MIME_STATE_BODY
);
1015 case MIME_STATE_NESTED
:
1016 if (state
->body_offset
<= 1
1017 && rec_type
== REC_TYPE_NORM
1019 && (strncmp(text
+ (*text
== '>'), "From ", 5) == 0
1020 || strncmp(text
, "=46rom ", 7) == 0))
1025 SET_CURR_STATE(state
, MIME_STATE_BODY
);
1032 * This input is not text. Go to body state, unconditionally.
1035 SET_CURR_STATE(state
, MIME_STATE_BODY
);
1040 * Body text. Look for message boundaries, and recover from missing
1041 * boundary strings. Missing boundaries can happen in aggressive mode
1042 * with text/rfc822-headers or with message/partial. Ignore non-space
1043 * cruft after --boundary or --boundary--, because some MUAs do, and
1044 * because only perverse software would take advantage of this to
1045 * escape detection. We have to ignore trailing cruft anyway, because
1046 * our saved copy of the boundary string may have been truncated for
1049 * Optionally look for 8-bit data in content that was announced as, or
1050 * that defaults to, 7-bit. Unfortunately, we cannot turn this on by
1051 * default. Majordomo sends requests for approval that do not
1052 * propagate the MIME information from the enclosed message to the
1053 * message headers of the approval request.
1055 * Set the proper state information after processing a message boundary
1058 * Don't look for boundary strings at the start of a continued record.
1060 * Don't assume that the input is null terminated.
1062 case MIME_STATE_BODY
:
1063 if (input_is_text
) {
1064 if ((state
->static_flags
& MIME_OPT_REPORT_8BIT_IN_7BIT_BODY
) != 0
1065 && state
->curr_encoding
== MIME_ENC_7BIT
1066 && (state
->err_flags
& MIME_ERR_8BIT_IN_7BIT_BODY
) == 0) {
1067 for (cp
= CU_CHAR_PTR(text
); cp
< CU_CHAR_PTR(text
+ len
); cp
++)
1069 REPORT_ERROR_LEN(state
, MIME_ERR_8BIT_IN_7BIT_BODY
,
1074 if (state
->stack
&& state
->prev_rec_type
!= REC_TYPE_CONT
1075 && len
> 2 && text
[0] == '-' && text
[1] == '-') {
1076 for (sp
= state
->stack
; sp
!= 0; sp
= sp
->next
) {
1077 if (len
>= 2 + sp
->bound_len
&&
1078 strncmp(text
+ 2, sp
->boundary
, sp
->bound_len
) == 0) {
1079 while (sp
!= state
->stack
)
1080 mime_state_pop(state
);
1081 if (len
>= 4 + sp
->bound_len
&&
1082 strncmp(text
+ 2 + sp
->bound_len
, "--", 2) == 0) {
1083 mime_state_pop(state
);
1084 SET_MIME_STATE(state
, MIME_STATE_BODY
,
1085 MIME_CTYPE_OTHER
, MIME_STYPE_OTHER
,
1086 MIME_ENC_7BIT
, MIME_ENC_7BIT
);
1088 SET_MIME_STATE(state
, MIME_STATE_MULTIPART
,
1089 sp
->def_ctype
, sp
->def_stype
,
1090 MIME_ENC_7BIT
, MIME_ENC_7BIT
);
1096 /* Put last for consistency with header output routine. */
1097 if ((state
->static_flags
& MIME_OPT_DOWNGRADE
)
1098 && state
->curr_domain
!= MIME_ENC_7BIT
)
1099 mime_state_downgrade(state
, rec_type
, text
, len
);
1101 BODY_OUT(state
, rec_type
, text
, len
);
1105 * The input is not a text record. Inform the application that this
1106 * is the last opportunity to send any pending output.
1109 if (state
->body_end
)
1110 state
->body_end(state
->app_context
);
1112 SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state
, rec_type
);
1115 * Oops. This can't happen.
1118 msg_panic("mime_state_update: unknown state: %d", state
->curr_state
);
1123 * Mime error to (DSN, text) mapping. Order matters; more serious errors
1124 * must precede less serious errors, because the error-to-text conversion
1125 * can report only one error.
1127 static const MIME_STATE_DETAIL mime_err_detail
[] = {
1128 MIME_ERR_NESTING
, "5.6.0", "MIME nesting exceeds safety limit",
1129 MIME_ERR_TRUNC_HEADER
, "5.6.0", "message header length exceeds safety limit",
1130 MIME_ERR_8BIT_IN_HEADER
, "5.6.0", "improper use of 8-bit data in message header",
1131 MIME_ERR_8BIT_IN_7BIT_BODY
, "5.6.0", "improper use of 8-bit data in message body",
1132 MIME_ERR_ENCODING_DOMAIN
, "5.6.0", "invalid message/* or multipart/* encoding domain",
1136 /* mime_state_error - error code to string */
1138 const char *mime_state_error(int error_code
)
1140 const MIME_STATE_DETAIL
*mp
;
1142 if (error_code
== 0)
1143 msg_panic("mime_state_error: there is no error");
1144 for (mp
= mime_err_detail
; mp
->code
; mp
++)
1145 if (mp
->code
& error_code
)
1147 msg_panic("mime_state_error: unknown error code %d", error_code
);
1150 /* mime_state_detail - error code to table entry with assorted data */
1152 const MIME_STATE_DETAIL
*mime_state_detail(int error_code
)
1154 const MIME_STATE_DETAIL
*mp
;
1156 if (error_code
== 0)
1157 msg_panic("mime_state_detail: there is no error");
1158 for (mp
= mime_err_detail
; mp
->code
; mp
++)
1159 if (mp
->code
& error_code
)
1161 msg_panic("mime_state_detail: unknown error code %d", error_code
);
1167 #include <stringops.h>
1168 #include <vstream.h>
1169 #include <msg_vstream.h>
1170 #include <rec_streamlf.h>
1173 * Stress test the REC_TYPE_CONT/NORM handling, but don't break header
1176 /*#define REC_LEN 40*/
1178 #define REC_LEN 1024
1180 static void head_out(void *context
, int class, const HEADER_OPTS
*unused_info
,
1181 VSTRING
*buf
, off_t offset
)
1183 VSTREAM
*stream
= (VSTREAM
*) context
;
1185 vstream_fprintf(stream
, "%s %ld\t|%s\n",
1186 class == MIME_HDR_PRIMARY
? "MAIN" :
1187 class == MIME_HDR_MULTIPART
? "MULT" :
1188 class == MIME_HDR_NESTED
? "NEST" :
1189 "ERROR", (long) offset
, STR(buf
));
1192 static void head_end(void *context
)
1194 VSTREAM
*stream
= (VSTREAM
*) context
;
1196 vstream_fprintf(stream
, "HEADER END\n");
1199 static void body_out(void *context
, int rec_type
, const char *buf
, ssize_t len
,
1202 VSTREAM
*stream
= (VSTREAM
*) context
;
1204 vstream_fprintf(stream
, "BODY %c %ld\t|", rec_type
, (long) offset
);
1205 vstream_fwrite(stream
, buf
, len
);
1206 if (rec_type
== REC_TYPE_NORM
)
1207 VSTREAM_PUTC('\n', stream
);
1210 static void body_end(void *context
)
1212 VSTREAM
*stream
= (VSTREAM
*) context
;
1214 vstream_fprintf(stream
, "BODY END\n");
/* err_print - test driver callback: log an error with a text excerpt */

static void err_print(void *unused_context, int err_flag,
                      const char *text, ssize_t len)
{
    int     shown;

    /*
     * Show at most 100 bytes of the offending text. The precision bounds
     * the output, so the text need not be null terminated.
     */
    if (len < 100)
        shown = (int) len;
    else
        shown = 100;

    msg_warn("%s: %.*s", mime_state_error(err_flag), shown, text);
}
/*
 * Safety limits used by the MIME processor. NOTE(review): these
 * definitions appear next to the test driver callbacks, so they presumably
 * stand in for the configured values when this file is built as a
 * stand-alone test program - confirm against the enclosing conditional.
 */
int     var_header_limit = 2000;	/* message header length limit */
int     var_mime_maxdepth = 20;		/* multipart nesting limit */
int     var_mime_bound_len = 2000;	/* NOTE(review): presumably the boundary string length limit */
1228 int main(int unused_argc
, char **argv
)
1239 #define MIME_OPTIONS \
1240 (MIME_OPT_REPORT_8BIT_IN_7BIT_BODY \
1241 | MIME_OPT_REPORT_8BIT_IN_HEADER \
1242 | MIME_OPT_REPORT_ENCODING_DOMAIN \
1243 | MIME_OPT_REPORT_TRUNC_HEADER \
1244 | MIME_OPT_REPORT_NESTING \
1245 | MIME_OPT_DOWNGRADE)
1247 msg_vstream_init(basename(argv
[0]), VSTREAM_OUT
);
1249 buf
= vstring_alloc(10);
1250 state
= mime_state_alloc(MIME_OPTIONS
,
1254 (void *) VSTREAM_OUT
);
1260 rec_type
= rec_streamlf_get(VSTREAM_IN
, buf
, REC_LEN
);
1261 VSTRING_TERMINATE(buf
);
1262 err
= mime_state_update(state
, last
= rec_type
, STR(buf
), LEN(buf
));
1263 vstream_fflush(VSTREAM_OUT
);
1264 } while (rec_type
> 0);
1269 if (err
& MIME_ERR_TRUNC_HEADER
)
1270 msg_warn("message header length exceeds safety limit");
1271 if (err
& MIME_ERR_NESTING
)
1272 msg_warn("MIME nesting exceeds safety limit");
1273 if (err
& MIME_ERR_8BIT_IN_HEADER
)
1274 msg_warn("improper use of 8-bit data in message header");
1275 if (err
& MIME_ERR_8BIT_IN_7BIT_BODY
)
1276 msg_warn("improper use of 8-bit data in message body");
1277 if (err
& MIME_ERR_ENCODING_DOMAIN
)
1278 msg_warn("improper message/* or multipart/* encoding domain");
1283 mime_state_free(state
);