external/ibm-public/postfix/dist/src/global/mime_state.c

   1 /*      $NetBSD$        */
   2
   3 /*++
   4 /* NAME
   5 /*      mime_state 3
   6 /* SUMMARY
   7 /*      MIME parser state machine
   8 /* SYNOPSIS
   9 /*      #include <mime_state.h>
  10 /*
  11 /*      MIME_STATE *mime_state_alloc(flags, head_out, head_end,
  12 /*                                       body_out, body_end,
  13 /*                                       err_print, context)
  14 /*      int     flags;
  15 /*      void    (*head_out)(void *ptr, int header_class,
  16 /*                              const HEADER_OPTS *header_info,
  17 /*                              VSTRING *buf, off_t offset);
  18 /*      void    (*head_end)(void *ptr);
  19 /*      void    (*body_out)(void *ptr, int rec_type,
  20 /*                              const char *buf, ssize_t len,
  21 /*                              off_t offset);
  22 /*      void    (*body_end)(void *ptr);
  23 /*      void    (*err_print)(void *ptr, int err_flag, const char *text, ssize_t len);
  24 /*      void    *context;
  25 /*
  26 /*      int     mime_state_update(state, rec_type, buf, len)
  27 /*      MIME_STATE *state;
  28 /*      int     rec_type;
  29 /*      const char *buf;
  30 /*      ssize_t len;
  31 /*
  32 /*      MIME_STATE *mime_state_free(state)
  33 /*      MIME_STATE *state;
  34 /*
  35 /*      const char *mime_state_error(error_code)
  36 /*      int     error_code;
  37 /*
  38 /*      typedef struct {
  39 /* .in +4
  40 /*              const int code;         /* internal error code */
  41 /*              const char *dsn;        /* RFC 3463 */
  42 /*              const char *text;       /* descriptive text */
  43 /* .in -4
  44 /*      } MIME_STATE_DETAIL;
  45 /*
  46 /*      const MIME_STATE_DETAIL *mime_state_detail(error_code)
  47 /*      int     error_code;
  48 /* DESCRIPTION
  49 /*      This module implements a one-pass MIME processor with optional
  50 /*      8-bit to quoted-printable conversion.
  51 /*
  52 /*      In order to fend off denial of service attacks, message headers
  53 /*      are truncated at or above var_header_limit bytes, message boundary
  54 /*      strings are truncated at var_mime_bound_len bytes, and the multipart
  55 /*      nesting level is limited to var_mime_maxdepth levels.
  56 /*
  57 /*      mime_state_alloc() creates a MIME state machine. The machine
  58 /*      is delivered in its initial state, expecting content type
  59 /*      text/plain, 7-bit data.
  60 /*
  61 /*      mime_state_update() updates the MIME state machine according
  62 /*      to the input record type and the record content.
  63 /*      The result value is the bit-wise OR of zero or more of the following:
  64 /* .IP MIME_ERR_TRUNC_HEADER
  65 /*      A message header was longer than var_header_limit bytes.
  66 /* .IP MIME_ERR_NESTING
  67 /*      The MIME structure was nested more than var_mime_maxdepth levels.
  68 /* .IP MIME_ERR_8BIT_IN_HEADER
  69 /*      A message header contains 8-bit data. This is always illegal.
  70 /* .IP MIME_ERR_8BIT_IN_7BIT_BODY
  71 /*      A MIME header specifies (or defaults to) 7-bit content, but the
  72 /*      corresponding message body or body parts contain 8-bit content.
  73 /* .IP MIME_ERR_ENCODING_DOMAIN
  74 /*      An entity of type "message" or "multipart" specifies the wrong
  75 /*      content transfer encoding domain, or specifies a transformation
  76 /*      (quoted-printable, base64) instead of a domain (7bit, 8bit,
  77 /*      or binary).
  78 /* .PP
  79 /*      mime_state_free() releases storage for a MIME state machine,
  80 /*      and conveniently returns a null pointer.
  81 /*
  82 /*      mime_state_error() returns a string representation for the
  83 /*      specified error code. When multiple errors are specified it
  84 /*      reports what it deems the most serious one.
  85 /*
  86 /*      mime_state_detail() returns a table entry with error
  87 /*      information for the specified error code. When multiple
  88 /*      errors are specified it reports what it deems the most
  89 /*      serious one.
  90 /*
  91 /*      Arguments:
  92 /* .IP body_out
  93 /*      The output routine for body lines. It receives unmodified input
  94 /*      records, or the result of 8-bit -> 7-bit conversion.
  95 /* .IP body_end
  96 /*      A null pointer, or a pointer to a routine that is called after
  97 /*      the last input record is processed.
  98 /* .IP buf
  99 /*      Buffer with the content of a logical or physical message record.
 100 /* .IP context
 101 /*      Caller context that is passed on to the head_out and body_out
 102 /*      routines.
 103 /* .IP enc_type
 104 /*      The content encoding: MIME_ENC_7BIT or MIME_ENC_8BIT.
 105 /* .IP err_print
 106 /*      Null pointer, or pointer to a function that is called with
 107 /*      arguments: the application context, the error type, and the
 108 /*      offending input. Only one instance per error type is reported.
 109 /* .IP flags
 110 /*      Special processing options. Specify the bit-wise OR of zero or
 111 /*      more of the following:
 112 /* .RS
 113 /* .IP MIME_OPT_DISABLE_MIME
 114 /*      Pay no attention to Content-* message headers, and switch to
 115 /*      message body state at the end of the primary message headers.
 116 /* .IP MIME_OPT_REPORT_TRUNC_HEADER
 117 /*      Report errors that set the MIME_ERR_TRUNC_HEADER error flag
 118 /*      (see above).
 119 /* .IP MIME_OPT_REPORT_8BIT_IN_HEADER
 120 /*      Report errors that set the MIME_ERR_8BIT_IN_HEADER error
 121 /*      flag (see above). This rarely stops legitimate mail.
 122 /* .IP MIME_OPT_REPORT_8BIT_IN_7BIT_BODY
 123 /*      Report errors that set the MIME_ERR_8BIT_IN_7BIT_BODY error
 124 /*      flag (see above). This currently breaks Majordomo mail that is
 125 /*      forwarded for approval, because Majordomo does not propagate
 126 /*      MIME type information from the enclosed message to the message
 127 /*      headers of the request for approval.
 128 /* .IP MIME_OPT_REPORT_ENCODING_DOMAIN
 129 /*      Report errors that set the MIME_ERR_ENCODING_DOMAIN error
 130 /*      flag (see above).
 131 /* .IP MIME_OPT_REPORT_NESTING
 132 /*      Report errors that set the MIME_ERR_NESTING error flag
 133 /*      (see above).
 134 /* .IP MIME_OPT_RECURSE_ALL_MESSAGE
 135 /*      Recurse into message/anything types other than message/rfc822.
 136 /*      This feature can detect "bad" information in headers of
 137 /*      message/partial and message/external-body types. It must
 138 /*      not be used with 8-bit -> 7-bit MIME transformations.
 139 /* .IP MIME_OPT_DOWNGRADE
 140 /*      Transform content that claims to be 8-bit into quoted-printable.
 141 /*      Where appropriate, update Content-Transfer-Encoding: message
 142 /*      headers.
 143 /* .RE
 144 /* .sp
 145 /*      For convenience, MIME_OPT_NONE requests no special processing.
 146 /* .IP header_class
 147 /*      Specifies where a message header is located.
 148 /* .RS
 149 /* .IP MIME_HDR_PRIMARY
 150 /*      In the primary message header section.
 151 /* .IP MIME_HDR_MULTIPART
 152 /*      In the header section after a multipart boundary string.
 153 /* .IP MIME_HDR_NESTED
 154 /*      At the start of a nested (e.g., message/rfc822) message.
 155 /* .RE
 156 /* .sp
 157 /*      For convenience, the macros MIME_HDR_FIRST and MIME_HDR_LAST
 158 /*      specify the range of MIME_HDR_MUMBLE macros.
 159 /* .sp
 160 /*      To find out if something is a MIME header at the beginning
 161 /*      of an RFC 822 message or an attached message, look at the
 162 /*      header_info argument.
 163 /* .IP header_info
 164 /*      Null pointer or information about the message header, see
 165 /*      header_opts(3).
 166 /* .IP head_out
 167 /*      The output routine that is invoked for outputting a message header.
 168 /*      A multi-line header is passed as one chunk of text with embedded
 169 /*      newlines.
 170 /*      It is the responsibility of the output routine to break the text
 171 /*      at embedded newlines, and to break up long text between newlines
 172 /*      into multiple output records.
 173 /*      Note: an output routine is explicitly allowed to modify the text.
 174 /* .IP head_end
 175 /*      A null pointer, or a pointer to a routine that is called after
 176 /*      the last message header in the first header block is processed.
 177 /* .IP len
 178 /*      Length of non-VSTRING input buffer.
 179 /* .IP offset
 180 /*      The offset in bytes from the start of the current block of message
 181 /*      headers or body lines. Line boundaries are counted as one byte.
 182 /* .IP rec_type
 183 /*      The input record type as defined in rec_type(3h). State is
 184 /*      updated for text records (REC_TYPE_NORM or REC_TYPE_CONT).
 185 /*      Some input records are stored internally in order to reconstruct
 186 /*      multi-line input.  Upon receipt of any non-text record type, all
 187 /*      stored input is flushed and the state is set to "body".
 188 /* .IP state
 189 /*      MIME parser state created with mime_state_alloc().
 190 /* BUGS
 191 /*      NOTE: when the end of headers is reached, mime_state_update()
 192 /*      may execute up to three call-backs before returning to the
 193 /*      caller: head_out(), head_end(), and body_out() or body_end().
 194 /*      As long as call-backs return no result, it is up to the
 195 /*      call-back routines to check if a previous call-back experienced
 196 /*      an error.
 197 /*
 198 /*      Different mail user agents treat malformed message boundary
 199 /*      strings in different ways. The Postfix MIME processor cannot
 200 /*      be bug-compatible with everything.
 201 /*
 202 /*      This module will not glue together multipart boundary strings that
 203 /*      span multiple input records.
 204 /*
 205 /*      This module will not glue together RFC 2231 formatted (boundary)
 206 /*      parameter values. RFC 2231 claims compatibility with existing
 207 /*      MIME processors. Splitting boundary strings is not backwards
 208 /*      compatible.
 209 /*
 210 /*      The "8-bit data inside 7-bit body" test is myopic. It is not aware
 211 /*      of any enclosing (message or multipart) encoding information.
 212 /*
 213 /*      If the input ends in data other than a hard line break, this module
 214 /*      will add a hard line break of its own. No line break is added to
 215 /*      empty input.
 216 /*
 217 /*      This code recognizes the obsolete form "headername :" but will
 218 /*      normalize it to the canonical form "headername:". Leaving the
 219 /*      obsolete form alone would cause too much trouble with existing code
 220 /*      that expects only the normalized form.
 221 /* SEE ALSO
 222 /*      msg(3) diagnostics interface
 223 /*      header_opts(3) header information lookup
 224 /*      RFC 822 (ARPA Internet Text Messages)
 225 /*      RFC 2045 (MIME: Format of internet message bodies)
 226 /*      RFC 2046 (MIME: Media types)
 227 /* DIAGNOSTICS
 228 /*      Fatal errors: memory allocation problem.
 229 /* LICENSE
 230 /* .ad
 231 /* .fi
 232 /*      The Secure Mailer license must be distributed with this software.
 233 /* HISTORY
 234 /* .ad
 235 /* .fi
 236 /*      This code was implemented from scratch after reading the RFC
 237 /*      documents. This was a relatively straightforward effort with
 238 /*      few if any surprises. Victor Duchovni of Morgan Stanley shared
 239 /*      his experiences with ambiguities in real-life MIME implementations.
 240 /*      Liviu Daia of the Romanian Academy shared his insights in some
 241 /*      of the darker corners.
 242 /* AUTHOR(S)
 243 /*      Wietse Venema
 244 /*      IBM T.J. Watson Research
 245 /*      P.O. Box 704
 246 /*      Yorktown Heights, NY 10598, USA
 247 /*--*/
 248
 249 /* System library. */
 250
 251 #include <sys_defs.h>
 252 #include <stdarg.h>
 253 #include <ctype.h>
 254 #include <string.h>
 255
 256 #ifdef STRCASECMP_IN_STRINGS_H
 257 #include <strings.h>
 258 #endif
 259
 260 /* Utility library. */
 261
 262 #include <mymalloc.h>
 263 #include <msg.h>
 264 #include <vstring.h>
 265
 266 /* Global library. */
 267
 268 #include <rec_type.h>
 269 #include <is_header.h>
 270 #include <header_opts.h>
 271 #include <mail_params.h>
 272 #include <header_token.h>
 273 #include <lex_822.h>
 274 #include <mime_state.h>
 275
 276 /* Application-specific. */
 277
 278  /*
 279   * Mime parser stack element for multipart content.
       * mime_state_push() creates one element for each boundary string that it
       * is given, and mime_state_pop() releases the most recently pushed one.
       * The elements form a singly-linked list headed by the "stack" member of
       * the parser state.
 280   */
 281 typedef struct MIME_STACK {
 282     int     def_ctype;                  /* default content type for parts */
 283     int     def_stype;                  /* default content subtype for parts */
 284     char   *boundary;                   /* private, possibly truncated, copy */
 285     ssize_t bound_len;                  /* length of the stored boundary */
 286     struct MIME_STACK *next;            /* next (older) stack element */
 287 } MIME_STACK;
 288
 289  /*
 290   * Mime parser state.
       * Instances are created by mime_state_alloc() and destroyed by
       * mime_state_free(). MIME_MAX_TOKEN is the size of the token array that
       * is handed to header_token() when parsing a Content-Type: header.
 291   */
 292 #define MIME_MAX_TOKEN          3       /* tokens per attribute */
 293
 294 struct MIME_STATE {
 295
 296     /*
 297      * Volatile members.
          * These change while input is being processed.
 298      */
 299     int     curr_state;                 /* header/body state (MIME_STATE_XXX) */
 300     int     curr_ctype;                 /* last or default content type */
 301     int     curr_stype;                 /* last or default content subtype */
 302     int     curr_encoding;              /* last or default content encoding */
 303     int     curr_domain;                /* last or default encoding unit */
 304     VSTRING *output_buffer;             /* headers, quoted-printable body */
 305     int     prev_rec_type;              /* previous input record type */
 306     int     nesting_level;              /* safety: +1 per push, -1 per pop */
 307     MIME_STACK *stack;                  /* for composite types, null if empty */
 308     HEADER_TOKEN token[MIME_MAX_TOKEN]; /* header token array */
 309     VSTRING *token_buffer;              /* header parser scratch buffer */
 310     int     err_flags;                  /* accumulated MIME_ERR_XXX bits */
 311     off_t   head_offset;                /* offset in header block */
 312     off_t   body_offset;                /* offset in body block */
 313
 314     /*
 315      * Static members.
          * These are copied from the mime_state_alloc() arguments.
 316      */
 317     int     static_flags;               /* static processing options */
 318     MIME_STATE_HEAD_OUT head_out;       /* header output routine */
 319     MIME_STATE_ANY_END head_end;        /* end of primary header routine */
 320     MIME_STATE_BODY_OUT body_out;       /* body output routine */
 321     MIME_STATE_ANY_END body_end;        /* end of body output routine */
 322     MIME_STATE_ERR_PRINT err_print;     /* error report, may be null */
 323     void   *app_context;                /* application context */
 324 };
 325
 326  /*
 327   * Content types and subtypes that we care about, either because we have to,
 328   * or because we want to filter out broken MIME messages.
       * The MIME_CTYPE_XXX codes are stored in the curr_ctype and def_ctype
       * members, the MIME_STYPE_XXX codes in the curr_stype and def_stype
       * members. MIME_STYPE_OTHER stands for any subtype without its own code.
 329   */
 330 #define MIME_CTYPE_OTHER        0
 331 #define MIME_CTYPE_TEXT         1
 332 #define MIME_CTYPE_MESSAGE      2
 333 #define MIME_CTYPE_MULTIPART    3
 334
 335 #define MIME_STYPE_OTHER        0
 336 #define MIME_STYPE_PLAIN        1
 337 #define MIME_STYPE_RFC822       2
 338 #define MIME_STYPE_PARTIAL      3
 339 #define MIME_STYPE_EXTERN_BODY  4
 340
 341  /*
 342   * MIME parser states. We steal from the public interface.
 343   */
 344 #define MIME_STATE_PRIMARY      MIME_HDR_PRIMARY        /* primary headers */
 345 #define MIME_STATE_MULTIPART    MIME_HDR_MULTIPART      /* after --boundary */
 346 #define MIME_STATE_NESTED       MIME_HDR_NESTED /* message/rfc822 */
 347 #define MIME_STATE_BODY         (MIME_HDR_NESTED + 1)
 348
 349 #define SET_MIME_STATE(ptr, state, ctype, stype, encoding, domain) do { \
 350         (ptr)->curr_state = (state); \
 351         (ptr)->curr_ctype = (ctype); \
 352         (ptr)->curr_stype = (stype); \
 353         (ptr)->curr_encoding = (encoding); \
 354         (ptr)->curr_domain = (domain); \
 355         if ((state) == MIME_STATE_BODY) \
 356             (ptr)->body_offset = 0; \
 357         else \
 358             (ptr)->head_offset = 0; \
 359     } while (0)
 360
 361 #define SET_CURR_STATE(ptr, state) do { \
 362         (ptr)->curr_state = (state); \
 363         if ((state) == MIME_STATE_BODY) \
 364             (ptr)->body_offset = 0; \
 365         else \
 366             (ptr)->head_offset = 0; \
 367     } while (0)
 368
 369  /*
 370   * MIME encodings and domains. We intentionally use the same codes for
 371   * encodings and domains, so that we can easily find out whether a content
 372   * transfer encoding header specifies a domain or whether it specifies
 373   * domain+encoding, which is illegal for multipart/any and message/any.
 374   */
 375 typedef struct MIME_ENCODING {
 376     const char *name;                   /* external representation */
 377     int     encoding;                   /* internal representation */
 378     int     domain;                     /* subset of encoding */
 379 } MIME_ENCODING;
 380
 381 #define MIME_ENC_QP             1       /* encoding + domain */
 382 #define MIME_ENC_BASE64         2       /* encoding + domain */
 383  /* These are defined in mime_state.h as part of the external interface. */
 384 #ifndef MIME_ENC_7BIT
 385 #define MIME_ENC_7BIT           7       /* domain only */
 386 #define MIME_ENC_8BIT           8       /* domain only */
 387 #define MIME_ENC_BINARY         9       /* domain only */
 388 #endif
 389
 390 static const MIME_ENCODING mime_encoding_map[] = {      /* RFC 2045 */
 391     "7bit", MIME_ENC_7BIT, MIME_ENC_7BIT,       /* domain */
 392     "8bit", MIME_ENC_8BIT, MIME_ENC_8BIT,       /* domain */
 393     "binary", MIME_ENC_BINARY, MIME_ENC_BINARY, /* domain */
 394     "base64", MIME_ENC_BASE64, MIME_ENC_7BIT,   /* encoding */
 395     "quoted-printable", MIME_ENC_QP, MIME_ENC_7BIT,     /* encoding */
 396     0,
 397 };
 398
 399  /*
 400   * Silly Little Macros.
 401   */
 402 #define STR(x)          vstring_str(x)
 403 #define LEN(x)          VSTRING_LEN(x)
 404 #define END(x)          vstring_end(x)
 405 #define CU_CHAR_PTR(x)  ((const unsigned char *) (x))
 406
 407 #define REPORT_ERROR_LEN(state, err_type, text, len) do { \
 408         if ((state->err_flags & err_type) == 0) { \
 409             if (state->err_print != 0) \
 410                 state->err_print(state->app_context, err_type, text, len); \
 411             state->err_flags |= err_type; \
 412         } \
 413     } while (0)
 414
 415 #define REPORT_ERROR(state, err_type, text) do { \
 416         const char *_text = text; \
 417         ssize_t _len = strlen(text); \
 418         REPORT_ERROR_LEN(state, err_type, _text, _len); \
 419     } while (0)
 420
 421 #define REPORT_ERROR_BUF(state, err_type, buf) \
 422     REPORT_ERROR_LEN(state, err_type, STR(buf), LEN(buf))
 423
 424
 425  /*
 426   * Outputs and state changes are interleaved, so we must maintain separate
 427   * offsets for header and body segments.
 428   */
 429 #define HEAD_OUT(ptr, info, len) do { \
 430         if ((ptr)->head_out) { \
 431             (ptr)->head_out((ptr)->app_context, (ptr)->curr_state, \
 432                             (info), (ptr)->output_buffer, (ptr)->head_offset); \
 433             (ptr)->head_offset += (len) + 1; \
 434         } \
 435     } while(0)
 436
 437 #define BODY_OUT(ptr, rec_type, text, len) do { \
 438         if ((ptr)->body_out) { \
 439             (ptr)->body_out((ptr)->app_context, (rec_type), \
 440                             (text), (len), (ptr)->body_offset); \
 441             (ptr)->body_offset += (len) + 1; \
 442         } \
 443     } while(0)
 444
 445 /* mime_state_push - push boundary onto stack */
 446
 447 static void mime_state_push(MIME_STATE *state, int def_ctype, int def_stype,
 448                                     const char *boundary)
 449 {
 450     MIME_STACK *stack;
 451
 452     /*
 453      * RFC 2046 mandates that a boundary string be up to 70 characters long.
 454      * Some MTAs, including Postfix, include the fully-qualified MTA name
 455      * which can be longer, so we are willing to handle boundary strings that
 456      * exceed the RFC specification. We allow for message headers of up to
 457      * var_header_limit characters. In order to avoid denial of service, we
 458      * have to impose a configurable limit on the amount of text that we are
 459      * willing to store as a boundary string. Despite this truncation way we
 460      * will still correctly detect all intermediate boundaries and all the
 461      * message headers that follow those boundaries.
 462      */
 463     state->nesting_level += 1;
 464     stack = (MIME_STACK *) mymalloc(sizeof(*stack));
 465     stack->def_ctype = def_ctype;
 466     stack->def_stype = def_stype;
 467     if ((stack->bound_len = strlen(boundary)) > var_mime_bound_len)
 468         stack->bound_len = var_mime_bound_len;
 469     stack->boundary = mystrndup(boundary, stack->bound_len);
 470     stack->next = state->stack;
 471     state->stack = stack;
 472     if (msg_verbose)
 473         msg_info("PUSH boundary %s", stack->boundary);
 474 }
 475
 476 /* mime_state_pop - pop boundary from stack */
 477
 478 static void mime_state_pop(MIME_STATE *state)
 479 {
 480     MIME_STACK *stack;
 481
 482     if ((stack = state->stack) == 0)
 483         msg_panic("mime_state_pop: there is no stack");
 484     if (msg_verbose)
 485         msg_info("POP boundary %s", stack->boundary);
 486     state->nesting_level -= 1;
 487     state->stack = stack->next;
 488     myfree(stack->boundary);
 489     myfree((char *) stack);
 490 }
 491
 492 /* mime_state_alloc - create MIME state machine */
 493
 494 MIME_STATE *mime_state_alloc(int flags,
 495                                      MIME_STATE_HEAD_OUT head_out,
 496                                      MIME_STATE_ANY_END head_end,
 497                                      MIME_STATE_BODY_OUT body_out,
 498                                      MIME_STATE_ANY_END body_end,
 499                                      MIME_STATE_ERR_PRINT err_print,
 500                                      void *context)
 501 {
 502     MIME_STATE *state;
 503
 504     state = (MIME_STATE *) mymalloc(sizeof(*state));
 505
 506     /* Volatile members. */
 507     state->err_flags = 0;
 508     state->body_offset = 0;                     /* XXX */
 509     SET_MIME_STATE(state, MIME_STATE_PRIMARY,
 510                    MIME_CTYPE_TEXT, MIME_STYPE_PLAIN,
 511                    MIME_ENC_7BIT, MIME_ENC_7BIT);
 512     state->output_buffer = vstring_alloc(100);
 513     state->prev_rec_type = 0;
 514     state->stack = 0;
 515     state->token_buffer = vstring_alloc(1);
 516
 517     /* Static members. */
 518     state->static_flags = flags;
 519     state->head_out = head_out;
 520     state->head_end = head_end;
 521     state->body_out = body_out;
 522     state->body_end = body_end;
 523     state->err_print = err_print;
 524     state->app_context = context;
 525     return (state);
 526 }
 527
 528 /* mime_state_free - destroy MIME state machine */
 529
 530 MIME_STATE *mime_state_free(MIME_STATE *state)
 531 {
 532     vstring_free(state->output_buffer);
 533     while (state->stack)
 534         mime_state_pop(state);
 535     if (state->token_buffer)
 536         vstring_free(state->token_buffer);
 537     myfree((char *) state);
 538     return (0);
 539 }
 540
 541 /* mime_state_content_type - process content-type header */
 542
 543 static void mime_state_content_type(MIME_STATE *state,
 544                                             const HEADER_OPTS *header_info)
 545 {
 546     const char *cp;
 547     ssize_t tok_count;
 548     int     def_ctype;
 549     int     def_stype;
 550
 551 #define TOKEN_MATCH(tok, text) \
 552     ((tok).type == HEADER_TOK_TOKEN && strcasecmp((tok).u.value, (text)) == 0)
 553
 554 #define RFC2045_TSPECIALS       "()<>@,;:\\\"/[]?="
 555
 556 #define PARSE_CONTENT_TYPE_HEADER(state, ptr) \
 557     header_token(state->token, MIME_MAX_TOKEN, \
 558         state->token_buffer, ptr, RFC2045_TSPECIALS, ';')
 559
 560     cp = STR(state->output_buffer) + strlen(header_info->name) + 1;
 561     if ((tok_count = PARSE_CONTENT_TYPE_HEADER(state, &cp)) > 0) {
 562
 563         /*
 564          * text/whatever. Right now we don't really care if it is plain or
 565          * not, but we may want to recognize subtypes later, and then this
 566          * code can serve as an example.
 567          */
 568         if (TOKEN_MATCH(state->token[0], "text")) {
 569             state->curr_ctype = MIME_CTYPE_TEXT;
 570             if (tok_count >= 3
 571                 && state->token[1].type == '/'
 572                 && TOKEN_MATCH(state->token[2], "plain"))
 573                 state->curr_stype = MIME_STYPE_PLAIN;
 574             else
 575                 state->curr_stype = MIME_STYPE_OTHER;
 576             return;
 577         }
 578
 579         /*
 580          * message/whatever body parts start with another block of message
 581          * headers that we may want to look at. The partial and external-body
 582          * subtypes cannot be subjected to 8-bit -> 7-bit conversion, so we
 583          * must properly recognize them.
 584          */
 585         if (TOKEN_MATCH(state->token[0], "message")) {
 586             state->curr_ctype = MIME_CTYPE_MESSAGE;
 587             state->curr_stype = MIME_STYPE_OTHER;
 588             if (tok_count >= 3
 589                 && state->token[1].type == '/') {
 590                 if (TOKEN_MATCH(state->token[2], "rfc822"))
 591                     state->curr_stype = MIME_STYPE_RFC822;
 592                 else if (TOKEN_MATCH(state->token[2], "partial"))
 593                     state->curr_stype = MIME_STYPE_PARTIAL;
 594                 else if (TOKEN_MATCH(state->token[2], "external-body"))
 595                     state->curr_stype = MIME_STYPE_EXTERN_BODY;
 596             }
 597             return;
 598         }
 599
 600         /*
 601          * multipart/digest has default content type message/rfc822,
 602          * multipart/whatever has default content type text/plain.
 603          */
 604         if (TOKEN_MATCH(state->token[0], "multipart")) {
 605             state->curr_ctype = MIME_CTYPE_MULTIPART;
 606             if (tok_count >= 3
 607                 && state->token[1].type == '/'
 608                 && TOKEN_MATCH(state->token[2], "digest")) {
 609                 def_ctype = MIME_CTYPE_MESSAGE;
 610                 def_stype = MIME_STYPE_RFC822;
 611             } else {
 612                 def_ctype = MIME_CTYPE_TEXT;
 613                 def_stype = MIME_STYPE_PLAIN;
 614             }
 615
 616             /*
 617              * Yes, this is supposed to capture multiple boundary strings,
 618              * which are illegal and which could be used to hide content in
 619              * an implementation dependent manner. The code below allows us
 620              * to find embedded message headers as long as the sender uses
 621              * only one of these same-level boundary strings.
 622              *
 623              * Yes, this is supposed to ignore the boundary value type.
 624              */
 625             while ((tok_count = PARSE_CONTENT_TYPE_HEADER(state, &cp)) >= 0) {
 626                 if (tok_count >= 3
 627                     && TOKEN_MATCH(state->token[0], "boundary")
 628                     && state->token[1].type == '=') {
 629                     if (state->nesting_level > var_mime_maxdepth) {
 630                         if (state->static_flags & MIME_OPT_REPORT_NESTING)
 631                             REPORT_ERROR_BUF(state, MIME_ERR_NESTING,
 632                                              state->output_buffer);
 633                     } else {
 634                         mime_state_push(state, def_ctype, def_stype,
 635                                         state->token[2].u.value);
 636                     }
 637                 }
 638             }
 639         }
 640         return;
 641     }
 642
 643     /*
 644      * other/whatever.
 645      */
 646     else {
 647         state->curr_ctype = MIME_CTYPE_OTHER;
 648         return;
 649     }
 650 }
 651
 652 /* mime_state_content_encoding - process content-transfer-encoding header */
 653
 654 static void mime_state_content_encoding(MIME_STATE *state,
 655                                              const HEADER_OPTS *header_info)
 656 {
 657     const char *cp;
 658     const MIME_ENCODING *cmp;
 659
 660 #define PARSE_CONTENT_ENCODING_HEADER(state, ptr) \
 661     header_token(state->token, 1, state->token_buffer, ptr, (char *) 0, 0)
 662
 663     /*
 664      * Do content-transfer-encoding header. Never set the encoding domain to
 665      * something other than 7bit, 8bit or binary, even if we don't recognize
 666      * the input.
 667      */
 668     cp = STR(state->output_buffer) + strlen(header_info->name) + 1;
 669     if (PARSE_CONTENT_ENCODING_HEADER(state, &cp) > 0
 670         && state->token[0].type == HEADER_TOK_TOKEN) {
 671         for (cmp = mime_encoding_map; cmp->name != 0; cmp++) {
 672             if (strcasecmp(state->token[0].u.value, cmp->name) == 0) {
 673                 state->curr_encoding = cmp->encoding;
 674                 state->curr_domain = cmp->domain;
 675                 break;
 676             }
 677         }
 678     }
 679 }
 680
 681 /* mime_state_enc_name - encoding to printable form */
 682
 683 static const char *mime_state_enc_name(int encoding)
 684 {
 685     const MIME_ENCODING *cmp;
 686
 687     for (cmp = mime_encoding_map; cmp->name != 0; cmp++)
 688         if (encoding == cmp->encoding)
 689             return (cmp->name);
 690     return ("unknown");
 691 }
 692
 693 /* mime_state_downgrade - convert 8-bit data to quoted-printable */
 694
 695 static void mime_state_downgrade(MIME_STATE *state, int rec_type,
 696                                          const char *text, ssize_t len)
 697 {
 698     static char hexchars[] = "0123456789ABCDEF";
 699     const unsigned char *cp;
 700     int     ch;
 701
 702 #define QP_ENCODE(buffer, ch) { \
 703         VSTRING_ADDCH(buffer, '='); \
 704         VSTRING_ADDCH(buffer, hexchars[(ch >> 4) & 0xff]); \
 705         VSTRING_ADDCH(buffer, hexchars[ch & 0xf]); \
 706     }
 707
 708     /*
 709      * Insert a soft line break when the output reaches a critical length
 710      * before we reach a hard line break.
 711      */
 712     for (cp = CU_CHAR_PTR(text); cp < CU_CHAR_PTR(text + len); cp++) {
 713         /* Critical length before hard line break. */
 714         if (LEN(state->output_buffer) > 72) {
 715             VSTRING_ADDCH(state->output_buffer, '=');
 716             VSTRING_TERMINATE(state->output_buffer);
 717             BODY_OUT(state, REC_TYPE_NORM,
 718                      STR(state->output_buffer),
 719                      LEN(state->output_buffer));
 720             VSTRING_RESET(state->output_buffer);
 721         }
 722         /* Append the next character. */
 723         ch = *cp;
 724         if ((ch < 32 && ch != '\t') || ch == '=' || ch > 126) {
 725             QP_ENCODE(state->output_buffer, ch);
 726         } else {
 727             VSTRING_ADDCH(state->output_buffer, ch);
 728         }
 729     }
 730
 731     /*
 732      * Flush output after a hard line break (i.e. the end of a REC_TYPE_NORM
 733      * record). Fix trailing whitespace as per the RFC: in the worst case,
 734      * the output length will grow from 73 characters to 75 characters.
 735      */
 736     if (rec_type == REC_TYPE_NORM) {
 737         if (LEN(state->output_buffer) > 0
 738             && ((ch = END(state->output_buffer)[-1]) == ' ' || ch == '\t')) {
 739             vstring_truncate(state->output_buffer,
 740                              LEN(state->output_buffer) - 1);
 741             QP_ENCODE(state->output_buffer, ch);
 742         }
 743         VSTRING_TERMINATE(state->output_buffer);
 744         BODY_OUT(state, REC_TYPE_NORM,
 745                  STR(state->output_buffer),
 746                  LEN(state->output_buffer));
 747         VSTRING_RESET(state->output_buffer);
 748     }
 749 }
 750
 751 /* mime_state_update - update MIME state machine */
 752
 753 int     mime_state_update(MIME_STATE *state, int rec_type,
 754                                   const char *text, ssize_t len)
 755 {
 756     int     input_is_text = (rec_type == REC_TYPE_NORM
 757                              || rec_type == REC_TYPE_CONT);
 758     MIME_STACK *sp;
 759     const HEADER_OPTS *header_info;
 760     const unsigned char *cp;
 761
 762 #define SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type) do { \
 763         state->prev_rec_type = rec_type; \
 764         return (state->err_flags); \
 765     } while (0)
 766
 767     /*
 768      * Be sure to flush any partial output line that might still be buffered
 769      * up before taking any other "end of input" actions.
 770      */
 771     if (!input_is_text && state->prev_rec_type == REC_TYPE_CONT)
 772         mime_state_update(state, REC_TYPE_NORM, "", 0);
 773
 774     /*
 775      * This message state machine is kept simple for the sake of robustness.
 776      * Standards evolve over time, and we want to be able to correctly
 777      * process messages that are not yet defined. This state machine knows
 778      * about headers and bodies, understands that multipart/whatever has
 779      * multiple body parts with a header and body, and that message/whatever
 780      * has message headers at the start of a body part.
 781      */
 782     switch (state->curr_state) {
 783
 784         /*
 785          * First, deal with header information that we have accumulated from
 786          * previous input records. Discard text that does not fit in a header
 787          * buffer. Our limit is quite generous; Sendmail will refuse mail
 788          * with only 32kbyte in all the message headers combined.
 789          */
 790     case MIME_STATE_PRIMARY:
 791     case MIME_STATE_MULTIPART:
 792     case MIME_STATE_NESTED:
 793         if (LEN(state->output_buffer) > 0) {
 794             if (input_is_text) {
 795                 if (state->prev_rec_type == REC_TYPE_CONT) {
 796                     if (LEN(state->output_buffer) < var_header_limit) {
 797                         vstring_strncat(state->output_buffer, text, len);
 798                     } else {
 799                         if (state->static_flags & MIME_OPT_REPORT_TRUNC_HEADER)
 800                             REPORT_ERROR_BUF(state, MIME_ERR_TRUNC_HEADER,
 801                                              state->output_buffer);
 802                     }
 803                     SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
 804                 }
 805                 if (IS_SPACE_TAB(*text)) {
 806                     if (LEN(state->output_buffer) < var_header_limit) {
 807                         vstring_strcat(state->output_buffer, "\n");
 808                         vstring_strncat(state->output_buffer, text, len);
 809                     } else {
 810                         if (state->static_flags & MIME_OPT_REPORT_TRUNC_HEADER)
 811                             REPORT_ERROR_BUF(state, MIME_ERR_TRUNC_HEADER,
 812                                              state->output_buffer);
 813                     }
 814                     SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
 815                 }
 816             }
 817
 818             /*
 819              * The input is (the beginning of) another message header, or is
 820              * not a message header, or is not even a text record. With no
 821              * more input to append to this saved header, do output
 822              * processing and reset the saved header buffer. Hold on to the
 823              * content transfer encoding header if we have to do a 8->7
 824              * transformation, because the proper information depends on the
 825              * content type header: message and multipart require a domain,
 826              * leaf entities have either a transformation or a domain.
 827              */
 828             if (LEN(state->output_buffer) > 0) {
 829                 header_info = header_opts_find(STR(state->output_buffer));
 830                 if (!(state->static_flags & MIME_OPT_DISABLE_MIME)
 831                     && header_info != 0) {
 832                     if (header_info->type == HDR_CONTENT_TYPE)
 833                         mime_state_content_type(state, header_info);
 834                     if (header_info->type == HDR_CONTENT_TRANSFER_ENCODING)
 835                         mime_state_content_encoding(state, header_info);
 836                 }
 837                 if ((state->static_flags & MIME_OPT_REPORT_8BIT_IN_HEADER) != 0
 838                     && (state->err_flags & MIME_ERR_8BIT_IN_HEADER) == 0) {
 839                     for (cp = CU_CHAR_PTR(STR(state->output_buffer));
 840                          cp < CU_CHAR_PTR(END(state->output_buffer)); cp++)
 841                         if (*cp & 0200) {
 842                             REPORT_ERROR_BUF(state, MIME_ERR_8BIT_IN_HEADER,
 843                                              state->output_buffer);
 844                             break;
 845                         }
 846                 }
 847                 /* Output routine is explicitly allowed to change the data. */
 848                 if (header_info == 0
 849                     || header_info->type != HDR_CONTENT_TRANSFER_ENCODING
 850                     || (state->static_flags & MIME_OPT_DOWNGRADE) == 0
 851                     || state->curr_domain == MIME_ENC_7BIT)
 852                     HEAD_OUT(state, header_info, len);
 853                 state->prev_rec_type = 0;
 854                 VSTRING_RESET(state->output_buffer);
 855             }
 856         }
 857
 858         /*
 859          * With past header information moved out of the way, proceed with a
 860          * clean slate.
 861          */
 862         if (input_is_text) {
 863             ssize_t header_len;
 864
 865             /*
 866              * See if this input is (the beginning of) a message header.
 867              *
 868              * Normalize obsolete "name space colon" syntax to "name colon".
 869              * Things would be too confusing otherwise.
 870              *
 871              * Don't assume that the input is null terminated.
 872              */
 873             if ((header_len = is_header_buf(text, len)) > 0) {
 874                 vstring_strncpy(state->output_buffer, text, header_len);
 875                 for (text += header_len, len -= header_len;
 876                      len > 0 && IS_SPACE_TAB(*text);
 877                      text++, len--)
 878                      /* void */ ;
 879                 vstring_strncat(state->output_buffer, text, len);
 880                 SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
 881             }
 882         }
 883
 884         /*
 885          * This input terminates a block of message headers. When converting
 886          * 8-bit to 7-bit mail, this is the right place to emit the correct
 887          * content-transfer-encoding header. With message or multipart we
 888          * specify 7bit, with leaf entities we specify quoted-printable.
 889          *
 890          * We're not going to convert non-text data into base 64. If they send
 891          * arbitrary binary data as 8-bit text, then the data is already
 892          * broken beyond recovery, because the Postfix SMTP server sanitizes
 893          * record boundaries, treating broken record boundaries as CRLF.
 894          *
 895          * Clear the output buffer, we will need it for storage of the
 896          * conversion result.
 897          */
 898         if ((state->static_flags & MIME_OPT_DOWNGRADE)
 899             && state->curr_domain != MIME_ENC_7BIT) {
 900             if (state->curr_ctype == MIME_CTYPE_MESSAGE
 901                 || state->curr_ctype == MIME_CTYPE_MULTIPART)
 902                 cp = CU_CHAR_PTR("7bit");
 903             else
 904                 cp = CU_CHAR_PTR("quoted-printable");
 905             vstring_sprintf(state->output_buffer,
 906                             "Content-Transfer-Encoding: %s", cp);
 907             HEAD_OUT(state, (HEADER_OPTS *) 0, len);
 908             VSTRING_RESET(state->output_buffer);
 909         }
 910
 911         /*
 912          * This input terminates a block of message headers. Call the
 913          * optional header end routine at the end of the first header block.
 914          */
 915         if (state->curr_state == MIME_STATE_PRIMARY && state->head_end)
 916             state->head_end(state->app_context);
 917
 918         /*
 919          * This is the right place to check if the sender specified an
 920          * appropriate identity encoding (7bit, 8bit, binary) for multipart
 921          * and for message.
 922          */
 923         if (state->static_flags & MIME_OPT_REPORT_ENCODING_DOMAIN) {
 924             if (state->curr_ctype == MIME_CTYPE_MESSAGE) {
 925                 if (state->curr_stype == MIME_STYPE_PARTIAL
 926                     || state->curr_stype == MIME_STYPE_EXTERN_BODY) {
 927                     if (state->curr_domain != MIME_ENC_7BIT)
 928                         REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN,
 929                                  mime_state_enc_name(state->curr_encoding));
 930                 } else {
 931                     if (state->curr_encoding != state->curr_domain)
 932                         REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN,
 933                                  mime_state_enc_name(state->curr_encoding));
 934                 }
 935             } else if (state->curr_ctype == MIME_CTYPE_MULTIPART) {
 936                 if (state->curr_encoding != state->curr_domain)
 937                     REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN,
 938                                  mime_state_enc_name(state->curr_encoding));
 939             }
 940         }
 941
 942         /*
 943          * Find out if the next body starts with its own message headers. In
 944          * agressive mode, examine headers of partial and external-body
 945          * messages. Otherwise, treat such headers as part of the "body". Set
 946          * the proper encoding information for the multipart prolog.
 947          *
 948          * XXX We parse headers inside message/* content even when the encoding
 949          * is invalid (encoding != domain). With base64 we won't recognize
 950          * any headers, and with quoted-printable we won't recognize MIME
 951          * boundary strings, but the MIME processor will still resynchronize
 952          * when it runs into the higher-level boundary string at the end of
 953          * the message/* content. Although we will treat some headers as body
 954          * text, we will still do a better job than if we were treating the
 955          * entire message/* content as body text.
 956          *
 957          * XXX This changes state to MIME_STATE_NESTED and then outputs a body
 958          * line, so that the body offset is not properly reset.
 959          *
 960          * Don't assume that the input is null terminated.
 961          */
 962         if (input_is_text) {
 963             if (len == 0) {
 964                 state->body_offset = 0;         /* XXX */
 965                 if (state->curr_ctype == MIME_CTYPE_MESSAGE) {
 966                     if (state->curr_stype == MIME_STYPE_RFC822
 967                     || (state->static_flags & MIME_OPT_RECURSE_ALL_MESSAGE))
 968                         SET_MIME_STATE(state, MIME_STATE_NESTED,
 969                                        MIME_CTYPE_TEXT, MIME_STYPE_PLAIN,
 970                                        MIME_ENC_7BIT, MIME_ENC_7BIT);
 971                     else
 972                         SET_CURR_STATE(state, MIME_STATE_BODY);
 973                 } else if (state->curr_ctype == MIME_CTYPE_MULTIPART) {
 974                     SET_MIME_STATE(state, MIME_STATE_BODY,
 975                                    MIME_CTYPE_OTHER, MIME_STYPE_OTHER,
 976                                    MIME_ENC_7BIT, MIME_ENC_7BIT);
 977                 } else {
 978                     SET_CURR_STATE(state, MIME_STATE_BODY);
 979                 }
 980             }
 981
 982             /*
 983              * Invalid input. Force output of one blank line and jump to the
 984              * body state, leaving all other state alone.
 985              *
 986              * We don't break legitimate mail by inserting a blank line
 987              * separator between primary headers and a non-empty body. Many
 988              * MTA's don't even record the presence or absence of this
 989              * separator, nor does the Milter protocol pass it on to Milter
 990              * applications.
 991              *
 992              * XXX We don't insert a blank line separator into attachments, to
 993              * avoid breaking digital signatures. Postfix shall not do a
 994              * worse mail delivery job than MTAs that can't even parse MIME.
 995              * We switch to body state anyway, to avoid treating body text as
 996              * header text, and mis-interpreting or truncating it. The code
 997              * below for initial From_ lines is for educational purposes.
 998              *
 999              * Sites concerned about MIME evasion can use a MIME normalizer.
1000              * Postfix has a different mission.
1001              */
1002             else {
1003                 if (msg_verbose)
1004                     msg_info("garbage in %s header",
1005                     state->curr_state == MIME_STATE_MULTIPART ? "multipart" :
1006                        state->curr_state == MIME_STATE_PRIMARY ? "primary" :
1007                          state->curr_state == MIME_STATE_NESTED ? "nested" :
1008                              "other");
1009                 switch (state->curr_state) {
1010                 case MIME_STATE_PRIMARY:
1011                     BODY_OUT(state, REC_TYPE_NORM, "", 0);
1012                     SET_CURR_STATE(state, MIME_STATE_BODY);
1013                     break;
1014 #if 0
1015                 case MIME_STATE_NESTED:
1016                     if (state->body_offset <= 1
1017                         && rec_type == REC_TYPE_NORM
1018                         && len > 7
1019                         && (strncmp(text + (*text == '>'), "From ", 5) == 0
1020                             || strncmp(text, "=46rom ", 7) == 0))
1021                         break;
1022                     /* FALLTHROUGH */
1023 #endif
1024                 default:
1025                     SET_CURR_STATE(state, MIME_STATE_BODY);
1026                     break;
1027                 }
1028             }
1029         }
1030
1031         /*
1032          * This input is not text. Go to body state, unconditionally.
1033          */
1034         else {
1035             SET_CURR_STATE(state, MIME_STATE_BODY);
1036         }
1037         /* FALLTHROUGH */
1038
1039         /*
1040          * Body text. Look for message boundaries, and recover from missing
1041          * boundary strings. Missing boundaries can happen in agressive mode
1042          * with text/rfc822-headers or with message/partial. Ignore non-space
1043          * cruft after --boundary or --boundary--, because some MUAs do, and
1044          * because only perverse software would take advantage of this to
1045          * escape detection. We have to ignore trailing cruft anyway, because
1046          * our saved copy of the boundary string may have been truncated for
1047          * safety reasons.
1048          *
1049          * Optionally look for 8-bit data in content that was announced as, or
1050          * that defaults to, 7-bit. Unfortunately, we cannot turn this on by
1051          * default. Majordomo sends requests for approval that do not
1052          * propagate the MIME information from the enclosed message to the
1053          * message headers of the approval request.
1054          *
1055          * Set the proper state information after processing a message boundary
1056          * string.
1057          *
1058          * Don't look for boundary strings at the start of a continued record.
1059          *
1060          * Don't assume that the input is null terminated.
1061          */
1062     case MIME_STATE_BODY:
1063         if (input_is_text) {
1064             if ((state->static_flags & MIME_OPT_REPORT_8BIT_IN_7BIT_BODY) != 0
1065                 && state->curr_encoding == MIME_ENC_7BIT
1066                 && (state->err_flags & MIME_ERR_8BIT_IN_7BIT_BODY) == 0) {
1067                 for (cp = CU_CHAR_PTR(text); cp < CU_CHAR_PTR(text + len); cp++)
1068                     if (*cp & 0200) {
1069                         REPORT_ERROR_LEN(state, MIME_ERR_8BIT_IN_7BIT_BODY,
1070                                          text, len);
1071                         break;
1072                     }
1073             }
1074             if (state->stack && state->prev_rec_type != REC_TYPE_CONT
1075                 && len > 2 && text[0] == '-' && text[1] == '-') {
1076                 for (sp = state->stack; sp != 0; sp = sp->next) {
1077                     if (len >= 2 + sp->bound_len &&
1078                       strncmp(text + 2, sp->boundary, sp->bound_len) == 0) {
1079                         while (sp != state->stack)
1080                             mime_state_pop(state);
1081                         if (len >= 4 + sp->bound_len &&
1082                           strncmp(text + 2 + sp->bound_len, "--", 2) == 0) {
1083                             mime_state_pop(state);
1084                             SET_MIME_STATE(state, MIME_STATE_BODY,
1085                                          MIME_CTYPE_OTHER, MIME_STYPE_OTHER,
1086                                            MIME_ENC_7BIT, MIME_ENC_7BIT);
1087                         } else {
1088                             SET_MIME_STATE(state, MIME_STATE_MULTIPART,
1089                                            sp->def_ctype, sp->def_stype,
1090                                            MIME_ENC_7BIT, MIME_ENC_7BIT);
1091                         }
1092                         break;
1093                     }
1094                 }
1095             }
1096             /* Put last for consistency with header output routine. */
1097             if ((state->static_flags & MIME_OPT_DOWNGRADE)
1098                 && state->curr_domain != MIME_ENC_7BIT)
1099                 mime_state_downgrade(state, rec_type, text, len);
1100             else
1101                 BODY_OUT(state, rec_type, text, len);
1102         }
1103
1104         /*
1105          * The input is not a text record. Inform the application that this
1106          * is the last opportunity to send any pending output.
1107          */
1108         else {
1109             if (state->body_end)
1110                 state->body_end(state->app_context);
1111         }
1112         SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
1113
1114         /*
1115          * Oops. This can't happen.
1116          */
1117     default:
1118         msg_panic("mime_state_update: unknown state: %d", state->curr_state);
1119     }
1120 }
1121
1122  /*
1123   * Mime error to (DSN, text) mapping. Order matters; more serious errors
1124   * must precede less serious errors, because the error-to-text conversion
1125   * can report only one error.
1126   */
1127 static const MIME_STATE_DETAIL mime_err_detail[] = {
1128     MIME_ERR_NESTING, "5.6.0", "MIME nesting exceeds safety limit",
1129     MIME_ERR_TRUNC_HEADER, "5.6.0", "message header length exceeds safety limit",
1130     MIME_ERR_8BIT_IN_HEADER, "5.6.0", "improper use of 8-bit data in message header",
1131     MIME_ERR_8BIT_IN_7BIT_BODY, "5.6.0", "improper use of 8-bit data in message body",
1132     MIME_ERR_ENCODING_DOMAIN, "5.6.0", "invalid message/* or multipart/* encoding domain",
1133     0,
1134 };
1135
1136 /* mime_state_error - error code to string */
1137
1138 const char *mime_state_error(int error_code)
1139 {
1140     const MIME_STATE_DETAIL *mp;
1141
1142     if (error_code == 0)
1143         msg_panic("mime_state_error: there is no error");
1144     for (mp = mime_err_detail; mp->code; mp++)
1145         if (mp->code & error_code)
1146             return (mp->text);
1147     msg_panic("mime_state_error: unknown error code %d", error_code);
1148 }
1149
1150 /* mime_state_detail - error code to table entry with assorted data */
1151
1152 const MIME_STATE_DETAIL *mime_state_detail(int error_code)
1153 {
1154     const MIME_STATE_DETAIL *mp;
1155
1156     if (error_code == 0)
1157         msg_panic("mime_state_detail: there is no error");
1158     for (mp = mime_err_detail; mp->code; mp++)
1159         if (mp->code & error_code)
1160             return (mp);
1161     msg_panic("mime_state_detail: unknown error code %d", error_code);
1162 }
1163
1164 #ifdef TEST
1165
1166 #include <stdlib.h>
1167 #include <stringops.h>
1168 #include <vstream.h>
1169 #include <msg_vstream.h>
1170 #include <rec_streamlf.h>
1171
1172  /*
1173   * Stress test the REC_TYPE_CONT/NORM handling, but don't break header
1174   * labels.
1175   */
1176 /*#define REC_LEN       40*/
1177
1178 #define REC_LEN 1024
1179
1180 static void head_out(void *context, int class, const HEADER_OPTS *unused_info,
1181                              VSTRING *buf, off_t offset)
1182 {
1183     VSTREAM *stream = (VSTREAM *) context;
1184
1185     vstream_fprintf(stream, "%s %ld\t|%s\n",
1186                     class == MIME_HDR_PRIMARY ? "MAIN" :
1187                     class == MIME_HDR_MULTIPART ? "MULT" :
1188                     class == MIME_HDR_NESTED ? "NEST" :
1189                     "ERROR", (long) offset, STR(buf));
1190 }
1191
1192 static void head_end(void *context)
1193 {
1194     VSTREAM *stream = (VSTREAM *) context;
1195
1196     vstream_fprintf(stream, "HEADER END\n");
1197 }
1198
1199 static void body_out(void *context, int rec_type, const char *buf, ssize_t len,
1200                              off_t offset)
1201 {
1202     VSTREAM *stream = (VSTREAM *) context;
1203
1204     vstream_fprintf(stream, "BODY %c %ld\t|", rec_type, (long) offset);
1205     vstream_fwrite(stream, buf, len);
1206     if (rec_type == REC_TYPE_NORM)
1207         VSTREAM_PUTC('\n', stream);
1208 }
1209
1210 static void body_end(void *context)
1211 {
1212     VSTREAM *stream = (VSTREAM *) context;
1213
1214     vstream_fprintf(stream, "BODY END\n");
1215 }
1216
/* err_print - test error routine: log the error text and offending input */

static void err_print(void *unused_context, int err_flag,
                      const char *text, ssize_t len)
{
    int     shown;

    /* Show at most the first 100 bytes of the offending text. */
    if (len < 100)
        shown = (int) len;
    else
        shown = 100;
    msg_warn("%s: %.*s", mime_state_error(err_flag), shown, text);
}
1223
 /*
  * Limits defined locally for the test program. NOTE(review): presumably
  * these stand in for configuration parameters that the rest of the
  * program defines elsewhere - confirm.
  */
int     var_header_limit = 2000;	/* bound on the saved header length checked in mime_state_update() */
int     var_mime_maxdepth = 20;		/* presumably the MIME nesting limit; its use is outside this view */
int     var_mime_bound_len = 2000;	/* presumably the boundary string length limit; its use is outside this view */
1227
1228 int     main(int unused_argc, char **argv)
1229 {
1230     int     rec_type;
1231     int     last = 0;
1232     VSTRING *buf;
1233     MIME_STATE *state;
1234     int     err;
1235
1236     /*
1237      * Initialize.
1238      */
1239 #define MIME_OPTIONS \
1240             (MIME_OPT_REPORT_8BIT_IN_7BIT_BODY \
1241             | MIME_OPT_REPORT_8BIT_IN_HEADER \
1242             | MIME_OPT_REPORT_ENCODING_DOMAIN \
1243             | MIME_OPT_REPORT_TRUNC_HEADER \
1244             | MIME_OPT_REPORT_NESTING \
1245             | MIME_OPT_DOWNGRADE)
1246
1247     msg_vstream_init(basename(argv[0]), VSTREAM_OUT);
1248     msg_verbose = 1;
1249     buf = vstring_alloc(10);
1250     state = mime_state_alloc(MIME_OPTIONS,
1251                              head_out, head_end,
1252                              body_out, body_end,
1253                              err_print,
1254                              (void *) VSTREAM_OUT);
1255
1256     /*
1257      * Main loop.
1258      */
1259     do {
1260         rec_type = rec_streamlf_get(VSTREAM_IN, buf, REC_LEN);
1261         VSTRING_TERMINATE(buf);
1262         err = mime_state_update(state, last = rec_type, STR(buf), LEN(buf));
1263         vstream_fflush(VSTREAM_OUT);
1264     } while (rec_type > 0);
1265
1266     /*
1267      * Error reporting.
1268      */
1269     if (err & MIME_ERR_TRUNC_HEADER)
1270         msg_warn("message header length exceeds safety limit");
1271     if (err & MIME_ERR_NESTING)
1272         msg_warn("MIME nesting exceeds safety limit");
1273     if (err & MIME_ERR_8BIT_IN_HEADER)
1274         msg_warn("improper use of 8-bit data in message header");
1275     if (err & MIME_ERR_8BIT_IN_7BIT_BODY)
1276         msg_warn("improper use of 8-bit data in message body");
1277     if (err & MIME_ERR_ENCODING_DOMAIN)
1278         msg_warn("improper message/* or multipart/* encoding domain");
1279
1280     /*
1281      * Cleanup.
1282      */
1283     mime_state_free(state);
1284     vstring_free(buf);
1285     exit(0);
1286 }
1287
1288 #endif