external/ibm-public/postfix/dist/src/global/header_token.c

   1 /*      $NetBSD$        */
   2
   3 /*++
   4 /* NAME
   5 /*      header_token 3
   6 /* SUMMARY
   7 /*      mail header parser
   8 /* SYNOPSIS
   9 /*      #include <header_token.h>
  10 /*
  11 /*      typedef struct {
  12 /* .in +4
  13 /*          int     type;
  14 /*          const char *u.value;
  15 /*          /* ... */
  16 /* .in
  17 /*      } HEADER_TOKEN;
  18 /*
  19 /*      ssize_t header_token(token, token_len, token_buffer, ptr,
  20 /*                              specials, terminator)
  21 /*      HEADER_TOKEN *token;
  22 /*      ssize_t token_len;
  23 /*      VSTRING *token_buffer;
  24 /*      const char **ptr;
  25 /*      const char *specials;
  26 /*      int     terminator;
  27 /* DESCRIPTION
  28 /*      This module parses a mail header value (text after field-name:)
  29 /*      into tokens. The parser understands RFC 822 linear white space,
  30 /*      quoted-string, comment, control characters, and a set of
  31 /*      user-specified special characters.
  32 /*
  33 /*      A result token type is one of the following:
  34 /* .IP HEADER_TOK_QSTRING
  35 /*      Quoted string as per RFC 822.
  36 /* .IP HEADER_TOK_TOKEN
  37 /*      Token as per RFC 822, and the special characters supplied by the
  38 /*      caller.
  39 /* .IP other
  40 /*      The value of a control character or special character.
  41 /* .PP
  42 /*      header_token() tokenizes the input and stops after a user-specified
  43 /*      terminator (ignoring all tokens that exceed the capacity of
  44 /*      the result storage), or when it runs out of space for the result.
  45 /*      The terminator is not stored. The result value is the number of
  46 /*      tokens stored, or -1 when the input was exhausted before any tokens
  47 /*      were found.
  48 /*
  49 /*      Arguments:
  50 /* .IP token
  51 /*      Result array of HEADER_TOKEN structures. Token string values
  52 /*      are pointers to null-terminated substrings in the token_buffer.
  53 /* .IP token_len
  54 /*      Length of the array of HEADER_TOKEN structures.
  55 /* .IP token_buffer
  56 /*      Storage for result token string values.
  57 /* .IP ptr
  58 /*      Input/output read position. The input is a null-terminated string.
  59 /* .IP specials
  60 /*      Special characters according to the relevant RFC, or a
  61 /*      null pointer (default to the RFC 822 special characters).
  62 /*      This must include the optional terminator if one is specified.
  63 /* .IP terminator
  64 /*      The special character to stop after, or zero.
  65 /* BUGS
  66 /*      Eight-bit characters are not given special treatment.
  67 /* SEE ALSO
  68 /*      RFC 822 (ARPA Internet Text Messages)
  69 /* DIAGNOSTICS
  70 /*      Fatal errors: memory allocation problem.
  71 /* LICENSE
  72 /* .ad
  73 /* .fi
  74 /*      The Secure Mailer license must be distributed with this software.
  75 /* AUTHOR(S)
  76 /*      Wietse Venema
  77 /*      IBM T.J. Watson Research
  78 /*      P.O. Box 704
  79 /*      Yorktown Heights, NY 10598, USA
  80 /*--*/
  81
  82 /* System library. */
  83
  84 #include <sys_defs.h>
  85 #include <string.h>
  86 #include <ctype.h>
  87
  88 /* Utility library. */
  89
  90 #include <msg.h>
  91 #include <vstring.h>
  92
  93 /* Global library. */
  94
  95 #include <lex_822.h>
  96 #include <header_token.h>
  97
  98 /* Application-specific. */
  99
 100  /*
 101   * Silly little macros.
 102   */
 103 #define STR(x)  vstring_str(x)
 104 #define LEN(x)  VSTRING_LEN(x)
 105 #define CU_CHAR_PTR(x)  ((const unsigned char *) (x))
 106
 107 /* header_token - parse out the next item in a message header */
 108
 109 ssize_t header_token(HEADER_TOKEN *token, ssize_t token_len,
 110                              VSTRING *token_buffer, const char **ptr,
 111                              const char *user_specials, int user_terminator)
 112 {
 113     ssize_t comment_level;
 114     const unsigned char *cp;
 115     ssize_t len;
 116     int     ch;
 117     ssize_t tok_count;
 118     ssize_t n;
 119
 120     /*
 121      * Initialize.
 122      */
 123     VSTRING_RESET(token_buffer);
 124     cp = CU_CHAR_PTR(*ptr);
 125     tok_count = 0;
 126     if (user_specials == 0)
 127         user_specials = LEX_822_SPECIALS;
 128
 129     /*
 130      * Main parsing loop.
 131      *
 132      * XXX What was the reason to continue parsing when user_terminator is
 133      * specified? Perhaps this was needed at some intermediate stage of
 134      * development?
 135      */
 136     while ((ch = *cp) != 0 && (user_terminator != 0 || tok_count < token_len)) {
 137         cp++;
 138
 139         /*
 140          * Skip RFC 822 linear white space.
 141          */
 142         if (IS_SPACE_TAB_CR_LF(ch))
 143             continue;
 144
 145         /*
 146          * Terminator.
 147          */
 148         if (ch == user_terminator)
 149             break;
 150
 151         /*
 152          * Skip RFC 822 comment.
 153          */
 154         if (ch == '(') {
 155             comment_level = 1;
 156             while ((ch = *cp) != 0) {
 157                 cp++;
 158                 if (ch == '(') {                /* comments can nest! */
 159                     comment_level++;
 160                 } else if (ch == ')') {
 161                     if (--comment_level == 0)
 162                         break;
 163                 } else if (ch == '\\') {
 164                     if ((ch = *cp) == 0)
 165                         break;
 166                     cp++;
 167                 }
 168             }
 169             continue;
 170         }
 171
 172         /*
 173          * Copy quoted text according to RFC 822.
 174          */
 175         if (ch == '"') {
 176             if (tok_count < token_len) {
 177                 token[tok_count].u.offset = LEN(token_buffer);
 178                 token[tok_count].type = HEADER_TOK_QSTRING;
 179             }
 180             while ((ch = *cp) != 0) {
 181                 cp++;
 182                 if (ch == '"')
 183                     break;
 184                 if (ch == '\n') {               /* unfold */
 185                     if (tok_count < token_len) {
 186                         len = LEN(token_buffer);
 187                         while (len > 0
 188                           && IS_SPACE_TAB_CR_LF(STR(token_buffer)[len - 1]))
 189                             len--;
 190                         if (len < LEN(token_buffer))
 191                             vstring_truncate(token_buffer, len);
 192                     }
 193                     continue;
 194                 }
 195                 if (ch == '\\') {
 196                     if ((ch = *cp) == 0)
 197                         break;
 198                     cp++;
 199                 }
 200                 if (tok_count < token_len)
 201                     VSTRING_ADDCH(token_buffer, ch);
 202             }
 203             if (tok_count < token_len) {
 204                 VSTRING_ADDCH(token_buffer, 0);
 205                 tok_count++;
 206             }
 207             continue;
 208         }
 209
 210         /*
 211          * Control, or special.
 212          */
 213         if (strchr(user_specials, ch) || ISCNTRL(ch)) {
 214             if (tok_count < token_len) {
 215                 token[tok_count].u.offset = LEN(token_buffer);
 216                 token[tok_count].type = ch;
 217                 VSTRING_ADDCH(token_buffer, ch);
 218                 VSTRING_ADDCH(token_buffer, 0);
 219                 tok_count++;
 220             }
 221             continue;
 222         }
 223
 224         /*
 225          * Token.
 226          */
 227         else {
 228             if (tok_count < token_len) {
 229                 token[tok_count].u.offset = LEN(token_buffer);
 230                 token[tok_count].type = HEADER_TOK_TOKEN;
 231                 VSTRING_ADDCH(token_buffer, ch);
 232             }
 233             while ((ch = *cp) != 0 && !IS_SPACE_TAB_CR_LF(ch)
 234                    && !ISCNTRL(ch) && !strchr(user_specials, ch)) {
 235                 cp++;
 236                 if (tok_count < token_len)
 237                     VSTRING_ADDCH(token_buffer, ch);
 238             }
 239             if (tok_count < token_len) {
 240                 VSTRING_ADDCH(token_buffer, 0);
 241                 tok_count++;
 242             }
 243             continue;
 244         }
 245     }
 246
 247     /*
 248      * Ignore a zero-length item after the last terminator.
 249      */
 250     if (tok_count == 0 && ch == 0)
 251         return (-1);
 252
 253     /*
 254      * Finalize. Fill in the string pointer array, now that the token buffer
 255      * is no longer dynamically reallocated as it grows.
 256      */
 257     *ptr = (const char *) cp;
 258     for (n = 0; n < tok_count; n++)
 259         token[n].u.value = STR(token_buffer) + token[n].u.offset;
 260
 261     if (msg_verbose)
 262         msg_info("header_token: %s %s %s",
 263                  tok_count > 0 ? token[0].u.value : "",
 264                  tok_count > 1 ? token[1].u.value : "",
 265                  tok_count > 2 ? token[2].u.value : "");
 266
 267     return (tok_count);
 268 }