2006-09-02 Marcus Brinkmann <marcus@g10code.de>
[gnupg.git] / tools / rfc822parse.c
blob303ddad136065f23f80bdb927379de40af4f0b72
1 /* rfc822parse.c - Simple mail and MIME parser
2 * Copyright (C) 1999, 2000 Werner Koch, Duesseldorf
3 * Copyright (C) 2003, 2004 g10 Code GmbH
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 2.1 of
8 * the License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA.
/* According to RFC822 binary zeroes are allowed at many places.  We do
 * not handle this correctly, especially in the field parsing code.  It
 * should be easy to fix, and the API provides an interface which
 * returns the length but in addition makes sure that returned strings
 * are always terminated by a \0.
 *
 * Furthermore, the case of field names is changed and thus it is not
 * always a good idea to use these modified header
 * lines (e.g. signatures may break).
 */
33 #ifdef HAVE_CONFIG_H
34 #include <config.h>
35 #endif
37 #include <stdlib.h>
38 #include <stdio.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdarg.h>
42 #include <assert.h>
44 #include "rfc822parse.h"
/* Lexical classes of the tokens produced by parse_field.  */
enum token_type
{
  tSPACE,      /* Placeholder emitted once per run of invalid characters.  */
  tATOM,       /* Run of printable 7-bit characters without delimiters.  */
  tQUOTED,     /* Content of a "quoted string" (quotes removed).  */
  tDOMAINLIT,  /* Content of a [domain literal] (brackets removed).  */
  tSPECIAL     /* A single delimiter character.  */
};
55 /* For now we directly use our TOKEN as the parse context */
56 typedef struct rfc822parse_field_context *TOKEN;
57 struct rfc822parse_field_context
59 TOKEN next;
60 enum token_type type;
61 struct {
62 unsigned int cont:1;
63 unsigned int lowered:1;
64 } flags;
65 /*TOKEN owner_pantry; */
66 char data[1];
/* One physical header line.  The object is allocated with enough
   extra room so that LINE holds the complete NUL terminated text.  */
struct hdr_line
{
  struct hdr_line *next;  /* Next stored line or NULL.  */
  int cont;               /* This is a continuation of the previous line.  */
  unsigned char line[1];  /* The text of the line.  */
};

typedef struct hdr_line *HDR_LINE;
79 struct part
81 struct part *right; /* The next part. */
82 struct part *down; /* A contained part. */
83 HDR_LINE hdr_lines; /* Header lines os that part. */
84 HDR_LINE *hdr_lines_tail; /* Helper for adding lines. */
85 char *boundary; /* Only used in the first part. */
87 typedef struct part *part_t;
89 struct rfc822parse_context
91 rfc822parse_cb_t callback;
92 void *callback_value;
93 int callback_error;
94 int in_body;
95 int in_preamble; /* Wether we are before the first boundary. */
96 part_t parts; /* The tree of parts. */
97 part_t current_part; /* Whom we are processing (points into parts). */
98 const char *boundary; /* Current boundary. */
101 static HDR_LINE find_header (rfc822parse_t msg, const char *name,
102 int which, HDR_LINE * rprev);
/* Return the length of LINE with all trailing blanks, tabs, CRs and
   LFs removed.  LINE is a buffer of LEN bytes and does not need to be
   NUL terminated.  NUL bytes are ordinary data: they are neither
   stripped nor do they stop the scan.  */
static size_t
length_sans_trailing_ws (const unsigned char *line, size_t len)
{
  /* Walk backwards; the first byte that is not whitespace ends the
     trailing run.  Explicit comparisons are used instead of strchr
     because strchr would also match its own terminating NUL.  */
  while (len)
    {
      unsigned char c = line[len - 1];

      if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
        break;
      len--;
    }
  return len;
}
/* Convert the ASCII letters A-Z of the NUL terminated STRING to
   lowercase in place.  All other bytes are left alone; the locale is
   not consulted.  */
static void
lowercase_string (unsigned char *string)
{
  unsigned char *p = string;

  while (*p)
    {
      unsigned char c = *p;

      if (c >= 'A' && c <= 'Z')
        *p = c - 'A' + 'a';
      p++;
    }
}
/* Rewrite the header name at the start of NAME in place into the
   usual capitalized form, e.g. "Content-Type": the first character
   and every character following a '-' are uppercased, all other
   letters are lowercased.  Processing stops at the first colon or at
   the end of the string.  Only ASCII letters are touched; the
   localized ctype.h functions are deliberately not used.  */
static void
capitalize_header_name (unsigned char *name)
{
  int at_word_start = 1;
  unsigned char *p;

  for (p = name; *p && *p != ':'; p++)
    {
      unsigned char c = *p;

      if (c == '-')
        {
          at_word_start = 1;
          continue;
        }

      if (at_word_start)
        {
          if (c >= 'a' && c <= 'z')
            *p = c - 'a' + 'A';
          at_word_start = 0;
        }
      else if (c >= 'A' && c <= 'Z')
        *p = c - 'A' + 'a';
    }
}
#ifndef HAVE_STPCPY
/* Replacement used when HAVE_STPCPY is not defined: copy the string B
   including its terminating NUL to A and return a pointer to that NUL
   within A.  */
static char *
stpcpy (char *a, const char *b)
{
  for (; *b; a++, b++)
    *a = *b;
  *a = 0;

  return a;
}
#endif
171 /* If a callback has been registerd, call it for the event of type
172 EVENT. */
173 static int
174 do_callback (rfc822parse_t msg, rfc822parse_event_t event)
176 int rc;
178 if (!msg->callback || msg->callback_error)
179 return 0;
180 rc = msg->callback (msg->callback_value, event, msg);
181 if (rc)
182 msg->callback_error = rc;
183 return rc;
186 static part_t
187 new_part (void)
189 part_t part;
191 part = calloc (1, sizeof *part);
192 if (part)
194 part->hdr_lines_tail = &part->hdr_lines;
196 return part;
200 static void
201 release_part (part_t part)
203 part_t tmp;
204 HDR_LINE hdr, hdr2;
206 for (; part; part = tmp)
208 tmp = part->right;
209 if (part->down)
210 release_part (part->down);
211 for (hdr = part->hdr_lines; hdr; hdr = hdr2)
213 hdr2 = hdr->next;
214 free (hdr);
216 free (part->boundary);
217 free (part);
222 static void
223 release_handle_data (rfc822parse_t msg)
225 release_part (msg->parts);
226 msg->parts = NULL;
227 msg->current_part = NULL;
228 msg->boundary = NULL;
232 /* Create a new parsing context for an entire rfc822 message and
233 return it. CB and CB_VALUE may be given to callback for certain
234 events. NULL is returned on error with errno set appropriately. */
235 rfc822parse_t
236 rfc822parse_open (rfc822parse_cb_t cb, void *cb_value)
238 rfc822parse_t msg = calloc (1, sizeof *msg);
239 if (msg)
241 msg->parts = msg->current_part = new_part ();
242 if (!msg->parts)
244 free (msg);
245 msg = NULL;
247 else
249 msg->callback = cb;
250 msg->callback_value = cb_value;
251 if (do_callback (msg, RFC822PARSE_OPEN))
253 release_handle_data (msg);
254 free (msg);
255 msg = NULL;
259 return msg;
263 void
264 rfc822parse_cancel (rfc822parse_t msg)
266 if (msg)
268 do_callback (msg, RFC822PARSE_CANCEL);
269 release_handle_data (msg);
270 free (msg);
275 void
276 rfc822parse_close (rfc822parse_t msg)
278 if (msg)
280 do_callback (msg, RFC822PARSE_CLOSE);
281 release_handle_data (msg);
282 free (msg);
286 static part_t
287 find_parent (part_t tree, part_t target)
289 part_t part;
291 for (part = tree->down; part; part = part->right)
293 if (part == target)
294 return tree; /* Found. */
295 if (part->down)
297 part_t tmp = find_parent (part, target);
298 if (tmp)
299 return tmp;
302 return NULL;
305 static void
306 set_current_part_to_parent (rfc822parse_t msg)
308 part_t parent;
310 assert (msg->current_part);
311 parent = find_parent (msg->parts, msg->current_part);
312 if (!parent)
313 return; /* Already at the top. */
315 #ifndef NDEBUG
317 part_t part;
318 for (part = parent->down; part; part = part->right)
319 if (part == msg->current_part)
320 break;
321 assert (part);
323 #endif
324 msg->current_part = parent;
326 parent = find_parent (msg->parts, parent);
327 msg->boundary = parent? parent->boundary: NULL;
332 /****************
333 * We have read in all header lines and are about to receive the body
334 * part. The delimiter line has already been processed.
336 * FIXME: we's better return an error in case of memory failures.
338 static int
339 transition_to_body (rfc822parse_t msg)
341 rfc822parse_field_t ctx;
342 int rc;
344 rc = do_callback (msg, RFC822PARSE_T2BODY);
345 if (!rc)
347 /* Store the boundary if we have multipart type. */
348 ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
349 if (ctx)
351 const char *s;
353 s = rfc822parse_query_media_type (ctx, NULL);
354 if (s && !strcmp (s,"multipart"))
356 s = rfc822parse_query_parameter (ctx, "boundary", 0);
357 if (s)
359 assert (!msg->current_part->boundary);
360 msg->current_part->boundary = malloc (strlen (s) + 1);
361 if (msg->current_part->boundary)
363 part_t part;
365 strcpy (msg->current_part->boundary, s);
366 msg->boundary = msg->current_part->boundary;
367 part = new_part ();
368 if (!part)
370 int save_errno = errno;
371 rfc822parse_release_field (ctx);
372 errno = save_errno;
373 return -1;
375 rc = do_callback (msg, RFC822PARSE_LEVEL_DOWN);
376 assert (!msg->current_part->down);
377 msg->current_part->down = part;
378 msg->current_part = part;
379 msg->in_preamble = 1;
383 rfc822parse_release_field (ctx);
387 return rc;
390 /* We have just passed a MIME boundary and need to prepare for new part.
391 headers. */
392 static int
393 transition_to_header (rfc822parse_t msg)
395 part_t part;
397 assert (msg->current_part);
398 assert (!msg->current_part->right);
400 part = new_part ();
401 if (!part)
402 return -1;
404 msg->current_part->right = part;
405 msg->current_part = part;
406 return 0;
410 static int
411 insert_header (rfc822parse_t msg, const unsigned char *line, size_t length)
413 HDR_LINE hdr;
415 assert (msg->current_part);
416 if (!length)
418 msg->in_body = 1;
419 return transition_to_body (msg);
422 if (!msg->current_part->hdr_lines)
423 do_callback (msg, RFC822PARSE_BEGIN_HEADER);
425 length = length_sans_trailing_ws (line, length);
426 hdr = malloc (sizeof (*hdr) + length);
427 if (!hdr)
428 return -1;
429 hdr->next = NULL;
430 hdr->cont = (*line == ' ' || *line == '\t');
431 memcpy (hdr->line, line, length);
432 hdr->line[length] = 0; /* Make it a string. */
434 /* Transform a field name into canonical format. */
435 if (!hdr->cont && strchr (line, ':'))
436 capitalize_header_name (hdr->line);
438 *msg->current_part->hdr_lines_tail = hdr;
439 msg->current_part->hdr_lines_tail = &hdr->next;
441 /* Lets help the caller to prevent mail loops and issue an event for
442 * every Received header. */
443 if (length >= 9 && !memcmp (line, "Received:", 9))
444 do_callback (msg, RFC822PARSE_RCVD_SEEN);
445 return 0;
449 /****************
450 * Note: We handle the body transparent to allow binary zeroes in it.
452 static int
453 insert_body (rfc822parse_t msg, const unsigned char *line, size_t length)
455 int rc = 0;
457 if (length > 2 && *line == '-' && line[1] == '-' && msg->boundary)
459 size_t blen = strlen (msg->boundary);
461 if (length == blen + 2
462 && !memcmp (line+2, msg->boundary, blen))
464 rc = do_callback (msg, RFC822PARSE_BOUNDARY);
465 msg->in_body = 0;
466 if (!rc && !msg->in_preamble)
467 rc = transition_to_header (msg);
468 msg->in_preamble = 0;
470 else if (length == blen + 4
471 && line[length-2] =='-' && line[length-1] == '-'
472 && !memcmp (line+2, msg->boundary, blen))
474 rc = do_callback (msg, RFC822PARSE_LAST_BOUNDARY);
475 msg->boundary = NULL; /* No current boundary anymore. */
476 set_current_part_to_parent (msg);
478 /* Fixme: The next should actually be send right before the
479 next boundary, so that we can mark the epilogue. */
480 if (!rc)
481 rc = do_callback (msg, RFC822PARSE_LEVEL_UP);
484 if (msg->in_preamble && !rc)
485 rc = do_callback (msg, RFC822PARSE_PREAMBLE);
487 return rc;
490 /* Insert the next line into the parser. Return 0 on success or true
491 on error with errno set appropriately. */
493 rfc822parse_insert (rfc822parse_t msg, const unsigned char *line, size_t length)
495 return (msg->in_body
496 ? insert_body (msg, line, length)
497 : insert_header (msg, line, length));
501 /* Tell the parser that we have finished the message. */
503 rfc822parse_finish (rfc822parse_t msg)
505 return do_callback (msg, RFC822PARSE_FINISH);
510 /****************
511 * Get a copy of a header line. The line is returned as one long
512 * string with LF to separate the continuation line. Caller must free
513 * the return buffer. WHICH may be used to enumerate over all lines.
514 * Wildcards are allowed. This function works on the current headers;
515 * i.e. the regular mail headers or the MIME headers of the current
516 * part.
518 * WHICH gives the mode:
519 * -1 := Take the last occurence
520 * n := Take the n-th one.
522 * Returns a newly allocated buffer or NULL on error. errno is set in
523 * case of a memory failure or set to 0 if the requested field is not
524 * available.
526 * If VALUEOFF is not NULL it will receive the offset of the first non
527 * space character in the value part of the line (i.e. after the first
528 * colon).
530 char *
531 rfc822parse_get_field (rfc822parse_t msg, const char *name, int which,
532 size_t *valueoff)
534 HDR_LINE h, h2;
535 char *buf, *p;
536 size_t n;
538 h = find_header (msg, name, which, NULL);
539 if (!h)
541 errno = 0;
542 return NULL; /* no such field */
545 n = strlen (h->line) + 1;
546 for (h2 = h->next; h2 && h2->cont; h2 = h2->next)
547 n += strlen (h2->line) + 1;
549 buf = p = malloc (n);
550 if (buf)
552 p = stpcpy (p, h->line);
553 *p++ = '\n';
554 for (h2 = h->next; h2 && h2->cont; h2 = h2->next)
556 p = stpcpy (p, h2->line);
557 *p++ = '\n';
559 p[-1] = 0;
562 if (valueoff)
564 p = strchr (buf, ':');
565 if (!p)
566 *valueoff = 0; /* Oops: should never happen. */
567 else
569 p++;
570 while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
571 p++;
572 *valueoff = p - buf;
576 return buf;
580 /****************
581 * Enumerate all header. Caller has to provide the address of a pointer
582 * which has to be initialzed to NULL, the caller should then never change this
583 * pointer until he has closed the enumeration by passing again the address
584 * of the pointer but with msg set to NULL.
585 * The function returns pointers to all the header lines or NULL when
586 * all lines have been enumerated or no headers are available.
588 const char *
589 rfc822parse_enum_header_lines (rfc822parse_t msg, void **context)
591 HDR_LINE l;
593 if (!msg) /* Close. */
594 return NULL;
596 if (*context == msg || !msg->current_part)
597 return NULL;
599 l = *context ? (HDR_LINE) *context : msg->current_part->hdr_lines;
601 if (l)
603 *context = l->next ? (void *) (l->next) : (void *) msg;
604 return l->line;
606 *context = msg; /* Mark end of list. */
607 return NULL;
612 /****************
613 * Find a header field. If the Name does end in an asterisk this is meant
614 * to be a wildcard.
616 * which -1 : Retrieve the last field
617 * >0 : Retrieve the n-th field
619 * RPREV may be used to return the predecessor of the returned field;
620 * which may be NULL for the very first one. It has to be initialzed
621 * to either NULL in which case the search start at the first header line,
622 * or it may point to a headerline, where the search should start
624 static HDR_LINE
625 find_header (rfc822parse_t msg, const char *name, int which, HDR_LINE *rprev)
627 HDR_LINE hdr, prev = NULL, mark = NULL;
628 unsigned char *p;
629 size_t namelen, n;
630 int found = 0;
631 int glob = 0;
633 if (!msg->current_part)
634 return NULL;
636 namelen = strlen (name);
637 if (namelen && name[namelen - 1] == '*')
639 namelen--;
640 glob = 1;
643 hdr = msg->current_part->hdr_lines;
644 if (rprev && *rprev)
646 /* spool forward to the requested starting place.
647 * we cannot simply set this as we have to return
648 * the previous list element too */
649 for (; hdr && hdr != *rprev; prev = hdr, hdr = hdr->next)
653 for (; hdr; prev = hdr, hdr = hdr->next)
655 if (hdr->cont)
656 continue;
657 if (!(p = strchr (hdr->line, ':')))
658 continue; /* invalid header, just skip it. */
659 n = p - hdr->line;
660 if (!n)
661 continue; /* invalid name */
662 if ((glob ? (namelen <= n) : (namelen == n))
663 && !memcmp (hdr->line, name, namelen))
665 found++;
666 if (which == -1)
667 mark = hdr;
668 else if (found == which)
670 if (rprev)
671 *rprev = prev;
672 return hdr;
676 if (mark && rprev)
677 *rprev = prev;
678 return mark;
/* Return a pointer to the first character of S which is not a blank,
   tab, CR or LF; this may be the terminating NUL.  */
static const char *
skip_ws (const char *s)
{
  return s + strspn (s, " \t\r\n");
}
692 static void
693 release_token_list (TOKEN t)
695 while (t)
697 TOKEN t2 = t->next;
698 /* fixme: If we have owner_pantry, put the token back to
699 * this pantry so that it can be reused later */
700 free (t);
701 t = t2;
706 static TOKEN
707 new_token (enum token_type type, const char *buf, size_t length)
709 TOKEN t;
711 /* fixme: look through our pantries to find a suitable
712 * token for reuse */
713 t = malloc (sizeof *t + length);
714 if (t)
716 t->next = NULL;
717 t->type = type;
718 memset (&t->flags, 0, sizeof (t->flags));
719 t->data[0] = 0;
720 if (buf)
722 memcpy (t->data, buf, length);
723 t->data[length] = 0; /* Make sure it is a C string. */
725 else
726 t->data[0] = 0;
728 return t;
731 static TOKEN
732 append_to_token (TOKEN old, const char *buf, size_t length)
734 size_t n = strlen (old->data);
735 TOKEN t;
737 t = malloc (sizeof *t + n + length);
738 if (t)
740 t->next = old->next;
741 t->type = old->type;
742 t->flags = old->flags;
743 memcpy (t->data, old->data, n);
744 memcpy (t->data + n, buf, length);
745 t->data[n + length] = 0;
746 old->next = NULL;
747 release_token_list (old);
749 return t;
755 Parse a field into tokens as defined by rfc822.
757 static TOKEN
758 parse_field (HDR_LINE hdr)
760 static const char specials[] = "<>@.,;:\\[]\"()";
761 static const char specials2[] = "<>@.,;:";
762 static const char tspecials[] = "/?=<>@,;:\\[]\"()";
763 static const char tspecials2[] = "/?=<>@.,;:"; /* FIXME: really
764 include '.'?*/
765 static struct
767 const unsigned char *name;
768 size_t namelen;
769 } tspecial_header[] = {
770 { "Content-Type", 12},
771 { "Content-Transfer-Encoding", 25},
772 { "Content-Disposition", 19},
773 { NULL, 0}
775 const char *delimiters;
776 const char *delimiters2;
777 const unsigned char *line, *s, *s2;
778 size_t n;
779 int i, invalid = 0;
780 TOKEN t, tok, *tok_tail;
782 errno = 0;
783 if (!hdr)
784 return NULL;
786 tok = NULL;
787 tok_tail = &tok;
789 line = hdr->line;
790 if (!(s = strchr (line, ':')))
791 return NULL; /* oops */
793 n = s - line;
794 if (!n)
795 return NULL; /* oops: invalid name */
797 delimiters = specials;
798 delimiters2 = specials2;
799 for (i = 0; tspecial_header[i].name; i++)
801 if (n == tspecial_header[i].namelen
802 && !memcmp (line, tspecial_header[i].name, n))
804 delimiters = tspecials;
805 delimiters2 = tspecials2;
806 break;
810 s++; /* Move over the colon. */
811 for (;;)
813 if (!*s)
815 if (!hdr->next || !hdr->next->cont)
816 break;
817 hdr = hdr->next;
818 s = hdr->line;
821 if (*s == '(')
823 int level = 1;
824 int in_quote = 0;
826 invalid = 0;
827 for (s++;; s++)
829 if (!*s)
831 if (!hdr->next || !hdr->next->cont)
832 break;
833 hdr = hdr->next;
834 s = hdr->line;
837 if (in_quote)
839 if (*s == '\"')
840 in_quote = 0;
841 else if (*s == '\\' && s[1]) /* what about continuation? */
842 s++;
844 else if (*s == ')')
846 if (!--level)
847 break;
849 else if (*s == '(')
850 level++;
851 else if (*s == '\"')
852 in_quote = 1;
854 if (!*s)
855 ; /* Actually this is an error, but we don't care about it. */
856 else
857 s++;
859 else if (*s == '\"' || *s == '[')
861 /* We do not check for non-allowed nesting of domainliterals */
862 int term = *s == '\"' ? '\"' : ']';
863 invalid = 0;
864 s++;
865 t = NULL;
867 for (;;)
869 for (s2 = s; *s2; s2++)
871 if (*s2 == term)
872 break;
873 else if (*s2 == '\\' && s2[1]) /* what about continuation? */
874 s2++;
877 t = (t
878 ? append_to_token (t, s, s2 - s)
879 : new_token (term == '\"'? tQUOTED : tDOMAINLIT, s, s2 - s));
880 if (!t)
881 goto failure;
883 if (*s2 || !hdr->next || !hdr->next->cont)
884 break;
885 hdr = hdr->next;
886 s = hdr->line;
888 *tok_tail = t;
889 tok_tail = &t->next;
890 s = s2;
891 if (*s)
892 s++; /* skip the delimiter */
894 else if ((s2 = strchr (delimiters2, *s)))
895 { /* Special characters which are not handled above. */
896 invalid = 0;
897 t = new_token (tSPECIAL, s, 1);
898 if (!t)
899 goto failure;
900 *tok_tail = t;
901 tok_tail = &t->next;
902 s++;
904 else if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')
906 invalid = 0;
907 s = skip_ws (s + 1);
909 else if (*s > 0x20 && !(*s & 128))
910 { /* Atom. */
911 invalid = 0;
912 for (s2 = s + 1; *s2 > 0x20
913 && !(*s2 & 128) && !strchr (delimiters, *s2); s2++)
915 t = new_token (tATOM, s, s2 - s);
916 if (!t)
917 goto failure;
918 *tok_tail = t;
919 tok_tail = &t->next;
920 s = s2;
922 else
923 { /* Invalid character. */
924 if (!invalid)
925 { /* For parsing we assume only one space. */
926 t = new_token (tSPACE, NULL, 0);
927 if (!t)
928 goto failure;
929 *tok_tail = t;
930 tok_tail = &t->next;
931 invalid = 1;
933 s++;
937 return tok;
939 failure:
941 int save = errno;
942 release_token_list (tok);
943 errno = save;
945 return NULL;
951 /****************
952 * Find and parse a header field.
953 * WHICH indicates what to do if there are multiple instance of the same
954 * field (like "Received"); the following value are defined:
955 * -1 := Take the last occurence
956 * 0 := Reserved
957 * n := Take the n-th one.
958 * Returns a handle for further operations on the parse context of the field
959 * or NULL if the field was not found.
961 rfc822parse_field_t
962 rfc822parse_parse_field (rfc822parse_t msg, const char *name, int which)
964 HDR_LINE hdr;
966 if (!which)
967 return NULL;
969 hdr = find_header (msg, name, which, NULL);
970 if (!hdr)
971 return NULL;
972 return parse_field (hdr);
975 void
976 rfc822parse_release_field (rfc822parse_field_t ctx)
978 if (ctx)
979 release_token_list (ctx);
984 /****************
985 * Check whether T points to a parameter.
986 * A parameter starts with a semicolon and it is assumed that t
987 * points to exactly this one.
989 static int
990 is_parameter (TOKEN t)
992 t = t->next;
993 if (!t || t->type != tATOM)
994 return 0;
995 t = t->next;
996 if (!t || !(t->type == tSPECIAL && t->data[0] == '='))
997 return 0;
998 t = t->next;
999 if (!t)
1000 return 1; /* We assume that an non existing value is an empty one. */
1001 return t->type == tQUOTED || t->type == tATOM;
1005 Some header (Content-type) have a special syntax where attribute=value
1006 pairs are used after a leading semicolon. The parse_field code
1007 knows about these fields and changes the parsing to the one defined
1008 in RFC2045.
1009 Returns a pointer to the value which is valid as long as the
1010 parse context is valid; NULL is returned in case that attr is not
1011 defined in the header, a missing value is reppresented by an empty string.
1013 With LOWER_VALUE set to true, a matching field valuebe be
1014 lowercased.
1016 Note, that ATTR should be lowercase.
1018 const char *
1019 rfc822parse_query_parameter (rfc822parse_field_t ctx, const char *attr,
1020 int lower_value)
1022 TOKEN t, a;
1024 for (t = ctx; t; t = t->next)
1026 /* skip to the next semicolon */
1027 for (; t && !(t->type == tSPECIAL && t->data[0] == ';'); t = t->next)
1029 if (!t)
1030 return NULL;
1031 if (is_parameter (t))
1032 { /* Look closer. */
1033 a = t->next; /* We know that this is an atom */
1034 if ( !a->flags.lowered )
1036 lowercase_string (a->data);
1037 a->flags.lowered = 1;
1039 if (!strcmp (a->data, attr))
1040 { /* found */
1041 t = a->next->next;
1042 /* Either T is now an atom, a quoted string or NULL in
1043 * which case we return an empty string. */
1045 if ( lower_value && t && !t->flags.lowered )
1047 lowercase_string (t->data);
1048 t->flags.lowered = 1;
1050 return t ? t->data : "";
1054 return NULL;
1057 /****************
1058 * This function may be used for the Content-Type header to figure out
1059 * the media type and subtype. Note, that the returned strings are
1060 * guaranteed to be lowercase as required by MIME.
1062 * Returns: a pointer to the media type and if subtype is not NULL,
1063 * a pointer to the subtype.
1065 const char *
1066 rfc822parse_query_media_type (rfc822parse_field_t ctx, const char **subtype)
1068 TOKEN t = ctx;
1069 const char *type;
1071 if (t->type != tATOM)
1072 return NULL;
1073 if (!t->flags.lowered)
1075 lowercase_string (t->data);
1076 t->flags.lowered = 1;
1078 type = t->data;
1079 t = t->next;
1080 if (!t || t->type != tSPECIAL || t->data[0] != '/')
1081 return NULL;
1082 t = t->next;
1083 if (!t || t->type != tATOM)
1084 return NULL;
1086 if (subtype)
1088 if (!t->flags.lowered)
1090 lowercase_string (t->data);
1091 t->flags.lowered = 1;
1093 *subtype = t->data;
1095 return type;
1102 #ifdef TESTING
1104 /* Internal debug function to print the structure of the message. */
1105 static void
1106 dump_structure (rfc822parse_t msg, part_t part, int indent)
1108 if (!part)
1110 printf ("*** Structure of this message:\n");
1111 part = msg->parts;
1114 for (; part; part = part->right)
1116 rfc822parse_field_t ctx;
1117 part_t save_part; /* ugly hack - we should have a function to
1118 get part inforation. */
1119 const char *s;
1121 save_part = msg->current_part;
1122 msg->current_part = part;
1123 ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
1124 msg->current_part = save_part;
1125 if (ctx)
1127 const char *s1, *s2;
1128 s1 = rfc822parse_query_media_type (ctx, &s2);
1129 if (s1)
1130 printf ("*** %*s %s/%s", indent*2, "", s1, s2);
1131 else
1132 printf ("*** %*s [not found]", indent*2, "");
1134 s = rfc822parse_query_parameter (ctx, "boundary", 0);
1135 if (s)
1136 printf (" (boundary=\"%s\")", s);
1137 rfc822parse_release_field (ctx);
1139 else
1140 printf ("*** %*s text/plain [assumed]", indent*2, "");
1141 putchar('\n');
1143 if (part->down)
1144 dump_structure (msg, part->down, indent + 1);
1151 static void
1152 show_param (rfc822parse_field_t ctx, const char *name)
1154 const char *s;
1156 if (!ctx)
1157 return;
1158 s = rfc822parse_query_parameter (ctx, name, 0);
1159 if (s)
1160 printf ("*** %s: `%s'\n", name, s);
1165 static void
1166 show_event (rfc822parse_event_t event)
1168 const char *s;
1170 switch (event)
1172 case RFC822PARSE_OPEN: s= "Open"; break;
1173 case RFC822PARSE_CLOSE: s= "Close"; break;
1174 case RFC822PARSE_CANCEL: s= "Cancel"; break;
1175 case RFC822PARSE_T2BODY: s= "T2Body"; break;
1176 case RFC822PARSE_FINISH: s= "Finish"; break;
1177 case RFC822PARSE_RCVD_SEEN: s= "Rcvd_Seen"; break;
1178 case RFC822PARSE_BOUNDARY: s= "Boundary"; break;
1179 case RFC822PARSE_LAST_BOUNDARY: s= "Last_Boundary"; break;
1180 default: s= "***invalid event***"; break;
1182 printf ("*** got RFC822 event %s\n", s);
1185 static int
1186 msg_cb (void *dummy_arg, rfc822parse_event_t event, rfc822parse_t msg)
1188 show_event (event);
1189 if (event == RFC822PARSE_T2BODY)
1191 rfc822parse_field_t ctx;
1192 void *ectx;
1193 const char *line;
1195 for (ectx=NULL; (line = rfc822parse_enum_header_lines (msg, &ectx)); )
1197 printf ("*** HDR: %s\n", line);
1199 rfc822parse_enum_header_lines (NULL, &ectx); /* Close enumerator. */
1201 ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
1202 if (ctx)
1204 const char *s1, *s2;
1205 s1 = rfc822parse_query_media_type (ctx, &s2);
1206 if (s1)
1207 printf ("*** media: `%s/%s'\n", s1, s2);
1208 else
1209 printf ("*** media: [not found]\n");
1210 show_param (ctx, "boundary");
1211 show_param (ctx, "protocol");
1212 rfc822parse_release_field (ctx);
1214 else
1215 printf ("*** media: text/plain [assumed]\n");
1220 return 0;
1226 main (int argc, char **argv)
1228 char line[5000];
1229 size_t length;
1230 rfc822parse_t msg;
1232 msg = rfc822parse_open (msg_cb, NULL);
1233 if (!msg)
1234 abort ();
1236 while (fgets (line, sizeof (line), stdin))
1238 length = strlen (line);
1239 if (length && line[length - 1] == '\n')
1240 line[--length] = 0;
1241 if (length && line[length - 1] == '\r')
1242 line[--length] = 0;
1243 if (rfc822parse_insert (msg, line, length))
1244 abort ();
1247 dump_structure (msg, NULL, 0);
1249 rfc822parse_close (msg);
1250 return 0;
1252 #endif
1255 Local Variables:
1256 compile-command: "gcc -Wall -g -DTESTING -o rfc822parse rfc822parse.c"
1257 End: