Remove testing cruft.
[gnupg.git] / tools / rfc822parse.c
blob8fbe3c28303a6010ec743deb1594240fb6eb9838
1 /* rfc822parse.c - Simple mail and MIME parser
2 * Copyright (C) 1999, 2000 Werner Koch, Duesseldorf
3 * Copyright (C) 2003, 2004 g10 Code GmbH
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 3 of
8 * the License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
/* According to RFC822 binary zeroes are allowed at many places.  We do
 * not handle this correctly, especially in the field parsing code.  It
 * should be easy to fix, and the API provides interfaces which return
 * the length but in addition make sure that returned strings are
 * always terminated by a \0.
 *
 * Furthermore, the case of field names is changed and thus it is not
 * always a good idea to use these modified header
 * lines (e.g. signatures may break).
 */
31 #ifdef HAVE_CONFIG_H
32 #include <config.h>
33 #endif
35 #include <stdlib.h>
36 #include <stdio.h>
37 #include <string.h>
38 #include <errno.h>
39 #include <stdarg.h>
40 #include <assert.h>
42 #include "rfc822parse.h"
/* The kinds of token the field parser puts into a token list.  */
enum token_type
  {
    tSPACE,      /* Placeholder emitted for a run of invalid characters. */
    tATOM,       /* A plain word.  */
    tQUOTED,     /* The content of a "quoted string".  */
    tDOMAINLIT,  /* The content of a [domain literal].  */
    tSPECIAL     /* A single special (delimiter) character.  */
  };
53 /* For now we directly use our TOKEN as the parse context */
54 typedef struct rfc822parse_field_context *TOKEN;
55 struct rfc822parse_field_context
57 TOKEN next;
58 enum token_type type;
59 struct {
60 unsigned int cont:1;
61 unsigned int lowered:1;
62 } flags;
63 /*TOKEN owner_pantry; */
64 char data[1];
/* One physical header line.  The lines of a part form a singly linked
   list in the order they were inserted.  */
struct hdr_line
{
  struct hdr_line *next;  /* Next line or NULL.  */
  int cont;               /* This is a continuation of the previous line. */
  unsigned char line[1];  /* NUL terminated text of the line; the
                             struct is over-allocated to hold it.  */
};

typedef struct hdr_line *HDR_LINE;
77 struct part
79 struct part *right; /* The next part. */
80 struct part *down; /* A contained part. */
81 HDR_LINE hdr_lines; /* Header lines os that part. */
82 HDR_LINE *hdr_lines_tail; /* Helper for adding lines. */
83 char *boundary; /* Only used in the first part. */
85 typedef struct part *part_t;
87 struct rfc822parse_context
89 rfc822parse_cb_t callback;
90 void *callback_value;
91 int callback_error;
92 int in_body;
93 int in_preamble; /* Wether we are before the first boundary. */
94 part_t parts; /* The tree of parts. */
95 part_t current_part; /* Whom we are processing (points into parts). */
96 const char *boundary; /* Current boundary. */
99 static HDR_LINE find_header (rfc822parse_t msg, const char *name,
100 int which, HDR_LINE * rprev);
/* Return the length of LINE, which is LEN bytes long and need not be
   NUL terminated, after removing a trailing run of blanks, tabs,
   carriage returns and line feeds.  A NUL byte is ordinary data and
   is never stripped; the former strchr based test matched the
   terminator of its character set and thus treated NUL as white
   space.  */
static size_t
length_sans_trailing_ws (const unsigned char *line, size_t len)
{
  while (len)
    {
      unsigned char c = line[len - 1];

      if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
        break;
      len--;
    }
  return len;
}
/* Convert the ASCII letters A-Z of the NUL terminated STRING to lower
   case in place.  The locale is deliberately not consulted.  */
static void
lowercase_string (unsigned char *string)
{
  unsigned char *p = string;

  while (*p)
    {
      if (*p >= 'A' && *p <= 'Z')
        *p = *p - 'A' + 'a';
      p++;
    }
}
/* Rewrite the header NAME in place into the usual capitalization,
   e.g. "Content-Type": the first character and every character
   following a hyphen is upcased, all other letters are downcased.
   Processing stops at the first colon or at the end of the string.
   Only ASCII letters are considered; the localized ctype.h functions
   are not used.  */
static void
capitalize_header_name (unsigned char *name)
{
  int at_word_start = 1;
  unsigned char *p;

  for (p = name; *p && *p != ':'; p++)
    {
      unsigned char c = *p;

      if (c == '-')
        {
          at_word_start = 1;
          continue;
        }

      if (at_word_start)
        {
          if (c >= 'a' && c <= 'z')
            *p = c - 'a' + 'A';
          at_word_start = 0;
        }
      else if (c >= 'A' && c <= 'Z')
        *p = c - 'A' + 'a';
    }
}
#ifndef HAVE_STPCPY
/* Replacement for stpcpy: copy the string B including its terminating
   NUL to A and return a pointer to that NUL within A.  */
static char *
stpcpy (char *a, const char *b)
{
  for (; *b; a++, b++)
    *a = *b;
  *a = 0;

  return a;
}
#endif
169 /* If a callback has been registerd, call it for the event of type
170 EVENT. */
171 static int
172 do_callback (rfc822parse_t msg, rfc822parse_event_t event)
174 int rc;
176 if (!msg->callback || msg->callback_error)
177 return 0;
178 rc = msg->callback (msg->callback_value, event, msg);
179 if (rc)
180 msg->callback_error = rc;
181 return rc;
184 static part_t
185 new_part (void)
187 part_t part;
189 part = calloc (1, sizeof *part);
190 if (part)
192 part->hdr_lines_tail = &part->hdr_lines;
194 return part;
198 static void
199 release_part (part_t part)
201 part_t tmp;
202 HDR_LINE hdr, hdr2;
204 for (; part; part = tmp)
206 tmp = part->right;
207 if (part->down)
208 release_part (part->down);
209 for (hdr = part->hdr_lines; hdr; hdr = hdr2)
211 hdr2 = hdr->next;
212 free (hdr);
214 free (part->boundary);
215 free (part);
220 static void
221 release_handle_data (rfc822parse_t msg)
223 release_part (msg->parts);
224 msg->parts = NULL;
225 msg->current_part = NULL;
226 msg->boundary = NULL;
230 /* Create a new parsing context for an entire rfc822 message and
231 return it. CB and CB_VALUE may be given to callback for certain
232 events. NULL is returned on error with errno set appropriately. */
233 rfc822parse_t
234 rfc822parse_open (rfc822parse_cb_t cb, void *cb_value)
236 rfc822parse_t msg = calloc (1, sizeof *msg);
237 if (msg)
239 msg->parts = msg->current_part = new_part ();
240 if (!msg->parts)
242 free (msg);
243 msg = NULL;
245 else
247 msg->callback = cb;
248 msg->callback_value = cb_value;
249 if (do_callback (msg, RFC822PARSE_OPEN))
251 release_handle_data (msg);
252 free (msg);
253 msg = NULL;
257 return msg;
261 void
262 rfc822parse_cancel (rfc822parse_t msg)
264 if (msg)
266 do_callback (msg, RFC822PARSE_CANCEL);
267 release_handle_data (msg);
268 free (msg);
273 void
274 rfc822parse_close (rfc822parse_t msg)
276 if (msg)
278 do_callback (msg, RFC822PARSE_CLOSE);
279 release_handle_data (msg);
280 free (msg);
284 static part_t
285 find_parent (part_t tree, part_t target)
287 part_t part;
289 for (part = tree->down; part; part = part->right)
291 if (part == target)
292 return tree; /* Found. */
293 if (part->down)
295 part_t tmp = find_parent (part, target);
296 if (tmp)
297 return tmp;
300 return NULL;
303 static void
304 set_current_part_to_parent (rfc822parse_t msg)
306 part_t parent;
308 assert (msg->current_part);
309 parent = find_parent (msg->parts, msg->current_part);
310 if (!parent)
311 return; /* Already at the top. */
313 #ifndef NDEBUG
315 part_t part;
316 for (part = parent->down; part; part = part->right)
317 if (part == msg->current_part)
318 break;
319 assert (part);
321 #endif
322 msg->current_part = parent;
324 parent = find_parent (msg->parts, parent);
325 msg->boundary = parent? parent->boundary: NULL;
330 /****************
331 * We have read in all header lines and are about to receive the body
332 * part. The delimiter line has already been processed.
334 * FIXME: we's better return an error in case of memory failures.
336 static int
337 transition_to_body (rfc822parse_t msg)
339 rfc822parse_field_t ctx;
340 int rc;
342 rc = do_callback (msg, RFC822PARSE_T2BODY);
343 if (!rc)
345 /* Store the boundary if we have multipart type. */
346 ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
347 if (ctx)
349 const char *s;
351 s = rfc822parse_query_media_type (ctx, NULL);
352 if (s && !strcmp (s,"multipart"))
354 s = rfc822parse_query_parameter (ctx, "boundary", 0);
355 if (s)
357 assert (!msg->current_part->boundary);
358 msg->current_part->boundary = malloc (strlen (s) + 1);
359 if (msg->current_part->boundary)
361 part_t part;
363 strcpy (msg->current_part->boundary, s);
364 msg->boundary = msg->current_part->boundary;
365 part = new_part ();
366 if (!part)
368 int save_errno = errno;
369 rfc822parse_release_field (ctx);
370 errno = save_errno;
371 return -1;
373 rc = do_callback (msg, RFC822PARSE_LEVEL_DOWN);
374 assert (!msg->current_part->down);
375 msg->current_part->down = part;
376 msg->current_part = part;
377 msg->in_preamble = 1;
381 rfc822parse_release_field (ctx);
385 return rc;
388 /* We have just passed a MIME boundary and need to prepare for new part.
389 headers. */
390 static int
391 transition_to_header (rfc822parse_t msg)
393 part_t part;
395 assert (msg->current_part);
396 assert (!msg->current_part->right);
398 part = new_part ();
399 if (!part)
400 return -1;
402 msg->current_part->right = part;
403 msg->current_part = part;
404 return 0;
408 static int
409 insert_header (rfc822parse_t msg, const unsigned char *line, size_t length)
411 HDR_LINE hdr;
413 assert (msg->current_part);
414 if (!length)
416 msg->in_body = 1;
417 return transition_to_body (msg);
420 if (!msg->current_part->hdr_lines)
421 do_callback (msg, RFC822PARSE_BEGIN_HEADER);
423 length = length_sans_trailing_ws (line, length);
424 hdr = malloc (sizeof (*hdr) + length);
425 if (!hdr)
426 return -1;
427 hdr->next = NULL;
428 hdr->cont = (*line == ' ' || *line == '\t');
429 memcpy (hdr->line, line, length);
430 hdr->line[length] = 0; /* Make it a string. */
432 /* Transform a field name into canonical format. */
433 if (!hdr->cont && strchr (line, ':'))
434 capitalize_header_name (hdr->line);
436 *msg->current_part->hdr_lines_tail = hdr;
437 msg->current_part->hdr_lines_tail = &hdr->next;
439 /* Lets help the caller to prevent mail loops and issue an event for
440 * every Received header. */
441 if (length >= 9 && !memcmp (line, "Received:", 9))
442 do_callback (msg, RFC822PARSE_RCVD_SEEN);
443 return 0;
447 /****************
448 * Note: We handle the body transparent to allow binary zeroes in it.
450 static int
451 insert_body (rfc822parse_t msg, const unsigned char *line, size_t length)
453 int rc = 0;
455 if (length > 2 && *line == '-' && line[1] == '-' && msg->boundary)
457 size_t blen = strlen (msg->boundary);
459 if (length == blen + 2
460 && !memcmp (line+2, msg->boundary, blen))
462 rc = do_callback (msg, RFC822PARSE_BOUNDARY);
463 msg->in_body = 0;
464 if (!rc && !msg->in_preamble)
465 rc = transition_to_header (msg);
466 msg->in_preamble = 0;
468 else if (length == blen + 4
469 && line[length-2] =='-' && line[length-1] == '-'
470 && !memcmp (line+2, msg->boundary, blen))
472 rc = do_callback (msg, RFC822PARSE_LAST_BOUNDARY);
473 msg->boundary = NULL; /* No current boundary anymore. */
474 set_current_part_to_parent (msg);
476 /* Fixme: The next should actually be send right before the
477 next boundary, so that we can mark the epilogue. */
478 if (!rc)
479 rc = do_callback (msg, RFC822PARSE_LEVEL_UP);
482 if (msg->in_preamble && !rc)
483 rc = do_callback (msg, RFC822PARSE_PREAMBLE);
485 return rc;
488 /* Insert the next line into the parser. Return 0 on success or true
489 on error with errno set appropriately. */
491 rfc822parse_insert (rfc822parse_t msg, const unsigned char *line, size_t length)
493 return (msg->in_body
494 ? insert_body (msg, line, length)
495 : insert_header (msg, line, length));
499 /* Tell the parser that we have finished the message. */
501 rfc822parse_finish (rfc822parse_t msg)
503 return do_callback (msg, RFC822PARSE_FINISH);
508 /****************
509 * Get a copy of a header line. The line is returned as one long
510 * string with LF to separate the continuation line. Caller must free
511 * the return buffer. WHICH may be used to enumerate over all lines.
512 * Wildcards are allowed. This function works on the current headers;
513 * i.e. the regular mail headers or the MIME headers of the current
514 * part.
516 * WHICH gives the mode:
517 * -1 := Take the last occurence
518 * n := Take the n-th one.
520 * Returns a newly allocated buffer or NULL on error. errno is set in
521 * case of a memory failure or set to 0 if the requested field is not
522 * available.
524 * If VALUEOFF is not NULL it will receive the offset of the first non
525 * space character in the value part of the line (i.e. after the first
526 * colon).
528 char *
529 rfc822parse_get_field (rfc822parse_t msg, const char *name, int which,
530 size_t *valueoff)
532 HDR_LINE h, h2;
533 char *buf, *p;
534 size_t n;
536 h = find_header (msg, name, which, NULL);
537 if (!h)
539 errno = 0;
540 return NULL; /* no such field */
543 n = strlen (h->line) + 1;
544 for (h2 = h->next; h2 && h2->cont; h2 = h2->next)
545 n += strlen (h2->line) + 1;
547 buf = p = malloc (n);
548 if (buf)
550 p = stpcpy (p, h->line);
551 *p++ = '\n';
552 for (h2 = h->next; h2 && h2->cont; h2 = h2->next)
554 p = stpcpy (p, h2->line);
555 *p++ = '\n';
557 p[-1] = 0;
560 if (valueoff)
562 p = strchr (buf, ':');
563 if (!p)
564 *valueoff = 0; /* Oops: should never happen. */
565 else
567 p++;
568 while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
569 p++;
570 *valueoff = p - buf;
574 return buf;
578 /****************
579 * Enumerate all header. Caller has to provide the address of a pointer
580 * which has to be initialzed to NULL, the caller should then never change this
581 * pointer until he has closed the enumeration by passing again the address
582 * of the pointer but with msg set to NULL.
583 * The function returns pointers to all the header lines or NULL when
584 * all lines have been enumerated or no headers are available.
586 const char *
587 rfc822parse_enum_header_lines (rfc822parse_t msg, void **context)
589 HDR_LINE l;
591 if (!msg) /* Close. */
592 return NULL;
594 if (*context == msg || !msg->current_part)
595 return NULL;
597 l = *context ? (HDR_LINE) *context : msg->current_part->hdr_lines;
599 if (l)
601 *context = l->next ? (void *) (l->next) : (void *) msg;
602 return l->line;
604 *context = msg; /* Mark end of list. */
605 return NULL;
610 /****************
611 * Find a header field. If the Name does end in an asterisk this is meant
612 * to be a wildcard.
614 * which -1 : Retrieve the last field
615 * >0 : Retrieve the n-th field
617 * RPREV may be used to return the predecessor of the returned field;
618 * which may be NULL for the very first one. It has to be initialzed
619 * to either NULL in which case the search start at the first header line,
620 * or it may point to a headerline, where the search should start
622 static HDR_LINE
623 find_header (rfc822parse_t msg, const char *name, int which, HDR_LINE *rprev)
625 HDR_LINE hdr, prev = NULL, mark = NULL;
626 unsigned char *p;
627 size_t namelen, n;
628 int found = 0;
629 int glob = 0;
631 if (!msg->current_part)
632 return NULL;
634 namelen = strlen (name);
635 if (namelen && name[namelen - 1] == '*')
637 namelen--;
638 glob = 1;
641 hdr = msg->current_part->hdr_lines;
642 if (rprev && *rprev)
644 /* spool forward to the requested starting place.
645 * we cannot simply set this as we have to return
646 * the previous list element too */
647 for (; hdr && hdr != *rprev; prev = hdr, hdr = hdr->next)
651 for (; hdr; prev = hdr, hdr = hdr->next)
653 if (hdr->cont)
654 continue;
655 if (!(p = strchr (hdr->line, ':')))
656 continue; /* invalid header, just skip it. */
657 n = p - hdr->line;
658 if (!n)
659 continue; /* invalid name */
660 if ((glob ? (namelen <= n) : (namelen == n))
661 && !memcmp (hdr->line, name, namelen))
663 found++;
664 if (which == -1)
665 mark = hdr;
666 else if (found == which)
668 if (rprev)
669 *rprev = prev;
670 return hdr;
674 if (mark && rprev)
675 *rprev = prev;
676 return mark;
/* Return a pointer to the first character of S which is not a blank,
   tab, carriage return or line feed.  */
static const char *
skip_ws (const char *s)
{
  return s + strspn (s, " \t\r\n");
}
690 static void
691 release_token_list (TOKEN t)
693 while (t)
695 TOKEN t2 = t->next;
696 /* fixme: If we have owner_pantry, put the token back to
697 * this pantry so that it can be reused later */
698 free (t);
699 t = t2;
704 static TOKEN
705 new_token (enum token_type type, const char *buf, size_t length)
707 TOKEN t;
709 /* fixme: look through our pantries to find a suitable
710 * token for reuse */
711 t = malloc (sizeof *t + length);
712 if (t)
714 t->next = NULL;
715 t->type = type;
716 memset (&t->flags, 0, sizeof (t->flags));
717 t->data[0] = 0;
718 if (buf)
720 memcpy (t->data, buf, length);
721 t->data[length] = 0; /* Make sure it is a C string. */
723 else
724 t->data[0] = 0;
726 return t;
729 static TOKEN
730 append_to_token (TOKEN old, const char *buf, size_t length)
732 size_t n = strlen (old->data);
733 TOKEN t;
735 t = malloc (sizeof *t + n + length);
736 if (t)
738 t->next = old->next;
739 t->type = old->type;
740 t->flags = old->flags;
741 memcpy (t->data, old->data, n);
742 memcpy (t->data + n, buf, length);
743 t->data[n + length] = 0;
744 old->next = NULL;
745 release_token_list (old);
747 return t;
753 Parse a field into tokens as defined by rfc822.
755 static TOKEN
756 parse_field (HDR_LINE hdr)
758 static const char specials[] = "<>@.,;:\\[]\"()";
759 static const char specials2[] = "<>@.,;:";
760 static const char tspecials[] = "/?=<>@,;:\\[]\"()";
761 static const char tspecials2[] = "/?=<>@.,;:"; /* FIXME: really
762 include '.'?*/
763 static struct
765 const unsigned char *name;
766 size_t namelen;
767 } tspecial_header[] = {
768 { "Content-Type", 12},
769 { "Content-Transfer-Encoding", 25},
770 { "Content-Disposition", 19},
771 { NULL, 0}
773 const char *delimiters;
774 const char *delimiters2;
775 const unsigned char *line, *s, *s2;
776 size_t n;
777 int i, invalid = 0;
778 TOKEN t, tok, *tok_tail;
780 errno = 0;
781 if (!hdr)
782 return NULL;
784 tok = NULL;
785 tok_tail = &tok;
787 line = hdr->line;
788 if (!(s = strchr (line, ':')))
789 return NULL; /* oops */
791 n = s - line;
792 if (!n)
793 return NULL; /* oops: invalid name */
795 delimiters = specials;
796 delimiters2 = specials2;
797 for (i = 0; tspecial_header[i].name; i++)
799 if (n == tspecial_header[i].namelen
800 && !memcmp (line, tspecial_header[i].name, n))
802 delimiters = tspecials;
803 delimiters2 = tspecials2;
804 break;
808 s++; /* Move over the colon. */
809 for (;;)
811 if (!*s)
813 if (!hdr->next || !hdr->next->cont)
814 break;
815 hdr = hdr->next;
816 s = hdr->line;
819 if (*s == '(')
821 int level = 1;
822 int in_quote = 0;
824 invalid = 0;
825 for (s++;; s++)
827 if (!*s)
829 if (!hdr->next || !hdr->next->cont)
830 break;
831 hdr = hdr->next;
832 s = hdr->line;
835 if (in_quote)
837 if (*s == '\"')
838 in_quote = 0;
839 else if (*s == '\\' && s[1]) /* what about continuation? */
840 s++;
842 else if (*s == ')')
844 if (!--level)
845 break;
847 else if (*s == '(')
848 level++;
849 else if (*s == '\"')
850 in_quote = 1;
852 if (!*s)
853 ; /* Actually this is an error, but we don't care about it. */
854 else
855 s++;
857 else if (*s == '\"' || *s == '[')
859 /* We do not check for non-allowed nesting of domainliterals */
860 int term = *s == '\"' ? '\"' : ']';
861 invalid = 0;
862 s++;
863 t = NULL;
865 for (;;)
867 for (s2 = s; *s2; s2++)
869 if (*s2 == term)
870 break;
871 else if (*s2 == '\\' && s2[1]) /* what about continuation? */
872 s2++;
875 t = (t
876 ? append_to_token (t, s, s2 - s)
877 : new_token (term == '\"'? tQUOTED : tDOMAINLIT, s, s2 - s));
878 if (!t)
879 goto failure;
881 if (*s2 || !hdr->next || !hdr->next->cont)
882 break;
883 hdr = hdr->next;
884 s = hdr->line;
886 *tok_tail = t;
887 tok_tail = &t->next;
888 s = s2;
889 if (*s)
890 s++; /* skip the delimiter */
892 else if ((s2 = strchr (delimiters2, *s)))
893 { /* Special characters which are not handled above. */
894 invalid = 0;
895 t = new_token (tSPECIAL, s, 1);
896 if (!t)
897 goto failure;
898 *tok_tail = t;
899 tok_tail = &t->next;
900 s++;
902 else if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')
904 invalid = 0;
905 s = skip_ws (s + 1);
907 else if (*s > 0x20 && !(*s & 128))
908 { /* Atom. */
909 invalid = 0;
910 for (s2 = s + 1; *s2 > 0x20
911 && !(*s2 & 128) && !strchr (delimiters, *s2); s2++)
913 t = new_token (tATOM, s, s2 - s);
914 if (!t)
915 goto failure;
916 *tok_tail = t;
917 tok_tail = &t->next;
918 s = s2;
920 else
921 { /* Invalid character. */
922 if (!invalid)
923 { /* For parsing we assume only one space. */
924 t = new_token (tSPACE, NULL, 0);
925 if (!t)
926 goto failure;
927 *tok_tail = t;
928 tok_tail = &t->next;
929 invalid = 1;
931 s++;
935 return tok;
937 failure:
939 int save = errno;
940 release_token_list (tok);
941 errno = save;
943 return NULL;
949 /****************
950 * Find and parse a header field.
951 * WHICH indicates what to do if there are multiple instance of the same
952 * field (like "Received"); the following value are defined:
953 * -1 := Take the last occurence
954 * 0 := Reserved
955 * n := Take the n-th one.
956 * Returns a handle for further operations on the parse context of the field
957 * or NULL if the field was not found.
959 rfc822parse_field_t
960 rfc822parse_parse_field (rfc822parse_t msg, const char *name, int which)
962 HDR_LINE hdr;
964 if (!which)
965 return NULL;
967 hdr = find_header (msg, name, which, NULL);
968 if (!hdr)
969 return NULL;
970 return parse_field (hdr);
973 void
974 rfc822parse_release_field (rfc822parse_field_t ctx)
976 if (ctx)
977 release_token_list (ctx);
982 /****************
983 * Check whether T points to a parameter.
984 * A parameter starts with a semicolon and it is assumed that t
985 * points to exactly this one.
987 static int
988 is_parameter (TOKEN t)
990 t = t->next;
991 if (!t || t->type != tATOM)
992 return 0;
993 t = t->next;
994 if (!t || !(t->type == tSPECIAL && t->data[0] == '='))
995 return 0;
996 t = t->next;
997 if (!t)
998 return 1; /* We assume that an non existing value is an empty one. */
999 return t->type == tQUOTED || t->type == tATOM;
1003 Some header (Content-type) have a special syntax where attribute=value
1004 pairs are used after a leading semicolon. The parse_field code
1005 knows about these fields and changes the parsing to the one defined
1006 in RFC2045.
1007 Returns a pointer to the value which is valid as long as the
1008 parse context is valid; NULL is returned in case that attr is not
1009 defined in the header, a missing value is reppresented by an empty string.
1011 With LOWER_VALUE set to true, a matching field valuebe be
1012 lowercased.
1014 Note, that ATTR should be lowercase.
1016 const char *
1017 rfc822parse_query_parameter (rfc822parse_field_t ctx, const char *attr,
1018 int lower_value)
1020 TOKEN t, a;
1022 for (t = ctx; t; t = t->next)
1024 /* skip to the next semicolon */
1025 for (; t && !(t->type == tSPECIAL && t->data[0] == ';'); t = t->next)
1027 if (!t)
1028 return NULL;
1029 if (is_parameter (t))
1030 { /* Look closer. */
1031 a = t->next; /* We know that this is an atom */
1032 if ( !a->flags.lowered )
1034 lowercase_string (a->data);
1035 a->flags.lowered = 1;
1037 if (!strcmp (a->data, attr))
1038 { /* found */
1039 t = a->next->next;
1040 /* Either T is now an atom, a quoted string or NULL in
1041 * which case we return an empty string. */
1043 if ( lower_value && t && !t->flags.lowered )
1045 lowercase_string (t->data);
1046 t->flags.lowered = 1;
1048 return t ? t->data : "";
1052 return NULL;
1055 /****************
1056 * This function may be used for the Content-Type header to figure out
1057 * the media type and subtype. Note, that the returned strings are
1058 * guaranteed to be lowercase as required by MIME.
1060 * Returns: a pointer to the media type and if subtype is not NULL,
1061 * a pointer to the subtype.
1063 const char *
1064 rfc822parse_query_media_type (rfc822parse_field_t ctx, const char **subtype)
1066 TOKEN t = ctx;
1067 const char *type;
1069 if (t->type != tATOM)
1070 return NULL;
1071 if (!t->flags.lowered)
1073 lowercase_string (t->data);
1074 t->flags.lowered = 1;
1076 type = t->data;
1077 t = t->next;
1078 if (!t || t->type != tSPECIAL || t->data[0] != '/')
1079 return NULL;
1080 t = t->next;
1081 if (!t || t->type != tATOM)
1082 return NULL;
1084 if (subtype)
1086 if (!t->flags.lowered)
1088 lowercase_string (t->data);
1089 t->flags.lowered = 1;
1091 *subtype = t->data;
1093 return type;
1100 #ifdef TESTING
1102 /* Internal debug function to print the structure of the message. */
1103 static void
1104 dump_structure (rfc822parse_t msg, part_t part, int indent)
1106 if (!part)
1108 printf ("*** Structure of this message:\n");
1109 part = msg->parts;
1112 for (; part; part = part->right)
1114 rfc822parse_field_t ctx;
1115 part_t save_part; /* ugly hack - we should have a function to
1116 get part inforation. */
1117 const char *s;
1119 save_part = msg->current_part;
1120 msg->current_part = part;
1121 ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
1122 msg->current_part = save_part;
1123 if (ctx)
1125 const char *s1, *s2;
1126 s1 = rfc822parse_query_media_type (ctx, &s2);
1127 if (s1)
1128 printf ("*** %*s %s/%s", indent*2, "", s1, s2);
1129 else
1130 printf ("*** %*s [not found]", indent*2, "");
1132 s = rfc822parse_query_parameter (ctx, "boundary", 0);
1133 if (s)
1134 printf (" (boundary=\"%s\")", s);
1135 rfc822parse_release_field (ctx);
1137 else
1138 printf ("*** %*s text/plain [assumed]", indent*2, "");
1139 putchar('\n');
1141 if (part->down)
1142 dump_structure (msg, part->down, indent + 1);
1149 static void
1150 show_param (rfc822parse_field_t ctx, const char *name)
1152 const char *s;
1154 if (!ctx)
1155 return;
1156 s = rfc822parse_query_parameter (ctx, name, 0);
1157 if (s)
1158 printf ("*** %s: `%s'\n", name, s);
1163 static void
1164 show_event (rfc822parse_event_t event)
1166 const char *s;
1168 switch (event)
1170 case RFC822PARSE_OPEN: s= "Open"; break;
1171 case RFC822PARSE_CLOSE: s= "Close"; break;
1172 case RFC822PARSE_CANCEL: s= "Cancel"; break;
1173 case RFC822PARSE_T2BODY: s= "T2Body"; break;
1174 case RFC822PARSE_FINISH: s= "Finish"; break;
1175 case RFC822PARSE_RCVD_SEEN: s= "Rcvd_Seen"; break;
1176 case RFC822PARSE_BOUNDARY: s= "Boundary"; break;
1177 case RFC822PARSE_LAST_BOUNDARY: s= "Last_Boundary"; break;
1178 default: s= "***invalid event***"; break;
1180 printf ("*** got RFC822 event %s\n", s);
1183 static int
1184 msg_cb (void *dummy_arg, rfc822parse_event_t event, rfc822parse_t msg)
1186 show_event (event);
1187 if (event == RFC822PARSE_T2BODY)
1189 rfc822parse_field_t ctx;
1190 void *ectx;
1191 const char *line;
1193 for (ectx=NULL; (line = rfc822parse_enum_header_lines (msg, &ectx)); )
1195 printf ("*** HDR: %s\n", line);
1197 rfc822parse_enum_header_lines (NULL, &ectx); /* Close enumerator. */
1199 ctx = rfc822parse_parse_field (msg, "Content-Type", -1);
1200 if (ctx)
1202 const char *s1, *s2;
1203 s1 = rfc822parse_query_media_type (ctx, &s2);
1204 if (s1)
1205 printf ("*** media: `%s/%s'\n", s1, s2);
1206 else
1207 printf ("*** media: [not found]\n");
1208 show_param (ctx, "boundary");
1209 show_param (ctx, "protocol");
1210 rfc822parse_release_field (ctx);
1212 else
1213 printf ("*** media: text/plain [assumed]\n");
1218 return 0;
1224 main (int argc, char **argv)
1226 char line[5000];
1227 size_t length;
1228 rfc822parse_t msg;
1230 msg = rfc822parse_open (msg_cb, NULL);
1231 if (!msg)
1232 abort ();
1234 while (fgets (line, sizeof (line), stdin))
1236 length = strlen (line);
1237 if (length && line[length - 1] == '\n')
1238 line[--length] = 0;
1239 if (length && line[length - 1] == '\r')
1240 line[--length] = 0;
1241 if (rfc822parse_insert (msg, line, length))
1242 abort ();
1245 dump_structure (msg, NULL, 0);
1247 rfc822parse_close (msg);
1248 return 0;
1250 #endif
1253 Local Variables:
1254 compile-command: "gcc -Wall -Wno-pointer-sign -g -DTESTING -o rfc822parse rfc822parse.c"
1255 End: