1 /* $NetBSD: mime_codecs.c,v 1.8 2009/01/18 01:29:57 lukem Exp $ */
4 * Copyright (c) 2006 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
33 * This module contains all mime related codecs. Typically there are
34 * two versions: one operating on buffers and one operating on files.
35 * All exported routines have a "mime_" prefix. The file oriented
36 * routines have a "mime_f" prefix replacing the "mime_" prefix of the
37 * equivalent buffer based version.
39 * The file based API should be:
41 * mime_f<name>_{encode,decode}(FILE *in, FILE *out, void *cookie)
43 * XXX - currently this naming convention has not been adhered to.
45 * where the cookie is a generic way to pass arguments to the routine.
46 * This way these routines can be run by run_function() in mime.c.
48 * The buffer based API is not as rigid.
53 #include <sys/cdefs.h>
55 __RCSID("$NetBSD: mime_codecs.c,v 1.8 2009/01/18 01:29:57 lukem Exp $");
56 #endif /* not __lint__ */
66 #include "mime_codecs.h"
69 #ifdef CHARSET_SUPPORT
70 /************************************************************************
71 * Core character set conversion routines.
76 * Fault-tolerant iconv() function.
78 * This routine was borrowed from nail-11.25/mime.c and modified. It
79 * tries to handle errno == EILSEQ by restarting at the next input
80 * byte (is this a good idea?). All other errors are handled by the
84 mime_iconv(iconv_t cd
, const char **inb
, size_t *inbleft
, char **outb
, size_t *outbleft
)
88 while ((sz
= iconv(cd
, inb
, inbleft
, outb
, outbleft
)) == (size_t)-1
109 * This routine was mostly borrowed from src/usr.bin/iconv/iconv.c.
110 * We don't care about the invalid character count, so don't bother
111 * with __iconv(). We do care about robustness, so call mime_iconv()
112 * above to try to recover from errors.
114 #define INBUFSIZE 1024
115 #define OUTBUFSIZE (INBUFSIZE * 2)
118 mime_ficonv(FILE *fi
, FILE *fo
, void *cookie
)
120 char inbuf
[INBUFSIZE
], outbuf
[OUTBUFSIZE
], *out
;
122 size_t inbytes
, outbytes
, ret
;
126 * NOTE: iconv_t is actually a pointer typedef, so this
127 * conversion is not what it appears to be!
129 cd
= (iconv_t
)cookie
;
131 while ((inbytes
= fread(inbuf
, 1, INBUFSIZE
, fi
)) > 0) {
133 while (inbytes
> 0) {
135 outbytes
= OUTBUFSIZE
;
136 ret
= mime_iconv(cd
, &in
, &inbytes
, &out
, &outbytes
);
137 if (ret
== (size_t)-1 && errno
!= E2BIG
) {
138 if (errno
!= EINVAL
|| in
== inbuf
) {
139 /* XXX - what is proper here?
140 * Just copy out the remains? */
142 "\n\t[ iconv truncated message: %s ]\n\n",
147 * If here: errno == EINVAL && in != inbuf
149 /* incomplete input character */
150 (void)memmove(inbuf
, in
, inbytes
);
151 ret
= fread(inbuf
+ inbytes
, 1,
152 INBUFSIZE
- inbytes
, fi
);
156 "\n\t[ unexpected end of file; "
157 "the last character is "
158 "incomplete. ]\n\n");
162 "\n\t[ fread(): %s ]\n\n",
170 if (outbytes
< OUTBUFSIZE
)
171 (void)fwrite(outbuf
, 1, OUTBUFSIZE
- outbytes
, fo
);
174 /* reset the shift state of the output buffer */
175 outbytes
= OUTBUFSIZE
;
177 ret
= iconv(cd
, NULL
, NULL
, &out
, &outbytes
);
178 if (ret
== (size_t)-1) {
179 (void)fprintf(fo
, "\n\t[ iconv(): %s ]\n\n",
183 if (outbytes
< OUTBUFSIZE
)
184 (void)fwrite(outbuf
, 1, OUTBUFSIZE
- outbytes
, fo
);
187 #endif /* CHARSET_SUPPORT */
191 /************************************************************************
192 * Core base64 routines
194 * Defined in sec 6.8 of RFC 2045.
/*
 * Decode a base64 buffer.
 *
 * bin: buffer to hold the decoded (binary) result (see note 1).
 * b64: buffer holding the encoded (base64) source.
 * cnt: number of bytes in the b64 buffer to decode (see note 2).
 *
 * Return: the number of bytes written to the 'bin' buffer or -1 on
 *         error.
 *
 * NOTES:
 * 1) It is the caller's responsibility to ensure that bin is large
 *    enough to hold the result (3 bytes for every 4 bytes of input).
 * 2) The b64 buffer must contain a multiple of 4 bytes of data.  Any
 *    other length is rejected with -1 instead of reading past 'cnt'.
 * 3) Decoding stops at the first '=' padding; anything following it
 *    in the buffer is ignored.
 */
ssize_t
mime_b64tobin(char *bin, const char *b64, size_t cnt)
{
	/* 0..63: symbol value, -2: the '=' pad, -1: not a base64 symbol */
	static const signed char b64index[] = {
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
		52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
		-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
		15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
		-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
		41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
	};
	unsigned char *p;
	const unsigned char *q, *end;

#define EQU	(unsigned)-2
#define BAD	(unsigned)-1
#define uchar64(c)  ((c) >= sizeof(b64index) ? BAD : (unsigned)b64index[(c)])
#define isdata64(v) ((v) < 64)	/* neither EQU nor BAD */

	/* A trailing partial quad would be read beyond the buffer. */
	if (cnt % 4 != 0)
		return -1;

	p = (unsigned char *)bin;
	q = (const unsigned char *)b64;
	for (end = q + cnt; q < end; q += 4) {
		unsigned a = uchar64(q[0]);
		unsigned b = uchar64(q[1]);
		unsigned c = uchar64(q[2]);
		unsigned d = uchar64(q[3]);

		/* The first two symbols can never be padding. */
		if (!isdata64(a) || !isdata64(b))
			return -1;
		*p++ = (unsigned char)((a << 2) | ((b & 0x30) >> 4));

		if (c == EQU) {	/* got '=' */
			if (d != EQU)	/* "x=y" is not valid padding */
				return -1;
			break;
		}
		if (!isdata64(c))
			return -1;
		*p++ = (unsigned char)(((b & 0x0f) << 4) | ((c & 0x3c) >> 2));

		if (d == EQU)	/* got '=' */
			break;
		if (!isdata64(d))
			return -1;
		*p++ = (unsigned char)(((c & 0x03) << 6) | d);
	}

#undef isdata64
#undef uchar64
#undef BAD
#undef EQU

	return p - (unsigned char *)bin;
}
/*
 * Encode a buffer as a base64 result.
 *
 * b64: buffer to hold the encoded (base64) result (see note).
 * bin: buffer holding the binary source.
 * cnt: number of bytes in the bin buffer to encode.
 *
 * NOTE: it is the caller's responsibility to ensure that 'b64' is
 * large enough to hold the result: 4 bytes for every (possibly
 * partial) group of 3 input bytes.  The result is not NUL terminated.
 */
void
mime_bintob64(char *b64, const char *bin, size_t cnt)
{
	static const char b64table[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
	const unsigned char *p = (const unsigned char *)bin;
	size_t left = cnt;

	while (left > 0) {
		/* bytes available for this group: 1, 2 or 3 */
		size_t n = left < 3 ? left : 3;
		unsigned a = p[0];
		/* never touch input bytes beyond 'cnt'; absent ones count as 0 */
		unsigned b = n > 1 ? p[1] : 0;
		unsigned c = n > 2 ? p[2] : 0;

		b64[0] = b64table[a >> 2];
		b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)];
		/* a short final group is padded with '=' */
		b64[2] = n > 1 ? b64table[((b & 0xf) << 2) | ((c & 0xc0) >> 6)] : '=';
		b64[3] = n > 2 ? b64table[c & 0x3f] : '=';

		p += n;
		b64 += 4;
		left -= n;
	}
}
310 #define MIME_BASE64_LINE_MAX (4 * 19) /* max line length is 76: see RFC2045 sec 6.8 */
313 mime_fB64_encode(FILE *fi
, FILE *fo
, void *cookie __unused
)
315 static char b64
[MIME_BASE64_LINE_MAX
];
316 static char mem
[3 * (MIME_BASE64_LINE_MAX
/ 4)];
324 if ((cp
= value(ENAME_MIME_B64_LINE_MAX
)) != NULL
)
325 limit
= (size_t)atoi(cp
);
326 if (limit
== 0 || limit
> sizeof(b64
))
329 limit
= 3 * roundup(limit
, 4) / 4;
333 while ((cnt
= fread(mem
, sizeof(*mem
), limit
, fi
)) > 0) {
334 mime_bintob64(b64
, mem
, (size_t)cnt
);
335 (void)fwrite(b64
, sizeof(*b64
), (size_t)4 * roundup(cnt
, 3) / 3, fo
);
336 (void)putc('\n', fo
);
341 mime_fB64_decode(FILE *fi
, FILE *fo
, void *add_lf
)
348 buflen
= 3 * (MIME_BASE64_LINE_MAX
/ 4);
349 buf
= emalloc(buflen
);
351 while ((line
= fgetln(fi
, &len
)) != NULL
) {
353 if (line
[len
-1] == '\n') /* forget the trailing newline */
356 /* trash trailing white space */
357 for (/*EMPTY*/; len
> 0 && is_WSP(line
[len
-1]); len
--)
360 /* skip leading white space */
361 for (/*EMPTY*/; len
> 0 && is_WSP(line
[0]); len
--, line
++)
367 if (3 * len
> 4 * buflen
) {
369 buf
= erealloc(buf
, buflen
);
372 binlen
= mime_b64tobin(buf
, line
, len
);
375 (void)fprintf(fo
, "WARN: invalid base64 encoding\n");
378 (void)fwrite(buf
, 1, (size_t)binlen
, fo
);
384 (void)fputc('\n', fo
);
388 /************************************************************************
389 * Core quoted-printable routines.
391 * Note: the header QP routines are slightly different and buried
392 * inside mime_header.c
/*
 * Decide whether the byte at 'p' has to be written as a quoted-printable
 * "=XX" escape.
 *
 * p:   the byte under consideration.
 * end: one past the last byte of the input line.
 * l:   length of the current output line; 0 means 'p' would be the
 *      first character written on it.
 *
 * Return: non-zero if the byte must be quoted, 0 otherwise.
 */
static int
mustquote(unsigned char *p, unsigned char *end, size_t l)
{
#define N	0	/* do not quote */
#define Q	1	/* must quote */
#define SP	2	/* white space */
#define XF	3	/* special character 'F' - maybe quoted */
#define XD	4	/* special character '.' - maybe quoted */
#define EQ	Q	/* '=' must be quoted */
#define TB	SP	/* treat '\t' as a space */
#define NL	N	/* don't quote '\n' (NL) - XXX - quoting here breaks the line length algorithm */
#define CR	Q	/* always quote a '\r' (CR) - it occurs only in a CRLF combo */

	/* Classification of the 7-bit characters; 8-bit ones are always Q. */
	static const signed char quotetab[] = {
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q,TB,NL, Q,  Q,CR, Q, Q,
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
		SP, N, N, N,  N, N, N, N,  N, N, N, N,  N, N,XD, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N,EQ, N, N,

		 N, N, N, N,  N, N,XF, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
	};
	int flag = *p > 0x7f ? Q : quotetab[*p];

	switch (flag) {
	case N:
		return 0;
	case Q:
		return 1;
	case SP:
		/* only trailing white space needs protection */
		return p + 1 < end && p[1] == '\n';
	default:
		break;
	}

	/* The remainder are special start-of-line cases. */
	if (l != 0)
		return 0;

	if (flag == XF)	/* line may start with "From" */
		return p + 4 < end && p[1] == 'r' && p[2] == 'o' && p[3] == 'm';

	if (flag == XD)	/* line may consist of a single dot */
		return p + 1 < end && p[1] == '\n';

	/* Only reachable if quotetab[] holds a class not handled above. */
	(void)fprintf(stderr,
	    "mustquote: invalid logic: *p=0x%x (%d) flag=%d, l=%zu\n",
	    *p, *p, flag, l);
	abort();
	/* NOTREACHED */
	return 0;	/* appease GCC */
}

/* The single-letter class names must not leak into the rest of the file. */
#undef CR
#undef NL
#undef TB
#undef EQ
#undef XD
#undef XF
#undef SP
#undef Q
#undef N
455 #define MIME_QUOTED_LINE_MAX 76 /* QP max length: see RFC2045 sec 6.7 */
458 fput_quoted_line(FILE *fo
, char *line
, size_t len
, size_t limit
)
460 size_t l
; /* length of current output line */
465 assert(limit
<= MIME_QUOTED_LINE_MAX
);
467 beg
= (unsigned char*)line
;
470 for (p
= (unsigned char*)line
; p
< end
; p
++) {
471 if (mustquote(p
, end
, l
)) {
473 (void)fputs("=\n", fo
);
476 (void)fprintf(fo
, "=%02X", *p
);
481 if (p
> beg
&& p
[-1] == '\r')
482 (void)fputs("=0A=", fo
);
485 else if (l
+ 2 > limit
) {
486 (void)fputs("=\n", fo
);
494 * Lines ending in a blank must escape the newline.
496 if (len
&& is_WSP(p
[-1]))
497 (void)fputs("=\n", fo
);
501 mime_fQP_encode(FILE *fi
, FILE *fo
, void *cookie __unused
)
512 if ((cp
= value(ENAME_MIME_QP_LINE_MAX
)) != NULL
)
513 limit
= (size_t)atoi(cp
);
514 if (limit
== 0 || limit
> MIME_QUOTED_LINE_MAX
)
515 limit
= MIME_QUOTED_LINE_MAX
;
519 while ((line
= fgetln(fi
, &len
)) != NULL
)
520 fput_quoted_line(fo
, line
, len
, limit
);
524 mime_fQP_decode(FILE *fi
, FILE *fo
, void *cookie __unused
)
532 while ((line
= fgetln(fi
, &len
)) != NULL
) {
537 for (p
= line
; p
< end
; p
++) {
540 while (p
< end
&& is_WSP(*p
))
542 if (*p
!= '\n' && p
+ 1 < end
) {
549 c
= (int)strtol(buf
, NULL
, 16);
560 /************************************************************************
561 * Routines to select the codec by name.
565 mime_fio_copy(FILE *fi
, FILE *fo
, void *cookie __unused
)
572 while ((c
= getc(fi
)) != EOF
)
590 static const struct transfer_encoding_s
{
594 } transfer_encoding_tbl
[] = {
595 { MIME_TRANSFER_7BIT
, mime_fio_copy
, mime_fio_copy
},
596 { MIME_TRANSFER_8BIT
, mime_fio_copy
, mime_fio_copy
},
597 { MIME_TRANSFER_BINARY
, mime_fio_copy
, mime_fio_copy
},
598 { MIME_TRANSFER_QUOTED
, mime_fQP_encode
, mime_fQP_decode
},
599 { MIME_TRANSFER_BASE64
, mime_fB64_encode
, mime_fB64_decode
},
600 { NULL
, NULL
, NULL
},
605 mime_fio_encoder(const char *ename
)
607 const struct transfer_encoding_s
*tep
= NULL
;
612 for (tep
= transfer_encoding_tbl
; tep
->name
; tep
++)
613 if (strcasecmp(tep
->name
, ename
) == 0)
619 mime_fio_decoder(const char *ename
)
621 const struct transfer_encoding_s
*tep
= NULL
;
626 for (tep
= transfer_encoding_tbl
; tep
->name
; tep
++)
627 if (strcasecmp(tep
->name
, ename
) == 0)
633 * This is for use in complete.c and mime.c to get the list of
634 * encoding names without exposing the transfer_encoding_tbl[]. The
635 * first name is returned if called with a pointer to a NULL pointer.
636 * Subsequent calls with the same cookie give successive names. A
637 * NULL return indicates the end of the list.
640 mime_next_encoding_name(const void **cookie
)
642 const struct transfer_encoding_s
*tep
;
646 tep
= transfer_encoding_tbl
;
648 *cookie
= tep
->name
? &tep
[1] : NULL
;
653 #endif /* MIME_SUPPORT */