/*
 * Wrapper around iconv_open()
 * Our version adds aliases for non-standard Microsoft charsets
 * such as 'MS950', aliasing them to names like 'CP950'
 *
 * tocode	Target encoding
 * fromcode	Source encoding
 *
 * Returns the conversion descriptor from iconv_open(), or (iconv_t)(-1)
 * when either argument is NULL or neither the given name nor its 'CP'
 * alias is accepted by iconv_open().
 */
iconv_t ctdl_iconv_open(const char *tocode, const char *fromcode)
{
	iconv_t ic;

	/* Refuse NULL names here instead of crashing inside strlen()/iconv_open() */
	if ((tocode == NULL) || (fromcode == NULL)) {
		return (iconv_t)(-1);
	}

	ic = iconv_open(tocode, fromcode);
	if (ic != (iconv_t)(-1)) {
		return ic;
	}

	/* Retry a five character "MSnnn" name as "CPnnn" */
	if ((strlen(fromcode) == 5) && (!strncasecmp(fromcode, "MS", 2))) {
		char alias_fromcode[6];

		/* The length is known to be exactly 5, so the alias is built in place */
		alias_fromcode[0] = 'C';
		alias_fromcode[1] = 'P';
		alias_fromcode[2] = fromcode[2];
		alias_fromcode[3] = fromcode[3];
		alias_fromcode[4] = fromcode[4];
		alias_fromcode[5] = '\0';
		ic = iconv_open(tocode, alias_fromcode);
	}

	return ic;
}
/*
 * Find the "?=" that closes the RFC2047 encoded-word starting at bptr.
 *
 * bptr	Start of the candidate encoded-word (normally pointing at "=?").
 *
 * Returns a pointer to the closing "?=", or NULL if bptr is NULL, is
 * shorter than two characters, or no terminator can be found.
 */
char *FindNextEnd (char *bptr)
{
	char *end;
	char enc;

	/* bptr + 2 must still lie inside the string */
	if ((bptr == NULL) || (bptr[0] == '\0') || (bptr[1] == '\0')) {
		return NULL;
	}

	/* Find the next ?Q? */
	end = strchr(bptr + 2, '?');
	if (end == NULL) {
		return NULL;
	}

	/* The encoding letter is matched in either case, as the decoder does */
	enc = end[1];
	if (((enc == 'B') || (enc == 'b') || (enc == 'Q') || (enc == 'q')) &&
	    (end[2] == '?')) {
		/* skip on to the end of the cluster, the next ?= */
		end = strstr(end + 3, "?=");
	}
	else {
		/* sort of half valid encoding, try to find an end. */
		end = strstr(bptr, "?=");
	}
	return end;
}
48 * Handle subjects with RFC2047 encoding such as:
49 * =?koi8-r?B?78bP0s3Mxc7JxSDXz9rE1dvO2c3JINvB0sHNySDP?=
51 void utf8ify_rfc822_string(char *buf
) {
52 char *start
, *end
, *next
, *nextend
, *ptr
;
57 iconv_t ic
= (iconv_t
)(-1) ;
58 char *ibuf
; /**< Buffer of characters to be converted */
59 char *obuf
; /**< Buffer for converted characters */
60 size_t ibuflen
; /**< Length of input buffer */
61 size_t obuflen
; /**< Length of output buffer */
62 char *isav
; /**< Saved pointer to input buffer */
63 char *osav
; /**< Saved pointer to output buffer */
66 int illegal_non_rfc2047_encoding
= 0;
68 /* Sometimes, badly formed messages contain strings which were simply
69 * written out directly in some foreign character set instead of
70 * using RFC2047 encoding. This is illegal but we will attempt to
71 * handle it anyway by converting from a user-specified default
72 * charset to UTF-8 if we see any nonprintable characters.
75 for (i
=0; i
<len
; ++i
) {
76 if ((buf
[i
] < 32) || (buf
[i
] > 126)) {
77 illegal_non_rfc2047_encoding
= 1;
78 i
= len
; /*< take a shortcut, it won't be more than one. */
81 if (illegal_non_rfc2047_encoding
) {
82 StrBuf
*default_header_charset
;
83 get_preference("default_header_charset", &default_header_charset
);
84 if ( (strcasecmp(ChrPtr(default_header_charset
), "UTF-8")) &&
85 (strcasecmp(ChrPtr(default_header_charset
), "us-ascii")) ) {
86 ctdl_iconv_open("UTF-8", ChrPtr(default_header_charset
), &ic
);
87 if (ic
!= (iconv_t
)(-1) ) {
90 safestrncpy(ibuf
, buf
, 1024);
91 ibuflen
= strlen(ibuf
);
93 obuf
= (char *) malloc(obuflen
);
95 iconv(ic
, &ibuf
, &ibuflen
, &obuf
, &obuflen
);
96 osav
[1024-obuflen
] = 0;
105 /* pre evaluate the first pair */
106 nextend
= end
= NULL
;
108 start
= strstr(buf
, "=?");
110 end
= FindNextEnd (start
);
112 while ((start
!= NULL
) && (end
!= NULL
))
114 next
= strstr(end
, "=?");
116 nextend
= FindNextEnd(next
);
120 /* did we find two partitions */
121 if ((next
!= NULL
) &&
125 while ((ptr
< next
) &&
131 /* did we find a gap just filled with blanks? */
136 len
- (next
- start
));
138 /* now terminate the gap at the end */
139 delta
= (next
- end
) - 2;
143 /* move next to its new location. */
148 /* our next-pair is our new first pair now. */
153 /* Now we handle foreign character sets properly encoded
156 while (start
=strstr(buf
, "=?"), end
=FindNextEnd((start
!= NULL
)? start
: buf
),
157 ((start
!= NULL
) && (end
!= NULL
) && (end
> start
)) )
159 extract_token(charset
, start
, 1, '?', sizeof charset
);
160 extract_token(encoding
, start
, 2, '?', sizeof encoding
);
161 extract_token(istr
, start
, 3, '?', sizeof istr
);
165 if (!strcasecmp(encoding
, "B")) { /**< base64 */
166 ibuflen
= CtdlDecodeBase64(ibuf
, istr
, strlen(istr
));
168 else if (!strcasecmp(encoding
, "Q")) { /**< quoted-printable */
176 if (istr
[pos
] == '_') istr
[pos
] = ' ';
180 ibuflen
= CtdlDecodeQuotedPrintable(ibuf
, istr
, len
);
183 strcpy(ibuf
, istr
); /**< unknown encoding */
184 ibuflen
= strlen(istr
);
187 ctdl_iconv_open("UTF-8", charset
, &ic
);
188 if (ic
!= (iconv_t
)(-1) ) {
190 obuf
= (char *) malloc(obuflen
);
192 iconv(ic
, &ibuf
, &ibuflen
, &obuf
, &obuflen
);
193 osav
[1024-obuflen
] = 0;
198 remove_token(end
, 0, '?');
199 remove_token(end
, 0, '?');
200 remove_token(end
, 0, '?');
201 remove_token(end
, 0, '?');
202 strcpy(end
, &end
[1]);
204 snprintf(newbuf
, sizeof newbuf
, "%s%s%s", buf
, osav
, end
);
213 remove_token(end
, 0, '?');
214 remove_token(end
, 0, '?');
215 remove_token(end
, 0, '?');
216 remove_token(end
, 0, '?');
217 strcpy(end
, &end
[1]);
219 snprintf(newbuf
, sizeof newbuf
, "%s(unreadable)%s", buf
, end
);
226 * Since spammers will go to all sorts of absurd lengths to get their
227 * messages through, there are LOTS of corrupt headers out there.
228 * So, prevent a really badly formed RFC2047 header from throwing
229 * this function into an infinite loop.
232 if (passes
> 20) return;
/*
 * No-op variant of utf8ify_rfc822_string(): the string is left untouched.
 * NOTE(review): presumably the fallback for builds without iconv; the
 * preprocessor conditionals selecting it are not visible here - confirm.
 *
 * a	Header string; neither read nor modified.
 */
void utf8ify_rfc822_string(char *a)
{
	(void) a;	/* intentionally unused */
}
245 * \brief RFC2047-encode a header field if necessary.
246 * If no non-ASCII characters are found, the string
247 * will be copied verbatim without encoding.
249 * \param target Target buffer.
250 * \param maxlen Maximum size of target buffer.
251 * \param source Source string to be encoded.
252 * \param SourceLen Length of the source string
253 * \returns the encoded length, or -1 on failure.
255 int webcit_rfc2047encode(char *target
, int maxlen
, char *source
, long SourceLen
)
257 const char headerStr
[] = "=?UTF-8?Q?";
258 int need_to_encode
= 0;
263 if ((source
== NULL
) ||
265 (SourceLen
> maxlen
)) return -1;
267 while ((!IsEmptyStr (&source
[i
])) &&
268 (need_to_encode
== 0) &&
270 if (((unsigned char) source
[i
] < 32) ||
271 ((unsigned char) source
[i
] > 126)) {
277 if (!need_to_encode
) {
278 memcpy (target
, source
, SourceLen
);
279 target
[SourceLen
] = '\0';
283 if (sizeof (headerStr
+ SourceLen
+ 2) > maxlen
)
285 memcpy (target
, headerStr
, sizeof (headerStr
));
286 len
= sizeof (headerStr
) - 1;
287 for (i
=0; (i
< SourceLen
) && (len
+ 3< maxlen
) ; ++i
) {
288 ch
= (unsigned char) source
[i
];
289 if ((ch
< 32) || (ch
> 126) || (ch
== 61)) {
290 sprintf(&target
[len
], "=%02X", ch
);
294 sprintf(&target
[len
], "%c", ch
);
299 if (len
+ 2 < maxlen
) {
300 strcat(&target
[len
], "?=");