* Attempting to add a smart host in webcit was instead adding it as an RBL host....
[citadel.git] / webcit / decode.c
blob267e85982117dde96c95a9f574417543f647f962
1 #include "webcit.h"
2 #ifdef HAVE_ICONV
/*
 * Thin wrapper around iconv_open().
 *
 * Some senders label their text with non-standard Microsoft charset
 * names such as 'MS950'.  When iconv_open() rejects the requested source
 * encoding and that name is exactly five characters long and starts with
 * "MS" (any case), the open is retried with the prefix rewritten to "CP"
 * (e.g. 'CP950').
 *
 * tocode	Target encoding
 * fromcode	Source encoding
 *
 * Returns the conversion descriptor, or (iconv_t)(-1) if neither the
 * original name nor the alias could be opened.
 */
iconv_t ctdl_iconv_open(const char *tocode, const char *fromcode)
{
	char cp_name[64];
	iconv_t cd = iconv_open(tocode, fromcode);

	/* The name was accepted as given: nothing more to do. */
	if (cd != (iconv_t)(-1)) {
		return(cd);
	}

	/* Only five-character "MSnnn" style names get the alias treatment. */
	if ((strlen(fromcode) != 5) || (strncasecmp(fromcode, "MS", 2) != 0)) {
		return(cd);
	}

	safestrncpy(cp_name, fromcode, sizeof cp_name);
	cp_name[0] = 'C';
	cp_name[1] = 'P';
	return(iconv_open(tocode, cp_name));
}
/*
 * Locate the "?=" that closes the RFC2047 encoded-word starting at bptr.
 *
 * bptr	points at the "=?" that opens the candidate encoded-word
 *
 * Returns a pointer to the closing "?=", or NULL when there is none (or
 * when bptr is NULL or shorter than the two-byte "=?" introducer).
 *
 * Declared without 'inline': under C99/C11 semantics a bare 'inline'
 * definition provides no external definition for callers to link against.
 */
char *FindNextEnd (char *bptr)
{
	char *end;
	char enc;

	/* bptr + 2 below skips the "=?" introducer; make sure it is really there. */
	if ((bptr == NULL) || (bptr[0] == '\0') || (bptr[1] == '\0'))
		return NULL;

	/* Find the next ?Q? */
	end = strchr(bptr + 2, '?');
	if (end == NULL)
		return NULL;

	/* The encoding letter is matched case-insensitively. */
	enc = end[1];
	if (((enc == 'B') || (enc == 'b') || (enc == 'Q') || (enc == 'q')) &&
	    (end[2] == '?')) {
		/*
		 * Skip on to the end of the cluster, the next ?=
		 * Searching starts behind "?X?" so that a payload beginning
		 * with '=' is not mistaken for the terminator.
		 */
		return strstr(end + 3, "?=");
	}

	/* sort of half valid encoding, try to find an end. */
	return strstr(bptr, "?=");
}
48 * Handle subjects with RFC2047 encoding such as:
49 * =?koi8-r?B?78bP0s3Mxc7JxSDXz9rE1dvO2c3JINvB0sHNySDP?=
51 void utf8ify_rfc822_string(char *buf) {
52 char *start, *end, *next, *nextend, *ptr;
53 char newbuf[1024];
54 char charset[128];
55 char encoding[16];
56 char istr[1024];
57 iconv_t ic = (iconv_t)(-1) ;
58 char *ibuf; /**< Buffer of characters to be converted */
59 char *obuf; /**< Buffer for converted characters */
60 size_t ibuflen; /**< Length of input buffer */
61 size_t obuflen; /**< Length of output buffer */
62 char *isav; /**< Saved pointer to input buffer */
63 char *osav; /**< Saved pointer to output buffer */
64 int passes = 0;
65 int i, len, delta;
66 int illegal_non_rfc2047_encoding = 0;
68 /* Sometimes, badly formed messages contain strings which were simply
69 * written out directly in some foreign character set instead of
70 * using RFC2047 encoding. This is illegal but we will attempt to
71 * handle it anyway by converting from a user-specified default
72 * charset to UTF-8 if we see any nonprintable characters.
74 len = strlen(buf);
75 for (i=0; i<len; ++i) {
76 if ((buf[i] < 32) || (buf[i] > 126)) {
77 illegal_non_rfc2047_encoding = 1;
78 i = len; /*< take a shortcut, it won't be more than one. */
81 if (illegal_non_rfc2047_encoding) {
82 StrBuf *default_header_charset;
83 get_preference("default_header_charset", &default_header_charset);
84 if ( (strcasecmp(ChrPtr(default_header_charset), "UTF-8")) &&
85 (strcasecmp(ChrPtr(default_header_charset), "us-ascii")) ) {
86 ctdl_iconv_open("UTF-8", ChrPtr(default_header_charset), &ic);
87 if (ic != (iconv_t)(-1) ) {
88 ibuf = malloc(1024);
89 isav = ibuf;
90 safestrncpy(ibuf, buf, 1024);
91 ibuflen = strlen(ibuf);
92 obuflen = 1024;
93 obuf = (char *) malloc(obuflen);
94 osav = obuf;
95 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
96 osav[1024-obuflen] = 0;
97 strcpy(buf, osav);
98 free(osav);
99 iconv_close(ic);
100 free(isav);
105 /* pre evaluate the first pair */
106 nextend = end = NULL;
107 len = strlen(buf);
108 start = strstr(buf, "=?");
109 if (start != NULL)
110 end = FindNextEnd (start);
112 while ((start != NULL) && (end != NULL))
114 next = strstr(end, "=?");
115 if (next != NULL)
116 nextend = FindNextEnd(next);
117 if (nextend == NULL)
118 next = NULL;
120 /* did we find two partitions */
121 if ((next != NULL) &&
122 ((next - end) > 2))
124 ptr = end + 2;
125 while ((ptr < next) &&
126 (isspace(*ptr) ||
127 (*ptr == '\r') ||
128 (*ptr == '\n') ||
129 (*ptr == '\t')))
130 ptr ++;
131 /* did we find a gab just filled with blanks? */
132 if (ptr == next)
134 memmove (end + 2,
135 next,
136 len - (next - start));
138 /* now terminate the gab at the end */
139 delta = (next - end) - 2;
140 len -= delta;
141 buf[len] = '\0';
143 /* move next to its new location. */
144 next -= delta;
145 nextend -= delta;
148 /* our next-pair is our new first pair now. */
149 start = next;
150 end = nextend;
153 /* Now we handle foreign character sets properly encoded
154 * in RFC2047 format.
156 while (start=strstr(buf, "=?"), end=FindNextEnd((start != NULL)? start : buf),
157 ((start != NULL) && (end != NULL) && (end > start)) )
159 extract_token(charset, start, 1, '?', sizeof charset);
160 extract_token(encoding, start, 2, '?', sizeof encoding);
161 extract_token(istr, start, 3, '?', sizeof istr);
163 ibuf = malloc(1024);
164 isav = ibuf;
165 if (!strcasecmp(encoding, "B")) { /**< base64 */
166 ibuflen = CtdlDecodeBase64(ibuf, istr, strlen(istr));
168 else if (!strcasecmp(encoding, "Q")) { /**< quoted-printable */
169 size_t len;
170 long pos;
172 len = strlen(istr);
173 pos = 0;
174 while (pos < len)
176 if (istr[pos] == '_') istr[pos] = ' ';
177 pos++;
180 ibuflen = CtdlDecodeQuotedPrintable(ibuf, istr, len);
182 else {
183 strcpy(ibuf, istr); /**< unknown encoding */
184 ibuflen = strlen(istr);
187 ctdl_iconv_open("UTF-8", charset, &ic);
188 if (ic != (iconv_t)(-1) ) {
189 obuflen = 1024;
190 obuf = (char *) malloc(obuflen);
191 osav = obuf;
192 iconv(ic, &ibuf, &ibuflen, &obuf, &obuflen);
193 osav[1024-obuflen] = 0;
195 end = start;
196 end++;
197 strcpy(start, "");
198 remove_token(end, 0, '?');
199 remove_token(end, 0, '?');
200 remove_token(end, 0, '?');
201 remove_token(end, 0, '?');
202 strcpy(end, &end[1]);
204 snprintf(newbuf, sizeof newbuf, "%s%s%s", buf, osav, end);
205 strcpy(buf, newbuf);
206 free(osav);
207 iconv_close(ic);
209 else {
210 end = start;
211 end++;
212 strcpy(start, "");
213 remove_token(end, 0, '?');
214 remove_token(end, 0, '?');
215 remove_token(end, 0, '?');
216 remove_token(end, 0, '?');
217 strcpy(end, &end[1]);
219 snprintf(newbuf, sizeof newbuf, "%s(unreadable)%s", buf, end);
220 strcpy(buf, newbuf);
223 free(isav);
226 * Since spammers will go to all sorts of absurd lengths to get their
227 * messages through, there are LOTS of corrupt headers out there.
228 * So, prevent a really badly formed RFC2047 header from throwing
229 * this function into an infinite loop.
231 ++passes;
232 if (passes > 20) return;
236 #else
/*
 * Fallback used when HAVE_ICONV is not defined: the string is left untouched.
 * Declared without 'inline' so that an external definition is always
 * emitted (a bare 'inline' definition provides none under C99/C11).
 */
void utf8ify_rfc822_string(char *a)
{
	(void) a;
}
239 #endif
/**
 * \brief	RFC2047-encode a header field if necessary.
 *		If no non-ASCII characters are found, the string
 *		will be copied verbatim without encoding.
 *		Otherwise the result is a single "=?UTF-8?Q?...?=" encoded-word:
 *		bytes outside 32..126 and the characters '=', '?' and '_' are
 *		written as "=XX", a space is written as '_'.
 *
 *		If the encoded text does not fit completely, encoding stops
 *		early and the encoded-word is still closed properly.
 *		NOTE(review): such a cut can fall inside a multi-byte UTF-8
 *		sequence.
 *
 * \param	target		Target buffer.
 * \param	maxlen		Maximum size of target buffer.
 * \param	source		Source string to be encoded.
 * \param	SourceLen	Length of the source string
 * \returns	encoded length; -1 if non success (NULL argument, negative
 *		length, or a target buffer that cannot hold the source plus
 *		its terminator / the encoded-word framing).
 */
int webcit_rfc2047encode(char *target, int maxlen, char *source, long SourceLen)
{
	static const char headerStr[] = "=?UTF-8?Q?";
	int need_to_encode = 0;
	long i;
	int len;
	unsigned char ch;

	/* '>=' rather than '>': the verbatim copy needs room for the terminator. */
	if ((source == NULL) ||
	    (target == NULL) ||
	    (SourceLen < 0) ||
	    (SourceLen >= maxlen)) return -1;

	/* Scan up to SourceLen bytes, or up to an embedded NUL, for anything non-printable. */
	for (i = 0; (i < SourceLen) && (source[i] != '\0'); ++i) {
		ch = (unsigned char) source[i];
		if ((ch < 32) || (ch > 126)) {
			need_to_encode = 1;
			break;
		}
	}

	if (!need_to_encode) {
		memcpy (target, source, (size_t) SourceLen);
		target[SourceLen] = '\0';
		return (int) SourceLen;
	}

	/* Smallest possible result: header, one byte per source byte, "?=" and
	 * the terminator (already counted in sizeof headerStr).
	 */
	if ((long) sizeof (headerStr) + SourceLen + 2 > (long) maxlen)
		return -1;

	memcpy (target, headerStr, sizeof (headerStr));
	len = (int) sizeof (headerStr) - 1;

	/* Each step may emit three bytes; always keep room for "?=" and the NUL. */
	for (i = 0; (i < SourceLen) && (len + 5 < maxlen); ++i) {
		ch = (unsigned char) source[i];
		if (ch == ' ') {
			/* a literal space is not allowed inside an encoded-word */
			target[len++] = '_';
		}
		else if ((ch < 32) || (ch > 126) ||
			 (ch == '=') || (ch == '?') || (ch == '_')) {
			snprintf(&target[len], 4, "=%02X", ch);
			len += 3;
		}
		else {
			target[len++] = (char) ch;
		}
	}

	target[len++] = '?';
	target[len++] = '=';
	target[len] = '\0';
	return len;
}