BPicture: Fix archive constructor.
[haiku.git] / src / kits / mail / mail_util.cpp
blob05bb4e7efebf794403ca71c804f76864e1af63ac
1 /*
2 * Copyright 2011, Haiku, Inc. All rights reserved.
3 * Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved.
4 */
7 #include <mail_util.h>
9 #include <stdlib.h>
10 #include <strings.h>
11 #include <stdio.h>
12 #define __USE_GNU
13 #include <regex.h>
14 #include <ctype.h>
15 #include <errno.h>
17 #include <List.h>
18 #include <Locker.h>
19 #include <parsedate.h>
20 #include <String.h>
21 #include <UTF8.h>
23 #include <mail_encoding.h>
25 #include <CharacterSet.h>
26 #include <CharacterSetRoster.h>
29 using namespace BPrivate;
32 #define CRLF "\r\n"
34 struct CharsetConversionEntry {
35 const char *charset;
36 uint32 flavor;
39 extern const CharsetConversionEntry mail_charsets[] = {
40 // In order of authority, so when searching for the name for a particular
41 // numbered conversion, start at the beginning of the array.
42 {"iso-8859-1", B_ISO1_CONVERSION}, // MIME STANDARD
43 {"iso-8859-2", B_ISO2_CONVERSION}, // MIME STANDARD
44 {"iso-8859-3", B_ISO3_CONVERSION}, // MIME STANDARD
45 {"iso-8859-4", B_ISO4_CONVERSION}, // MIME STANDARD
46 {"iso-8859-5", B_ISO5_CONVERSION}, // MIME STANDARD
47 {"iso-8859-6", B_ISO6_CONVERSION}, // MIME STANDARD
48 {"iso-8859-7", B_ISO7_CONVERSION}, // MIME STANDARD
49 {"iso-8859-8", B_ISO8_CONVERSION}, // MIME STANDARD
50 {"iso-8859-9", B_ISO9_CONVERSION}, // MIME STANDARD
51 {"iso-8859-10", B_ISO10_CONVERSION}, // MIME STANDARD
52 {"iso-8859-13", B_ISO13_CONVERSION}, // MIME STANDARD
53 {"iso-8859-14", B_ISO14_CONVERSION}, // MIME STANDARD
54 {"iso-8859-15", B_ISO15_CONVERSION}, // MIME STANDARD
56 {"shift_jis", B_SJIS_CONVERSION}, // MIME STANDARD
57 {"shift-jis", B_SJIS_CONVERSION},
58 {"iso-2022-jp", B_JIS_CONVERSION}, // MIME STANDARD
59 {"euc-jp", B_EUC_CONVERSION}, // MIME STANDARD
61 {"euc-kr", B_EUC_KR_CONVERSION}, // Shift encoding 7 bit and KSC-5601 if bit 8 is on. // MIME STANDARD
62 {"ksc5601", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE?
63 {"ks_c_5601-1987", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE with stupid MS software
65 {"koi8-r", B_KOI8R_CONVERSION}, // MIME STANDARD
66 {"windows-1251",B_MS_WINDOWS_1251_CONVERSION}, // MIME STANDARD
67 {"windows-1252",B_MS_WINDOWS_CONVERSION}, // MIME STANDARD
69 {"dos-437", B_MS_DOS_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM437? )
70 {"dos-866", B_MS_DOS_866_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM866? )
71 {"x-mac-roman", B_MAC_ROMAN_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( macintosh? + x-mac-roman? )
73 {"big5", 24}, // MIME STANDARD
75 {"gb18030", 25}, // WRONG NAME : MIME STANDARD NAME = NONE ( GB18030? )
76 {"gb2312", 25}, // COMPATIBLE
77 {"gbk", 25}, // COMPATIBLE
79 /* {"utf-16", B_UNICODE_CONVERSION}, Might not work due to NULs in text, needs testing. */
80 {"us-ascii", B_MAIL_US_ASCII_CONVERSION}, // MIME STANDARD
81 {"utf-8", B_MAIL_UTF8_CONVERSION /* Special code for no conversion */}, // MIME STANDARD
83 {NULL, (uint32) -1} /* End of list marker, NULL string pointer is the key. */
// File-scope state. None of the functions between here and
// extract_address_name() reference these variables.
// NOTE(review): presumably they back regex-based helpers further down the
// file - confirm before changing or removing them.
87 static int32 gLocker = 0;
88 static size_t gNsub = 1;
89 static re_pattern_buffer gRe;
90 static re_pattern_buffer *gRebuf = NULL;
91 static unsigned char gTranslation[256];
94 static int
95 handle_non_rfc2047_encoding(char **buffer, size_t *bufferLength,
96 size_t *sourceLength)
98 char *string = *buffer;
99 int32 length = *sourceLength;
100 int32 i;
102 // check for 8-bit characters
103 for (i = 0;i < length;i++)
104 if (string[i] & 0x80)
105 break;
106 if (i == length)
107 return false;
109 // check for groups of 8-bit characters - this code is not very smart;
110 // it just can detect some sort of single-byte encoded stuff, the rest
111 // is regarded as UTF-8
113 int32 singletons = 0,doubles = 0;
115 for (i = 0;i < length;i++)
117 if (string[i] & 0x80)
119 if ((string[i + 1] & 0x80) == 0)
120 singletons++;
121 else doubles++;
122 i++;
126 if (singletons != 0) // can't be valid UTF-8 anymore, so we assume ISO-Latin-1
128 int32 state = 0;
129 // just to be sure
130 int32 destLength = length * 4 + 1;
131 int32 destBufferLength = destLength;
132 char *dest = (char*)malloc(destLength);
133 if (dest == NULL)
134 return 0;
136 if (convert_to_utf8(B_ISO1_CONVERSION, string, &length,dest,
137 &destLength, &state) == B_OK) {
138 *buffer = dest;
139 *bufferLength = destBufferLength;
140 *sourceLength = destLength;
141 return true;
143 free(dest);
144 return false;
147 // we assume a valid UTF-8 string here, but yes, we don't check it
148 return true;
152 // #pragma mark -
155 status_t
156 write_read_attr(BNode& node, read_flags flag)
158 if (node.WriteAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32))
159 < 0)
160 return B_ERROR;
162 // manage the status string only if it currently has a "read" status
163 BString currentStatus;
164 if (node.ReadAttrString(B_MAIL_ATTR_STATUS, &currentStatus) == B_OK) {
165 if (currentStatus.ICompare("New") != 0
166 && currentStatus.ICompare("Read") != 0
167 && currentStatus.ICompare("Seen") != 0)
168 return B_OK;
171 const char* statusString = flag == B_READ ? "Read"
172 : flag == B_SEEN ? "Seen" : "New";
173 if (node.WriteAttr(B_MAIL_ATTR_STATUS, B_STRING_TYPE, 0, statusString,
174 strlen(statusString)) < 0)
175 return B_ERROR;
177 return B_OK;
181 status_t
182 read_read_attr(BNode& node, read_flags& flag)
184 if (node.ReadAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32))
185 == sizeof(int32))
186 return B_OK;
188 BString statusString;
189 if (node.ReadAttrString(B_MAIL_ATTR_STATUS, &statusString) == B_OK) {
190 if (statusString.ICompare("New"))
191 flag = B_UNREAD;
192 else
193 flag = B_READ;
195 return B_OK;
198 return B_ERROR;
202 // The next couple of functions are our wrapper around convert_to_utf8 and
203 // convert_from_utf8 so that they can also convert from UTF-8 to UTF-8 by
204 // specifying the B_MAIL_UTF8_CONVERSION constant as the conversion operation.
205 // It also lets us add new conversions, like B_MAIL_US_ASCII_CONVERSION.
208 status_t
209 mail_convert_to_utf8(uint32 srcEncoding, const char *src, int32 *srcLen,
210 char *dst, int32 *dstLen, int32 *state, char substitute)
212 int32 copyAmount;
213 char *originalDst = dst;
214 status_t returnCode = -1;
216 if (srcEncoding == B_MAIL_UTF8_CONVERSION) {
217 copyAmount = *srcLen;
218 if (*dstLen < copyAmount)
219 copyAmount = *dstLen;
220 memcpy (dst, src, copyAmount);
221 *srcLen = copyAmount;
222 *dstLen = copyAmount;
223 returnCode = B_OK;
224 } else if (srcEncoding == B_MAIL_US_ASCII_CONVERSION) {
225 int32 i;
226 unsigned char letter;
227 copyAmount = *srcLen;
228 if (*dstLen < copyAmount)
229 copyAmount = *dstLen;
230 for (i = 0; i < copyAmount; i++) {
231 letter = *src++;
232 if (letter > 0x80U)
233 // Invalid, could also use substitute, but better to strip high bit.
234 *dst++ = letter - 0x80U;
235 else if (letter == 0x80U)
236 // Can't convert to 0x00 since that's NUL, which would cause problems.
237 *dst++ = substitute;
238 else
239 *dst++ = letter;
241 *srcLen = copyAmount;
242 *dstLen = copyAmount;
243 returnCode = B_OK;
244 } else
245 returnCode = convert_to_utf8 (srcEncoding, src, srcLen,
246 dst, dstLen, state, substitute);
248 if (returnCode == B_OK) {
249 // Replace spurious NUL bytes, which should normally not be in the
250 // output of the decoding (not normal UTF-8 characters, and no NULs are
251 // in our usual input strings). They happen for some odd ISO-2022-JP
252 // byte pair combinations which are improperly handled by the BeOS
253 // routines. Like "\e$ByD\e(B" where \e is the ESC character $1B, the
254 // first ESC $ B switches to a Japanese character set, then the next
255 // two bytes "yD" specify a character, then ESC ( B switches back to
256 // the ASCII character set. The UTF-8 conversion yields a NUL byte.
257 int32 i;
258 for (i = 0; i < *dstLen; i++)
259 if (originalDst[i] == 0)
260 originalDst[i] = substitute;
262 return returnCode;
266 status_t
267 mail_convert_from_utf8(uint32 dstEncoding, const char *src, int32 *srcLen,
268 char *dst, int32 *dstLen, int32 *state, char substitute)
270 int32 copyAmount;
271 status_t errorCode;
272 int32 originalDstLen = *dstLen;
273 int32 tempDstLen;
274 int32 tempSrcLen;
276 if (dstEncoding == B_MAIL_UTF8_CONVERSION) {
277 copyAmount = *srcLen;
278 if (*dstLen < copyAmount)
279 copyAmount = *dstLen;
280 memcpy (dst, src, copyAmount);
281 *srcLen = copyAmount;
282 *dstLen = copyAmount;
283 return B_OK;
286 if (dstEncoding == B_MAIL_US_ASCII_CONVERSION) {
287 int32 characterLength;
288 int32 dstRemaining = *dstLen;
289 unsigned char letter;
290 int32 srcRemaining = *srcLen;
292 // state contains the number of source bytes to skip, left over from a
293 // partial UTF-8 character split over the end of the buffer from last
294 // time.
295 if (srcRemaining <= *state) {
296 *state -= srcRemaining;
297 *dstLen = 0;
298 return B_OK;
300 srcRemaining -= *state;
301 src += *state;
302 *state = 0;
304 while (true) {
305 if (srcRemaining <= 0 || dstRemaining <= 0)
306 break;
307 letter = *src;
308 if (letter < 0x80)
309 characterLength = 1; // Regular ASCII equivalent code.
310 else if (letter < 0xC0)
311 characterLength = 1; // Invalid in-between data byte 10xxxxxx.
312 else if (letter < 0xE0)
313 characterLength = 2;
314 else if (letter < 0xF0)
315 characterLength = 3;
316 else if (letter < 0xF8)
317 characterLength = 4;
318 else if (letter < 0xFC)
319 characterLength = 5;
320 else if (letter < 0xFE)
321 characterLength = 6;
322 else
323 characterLength = 1; // 0xFE and 0xFF are invalid in UTF-8.
324 if (letter < 0x80)
325 *dst++ = *src;
326 else
327 *dst++ = substitute;
328 dstRemaining--;
329 if (srcRemaining < characterLength) {
330 // Character split past the end of the buffer.
331 *state = characterLength - srcRemaining;
332 srcRemaining = 0;
333 } else {
334 src += characterLength;
335 srcRemaining -= characterLength;
338 // Update with the amounts used.
339 *srcLen = *srcLen - srcRemaining;
340 *dstLen = *dstLen - dstRemaining;
341 return B_OK;
344 errorCode = convert_from_utf8(dstEncoding, src, srcLen, dst, dstLen, state,
345 substitute);
346 if (errorCode != B_OK)
347 return errorCode;
349 if (dstEncoding != B_JIS_CONVERSION)
350 return B_OK;
352 // B_JIS_CONVERSION (ISO-2022-JP) works by shifting between different
353 // character subsets. For E-mail headers (and other uses), it needs to be
354 // switched back to ASCII at the end (otherwise the last character gets
355 // lost or other weird things happen in the headers). Note that we can't
356 // just append the escape code since the convert_from_utf8 "state" will be
357 // wrong. So we append an ASCII letter and throw it away, leaving just the
358 // escape code. Well, it actually switches to the Roman character set, not
359 // ASCII, but that should be OK.
361 tempDstLen = originalDstLen - *dstLen;
362 if (tempDstLen < 3) // Not enough space remaining in the output.
363 return B_OK; // Sort of an error, but we did convert the rest OK.
364 tempSrcLen = 1;
365 errorCode = convert_from_utf8(dstEncoding, "a", &tempSrcLen,
366 dst + *dstLen, &tempDstLen, state, substitute);
367 if (errorCode != B_OK)
368 return errorCode;
369 *dstLen += tempDstLen - 1 /* don't include the ASCII letter */;
370 return B_OK;
374 ssize_t
375 rfc2047_to_utf8(char **bufp, size_t *bufLen, size_t strLen)
377 char *head, *tail;
378 char *charset, *encoding, *end;
379 ssize_t ret = B_OK;
381 if (bufp == NULL || *bufp == NULL)
382 return -1;
384 char *string = *bufp;
386 //---------Handle *&&^%*&^ non-RFC compliant, 8bit mail
387 if (handle_non_rfc2047_encoding(bufp,bufLen,&strLen))
388 return strLen;
390 // set up string length
391 if (strLen == 0)
392 strLen = strlen(*bufp);
393 char lastChar = (*bufp)[strLen];
394 (*bufp)[strLen] = '\0';
396 //---------Whew! Now for RFC compliant mail
397 bool encodedWordFoundPreviously = false;
398 for (head = tail = string;
399 ((charset = strstr(tail, "=?")) != NULL)
400 && (((encoding = strchr(charset + 2, '?')) != NULL)
401 && encoding[1] && (encoding[2] == '?') && encoding[3])
402 && (end = strstr(encoding + 3, "?=")) != NULL;
403 // found "=?...charset...?e?...text...?= (e == encoding)
404 // ^charset ^encoding ^end
405 tail = end)
407 // Copy non-encoded text (from tail up to charset) to the output.
408 // Ignore spaces between two encoded "words". RFC2047 says the words
409 // should be concatenated without the space (designed for Asian
410 // sentences which have no spaces yet need to be broken into "words" to
411 // keep within the line length limits).
412 bool nonSpaceFound = false;
413 for (int i = 0; i < charset-tail; i++) {
414 if (!isspace (tail[i])) {
415 nonSpaceFound = true;
416 break;
419 if (!encodedWordFoundPreviously || nonSpaceFound) {
420 if (string != tail && tail != charset)
421 memmove(string, tail, charset-tail);
422 string += charset-tail;
424 tail = charset;
425 encodedWordFoundPreviously = true;
427 // move things to point at what they should:
428 // =?...charset...?e?...text...?= (e == encoding)
429 // ^charset ^encoding ^end
430 charset += 2;
431 encoding += 1;
432 end += 2;
434 // find the charset this text is in now
435 size_t cLen = encoding - 1 - charset;
436 bool base64encoded = toupper(*encoding) == 'B';
438 uint32 convertID = B_MAIL_NULL_CONVERSION;
439 char charsetName[cLen + 1];
440 memcpy(charsetName, charset, cLen);
441 charsetName[cLen] = '\0';
442 if (strcasecmp(charsetName, "us-ascii") == 0) {
443 convertID = B_MAIL_US_ASCII_CONVERSION;
444 } else if (strcasecmp(charsetName, "utf-8") == 0) {
445 convertID = B_MAIL_UTF8_CONVERSION;
446 } else {
447 const BCharacterSet* charSet
448 = BCharacterSetRoster::FindCharacterSetByName(charsetName);
449 if (charSet != NULL) {
450 convertID = charSet->GetConversionID();
453 if (convertID == B_MAIL_NULL_CONVERSION) {
454 // unidentified charset
455 // what to do? doing nothing skips the encoded text;
456 // but we should keep it: we copy it to the output.
457 if (string != tail && tail != end)
458 memmove(string, tail, end-tail);
459 string += end-tail;
460 continue;
462 // else we've successfully identified the charset
464 char *src = encoding+2;
465 int32 srcLen = end - 2 - src;
466 // encoded text: src..src+srcLen
468 // decode text, get decoded length (reducing xforms)
469 srcLen = !base64encoded ? decode_qp(src, src, srcLen, 1)
470 : decode_base64(src, src, srcLen);
472 // allocate space for the converted text
473 int32 dstLen = end-string + *bufLen-strLen;
474 char *dst = (char*)malloc(dstLen);
475 int32 cvLen = srcLen;
476 int32 convState = 0;
479 // do the conversion
481 ret = mail_convert_to_utf8(convertID, src, &cvLen, dst, &dstLen,
482 &convState);
483 if (ret != B_OK) {
484 // what to do? doing nothing skips the encoded text
485 // but we should keep it: we copy it to the output.
487 free(dst);
489 if (string != tail && tail != end)
490 memmove(string, tail, end-tail);
491 string += end-tail;
492 continue;
494 /* convert_to_ is either returning something wrong or my
495 test data is screwed up. Whatever it is, Not Enough
496 Space is not the only cause of the below, so we just
497 assume it succeeds if it converts anything at all.
498 else if (cvLen < srcLen)
500 // not enough room to convert the data;
501 // grow *buf and retry
503 free(dst);
505 char *temp = (char*)realloc(*bufp, 2*(*bufLen + 1));
506 if (temp == NULL)
508 ret = B_NO_MEMORY;
509 break;
512 *bufp = temp;
513 *bufLen = 2*(*bufLen + 1);
515 string = *bufp + (string-head);
516 tail = *bufp + (tail-head);
517 charset = *bufp + (charset-head);
518 encoding = *bufp + (encoding-head);
519 end = *bufp + (end-head);
520 src = *bufp + (src-head);
521 head = *bufp;
522 continue;
525 else {
526 if (dstLen > end-string) {
527 // copy the string forward...
528 memmove(string+dstLen, end, strLen - (end-head) + 1);
529 strLen += string+dstLen - end;
530 end = string + dstLen;
533 memcpy(string, dst, dstLen);
534 string += dstLen;
535 free(dst);
536 continue;
540 // copy everything that's left
541 size_t tailLen = strLen - (tail - head);
542 memmove(string, tail, tailLen+1);
543 string += tailLen;
545 // replace the last char
546 (*bufp)[strLen] = lastChar;
548 return ret < B_OK ? ret : string-head;
552 ssize_t
553 utf8_to_rfc2047 (char **bufp, ssize_t length, uint32 charset, char encoding)
555 struct word {
556 BString originalWord;
557 BString convertedWord;
558 bool needsEncoding;
560 // Convert the word from UTF-8 to the desired character set. The
561 // converted version also includes the escape codes to return to ASCII
562 // mode, if relevant. Also note if it uses unprintable characters,
563 // which means it will need that special encoding treatment later.
564 void ConvertWordToCharset (uint32 charset) {
565 int32 state = 0;
566 int32 originalLength = originalWord.Length();
567 int32 convertedLength = originalLength * 5 + 1;
568 char *convertedBuffer = convertedWord.LockBuffer (convertedLength);
569 mail_convert_from_utf8 (charset, originalWord.String(),
570 &originalLength, convertedBuffer, &convertedLength, &state);
571 for (int i = 0; i < convertedLength; i++) {
572 if ((convertedBuffer[i] & (1 << 7)) ||
573 (convertedBuffer[i] >= 0 && convertedBuffer[i] < 32)) {
574 needsEncoding = true;
575 break;
578 convertedWord.UnlockBuffer (convertedLength);
581 struct word *currentWord;
582 BList words;
584 // Break the header into words. White space characters (including tabs and
585 // newlines) separate the words. Each word includes any space before it as
586 // part of the word. Actually, quotes and other special characters
587 // (",()<>@) are treated as separate words of their own so that they don't
588 // get encoded (because MIME headers get the quotes parsed before character
589 // set unconversion is done). The reader is supposed to ignore all white
590 // space between encoded words, which can be inserted so that older mail
591 // parsers don't have overly long line length problems.
593 const char *source = *bufp;
594 const char *bufEnd = *bufp + length;
595 const char *specialChars = "\"()<>@,";
597 while (source < bufEnd) {
598 currentWord = new struct word;
599 currentWord->needsEncoding = false;
601 int wordEnd = 0;
603 // Include leading spaces as part of the word.
604 while (source + wordEnd < bufEnd && isspace (source[wordEnd]))
605 wordEnd++;
607 if (source + wordEnd < bufEnd &&
608 strchr (specialChars, source[wordEnd]) != NULL) {
609 // Got a quote mark or other special character, which is treated as
610 // a word in itself since it shouldn't be encoded, which would hide
611 // it from the mail system.
612 wordEnd++;
613 } else {
614 // Find the end of the word. Leave wordEnd pointing just after the
615 // last character in the word.
616 while (source + wordEnd < bufEnd) {
617 if (isspace(source[wordEnd]) ||
618 strchr (specialChars, source[wordEnd]) != NULL)
619 break;
620 if (wordEnd > 51 /* Makes Base64 ISO-2022-JP "word" a multiple of 4 bytes */ &&
621 0xC0 == (0xC0 & (unsigned int) source[wordEnd])) {
622 // No English words are that long (46 is the longest),
623 // break up what is likely Asian text (which has no spaces)
624 // at the start of the next non-ASCII UTF-8 character (high
625 // two bits are both ones). Note that two encoded words in
626 // a row get joined together, even if there is a space
627 // between them in the final output text, according to the
628 // standard. Next word will also be conveniently get
629 // encoded due to the 0xC0 test.
630 currentWord->needsEncoding = true;
631 break;
633 wordEnd++;
636 currentWord->originalWord.SetTo (source, wordEnd);
637 currentWord->ConvertWordToCharset (charset);
638 words.AddItem(currentWord);
639 source += wordEnd;
642 // Combine adjacent words which contain unprintable text so that the
643 // overhead of switching back and forth between regular text and specially
644 // encoded text is reduced. However, the combined word must be shorter
645 // than the maximum of 75 bytes, including character set specification and
646 // all those delimiters (worst case 22 bytes of overhead).
648 struct word *run;
650 for (int32 i = 0; (currentWord = (struct word *) words.ItemAt (i)) != NULL; i++) {
651 if (!currentWord->needsEncoding)
652 continue; // No need to combine unencoded words.
653 for (int32 g = i+1; (run = (struct word *) words.ItemAt (g)) != NULL; g++) {
654 if (!run->needsEncoding)
655 break; // Don't want to combine encoded and unencoded words.
656 if ((currentWord->convertedWord.Length() + run->convertedWord.Length() <= 53)) {
657 currentWord->originalWord.Append (run->originalWord);
658 currentWord->ConvertWordToCharset (charset);
659 words.RemoveItem(g);
660 delete run;
661 g--;
662 } else // Can't merge this word, result would be too long.
663 break;
667 // Combine the encoded and unencoded words into one line, doing the
668 // quoted-printable or base64 encoding. Insert an extra space between
669 // words which are both encoded to make word wrapping easier, since there
670 // is normally none, and you're allowed to insert space (the receiver
671 // throws it away if it is between encoded words).
673 BString rfc2047;
674 bool previousWordNeededEncoding = false;
676 const char *charset_dec = "none-bug";
677 for (int32 i = 0; mail_charsets[i].charset != NULL; i++) {
678 if (mail_charsets[i].flavor == charset) {
679 charset_dec = mail_charsets[i].charset;
680 break;
684 while ((currentWord = (struct word *)words.RemoveItem((int32)0)) != NULL) {
685 if ((encoding != quoted_printable && encoding != base64) ||
686 !currentWord->needsEncoding) {
687 rfc2047.Append (currentWord->convertedWord);
688 } else {
689 // This word needs encoding. Try to insert a space between it and
690 // the previous word.
691 if (previousWordNeededEncoding)
692 rfc2047 << ' '; // Can insert as many spaces as you want between encoded words.
693 else {
694 // Previous word is not encoded, spaces are significant. Try
695 // to move a space from the start of this word to be outside of
696 // the encoded text, so that there is a bit of space between
697 // this word and the previous one to enhance word wrapping
698 // chances later on.
699 if (currentWord->originalWord.Length() > 1 &&
700 isspace (currentWord->originalWord[0])) {
701 rfc2047 << currentWord->originalWord[0];
702 currentWord->originalWord.Remove (0 /* offset */, 1 /* length */);
703 currentWord->ConvertWordToCharset (charset);
707 char *encoded = NULL;
708 ssize_t encoded_len = 0;
709 int32 convertedLength = currentWord->convertedWord.Length ();
710 const char *convertedBuffer = currentWord->convertedWord.String ();
712 switch (encoding) {
713 case quoted_printable:
714 encoded = (char *) malloc (convertedLength * 3);
715 encoded_len = encode_qp (encoded, convertedBuffer, convertedLength, true /* headerMode */);
716 break;
717 case base64:
718 encoded = (char *) malloc (convertedLength * 2);
719 encoded_len = encode_base64 (encoded, convertedBuffer, convertedLength, true /* headerMode */);
720 break;
721 default: // Unknown encoding type, shouldn't happen.
722 encoded = (char *) convertedBuffer;
723 encoded_len = convertedLength;
724 break;
727 rfc2047 << "=?" << charset_dec << '?' << encoding << '?';
728 rfc2047.Append (encoded, encoded_len);
729 rfc2047 << "?=";
731 if (encoding == quoted_printable || encoding == base64)
732 free(encoded);
734 previousWordNeededEncoding = currentWord->needsEncoding;
735 delete currentWord;
738 free(*bufp);
740 ssize_t finalLength = rfc2047.Length ();
741 *bufp = (char *) (malloc (finalLength + 1));
742 memcpy (*bufp, rfc2047.String(), finalLength);
743 (*bufp)[finalLength] = 0;
745 return finalLength;
749 void
750 FoldLineAtWhiteSpaceAndAddCRLF(BString &string)
752 int inputLength = string.Length();
753 int lineStartIndex;
754 const int maxLineLength = 78; // Doesn't include CRLF.
755 BString output;
756 int splitIndex;
757 int tempIndex;
759 lineStartIndex = 0;
760 while (true) {
761 // If we don't need to wrap the text, just output the remainder, if any.
763 if (lineStartIndex + maxLineLength >= inputLength) {
764 if (lineStartIndex < inputLength) {
765 output.Insert (string, lineStartIndex /* source offset */,
766 inputLength - lineStartIndex /* count */,
767 output.Length() /* insert at */);
768 output.Append (CRLF);
770 break;
773 // Look ahead for a convenient spot to split it, between a comma and
774 // space, which you often see between e-mail addresses like this:
775 // "Joe Who" joe@dot.com, "Someone Else" else@blot.com
777 tempIndex = lineStartIndex + maxLineLength;
778 if (tempIndex > inputLength)
779 tempIndex = inputLength;
780 splitIndex = string.FindLast (", ", tempIndex);
781 if (splitIndex >= lineStartIndex)
782 splitIndex++; // Point to the space character.
784 // If none of those exist, try splitting at any white space.
786 if (splitIndex <= lineStartIndex)
787 splitIndex = string.FindLast (" ", tempIndex);
788 if (splitIndex <= lineStartIndex)
789 splitIndex = string.FindLast ("\t", tempIndex);
791 // If none of those exist, allow for a longer word - split at the next
792 // available white space.
794 if (splitIndex <= lineStartIndex)
795 splitIndex = string.FindFirst (" ", lineStartIndex + 1);
796 if (splitIndex <= lineStartIndex)
797 splitIndex = string.FindFirst ("\t", lineStartIndex + 1);
799 // Give up, the whole rest of the line can't be split, just dump it
800 // out.
802 if (splitIndex <= lineStartIndex) {
803 if (lineStartIndex < inputLength) {
804 output.Insert (string, lineStartIndex /* source offset */,
805 inputLength - lineStartIndex /* count */,
806 output.Length() /* insert at */);
807 output.Append (CRLF);
809 break;
812 // Do the split. The current line up to but not including the space
813 // gets output, followed by a CRLF. The space remains to become the
814 // start of the next line (and that tells the message reader that it is
815 // a continuation line).
817 output.Insert (string, lineStartIndex /* source offset */,
818 splitIndex - lineStartIndex /* count */,
819 output.Length() /* insert at */);
820 output.Append (CRLF);
821 lineStartIndex = splitIndex;
823 string.SetTo (output);
/*!	Reads one logical (unfolded) header line from a stdio stream.

	CRLF line ends are turned into a single LF. A line break followed by a
	space or tab is a fold: the line break is replaced by that white space
	character and reading continues. An empty line is returned as is.

	\param file The stream to read from.
	\param buffer In: a malloc()ed buffer to reuse, or NULL. Out: the buffer
		holding the NUL-terminated line; it may have been reallocated. If
		\a buffer itself is NULL, the line is discarded.
	\param buflen In/out: allocated size of \c *buffer.
	\return The number of characters in the line (0 at end of file), the value
		of ENOMEM if the buffer could not be grown, or a negative value after
		a read error.
*/
ssize_t
readfoldedline(FILE *file, char **buffer, size_t *buflen)
{
	ssize_t capacity = 0;
	if (buflen != NULL && *buflen != 0)
		capacity = *buflen;

	char *line = NULL;
	if (buffer != NULL && *buffer != NULL)
		line = *buffer;

	ssize_t used = 0;
		// Number of characters currently in the buffer.

	for (;;) {
		// Keep room for two more characters: the next one read and the
		// terminating NUL byte.
		if (line == NULL || used + 2 >= capacity) {
			char *grown = (char *)realloc(line, capacity + 64);
			if (grown == NULL) {
				// Out of memory, however existing buffer remains allocated.
				used = ENOMEM;
				break;
			}
			line = grown;
			capacity += 64;
		}

		int ch = fgetc(file);
		if (ch == EOF) {
			if (ferror(file)) {
				used = errno;
				if (used >= 0)
					used = -1;
						// Error codes must be negative.
			} else if (used > 0) {
				// Genuine end of file: finish the pending text as a line.
				// If nothing was read yet, an empty string is returned.
				line[used++] = '\n';
				if (line[used - 2] == '\r') {
					line[used - 2] = '\n';
					--used;
				}
			}
			break;
		}

		line[used++] = ch;
		if (ch != '\n')
			continue;

		// Convert CRLF end of line to just a LF, before deciding on folding.
		if (used >= 2 && line[used - 2] == '\r') {
			line[used - 2] = '\n';
			--used;
		}

		// Return empty lines, so that they don't disappear if the next line
		// starts with a space.
		if (used <= 1)
			break;

		// Fold if the first character on the next line is white space.
		ch = fgetc(file);
			// Note it's OK to read EOF and ungetc it too.
		if (ch != ' ' && ch != '\t') {
			// Not folding, the line is complete.
			ungetc(ch, file);
			break;
		}
		line[used - 1] = ch;
			// Replace \n with the white space character.
	}

	if (line != NULL && used >= 0)
		line[used] = '\0';

	if (buffer != NULL)
		*buffer = line;
	else if (line != NULL)
		free(line);

	if (buflen != NULL)
		*buflen = capacity;

	return used;
}
910 ssize_t
911 readfoldedline(BPositionIO &in, char **buffer, size_t *buflen)
913 ssize_t len = buflen && *buflen ? *buflen : 0;
914 char * buf = buffer && *buffer ? *buffer : NULL;
915 ssize_t cnt = 0; // Number of characters currently in the buffer.
916 char c;
917 status_t errorCode;
919 while (true) {
920 // Make sure there is space in the buffer for two more characters (one
921 // for the next character, and one for the end of string NUL byte).
922 if (buf == NULL || cnt + 2 >= len) {
923 char *temp = (char *)realloc(buf, len + 64);
924 if (temp == NULL) {
925 // Out of memory, however existing buffer remains allocated.
926 cnt = ENOMEM;
927 break;
929 len += 64;
930 buf = temp;
933 errorCode = in.Read (&c,1); // A really slow way of reading - unbuffered.
934 if (errorCode != 1) {
935 if (errorCode < 0) {
936 cnt = errorCode; // IO error encountered, just return the code.
937 } else {
938 // Really is end of file. Also make it end of line if there is
939 // some text already read in. If the first thing read was EOF,
940 // just return an empty string.
941 if (cnt > 0) {
942 buf[cnt++] = '\n';
943 if (buf[cnt-2] == '\r') {
944 buf[cnt-2] = '\n';
945 --cnt;
949 break;
952 buf[cnt++] = c;
954 if (c == '\n') {
955 // Convert CRLF end of line to just a LF. Do it before folding, in
956 // case we don't need to fold.
957 if (cnt >= 2 && buf[cnt-2] == '\r') {
958 buf[cnt-2] = '\n';
959 --cnt;
961 // If the current line is empty then return it (so that empty lines
962 // don't disappear if the next line starts with a space).
963 if (cnt <= 1)
964 break;
965 // if first character on the next line is whitespace, fold lines
966 errorCode = in.Read(&c,1);
967 if (errorCode == 1) {
968 if (c == ' ' || c == '\t')
969 buf[cnt-1] = c; // Replace \n with the white space character.
970 else {
971 // Not folding, we finished reading a whole line.
972 in.Seek(-1,SEEK_CUR); // Undo the look-ahead character read.
973 break;
975 } else if (errorCode < 0) {
976 cnt = errorCode;
977 break;
978 } else // No next line; at the end of the file. Return the line.
979 break;
983 if (buf != NULL && cnt >= 0)
984 buf[cnt] = '\0';
986 if (buffer)
987 *buffer = buf;
988 else if (buf)
989 free(buf);
991 if (buflen)
992 *buflen = len;
994 return cnt;
/*!	Extracts the next logical (unfolded) header line from a NUL-terminated
	string.

	CRLF line ends are turned into a single LF. A line break followed by a
	space or tab is a fold: the line break is replaced by that white space
	character and reading continues. An empty line is returned as is.

	\param header In/out: read position within the text. It is advanced past
		the characters consumed, but never past the terminating NUL, so the
		function can safely be called again once the end has been reached.
	\param buffer In: a malloc()ed buffer to reuse, or NULL. Out: the buffer
		holding the NUL-terminated line; it may have been reallocated. If
		\a buffer itself is NULL, the line is discarded.
	\param buflen In/out: allocated size of \c *buffer.
	\return The number of characters in the line (0 at the end of the text),
		or the value of ENOMEM if the buffer could not be grown.
*/
ssize_t
nextfoldedline(const char** header, char **buffer, size_t *buflen)
{
	ssize_t len = buflen && *buflen ? *buflen : 0;
	char * buf = buffer && *buffer ? *buffer : NULL;
	ssize_t cnt = 0; // Number of characters currently in the buffer.
	char c;

	while (true) {
		// Make sure there is space in the buffer for two more characters (one
		// for the next character, and one for the end of string NUL byte).
		if (buf == NULL || cnt + 2 >= len) {
			char *temp = (char *)realloc(buf, len + 64);
			if (temp == NULL) {
				// Out of memory, however existing buffer remains allocated.
				cnt = ENOMEM;
				break;
			}
			len += 64;
			buf = temp;
		}

		// Peek at the next character. The terminating NUL is not consumed,
		// otherwise the next call would start reading behind the end of the
		// text.
		c = **header;
		if (c == 0) {
			// End of file.  Also make it end of line if there is some text
			// already read in.  If the first thing read was EOF, just return
			// an empty string.
			if (cnt > 0) {
				buf[cnt++] = '\n';
				if (buf[cnt - 2] == '\r') {
					buf[cnt - 2] = '\n';
					--cnt;
				}
			}
			break;
		}
		(*header)++;

		buf[cnt++] = c;

		if (c == '\n') {
			// Convert CRLF end of line to just a LF.  Do it before folding, in
			// case we don't need to fold.
			if (cnt >= 2 && buf[cnt - 2] == '\r') {
				buf[cnt - 2] = '\n';
				--cnt;
			}
			// If the current line is empty then return it (so that empty lines
			// don't disappear if the next line starts with a space).
			if (cnt <= 1)
				break;
			// if first character on the next line is whitespace, fold lines
			c = **header;
			if (c == ' ' || c == '\t') {
				(*header)++;
				buf[cnt - 1] = c; // Replace \n with the white space character.
			} else {
				// Not folding, we finished reading a line; the look-ahead
				// character stays unconsumed.
				break;
			}
		}
	}

	if (buf != NULL && cnt >= 0)
		buf[cnt] = '\0';

	if (buffer)
		*buffer = buf;
	else if (buf)
		free(buf);

	if (buflen)
		*buflen = len;

	return cnt;
}
1078 void
1079 trim_white_space(BString &string)
1081 int32 i;
1082 int32 length = string.Length();
1083 char *buffer = string.LockBuffer(length + 1);
1085 while (length > 0 && isspace(buffer[length - 1]))
1086 length--;
1087 buffer[length] = '\0';
1089 for (i = 0; buffer[i] && isspace(buffer[i]); i++) {}
1090 if (i != 0) {
1091 length -= i;
1092 memmove(buffer,buffer + i,length + 1);
1094 string.UnlockBuffer(length);
1098 /*! Tries to return a human-readable name from the specified
1099 header parameter (should be from "To:" or "From:").
1100 Tries to return the name rather than the eMail address.
1102 void
1103 extract_address_name(BString &header)
1105 BString name;
1106 const char *start = header.String();
1107 const char *stop = start + strlen (start);
1109 // Find a string S in the header (email foo) that matches:
1110 // Old style name in brackets: foo@bar.com (S)
1111 // New style quotes: "S" <foo@bar.com>
1112 // New style no quotes if nothing else found: S <foo@bar.com>
1113 // If nothing else found then use the whole thing: S
1115 for (int i = 0; i <= 3; i++) {
1116 // Set p1 to the first letter in the name and p2 to just past the last
1117 // letter in the name. p2 stays NULL if a name wasn't found in this
1118 // pass.
1119 const char *p1 = NULL, *p2 = NULL;
1121 switch (i) {
1122 case 0: // foo@bar.com (S)
1123 if ((p1 = strchr(start,'(')) != NULL) {
1124 p1++; // Advance to first letter in the name.
1125 size_t nest = 1; // Handle nested brackets.
1126 for (p2 = p1; p2 < stop; ++p2)
1128 if (*p2 == ')')
1129 --nest;
1130 else if (*p2 == '(')
1131 ++nest;
1132 if (nest <= 0)
1133 break;
1135 if (nest != 0)
1136 p2 = NULL; // False alarm, no terminating bracket.
1138 break;
1139 case 1: // "S" <foo@bar.com>
1140 if ((p1 = strchr(start, '\"')) != NULL)
1141 p2 = strchr(++p1, '\"');
1142 break;
1143 case 2: // S <foo@bar.com>
1144 p1 = start;
1145 if (name.Length() == 0)
1146 p2 = strchr(start, '<');
1147 break;
1148 case 3: // S
1149 p1 = start;
1150 if (name.Length() == 0)
1151 p2 = stop;
1152 break;
1155 // Remove leading and trailing space-like characters and save the
1156 // result if it is longer than any other likely names found.
1157 if (p2 != NULL) {
1158 while (p1 < p2 && (isspace (*p1)))
1159 ++p1;
1161 while (p1 < p2 && (isspace (p2[-1])))
1162 --p2;
1164 int newLength = p2 - p1;
1165 if (name.Length() < newLength)
1166 name.SetTo(p1, newLength);
1170 int32 lessIndex = name.FindFirst('<');
1171 int32 greaterIndex = name.FindLast('>');
1173 if (lessIndex == 0) {
1174 // Have an address of the form <address> and nothing else, so remove
1175 // the greater and less than signs, if any.
1176 if (greaterIndex > 0)
1177 name.Remove(greaterIndex, 1);
1178 name.Remove(lessIndex, 1);
1179 } else if (lessIndex > 0 && lessIndex < greaterIndex) {
1180 // Yahoo stupidly inserts the e-mail address into the name string, so
1181 // this bit of code fixes: "Joe <joe@yahoo.com>" <joe@yahoo.com>
1182 name.Remove(lessIndex, greaterIndex - lessIndex + 1);
1185 trim_white_space(name);
1186 header = name;
1190 /*! Given a subject in a BString, remove the extraneous RE: re: and other stuff
1191 to get down to the core subject string, which should be identical for all
1192 messages posted about a topic. The input string is modified in place to
1193 become the output core subject string.
1195 void
1196 SubjectToThread (BString &string)
1198 // a regex that matches a non-ASCII UTF8 character:
1199 #define U8C \
1200 "[\302-\337][\200-\277]" \
1201 "|\340[\302-\337][\200-\277]" \
1202 "|[\341-\357][\200-\277][\200-\277]" \
1203 "|\360[\220-\277][\200-\277][\200-\277]" \
1204 "|[\361-\367][\200-\277][\200-\277][\200-\277]" \
1205 "|\370[\210-\277][\200-\277][\200-\277][\200-\277]" \
1206 "|[\371-\373][\200-\277][\200-\277][\200-\277][\200-\277]" \
1207 "|\374[\204-\277][\200-\277][\200-\277][\200-\277][\200-\277]" \
1208 "|\375[\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]"
1210 #define PATTERN \
1211 "^ +" \
1212 "|^(\\[[^]]*\\])(\\<| +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \
1213 "|^( +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \
1214 "| *\\(fwd\\) *$"
1216 if (gRebuf == NULL && atomic_add(&gLocker, 1) == 0) {
1217 // the idea is to compile the regexp once to speed up testing
1219 for (int i=0; i<256; ++i) gTranslation[i]=i;
1220 for (int i='a'; i<='z'; ++i) gTranslation[i]=toupper(i);
1222 gRe.translate = gTranslation;
1223 gRe.regs_allocated = REGS_FIXED;
1224 re_syntax_options = RE_SYNTAX_POSIX_EXTENDED;
1226 const char *pattern = PATTERN;
1227 // count subexpressions in PATTERN
1228 for (unsigned int i=0; pattern[i] != 0; ++i)
1230 if (pattern[i] == '\\')
1231 ++i;
1232 else if (pattern[i] == '(')
1233 ++gNsub;
1236 const char *err = re_compile_pattern(pattern,strlen(pattern),&gRe);
1237 if (err == NULL)
1238 gRebuf = &gRe;
1239 else
1240 fprintf(stderr, "Failed to compile the regex: %s\n", err);
1241 } else {
1242 int32 tries = 200;
1243 while (gRebuf == NULL && tries-- > 0)
1244 snooze(10000);
1247 if (gRebuf) {
1248 struct re_registers regs;
1249 // can't be static if this function is to be thread-safe
1251 regs.num_regs = gNsub;
1252 regs.start = (regoff_t*)malloc(gNsub*sizeof(regoff_t));
1253 regs.end = (regoff_t*)malloc(gNsub*sizeof(regoff_t));
1255 for (int start = 0; (start = re_search(gRebuf, string.String(),
1256 string.Length(), 0, string.Length(), &regs)) >= 0;) {
1258 // we found something
1261 // don't delete [bemaildaemon]...
1262 if (start == regs.start[1])
1263 start = regs.start[2];
1265 string.Remove(start,regs.end[0]-start);
1266 if (start)
1267 string.Insert(' ',1,start);
1269 // TODO: for some subjects this results in an endless loop, check
1270 // why this happen.
1271 if (regs.end[0] - start <= 1)
1272 break;
1275 free(regs.start);
1276 free(regs.end);
1279 // Finally remove leading and trailing space. Some software, like
1280 // tm-edit 1.8, appends a space to the subject, which would break
1281 // threading if we left it in.
1282 trim_white_space(string);
1286 /*! Converts a date to a time. Handles numeric time zones too, unlike
1287 parsedate(). Returns -1 if it fails.
1289 time_t
1290 ParseDateWithTimeZone(const char *DateString)
1292 time_t currentTime;
1293 time_t dateAsTime;
1294 char tempDateString[80];
1295 char tempZoneString[6];
1296 time_t zoneDeltaTime;
1297 int zoneIndex;
1298 char *zonePntr;
1300 // See if we can remove the time zone portion. parsedate understands time
1301 // zone 3 letter names, but doesn't understand the numeric +9999 time zone
1302 // format. To do: see if a newer parsedate exists.
1304 strncpy (tempDateString, DateString, sizeof (tempDateString));
1305 tempDateString[sizeof (tempDateString) - 1] = 0;
1307 // Remove trailing spaces.
1308 zonePntr = tempDateString + strlen (tempDateString) - 1;
1309 while (zonePntr >= tempDateString && isspace (*zonePntr))
1310 *zonePntr-- = 0;
1311 if (zonePntr < tempDateString)
1312 return -1; // Empty string.
1314 // Remove the trailing time zone in round brackets, like in
1315 // Fri, 22 Feb 2002 15:22:42 EST (-0500)
1316 // Thu, 25 Apr 1996 11:44:19 -0400 (EDT)
1317 if (tempDateString[strlen(tempDateString)-1] == ')')
1319 zonePntr = strrchr (tempDateString, '(');
1320 if (zonePntr != NULL)
1322 *zonePntr-- = 0; // Zap the '(', then remove trailing spaces.
1323 while (zonePntr >= tempDateString && isspace (*zonePntr))
1324 *zonePntr-- = 0;
1325 if (zonePntr < tempDateString)
1326 return -1; // Empty string.
1330 // Look for a numeric time zone like Tue, 30 Dec 2003 05:01:40 +0000
1331 for (zoneIndex = strlen (tempDateString); zoneIndex >= 0; zoneIndex--)
1333 zonePntr = tempDateString + zoneIndex;
1334 if (zonePntr[0] == '+' || zonePntr[0] == '-')
1336 if (zonePntr[1] >= '0' && zonePntr[1] <= '9' &&
1337 zonePntr[2] >= '0' && zonePntr[2] <= '9' &&
1338 zonePntr[3] >= '0' && zonePntr[3] <= '9' &&
1339 zonePntr[4] >= '0' && zonePntr[4] <= '9')
1340 break;
1343 if (zoneIndex >= 0)
1345 // Remove the zone from the date string and any following time zone
1346 // letter codes. Also put in GMT so that the date gets parsed as GMT.
1347 memcpy (tempZoneString, zonePntr, 5);
1348 tempZoneString [5] = 0;
1349 strcpy (zonePntr, "GMT");
1351 else // No numeric time zone found.
1352 strcpy (tempZoneString, "+0000");
1354 time (&currentTime);
1355 dateAsTime = parsedate (tempDateString, currentTime);
1356 if (dateAsTime == (time_t) -1)
1357 return -1; // Failure.
1359 zoneDeltaTime = 60 * atol (tempZoneString + 3); // Get the last two digits - minutes.
1360 tempZoneString[3] = 0;
1361 zoneDeltaTime += atol (tempZoneString + 1) * 60 * 60; // Get the first two digits - hours.
1362 if (tempZoneString[0] == '+')
1363 zoneDeltaTime = 0 - zoneDeltaTime;
1364 dateAsTime += zoneDeltaTime;
1366 return dateAsTime;
1370 /*! Parses a mail header and fills the headers BMessage
1372 status_t
1373 parse_header(BMessage &headers, BPositionIO &input)
1375 char *buffer = NULL;
1376 size_t bufferSize = 0;
1377 int32 length;
1379 while ((length = readfoldedline(input, &buffer, &bufferSize)) >= 2) {
1380 --length;
1381 // Don't include the \n at the end of the buffer.
1383 // convert to UTF-8 and null-terminate the buffer
1384 length = rfc2047_to_utf8(&buffer, &bufferSize, length);
1385 buffer[length] = '\0';
1387 const char *delimiter = strstr(buffer, ":");
1388 if (delimiter == NULL)
1389 continue;
1391 BString header(buffer, delimiter - buffer);
1392 header.CapitalizeEachWord();
1393 // unified case for later fetch
1395 delimiter++; // Skip the colon.
1396 // Skip over leading white space and tabs.
1397 // TODO: (comments in brackets).
1398 while (isspace(*delimiter))
1399 delimiter++;
1401 // TODO: implement joining of multiple header tags (i.e. multiple "Cc:"s)
1402 headers.AddString(header.String(), delimiter);
1404 free(buffer);
1406 return B_OK;
1410 status_t
1411 extract_from_header(const BString& header, const BString& field,
1412 BString& target)
1414 int32 headerLength = header.Length();
1415 int32 fieldEndPos = 0;
1416 while (true) {
1417 int32 pos = header.IFindFirst(field, fieldEndPos);
1418 if (pos < 0)
1419 return B_BAD_VALUE;
1420 fieldEndPos = pos + field.Length();
1422 if (pos != 0 && header.ByteAt(pos - 1) != '\n')
1423 continue;
1424 if (header.ByteAt(fieldEndPos) == ':')
1425 break;
1427 fieldEndPos++;
1429 int32 crPos = fieldEndPos;
1430 while (true) {
1431 fieldEndPos = crPos;
1432 crPos = header.FindFirst('\n', crPos);
1433 if (crPos < 0)
1434 crPos = headerLength;
1435 BString temp;
1436 header.CopyInto(temp, fieldEndPos, crPos - fieldEndPos);
1437 if (header.ByteAt(crPos - 1) == '\r') {
1438 temp.Truncate(temp.Length() - 1);
1439 temp += " ";
1441 target += temp;
1442 crPos++;
1443 if (crPos >= headerLength)
1444 break;
1445 char nextByte = header.ByteAt(crPos);
1446 if (nextByte != ' ' && nextByte != '\t')
1447 break;
1448 crPos++;
1451 size_t bufferSize = target.Length();
1452 char* buffer = target.LockBuffer(bufferSize);
1453 size_t length = rfc2047_to_utf8(&buffer, &bufferSize, bufferSize);
1454 target.UnlockBuffer(length);
1456 trim_white_space(target);
1458 return B_OK;
1462 void
1463 extract_address(BString &address)
1465 const char *string = address.String();
1466 int32 first;
1468 // first, remove all quoted text
1470 if ((first = address.FindFirst('"')) >= 0) {
1471 int32 last = first + 1;
1472 while (string[last] && string[last] != '"')
1473 last++;
1475 if (string[last] == '"')
1476 address.Remove(first, last + 1 - first);
1479 // try to extract the address now
1481 if ((first = address.FindFirst('<')) >= 0) {
1482 // the world likes us and we can just get the address the easy way...
1483 int32 last = address.FindFirst('>');
1484 if (last >= 0) {
1485 address.Truncate(last);
1486 address.Remove(0, first + 1);
1488 return;
1492 // then, see if there is anything in parenthesis to throw away
1494 if ((first = address.FindFirst('(')) >= 0) {
1495 int32 last = first + 1;
1496 while (string[last] && string[last] != ')')
1497 last++;
1499 if (string[last] == ')')
1500 address.Remove(first, last + 1 - first);
1503 // now, there shouldn't be much else left
1505 trim_white_space(address);
1509 void
1510 get_address_list(BList &list, const char *string,
1511 void (*cleanupFunc)(BString &))
1513 if (string == NULL || !string[0])
1514 return;
1516 const char *start = string;
1518 while (true) {
1519 if (string[0] == '"') {
1520 const char *quoteEnd = ++string;
1522 while (quoteEnd[0] && quoteEnd[0] != '"')
1523 quoteEnd++;
1525 if (!quoteEnd[0]) // string exceeds line!
1526 quoteEnd = string;
1528 string = quoteEnd + 1;
1531 if (string[0] == ',' || string[0] == '\0') {
1532 BString address(start, string - start);
1533 trim_white_space(address);
1535 if (cleanupFunc)
1536 cleanupFunc(address);
1538 list.AddItem(strdup(address.String()));
1540 start = string + 1;
1543 if (!string[0])
1544 break;
1546 string++;