btrfs: Attempt to fix GCC2 build.
[haiku.git] / src / kits / mail / mail_util.cpp
blob1ae92c7ac8488728dd16a4c6ade1881fa206a02f
1 /*
2 * Copyright 2011-2016, Haiku, Inc. All rights reserved.
3 * Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved.
4 */
7 #include <mail_util.h>
9 #include <stdlib.h>
10 #include <strings.h>
11 #include <stdio.h>
12 #define __USE_GNU
13 #include <regex.h>
14 #include <ctype.h>
15 #include <errno.h>
17 #include <FindDirectory.h>
18 #include <List.h>
19 #include <Locker.h>
20 #include <parsedate.h>
21 #include <Path.h>
22 #include <String.h>
23 #include <UTF8.h>
25 #include <mail_encoding.h>
27 #include <AttributeUtilities.h>
28 #include <CharacterSet.h>
29 #include <CharacterSetRoster.h>
32 using namespace BPrivate;
35 #define CRLF "\r\n"
37 struct CharsetConversionEntry {
38 const char *charset;
39 uint32 flavor;
42 extern const CharsetConversionEntry mail_charsets[] = {
43 // In order of authority, so when searching for the name for a particular
44 // numbered conversion, start at the beginning of the array.
45 {"iso-8859-1", B_ISO1_CONVERSION}, // MIME STANDARD
46 {"iso-8859-2", B_ISO2_CONVERSION}, // MIME STANDARD
47 {"iso-8859-3", B_ISO3_CONVERSION}, // MIME STANDARD
48 {"iso-8859-4", B_ISO4_CONVERSION}, // MIME STANDARD
49 {"iso-8859-5", B_ISO5_CONVERSION}, // MIME STANDARD
50 {"iso-8859-6", B_ISO6_CONVERSION}, // MIME STANDARD
51 {"iso-8859-7", B_ISO7_CONVERSION}, // MIME STANDARD
52 {"iso-8859-8", B_ISO8_CONVERSION}, // MIME STANDARD
53 {"iso-8859-9", B_ISO9_CONVERSION}, // MIME STANDARD
54 {"iso-8859-10", B_ISO10_CONVERSION}, // MIME STANDARD
55 {"iso-8859-13", B_ISO13_CONVERSION}, // MIME STANDARD
56 {"iso-8859-14", B_ISO14_CONVERSION}, // MIME STANDARD
57 {"iso-8859-15", B_ISO15_CONVERSION}, // MIME STANDARD
59 {"shift_jis", B_SJIS_CONVERSION}, // MIME STANDARD
60 {"shift-jis", B_SJIS_CONVERSION},
61 {"iso-2022-jp", B_JIS_CONVERSION}, // MIME STANDARD
62 {"euc-jp", B_EUC_CONVERSION}, // MIME STANDARD
64 {"euc-kr", B_EUC_KR_CONVERSION}, // Shift encoding 7 bit and KSC-5601 if bit 8 is on. // MIME STANDARD
65 {"ksc5601", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE?
66 {"ks_c_5601-1987", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE with stupid MS software
68 {"koi8-r", B_KOI8R_CONVERSION}, // MIME STANDARD
69 {"windows-1251",B_MS_WINDOWS_1251_CONVERSION}, // MIME STANDARD
70 {"windows-1252",B_MS_WINDOWS_CONVERSION}, // MIME STANDARD
72 {"dos-437", B_MS_DOS_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM437? )
73 {"dos-866", B_MS_DOS_866_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM866? )
74 {"x-mac-roman", B_MAC_ROMAN_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( macintosh? + x-mac-roman? )
76 {"big5", 24}, // MIME STANDARD
78 {"gb18030", 25}, // WRONG NAME : MIME STANDARD NAME = NONE ( GB18030? )
79 {"gb2312", 25}, // COMPATIBLE
80 {"gbk", 25}, // COMPATIBLE
82 /* {"utf-16", B_UNICODE_CONVERSION}, Might not work due to NULs in text, needs testing. */
83 {"us-ascii", B_MAIL_US_ASCII_CONVERSION}, // MIME STANDARD
84 {"utf-8", B_MAIL_UTF8_CONVERSION /* Special code for no conversion */}, // MIME STANDARD
86 {NULL, (uint32) -1} /* End of list marker, NULL string pointer is the key. */
90 static int32 gLocker = 0;
91 static size_t gNsub = 1;
92 static re_pattern_buffer gRe;
93 static re_pattern_buffer *gRebuf = NULL;
94 static unsigned char gTranslation[256];
97 static int
98 handle_non_rfc2047_encoding(char **buffer, size_t *bufferLength,
99 size_t *sourceLength)
101 char *string = *buffer;
102 int32 length = *sourceLength;
103 int32 i;
105 // check for 8-bit characters
106 for (i = 0;i < length;i++)
107 if (string[i] & 0x80)
108 break;
109 if (i == length)
110 return false;
112 // check for groups of 8-bit characters - this code is not very smart;
113 // it just can detect some sort of single-byte encoded stuff, the rest
114 // is regarded as UTF-8
116 int32 singletons = 0,doubles = 0;
118 for (i = 0;i < length;i++)
120 if (string[i] & 0x80)
122 if ((string[i + 1] & 0x80) == 0)
123 singletons++;
124 else doubles++;
125 i++;
129 if (singletons != 0) // can't be valid UTF-8 anymore, so we assume ISO-Latin-1
131 int32 state = 0;
132 // just to be sure
133 int32 destLength = length * 4 + 1;
134 int32 destBufferLength = destLength;
135 char *dest = (char*)malloc(destLength);
136 if (dest == NULL)
137 return 0;
139 if (convert_to_utf8(B_ISO1_CONVERSION, string, &length,dest,
140 &destLength, &state) == B_OK) {
141 *buffer = dest;
142 *bufferLength = destBufferLength;
143 *sourceLength = destLength;
144 return true;
146 free(dest);
147 return false;
150 // we assume a valid UTF-8 string here, but yes, we don't check it
151 return true;
155 // #pragma mark -
158 status_t
159 write_read_attr(BNode& node, read_flags flag)
161 if (node.WriteAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32))
162 < 0)
163 return B_ERROR;
165 // Manage the status string only if it currently has a known state
166 BString currentStatus;
167 if (node.ReadAttrString(B_MAIL_ATTR_STATUS, &currentStatus) == B_OK
168 && currentStatus.ICompare("New") != 0
169 && currentStatus.ICompare("Read") != 0
170 && currentStatus.ICompare("Seen") != 0) {
171 return B_OK;
174 const char* statusString = flag == B_READ ? "Read"
175 : flag == B_SEEN ? "Seen" : "New";
176 if (node.WriteAttr(B_MAIL_ATTR_STATUS, B_STRING_TYPE, 0, statusString,
177 strlen(statusString)) < 0)
178 return B_ERROR;
180 return B_OK;
184 status_t
185 read_read_attr(BNode& node, read_flags& flag)
187 if (node.ReadAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32))
188 == sizeof(int32))
189 return B_OK;
191 BString statusString;
192 if (node.ReadAttrString(B_MAIL_ATTR_STATUS, &statusString) == B_OK) {
193 if (statusString.ICompare("New"))
194 flag = B_UNREAD;
195 else
196 flag = B_READ;
198 return B_OK;
201 return B_ERROR;
205 // The next couple of functions are our wrapper around convert_to_utf8 and
206 // convert_from_utf8 so that they can also convert from UTF-8 to UTF-8 by
207 // specifying the B_MAIL_UTF8_CONVERSION constant as the conversion operation.
208 // It also lets us add new conversions, like B_MAIL_US_ASCII_CONVERSION.
211 status_t
212 mail_convert_to_utf8(uint32 srcEncoding, const char *src, int32 *srcLen,
213 char *dst, int32 *dstLen, int32 *state, char substitute)
215 int32 copyAmount;
216 char *originalDst = dst;
217 status_t returnCode = -1;
219 if (srcEncoding == B_MAIL_UTF8_CONVERSION) {
220 copyAmount = *srcLen;
221 if (*dstLen < copyAmount)
222 copyAmount = *dstLen;
223 memcpy (dst, src, copyAmount);
224 *srcLen = copyAmount;
225 *dstLen = copyAmount;
226 returnCode = B_OK;
227 } else if (srcEncoding == B_MAIL_US_ASCII_CONVERSION) {
228 int32 i;
229 unsigned char letter;
230 copyAmount = *srcLen;
231 if (*dstLen < copyAmount)
232 copyAmount = *dstLen;
233 for (i = 0; i < copyAmount; i++) {
234 letter = *src++;
235 if (letter > 0x80U)
236 // Invalid, could also use substitute, but better to strip high bit.
237 *dst++ = letter - 0x80U;
238 else if (letter == 0x80U)
239 // Can't convert to 0x00 since that's NUL, which would cause problems.
240 *dst++ = substitute;
241 else
242 *dst++ = letter;
244 *srcLen = copyAmount;
245 *dstLen = copyAmount;
246 returnCode = B_OK;
247 } else
248 returnCode = convert_to_utf8 (srcEncoding, src, srcLen,
249 dst, dstLen, state, substitute);
251 if (returnCode == B_OK) {
252 // Replace spurious NUL bytes, which should normally not be in the
253 // output of the decoding (not normal UTF-8 characters, and no NULs are
254 // in our usual input strings). They happen for some odd ISO-2022-JP
255 // byte pair combinations which are improperly handled by the BeOS
256 // routines. Like "\e$ByD\e(B" where \e is the ESC character $1B, the
257 // first ESC $ B switches to a Japanese character set, then the next
258 // two bytes "yD" specify a character, then ESC ( B switches back to
259 // the ASCII character set. The UTF-8 conversion yields a NUL byte.
260 int32 i;
261 for (i = 0; i < *dstLen; i++)
262 if (originalDst[i] == 0)
263 originalDst[i] = substitute;
265 return returnCode;
269 status_t
270 mail_convert_from_utf8(uint32 dstEncoding, const char *src, int32 *srcLen,
271 char *dst, int32 *dstLen, int32 *state, char substitute)
273 int32 copyAmount;
274 status_t errorCode;
275 int32 originalDstLen = *dstLen;
276 int32 tempDstLen;
277 int32 tempSrcLen;
279 if (dstEncoding == B_MAIL_UTF8_CONVERSION) {
280 copyAmount = *srcLen;
281 if (*dstLen < copyAmount)
282 copyAmount = *dstLen;
283 memcpy (dst, src, copyAmount);
284 *srcLen = copyAmount;
285 *dstLen = copyAmount;
286 return B_OK;
289 if (dstEncoding == B_MAIL_US_ASCII_CONVERSION) {
290 int32 characterLength;
291 int32 dstRemaining = *dstLen;
292 unsigned char letter;
293 int32 srcRemaining = *srcLen;
295 // state contains the number of source bytes to skip, left over from a
296 // partial UTF-8 character split over the end of the buffer from last
297 // time.
298 if (srcRemaining <= *state) {
299 *state -= srcRemaining;
300 *dstLen = 0;
301 return B_OK;
303 srcRemaining -= *state;
304 src += *state;
305 *state = 0;
307 while (true) {
308 if (srcRemaining <= 0 || dstRemaining <= 0)
309 break;
310 letter = *src;
311 if (letter < 0x80)
312 characterLength = 1; // Regular ASCII equivalent code.
313 else if (letter < 0xC0)
314 characterLength = 1; // Invalid in-between data byte 10xxxxxx.
315 else if (letter < 0xE0)
316 characterLength = 2;
317 else if (letter < 0xF0)
318 characterLength = 3;
319 else if (letter < 0xF8)
320 characterLength = 4;
321 else if (letter < 0xFC)
322 characterLength = 5;
323 else if (letter < 0xFE)
324 characterLength = 6;
325 else
326 characterLength = 1; // 0xFE and 0xFF are invalid in UTF-8.
327 if (letter < 0x80)
328 *dst++ = *src;
329 else
330 *dst++ = substitute;
331 dstRemaining--;
332 if (srcRemaining < characterLength) {
333 // Character split past the end of the buffer.
334 *state = characterLength - srcRemaining;
335 srcRemaining = 0;
336 } else {
337 src += characterLength;
338 srcRemaining -= characterLength;
341 // Update with the amounts used.
342 *srcLen = *srcLen - srcRemaining;
343 *dstLen = *dstLen - dstRemaining;
344 return B_OK;
347 errorCode = convert_from_utf8(dstEncoding, src, srcLen, dst, dstLen, state,
348 substitute);
349 if (errorCode != B_OK)
350 return errorCode;
352 if (dstEncoding != B_JIS_CONVERSION)
353 return B_OK;
355 // B_JIS_CONVERSION (ISO-2022-JP) works by shifting between different
356 // character subsets. For E-mail headers (and other uses), it needs to be
357 // switched back to ASCII at the end (otherwise the last character gets
358 // lost or other weird things happen in the headers). Note that we can't
359 // just append the escape code since the convert_from_utf8 "state" will be
360 // wrong. So we append an ASCII letter and throw it away, leaving just the
361 // escape code. Well, it actually switches to the Roman character set, not
362 // ASCII, but that should be OK.
364 tempDstLen = originalDstLen - *dstLen;
365 if (tempDstLen < 3) // Not enough space remaining in the output.
366 return B_OK; // Sort of an error, but we did convert the rest OK.
367 tempSrcLen = 1;
368 errorCode = convert_from_utf8(dstEncoding, "a", &tempSrcLen,
369 dst + *dstLen, &tempDstLen, state, substitute);
370 if (errorCode != B_OK)
371 return errorCode;
372 *dstLen += tempDstLen - 1 /* don't include the ASCII letter */;
373 return B_OK;
377 ssize_t
378 rfc2047_to_utf8(char **bufp, size_t *bufLen, size_t strLen)
380 char *head, *tail;
381 char *charset, *encoding, *end;
382 ssize_t ret = B_OK;
384 if (bufp == NULL || *bufp == NULL)
385 return -1;
387 char *string = *bufp;
389 //---------Handle *&&^%*&^ non-RFC compliant, 8bit mail
390 if (handle_non_rfc2047_encoding(bufp,bufLen,&strLen))
391 return strLen;
393 // set up string length
394 if (strLen == 0)
395 strLen = strlen(*bufp);
396 char lastChar = (*bufp)[strLen];
397 (*bufp)[strLen] = '\0';
399 //---------Whew! Now for RFC compliant mail
400 bool encodedWordFoundPreviously = false;
401 for (head = tail = string;
402 ((charset = strstr(tail, "=?")) != NULL)
403 && (((encoding = strchr(charset + 2, '?')) != NULL)
404 && encoding[1] && (encoding[2] == '?') && encoding[3])
405 && (end = strstr(encoding + 3, "?=")) != NULL;
406 // found "=?...charset...?e?...text...?= (e == encoding)
407 // ^charset ^encoding ^end
408 tail = end)
410 // Copy non-encoded text (from tail up to charset) to the output.
411 // Ignore spaces between two encoded "words". RFC2047 says the words
412 // should be concatenated without the space (designed for Asian
413 // sentences which have no spaces yet need to be broken into "words" to
414 // keep within the line length limits).
415 bool nonSpaceFound = false;
416 for (int i = 0; i < charset-tail; i++) {
417 if (!isspace (tail[i])) {
418 nonSpaceFound = true;
419 break;
422 if (!encodedWordFoundPreviously || nonSpaceFound) {
423 if (string != tail && tail != charset)
424 memmove(string, tail, charset-tail);
425 string += charset-tail;
427 tail = charset;
428 encodedWordFoundPreviously = true;
430 // move things to point at what they should:
431 // =?...charset...?e?...text...?= (e == encoding)
432 // ^charset ^encoding ^end
433 charset += 2;
434 encoding += 1;
435 end += 2;
437 // find the charset this text is in now
438 size_t cLen = encoding - 1 - charset;
439 bool base64encoded = toupper(*encoding) == 'B';
441 uint32 convertID = B_MAIL_NULL_CONVERSION;
442 char charsetName[cLen + 1];
443 memcpy(charsetName, charset, cLen);
444 charsetName[cLen] = '\0';
445 if (strcasecmp(charsetName, "us-ascii") == 0) {
446 convertID = B_MAIL_US_ASCII_CONVERSION;
447 } else if (strcasecmp(charsetName, "utf-8") == 0) {
448 convertID = B_MAIL_UTF8_CONVERSION;
449 } else {
450 const BCharacterSet* charSet
451 = BCharacterSetRoster::FindCharacterSetByName(charsetName);
452 if (charSet != NULL) {
453 convertID = charSet->GetConversionID();
456 if (convertID == B_MAIL_NULL_CONVERSION) {
457 // unidentified charset
458 // what to do? doing nothing skips the encoded text;
459 // but we should keep it: we copy it to the output.
460 if (string != tail && tail != end)
461 memmove(string, tail, end-tail);
462 string += end-tail;
463 continue;
465 // else we've successfully identified the charset
467 char *src = encoding+2;
468 int32 srcLen = end - 2 - src;
469 // encoded text: src..src+srcLen
471 // decode text, get decoded length (reducing xforms)
472 srcLen = !base64encoded ? decode_qp(src, src, srcLen, 1)
473 : decode_base64(src, src, srcLen);
475 // allocate space for the converted text
476 int32 dstLen = end-string + *bufLen-strLen;
477 char *dst = (char*)malloc(dstLen);
478 int32 cvLen = srcLen;
479 int32 convState = 0;
482 // do the conversion
484 ret = mail_convert_to_utf8(convertID, src, &cvLen, dst, &dstLen,
485 &convState);
486 if (ret != B_OK) {
487 // what to do? doing nothing skips the encoded text
488 // but we should keep it: we copy it to the output.
490 free(dst);
492 if (string != tail && tail != end)
493 memmove(string, tail, end-tail);
494 string += end-tail;
495 continue;
497 /* convert_to_ is either returning something wrong or my
498 test data is screwed up. Whatever it is, Not Enough
499 Space is not the only cause of the below, so we just
500 assume it succeeds if it converts anything at all.
501 else if (cvLen < srcLen)
503 // not enough room to convert the data;
504 // grow *buf and retry
506 free(dst);
508 char *temp = (char*)realloc(*bufp, 2*(*bufLen + 1));
509 if (temp == NULL)
511 ret = B_NO_MEMORY;
512 break;
515 *bufp = temp;
516 *bufLen = 2*(*bufLen + 1);
518 string = *bufp + (string-head);
519 tail = *bufp + (tail-head);
520 charset = *bufp + (charset-head);
521 encoding = *bufp + (encoding-head);
522 end = *bufp + (end-head);
523 src = *bufp + (src-head);
524 head = *bufp;
525 continue;
528 else {
529 if (dstLen > end-string) {
530 // copy the string forward...
531 memmove(string+dstLen, end, strLen - (end-head) + 1);
532 strLen += string+dstLen - end;
533 end = string + dstLen;
536 memcpy(string, dst, dstLen);
537 string += dstLen;
538 free(dst);
539 continue;
543 // copy everything that's left
544 size_t tailLen = strLen - (tail - head);
545 memmove(string, tail, tailLen+1);
546 string += tailLen;
548 // replace the last char
549 (*bufp)[strLen] = lastChar;
551 return ret < B_OK ? ret : string-head;
555 ssize_t
556 utf8_to_rfc2047 (char **bufp, ssize_t length, uint32 charset, char encoding)
558 struct word {
559 BString originalWord;
560 BString convertedWord;
561 bool needsEncoding;
563 // Convert the word from UTF-8 to the desired character set. The
564 // converted version also includes the escape codes to return to ASCII
565 // mode, if relevant. Also note if it uses unprintable characters,
566 // which means it will need that special encoding treatment later.
567 void ConvertWordToCharset (uint32 charset) {
568 int32 state = 0;
569 int32 originalLength = originalWord.Length();
570 int32 convertedLength = originalLength * 5 + 1;
571 char *convertedBuffer = convertedWord.LockBuffer (convertedLength);
572 mail_convert_from_utf8 (charset, originalWord.String(),
573 &originalLength, convertedBuffer, &convertedLength, &state);
574 for (int i = 0; i < convertedLength; i++) {
575 if ((convertedBuffer[i] & (1 << 7)) ||
576 (convertedBuffer[i] >= 0 && convertedBuffer[i] < 32)) {
577 needsEncoding = true;
578 break;
581 convertedWord.UnlockBuffer (convertedLength);
584 struct word *currentWord;
585 BList words;
587 // Break the header into words. White space characters (including tabs and
588 // newlines) separate the words. Each word includes any space before it as
589 // part of the word. Actually, quotes and other special characters
590 // (",()<>@) are treated as separate words of their own so that they don't
591 // get encoded (because MIME headers get the quotes parsed before character
592 // set unconversion is done). The reader is supposed to ignore all white
593 // space between encoded words, which can be inserted so that older mail
594 // parsers don't have overly long line length problems.
596 const char *source = *bufp;
597 const char *bufEnd = *bufp + length;
598 const char *specialChars = "\"()<>@,";
600 while (source < bufEnd) {
601 currentWord = new struct word;
602 currentWord->needsEncoding = false;
604 int wordEnd = 0;
606 // Include leading spaces as part of the word.
607 while (source + wordEnd < bufEnd && isspace (source[wordEnd]))
608 wordEnd++;
610 if (source + wordEnd < bufEnd &&
611 strchr (specialChars, source[wordEnd]) != NULL) {
612 // Got a quote mark or other special character, which is treated as
613 // a word in itself since it shouldn't be encoded, which would hide
614 // it from the mail system.
615 wordEnd++;
616 } else {
617 // Find the end of the word. Leave wordEnd pointing just after the
618 // last character in the word.
619 while (source + wordEnd < bufEnd) {
620 if (isspace(source[wordEnd]) ||
621 strchr (specialChars, source[wordEnd]) != NULL)
622 break;
623 if (wordEnd > 51 /* Makes Base64 ISO-2022-JP "word" a multiple of 4 bytes */ &&
624 0xC0 == (0xC0 & (unsigned int) source[wordEnd])) {
625 // No English words are that long (46 is the longest),
626 // break up what is likely Asian text (which has no spaces)
627 // at the start of the next non-ASCII UTF-8 character (high
628 // two bits are both ones). Note that two encoded words in
629 // a row get joined together, even if there is a space
630 // between them in the final output text, according to the
631 // standard. Next word will also be conveniently get
632 // encoded due to the 0xC0 test.
633 currentWord->needsEncoding = true;
634 break;
636 wordEnd++;
639 currentWord->originalWord.SetTo (source, wordEnd);
640 currentWord->ConvertWordToCharset (charset);
641 words.AddItem(currentWord);
642 source += wordEnd;
645 // Combine adjacent words which contain unprintable text so that the
646 // overhead of switching back and forth between regular text and specially
647 // encoded text is reduced. However, the combined word must be shorter
648 // than the maximum of 75 bytes, including character set specification and
649 // all those delimiters (worst case 22 bytes of overhead).
651 struct word *run;
653 for (int32 i = 0; (currentWord = (struct word *) words.ItemAt (i)) != NULL; i++) {
654 if (!currentWord->needsEncoding)
655 continue; // No need to combine unencoded words.
656 for (int32 g = i+1; (run = (struct word *) words.ItemAt (g)) != NULL; g++) {
657 if (!run->needsEncoding)
658 break; // Don't want to combine encoded and unencoded words.
659 if ((currentWord->convertedWord.Length() + run->convertedWord.Length() <= 53)) {
660 currentWord->originalWord.Append (run->originalWord);
661 currentWord->ConvertWordToCharset (charset);
662 words.RemoveItem(g);
663 delete run;
664 g--;
665 } else // Can't merge this word, result would be too long.
666 break;
670 // Combine the encoded and unencoded words into one line, doing the
671 // quoted-printable or base64 encoding. Insert an extra space between
672 // words which are both encoded to make word wrapping easier, since there
673 // is normally none, and you're allowed to insert space (the receiver
674 // throws it away if it is between encoded words).
676 BString rfc2047;
677 bool previousWordNeededEncoding = false;
679 const char *charset_dec = "none-bug";
680 for (int32 i = 0; mail_charsets[i].charset != NULL; i++) {
681 if (mail_charsets[i].flavor == charset) {
682 charset_dec = mail_charsets[i].charset;
683 break;
687 while ((currentWord = (struct word *)words.RemoveItem((int32)0)) != NULL) {
688 if ((encoding != quoted_printable && encoding != base64) ||
689 !currentWord->needsEncoding) {
690 rfc2047.Append (currentWord->convertedWord);
691 } else {
692 // This word needs encoding. Try to insert a space between it and
693 // the previous word.
694 if (previousWordNeededEncoding)
695 rfc2047 << ' '; // Can insert as many spaces as you want between encoded words.
696 else {
697 // Previous word is not encoded, spaces are significant. Try
698 // to move a space from the start of this word to be outside of
699 // the encoded text, so that there is a bit of space between
700 // this word and the previous one to enhance word wrapping
701 // chances later on.
702 if (currentWord->originalWord.Length() > 1 &&
703 isspace (currentWord->originalWord[0])) {
704 rfc2047 << currentWord->originalWord[0];
705 currentWord->originalWord.Remove (0 /* offset */, 1 /* length */);
706 currentWord->ConvertWordToCharset (charset);
710 char *encoded = NULL;
711 ssize_t encoded_len = 0;
712 int32 convertedLength = currentWord->convertedWord.Length ();
713 const char *convertedBuffer = currentWord->convertedWord.String ();
715 switch (encoding) {
716 case quoted_printable:
717 encoded = (char *) malloc (convertedLength * 3);
718 encoded_len = encode_qp (encoded, convertedBuffer, convertedLength, true /* headerMode */);
719 break;
720 case base64:
721 encoded = (char *) malloc (convertedLength * 2);
722 encoded_len = encode_base64 (encoded, convertedBuffer, convertedLength, true /* headerMode */);
723 break;
724 default: // Unknown encoding type, shouldn't happen.
725 encoded = (char *) convertedBuffer;
726 encoded_len = convertedLength;
727 break;
730 rfc2047 << "=?" << charset_dec << '?' << encoding << '?';
731 rfc2047.Append (encoded, encoded_len);
732 rfc2047 << "?=";
734 if (encoding == quoted_printable || encoding == base64)
735 free(encoded);
737 previousWordNeededEncoding = currentWord->needsEncoding;
738 delete currentWord;
741 free(*bufp);
743 ssize_t finalLength = rfc2047.Length ();
744 *bufp = (char *) (malloc (finalLength + 1));
745 memcpy (*bufp, rfc2047.String(), finalLength);
746 (*bufp)[finalLength] = 0;
748 return finalLength;
752 void
753 FoldLineAtWhiteSpaceAndAddCRLF(BString &string)
755 int inputLength = string.Length();
756 int lineStartIndex;
757 const int maxLineLength = 78; // Doesn't include CRLF.
758 BString output;
759 int splitIndex;
760 int tempIndex;
762 lineStartIndex = 0;
763 while (true) {
764 // If we don't need to wrap the text, just output the remainder, if any.
766 if (lineStartIndex + maxLineLength >= inputLength) {
767 if (lineStartIndex < inputLength) {
768 output.Insert (string, lineStartIndex /* source offset */,
769 inputLength - lineStartIndex /* count */,
770 output.Length() /* insert at */);
771 output.Append (CRLF);
773 break;
776 // Look ahead for a convenient spot to split it, between a comma and
777 // space, which you often see between e-mail addresses like this:
778 // "Joe Who" joe@dot.com, "Someone Else" else@blot.com
780 tempIndex = lineStartIndex + maxLineLength;
781 if (tempIndex > inputLength)
782 tempIndex = inputLength;
783 splitIndex = string.FindLast (", ", tempIndex);
784 if (splitIndex >= lineStartIndex)
785 splitIndex++; // Point to the space character.
787 // If none of those exist, try splitting at any white space.
789 if (splitIndex <= lineStartIndex)
790 splitIndex = string.FindLast (" ", tempIndex);
791 if (splitIndex <= lineStartIndex)
792 splitIndex = string.FindLast ("\t", tempIndex);
794 // If none of those exist, allow for a longer word - split at the next
795 // available white space.
797 if (splitIndex <= lineStartIndex)
798 splitIndex = string.FindFirst (" ", lineStartIndex + 1);
799 if (splitIndex <= lineStartIndex)
800 splitIndex = string.FindFirst ("\t", lineStartIndex + 1);
802 // Give up, the whole rest of the line can't be split, just dump it
803 // out.
805 if (splitIndex <= lineStartIndex) {
806 if (lineStartIndex < inputLength) {
807 output.Insert (string, lineStartIndex /* source offset */,
808 inputLength - lineStartIndex /* count */,
809 output.Length() /* insert at */);
810 output.Append (CRLF);
812 break;
815 // Do the split. The current line up to but not including the space
816 // gets output, followed by a CRLF. The space remains to become the
817 // start of the next line (and that tells the message reader that it is
818 // a continuation line).
820 output.Insert (string, lineStartIndex /* source offset */,
821 splitIndex - lineStartIndex /* count */,
822 output.Length() /* insert at */);
823 output.Append (CRLF);
824 lineStartIndex = splitIndex;
826 string.SetTo (output);
/*!	Reads one logical header line from \a file, joining folded continuation
	lines (lines starting with a space or tab) onto it.

	CRLF line ends are turned into a single LF, and the line break in front
	of a continuation line is replaced by that line's leading white space
	character. At the end of the file a non-empty line gets a final LF.

	\param buffer In/out: malloc()ed line buffer, grown with realloc() as
		needed; may point to NULL. If \a buffer itself is NULL the line is
		discarded.
	\param buflen In/out: allocated size of the buffer.
	\return Number of characters in the line (0 at end of file), ENOMEM if
		the buffer could not be grown, or a negative value on a read error.
*/
ssize_t
readfoldedline(FILE *file, char **buffer, size_t *buflen)
{
	ssize_t capacity = (buflen != NULL && *buflen != 0) ? *buflen : 0;
	char *line = (buffer != NULL && *buffer != NULL) ? *buffer : NULL;
	ssize_t used = 0;
		// Number of characters currently in the buffer.

	for (;;) {
		// Keep room for two more characters: the next one read plus the
		// terminating NUL.
		if (line == NULL || used + 2 >= capacity) {
			char *grown = (char *)realloc(line, capacity + 64);
			if (grown == NULL) {
				// Out of memory, the existing buffer stays allocated.
				used = ENOMEM;
				break;
			}
			capacity += 64;
			line = grown;
		}

		int c = fgetc(file);
		if (c == EOF) {
			if (ferror (file)) {
				used = errno;
				if (used >= 0)
					used = -1; // Error codes must be negative.
			} else if (used > 0) {
				// Genuine end of file: it also ends a line that already has
				// text in it. With no text at all, an empty string results.
				line[used++] = '\n';
				if (line[used - 2] == '\r') {
					line[used - 2] = '\n';
					--used;
				}
			}
			break;
		}

		line[used++] = c;
		if (c != '\n')
			continue;

		// Turn CRLF into LF; done before folding in case we don't fold.
		if (used >= 2 && line[used - 2] == '\r') {
			line[used - 2] = '\n';
			--used;
		}

		// An empty line is returned as such, so that it doesn't vanish when
		// the next line starts with white space.
		if (used <= 1)
			break;

		// Peek at the next line; reading EOF and pushing it back is fine.
		c = fgetc(file);
		if (c != ' ' && c != '\t') {
			// No continuation line: the line is complete.
			ungetc(c, file);
			break;
		}
		line[used - 1] = c;
			// Fold: the white space character replaces the line break.
	}

	if (line != NULL && used >= 0)
		line[used] = '\0';

	if (buffer != NULL)
		*buffer = line;
	else if (line != NULL)
		free(line);

	if (buflen != NULL)
		*buflen = capacity;

	return used;
}
913 ssize_t
914 readfoldedline(BPositionIO &in, char **buffer, size_t *buflen)
916 ssize_t len = buflen && *buflen ? *buflen : 0;
917 char * buf = buffer && *buffer ? *buffer : NULL;
918 ssize_t cnt = 0; // Number of characters currently in the buffer.
919 char c;
920 status_t errorCode;
922 while (true) {
923 // Make sure there is space in the buffer for two more characters (one
924 // for the next character, and one for the end of string NUL byte).
925 if (buf == NULL || cnt + 2 >= len) {
926 char *temp = (char *)realloc(buf, len + 64);
927 if (temp == NULL) {
928 // Out of memory, however existing buffer remains allocated.
929 cnt = ENOMEM;
930 break;
932 len += 64;
933 buf = temp;
936 errorCode = in.Read (&c,1); // A really slow way of reading - unbuffered.
937 if (errorCode != 1) {
938 if (errorCode < 0) {
939 cnt = errorCode; // IO error encountered, just return the code.
940 } else {
941 // Really is end of file. Also make it end of line if there is
942 // some text already read in. If the first thing read was EOF,
943 // just return an empty string.
944 if (cnt > 0) {
945 buf[cnt++] = '\n';
946 if (buf[cnt-2] == '\r') {
947 buf[cnt-2] = '\n';
948 --cnt;
952 break;
955 buf[cnt++] = c;
957 if (c == '\n') {
958 // Convert CRLF end of line to just a LF. Do it before folding, in
959 // case we don't need to fold.
960 if (cnt >= 2 && buf[cnt-2] == '\r') {
961 buf[cnt-2] = '\n';
962 --cnt;
964 // If the current line is empty then return it (so that empty lines
965 // don't disappear if the next line starts with a space).
966 if (cnt <= 1)
967 break;
968 // if first character on the next line is whitespace, fold lines
969 errorCode = in.Read(&c,1);
970 if (errorCode == 1) {
971 if (c == ' ' || c == '\t')
972 buf[cnt-1] = c; // Replace \n with the white space character.
973 else {
974 // Not folding, we finished reading a whole line.
975 in.Seek(-1,SEEK_CUR); // Undo the look-ahead character read.
976 break;
978 } else if (errorCode < 0) {
979 cnt = errorCode;
980 break;
981 } else // No next line; at the end of the file. Return the line.
982 break;
986 if (buf != NULL && cnt >= 0)
987 buf[cnt] = '\0';
989 if (buffer)
990 *buffer = buf;
991 else if (buf)
992 free(buf);
994 if (buflen)
995 *buflen = len;
997 return cnt;
/*!	Extracts the next logical header line from the NUL terminated text
	\c *header, joining folded continuation lines (lines starting with a
	space or tab) onto it.

	CRLF line ends are turned into a single LF, and the line break in front
	of a continuation line is replaced by that line's leading white space
	character. At the end of the text a non-empty line gets a final LF.

	\param header In/out: read position. It is advanced past the returned
		line but never past the terminating NUL, so calling the function
		again at the end of the text safely yields an empty line.
	\param buffer In/out: malloc()ed line buffer, grown with realloc() as
		needed; may point to NULL. If \a buffer itself is NULL the line is
		discarded.
	\param buflen In/out: allocated size of the buffer.
	\return Number of characters in the line (0 at the end of the text), or
		ENOMEM if the buffer could not be grown.
*/
ssize_t
nextfoldedline(const char** header, char **buffer, size_t *buflen)
{
	ssize_t len = buflen && *buflen ? *buflen : 0;
	char * buf = buffer && *buffer ? *buffer : NULL;
	ssize_t cnt = 0; // Number of characters currently in the buffer.
	char c;

	while (true) {
		// Make sure there is space in the buffer for two more characters (one
		// for the next character, and one for the end of string NUL byte).
		if (buf == NULL || cnt + 2 >= len) {
			char *temp = (char *)realloc(buf, len + 64);
			if (temp == NULL) {
				// Out of memory, however existing buffer remains allocated.
				cnt = ENOMEM;
				break;
			}
			len += 64;
			buf = temp;
		}

		// Look at the next character without consuming it yet: the read
		// position has to stay on the terminating NUL once it is reached.
		c = **header;
		if (c == 0) {
			// End of text. Also make it end of line if there is some text
			// already read in. If the first thing read was the end, just
			// return an empty string.
			if (cnt > 0) {
				buf[cnt++] = '\n';
				if (buf[cnt-2] == '\r') {
					buf[cnt-2] = '\n';
					--cnt;
				}
			}
			break;
		}
		(*header)++;

		buf[cnt++] = c;

		if (c == '\n') {
			// Convert CRLF end of line to just a LF. Do it before folding, in
			// case we don't need to fold.
			if (cnt >= 2 && buf[cnt-2] == '\r') {
				buf[cnt-2] = '\n';
				--cnt;
			}
			// If the current line is empty then return it (so that empty lines
			// don't disappear if the next line starts with a space).
			if (cnt <= 1)
				break;
			// If the first character on the next line is whitespace, fold
			// lines: consume it and let it replace the line break.
			c = **header;
			if (c == ' ' || c == '\t') {
				(*header)++;
				buf[cnt-1] = c;
			} else {
				// Not folding, we finished reading a line.
				break;
			}
		}
	}

	if (buf != NULL && cnt >= 0)
		buf[cnt] = '\0';

	if (buffer)
		*buffer = buf;
	else if (buf)
		free(buf);

	if (buflen)
		*buflen = len;

	return cnt;
}
1081 void
1082 trim_white_space(BString &string)
1084 int32 i;
1085 int32 length = string.Length();
1086 char *buffer = string.LockBuffer(length + 1);
1088 while (length > 0 && isspace(buffer[length - 1]))
1089 length--;
1090 buffer[length] = '\0';
1092 for (i = 0; buffer[i] && isspace(buffer[i]); i++) {}
1093 if (i != 0) {
1094 length -= i;
1095 memmove(buffer,buffer + i,length + 1);
1097 string.UnlockBuffer(length);
1101 /*! Tries to return a human-readable name from the specified
1102 header parameter (should be from "To:" or "From:").
1103 Tries to return the name rather than the eMail address.
1105 void
1106 extract_address_name(BString &header)
1108 BString name;
1109 const char *start = header.String();
1110 const char *stop = start + strlen (start);
1112 // Find a string S in the header (email foo) that matches:
1113 // Old style name in brackets: foo@bar.com (S)
1114 // New style quotes: "S" <foo@bar.com>
1115 // New style no quotes if nothing else found: S <foo@bar.com>
1116 // If nothing else found then use the whole thing: S
1118 for (int i = 0; i <= 3; i++) {
1119 // Set p1 to the first letter in the name and p2 to just past the last
1120 // letter in the name. p2 stays NULL if a name wasn't found in this
1121 // pass.
1122 const char *p1 = NULL, *p2 = NULL;
1124 switch (i) {
1125 case 0: // foo@bar.com (S)
1126 if ((p1 = strchr(start,'(')) != NULL) {
1127 p1++; // Advance to first letter in the name.
1128 size_t nest = 1; // Handle nested brackets.
1129 for (p2 = p1; p2 < stop; ++p2)
1131 if (*p2 == ')')
1132 --nest;
1133 else if (*p2 == '(')
1134 ++nest;
1135 if (nest <= 0)
1136 break;
1138 if (nest != 0)
1139 p2 = NULL; // False alarm, no terminating bracket.
1141 break;
1142 case 1: // "S" <foo@bar.com>
1143 if ((p1 = strchr(start, '\"')) != NULL)
1144 p2 = strchr(++p1, '\"');
1145 break;
1146 case 2: // S <foo@bar.com>
1147 p1 = start;
1148 if (name.Length() == 0)
1149 p2 = strchr(start, '<');
1150 break;
1151 case 3: // S
1152 p1 = start;
1153 if (name.Length() == 0)
1154 p2 = stop;
1155 break;
1158 // Remove leading and trailing space-like characters and save the
1159 // result if it is longer than any other likely names found.
1160 if (p2 != NULL) {
1161 while (p1 < p2 && (isspace (*p1)))
1162 ++p1;
1164 while (p1 < p2 && (isspace (p2[-1])))
1165 --p2;
1167 int newLength = p2 - p1;
1168 if (name.Length() < newLength)
1169 name.SetTo(p1, newLength);
1173 int32 lessIndex = name.FindFirst('<');
1174 int32 greaterIndex = name.FindLast('>');
1176 if (lessIndex == 0) {
1177 // Have an address of the form <address> and nothing else, so remove
1178 // the greater and less than signs, if any.
1179 if (greaterIndex > 0)
1180 name.Remove(greaterIndex, 1);
1181 name.Remove(lessIndex, 1);
1182 } else if (lessIndex > 0 && lessIndex < greaterIndex) {
1183 // Yahoo stupidly inserts the e-mail address into the name string, so
1184 // this bit of code fixes: "Joe <joe@yahoo.com>" <joe@yahoo.com>
1185 name.Remove(lessIndex, greaterIndex - lessIndex + 1);
1188 trim_white_space(name);
1189 header = name;
1193 /*! Given a subject in a BString, remove the extraneous RE: re: and other stuff
1194 to get down to the core subject string, which should be identical for all
1195 messages posted about a topic. The input string is modified in place to
1196 become the output core subject string.
1198 void
1199 SubjectToThread (BString &string)
1201 // a regex that matches a non-ASCII UTF8 character:
1202 #define U8C \
1203 "[\302-\337][\200-\277]" \
1204 "|\340[\302-\337][\200-\277]" \
1205 "|[\341-\357][\200-\277][\200-\277]" \
1206 "|\360[\220-\277][\200-\277][\200-\277]" \
1207 "|[\361-\367][\200-\277][\200-\277][\200-\277]" \
1208 "|\370[\210-\277][\200-\277][\200-\277][\200-\277]" \
1209 "|[\371-\373][\200-\277][\200-\277][\200-\277][\200-\277]" \
1210 "|\374[\204-\277][\200-\277][\200-\277][\200-\277][\200-\277]" \
1211 "|\375[\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]"
1213 #define PATTERN \
1214 "^ +" \
1215 "|^(\\[[^]]*\\])(\\<| +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \
1216 "|^( +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \
1217 "| *\\(fwd\\) *$"
1219 if (gRebuf == NULL && atomic_add(&gLocker, 1) == 0) {
1220 // the idea is to compile the regexp once to speed up testing
1222 for (int i=0; i<256; ++i) gTranslation[i]=i;
1223 for (int i='a'; i<='z'; ++i) gTranslation[i]=toupper(i);
1225 gRe.translate = gTranslation;
1226 gRe.regs_allocated = REGS_FIXED;
1227 re_syntax_options = RE_SYNTAX_POSIX_EXTENDED;
1229 const char *pattern = PATTERN;
1230 // count subexpressions in PATTERN
1231 for (unsigned int i=0; pattern[i] != 0; ++i)
1233 if (pattern[i] == '\\')
1234 ++i;
1235 else if (pattern[i] == '(')
1236 ++gNsub;
1239 const char *err = re_compile_pattern(pattern,strlen(pattern),&gRe);
1240 if (err == NULL)
1241 gRebuf = &gRe;
1242 else
1243 fprintf(stderr, "Failed to compile the regex: %s\n", err);
1244 } else {
1245 int32 tries = 200;
1246 while (gRebuf == NULL && tries-- > 0)
1247 snooze(10000);
1250 if (gRebuf) {
1251 struct re_registers regs;
1252 // can't be static if this function is to be thread-safe
1254 regs.num_regs = gNsub;
1255 regs.start = (regoff_t*)malloc(gNsub*sizeof(regoff_t));
1256 regs.end = (regoff_t*)malloc(gNsub*sizeof(regoff_t));
1258 for (int start = 0; (start = re_search(gRebuf, string.String(),
1259 string.Length(), 0, string.Length(), &regs)) >= 0;) {
1261 // we found something
1264 // don't delete [bemaildaemon]...
1265 if (start == regs.start[1])
1266 start = regs.start[2];
1268 string.Remove(start,regs.end[0]-start);
1269 if (start)
1270 string.Insert(' ',1,start);
1272 // TODO: for some subjects this results in an endless loop, check
1273 // why this happen.
1274 if (regs.end[0] - start <= 1)
1275 break;
1278 free(regs.start);
1279 free(regs.end);
1282 // Finally remove leading and trailing space. Some software, like
1283 // tm-edit 1.8, appends a space to the subject, which would break
1284 // threading if we left it in.
1285 trim_white_space(string);
1289 /*! Converts a date to a time. Handles numeric time zones too, unlike
1290 parsedate(). Returns -1 if it fails.
1292 time_t
1293 ParseDateWithTimeZone(const char *DateString)
1295 time_t currentTime;
1296 time_t dateAsTime;
1297 char tempDateString[80];
1298 char tempZoneString[6];
1299 time_t zoneDeltaTime;
1300 int zoneIndex;
1301 char *zonePntr;
1303 // See if we can remove the time zone portion. parsedate understands time
1304 // zone 3 letter names, but doesn't understand the numeric +9999 time zone
1305 // format. To do: see if a newer parsedate exists.
1307 strncpy (tempDateString, DateString, sizeof (tempDateString));
1308 tempDateString[sizeof (tempDateString) - 1] = 0;
1310 // Remove trailing spaces.
1311 zonePntr = tempDateString + strlen (tempDateString) - 1;
1312 while (zonePntr >= tempDateString && isspace (*zonePntr))
1313 *zonePntr-- = 0;
1314 if (zonePntr < tempDateString)
1315 return -1; // Empty string.
1317 // Remove the trailing time zone in round brackets, like in
1318 // Fri, 22 Feb 2002 15:22:42 EST (-0500)
1319 // Thu, 25 Apr 1996 11:44:19 -0400 (EDT)
1320 if (tempDateString[strlen(tempDateString)-1] == ')')
1322 zonePntr = strrchr (tempDateString, '(');
1323 if (zonePntr != NULL)
1325 *zonePntr-- = 0; // Zap the '(', then remove trailing spaces.
1326 while (zonePntr >= tempDateString && isspace (*zonePntr))
1327 *zonePntr-- = 0;
1328 if (zonePntr < tempDateString)
1329 return -1; // Empty string.
1333 // Look for a numeric time zone like Tue, 30 Dec 2003 05:01:40 +0000
1334 for (zoneIndex = strlen (tempDateString); zoneIndex >= 0; zoneIndex--)
1336 zonePntr = tempDateString + zoneIndex;
1337 if (zonePntr[0] == '+' || zonePntr[0] == '-')
1339 if (zonePntr[1] >= '0' && zonePntr[1] <= '9' &&
1340 zonePntr[2] >= '0' && zonePntr[2] <= '9' &&
1341 zonePntr[3] >= '0' && zonePntr[3] <= '9' &&
1342 zonePntr[4] >= '0' && zonePntr[4] <= '9')
1343 break;
1346 if (zoneIndex >= 0)
1348 // Remove the zone from the date string and any following time zone
1349 // letter codes. Also put in GMT so that the date gets parsed as GMT.
1350 memcpy (tempZoneString, zonePntr, 5);
1351 tempZoneString [5] = 0;
1352 strcpy (zonePntr, "GMT");
1354 else // No numeric time zone found.
1355 strcpy (tempZoneString, "+0000");
1357 time (&currentTime);
1358 dateAsTime = parsedate (tempDateString, currentTime);
1359 if (dateAsTime == (time_t) -1)
1360 return -1; // Failure.
1362 zoneDeltaTime = 60 * atol (tempZoneString + 3); // Get the last two digits - minutes.
1363 tempZoneString[3] = 0;
1364 zoneDeltaTime += atol (tempZoneString + 1) * 60 * 60; // Get the first two digits - hours.
1365 if (tempZoneString[0] == '+')
1366 zoneDeltaTime = 0 - zoneDeltaTime;
1367 dateAsTime += zoneDeltaTime;
1369 return dateAsTime;
1373 /*! Parses a mail header and fills the headers BMessage
1375 status_t
1376 parse_header(BMessage &headers, BPositionIO &input)
1378 char *buffer = NULL;
1379 size_t bufferSize = 0;
1380 int32 length;
1382 while ((length = readfoldedline(input, &buffer, &bufferSize)) >= 2) {
1383 --length;
1384 // Don't include the \n at the end of the buffer.
1386 // convert to UTF-8 and null-terminate the buffer
1387 length = rfc2047_to_utf8(&buffer, &bufferSize, length);
1388 buffer[length] = '\0';
1390 const char *delimiter = strstr(buffer, ":");
1391 if (delimiter == NULL)
1392 continue;
1394 BString header(buffer, delimiter - buffer);
1395 header.CapitalizeEachWord();
1396 // unified case for later fetch
1398 delimiter++; // Skip the colon.
1399 // Skip over leading white space and tabs.
1400 // TODO: (comments in brackets).
1401 while (isspace(*delimiter))
1402 delimiter++;
1404 // TODO: implement joining of multiple header tags (i.e. multiple "Cc:"s)
1405 headers.AddString(header.String(), delimiter);
1407 free(buffer);
1409 return B_OK;
1413 status_t
1414 extract_from_header(const BString& header, const BString& field,
1415 BString& target)
1417 int32 headerLength = header.Length();
1418 int32 fieldEndPos = 0;
1419 while (true) {
1420 int32 pos = header.IFindFirst(field, fieldEndPos);
1421 if (pos < 0)
1422 return B_BAD_VALUE;
1423 fieldEndPos = pos + field.Length();
1425 if (pos != 0 && header.ByteAt(pos - 1) != '\n')
1426 continue;
1427 if (header.ByteAt(fieldEndPos) == ':')
1428 break;
1430 fieldEndPos++;
1432 int32 crPos = fieldEndPos;
1433 while (true) {
1434 fieldEndPos = crPos;
1435 crPos = header.FindFirst('\n', crPos);
1436 if (crPos < 0)
1437 crPos = headerLength;
1438 BString temp;
1439 header.CopyInto(temp, fieldEndPos, crPos - fieldEndPos);
1440 if (header.ByteAt(crPos - 1) == '\r') {
1441 temp.Truncate(temp.Length() - 1);
1442 temp += " ";
1444 target += temp;
1445 crPos++;
1446 if (crPos >= headerLength)
1447 break;
1448 char nextByte = header.ByteAt(crPos);
1449 if (nextByte != ' ' && nextByte != '\t')
1450 break;
1451 crPos++;
1454 size_t bufferSize = target.Length();
1455 char* buffer = target.LockBuffer(bufferSize);
1456 size_t length = rfc2047_to_utf8(&buffer, &bufferSize, bufferSize);
1457 target.UnlockBuffer(length);
1459 trim_white_space(target);
1461 return B_OK;
1465 void
1466 extract_address(BString &address)
1468 const char *string = address.String();
1469 int32 first;
1471 // first, remove all quoted text
1473 if ((first = address.FindFirst('"')) >= 0) {
1474 int32 last = first + 1;
1475 while (string[last] && string[last] != '"')
1476 last++;
1478 if (string[last] == '"')
1479 address.Remove(first, last + 1 - first);
1482 // try to extract the address now
1484 if ((first = address.FindFirst('<')) >= 0) {
1485 // the world likes us and we can just get the address the easy way...
1486 int32 last = address.FindFirst('>');
1487 if (last >= 0) {
1488 address.Truncate(last);
1489 address.Remove(0, first + 1);
1491 return;
1495 // then, see if there is anything in parenthesis to throw away
1497 if ((first = address.FindFirst('(')) >= 0) {
1498 int32 last = first + 1;
1499 while (string[last] && string[last] != ')')
1500 last++;
1502 if (string[last] == ')')
1503 address.Remove(first, last + 1 - first);
1506 // now, there shouldn't be much else left
1508 trim_white_space(address);
1512 void
1513 get_address_list(BList &list, const char *string,
1514 void (*cleanupFunc)(BString &))
1516 if (string == NULL || !string[0])
1517 return;
1519 const char *start = string;
1521 while (true) {
1522 if (string[0] == '"') {
1523 const char *quoteEnd = ++string;
1525 while (quoteEnd[0] && quoteEnd[0] != '"')
1526 quoteEnd++;
1528 if (!quoteEnd[0]) // string exceeds line!
1529 quoteEnd = string;
1531 string = quoteEnd + 1;
1534 if (string[0] == ',' || string[0] == '\0') {
1535 BString address(start, string - start);
1536 trim_white_space(address);
1538 if (cleanupFunc)
1539 cleanupFunc(address);
1541 list.AddItem(strdup(address.String()));
1543 start = string + 1;
1546 if (!string[0])
1547 break;
1549 string++;
1554 status_t
1555 CopyMailFolderAttributes(const char* targetPath)
1557 BPath path;
1558 status_t status = find_directory(B_USER_SETTINGS_DIRECTORY, &path);
1559 if (status != B_OK)
1560 return status;
1562 path.Append("Tracker");
1563 path.Append("DefaultQueryTemplates");
1564 path.Append("text_x-email");
1566 BNode source(path.Path());
1567 BNode target(targetPath);
1568 return BPrivate::CopyAttributes(source, target);