2 * Copyright 2005-2009, Haiku Inc.
3 * This file may be used under the terms of the MIT License.
5 * Originally public domain written by Alexander G. M. Smith.
9 /*! MboxToBeMail is a utility program that converts Unix mailbox (mbox) files
10 (the kind that Pine uses) into e-mail files for use with BeOS. It also
11 handles news files from rn and trn, which have messages very similar to mail
12 messages but with a different separator line. The input files store
13 multiple mail messages in text format separated by "From ..." lines or
14 "Article ..." lines. The output is a bunch of separate files, each one with
15 one message plus BeOS BFS attributes describing that message. For
16 convenience, all the messages that were from one file are put in a specified
25 #include <Application.h>
27 #include <StorageKit.h>
28 #include <SupportKit.h>
30 #include <MailMessage.h>
31 #include <mail_util.h>
34 extern const char* __progname
;
35 static const char* kProgramName
= __progname
;
37 char InputPathName
[B_PATH_NAME_LENGTH
];
/* Index codes for the standard mail attributes.  They are used to index
HeaderValues[] and g_StandardAttributeNames[], so the order here has to match
the order of the attribute name table.  STD_HDR_MAX is not a header, it is the
number of entries and must stay last. */

typedef enum StandardHeaderEnum
{
  STD_HDR_DATE = 0, /* The Date: field.  First one since it is numeric. */
  STD_HDR_FROM,     /* The whole From: field, including quotes and address. */
  STD_HDR_TO,       /* All the stuff in the To: field. */
  STD_HDR_CC,       /* All the CC: field (originally means carbon copy). */
  STD_HDR_REPLY,    /* Things in the reply-to: field. */
  STD_HDR_SUBJECT,  /* The Subject: field. */
  STD_HDR_PRIORITY, /* The Priority: and related fields, usually "Normal". */
  STD_HDR_STATUS,   /* The BeOS mail Read / New status text attribute. */
  STD_HDR_THREAD,   /* The subject simplified. */
  STD_HDR_NAME,     /* The From address simplified into a plain name. */
  STD_HDR_MAX       /* Count of the codes above, used for array sizes. */
} StandardHeaderCodes;
57 const char *g_StandardAttributeNames
[STD_HDR_MAX
] =
73 /******************************************************************************
74 * Global utility function to display an error message and return. The message
75 * part describes the error, and if ErrorNumber is non-zero, gets the string
76 * ", error code $X (standard description)." appended to it. If the message
77 * is NULL then it gets defaulted to "Something went wrong".
80 static void DisplayErrorMessage (
81 const char *MessageString
= NULL
,
83 const char *TitleString
= NULL
)
85 char ErrorBuffer
[B_PATH_NAME_LENGTH
+ 80 /* error message */ + 80];
87 if (TitleString
== NULL
)
88 TitleString
= "Error Message:";
90 if (MessageString
== NULL
)
93 MessageString
= "No error, no message, why bother?";
95 MessageString
= "Something went wrong";
100 sprintf (ErrorBuffer
, "%s, error code $%X/%d (%s) has occured.",
101 MessageString
, ErrorNumber
, ErrorNumber
, strerror (ErrorNumber
));
102 MessageString
= ErrorBuffer
;
105 fputs (TitleString
, stderr
);
106 fputc ('\n', stderr
);
107 fputs (MessageString
, stderr
);
108 fputc ('\n', stderr
);
/******************************************************************************
 * Determine if a line of text is the start of another message.  Pine mailbox
 * files have messages that start with a line that could say something like
 * "From agmsmith@achilles.net Fri Oct 31 21:19:36 EST 1997" or maybe something
 * like "From POPmail Mon Oct 20 21:12:36 1997" or in a more modern format,
 * "From agmsmith@achilles.net Tue Sep 4 09:04:11 2001 -0400".  I generalise it
 * to "From blah Day MMM NN XX:XX:XX TZONE1 YYYY TZONE2".  Blah is an e-mail
 * address you can ignore (just treat it as a word separated by spaces).  Day
 * is a 3 letter day of the week.  MMM is a 3 letter month name.  NN is the
 * day of the month, one or two digits (any number of spaces may come before
 * it).  XX:XX:XX is the time, the X's are digits.  TZONE1 is the old style
 * optional time zone of 3 capital letters.  YYYY is the four digit year.
 * TZONE2 is the optional modern time zone info, a plus or minus sign and 4
 * digits.  Returns true if the line of text (ended with a NUL byte, no line
 * feed or carriage returns at the end) is the start of a message.  Lines that
 * begin with "From " but fail a later check get a "False alarm" note printed
 * on stdout and false is returned.
 */

bool IsStartOfMailMessage (char *LineString)
{
  static const char * const DayNames [7] =
    {"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"};
  static const char * const MonthNames [12] =
    {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
  static const char TimePattern [] = "dd:dd:dd"; /* d stands for a digit. */

  int         i;
  const char *StringPntr;

  /* It starts with "From ".  strncmp is used for all the fixed text
  comparisons since it stops at the NUL byte, so a short line (even an empty
  one) is never read past its end. */

  if (strncmp (LineString, "From ", 5) != 0)
    return false;
  StringPntr = LineString + 4;
  while (*StringPntr == ' ')
    StringPntr++;

  /* Skip over the e-mail address (or stop at the end of string). */

  while (*StringPntr != ' ' && *StringPntr != 0)
    StringPntr++;
  while (*StringPntr == ' ')
    StringPntr++;

  /* Look for the 3 letter day of the week. */

  for (i = 0; i < 7; i++)
  {
    if (strncmp (StringPntr, DayNames [i], 3) == 0)
      break;
  }
  if (i >= 7)
  {
    printf ("False alarm, not a valid day of the week in \"%s\".\n",
      LineString);
    return false;
  }
  StringPntr += 3;
  while (*StringPntr == ' ')
    StringPntr++;

  /* Look for the 3 letter month code. */

  for (i = 0; i < 12; i++)
  {
    if (strncmp (StringPntr, MonthNames [i], 3) == 0)
      break;
  }
  if (i >= 12)
  {
    printf ("False alarm, not a valid month name in \"%s\".\n",
      LineString);
    return false;
  }
  StringPntr += 3;
  while (*StringPntr == ' ')
    StringPntr++;

  /* Skip the day of the month.  Require at least one digit. */

  if (*StringPntr < '0' || *StringPntr > '9')
  {
    printf ("False alarm, not a valid day of the month number in \"%s\".\n",
      LineString);
    return false;
  }
  while (*StringPntr >= '0' && *StringPntr <= '9')
    StringPntr++;
  while (*StringPntr == ' ')
    StringPntr++;

  /* Check the time.  Look for the sequence
  digit-digit-colon-digit-digit-colon-digit-digit.  The scan stops at the first
  character that doesn't fit, which includes the NUL at the end of string. */

  for (i = 0; TimePattern [i] != 0; i++)
  {
    if (TimePattern [i] == 'd')
    {
      if (StringPntr [i] < '0' || StringPntr [i] > '9')
        break;
    }
    else if (StringPntr [i] != TimePattern [i])
      break;
  }
  if (TimePattern [i] != 0)
  {
    printf ("False alarm, not a valid time value in \"%s\".\n",
      LineString);
    return false;
  }
  StringPntr += 8;
  while (*StringPntr == ' ')
    StringPntr++;

  /* Look for the optional antique 3 capital letter time zone and skip it. */

  if (StringPntr [0] >= 'A' && StringPntr [0] <= 'Z' &&
  StringPntr [1] >= 'A' && StringPntr [1] <= 'Z' &&
  StringPntr [2] >= 'A' && StringPntr [2] <= 'Z')
  {
    StringPntr += 3;
    while (*StringPntr == ' ')
      StringPntr++;
  }

  /* Look for the 4 digit year. */

  for (i = 0; i < 4; i++)
  {
    if (StringPntr [i] < '0' || StringPntr [i] > '9')
      break;
  }
  if (i < 4)
  {
    printf ("False alarm, not a valid 4 digit year in \"%s\".\n",
      LineString);
    return false;
  }
  StringPntr += 4;
  while (*StringPntr == ' ')
    StringPntr++;

  /* Look for the optional modern time zone and skip over it if present. */

  if ((StringPntr [0] == '+' || StringPntr [0] == '-') &&
  StringPntr [1] >= '0' && StringPntr [1] <= '9' &&
  StringPntr [2] >= '0' && StringPntr [2] <= '9' &&
  StringPntr [3] >= '0' && StringPntr [3] <= '9' &&
  StringPntr [4] >= '0' && StringPntr [4] <= '9')
  {
    StringPntr += 5;
    while (*StringPntr == ' ')
      StringPntr++;
  }

  /* Look for end of string. */

  if (*StringPntr != 0)
  {
    printf ("False alarm, extra stuff after the year/time zone in \"%s\".\n",
      LineString);
    return false;
  }

  return true;
}
/******************************************************************************
 * Determine if a line of text is the start of a news article.  TRN and RN news
 * article save files have messages that start with a line that looks like
 * "Article 11721 of rec.games.video.3do:".  Returns true if the line of text
 * (ended with a NUL byte, no line feed or carriage returns at the end) is the
 * start of an article.  Lines that begin with "Article " but fail a later
 * check get a "False alarm" note printed on stdout and false is returned.
 */

bool IsStartOfUsenetArticle (char *LineString)
{
  const char *StringPntr;

  /* It starts with "Article ".  strncmp stops at the NUL byte, so a line
  shorter than the prefix is never read past its end. */

  if (strncmp (LineString, "Article ", 8) != 0)
    return false;
  StringPntr = LineString + 7;
  while (*StringPntr == ' ')
    StringPntr++;

  /* Skip the article number.  Require at least one digit. */

  if (*StringPntr < '0' || *StringPntr > '9')
  {
    printf ("False alarm, not a valid article number in \"%s\".\n",
      LineString);
    return false;
  }
  while (*StringPntr >= '0' && *StringPntr <= '9')
    StringPntr++;
  while (*StringPntr == ' ')
    StringPntr++;

  /* Now it should have "of " */

  if (strncmp (StringPntr, "of ", 3) != 0)
  {
    printf ("False alarm, article line \"of\" misssing in \"%s\".\n",
      LineString);
    return false;
  }
  StringPntr += 2; /* Leaves us on the space, which the loop below eats. */
  while (*StringPntr == ' ')
    StringPntr++;

  /* Skip over the newsgroup name (no spaces) to the colon. */

  while (*StringPntr != ':' && *StringPntr != ' ' && *StringPntr != 0)
    StringPntr++;

  /* The colon has to be the very last character of the line. */

  if (StringPntr [0] != ':' || StringPntr [1] != 0)
  {
    printf ("False alarm, article doesn't end with a colon in \"%s\".\n",
      LineString);
    return false;
  }

  return true;
}
338 /******************************************************************************
339 * Saves the message text to a file in the output directory. The file name is
340 * derived from the message headers. Returns zero if successful, a negative
341 * error code if an error occured.
344 status_t
SaveMessage (BString
&MessageText
)
346 time_t DateInSeconds
;
349 BString HeaderValues
[STD_HDR_MAX
];
352 BEmailMessage MailMessage
;
354 char TempString
[80];
355 struct tm TimeFields
;
356 BString UniqueFileName
;
359 /* Remove blank lines from the end of the message (a pet peeve of mine), but
360 end the message with a single new line to avoid annoying text editors that
363 i
= MessageText
.Length ();
364 while (i
> 0 && (MessageText
[i
-1] == '\n' || MessageText
[i
-1] == '\r'))
366 MessageText
.Truncate (i
);
367 MessageText
.Append ("\r\n");
369 /* Make a pretend file to hold the message, so the MDR library can use it. */
371 BMemoryIO
FakeFile (MessageText
.String (), MessageText
.Length ());
373 /* Hand the message text off to the MDR library, which will parse it, extract
374 the subject, sender's name, and other attributes, taking into account the
375 character set headers. */
377 ErrorCode
= MailMessage
.SetToRFC822 (&FakeFile
,
378 MessageText
.Length (), false /* parse_now - decodes message body */);
379 if (ErrorCode
!= B_OK
)
381 DisplayErrorMessage ("Mail library was unable to process a mail "
382 "message for some reason", ErrorCode
);
386 /* Get the values for the standard attributes. NULL if missing. */
388 HeaderValues
[STD_HDR_TO
] = MailMessage
.To ();
389 HeaderValues
[STD_HDR_FROM
] = MailMessage
.From ();
390 HeaderValues
[STD_HDR_CC
] = MailMessage
.CC ();
391 HeaderValues
[STD_HDR_DATE
] = MailMessage
.Date ();
392 HeaderValues
[STD_HDR_REPLY
] = MailMessage
.ReplyTo ();
393 HeaderValues
[STD_HDR_SUBJECT
] = MailMessage
.Subject ();
394 HeaderValues
[STD_HDR_STATUS
] = "Read";
395 if (MailMessage
.Priority () != 3 /* Normal */)
397 sprintf (TempString
, "%d", MailMessage
.Priority ());
398 HeaderValues
[STD_HDR_PRIORITY
] = TempString
;
401 HeaderValues
[STD_HDR_THREAD
] = HeaderValues
[STD_HDR_SUBJECT
];
402 SubjectToThread (HeaderValues
[STD_HDR_THREAD
]);
403 if (HeaderValues
[STD_HDR_THREAD
].Length() <= 0)
404 HeaderValues
[STD_HDR_THREAD
] = "No Subject";
406 HeaderValues
[STD_HDR_NAME
] = HeaderValues
[STD_HDR_FROM
];
407 extract_address_name (HeaderValues
[STD_HDR_NAME
]);
409 // Generate a file name for the incoming message.
411 FileName
= HeaderValues
[STD_HDR_THREAD
];
412 if (FileName
[0] == '.')
413 FileName
.Prepend ("_"); // Avoid hidden files, starting with a dot.
415 // Convert the date into a year-month-day fixed digit width format, so that
416 // sorting by file name will give all the messages with the same subject in
420 ParseDateWithTimeZone (HeaderValues
[STD_HDR_DATE
].String());
421 if (DateInSeconds
== -1)
422 DateInSeconds
= 0; /* Set it to the earliest time if date isn't known. */
424 localtime_r (&DateInSeconds
, &TimeFields
);
425 sprintf (TempString
, "%04d%02d%02d%02d%02d%02d",
426 TimeFields
.tm_year
+ 1900,
427 TimeFields
.tm_mon
+ 1,
432 FileName
<< " " << TempString
<< " " << HeaderValues
[STD_HDR_NAME
];
433 FileName
.Truncate (240); // reserve space for the uniquer
435 // Get rid of annoying characters which are hard to use in the shell.
436 FileName
.ReplaceAll('/','_');
437 FileName
.ReplaceAll('\'','_');
438 FileName
.ReplaceAll('"','_');
439 FileName
.ReplaceAll('!','_');
440 FileName
.ReplaceAll('<','_');
441 FileName
.ReplaceAll('>','_');
442 while (FileName
.FindFirst(" ") >= 0) // Remove multiple spaces.
443 FileName
.Replace(" " /* Old */, " " /* New */, 1024 /* Count */);
446 UniqueFileName
= FileName
;
449 ErrorCode
= OutputFile
.SetTo (&OutputDir
,
450 const_cast<const char *> (UniqueFileName
.String ()),
451 B_READ_WRITE
| B_CREATE_FILE
| B_FAIL_IF_EXISTS
);
452 if (ErrorCode
== B_OK
)
454 if (ErrorCode
!= B_FILE_EXISTS
)
456 UniqueFileName
.Prepend ("Unable to create file \"");
457 UniqueFileName
.Append ("\" for writing a message to");
458 DisplayErrorMessage (UniqueFileName
.String (), ErrorCode
);
462 UniqueFileName
= FileName
;
463 UniqueFileName
<< " " << Uniquer
;
466 /* Write the message contents to the file, use the unchanged original one. */
468 ErrorCode
= OutputFile
.Write (MessageText
.String (), MessageText
.Length ());
471 UniqueFileName
.Prepend ("Error while writing file \"");
472 UniqueFileName
.Append ("\"");
473 DisplayErrorMessage (UniqueFileName
.String (), ErrorCode
);
477 /* Attach the attributes to the file. Save the MIME type first, otherwise
478 the live queries don't pick up the new file. Theoretically it would be
479 better to do it last so that other programs don't start reading the message
480 before the other attributes are set. */
482 OutputFile
.WriteAttr ("BEOS:TYPE", B_MIME_STRING_TYPE
, 0,
485 OutputFile
.WriteAttr (g_StandardAttributeNames
[STD_HDR_DATE
],
486 B_TIME_TYPE
, 0, &DateInSeconds
, sizeof (DateInSeconds
));
488 /* Write out all the string based attributes. */
490 for (i
= 1 /* The date was zero */; i
< STD_HDR_MAX
; i
++)
492 if ((Length
= HeaderValues
[i
].Length()) > 0)
493 OutputFile
.WriteAttr (g_StandardAttributeNames
[i
], B_STRING_TYPE
, 0,
494 HeaderValues
[i
].String(), Length
+ 1);
501 int main (int argc
, char** argv
)
503 char ErrorMessage
[B_PATH_NAME_LENGTH
+ 80];
504 bool HaveOldMessage
= false;
505 int MessagesDoneCount
= 0;
507 BApplication
MyApp ("application/x-vnd.Haiku-mbox2mail");
509 char OutputDirectoryPathName
[B_PATH_NAME_LENGTH
];
510 status_t ReturnCode
= -1;
511 bool SaveSeparatorLine
= false;
513 char TempString
[102400];
517 printf ("%s is a utility for converting Pine e-mail\n",
519 printf ("files (mbox files) to Mail e-mail files with attributes. It\n");
520 printf ("could well work with other Unix style mailbox files, and\n");
521 printf ("saved Usenet article files. Each message in the input\n");
522 printf ("mailbox is converted into a separate file. You can\n");
523 printf ("optionally specify a directory (will be created if needed) to\n");
524 printf ("put all the output files in, otherwise it scatters them into\n");
525 printf ("the current directory. The -s option makes it leave in the\n");
526 printf ("separator text line at the top of each message, the default\n");
527 printf ("is to lose it.\n\n");
528 printf ("Usage:\n\n");
529 printf ("%s [-s] InputFile [OutputDirectory]\n\n", kProgramName
);
530 printf ("Public domain, by Alexander G. M. Smith.\n");
535 if (strcmp (argv
[NextArgIndex
], "-s") == 0)
537 SaveSeparatorLine
= true;
541 /* Try to open the input file. */
543 if (NextArgIndex
>= argc
)
546 DisplayErrorMessage ("Missing the input file (mbox file) name argument.");
549 strncpy (InputPathName
, argv
[NextArgIndex
], sizeof (InputPathName
) - 1);
551 InputFile
= fopen (InputPathName
, "rb");
552 if (InputFile
== NULL
)
555 sprintf (ErrorMessage
, "Unable to open file \"%s\" for reading",
557 DisplayErrorMessage (ErrorMessage
, ReturnCode
);
561 /* Try to make the output directory. First get its name. */
563 if (NextArgIndex
< argc
)
565 strncpy (OutputDirectoryPathName
, argv
[NextArgIndex
],
566 sizeof (OutputDirectoryPathName
) - 2
567 /* Leave space for adding trailing slash and NUL byte */);
571 strcpy (OutputDirectoryPathName
, ".");
573 /* Remove trailing '/' characters from the output directory path. */
576 OutputDirectoryPathName
+ (strlen (OutputDirectoryPathName
) - 1);
577 while (StringPntr
>= OutputDirectoryPathName
)
579 if (*StringPntr
!= '/')
585 if (StringPntr
- OutputDirectoryPathName
> 0 &&
586 strcmp (OutputDirectoryPathName
, ".") != 0)
588 if (mkdir (OutputDirectoryPathName
, 0777))
591 if (ReturnCode
!= B_FILE_EXISTS
)
593 sprintf (ErrorMessage
, "Unable to make output directory \"%s\"",
594 OutputDirectoryPathName
);
595 DisplayErrorMessage (ErrorMessage
, ReturnCode
);
601 /* Set the output BDirectory. */
603 ReturnCode
= OutputDir
.SetTo (OutputDirectoryPathName
);
604 if (ReturnCode
!= B_OK
)
606 sprintf (ErrorMessage
, "Unable to set output BDirectory to \"%s\"",
607 OutputDirectoryPathName
);
608 DisplayErrorMessage (ErrorMessage
, ReturnCode
);
612 printf ("Input file: \"%s\", Output directory: \"%s\", ",
613 InputPathName
, OutputDirectoryPathName
);
614 printf ("%ssaving separator text line at the top of each message. Working",
615 SaveSeparatorLine
? "" : "not ");
617 /* Extract a text message from the mail file. It starts with a line that
618 says "From blah Day MM NN XX:XX:XX YYYY TZONE". Blah is an e-mail address
619 you can ignore (just treat it as a word separated by spaces). Day is a 3
620 letter day of the week. MM is a 3 letter month name. NN is the two digit
621 day of the month, has a leading space if the day is less than 10. XX:XX:XX is
622 the time, the X's are digits. YYYY is the four digit year. TZONE is the
623 optional time zone info, a plus or minus sign and 4 digits. */
625 while (!feof (InputFile
))
627 /* First read in one line of text. */
629 if (!fgets (TempString
, sizeof (TempString
), InputFile
))
632 if (ferror (InputFile
))
634 sprintf (ErrorMessage
,
635 "Error while reading from \"%s\"", InputPathName
);
636 DisplayErrorMessage (ErrorMessage
, ReturnCode
);
639 break; /* No error, just end of file. */
642 /* Remove any trailing control characters (line feed usually, or CRLF).
643 Might also nuke trailing tabs too. Doesn't usually matter. The main thing
644 is to allow input files with both LF and CRLF endings (and even CR endings
645 if you come from the Macintosh world). */
647 StringPntr
= TempString
+ strlen (TempString
) - 1;
648 while (StringPntr
>= TempString
&& *StringPntr
< 32)
652 /* See if this is the start of a new message. */
654 if (IsStartOfUsenetArticle (TempString
) ||
655 IsStartOfMailMessage (TempString
))
659 if ((ReturnCode
= SaveMessage (MessageText
)) != 0)
665 HaveOldMessage
= true;
666 MessageText
.SetTo (SaveSeparatorLine
? TempString
: "");
670 /* Append the line to the current message text. */
672 if (MessageText
.Length () > 0)
673 MessageText
.Append ("\r\n"); /* Yes, BeMail expects CR/LF line ends. */
674 MessageText
.Append (TempString
);
678 /* Flush out the last message. */
682 if ((ReturnCode
= SaveMessage (MessageText
)) != 0)
687 printf (" Did %d messages.\n", MessagesDoneCount
);
692 if (InputFile
!= NULL
)