5 // Copyright (C) 2005 Novell, Inc.
6 // Copyright (C) 2005 Debajyoti Bera
9 // Permission is hereby granted, free of charge, to any person obtaining a
10 // copy of this software and associated documentation files (the "Software"),
11 // to deal in the Software without restriction, including without limitation
12 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 // and/or sell copies of the Software, and to permit persons to whom the
14 // Software is furnished to do so, subject to the following conditions:
16 // The above copyright notice and this permission notice shall be included in
17 // all copies or substantial portions of the Software.
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 // DEALINGS IN THE SOFTWARE.
29 using System
.Collections
;
35 namespace Beagle
.Daemon
.KMailQueryable
{
39 * The bulk of the indexing work is done here
41 public class KMailIndexer
{
// Location of the mail folder tree handled by this indexer.
private string mail_root;

// Account name for this folder tree.
private string account_name;

// Mail folders not to scan; IgnoreFolder matches these against path endings.
private ArrayList excludes;

// Maildir directories, i.e. directories which store mails in cur/, new/, tmp/ subdirs.
private ArrayList mail_directories;

// Directories which contain mbox files and other mail folders.
private ArrayList folder_directories;

// Paths of the mbox files found so far.
private ArrayList mbox_files;

// The queryable this indexer schedules its tasks through.
private KMailQueryable queryable;

// Cache: the last directory IsMailDir confirmed to be a maildir.
private string lastGoodDirPath = "";

public string MailRoot {
	get { return mail_root; }
}

public string AccountName {
	get { return account_name; }
}

public KMailQueryable Queryable {
	get { return queryable; }
}
/**
 * Create an indexer for one KMail folder tree.
 *
 * queryable: the queryable used to create and schedule index tasks
 * account:   account name, attached to every indexable as fixme:account
 * root:      path of the root mail folder
 */
public KMailIndexer (KMailQueryable queryable, string account, string root)
{
	this.queryable = queryable;
	account_name = account;
	// Store the root before anything reads mail_root; the debug line
	// below and every other member of this class rely on it.
	mail_root = root;

	mail_directories = new ArrayList ();
	Logger.Log.Debug ("mail_directories created for:" + mail_root + " (" + mail_directories.Count + ")");
	folder_directories = new ArrayList ();
	mbox_files = new ArrayList ();

	// Folder names that are never scanned (see IgnoreFolder).
	excludes = new ArrayList ();
	excludes.AddRange (new string [] { "spam", "outbox", "trash", "drafts" });
}
/**
 * Inotify callback used for every directory this indexer subscribes to.
 * Turns an event on an entry below a watched directory into add/remove
 * tasks and keeps mail_directories, folder_directories and mbox_files
 * up to date. Each event is handled by the first matching case.
 *
 * watch:   the subscription that delivered the event
 * path:    the watched directory
 * subitem: name of the affected entry inside path
 * srcpath: for move events, the path the entry was moved from (may be null)
 * type:    the inotify event flags
 */
private void OnInotifyEvent (Inotify.Watch watch,
			     string path,
			     string subitem,
			     string srcpath,
			     Inotify.EventType type)
{
	//FIXME this case should NEVER occur, still it does
	if (mail_directories == null) {
		Logger.Log.Debug ("*** WEIRD AVIRAM CASE for :" + mail_root);
		Logger.Log.Debug ("Received inotify event{3} for {4}: path={0}, subitem={1}, srcpath={2}", path, subitem, srcpath, type, mail_root);
		// Nothing below can work without the directory lists.
		return;
	}

	// Without an entry name there is nothing to resolve below path
	// (and Path.Combine rejects a null component).
	if (subitem == null || subitem == "")
		return;

	string fullPath = Path.Combine (path, subitem);
	bool is_directory = (type & Inotify.EventType.IsDirectory) != 0;

	// we need to watch for all kinds of events - this is tricky

	// Case: new file is created
	// - if it is inside a maildir (cur/ or new/), it is a mail: index it
	// - otherwise it may be an mbox file or the .index file of one:
	//   remember the mbox and index it
	if ((type & Inotify.EventType.Create) != 0 && !is_directory) {
		if (IsMailDir (path)) {
			Indexable indexable = MaildirMessageToIndexable (fullPath);
			AddIndexableTask (indexable, fullPath);
		} else {
			// add mbox file to mbox_files
			string mbox = GetMboxFile (path, subitem);
			// GetMboxFile returns null when the file is not related to an mbox
			if (mbox != null) {
				if (!mbox_files.Contains (mbox))
					mbox_files.Add (mbox);
				IndexMbox (mbox, true);
			}
		}
		return;
	}

	// Case: file is deleted
	// - if it is a mail file, we might like it to be deleted
	if ((type & Inotify.EventType.MovedFrom) != 0 ||
	    ((type & Inotify.EventType.Delete) != 0 && !is_directory)) {
		if (IsMailDir (path))
			RemoveMail (fullPath);
		else if (mbox_files.Contains (fullPath)) {
			RemoveMbox (fullPath);
			mbox_files.Remove (fullPath);
		}
		return;
	}

	// Case: file is moved
	// - files are moved from tmp/new to cur
	// - need to delete from the source
	if ((type & Inotify.EventType.MovedTo) != 0 && !is_directory) {
		if (IsMailDir (path)) {
			Indexable indexable = MaildirMessageToIndexable (fullPath);
			AddIndexableTask (indexable, fullPath);
		}
		if (IsMailDir (srcpath))
			RemoveMail (srcpath);
		if (mbox_files.Contains (fullPath)) {
			// check if this because of compaction, in which case need to delete previous mbox
			if (srcpath != null && srcpath.EndsWith ("." + subitem + ".compacted"))
				RemoveMbox (fullPath);
			// FIXME need to ensure IndexMbox is scheduled *after* RemoveMbox finishes
			// RemoveMbox creates a job with immediate priority while
			// IndexMbox creates a job with the default priority of a generator
			// Is there a better way to ensure the order ?
			IndexMbox (fullPath, true);
		}
		return;
	}

	// Case: file is modified i.e. there was no create event but closewrite event
	// - possibly some mbox was changed
	// FIXME kmail doesnt physically delete the deleted mails from mbox files unless compacted
	// - which means one has to read the .index files to find deleted messages...
	// - need to find the format of the .index/.index.ids etc files and parse them
	if ((type & Inotify.EventType.Modify) != 0 && !is_directory) {
		if (mbox_files.Contains (fullPath))
			IndexMbox (fullPath, false);
		return;
	}

	// Case: a directory is created:
	// watch it (unless it is an ignored folder) and let UpdateDirectories
	// decide whether it is a maildir or a folder directory
	if ((type & Inotify.EventType.Create) != 0 && is_directory) {
		if (!IgnoreFolder (fullPath)) {
			Watch (fullPath);
			UpdateDirectories (fullPath);
		}
		return;
	}

	// Case: if a directory is deleted:
	// NOTE(review): `watch` is the subscription that delivered this event;
	// confirm that unsubscribing it is intended for a deleted sub-directory.
	if ((type & Inotify.EventType.Delete) != 0 && is_directory) {
		watch.Unsubscribe ();
		mail_directories.Remove (fullPath);
		folder_directories.Remove (fullPath);
		return;
	}

	// Case: directory is moved
	// FIXME: implement renaming of mail folders
}
/**
 * Add watch to the parameter directory and its subdirs, recursively.
 * Directories are visited breadth-first; a directory that does not
 * exist (any more) is skipped.
 *
 * path: directory at which to start watching
 */
public void Watch (string path)
{
	DirectoryInfo root = new DirectoryInfo (path);
	if (!root.Exists)
		return;

	Queue queue = new Queue ();
	queue.Enqueue (root);

	while (queue.Count > 0) {
		// Only DirectoryInfo objects are ever enqueued, so a direct cast is safe.
		DirectoryInfo dir = (DirectoryInfo) queue.Dequeue ();

		// The directory may have disappeared since it was listed.
		if (!dir.Exists)
			continue;

		//log.Debug ("Adding inotify watch to " + dir.FullName);
		Inotify.Subscribe (dir.FullName, OnInotifyEvent,
				   Inotify.EventType.Create
				   | Inotify.EventType.Delete
				   | Inotify.EventType.MovedFrom
				   | Inotify.EventType.MovedTo);

		foreach (DirectoryInfo subdir in DirectoryWalker.GetDirectoryInfos (dir))
			queue.Enqueue (subdir);
	}
}
/**
 * Recursively traverse the files and directories under mail_root
 * to find files that need to be indexed and directories that
 * need to be watched for changes, then schedule the indexing:
 * one generator task for all maildir folders and one task per mbox file.
 *
 * A mail folder is recognised by its index file .<name>.index:
 * - if <name> is a directory it is a maildir folder
 * - if <name> is a file it is an mbox file
 * - a directory .<name>.directory holds the sub-folders of <name>
 *
 * NOTE(review): the method header was not part of the reviewed text;
 * the name and signature used here are assumed - confirm against callers.
 */
public void Crawl ()
{
	if (!Directory.Exists (mail_root))
		return;

	// Every crawl rebuilds the lists from scratch, so none of them
	// may keep entries of an earlier crawl.
	mail_directories.Clear ();
	folder_directories.Clear ();
	mbox_files.Clear ();

	// Events of interest in directories that can hold mbox files.
	Inotify.EventType folder_events = Inotify.EventType.Create
					  | Inotify.EventType.Delete
					  | Inotify.EventType.MovedFrom
					  | Inotify.EventType.MovedTo
					  | Inotify.EventType.Modify;

	Queue pending = new Queue ();
	pending.Enqueue (mail_root);
	folder_directories.Add (mail_root);
	// add inotify watch to root folder
	Inotify.Subscribe (mail_root, OnInotifyEvent, folder_events);

	while (pending.Count > 0) {

		string dir = (string) pending.Dequeue ();
		Logger.Log.Debug ("Searching for mbox and maildirs in " + dir);

		foreach (FileInfo fi in DirectoryWalker.GetFileInfos (dir)) {
			string indexFile = fi.Name;
			// Only names of the form .<name>.index with a non-empty <name>
			// describe a mail folder; anything else would be sliced wrongly
			// (or make Substring throw) below.
			if (!indexFile.EndsWith (".index")
			    || !indexFile.StartsWith (".")
			    || indexFile.Length <= ".index".Length + 1)
				continue;

			string mailFolderName =
				indexFile.Substring (1, indexFile.LastIndexOf (".index") - 1);
			string mailFolder = Path.Combine (dir, mailFolderName);
			if (IgnoreFolder (mailFolder))
				continue;

			if (Directory.Exists (mailFolder)) {
				mail_directories.Add (mailFolder);
				// Watch the maildir and its cur/new/tmp subdirs so that
				// OnInotifyEvent sees mails arriving in them.
				Watch (mailFolder);
			} else if (File.Exists (mailFolder)) {
				mbox_files.Add (mailFolder);
			}

			// if there is a directory with name .<mailFolderName>.directory
			// then it contains sub-folders
			string subFolder =
				Path.Combine (dir, "." + mailFolderName + ".directory");
			if (Directory.Exists (subFolder)) {
				pending.Enqueue (subFolder);
				folder_directories.Add (subFolder);
				Inotify.Subscribe (subFolder, OnInotifyEvent, folder_events);
			}
		}
	}

	// copy the contents as mail_directories, mbox_files might change due to async events
	ArrayList _mail_directories = new ArrayList (mail_directories);
	ArrayList _mbox_files = new ArrayList (mbox_files);

	if (queryable.ThisScheduler.ContainsByTag (mail_root)) {
		Logger.Log.Debug ("Not adding task for already running task: {0}", mail_root);
	} else {
		KMaildirIndexableGenerator generator = new KMaildirIndexableGenerator (this, _mail_directories);
		AddIIndexableTask (generator, mail_root);
	}

	// IndexMbox performs its own already-scheduled check per file.
	foreach (string mbox_file in _mbox_files) {
		IndexMbox (mbox_file, true);
	}
}
/**
 * Schedule a single indexable for indexing with immediate priority.
 * Does nothing when indexable is null.
 *
 * indexable: the item to index
 * tag:       label stored on the scheduled task
 */
private void AddIndexableTask (Indexable indexable, string tag)
{
	if (indexable == null)
		return;

	Scheduler.Task task = queryable.NewAddTask (indexable);
	task.Priority = Scheduler.Priority.Immediate;
	// NOTE(review): assumes Scheduler.Task exposes a settable Tag, the value
	// ThisScheduler.ContainsByTag looks for - confirm against Scheduler.
	task.Tag = tag;
	queryable.ThisScheduler.Add (task);
}
/**
 * Schedule an indexable generator with the scheduler's default priority.
 * Does nothing when generator is null.
 *
 * generator: produces the indexables to add
 * tag:       label stored on the scheduled task; callers pass the same
 *            value to ThisScheduler.ContainsByTag to detect a task that
 *            is already scheduled
 */
private void AddIIndexableTask (IIndexableGenerator generator, string tag)
{
	if (generator == null)
		return;

	Scheduler.Task task = queryable.NewAddTask (generator);
	// Without the tag the ContainsByTag checks of the callers can never match.
	// NOTE(review): assumes Scheduler.Task exposes a settable Tag - confirm.
	task.Tag = tag;
	queryable.ThisScheduler.Add (task);
}
/**
 * Start a task for indexing an mbox file.
 * No task is added when mbox_file is null or when the scheduler
 * already holds a task tagged with this file.
 *
 * mbox_file:    path of the mbox file; also used as the task tag
 * initial_scan: passed through to KMailMboxIndexableGenerator
 */
public void IndexMbox (string mbox_file, bool initial_scan)
{
	// GetMboxFile reports "not an mbox" as null; there is nothing to index then.
	if (mbox_file == null)
		return;

	if (queryable.ThisScheduler.ContainsByTag (mbox_file)) {
		Logger.Log.Debug ("Not adding task for already running task: {0}", mbox_file);
		return;
	}

	//Logger.Log.Debug ("Creating task to index mbox {0}", mbox_file);
	KMailMboxIndexableGenerator generator = new KMailMboxIndexableGenerator (this, mbox_file, initial_scan);
	AddIIndexableTask (generator, mbox_file);
}
/**
 * Remove a maildir mail file from the index: schedules a remove task
 * for the file's uri with immediate priority.
 *
 * file: path of the mail file
 */
private void RemoveMail (string file)
{
	Logger.Log.Debug ("Removing mail:" + file);

	Uri mail_uri = UriFu.PathToFileUri (file);
	Scheduler.Task remove_task = queryable.NewRemoveTask (mail_uri);

	remove_task.Priority = Scheduler.Priority.Immediate;
	remove_task.SubPriority = 0;

	queryable.ThisScheduler.Add (remove_task);
}
/**
 * Create an indexable from a maildir message.
 * The folder name is taken from the directory two levels above the file
 * (see GetFolderMaildir); the content is read from the file itself.
 *
 * filename: path of the mail file
 * returns:  the indexable describing the mail
 */
public Indexable MaildirMessageToIndexable (string filename)
{
	//Logger.Log.Debug ("+ indexing maildir mail:" + filename);
	String folder = GetFolderMaildir (filename);
	Uri file_uri = UriFu.PathToFileUri (filename);

	Indexable indexable = new Indexable (file_uri);
	indexable.HitType = "MailMessage";
	indexable.MimeType = "message/rfc822";
	indexable.CacheContent = false;

	indexable.AddProperty (Property.NewUnsearched ("fixme:client", "kmail"));
	indexable.AddProperty (Property.NewUnsearched ("fixme:account", account_name));
	indexable.AddProperty (Property.NewUnsearched ("fixme:folder", folder));
	indexable.ContentUri = file_uri;

	return indexable;
}
/**
 * Create an indexable from an mbox message
 * Most of the code here is from Evo backend
 *
 * file_name:   path of the mbox file holding the message
 * uri:         uri identifying the message
 * message:     the parsed message
 * folder_name: name of the mail folder; compared with
 *              Queryable.SentMailFolderName to tell sent from received mail
 * returns:     the indexable describing the message
 */
public Indexable MessageToIndexable (string file_name, System.Uri uri, GMime.Message message, string folder_name)
{
	//Logger.Log.Debug ("Indexing " + uri + " in folder " + folder_name);
	Indexable indexable = new Indexable (uri);
	// set parent uri to the filename so that when an mbox file
	// is deleted, all the messages in that file can be deleted
	indexable.ParentUri = UriFu.PathToFileUri (file_name);

	indexable.Timestamp = message.Date.ToUniversalTime ();
	indexable.HitType = "MailMessage";
	indexable.MimeType = "message/rfc822";
	indexable.CacheContent = false;

	indexable.AddProperty (Property.NewUnsearched ("fixme:client", "kmail"));
	indexable.AddProperty (Property.NewUnsearched ("fixme:account", account_name));
	indexable.AddProperty (Property.NewUnsearched ("fixme:folder", folder_name));

	// The folder decides which addresses are recorded; it does not
	// change while the addresses are walked, so test it once.
	bool in_sent_folder = (folder_name == Queryable.SentMailFolderName);

	// NOTE(review): if InternetAddressList is disposable in the GMime
	// binding in use, the lists obtained below should be disposed once
	// they have been walked - confirm.
	if (in_sent_folder) {
		// sent mail: remember whom it was sent to
		AddAddressKeywords (indexable, "fixme:sentTo",
				    message.GetRecipients (GMime.Message.RecipientType.To));
		AddAddressKeywords (indexable, "fixme:sentTo",
				    message.GetRecipients (GMime.Message.RecipientType.Cc));
	} else {
		// received mail: remember whom it came from
		AddAddressKeywords (indexable, "fixme:gotFrom",
				    GMime.InternetAddressList.ParseString (GMime.Utils.HeaderDecodePhrase (message.Sender)));
	}

	if (in_sent_folder) {
		indexable.AddProperty (Property.NewFlag ("fixme:isSent"));
	} else {
		// outside the sent folder a reply link also marks the message
		// as sent; the else keeps the flag from being added twice
		string kmail_msg_sent = message.GetHeader ("X-KMail-Link-Type");
		if (kmail_msg_sent == "reply")
			indexable.AddProperty (Property.NewFlag ("fixme:isSent"));
	}

	// A message is either sent or received, never both.
	// NOTE(review): an earlier remark at this spot said the date need not be
	// stored again because Timestamp plus the isSent flag already carry it;
	// confirm whether these two properties are still wanted at all.
	if (in_sent_folder)
		indexable.AddProperty (Property.NewDate ("fixme:sentdate", message.Date.ToUniversalTime ()));
	else
		indexable.AddProperty (Property.NewDate ("fixme:received", message.Date.ToUniversalTime ()));

	indexable.SetBinaryStream (message.Stream);

	return indexable;
}

// Add the address of every non-group entry of addrs to indexable as a keyword named key.
private void AddAddressKeywords (Indexable indexable, string key, GMime.InternetAddressList addrs)
{
	if (addrs == null)
		return;

	foreach (GMime.InternetAddress ia in addrs) {
		if (ia.AddressType != GMime.InternetAddressType.Group)
			indexable.AddProperty (Property.NewKeyword (key, ia.Addr));
	}
}
/**
 * Remove an mbox file from the index.
 * Deleting an mbox means deleting all the mails which were in it.
 * This relies on the parent-uri: every indexable created for an mbox
 * message carries the uri of the mbox file as its parent uri, so the
 * remove task scheduled here is created for the uri of the mbox file.
 *
 * file: path of the mbox file
 */
public void RemoveMbox (string file)
{
	Logger.Log.Debug ("Removing mbox:" + file);

	Uri mbox_uri = UriFu.PathToFileUri (file);
	Scheduler.Task remove_task = queryable.NewRemoveTask (mbox_uri);

	remove_task.Priority = Scheduler.Priority.Immediate;
	remove_task.SubPriority = 0;

	queryable.ThisScheduler.Add (remove_task);
}
463 ///////////////////////////////////////////////////////////
/**
 * Decide whether dirPath is the cur/ or new/ directory of a known maildir.
 *
 * a maildir is of format:
 * some_dir_in_currently_watched_directories/{cur,new,tmp}
 * again we ignore tmp - no point trying to watch it - it will be moved anyway
 * should we check with the kmail directory structure ?
 * presence of files like directory.index, directory.index.ids ?
 *
 * dirPath: directory to test; null yields false
 * returns: true when the directory is named cur or new and its parent
 *          is listed in mail_directories
 */
public bool IsMailDir (string dirPath)
{
	if (dirPath == null)
		return false;

	// Compare the whole last path component: a suffix test would also
	// accept directories whose name merely ends in "cur" or "new".
	string leaf = Path.GetFileName (dirPath);
	if (leaf != "cur" && leaf != "new")
		return false;

	string possibleMaildir = (Directory.GetParent (dirPath)).FullName;
	// The most recently confirmed maildir is cached to skip the list lookup.
	if (lastGoodDirPath == possibleMaildir)
		return true;

	Logger.Log.Debug ("checking if " + possibleMaildir + " is a maildir ?");
	if (mail_directories.Contains (possibleMaildir)) {
		lastGoodDirPath = possibleMaildir;
		return true;
	}

	return false;
}
/**
 * how to decide if this filename denotes an mbox file ?
 * if its of the form .aaa.index, then aaa is the mbox file
 * if its of the form aaa (no .index) then there should be a .aaa.index
 *
 * dir:      directory containing the file
 * filename: name of the file inside dir
 * returns:  full path of the mbox file, or null when the file is not
 *           related to an existing mbox
 */
public string GetMboxFile (string dir, string filename)
{
	if (filename == null || filename.Length == 0)
		return null;

	int pos = filename.LastIndexOf (".index");

	if (pos > 0 && filename.StartsWith (".")) {
		// .aaa.index -> aaa: skip the leading dot and stop right before
		// ".index", i.e. take pos - 1 characters starting at index 1
		string possible_mbox_name = Path.Combine (dir, filename.Substring (1, pos - 1));
		if (File.Exists (possible_mbox_name))
			return possible_mbox_name;
	} else {
		// aaa: it is an mbox only if its index file .aaa.index exists
		string possible_index_name = Path.Combine (dir, "." + filename + ".index");
		if (File.Exists (possible_index_name))
			return Path.Combine (dir, filename);
	}

	return null; // not found
}
/**
 * Called when a new directory is created
 * Decide what to do with this new directory:
 * - cur, new or tmp: its parent is a maildir, add the parent to mail_directories
 * - .name.directory: it holds sub-folders, add it to folder_directories
 * - anything else:   treat it as a maildir, add it to mail_directories
 * A directory is never listed twice.
 *
 * dirPath: path of the new directory
 */
public void UpdateDirectories (string dirPath)
{
	DirectoryInfo dirinfo = new DirectoryInfo (dirPath);
	string dirName = dirinfo.Name;

	if (dirName == "cur" || dirName == "new" || dirName == "tmp") {
		// check and add the parentdir to mail_directories
		DirectoryInfo parent = Directory.GetParent (dirPath);
		if (parent != null && !mail_directories.Contains (parent.FullName))
			mail_directories.Add (parent.FullName);
		// the sub-directory itself is not a mail folder
		return;
	}

	// format .name.directory - in which case add it to folder_dir
	// format name - in which case add it to mail_dir
	ArrayList target = dirName.EndsWith (".directory") ? folder_directories : mail_directories;
	if (!target.Contains (dirPath))
		target.Add (dirPath);
}
/**
 * Folder name of an mbox file.
 * FIXME:if we can parse kmailrc file, then we might be
 * able to deduce the mail folder name
 * currently get it from the file name (mbox) or parent.parent directory name
 *
 * mbox_file: path of the mbox file
 * returns:   the file name of the mbox, used as folder name
 */
public string GetFolderMbox (string mbox_file)
{
	FileInfo fi = new FileInfo (mbox_file);
	return fi.Name;
}
/**
 * Folder name of a maildir mail file: the name of the directory two
 * levels above the file, i.e. <folder> in <folder>/{cur,new}/<mail>.
 *
 * mailFile: path of the mail file
 */
public string GetFolderMaildir (string mailFile)
{
	// directory holding the mail file
	DirectoryInfo containing_dir = Directory.GetParent (mailFile);
	// the mail folder that directory belongs to
	DirectoryInfo folder_dir = Directory.GetParent (containing_dir.FullName);

	return folder_dir.Name;
}
555 private bool IgnoreFolder (string path
)
557 foreach (string exclude
in excludes
) {
558 if (path
.ToLower().EndsWith (exclude
))