Indexable is not marked _done_ until all the child indexables (including child of...
[beagle.git] / beagled / KMailQueryable / KMailIndexer.cs
blobdcb1a26238329b580bdd74828708e2a0dab4afc2
2 //
3 // KMailIndexer.cs
4 //
5 // Copyright (C) 2005 Novell, Inc.
6 // Copyright (C) 2005 Debajyoti Bera
7 //
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining a
10 // copy of this software and associated documentation files (the "Software"),
11 // to deal in the Software without restriction, including without limitation
12 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 // and/or sell copies of the Software, and to permit persons to whom the
14 // Software is furnished to do so, subject to the following conditions:
16 // The above copyright notice and this permission notice shall be included in
17 // all copies or substantial portions of the Software.
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 // DEALINGS IN THE SOFTWARE.
28 using System;
29 using System.Collections;
30 using System.IO;
32 using Beagle.Util;
33 using Beagle.Daemon;
35 namespace Beagle.Daemon.KMailQueryable {
37 /**
38 * Main indexer class
39 * The bulk of the indexing work is done here
41 public class KMailIndexer {
// Root directory of the mail store handled by this indexer.
private string mail_root;
public string MailRoot {
	get {
		return mail_root;
	}
}

// Name of the account the mail store belongs to; stored on every
// indexable as the "fixme:account" property.
private string account_name;
public string AccountName {
	get {
		return account_name;
	}
}

// Folder-name suffixes that are never scanned (see IgnoreFolder).
private ArrayList excludes;

// Maildir folders, i.e. directories keeping mails in cur/, new/, tmp/ subdirs.
private ArrayList mail_directories;

// Directories that hold mbox files and further mail folders.
private ArrayList folder_directories;

// Full paths of the known mbox files.
private ArrayList mbox_files;

// The queryable that owns this indexer; tasks are created and scheduled through it.
private KMailQueryable queryable;
public KMailQueryable Queryable {
	get {
		return queryable;
	}
}

// Maildir most recently confirmed by IsMailDir, kept to skip the list lookup.
private string lastGoodDirPath = "";
/**
 * Create an indexer for one mail root.
 * Only in-memory state is initialised here; no directory is scanned
 * and no watch is added until Crawl / Watch are called.
 */
public KMailIndexer (KMailQueryable queryable, string account, string root)
{
	this.queryable = queryable;
	this.account_name = account;
	this.mail_root = root;

	this.mail_directories = new ArrayList ();
	Logger.Log.Debug ("mail_directories created for:" + mail_root + " (" + mail_directories.Count + ")");
	this.folder_directories = new ArrayList ();
	this.mbox_files = new ArrayList ();

	// folders whose path ends with one of these names are skipped
	this.excludes = new ArrayList (new string [] { "spam", "outbox", "trash", "drafts" });
}
/**
 * inotify callback
 *
 * Handles one inotify event. path and subitem are combined into the full
 * path of the affected entry. The cases below are tested in order and the
 * first one that matches handles the event and returns:
 *   - file created:         index the maildir message or the mbox file
 *   - file deleted / moved away: remove the mail or the mbox from the index
 *   - file moved in:        index the maildir message / re-index the mbox
 *   - file modified:        re-index a known mbox file
 *   - directory created:    watch it and record it
 *   - directory deleted:    forget it
 * Events that match none of the cases (e.g. a directory being moved) are ignored.
 */
private void OnInotifyEvent (Inotify.Watch watch,
			     string path,
			     string subitem,
			     string srcpath,
			     Inotify.EventType type)
{
	//FIXME this case should NEVER occur, still it does
	if (mail_directories == null) {
		Logger.Log.Debug ("*** WEIRD AVIRAM CASE for :" + mail_root);
		Logger.Log.Debug ("Received inotify event{3} for {4}: path={0}, subitem={1}, srcpath={2}", path, subitem, srcpath, type, mail_root);
		return;
	}

	// without an entry name there is nothing to act on; the null check
	// also keeps Path.Combine from throwing
	if (subitem == null || subitem == "")
		return;
	string fullPath = Path.Combine (path, subitem);
	bool isDirectory = (type & Inotify.EventType.IsDirectory) != 0;

	// we need to watch for all kinds of events - this is tricky

	// Case: new file is created
	// - if it is in one of the mail_directories, index it as a maildir message
	// - otherwise index it if it is (or belongs to) an mbox file
	if ((type & Inotify.EventType.Create) != 0 && ! isDirectory) {
		if (IsMailDir (path)) {
			Indexable indexable = MaildirMessageToIndexable (fullPath);
			AddIndexableTask (indexable, fullPath);
		} else {
			string mbox = GetMboxFile (path, subitem);
			if (mbox != null) {
				// GetMboxFile resolves both the mbox file and its .index
				// file to the same mbox path, so record the path only once;
				// a duplicate would survive the single Remove done on delete
				if (! mbox_files.Contains (mbox))
					mbox_files.Add (mbox);
				IndexMbox (mbox, true);
			}
		}
		return;
	}

	// Case: file is deleted
	// - if it is a mail file, we might like it to be deleted
	// Note that MovedFrom is accepted here for directories as well.
	if ((type & Inotify.EventType.MovedFrom) != 0 ||
	    ((type & Inotify.EventType.Delete) != 0 && ! isDirectory)) {
		if (IsMailDir (path))
			RemoveMail (fullPath);
		else if (mbox_files.Contains (fullPath)) {
			RemoveMbox (fullPath);
			mbox_files.Remove (fullPath);
		}
		return;
	}

	// Case: file is moved
	// - files are moved from tmp/new to cur
	// - need to delete from the source
	if ((type & Inotify.EventType.MovedTo) != 0 && ! isDirectory) {
		if (IsMailDir (path)) {
			Indexable indexable = MaildirMessageToIndexable (fullPath);
			AddIndexableTask (indexable, fullPath);
		}
		// NOTE(review): srcpath looks like a file path (see the ".compacted"
		// check below) while IsMailDir expects a cur/ or new/ directory, so
		// this branch is presumably never taken; the MovedFrom case above
		// appears to be what actually removes the source - confirm
		if (IsMailDir (srcpath))
			RemoveMail (srcpath);
		if (mbox_files.Contains (fullPath)) {
			// check if this because of compaction, in which case need to delete previous mbox
			if (srcpath != null && srcpath.EndsWith ("." + subitem + ".compacted"))
				RemoveMbox (fullPath);
			// FIXME need to ensure IndexMbox is scheduled *after* RemoveMbox finishes
			// RemoveMbox creates a job with immediate priority while
			// IndexMbox creates a job with the default priority of a generator
			// Is there a better way to ensure the order ?
			IndexMbox (fullPath, true);
		}
		return;
	}

	// Case: file is modified i.e. there was no create event but closewrite event
	// - possibly some mbox was changed
	// FIXME kmail doesnt physically delete the deleted mails from mbox files unless compacted
	// - which means one has to read the .index files to find deleted messages...
	// - need to find the format of the .index/.index.ids etc files and parse them
	if ((type & Inotify.EventType.Modify) != 0 && ! isDirectory) {
		if (mbox_files.Contains (fullPath))
			IndexMbox (fullPath, false);
		return;
	}

	// Case: a directory is created:
	// well watch it anyway but also make sure its a maildir directory
	// if it a maildir directory, then add it to maildir_dirs
	if ((type & Inotify.EventType.Create) != 0 && isDirectory) {
		if (!IgnoreFolder (fullPath)) {
			Watch (fullPath);
			UpdateDirectories(fullPath);
		}
		return;
	}

	// Case: if a directory is deleted:
	// remove watch
	if ((type & Inotify.EventType.Delete) != 0 && isDirectory) {
		// NOTE(review): watch is the subscription this event was delivered
		// on - presumably the one for `path`, not for the deleted
		// directory; confirm this does not drop the parent's watch
		watch.Unsubscribe ();
		mail_directories.Remove (fullPath);
		folder_directories.Remove (fullPath);
		// IsMailDir answers from lastGoodDirPath before consulting
		// mail_directories, so the cache must be dropped as well
		if (lastGoodDirPath == fullPath)
			lastGoodDirPath = "";
		return;
	}

	// Case: directory is moved
	// FIXME: implement renaming of mail folders
}
/**
 * Subscribe OnInotifyEvent to the given directory and, level by level,
 * to every directory below it. Create, Delete, MovedFrom and MovedTo
 * events are requested. Does nothing if the directory does not exist.
 */
public void Watch (string path)
{
	DirectoryInfo top = new DirectoryInfo (path);
	if (! top.Exists)
		return;

	Inotify.EventType mask = Inotify.EventType.Create
		| Inotify.EventType.Delete
		| Inotify.EventType.MovedFrom
		| Inotify.EventType.MovedTo;

	Queue todo = new Queue ();
	todo.Enqueue (top);

	while (todo.Count > 0) {
		DirectoryInfo current = todo.Dequeue () as DirectoryInfo;

		// skip entries that are not (or no longer) directories on disk
		if (current.Exists) {
			Inotify.Subscribe (current.FullName, OnInotifyEvent, mask);

			foreach (DirectoryInfo child in DirectoryWalker.GetDirectoryInfos (current))
				todo.Enqueue (child);
		}
	}
}
/**
 * Recursively traverse the files and directories under mail_root
 * to find files that need to be indexed and directories that
 * need to be watched for changes.
 *
 * Mail folders are discovered through their ".<name>.index" files:
 *   - if <name> is a directory it is recorded as a maildir (and watched)
 *   - if <name> is a file it is recorded as an mbox file
 *   - if ".<name>.directory" exists it is queued and scanned for sub-folders
 * Afterwards one generator task is scheduled for all maildirs and one
 * task per mbox file. Does nothing if mail_root does not exist.
 */
public void Crawl ()
{
	if (!Directory.Exists (mail_root))
		return;

	mail_directories.Clear ();
	folder_directories.Clear ();
	mbox_files.Clear();

	Queue pending = new Queue ();
	pending.Enqueue (mail_root);
	folder_directories.Add (mail_root);
	// add inotify watch to root folder
	if (Inotify.Enabled)
		WatchFolderDirectory (mail_root);

	while (pending.Count > 0) {

		string dir = (string) pending.Dequeue ();
		Logger.Log.Debug ("Searching for mbox and maildirs in " + dir);

		foreach (FileInfo fi in DirectoryWalker.GetFileInfos (dir)) {
			// null for anything that is not a well-formed ".<name>.index"
			string mailFolderName = MailFolderNameFromIndexFile (fi.Name);
			if (mailFolderName == null)
				continue;
			string mailFolder = Path.Combine (dir, mailFolderName);
			if (IgnoreFolder (mailFolder))
				continue;
			if (Directory.Exists (mailFolder)) {
				mail_directories.Add (mailFolder);
				if (Inotify.Enabled)
					Watch (mailFolder);
			} else if (File.Exists (mailFolder)) {
				mbox_files.Add (mailFolder);
			}
			// if there is a directory with name .<mailFolderName>.directory
			// then it contains sub-folders
			string subFolder =
				Path.Combine (dir, "." + mailFolderName + ".directory");
			if (Directory.Exists (subFolder)) {
				pending.Enqueue (subFolder);
				folder_directories.Add (subFolder);
				if (Inotify.Enabled)
					WatchFolderDirectory (subFolder);
			}
		}
	}

	// copy the contents as mail_directories, mbox_files might change due to async events
	ArrayList _mail_directories = new ArrayList (mail_directories);
	ArrayList _mbox_files = new ArrayList (mbox_files);

	// note: when a task for mail_root is already scheduled, the mbox
	// files found above are not scheduled by this call either
	if (queryable.ThisScheduler.ContainsByTag (mail_root)) {
		Logger.Log.Debug ("Not adding task for already running task: {0}", mail_root);
		return;
	} else {
		KMaildirIndexableGenerator generator = new KMaildirIndexableGenerator (this, _mail_directories);
		AddIIndexableTask (generator, mail_root);
	}

	foreach (string mbox_file in _mbox_files) {
		IndexMbox (mbox_file, true);
	}
}

/**
 * Subscribe OnInotifyEvent to a single directory (not its subdirectories)
 * for Create, Delete, MovedFrom, MovedTo and Modify events.
 * Unlike Watch, Modify is requested here; OnInotifyEvent uses it to
 * re-index known mbox files.
 */
private void WatchFolderDirectory (string directory)
{
	Inotify.Subscribe (directory, OnInotifyEvent,
			   Inotify.EventType.Create
			   | Inotify.EventType.Delete
			   | Inotify.EventType.MovedFrom
			   | Inotify.EventType.MovedTo
			   | Inotify.EventType.Modify);
}

/**
 * Extract <name> from an index file name of the form ".<name>.index".
 * Returns null when the name has no leading dot, does not end in
 * ".index", or would leave an empty <name> (e.g. ".index"), so callers
 * never hit an out-of-range Substring.
 */
private static string MailFolderNameFromIndexFile (string indexFile)
{
	const string suffix = ".index";

	// need at least: leading '.', one character of name, the suffix
	if (indexFile.Length <= suffix.Length + 1)
		return null;
	if (indexFile [0] != '.' || ! indexFile.EndsWith (suffix))
		return null;

	return indexFile.Substring (1, indexFile.Length - suffix.Length - 1);
}
/**
 * Schedule a single indexable to be added, at immediate priority,
 * under the given tag. A null indexable is ignored.
 */
private void AddIndexableTask (Indexable indexable, string tag)
{
	if (indexable != null) {
		Scheduler.Task add_task = queryable.NewAddTask (indexable);
		add_task.Priority = Scheduler.Priority.Immediate;
		add_task.Tag = tag;

		queryable.ThisScheduler.Add (add_task);
	}
}
/**
 * Schedule an indexable generator under the given tag.
 * The task keeps whatever priority NewAddTask gives it.
 * A null generator is ignored.
 */
private void AddIIndexableTask (IIndexableGenerator generator, string tag)
{
	if (generator != null) {
		Scheduler.Task generator_task = queryable.NewAddTask (generator);
		generator_task.Tag = tag;

		queryable.ThisScheduler.Add (generator_task);
	}
}
/**
 * Start a task for indexing an mbox file.
 * The mbox path doubles as the task tag; if the scheduler already
 * holds a task with that tag no second one is added.
 */
public void IndexMbox (string mbox_file, bool initial_scan)
{
	bool already_scheduled = queryable.ThisScheduler.ContainsByTag (mbox_file);
	if (already_scheduled) {
		Logger.Log.Debug ("Not adding task for already running task: {0}", mbox_file);
		return;
	}

	AddIIndexableTask (new KMailMboxIndexableGenerator (this, mbox_file, initial_scan), mbox_file);
}
/**
 * Remove a maildir mail file from the index.
 * Schedules an immediate-priority removal task for the file's uri.
 */
private void RemoveMail (string file)
{
	Logger.Log.Debug ("Removing mail:" + file);

	Scheduler.Task removal = queryable.NewRemoveTask (UriFu.PathToFileUri (file));
	removal.Priority = Scheduler.Priority.Immediate;
	removal.SubPriority = 0;

	queryable.ThisScheduler.Add (removal);
}
/**
 * Create an indexable from a maildir message.
 * The message file itself serves as both the uri and the content uri;
 * the folder name is taken from the directory two levels above the file.
 */
public Indexable MaildirMessageToIndexable (string filename)
{
	string folder_name = GetFolderMaildir (filename);
	Uri message_uri = UriFu.PathToFileUri (filename);

	Indexable mail = new Indexable (message_uri);
	mail.HitType = "MailMessage";
	mail.MimeType = "message/rfc822";
	mail.CacheContent = false;

	mail.AddProperty (Property.NewUnsearched ("fixme:client", "kmail"));
	mail.AddProperty (Property.NewUnsearched ("fixme:account", account_name));
	mail.AddProperty (Property.NewUnsearched ("fixme:folder", folder_name));
	mail.ContentUri = message_uri;

	return mail;
}
/**
 * Create an indexable from an mbox message.
 * Most of the code here is from Evo backend.
 *
 * The mbox file becomes the parent uri of the message, the message date
 * (converted to UTC) its timestamp, and the message stream its content.
 * For messages in the sent-mail folder every non-group To/Cc address is
 * stored as "fixme:sentTo"; for all other folders every non-group sender
 * address is stored as "fixme:gotFrom".
 */
public Indexable MessageToIndexable (string file_name, System.Uri uri, GMime.Message message, string folder_name)
{
	Indexable mail = new Indexable (uri);

	// the mbox file is the parent of each of its messages, so that
	// removing the mbox uri removes all messages stored in it
	mail.ParentUri = UriFu.PathToFileUri (file_name);

	mail.Timestamp = message.Date.ToUniversalTime ();
	mail.HitType = "MailMessage";
	mail.MimeType = "message/rfc822";
	mail.CacheContent = false;

	mail.AddProperty (Property.NewUnsearched ("fixme:client", "kmail"));
	mail.AddProperty (Property.NewUnsearched ("fixme:account", account_name));
	mail.AddProperty (Property.NewUnsearched ("fixme:folder", folder_name));

	bool in_sent_folder = (folder_name == Queryable.SentMailFolderName);

	// To: recipients
	GMime.InternetAddressList to_list = message.GetRecipients (GMime.Message.RecipientType.To);
	foreach (GMime.InternetAddress recipient in to_list) {
		if (in_sent_folder && recipient.AddressType != GMime.InternetAddressType.Group)
			mail.AddProperty (Property.NewKeyword ("fixme:sentTo", recipient.Addr));
	}
	to_list.Dispose ();

	// Cc: recipients
	GMime.InternetAddressList cc_list = message.GetRecipients (GMime.Message.RecipientType.Cc);
	foreach (GMime.InternetAddress recipient in cc_list) {
		if (in_sent_folder && recipient.AddressType != GMime.InternetAddressType.Group)
			mail.AddProperty (Property.NewKeyword ("fixme:sentTo", recipient.Addr));
	}
	cc_list.Dispose ();

	// sender(s)
	GMime.InternetAddressList sender_list =
		GMime.InternetAddressList.ParseString (GMime.Utils.HeaderDecodePhrase (message.Sender));
	foreach (GMime.InternetAddress sender in sender_list) {
		if (! in_sent_folder && sender.AddressType != GMime.InternetAddressType.Group)
			mail.AddProperty (Property.NewKeyword ("fixme:gotFrom", sender.Addr));
	}
	sender_list.Dispose ();

	// outside the sent-mail folder, a "reply" link type also marks the message as sent
	if (in_sent_folder || message.GetHeader ("X-KMail-Link-Type") == "reply")
		mail.AddProperty (Property.NewFlag ("fixme:isSent"));

// no need to store date again, use the issent flag to determine if the date is sentdate or not
#if false
	if (folder_name == Queryable.SentMailFolderName)
		mail.AddProperty (Property.NewDate ("fixme:sentdate", message.Date.ToUniversalTime ()));
	else
		mail.AddProperty (Property.NewDate ("fixme:received", message.Date.ToUniversalTime ()));
#endif

	mail.SetBinaryStream (message.Stream);

	return mail;
}
/**
 * Remove an mbox file from the index.
 * Deleting an mbox means deleting all the mails that were in it. Every
 * mbox message is indexed with the uri of its mbox file as parent uri
 * (see MessageToIndexable), so a removal task for the mbox uri is
 * scheduled here, at immediate priority.
 */
public void RemoveMbox (string file)
{
	Logger.Log.Debug ("Removing mbox:" + file);

	Scheduler.Task removal = queryable.NewRemoveTask (UriFu.PathToFileUri (file));
	removal.Priority = Scheduler.Priority.Immediate;
	removal.SubPriority = 0;

	queryable.ThisScheduler.Add (removal);
}
463 ///////////////////////////////////////////////////////////
465 // Helpers
/**
 * Decide whether dirPath is the cur/ or new/ subdirectory of a known maildir.
 *
 * a maildir is of format:
 *   some_dir_in_currently_watched_directories/{cur,new,tmp}
 * again we ignore tmp - no point trying to watch it - it will be moved anyway
 * should we check with the kmail directory structure ?
 * presence of files like directory.index, directory.index.ids ?
 *
 * Returns true only when the last component of dirPath is exactly "cur"
 * or "new" and its parent directory is listed in mail_directories.
 * The last parent that matched is cached in lastGoodDirPath.
 */
public bool IsMailDir (string dirPath)
{
	if (dirPath == null)
		return false;

	// compare the last path component exactly; a plain suffix test would
	// also accept unrelated directories such as "obscur" or "renew"
	string leaf = Path.GetFileName (dirPath);
	if (leaf != "cur" && leaf != "new")
		return false;

	string possibleMaildir = (Directory.GetParent (dirPath)).FullName;
	if (lastGoodDirPath == possibleMaildir)
		return true;
	Logger.Log.Debug ("checking if " + possibleMaildir + " is a maildir ?");
	if (mail_directories.Contains (possibleMaildir)) {
		lastGoodDirPath = possibleMaildir;
		return true;
	} else
		return false;
}
/**
 * how to decide if this filename denotes an mbox file ?
 * if its of the form .aaa.index, then aaa is the mbox file
 * if its of the form aaa (no .index) then there should be a .aaa.index
 *
 * dir is the directory containing filename. Returns the full path of
 * the mbox file, or null when the expected companion file does not
 * exist in dir.
 */
public string GetMboxFile (string dir, string filename)
{
	int pos = filename.LastIndexOf (".index");
	if (pos > 0) {
		// drop the leading '.' and everything from ".index" onwards:
		// the name starts at offset 1 and is pos - 1 characters long
		// (".inbox.index" -> "inbox")
		string possible_mbox_name = filename.Substring (1, pos - 1);
		possible_mbox_name = Path.Combine (dir, possible_mbox_name);
		if (File.Exists (possible_mbox_name))
			return possible_mbox_name;
	} else {
		string possible_index_name = "." + filename + ".index";
		possible_index_name = Path.Combine (dir, possible_index_name);
		if (File.Exists (possible_index_name))
			return Path.Combine (dir, filename);
	}

	return null; // not found
}
/**
 * Called when a new directory is created.
 * Decide what to do with this new directory:
 *   - cur/, new/ or tmp/: its parent is recorded as a maildir
 *   - name ending in ".directory": recorded as a folder directory
 *   - anything else: recorded as a maildir
 * A directory is never recorded twice, so that a single Remove is
 * enough to forget it again.
 */
public void UpdateDirectories (string dirPath)
{
	DirectoryInfo dirinfo = new DirectoryInfo (dirPath);
	string dirName = dirinfo.Name;

	if (dirName == "cur" || dirName == "new" || dirName == "tmp") {
		// check and add the parentdir to mail_directories
		string parentDir = (Directory.GetParent (dirPath)).FullName;
		if (!mail_directories.Contains (parentDir))
			mail_directories.Add (parentDir);
		return;
	}

	// format .name.directory - in which case add it to folder_dir
	// format name - in which case add it to mail_dir
	ArrayList target = dirName.EndsWith (".directory") ? folder_directories : mail_directories;
	if (!target.Contains (dirPath))
		target.Add (dirPath);
}
/**
 * Folder name of an mbox file: the file name itself, without its directory.
 * FIXME: if we can parse the kmailrc file, then we might be
 * able to deduce the real mail folder name instead.
 */
public string GetFolderMbox (string mbox_file)
{
	return new FileInfo (mbox_file).Name;
}
/**
 * Folder name of a maildir message: the name of the directory two
 * levels above the mail file (<folder>/{cur,new}/<mail file>).
 */
public string GetFolderMaildir (string mailFile)
{
	DirectoryInfo state_dir = Directory.GetParent (mailFile);
	DirectoryInfo folder_dir = Directory.GetParent (state_dir.FullName);
	return folder_dir.Name;
}
/**
 * True when the lower-cased path ends with one of the excluded
 * folder names, i.e. the folder must not be scanned or watched.
 */
private bool IgnoreFolder (string path)
{
	for (int i = 0; i < excludes.Count; i++) {
		string suffix = (string) excludes [i];
		if (path.ToLower ().EndsWith (suffix))
			return true;
	}

	return false;
}