Remove some debug spew
[beagle.git] / beagled / KMailQueryable / KMailIndexableGenerator.cs
blob68ca340f66fa896d3c453af1ebe12cc501923f06
2 //
3 // KMailIndexableGenerator.cs
4 //
5 // Copyright (C) 2005 Novell, Inc.
6 // Copyright (C) 2005 Debajyoti Bera
7 //
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining a
10 // copy of this software and associated documentation files (the "Software"),
11 // to deal in the Software without restriction, including without limitation
12 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 // and/or sell copies of the Software, and to permit persons to whom the
14 // Software is furnished to do so, subject to the following conditions:
16 // The above copyright notice and this permission notice shall be included in
17 // all copies or substantial portions of the Software.
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 // DEALINGS IN THE SOFTWARE.
28 using System;
29 using System.Collections;
30 using System.IO;
31 using System.Runtime.Serialization.Formatters.Binary;
32 using System.Threading;
33 using System.Xml;
35 using Beagle.Util;
36 using Beagle.Daemon;
38 namespace Beagle.Daemon.KMailQueryable {
/**
 * Indexable generator for maildir mails.
 *
 * For every mail directory passed to the constructor, the "cur" and "new"
 * subdirectories (when they exist) are queued for scanning.  Message files
 * are then handed out one at a time, skipping every file the file
 * attributes store already reports as up to date.
 */
public class KMaildirIndexableGenerator : IIndexableGenerator {
	// store the indexer
	private KMailIndexer indexer;
	// message file currently indexing
	private FileInfo CrawlFile;
	// directory currently parsing
	private DirectoryInfo current_dir;
	// list of files in current directory
	private IEnumerable files_to_parse;
	// list of directories to scan
	private ArrayList dirs_to_scan;
	// walks dirs_to_scan; reset to null once every directory has been visited
	private IEnumerator dir_enumerator = null;
	// walks files_to_parse; null until the first directory has been opened
	private IEnumerator file_enumerator = null;

	// name of the account, as reported by the indexer
	private string account_name {
		get { return indexer.AccountName; }
	}

	/**
	 * indexer: the indexer used to turn message files into indexables
	 * mail_directories: list of maildir paths (strings) to scan
	 */
	public KMaildirIndexableGenerator (KMailIndexer indexer, ArrayList mail_directories)
	{
		this.indexer = indexer;
		dirs_to_scan = new ArrayList ();

		foreach (string directory in mail_directories) {
			AddDirectory (directory);
		}

		// take the enumerator only after the list is complete
		dir_enumerator = dirs_to_scan.GetEnumerator ();
	}

	// nothing to do after a flush for maildirs
	public void PostFlushHook ()
	{
	}

	/**
	 * Queue the "cur" and "new" subdirectories of _dir for scanning,
	 * each only if it exists on disk.
	 */
	private void AddDirectory (string _dir)
	{
		// scan mails in directory cur and new, not tmp
		string cur_path = Path.Combine (_dir, "cur");
		if (Directory.Exists (cur_path))
			dirs_to_scan.Add (new DirectoryInfo (cur_path));

		string new_path = Path.Combine (_dir, "new");
		if (Directory.Exists (new_path))
			dirs_to_scan.Add (new DirectoryInfo (new_path));
	}

	// the mail root of the indexer is used as the status name
	public string StatusName {
		get { return indexer.MailRoot; }
	}

	/**
	 * Build the indexable for the file the enumerator currently points at.
	 * HasNextIndexable () must have returned true beforehand.
	 */
	public Indexable GetNextIndexable ()
	{
		FileInfo file = (FileInfo) file_enumerator.Current;
		return indexer.MaildirMessageToIndexable (file.FullName);
	}

	// ask the file attributes store whether path is already up to date
	public bool IsUpToDate (string path)
	{
		return indexer.Queryable.FileAttributesStore.IsUpToDate (path);
	}

	/**
	 * Advance to the next message file that is not up to date.
	 * Returns false once all queued directories are exhausted, and keeps
	 * returning false on any later call.
	 */
	public bool HasNextIndexable ()
	{
		do {
			while (file_enumerator == null || !file_enumerator.MoveNext ()) {
				// dir_enumerator is cleared below once the directories run
				// out; the null check keeps a repeated call from
				// dereferencing it again.
				if (dir_enumerator == null || !dir_enumerator.MoveNext ()) {
					dir_enumerator = null;
					return false;
				}

				current_dir = (DirectoryInfo) dir_enumerator.Current;
				Logger.Log.Info ("Scanning maildir feeds in " + current_dir.FullName);
				files_to_parse = DirectoryWalker.GetFileInfos (current_dir);
				file_enumerator = files_to_parse.GetEnumerator ();
			}

			CrawlFile = (FileInfo) file_enumerator.Current;
		} while (IsUpToDate (CrawlFile.FullName));

		return true;
	}
}
/**
 * Indexable generator for mbox mail files
 * based on Evo code
 *
 * The mbox is opened lazily on the first call to HasNextIndexable ().
 * The parser offset is stored through the indexer's queryable so that an
 * interrupted run can resume where it stopped.
 */
public class KMailMboxIndexableGenerator : IIndexableGenerator {
	// path of the mbox file
	private string mbox_file;
	// fd, stream, parser needed for gmime parsing
	// (mbox_fd < 0 means the mbox is not currently open)
	private int mbox_fd = -1;
	private GMime.StreamFs mbox_stream;
	private GMime.Parser mbox_parser;
	// store the indexer
	private KMailIndexer indexer;
	// number of mails scanned
	private int indexed_count;
	// is this initial scan - in which case the mbox might have been modified since last scan
	private bool initial_scan;

	// name of the account, as reported by the indexer
	private string account_name {
		get { return indexer.AccountName; }
	}

	// folder name the indexer associates with this mbox file
	private string folder_name {
		get { return indexer.GetFolderMbox (mbox_file); }
	}

	/**
	 * indexer: the indexer used to turn messages into indexables
	 * mbox_file: path of the mbox file to parse
	 * initial_scan: true if this is the initial scan of the mbox
	 */
	public KMailMboxIndexableGenerator (KMailIndexer indexer, string mbox_file, bool initial_scan)
	{
		this.indexer = indexer;
		this.mbox_file = mbox_file;
		this.initial_scan = initial_scan;
	}

	// save the progress after every flush
	public void PostFlushHook ()
	{
		Checkpoint ();
	}

	/**
	 * store how long indexing is done on the disk
	 * in case indexing stops midway we dont have to restart from the beginning
	 * if the mbox file hasnt been modified
	 *
	 * Does nothing while no parser is open.
	 */
	public void Checkpoint ()
	{
		if (mbox_parser == null)
			return;

		MboxLastOffset = mbox_parser.Tell ();
		indexer.Queryable.FileAttributesStore.AttachLastWriteTime (mbox_file, DateTime.UtcNow);
	}

	// the path of the mbox file is used as the status name
	public string StatusName {
		get { return mbox_file; }
	}

	// key of the data line holding the saved offset for this mbox;
	// '/' in the path is replaced by '-'
	private string OffsetKey {
		get { return "offset-" + mbox_file.Replace ('/', '-'); }
	}

	/**
	 * Parser offset saved for this mbox file.
	 * NOTE(review): presumably ReadDataLine returns null when nothing has
	 * been stored yet, in which case Convert.ToInt64 yields 0 - confirm.
	 */
	private long MboxLastOffset {
		get {
			string offset_str = indexer.Queryable.ReadDataLine (OffsetKey);
			return Convert.ToInt64 (offset_str);
		}

		set {
			indexer.Queryable.WriteDataLine (OffsetKey, value.ToString ());
		}
	}

	// ask the file attributes store whether path is already up to date
	public bool IsUpToDate (string path)
	{
		return indexer.Queryable.FileAttributesStore.IsUpToDate (path);
	}

	/**
	 * Advance to the next mail in the mbox file.
	 *
	 * Opens the mbox on first use.  Returns false if gmime cannot be
	 * initialized, if the mbox cannot be opened, or when the parser has
	 * reached the end of the stream (in which case the state is saved and
	 * the stream and parser are released).
	 */
	public bool HasNextIndexable ()
	{
		if (mbox_fd < 0 && !OpenMbox ())
			return false;

		if (!mbox_parser.Eos ())
			return true;

		// save the state ASAP
		Checkpoint ();

		mbox_stream.Close ();
		mbox_fd = -1;
		mbox_stream.Dispose ();
		mbox_stream = null;
		mbox_parser.Dispose ();
		mbox_parser = null;

		Logger.Log.Debug ("{0}: Finished indexing {1} messages", folder_name, indexed_count);
		return false;
	}

	// Open the mbox file, seek to the saved offset and set up the parser.
	// Returns false (leaving mbox_fd negative) when that is not possible.
	private bool OpenMbox ()
	{
		Logger.Log.Debug ("Opening mbox {0}", mbox_file);

		try {
			KMailQueryable.InitializeGMime ();
		} catch (Exception e) {
			Logger.Log.Warn (e, "Caught exception trying to initalize gmime:");
			return false;
		}

		try {
			mbox_fd = Mono.Unix.Native.Syscall.open (mbox_file, Mono.Unix.Native.OpenFlags.O_RDONLY);
		} catch (System.IO.FileNotFoundException) {
			Logger.Log.Warn ("mbox " + mbox_file + " deleted while indexing.");
			return false;
		}

		// open(2) signals failure through a negative return value rather
		// than an exception; never hand an invalid descriptor to gmime.
		if (mbox_fd < 0) {
			Logger.Log.Warn ("Could not open mbox " + mbox_file + ": " + Mono.Unix.Native.Stdlib.GetLastError ());
			return false;
		}

		mbox_stream = new GMime.StreamFs (mbox_fd);

		long start_offset;
		if (initial_scan && !IsUpToDate (mbox_file)) {
			// this is the initial scan and
			// file has changed since last scan =>
			// set mboxlastoffset to 0 and seek to 0
			start_offset = 0;
			MboxLastOffset = start_offset;
		} else {
			start_offset = MboxLastOffset;
		}

		// NOTE(review): the cast truncates offsets that do not fit in an
		// int; kept as in the original code - confirm against the
		// signature of GMime.StreamFs.Seek.
		mbox_stream.Seek ((int) start_offset);

		mbox_parser = new GMime.Parser (mbox_stream);
		mbox_parser.ScanFrom = true;
		return true;
	}

	/**
	 * Parse the next message from the mbox and convert it to an indexable.
	 * Returns null if the message is empty, if the mbox file vanished while
	 * parsing, or if the indexer produced no indexable for the message.
	 * The parsed message is always disposed before returning.
	 */
	public Indexable GetNextIndexable ()
	{
		GMime.Message message = null;
		try {
			message = mbox_parser.ConstructMessage ();
		} catch (System.IO.FileNotFoundException) {
			Logger.Log.Warn ("mbox " + mbox_file + " deleted while parsing.");
			return null;
		}

		try {
			// Again comment from Evo :P
			// Work around what I think is a bug in GMime: If you
			// have a zero-byte file or seek to the end of a
			// file, parser.Eos () will return true until it
			// actually tries to read something off the wire.
			// Since parser.ConstructMessage() always returns a
			// message (which may also be a bug), we'll often get
			// one empty message which we need to deal with here.
			//
			// Check if its empty by seeing if the Headers
			// property is null or empty.
			if (message == null || message.Headers == null || message.Headers == "") {
				return null;
			}

			// mbox KIO slave uses the From line as URI - how weird!
			// are those lines supposed to be unique ???
			string id = mbox_parser.From;
			System.Uri uri = EmailUri (id);

			Indexable indexable = indexer.MessageToIndexable (mbox_file, uri, message, folder_name);

			if (indexable == null)
				return null;

			++indexed_count;

			return indexable;
		} finally {
			if (message != null)
				message.Dispose ();
		}
	}

	// TODO: confirm that this works with the mbox kio-slave from new kdepim
	/**
	 * Build the uri of a mail: "mbox:///", the full path of the mbox
	 * file, "/" and the given id.
	 */
	public Uri EmailUri (string id)
	{
		FileInfo fi = new FileInfo (mbox_file);
		return new Uri (String.Format ("mbox:///{0}/{1}", fi.FullName, id));
	}
}