3 // KMailIndexableGenerator.cs
5 // Copyright (C) 2005 Novell, Inc.
6 // Copyright (C) 2005 Debajyoti Bera
9 // Permission is hereby granted, free of charge, to any person obtaining a
10 // copy of this software and associated documentation files (the "Software"),
11 // to deal in the Software without restriction, including without limitation
12 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 // and/or sell copies of the Software, and to permit persons to whom the
14 // Software is furnished to do so, subject to the following conditions:
16 // The above copyright notice and this permission notice shall be included in
17 // all copies or substantial portions of the Software.
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 // DEALINGS IN THE SOFTWARE.
29 using System
.Collections
;
31 using System
.Runtime
.Serialization
.Formatters
.Binary
;
32 using System
.Threading
;
38 namespace Beagle
.Daemon
.KMailQueryable
{
// Generator that walks the "cur" and "new" subdirectories of a list of maildir
// folders and hands each message file to the KMailIndexer, one directory at a time.
41 * Indexable generator for maildir mails
43 public class KMaildirIndexableGenerator
: IIndexableGenerator
{
// indexer used to build indexables from message files and to reach the file attributes store
45 private KMailIndexer indexer
;
46 // message file currently indexing
47 private FileInfo CrawlFile
;
48 // directory currently parsing
49 private DirectoryInfo current_dir
;
50 // list of files in current directory
51 private IEnumerable files_to_parse
;
52 // list of directories to scan
53 private ArrayList dirs_to_scan
;
// position within dirs_to_scan; assigned in the constructor
54 private IEnumerator dir_enumerator
= null;
// position within files_to_parse; stays null until the first directory is opened
55 private IEnumerator file_enumerator
= null;
// Account name as exposed by the owning indexer.
private string account_name {
	get {
		return this.indexer.AccountName;
	}
}
// Creates a generator over the given maildir folders.
//   indexer:          indexer stored for later use by this generator
//   mail_directories: list of maildir folder paths (strings); each one is
//                     passed to AddDirectory to collect its scannable subdirectories
public KMaildirIndexableGenerator (KMailIndexer indexer, ArrayList mail_directories)
{
	this.indexer = indexer;
	this.dirs_to_scan = new ArrayList ();

	foreach (string maildir in mail_directories)
		AddDirectory (maildir);

	// Enumerate only after every directory has been collected.
	this.dir_enumerator = this.dirs_to_scan.GetEnumerator ();
}
// NOTE(review): the body of this hook is not visible in this excerpt; confirm what, if anything, it does.
72 public void PostFlushHook ()
// Queues the message-bearing subdirectories of one maildir folder.
//   _dir: path of the maildir folder
// Each of "cur" and "new" (in that order) is appended to dirs_to_scan as a
// DirectoryInfo when it exists on disk.
private void AddDirectory (string _dir) {
	// scan mails in directory cur and new, not tmp
	string[] scanned_subdirs = new string[] { "cur", "new" };

	foreach (string subdir in scanned_subdirs) {
		string subdir_path = Path.Combine (_dir, subdir);
		if (!Directory.Exists (subdir_path))
			continue;

		dirs_to_scan.Add (new DirectoryInfo (subdir_path));
	}
}
// Status label for this generator: the indexer's mail root.
public string StatusName {
	get {
		return this.indexer.MailRoot;
	}
}
// Builds the indexable for the file the file enumerator is currently positioned on.
// Returns whatever indexer.MaildirMessageToIndexable produces for that file's full path.
public Indexable GetNextIndexable ()
{
	FileInfo current_message = (FileInfo) file_enumerator.Current;
	string message_path = current_message.FullName;
	return indexer.MaildirMessageToIndexable (message_path);
}
// Asks the queryable's file attributes store whether the given path is up to date.
//   path: full path of the file to check
public bool IsUpToDate (string path)
{
	bool up_to_date = indexer.Queryable.FileAttributesStore.IsUpToDate (path);
	return up_to_date;
}
// Positions the generator on the next message file that is not already up to date.
// NOTE(review): the return statements are not visible in this excerpt; presumably
// false once the directories are exhausted and true otherwise - confirm.
106 public bool HasNextIndexable ()
// Loop while there is no file enumerator yet or the current directory has no more files.
109 while (file_enumerator
== null || !file_enumerator
.MoveNext ()) {
// No directories left to open: drop the directory enumerator.
110 if (!dir_enumerator
.MoveNext ()) {
111 dir_enumerator
= null;
// Open the next directory and start enumerating its files.
114 current_dir
= (DirectoryInfo
) dir_enumerator
.Current
;
115 Logger
.Log
.Info ("Scanning maildir feeds in " + current_dir
.FullName
);
116 files_to_parse
= DirectoryWalker
.GetFileInfos (current_dir
);
117 file_enumerator
= files_to_parse
.GetEnumerator ();
// Remember the candidate file; the trailing condition repeats the search
// for as long as the candidate is reported up to date.
119 CrawlFile
= (FileInfo
) file_enumerator
.Current
;
120 } while (IsUpToDate (CrawlFile
.FullName
));
// Generator that parses a single mbox file with GMime and hands each parsed
// message to the KMailIndexer, persisting the parse offset between runs.
128 * Indexable generator for mbox mail files
131 public class KMailMboxIndexableGenerator
: IIndexableGenerator
{
132 // path of the mbox file
133 private string mbox_file
;
134 // fd, stream, parser needed for gmime parsing
// mbox_fd starts at -1
135 private int mbox_fd
= -1;
136 private GMime
.StreamFs mbox_stream
;
137 private GMime
.Parser mbox_parser
;
// indexer used to build indexables and to reach the queryable's data lines and attribute store
139 private KMailIndexer indexer
;
140 // number of mails scanned
141 private int indexed_count
;
142 // is this initial scan - in which case the mbox might have been modified since last scan
143 private bool initial_scan
;
// Account name as exposed by the owning indexer.
private string account_name {
	get {
		return this.indexer.AccountName;
	}
}
// Folder name the indexer derives from the mbox file path (indexer.GetFolderMbox).
private string folder_name {
	get {
		return this.indexer.GetFolderMbox (this.mbox_file);
	}
}
// Creates a generator for one mbox file. Only stores its arguments.
//   indexer:      indexer stored for later use by this generator
//   mbox_file:    path of the mbox file to parse
//   initial_scan: true when this is the initial scan of the file
public KMailMboxIndexableGenerator (KMailIndexer indexer, string mbox_file, bool initial_scan)
{
	this.initial_scan = initial_scan;
	this.mbox_file = mbox_file;
	this.indexer = indexer;
}
// NOTE(review): the body of this hook is not visible in this excerpt; confirm what, if anything, it does.
160 public void PostFlushHook ()
/**
 * Record on disk how far indexing of the mbox has progressed, so that an
 * interrupted run does not have to restart from the beginning as long as
 * the mbox file has not been modified.
 *
 * Does nothing when no parser is open. Otherwise stores the parser's
 * current position in MboxLastOffset and attaches the current UTC time
 * to the mbox file as its last write time in the file attributes store.
 */
public void Checkpoint ()
{
	if (mbox_parser == null)
		return;

	long parsed_upto = mbox_parser.Tell ();
	MboxLastOffset = parsed_upto;

	indexer.Queryable.FileAttributesStore.AttachLastWriteTime (mbox_file, DateTime.UtcNow);
}
// Status label for this generator: the path of the mbox file.
public string StatusName {
	get {
		return this.mbox_file;
	}
}
// Persisted parse offset for this mbox file.
// Both accessors use the data-line key "offset-" followed by the mbox path with
// every '/' replaced by '-'.
// NOTE(review): the accessor keywords and the getter's return are not visible in
// this excerpt; presumably the getter returns the converted offset - confirm.
182 private long MboxLastOffset
{
// Read side: fetch the stored line and convert it to a 64-bit integer.
184 string offset_str
= indexer
.Queryable
.ReadDataLine ("offset-" + mbox_file
.Replace ('/', '-'));
185 long offset
= Convert
.ToInt64 (offset_str
);
// Write side: store the assigned value as text under the same key.
190 indexer
.Queryable
.WriteDataLine ("offset-" + mbox_file
.Replace ('/', '-'), value.ToString ());
// Asks the queryable's file attributes store whether the given path is up to date.
//   path: full path of the file to check
public bool IsUpToDate (string path)
{
	//Logger.Log.Info (path + " is uptodate:" + indexer.Queryable.FileAttributesStore.IsUpToDate (path));
	bool up_to_date = indexer.Queryable.FileAttributesStore.IsUpToDate (path);
	return up_to_date;
}
// Opens the mbox (initializing GMime, the file descriptor, the stream and the
// parser) and reports whether more messages remain; at end of stream the stream
// and parser are closed and disposed.
// NOTE(review): the guarding conditions, try keywords and return statements are
// not visible in this excerpt - confirm the exact control flow before editing.
201 * Advance to the next mail in the mbox file.
203 public bool HasNextIndexable ()
206 Logger
.Log
.Debug ("Opening mbox {0}", mbox_file
);
// GMime initialization failures are logged as warnings.
209 KMailQueryable
.InitializeGMime ();
210 } catch (Exception e
) {
211 Logger
.Log
.Warn (e
, "Caught exception trying to initalize gmime:");
// Open the mbox read-only through the native open(2) wrapper.
// NOTE(review): Syscall.open normally reports failure through its return value
// rather than an exception; confirm that the FileNotFoundException handler
// below can actually fire and that a negative descriptor is handled.
217 mbox_fd
= Mono
.Unix
.Native
.Syscall
.open (mbox_file
, Mono
.Unix
.Native
.OpenFlags
.O_RDONLY
);
218 } catch (System
.IO
.FileNotFoundException e
) {
219 Logger
.Log
.Warn ("mbox " + mbox_file
+ " deleted while indexing.");
222 mbox_stream
= new GMime
.StreamFs (mbox_fd
);
// NOTE(review): MboxLastOffset is a long but is cast to int for Seek, so
// offsets beyond the int range would be truncated - confirm the Seek signature.
223 if (initial_scan
&& !IsUpToDate (mbox_file
))
224 // this is the initial scan and
225 // file has changed since last scan =>
226 // set mboxlastoffset to 0 and seek to 0
227 mbox_stream
.Seek ((int)(MboxLastOffset
= 0));
// Otherwise resume from the persisted offset.
229 mbox_stream
.Seek ((int) MboxLastOffset
);
230 mbox_parser
= new GMime
.Parser (mbox_stream
);
231 mbox_parser
.ScanFrom
= true;
// End of stream: release the stream and the parser and log the message count.
234 if (mbox_parser
.Eos ()) {
235 // save the state ASAP
238 mbox_stream
.Close ();
240 mbox_stream
.Dispose ();
242 mbox_parser
.Dispose ();
245 Logger
.Log
.Debug ("{0}: Finished indexing {1} messages", folder_name
, indexed_count
);
// Parses the next message from the mbox and converts it to an Indexable via
// indexer.MessageToIndexable, using a URI built from the parser's From line.
// NOTE(review): the try keyword, the bodies of the empty-message and
// null-indexable branches, and the return statements are not visible in this
// excerpt - confirm them before editing.
251 public Indexable
GetNextIndexable ()
253 GMime
.Message message
= null;
255 message
= mbox_parser
.ConstructMessage ();
256 } catch (System
.IO
.FileNotFoundException e
) {
257 Logger
.Log
.Warn ("mbox " + mbox_file
+ " deleted while parsing.");
262 // Again comment from Evo :P
263 // Work around what I think is a bug in GMime: If you
264 // have a zero-byte file or seek to the end of a
265 // file, parser.Eos () will return true until it
266 // actually tries to read something off the wire.
267 // Since parser.ConstructMessage() always returns a
268 // message (which may also be a bug), we'll often get
269 // one empty message which we need to deal with here.
271 // Check if its empty by seeing if the Headers
272 // property is null or empty.
273 if (message
== null || message
.Headers
== null || message
.Headers
== "") {
277 // mbox KIO slave uses the From line as URI - how weird!
278 // are those lines supposed to be unique ???
279 string id
= mbox_parser
.From
;
280 System
.Uri uri
= EmailUri (id
);
// The folder argument is derived from the mbox path by the indexer.
282 Indexable indexable
= indexer
.MessageToIndexable (mbox_file
, uri
, message
, indexer
.GetFolderMbox (mbox_file
));
284 if (indexable
== null)
// Builds the URI that identifies one message of this mbox file.
//   id: identifier of the message within the mbox
// Returns a Uri of the form "mbox:///<full mbox path>/<id>".
// TODO: confirm that this works with the mbox kio-slave from new kdepim
public Uri EmailUri (string id)
{
	string mbox_path = new FileInfo (mbox_file).FullName;
	string uri_text = String.Format ("mbox:///{0}/{1}", mbox_path, id);
	return new Uri (uri_text);
}