Fixed #374055:Only the first "tag" is detected in digikam.
[beagle.git] / Filters / FilterMail.cs
blob1f899d0acf26b3f6787b7ca2a159726ef7ce0673
2 //
3 // FilterMail.cs
4 //
5 // Copyright (C) 2004-2005 Novell, Inc.
6 //
7 //
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining a
10 // copy of this software and associated documentation files (the "Software"),
11 // to deal in the Software without restriction, including without limitation
12 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 // and/or sell copies of the Software, and to permit persons to whom the
14 // Software is furnished to do so, subject to the following conditions:
16 // The above copyright notice and this permission notice shall be included in
17 // all copies or substantial portions of the Software.
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 // DEALINGS IN THE SOFTWARE.
28 using System;
29 using System.Collections;
30 using System.IO;
32 using GMime;
34 using Beagle;
35 using Beagle.Daemon;
36 using Beagle.Util;
38 namespace Beagle.Filters {
40 [PropertyKeywordMapping (Keyword="mailfrom", PropertyName="fixme:from_name", IsKeyword=false)]
41 [PropertyKeywordMapping (Keyword="mailfromaddr", PropertyName="fixme:from_address", IsKeyword=false)]
42 [PropertyKeywordMapping (Keyword="mailto", PropertyName="fixme:to_name", IsKeyword=false)]
43 [PropertyKeywordMapping (Keyword="mailtoaddr", PropertyName="fixme:to_address", IsKeyword=false)]
44 [PropertyKeywordMapping (Keyword="mailinglist", PropertyName="fixme:mlist", IsKeyword=true, Description="Mailing list id")]
45 public class FilterMail : Beagle.Daemon.Filter, IDisposable {
47 private static bool gmime_initialized = false;
49 private GMime.Message message;
50 private PartHandler handler;
// Registers the filter version and the single MIME type this filter
// supports.
public FilterMail ()
{
	// Filter version history:
	// 1: Make email addresses non-keyword, add sanitized version
	//    for searching for parts of an email address.
	// 2: No need to separately add sanitized version of emails.
	//    BeagleAnalyzer uses a tokenfilter taking care of this.
	const int filter_version = 2;
	SetVersion (filter_version);

	const string rfc822_mime_type = "message/rfc822";
	AddSupportedFlavor (FilterFlavor.NewFromMimeType (rfc822_mime_type));
}
// Opens the mail file and parses it into this.message.
//
// Calls Error () when GMime cannot be initialized or when the parser
// yields no message. Throws IOException when the file cannot be
// opened for reading.
protected override void DoOpen (FileInfo info)
{
	// GMime is initialized lazily; the static flag records a
	// successful Init () so later filters skip this step.
	if (!gmime_initialized) {
		try {
			GMime.Global.Init ();
			gmime_initialized = true;
		} catch {
			Error ();
			return;
		}
	}

	int mail_fd = Mono.Unix.Native.Syscall.open (info.FullName, Mono.Unix.Native.OpenFlags.O_RDONLY);

	if (mail_fd == -1)
		throw new IOException (String.Format ("Unable to read {0} for parsing mail", info.FullName));

	GMime.StreamFs stream = null;
	GMime.Parser parser = null;

	try {
		stream = new GMime.StreamFs (mail_fd);
		parser = new GMime.Parser (stream);
		this.message = parser.ConstructMessage ();
	} finally {
		// Release the native wrappers even when parsing throws.
		// The stream is disposed before the parser, as before.
		if (stream != null)
			stream.Dispose ();
		else
			// No stream was ever created around the descriptor,
			// so nothing else can close it.
			Mono.Unix.Native.Syscall.close (mail_fd);

		if (parser != null)
			parser.Dispose ();
	}

	if (this.message == null)
		Error ();
}
// Returns true when the given top-level MIME object is an embedded
// message, or a multipart container other than multipart/alternative.
private bool HasAttachments (GMime.Object mime_part)
{
	if (mime_part is GMime.MessagePart)
		return true;

	// Messages that are multipart/alternative shouldn't be considered as having
	// attachments. Unless of course they do.
	if (mime_part is GMime.Multipart) {
		// MIME subtypes are case-insensitive ASCII tokens. Compare
		// ordinally so the current culture's casing rules (e.g. the
		// Turkish dotless i) cannot turn "ALTERNATIVE" into a mismatch.
		string subtype = mime_part.ContentType.Subtype;
		if (!String.Equals (subtype, "alternative", StringComparison.OrdinalIgnoreCase))
			return true;
	}

	return false;
}
// Extracts the message metadata (subject, date, To/Cc/From addresses,
// attachment flag, message id, references, mailing list id and the
// KMail "sent" flag) and adds it as properties.
protected override void DoPullProperties ()
{
	string subject = GMime.Utils.HeaderDecodePhrase (this.message.Subject);
	AddProperty (Property.New ("dc:title", subject));

	AddProperty (Property.NewDate ("fixme:date", message.Date.ToUniversalTime ()));

	AddAddressListProperties (this.message.GetRecipients (GMime.Message.RecipientType.To), "fixme:to");
	AddAddressListProperties (this.message.GetRecipients (GMime.Message.RecipientType.Cc), "fixme:cc");

	string sender = GMime.Utils.HeaderDecodePhrase (this.message.Sender);
	AddAddressListProperties (GMime.InternetAddressList.ParseString (sender), "fixme:from");

	if (HasAttachments (this.message.MimePart))
		AddProperty (Property.NewFlag ("fixme:hasAttachments"));

	// Store the message ID and references as unsearched
	// properties.  They will be used to generate
	// conversations in the frontend.
	string msgid = this.message.GetHeader ("Message-Id");
	if (msgid != null)
		AddProperty (Property.NewUnsearched ("fixme:msgid", GMime.Utils.DecodeMessageId (msgid)));

	foreach (GMime.References refs in this.message.References)
		AddProperty (Property.NewUnsearched ("fixme:reference", refs.Msgid));

	string list_id = this.message.GetHeader ("List-Id");
	if (list_id != null) {
		// FIXME: Might need some additional parsing.
		AddProperty (Property.NewKeyword ("fixme:mlist", GMime.Utils.HeaderDecodePhrase (list_id)));
	}

	// KMail can store replies in the same folder
	// Use issent flag to distinguish between incoming
	// and outgoing message
	string kmail_msg_sent = this.message.GetHeader ("X-KMail-Link-Type");
	bool issent_is_set = false;
	foreach (Property property in IndexableProperties) {
		if (property.Key == "fixme:isSent") {
			issent_is_set = true;
			break;
		}
	}

	// The string comparison is already false for a missing (null)
	// header, so no separate null check is needed.
	if (!issent_is_set && kmail_msg_sent == "reply")
		AddProperty (Property.NewFlag ("fixme:isSent"));
}

// Adds one set of properties per address in addrs, then disposes the
// list.  For a key of "fixme:to" this emits "fixme:to" (unsearched,
// full address string), "fixme:to_address" (skipped for group
// addresses) and "fixme:to_name".  A null list is ignored.
private void AddAddressListProperties (GMime.InternetAddressList addrs, string key)
{
	if (addrs == null)
		return;

	try {
		foreach (GMime.InternetAddress ia in addrs) {
			AddProperty (Property.NewUnsearched (key, ia.ToString (false)));
			if (ia.AddressType != GMime.InternetAddressType.Group)
				AddProperty (Property.New (key + "_address", ia.Addr));

			AddProperty (Property.New (key + "_name", ia.Name));
		}
	} finally {
		// Dispose the list even if adding a property throws.
		addrs.Dispose ();
	}
}
// Walks the message's MIME tree with a fresh PartHandler and registers
// the child indexables the handler collected.
protected override void DoPullSetup ()
{
	PartHandler part_handler = new PartHandler (this);
	this.handler = part_handler;

	GMime.Object root_part = this.message.MimePart;
	try {
		part_handler.OnEachPart (root_part);
	} finally {
		// Same effect as a using statement around root_part.
		if (root_part != null)
			root_part.Dispose ();
	}

	AddChildIndexables (part_handler.ChildIndexables);
}
// Pulls one line of body text per call.  Signals Finished () when
// there is no body reader or the reader is exhausted; empty lines are
// skipped without appending anything.
protected override void DoPull ()
{
	TextReader body_reader = handler.Reader;

	if (body_reader == null) {
		Finished ();
		return;
	}

	string line = body_reader.ReadLine ();

	if (line == null) {
		Finished ();
		return;
	}

	if (line.Length == 0)
		return;

	AppendText (line);
	AppendStructuralBreak ();
}
// Closing the filter simply releases everything Dispose () releases.
protected override void DoClose ()
{
	this.Dispose ();
}
// Closes the body text reader (if any), drops the part handler and
// disposes the parsed message.  Both fields are reset to null, so a
// second call finds nothing left to release.
public void Dispose ()
{
	if (this.handler != null) {
		TextReader body_reader = this.handler.Reader;
		if (body_reader != null)
			body_reader.Close ();
	}
	this.handler = null;

	if (this.message == null)
		return;

	this.message.Dispose ();
	this.message = null;
}
218 private class PartHandler {
219 private Beagle.Daemon.Filter filter;
220 private int count = 0; // parts handled so far
221 private int depth = 0; // part recursion depth
222 private ArrayList child_indexables = new ArrayList ();
223 private TextReader reader;
225 // Blacklist a handful of common MIME types that are
226 // either pointless on their own or ones that we don't
227 // have filters for.
228 static private string[] blacklisted_mime_types = new string[] {
229 "application/pgp-signature",
230 "application/x-pkcs7-signature",
231 "application/ms-tnef",
232 "text/x-vcalendar",
233 "text/x-vcard"
// Creates a handler bound to the given filter.  The filter's Uri is
// used later to build child URIs and log messages.
public PartHandler (Beagle.Daemon.Filter filter)
{
	this.filter = filter;
}
// Returns true when at least one registered filter flavor matches the
// given MIME type.  A null MIME type is never handled.
private bool IsMimeTypeHandled (string mime_type)
{
	if (mime_type == null)
		return false;

	// Normalize once, outside the loop, and culture-independently:
	// MIME types are ASCII tokens and must not be lower-cased with
	// locale-specific rules.
	string normalized_type = mime_type.ToLowerInvariant ();

	foreach (FilterFlavor flavor in FilterFlavor.Flavors) {
		if (flavor.IsMatch (null, null, normalized_type))
			return true;
	}

	return false;
}
// Recursively walks a MIME object.
//
// Embedded messages and multipart containers are descended into.  For
// multipart/alternative only the last alternative that some filter can
// handle is used.  A leaf part either becomes the filter's own text
// reader (first part, top level, text/plain) or a child indexable,
// unless its MIME type is blacklisted.
//
// Throws Exception when mime_part is none of MessagePart, Multipart
// or Part.
public void OnEachPart (GMime.Object mime_part)
{
	GMime.Object part = null;
	bool part_needs_dispose = false;

	//for (int i = 0; i < this.depth; i++)
	//  Console.Write ("  ");
	//Console.WriteLine ("Content-Type: {0}", mime_part.ContentType);

	++depth;

	try {
		if (mime_part is GMime.MessagePart) {
			GMime.MessagePart msg_part = (GMime.MessagePart) mime_part;

			using (GMime.Message message = msg_part.Message) {
				using (GMime.Object subpart = message.MimePart)
					this.OnEachPart (subpart);
			}
		} else if (mime_part is GMime.Multipart) {
			GMime.Multipart multipart = (GMime.Multipart) mime_part;

			int num_parts = multipart.Number;

			// If the mimetype is multipart/alternative, we only want to index
			// one part -- the richest one we can filter.
			if (String.Equals (mime_part.ContentType.Subtype, "alternative", StringComparison.OrdinalIgnoreCase)) {
				// The richest formats are at the end, so work from there
				// backward.
				for (int i = num_parts - 1; i >= 0; i--) {
					GMime.Object subpart = multipart.GetPart (i);

					if (IsMimeTypeHandled (subpart.ContentType.ToString ())) {
						part = subpart;
						part_needs_dispose = true;
						break;
					} else {
						subpart.Dispose ();
					}
				}
			}

			// If it's not alternative, or we don't know how to filter any of
			// the parts, treat them like a bunch of attachments.
			if (part == null) {
				for (int i = 0; i < num_parts; i++) {
					using (GMime.Object subpart = multipart.GetPart (i))
						this.OnEachPart (subpart);
				}
			}
		} else if (mime_part is GMime.Part)
			part = mime_part;
		else {
			// Report the type of the object we were given.  The local
			// "part" is always null on this path, so it must not be
			// dereferenced here.
			string type_name = (mime_part == null) ? "(null)" : mime_part.GetType ().ToString ();
			throw new Exception (String.Format ("Unknown part type: {0}", type_name));
		}

		if (part != null) {
			System.IO.Stream stream = null;

			using (GMime.DataWrapper content_obj = ((GMime.Part) part).ContentObject)
				stream = content_obj.Stream;

			// If this is the only part and it's plain text, we
			// want to just attach it to our filter instead of
			// creating a child indexable for it.
			bool no_child_needed = false;

			// Lower-case culture-independently so the comparisons and
			// the blacklist lookup below behave the same in every locale.
			string mime_type = part.ContentType.ToString ().ToLowerInvariant ();

			if (this.depth == 1 && this.count == 0) {
				if (mime_type == "text/plain") {
					no_child_needed = true;

					this.reader = new StreamReader (stream);
				}
			}

			if (!no_child_needed) {
				// Check the mime type against the blacklist and don't index any
				// parts that are contained within.  That way the user doesn't
				// get flooded with pointless signatures and vcard and ical
				// attachments along with (real) attachments.

				if (Array.IndexOf (blacklisted_mime_types, mime_type) == -1) {
					string sub_uri = this.filter.Uri.ToString () + "#" + this.count;
					Indexable child = new Indexable (new Uri (sub_uri));

					child.HitType = "MailMessage";
					child.MimeType = mime_type;
					child.CacheContent = false;

					child.AddProperty (Property.NewKeyword ("fixme:attachment_title", ((GMime.Part)part).Filename));

					if (String.Equals (part.ContentType.Type, "text", StringComparison.OrdinalIgnoreCase))
						child.SetTextReader (new StreamReader (stream));
					else
						child.SetBinaryStream (stream);

					this.child_indexables.Add (child);
				} else {
					Log.Debug ("Skipping attachment {0}#{1} with blacklisted mime type {2}",
						   this.filter.Uri, this.count, mime_type);
				}
			}

			this.count++;
		}
	} finally {
		// Runs on the exception path too, so the alternative part we
		// own is released and the recursion depth stays balanced.
		if (part_needs_dispose)
			part.Dispose ();

		--depth;
	}
}
// The child indexables created so far by OnEachPart, one per
// non-blacklisted part that was not attached directly to the filter.
public ICollection ChildIndexables {
	get {
		return child_indexables;
	}
}
// Reader over the message's own body text.  Stays null unless
// OnEachPart found a top-level text/plain first part.
public TextReader Reader {
	get {
		return reader;
	}
}