5 // Copyright (C) 2004-2005 Novell, Inc.
9 // Permission is hereby granted, free of charge, to any person obtaining a
10 // copy of this software and associated documentation files (the "Software"),
11 // to deal in the Software without restriction, including without limitation
12 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 // and/or sell copies of the Software, and to permit persons to whom the
14 // Software is furnished to do so, subject to the following conditions:
16 // The above copyright notice and this permission notice shall be included in
17 // all copies or substantial portions of the Software.
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 // DEALINGS IN THE SOFTWARE.
29 using System
.Collections
;
38 namespace Beagle
.Filters
{
40 [PropertyKeywordMapping (Keyword
="mailfrom", PropertyName
="fixme:from_name", IsKeyword
=false)]
41 [PropertyKeywordMapping (Keyword
="mailfromaddr", PropertyName
="fixme:from_address", IsKeyword
=false)]
42 [PropertyKeywordMapping (Keyword
="mailto", PropertyName
="fixme:to_name", IsKeyword
=false)]
43 [PropertyKeywordMapping (Keyword
="mailtoaddr", PropertyName
="fixme:to_address", IsKeyword
=false)]
44 [PropertyKeywordMapping (Keyword
="mailinglist", PropertyName
="fixme:mlist", IsKeyword
=true, Description
="Mailing list id")]
45 public class FilterMail
: Beagle
.Daemon
.Filter
, IDisposable
{
47 private static bool gmime_initialized
= false;
49 private GMime
.Message message
;
50 private PartHandler handler
;
54 // 1: Make email addresses non-keyword, add sanitized version
55 // for searching for parts of an email address.
56 // 2: No need to separately add sanitized version of emails.
57 // BeagleAnalyzer uses a tokenfilter taking care of this.
60 AddSupportedFlavor (FilterFlavor
.NewFromMimeType ("message/rfc822"));
63 protected override void DoOpen (FileInfo info
)
65 if (!gmime_initialized
) {
68 gmime_initialized
= true;
75 int mail_fd
= Mono
.Unix
.Native
.Syscall
.open (info
.FullName
, Mono
.Unix
.Native
.OpenFlags
.O_RDONLY
);
78 throw new IOException (String
.Format ("Unable to read {0} for parsing mail", info
.FullName
));
80 GMime
.StreamFs stream
= new GMime
.StreamFs (mail_fd
);
81 GMime
.Parser parser
= new GMime
.Parser (stream
);
82 this.message
= parser
.ConstructMessage ();
86 if (this.message
== null)
90 private bool HasAttachments (GMime
.Object mime_part
)
92 if (mime_part
is GMime
.MessagePart
)
95 // Messages that are multipart/alternative shouldn't be considered as having
96 // attachments. Unless of course they do.
97 if (mime_part
is GMime
.Multipart
&& mime_part
.ContentType
.Subtype
.ToLower () != "alternative")
103 protected override void DoPullProperties ()
105 string subject
= GMime
.Utils
.HeaderDecodePhrase (this.message
.Subject
);
106 AddProperty (Property
.New ("dc:title", subject
));
108 AddProperty (Property
.NewDate ("fixme:date", message
.Date
.ToUniversalTime ()));
110 GMime
.InternetAddressList addrs
;
111 addrs
= this.message
.GetRecipients (GMime
.Message
.RecipientType
.To
);
112 foreach (GMime
.InternetAddress ia
in addrs
) {
113 AddProperty (Property
.NewUnsearched ("fixme:to", ia
.ToString (false)));
114 if (ia
.AddressType
!= GMime
.InternetAddressType
.Group
)
115 AddProperty (Property
.New ("fixme:to_address", ia
.Addr
));
117 AddProperty (Property
.New ("fixme:to_name", ia
.Name
));
121 addrs
= this.message
.GetRecipients (GMime
.Message
.RecipientType
.Cc
);
122 foreach (GMime
.InternetAddress ia
in addrs
) {
123 AddProperty (Property
.NewUnsearched ("fixme:cc", ia
.ToString (false)));
124 if (ia
.AddressType
!= GMime
.InternetAddressType
.Group
)
125 AddProperty (Property
.New ("fixme:cc_address", ia
.Addr
));
127 AddProperty (Property
.New ("fixme:cc_name", ia
.Name
));
131 addrs
= GMime
.InternetAddressList
.ParseString (GMime
.Utils
.HeaderDecodePhrase (this.message
.Sender
));
132 foreach (GMime
.InternetAddress ia
in addrs
) {
133 AddProperty (Property
.NewUnsearched ("fixme:from", ia
.ToString (false)));
134 if (ia
.AddressType
!= GMime
.InternetAddressType
.Group
)
135 AddProperty (Property
.New ("fixme:from_address", ia
.Addr
));
137 AddProperty (Property
.New ("fixme:from_name", ia
.Name
));
141 if (HasAttachments (this.message
.MimePart
))
142 AddProperty (Property
.NewFlag ("fixme:hasAttachments"));
144 // Store the message ID and references as unsearched
145 // properties. They will be used to generate
146 // conversations in the frontend.
147 string msgid
= this.message
.GetHeader ("Message-Id");
149 AddProperty (Property
.NewUnsearched ("fixme:msgid", GMime
.Utils
.DecodeMessageId (msgid
)));
151 foreach (GMime
.References refs
in this.message
.References
)
152 AddProperty (Property
.NewUnsearched ("fixme:reference", refs
.Msgid
));
154 string list_id
= this.message
.GetHeader ("List-Id");
155 if (list_id
!= null) {
156 // FIXME: Might need some additional parsing.
157 AddProperty (Property
.NewKeyword ("fixme:mlist", GMime
.Utils
.HeaderDecodePhrase (list_id
)));
160 // KMail can store replies in the same folder.
161 // Use the fixme:isSent flag to distinguish between incoming
162 // and outgoing messages.
163 string kmail_msg_sent
= this.message
.GetHeader ("X-KMail-Link-Type");
164 bool issent_is_set
= false;
165 foreach (Property property
in IndexableProperties
) {
166 if (property
.Key
== "fixme:isSent") {
167 issent_is_set
= true;
171 if (!issent_is_set
&& kmail_msg_sent
!= null && kmail_msg_sent
== "reply")
172 AddProperty (Property
.NewFlag ("fixme:isSent"));
175 protected override void DoPullSetup ()
177 this.handler
= new PartHandler (this);
178 using (GMime
.Object mime_part
= this.message
.MimePart
)
179 this.handler
.OnEachPart (mime_part
);
181 AddChildIndexables (this.handler
.ChildIndexables
);
184 protected override void DoPull ()
186 if (handler
.Reader
== null) {
191 string l
= handler
.Reader
.ReadLine ();
195 else if (l
.Length
> 0) {
197 AppendStructuralBreak ();
201 protected override void DoClose ()
206 public void Dispose ()
208 if (this.handler
!= null && this.handler
.Reader
!= null)
209 this.handler
.Reader
.Close ();
212 if (this.message
!= null) {
213 this.message
.Dispose ();
218 private class PartHandler
{
219 private Beagle
.Daemon
.Filter filter
;
220 private int count
= 0; // parts handled so far
221 private int depth
= 0; // part recursion depth
222 private ArrayList child_indexables
= new ArrayList ();
223 private TextReader reader
;
225 // Blacklist a handful of common MIME types that are
226 // either pointless on their own or ones that we don't
228 static private string[] blacklisted_mime_types
= new string[] {
229 "application/pgp-signature",
230 "application/x-pkcs7-signature",
231 "application/ms-tnef",
236 public PartHandler (Beagle
.Daemon
.Filter filter
)
238 this.filter
= filter
;
241 private bool IsMimeTypeHandled (string mime_type
)
243 foreach (FilterFlavor flavor
in FilterFlavor
.Flavors
) {
244 if (flavor
.IsMatch (null, null, mime_type
.ToLower ()))
251 public void OnEachPart (GMime
.Object mime_part
)
253 GMime
.Object part
= null;
254 bool part_needs_dispose
= false;
256 //for (int i = 0; i < this.depth; i++)
257 // Console.Write (" ");
258 //Console.WriteLine ("Content-Type: {0}", mime_part.ContentType);
262 if (mime_part
is GMime
.MessagePart
) {
263 GMime
.MessagePart msg_part
= (GMime
.MessagePart
) mime_part
;
265 using (GMime
.Message message
= msg_part
.Message
) {
266 using (GMime
.Object subpart
= message
.MimePart
)
267 this.OnEachPart (subpart
);
269 } else if (mime_part
is GMime
.Multipart
) {
270 GMime
.Multipart multipart
= (GMime
.Multipart
) mime_part
;
272 int num_parts
= multipart
.Number
;
274 // If the mimetype is multipart/alternative, we only want to index
275 // one part -- the richest one we can filter.
276 if (mime_part
.ContentType
.Subtype
.ToLower () == "alternative") {
277 // The richest formats are at the end, so work from there
279 for (int i
= num_parts
- 1; i
>= 0; i
--) {
280 GMime
.Object subpart
= multipart
.GetPart (i
);
282 if (IsMimeTypeHandled (subpart
.ContentType
.ToString ())) {
284 part_needs_dispose
= true;
292 // If it's not alternative, or we don't know how to filter any of
293 // the parts, treat them like a bunch of attachments.
295 for (int i
= 0; i
< num_parts
; i
++) {
296 using (GMime
.Object subpart
= multipart
.GetPart (i
))
297 this.OnEachPart (subpart
);
300 } else if (mime_part
is GMime
.Part
)
303 throw new Exception (String
.Format ("Unknown part type: {0}", part
.GetType ()));
306 System
.IO
.Stream stream
= null;
308 using (GMime
.DataWrapper content_obj
= ((GMime
.Part
) part
).ContentObject
)
309 stream
= content_obj
.Stream
;
311 // If this is the only part and it's plain text, we
312 // want to just attach it to our filter instead of
313 // creating a child indexable for it.
314 bool no_child_needed
= false;
316 string mime_type
= part
.ContentType
.ToString ().ToLower ();
318 if (this.depth
== 1 && this.count
== 0) {
319 if (mime_type
== "text/plain") {
320 no_child_needed
= true;
322 this.reader
= new StreamReader (stream
);
326 if (!no_child_needed
) {
327 // Check the mime type against the blacklist and don't index any
328 // parts that are contained within. That way the user doesn't
329 // get flooded with pointless signatures and vcard and ical
330 // attachments along with (real) attachments.
332 if (Array
.IndexOf (blacklisted_mime_types
, mime_type
) == -1) {
333 string sub_uri
= this.filter
.Uri
.ToString () + "#" + this.count
;
334 Indexable child
= new Indexable (new Uri (sub_uri
));
336 child
.DisplayUri
= new Uri (this.filter
.DisplayUri
.ToString () + "#" + this.count
);
338 child
.HitType
= "MailMessage";
339 child
.MimeType
= mime_type
;
340 child
.CacheContent
= false;
342 child
.AddProperty (Property
.NewKeyword ("fixme:attachment_title", ((GMime
.Part
)part
).Filename
));
344 if (part
.ContentType
.Type
.ToLower () == "text")
345 child
.SetTextReader (new StreamReader (stream
));
347 child
.SetBinaryStream (stream
);
349 this.child_indexables
.Add (child
);
351 Log
.Debug ("Skipping attachment {0}#{1} with blacklisted mime type {2}",
352 this.filter
.Uri
, this.count
, mime_type
);
359 if (part_needs_dispose
)
365 public ICollection ChildIndexables
{
366 get { return this.child_indexables; }
369 public TextReader Reader
{
370 get { return this.reader; }