Make the DisplayUri a real thing rather than just selecting the ContentUri if
[beagle.git] / BeagleClient / Indexable.cs
blob82bd893dc0e642e0a8bde8badc612c4aa1540c67
1 //
2 // Indexable.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
28 using System;
29 using System.Collections;
30 using System.IO;
31 using System.Text;
32 using System.Threading;
33 using System.Xml;
34 using System.Xml.Serialization;
35 using Beagle.Util;
37 namespace Beagle {
// The kind of indexing operation an Indexable stands for.
// Indexable instances default to Add.
public enum IndexableType {
	Add,
	Remove,
	PropertyChange
}
// Controls whether an indexable's content is run through a filter.
public enum IndexableFiltering {
	// Never try to filter this indexable, it contains no content
	Never,
	// The readers promise to return nice clean text, so do nothing
	AlreadyFiltered,
	// Try to determine automatically if this needs to be filtered
	Automatic,
	// Always try to filter this indexable
	Always
}
52 public class Indexable : Versioned, IComparable {
// When true, Cleanup and StoreStream log what they do to temp files.
private static bool Debug = false;

// This is the type of indexing operation represented by
// this Indexable object.  We default to Add, for historical
// reasons.
private IndexableType type = IndexableType.Add;

// The URI of the item being indexed.
private Uri uri = null;

// The URI of the parent indexable, if any.
private Uri parent_uri = null;

// The URI of the contents to index
private Uri contentUri = null;

// The URI of the hot contents to index
private Uri hotContentUri = null;

// Whether the content should be deleted after indexing
private bool deleteContent = false;

// File, WebLink, MailMessage, IMLog, etc.
private String hit_type = null;

// If applicable, otherwise set to null.
private String mimeType = null;

// The source backend that generated this indexable
private string source = null;

// List of Property objects
private ArrayList properties = new ArrayList ();

// Is this being indexed because of crawling or other
// background activity?
private bool crawled = true;

// Is this object inherently contentless?
private bool no_content = false;

// If necessary, should we cache this object's content?
// The cached version is used to generate snippets.
private bool cache_content = true;

// Is this indexable a child of another indexable ?
// If true, then parent_uri points to the uri of the parent
// However, an indexable can have parent_uri set but may not be a child
private bool is_child = false;

// A stream of the content to index
private TextReader textReader;

// A stream of the hot content to index
private TextReader hotTextReader;

// A stream of binary data to filter
private Stream binary_stream;

// When should we try to filter this indexable?
private IndexableFiltering filtering = IndexableFiltering.Automatic;

// Local state: these are key/value pairs that never get serialized
// into XML
private Hashtable local_state = new Hashtable ();
120 //////////////////////////
// Serializer shared by all instances; NewFromXml and ToString both use it.
private static XmlSerializer our_serializer;

// Builds the shared serializer once, when the type is first initialised.
static Indexable ()
{
	our_serializer = new XmlSerializer (typeof (Indexable));
}
129 //////////////////////////
// Creates an indexable for the given operation type and item URI.
public Indexable (IndexableType type, Uri uri)
{
	this.uri = uri;
	this.type = type;
	this.hit_type = "File"; // FIXME: Why do we default to this?
}

// Creates an Add indexable for the given item URI.
public Indexable (Uri uri) : this (IndexableType.Add, uri)
{
}

// Parameterless constructor; only used when reading from xml.
public Indexable ()
{
}
// Deserializes an Indexable from its XML text using the shared serializer.
public static Indexable NewFromXml (string xml)
{
	object parsed = our_serializer.Deserialize (new StringReader (xml));
	return (Indexable) parsed;
}
153 //////////////////////////
// The indexing operation this object represents; serialized as the "Type" attribute.
[XmlAttribute ("Type")]
public IndexableType Type {
	get { return this.type; }
	set { this.type = value; }
}
// The URI of the item being indexed.  Not serialized directly;
// UriString carries it in the XML.
[XmlIgnore]
public Uri Uri {
	get { return this.uri; }
	set { this.uri = value; }
}

// Escaped-string form of Uri, serialized as the "Uri" attribute.
[XmlAttribute ("Uri")]
public string UriString {
	get { return UriFu.UriToEscapedString (this.uri); }
	set { this.uri = UriFu.EscapedStringToUri (value); }
}
// The URI of the parent indexable, or null if there is none.
[XmlIgnore]
public Uri ParentUri {
	get { return this.parent_uri; }
	set { this.parent_uri = value; }
}

// Escaped-string form of ParentUri, serialized as the "ParentUri"
// attribute.  A null parent maps to a null string in both directions.
[XmlAttribute ("ParentUri")]
public string ParentUriString {
	get { return parent_uri == null ? null : UriFu.UriToEscapedString (parent_uri); }
	set { parent_uri = (value == null) ? null : UriFu.EscapedStringToUri (value); }
}
// The URI of the contents to index.  Falls back to Uri when no
// separate content URI has been set.
[XmlIgnore]
public Uri ContentUri {
	get {
		if (contentUri == null)
			return Uri;
		return contentUri;
	}
	set { contentUri = value; }
}

// Escaped-string form of ContentUri, serialized as the "ContentUri"
// attribute.  The getter goes through ContentUri, so it emits the
// fallback Uri when no content URI is set.
[XmlAttribute ("ContentUri")]
public string ContentUriString {
	get { return UriFu.UriToEscapedString (this.ContentUri); }
	set { this.contentUri = UriFu.EscapedStringToUri (value); }
}
// The URI of the hot contents to index, or null if there is none.
[XmlIgnore]
private Uri HotContentUri {
	get { return hotContentUri; }
	set { hotContentUri = value; }
}

// Escaped-string form of HotContentUri, serialized as the
// "HotContentUri" attribute.  "No hot content" is written as the
// empty string.  On input both "" and null clear the URI, so a null
// value is never handed to UriFu.EscapedStringToUri (this matches
// the null handling in ParentUriString).
[XmlAttribute ("HotContentUri")]
public string HotContentUriString {
	get {
		Uri hot = HotContentUri;
		return hot != null ? UriFu.UriToEscapedString (hot) : "";
	}
	set {
		if (value == null || value == "")
			hotContentUri = null;
		else
			hotContentUri = UriFu.EscapedStringToUri (value);
	}
}
// Explicitly assigned display URI; null means "use Uri".
private Uri display_uri = null;

// The URI to present for this item.  Falls back to Uri when no
// display URI has been assigned.
[XmlIgnore]
public Uri DisplayUri {
	get {
		if (display_uri == null)
			return Uri;
		return display_uri;
	}
	set { display_uri = value; }
}

// Escaped-string form of DisplayUri, serialized as the "DisplayUri"
// attribute.  The getter goes through DisplayUri, so it emits the
// fallback Uri when nothing was assigned.
[XmlAttribute ("DisplayUri")]
public string DisplayUriString {
	get { return UriFu.UriToEscapedString (this.DisplayUri); }
	set { this.DisplayUri = UriFu.EscapedStringToUri (value); }
}
// Whether the content should be deleted after indexing.
[XmlAttribute]
public bool DeleteContent {
	get { return this.deleteContent; }
	set { this.deleteContent = value; }
}

// File, WebLink, MailMessage, IMLog, etc.
[XmlAttribute]
public String HitType {
	get { return this.hit_type; }
	set { this.hit_type = value; }
}

// The MIME type if applicable, otherwise null.
[XmlAttribute]
public String MimeType {
	get { return this.mimeType; }
	set { this.mimeType = value; }
}

// The source backend that generated this indexable.
[XmlAttribute]
public string Source {
	get { return this.source; }
	set { this.source = value; }
}
// Not transient if
//  - content should not be deleted after indexing and
//  - actual source of data (data might be stored in temporary
//    files for indexing) is a file and
//  - there is no parent uri set.
[XmlIgnore]
public bool IsNonTransient {
	get {
		if (DeleteContent)
			return false;

		if (! ContentUri.IsFile)
			return false;

		return ParentUri == null;
	}
}
// Is this being indexed because of crawling or other background activity?
[XmlAttribute]
public bool Crawled {
	get { return this.crawled; }
	set { this.crawled = value; }
}

// Is this object inherently contentless?
[XmlAttribute]
public bool NoContent {
	get { return this.no_content; }
	set { this.no_content = value; }
}

// If necessary, should this object's content be cached?
[XmlAttribute]
public bool CacheContent {
	get { return this.cache_content; }
	set { this.cache_content = value; }
}

// When should we try to filter this indexable?
[XmlAttribute]
public IndexableFiltering Filtering {
	get { return this.filtering; }
	set { this.filtering = value; }
}

// Key/value pairs that never get serialized into XML.
[XmlIgnore]
public IDictionary LocalState {
	get { return this.local_state; }
}

// Is this indexable a child of another indexable?
[XmlAttribute]
public bool IsChild {
	get { return this.is_child; }
	set { this.is_child = value; }
}
303 //////////////////////////
// Deletes the files behind contentUri and hotContentUri and clears
// both fields.  Does nothing unless DeleteContent is set.  Deletion
// is best-effort: failures are ignored.
public void Cleanup ()
{
	if (! DeleteContent)
		return;

	if (contentUri != null) {
		if (Debug)
			Logger.Log.Debug ("Cleaning up {0}", contentUri.LocalPath);

		try {
			File.Delete (contentUri.LocalPath);
		} catch {
			// It might be gone already, so catch the exception.
		}

		contentUri = null;
	}

	if (hotContentUri != null) {
		if (Debug)
			Logger.Log.Debug ("Cleaning up {0}", hotContentUri.LocalPath);

		try {
			File.Delete (hotContentUri.LocalPath);
		} catch {
			// Ditto
		}

		hotContentUri = null;
	}
}
// Opens a shared-read FileStream on a file URI.  Returns null when
// the URI is null, is not a file URI, or this indexable has no content.
private Stream StreamFromUri (Uri uri)
{
	if (uri == null || ! uri.IsFile || no_content)
		return null;

	return new FileStream (uri.LocalPath,
			       FileMode.Open,
			       FileAccess.Read,
			       FileShare.Read);
}
// Wraps StreamFromUri's result in a StreamReader; returns null when
// no stream could be opened.
private TextReader ReaderFromUri (Uri uri)
{
	Stream source_stream = StreamFromUri (uri);
	return source_stream != null ? new StreamReader (source_stream) : null;
}
// Returns the reader for the content to index, opening it lazily
// from ContentUri on first use.  Returns null if NoContent is set.
public TextReader GetTextReader ()
{
	if (NoContent)
		return null;

	if (textReader != null)
		return textReader;

	textReader = ReaderFromUri (ContentUri);
	return textReader;
}

// Supplies the content reader directly.
public void SetTextReader (TextReader reader)
{
	this.textReader = reader;
}
// Returns the reader for the hot content, opening it lazily from
// HotContentUri on first use.  Returns null if NoContent is set.
public TextReader GetHotTextReader ()
{
	if (NoContent)
		return null;

	if (hotTextReader != null)
		return hotTextReader;

	hotTextReader = ReaderFromUri (HotContentUri);
	return hotTextReader;
}

// Supplies the hot-content reader directly.
public void SetHotTextReader (TextReader reader)
{
	this.hotTextReader = reader;
}
// Returns the binary stream to filter, opening it lazily from
// ContentUri on first use.  Returns null if NoContent is set.
public Stream GetBinaryStream ()
{
	if (NoContent)
		return null;

	if (binary_stream != null)
		return binary_stream;

	binary_stream = StreamFromUri (ContentUri);
	return binary_stream;
}

// Supplies the binary stream directly.
public void SetBinaryStream (Stream stream)
{
	this.binary_stream = stream;
}
// The live list of Property objects; each is serialized as a
// "Property" element.
[XmlArrayItem (ElementName="Property", Type=typeof (Property))]
public ArrayList Properties {
	get { return this.properties; }
}
// Adds a property to this indexable.  A null property is ignored.
// Throws ArgumentException if this is a PropertyChange indexable
// and the property is not mutable.
public void AddProperty (Property prop) {
	if (prop == null)
		return;

	if (type == IndexableType.PropertyChange && ! prop.IsMutable)
		throw new ArgumentException ("Non-mutable properties aren't allowed in this indexable");

	// If this is a mutable property, make sure that
	// we don't already contain another mutable property
	// with the same name.  If we do, replace it.
	if (prop.IsMutable) {
		for (int slot = 0; slot < properties.Count; slot++) {
			Property existing = properties [slot] as Property;
			if (! existing.IsMutable || prop.Key != existing.Key)
				continue;

			properties [slot] = prop;
			return;
		}
	}

	properties.Add (prop);
}
// Returns true if any property's Key equals the given keyword.
public bool HasProperty (string keyword) {
	bool found = false;

	foreach (Property candidate in properties) {
		if (candidate.Key == keyword) {
			found = true;
			break;
		}
	}

	return found;
}
// Folds another indexable into this one: takes its timestamp, adds
// its properties through AddProperty, and copies its local state
// over ours.  This doesn't check if it makes sense to actually
// merge the two indexables: it just does it.
public void Merge (Indexable other)
{
	this.Timestamp = other.Timestamp;

	foreach (Property incoming in other.Properties)
		this.AddProperty (incoming);

	IDictionaryEnumerator state = other.local_state.GetEnumerator ();
	while (state.MoveNext ())
		this.local_state [state.Key] = state.Value;
}
457 //////////////////////////
// Marks this indexable as a child of the given parent.
//
// ParentUri always ends up pointing at the toplevel indexable: if
// the parent is itself a child we reuse its ParentUri, otherwise we
// use the parent's own Uri.  If we have no valid timestamp we take
// the parent's.  Finally the parent's properties are copied onto us.
//
// FIXME: Set all of the parent's properties on the child so that we
// get matches against the child that otherwise would match only the
// parent, at least until we have proper RDF support.
//
// FIXME: Copying the correct properties from parent to child is not
// perfect yet:
//  - It does not make sense to have parent:parent:...:parent:foo
//    property names on a nested child.
//  - If a.mbox has child b.zip which has child c.zip, a match on
//    c.zip should show information from a.mbox (the toplevel
//    indexable) only.  Storing parent:beagle:filename for every
//    ancestor would give both a.mbox and b.zip, which is confusing
//    when displaying results.  Indexables that need the
//    intermediate/immediate parent info should store it explicitly.
//  - A toplevel indexable might hold information that should not
//    match when searching for its children; copying it to every
//    child will incorrectly match.
public void SetChildOf (Indexable parent)
{
	this.IsChild = true;

	bool parent_is_child = parent.IsChild;
	this.ParentUri = parent_is_child ? parent.ParentUri : parent.Uri;

	if (! this.ValidTimestamp)
		this.Timestamp = parent.Timestamp;

	foreach (Property parent_prop in parent.Properties) {
		Property copy;

		if (parent_is_child) {
			// Parent is itself a child: its parent:xxx and
			// private properties are copied unchanged; every
			// other property is copied too, but marked as
			// not stored.
			bool inherited = parent_prop.Key.StartsWith ("parent:")
				|| parent_prop.Key.StartsWith (Property.PrivateNamespace);

			copy = (Property) parent_prop.Clone ();
			if (! inherited)
				copy.IsStored = false;
		} else {
			// Parent is a top level indexable: copy all
			// properties.  Add parent: to the name ONLY IF
			//  - it is not a private property (these are
			//    not properties of the file content), and
			//  - the name does not already start with parent:
			copy = (Property) parent_prop.Clone ();

			bool needs_prefix = ! (copy.Key.StartsWith (Property.PrivateNamespace)
					       || copy.Key.StartsWith ("parent:"));
			if (needs_prefix)
				copy.Key = "parent:" + copy.Key;
		}

		this.AddProperty (copy);
	}
}
523 //////////////////////////
// Returns this indexable serialized to XML by the shared serializer.
public override string ToString ()
{
	StringWriter xml_out = new StringWriter ();

	our_serializer.Serialize (xml_out, this);
	xml_out.Close ();

	return xml_out.ToString ();
}
533 //////////////////////////
// Size, in elements, of the per-thread scratch buffers.
const int BUFFER_SIZE = 8192;

// Returns the char scratch buffer kept in the calling thread's
// "Char Buffer" named data slot, allocating it on first use.
private static char [] GetCharBuffer ()
{
	LocalDataStoreSlot slot = Thread.GetNamedDataSlot ("Char Buffer");

	object cached = Thread.GetData (slot);
	if (cached != null)
		return (char []) cached;

	char [] fresh = new char [BUFFER_SIZE];
	Thread.SetData (slot, fresh);
	return fresh;
}
// Returns the byte scratch buffer kept in the calling thread's
// "Byte Buffer" named data slot, allocating it on first use.
private static byte [] GetByteBuffer ()
{
	LocalDataStoreSlot slot = Thread.GetNamedDataSlot ("Byte Buffer");

	object cached = Thread.GetData (slot);
	if (cached != null)
		return (byte []) cached;

	byte [] fresh = new byte [BUFFER_SIZE];
	Thread.SetData (slot, fresh);
	return fresh;
}
573 //////////////////////////
// Drains the given reader into a freshly created temporary file and
// returns a file URI for it.  Returns null if the reader is null.
// The reader itself is not closed here.
//
// The file handle is held in using blocks so that it is released
// even if PreLoad, chmod, or the copy loop throws.
private static Uri TextReaderToTempFileUri (TextReader reader)
{
	if (reader == null)
		return null;

	string filename = Path.GetTempFileName ();

	using (FileStream fileStream = File.OpenWrite (filename)) {
		// When we dump the contents of an indexable into a file, we
		// expect to use it again soon.
		FileAdvise.PreLoad (fileStream);

		// Make sure the temporary file is only readable by the owner
		// (256 decimal is 0400 octal).
		// FIXME: There is probably a race here.  Could some malicious program
		// do something to the file between creation and the chmod?
		Mono.Unix.Native.Syscall.chmod (filename, (Mono.Unix.Native.FilePermissions) 256);

		// Disposing the writer flushes it and closes the underlying
		// streams; the outer using then becomes a harmless no-op.
		using (StreamWriter writer = new StreamWriter (new BufferedStream (fileStream))) {
			char [] buffer = GetCharBuffer ();

			int read;
			while ((read = reader.Read (buffer, 0, buffer.Length)) > 0)
				writer.Write (buffer, 0, read);
		}
	}

	return UriFu.PathToFileUri (filename);
}
// Copies the given stream into a freshly created temporary file and
// returns a file URI for it.  Returns null if the stream is null.
// The source stream itself is not closed here.
//
// The file handle is held in using blocks so that it is released
// even if PreLoad, chmod, or the copy loop throws.
private static Uri BinaryStreamToTempFileUri (Stream stream)
{
	if (stream == null)
		return null;

	string filename = Path.GetTempFileName ();

	using (FileStream fileStream = File.OpenWrite (filename)) {
		// When we dump the contents of an indexable into a file, we
		// expect to use it again soon.
		FileAdvise.PreLoad (fileStream);

		// Make sure the temporary file is only readable by the owner
		// (256 decimal is 0400 octal).
		// FIXME: There is probably a race here.  Could some malicious program
		// do something to the file between creation and the chmod?
		Mono.Unix.Native.Syscall.chmod (filename, (Mono.Unix.Native.FilePermissions) 256);

		// Disposing the buffered stream flushes it and closes the
		// file stream; the outer using then becomes a harmless no-op.
		using (BufferedStream bufferedStream = new BufferedStream (fileStream)) {
			byte [] buffer = GetByteBuffer ();

			int read;
			while ((read = stream.Read (buffer, 0, buffer.Length)) > 0)
				bufferedStream.Write (buffer, 0, read);
		}
	}

	return UriFu.PathToFileUri (filename);
}
// Dumps any in-memory content into temporary files and points
// ContentUri / HotContentUri at them.  The text reader takes
// precedence over the binary stream; only one of the two is stored.
// DeleteContent is set whenever something was written.
public void StoreStream () {
	bool have_text = textReader != null;

	if (have_text || binary_stream != null) {
		if (have_text)
			ContentUri = TextReaderToTempFileUri (textReader);
		else
			ContentUri = BinaryStreamToTempFileUri (binary_stream);

		if (Debug)
			Logger.Log.Debug (have_text
					  ? "Storing text content from {0} in {1}"
					  : "Storing binary content from {0} in {1}",
					  Uri, ContentUri);

		DeleteContent = true;
	}

	if (hotTextReader != null) {
		HotContentUri = TextReaderToTempFileUri (hotTextReader);

		if (Debug)
			Logger.Log.Debug ("Storing hot content from {0} in {1}", Uri, HotContentUri);

		DeleteContent = true;
	}
}
// Closes every content source this indexable currently holds: the
// text reader, the binary stream and the hot text reader.
//
// Each one is checked independently.  If both a text reader and a
// binary stream have been set, both are closed, so the binary
// stream is not left open.
public void CloseStreams ()
{
	if (textReader != null)
		textReader.Close ();

	if (binary_stream != null)
		binary_stream.Close ();

	if (hotTextReader != null)
		hotTextReader.Close ();
}
683 //////////////////////////
// Hash of the item URI (0 when the URI is null) XORed with the hash
// of the operation type.
public override int GetHashCode ()
{
	int uri_hash = 0;
	if (uri != null)
		uri_hash = uri.GetHashCode ();

	return uri_hash ^ type.GetHashCode ();
}
// Orders indexables by Timestamp, using DateTime.Compare.
//
// Following the IComparable contract, any instance compares greater
// than null, so a null argument yields 1 rather than a
// NullReferenceException.  An argument that is not an Indexable
// still fails the cast with InvalidCastException, as before.
public int CompareTo (object obj)
{
	if (obj == null)
		return 1;

	Indexable other = (Indexable) obj;
	return DateTime.Compare (this.Timestamp, other.Timestamp);
}