Add --enable-deletion option to buildindex. If used, buildindex will remove deleted...
[beagle.git] / BeagleClient / Indexable.cs
blob3fa8997cd740e841aa28cbf2dc18529668e5134a
1 //
2 // Indexable.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
28 using System;
29 using System.Collections;
30 using System.IO;
31 using System.Text;
32 using System.Threading;
33 using System.Xml;
34 using System.Xml.Serialization;
35 using Beagle.Util;
37 namespace Beagle {
// The kind of indexing operation that an Indexable represents.
public enum IndexableType {
	Add = 0,
	Remove = 1,
	PropertyChange = 2
}
// Controls when an indexable's content is run through a filter.
public enum IndexableFiltering {
	// Never try to filter this indexable, it contains no content
	Never = 0,

	// The readers promise to return nice clean text, so do nothing
	AlreadyFiltered = 1,

	// Try to determine automatically if this needs to be filtered
	Automatic = 2,

	// Always try to filter this indexable
	Always = 3
}
52 public class Indexable : Versioned, IComparable {
// When true, Cleanup and StoreStream log what they are doing.
private static bool Debug = false;

// Which indexing operation this object stands for.  Add is the
// default, for historical reasons.
private IndexableType type = IndexableType.Add;

// URI of the item being indexed.
private Uri uri = null;

// URI of the parent indexable, or null if there is none.
private Uri parent_uri = null;

// URI of the contents to index.
private Uri contentUri = null;

// URI of the hot contents to index.
private Uri hotContentUri = null;

// Should the content be deleted once indexing is done?
private bool deleteContent = false;

// File, WebLink, MailMessage, IMLog, etc.
private string hit_type = null;

// Mime type if applicable, null otherwise.
private string mimeType = null;

// The source backend that generated this indexable.
private string source = null;

// List of Property objects.
private ArrayList properties = new ArrayList ();

// Is this being indexed because of crawling or other
// background activity?
private bool crawled = true;

// Is this object inherently contentless?
private bool no_content = false;

// If necessary, should we cache this object's content?
// The cached version is used to generate snippets.
private bool cache_content = true;

// Reader over the content to index.
private TextReader textReader;

// Reader over the hot content to index.
private TextReader hotTextReader;

// Stream of binary data to filter.
private Stream binary_stream;

// When should we try to filter this indexable?
private IndexableFiltering filtering = IndexableFiltering.Automatic;

// Local state: key/value pairs that are never serialized into XML.
private Hashtable local_state = new Hashtable ();
115 //////////////////////////
// Serializer shared by NewFromXml and ToString.
private static XmlSerializer our_serializer;

// Builds the shared serializer once, when the type is first used.
static Indexable ()
{
	Indexable.our_serializer = new XmlSerializer (typeof (Indexable));
}
124 //////////////////////////
// Creates an indexable describing the given operation on the given URI.
public Indexable (IndexableType type, Uri uri)
{
	this.hit_type = "File"; // FIXME: Why do we default to this?
	this.uri = uri;
	this.type = type;
}
// Convenience constructor: an Add operation on the given URI.
public Indexable (Uri uri)
	: this (IndexableType.Add, uri)
{
}
// Parameterless constructor; it leaves every field at its default.
public Indexable ()
{
	// Only used when reading from xml
}
// Deserializes an Indexable from its XML string form using the
// shared serializer.
public static Indexable NewFromXml (string xml)
{
	return (Indexable) our_serializer.Deserialize (new StringReader (xml));
}
148 //////////////////////////
// The indexing operation this object represents; serialized as
// the "Type" attribute.
[XmlAttribute ("Type")]
public IndexableType Type {
	get {
		return this.type;
	}
	set {
		this.type = value;
	}
}
// The URI of the item being indexed.  Not serialized directly;
// UriString carries it in the XML.
[XmlIgnore]
public Uri Uri {
	get {
		return this.uri;
	}
	set {
		this.uri = value;
	}
}
// String form of Uri, serialized as the "Uri" attribute.
[XmlAttribute ("Uri")]
public string UriString {
	get {
		string serialized = UriFu.UriToSerializableString (uri);
		return serialized;
	}
	set {
		Uri parsed = UriFu.UriStringToUri (value);
		uri = parsed;
	}
}
// The URI of the parent indexable, or null.  Not serialized
// directly; ParentUriString carries it in the XML.
[XmlIgnore]
public Uri ParentUri {
	get {
		return this.parent_uri;
	}
	set {
		this.parent_uri = value;
	}
}
// String form of ParentUri, serialized as the "ParentUri"
// attribute.  A null parent maps to a null string and back.
[XmlAttribute ("ParentUri")]
public string ParentUriString {
	get {
		return (parent_uri == null) ? null : UriFu.UriToSerializableString (parent_uri);
	}
	set {
		parent_uri = (value == null) ? null : UriFu.UriStringToUri (value);
	}
}
// The URI of the contents to index.  Reads fall back to Uri when
// no explicit content URI has been set.
[XmlIgnore]
public Uri ContentUri {
	get {
		if (contentUri != null)
			return contentUri;

		return this.Uri;
	}
	set {
		contentUri = value;
	}
}
// String form of ContentUri, serialized as the "ContentUri"
// attribute.  The getter goes through the ContentUri property, so
// it reflects the fallback to Uri.
[XmlAttribute ("ContentUri")]
public string ContentUriString {
	get {
		Uri effective = this.ContentUri;
		return UriFu.UriToSerializableString (effective);
	}
	set {
		Uri parsed = UriFu.UriStringToUri (value);
		contentUri = parsed;
	}
}
// The URI of the hot contents to index.  Private; the serialized
// form is exposed through HotContentUriString.
[XmlIgnore]
private Uri HotContentUri {
	get {
		return this.hotContentUri;
	}
	set {
		this.hotContentUri = value;
	}
}
// String form of the hot content URI, serialized as the
// "HotContentUri" attribute.  An unset URI is written as the empty
// string; a null or empty string read back clears the URI.
[XmlAttribute ("HotContentUri")]
public string HotContentUriString {
	get {
		if (hotContentUri == null)
			return "";

		return UriFu.UriToSerializableString (hotContentUri);
	}
	set {
		// A missing attribute arrives as null; treat it like the
		// empty string instead of handing null to the parser.
		if (value == null || value.Length == 0) {
			hotContentUri = null;
			return;
		}

		// Parse with the same helper the other *UriString setters
		// use, so it matches the UriToSerializableString call in
		// the getter.
		hotContentUri = UriFu.UriStringToUri (value);
	}
}
// The URI to show for this indexable: ContentUri when the item's
// own URI uses the GuidFu.UriScheme scheme, otherwise Uri itself.
[XmlIgnore]
public Uri DisplayUri {
	get {
		if (uri.Scheme == GuidFu.UriScheme)
			return this.ContentUri;

		return this.Uri;
	}
}
// Whether the content should be deleted after indexing.
[XmlAttribute]
public bool DeleteContent {
	get {
		return this.deleteContent;
	}
	set {
		this.deleteContent = value;
	}
}
// The hit type: File, WebLink, MailMessage, IMLog, etc.
[XmlAttribute]
public string HitType {
	get {
		return this.hit_type;
	}
	set {
		this.hit_type = value;
	}
}
// The mime type if applicable, otherwise null.
[XmlAttribute]
public string MimeType {
	get {
		return this.mimeType;
	}
	set {
		this.mimeType = value;
	}
}
// The source backend that generated this indexable.
[XmlAttribute]
public string Source {
	get {
		return this.source;
	}
	set {
		this.source = value;
	}
}
// True when the content is not flagged for deletion, the content
// URI is a file URI, and there is no parent indexable.
[XmlIgnore]
public bool IsNonTransient {
	get {
		if (this.DeleteContent)
			return false;

		if (! this.ContentUri.IsFile)
			return false;

		return this.ParentUri == null;
	}
}
// Is this being indexed because of crawling or other background
// activity?
[XmlAttribute]
public bool Crawled {
	get {
		return this.crawled;
	}
	set {
		this.crawled = value;
	}
}
// Is this object inherently contentless?  When set, the reader and
// stream getters return null.
[XmlAttribute]
public bool NoContent {
	get {
		return this.no_content;
	}
	set {
		this.no_content = value;
	}
}
// If necessary, should this object's content be cached?  The
// cached version is used to generate snippets.
[XmlAttribute]
public bool CacheContent {
	get {
		return this.cache_content;
	}
	set {
		this.cache_content = value;
	}
}
// When should we try to filter this indexable?
[XmlAttribute]
public IndexableFiltering Filtering {
	get {
		return this.filtering;
	}
	set {
		this.filtering = value;
	}
}
// Key/value pairs attached to this indexable that are never
// serialized into XML.
[XmlIgnore]
public IDictionary LocalState {
	get {
		return this.local_state;
	}
}
278 //////////////////////////
// If DeleteContent is set, deletes the files behind the content
// and hot content URIs (when present) and clears those URIs.
// Deletion is best effort: failures never propagate to the caller.
public void Cleanup ()
{
	if (! DeleteContent)
		return;

	if (contentUri != null) {
		DeleteFileQuietly (contentUri);
		contentUri = null;
	}

	if (hotContentUri != null) {
		DeleteFileQuietly (hotContentUri);
		hotContentUri = null;
	}
}

// Deletes the file a URI points at, swallowing any failure.  The
// failure is logged when Debug is on so it is not entirely silent.
private static void DeleteFileQuietly (Uri file_uri)
{
	try {
		string path = file_uri.LocalPath;

		if (Debug)
			Logger.Log.Debug ("Cleaning up {0}", path);

		File.Delete (path);
	} catch (Exception ex) {
		// It might be gone already, so catch the exception.
		if (Debug)
			Logger.Log.Debug ("Could not clean up {0}: {1}", file_uri, ex.Message);
	}
}
// Opens a read-only, read-shared stream on the file a URI points
// at.  Returns null for a null URI, a non-file URI, or when this
// indexable is flagged as having no content.
private Stream StreamFromUri (Uri uri)
{
	if (uri == null || ! uri.IsFile || no_content)
		return null;

	string path = uri.LocalPath;
	return new FileStream (path, FileMode.Open, FileAccess.Read, FileShare.Read);
}
// Wraps StreamFromUri in a StreamReader; returns null when no
// stream could be opened.
private TextReader ReaderFromUri (Uri uri)
{
	Stream source_stream = StreamFromUri (uri);
	return (source_stream == null) ? null : new StreamReader (source_stream);
}
// Returns the reader over the content to index, opening one on
// ContentUri on first use.  Returns null if NoContent is set.
public TextReader GetTextReader ()
{
	if (! NoContent) {
		if (textReader == null)
			textReader = ReaderFromUri (ContentUri);

		return textReader;
	}

	return null;
}
// Supplies the reader that GetTextReader will return.
public void SetTextReader (TextReader reader)
{
	this.textReader = reader;
}
// Returns the reader over the hot content to index, opening one
// on the hot content URI on first use.  Returns null if NoContent
// is set.
public TextReader GetHotTextReader ()
{
	if (! NoContent) {
		if (hotTextReader == null)
			hotTextReader = ReaderFromUri (HotContentUri);

		return hotTextReader;
	}

	return null;
}
// Supplies the reader that GetHotTextReader will return.
public void SetHotTextReader (TextReader reader)
{
	this.hotTextReader = reader;
}
// Returns the stream of binary data to filter, opening one on
// ContentUri on first use.  Returns null if NoContent is set.
public Stream GetBinaryStream ()
{
	if (! NoContent) {
		if (binary_stream == null)
			binary_stream = StreamFromUri (ContentUri);

		return binary_stream;
	}

	return null;
}
// Supplies the stream that GetBinaryStream will return.
public void SetBinaryStream (Stream stream)
{
	this.binary_stream = stream;
}
// The list of Property objects attached to this indexable; each is
// serialized as a "Property" element.
[XmlArrayItem (ElementName="Property", Type=typeof (Property))]
public ArrayList Properties {
	get {
		return this.properties;
	}
}
// Adds a property to this indexable.  A null property is ignored.
//
// Throws ArgumentException if this is a PropertyChange indexable
// and the property is not mutable.
//
// A mutable property replaces an existing mutable property with
// the same key instead of being appended.
public void AddProperty (Property prop) {
	if (prop == null)
		return;

	if (type == IndexableType.PropertyChange && ! prop.IsMutable)
		throw new ArgumentException ("Non-mutable properties aren't allowed in this indexable");

	// If this is a mutable property, make sure that
	// we don't already contain another mutable property
	// with the same name.  If we do, replace it.
	if (prop.IsMutable) {
		for (int i = 0; i < properties.Count; ++i) {
			Property other_prop = properties [i] as Property;

			// The list is exposed as a raw ArrayList, so it may
			// hold nulls or foreign objects; skip them rather
			// than crash on the dereference below.
			if (other_prop == null)
				continue;

			if (other_prop.IsMutable && prop.Key == other_prop.Key) {
				properties [i] = prop;
				return;
			}
		}
	}

	properties.Add (prop);
}
// Returns true if any attached property has the given key.
public bool HasProperty (string keyword) {
	bool found = false;

	foreach (Property candidate in properties) {
		if (candidate.Key == keyword) {
			found = true;
			break;
		}
	}

	return found;
}
// Folds another indexable into this one: takes over its timestamp,
// adds each of its properties via AddProperty, and copies its
// local state entries over ours.  No check is made that merging
// the two actually makes sense.
public void Merge (Indexable other)
{
	this.Timestamp = other.Timestamp;

	ArrayList incoming_props = other.Properties;
	foreach (Property incoming in incoming_props) {
		this.AddProperty (incoming);
	}

	Hashtable incoming_state = other.local_state;
	foreach (DictionaryEntry pair in incoming_state) {
		this.local_state [pair.Key] = pair.Value;
	}
}
431 //////////////////////////
// Marks this indexable as a child of the given parent: records the
// parent's URI, inherits its timestamp if ours is not valid, and
// copies its properties with a "parent:" prefix on each key.
public void SetChildOf (Indexable parent)
{
	this.ParentUri = parent.Uri;

	if (! this.ValidTimestamp) {
		this.Timestamp = parent.Timestamp;
	}

	// FIXME: Set all of the parent's properties on the
	// child so that we get matches against the child
	// that otherwise would match only the parent, at
	// least until we have proper RDF support.
	foreach (Property parent_prop in parent.Properties) {
		Property copy = (Property) parent_prop.Clone ();
		copy.Key = "parent:" + copy.Key;
		this.AddProperty (copy);
	}
}
451 //////////////////////////
// Returns this indexable serialized to XML by the shared serializer.
public override string ToString ()
{
	StringWriter xml_writer = new StringWriter ();
	our_serializer.Serialize (xml_writer, this);

	string xml = xml_writer.ToString ();
	xml_writer.Close ();

	return xml;
}
461 //////////////////////////
// Size, in elements, of the scratch buffers handed out below.
const int BUFFER_SIZE = 8192;

// Returns the char scratch buffer kept in the "Char Buffer" named
// thread data slot, allocating and storing it on first use.
private static char [] GetCharBuffer ()
{
	LocalDataStoreSlot slot = Thread.GetNamedDataSlot ("Char Buffer");

	object cached = Thread.GetData (slot);
	if (cached != null)
		return (char []) cached;

	char [] fresh = new char [BUFFER_SIZE];
	Thread.SetData (slot, fresh);
	return fresh;
}
// Returns the byte scratch buffer kept in the "Byte Buffer" named
// thread data slot, allocating and storing it on first use.
private static byte [] GetByteBuffer ()
{
	LocalDataStoreSlot slot = Thread.GetNamedDataSlot ("Byte Buffer");

	object cached = Thread.GetData (slot);
	if (cached != null)
		return (byte []) cached;

	byte [] fresh = new byte [BUFFER_SIZE];
	Thread.SetData (slot, fresh);
	return fresh;
}
501 //////////////////////////
// Drains a TextReader into a freshly created temporary file and
// returns a file URI for it.  Returns null if the reader is null.
//
// If anything fails along the way, the file handle is closed, the
// temporary file is removed, and the exception is rethrown.
private static Uri TextReaderToTempFileUri (TextReader reader)
{
	if (reader == null)
		return null;

	string filename = Path.GetTempFileName ();

	try {
		using (FileStream fileStream = File.OpenWrite (filename)) {
			// When we dump the contents of an indexable into a file, we
			// expect to use it again soon.
			FileAdvise.PreLoad (fileStream);

			// Make sure the temporary file is only readable by the owner.
			// S_IRUSR is octal 0400, i.e. the decimal 256 used previously.
			// FIXME: There is probably a race here.  Could some malicious program
			// do something to the file between creation and the chmod?
			Mono.Unix.Native.Syscall.chmod (filename, Mono.Unix.Native.FilePermissions.S_IRUSR);

			// Disposing the writer flushes it and closes the wrapped
			// streams; the outer using covers the case where we never
			// get as far as constructing the writer.
			using (StreamWriter writer = new StreamWriter (new BufferedStream (fileStream))) {
				char [] buffer = GetCharBuffer ();

				int read;
				while ((read = reader.Read (buffer, 0, buffer.Length)) > 0)
					writer.Write (buffer, 0, read);
			}
		}
	} catch {
		// Nobody will ever learn this file's name, so do not leave a
		// partial copy of the content behind.
		try {
			File.Delete (filename);
		} catch (Exception) {
			// Best effort only; the original failure is the one to report.
		}
		throw;
	}

	return UriFu.PathToFileUri (filename);
}
// Copies a binary stream into a freshly created temporary file and
// returns a file URI for it.  Returns null if the stream is null.
//
// If anything fails along the way, the file handle is closed, the
// temporary file is removed, and the exception is rethrown.
private static Uri BinaryStreamToTempFileUri (Stream stream)
{
	if (stream == null)
		return null;

	string filename = Path.GetTempFileName ();

	try {
		using (FileStream fileStream = File.OpenWrite (filename)) {
			// When we dump the contents of an indexable into a file, we
			// expect to use it again soon.
			FileAdvise.PreLoad (fileStream);

			// Make sure the temporary file is only readable by the owner.
			// S_IRUSR is octal 0400, i.e. the decimal 256 used previously.
			// FIXME: There is probably a race here.  Could some malicious program
			// do something to the file between creation and the chmod?
			Mono.Unix.Native.Syscall.chmod (filename, Mono.Unix.Native.FilePermissions.S_IRUSR);

			// Disposing the buffered stream flushes it and closes the
			// file stream; the outer using covers earlier failures.
			using (BufferedStream bufferedStream = new BufferedStream (fileStream)) {
				byte [] buffer = GetByteBuffer ();

				int read;
				while ((read = stream.Read (buffer, 0, buffer.Length)) > 0)
					bufferedStream.Write (buffer, 0, read);
			}
		}
	} catch {
		// Nobody will ever learn this file's name, so do not leave a
		// partial copy of the content behind.
		try {
			File.Delete (filename);
		} catch (Exception) {
			// Best effort only; the original failure is the one to report.
		}
		throw;
	}

	return UriFu.PathToFileUri (filename);
}
// Dumps any in-memory content into temporary files and points the
// content URIs at them.  The text reader takes precedence over the
// binary stream; hot content is handled independently.  Whenever a
// file is written, DeleteContent is switched on.
public void StoreStream () {
	bool have_text = (textReader != null);
	bool have_binary = (binary_stream != null);

	if (have_text) {
		this.ContentUri = TextReaderToTempFileUri (textReader);

		if (Debug) {
			Logger.Log.Debug ("Storing text content from {0} in {1}", Uri, ContentUri);
		}

		this.DeleteContent = true;
	} else if (have_binary) {
		this.ContentUri = BinaryStreamToTempFileUri (binary_stream);

		if (Debug) {
			Logger.Log.Debug ("Storing binary content from {0} in {1}", Uri, ContentUri);
		}

		this.DeleteContent = true;
	}

	if (hotTextReader != null) {
		this.HotContentUri = TextReaderToTempFileUri (hotTextReader);

		if (Debug) {
			Logger.Log.Debug ("Storing hot content from {0} in {1}", Uri, HotContentUri);
		}

		this.DeleteContent = true;
	}
}
600 //////////////////////////
// Hash built from the URI (0 when it is null) XORed with the hash
// of the operation type.
public override int GetHashCode ()
{
	int uri_hash = 0;
	if (uri != null)
		uri_hash = uri.GetHashCode ();

	return uri_hash ^ type.GetHashCode ();
}
// Orders indexables by timestamp.
//
// A null argument compares as less than this instance, as the
// IComparable contract asks, instead of failing on a null
// dereference.  An argument that is not an Indexable still raises
// InvalidCastException, as before.
public int CompareTo (object obj)
{
	if (obj == null)
		return 1;

	Indexable other = (Indexable) obj;
	return DateTime.Compare (this.Timestamp, other.Timestamp);
}