4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
29 using System
.Collections
;
32 using System
.Threading
;
34 using System
.Xml
.Serialization
;
39 public enum IndexableType
{
// Controls when an indexable's content should be passed through a filter.
public enum IndexableFiltering
{
	// Never try to filter this indexable; it contains no content.
	Never,

	// The readers promise to return nice clean text, so do nothing.
	AlreadyFiltered,

	// Try to determine automatically whether this needs to be filtered.
	Automatic,

	// Always try to filter this indexable.
	Always
}
52 public class Indexable
: Versioned
, IComparable
{
private static bool Debug = false;

// The type of indexing operation represented by this Indexable
// object. We default to Add, for historical reasons.
private IndexableType type = IndexableType.Add;

// URI of the item being indexed.
private Uri uri = null;

// URI of the parent indexable, if there is one.
private Uri parent_uri = null;

// URI of the contents to index.
private Uri contentUri = null;

// URI of the hot contents to index.
private Uri hotContentUri = null;

// Whether the content should be deleted once indexing is done.
private bool deleteContent = false;

// Kind of hit: File, WebLink, MailMessage, IMLog, etc.
private String hit_type = null;

// MIME type if applicable; null otherwise.
private String mimeType = null;

// The source backend that generated this indexable.
private string source = null;

// List of Property objects.
private ArrayList properties = new ArrayList ();

// Is this being indexed because of crawling or other
// background activity?
private bool crawled = true;

// Is this object inherently contentless?
private bool no_content = false;

// If necessary, should this object's content be cached?
// The cached version is used to generate snippets.
private bool cache_content = true;

// A stream of the content to index.
private TextReader textReader;

// A stream of the hot content to index.
private TextReader hotTextReader;

// A stream of binary data to filter.
private Stream binary_stream;

// When should we try to filter this indexable?
private IndexableFiltering filtering = IndexableFiltering.Automatic;

// Local state: key/value pairs that never get serialized.
private Hashtable local_state = new Hashtable ();
115 //////////////////////////
// Shared XmlSerializer for the Indexable type; NewFromXml and ToString
// use it to read and write the XML form. It is created once, as part of
// the declaration, because an assignment statement cannot stand on its
// own at class scope.
private static XmlSerializer our_serializer = new XmlSerializer (typeof (Indexable));
124 //////////////////////////
126 public Indexable (IndexableType type
,
131 this.hit_type
= "File"; // FIXME: Why do we default to this?
// Convenience constructor: builds an indexable for the given URI using
// the IndexableType.Add operation type.
public Indexable (Uri uri)
	: this (IndexableType.Add, uri)
{
}
139 // Only used when reading from xml
// Deserializes an Indexable from its XML text using the shared
// serializer. The result of Deserialize is cast to Indexable.
public static Indexable NewFromXml (string xml)
{
	object deserialized = our_serializer.Deserialize (new StringReader (xml));
	return (Indexable) deserialized;
}
148 //////////////////////////
150 [XmlAttribute ("Type")]
151 public IndexableType Type
{
153 set { type = value; }
// String form of the item's URI, exposed as the "Uri" XML attribute.
// Conversion in both directions is delegated to UriFu.
[XmlAttribute ("Uri")]
public string UriString
{
	get
	{
		return UriFu.UriToEscapedString (uri);
	}
	set
	{
		uri = UriFu.EscapedStringToUri (value);
	}
}
// URI of the parent indexable; null when none has been set.
public Uri ParentUri
{
	get
	{
		return parent_uri;
	}
	set
	{
		parent_uri = value;
	}
}
174 [XmlAttribute ("ParentUri")]
175 public string ParentUriString
{
177 if (parent_uri
== null)
180 return UriFu
.UriToEscapedString (parent_uri
);
187 parent_uri
= UriFu
.EscapedStringToUri (value);
// URI of the contents to index. When no explicit content URI has been
// assigned, the getter falls back to the Uri property.
public Uri ContentUri
{
	get
	{
		if (contentUri != null)
			return contentUri;

		return Uri;
	}
	set
	{
		contentUri = value;
	}
}
// String form of the content URI, exposed as the "ContentUri" XML
// attribute. The getter goes through the ContentUri property (so it
// sees that property's fallback); the setter writes the backing field.
[XmlAttribute ("ContentUri")]
public string ContentUriString
{
	get
	{
		Uri effective = ContentUri;
		return UriFu.UriToEscapedString (effective);
	}
	set
	{
		contentUri = UriFu.EscapedStringToUri (value);
	}
}
// URI of the hot contents to index; a plain wrapper around the
// hotContentUri field.
private Uri HotContentUri
{
	get
	{
		return hotContentUri;
	}
	set
	{
		hotContentUri = value;
	}
}
// String form of the hot-content URI, exposed as the "HotContentUri"
// XML attribute. A missing URI is represented by the empty string.
[XmlAttribute ("HotContentUri")]
public string HotContentUriString
{
	get
	{
		if (HotContentUri == null)
			return "";

		return UriFu.UriToEscapedString (HotContentUri);
	}
	set
	{
		// Treat null the same as "": both mean "no hot content".
		// NOTE(review): assumes UriFu.EscapedStringToUri does not
		// accept null -- confirm against UriFu.
		if (value == null || value == "")
			hotContentUri = null;
		else
			hotContentUri = UriFu.EscapedStringToUri (value);
	}
}
// URI to present for this indexable: the content URI when the item's
// own URI uses the GuidFu scheme, otherwise the item's URI.
public Uri DisplayUri
{
	get
	{
		if (uri.Scheme == GuidFu.UriScheme)
			return ContentUri;

		return Uri;
	}
}
// Whether the content should be deleted after indexing.
public bool DeleteContent
{
	get
	{
		return deleteContent;
	}
	set
	{
		deleteContent = value;
	}
}
// The hit type string (File, WebLink, MailMessage, IMLog, etc.).
public String HitType
{
	get
	{
		return hit_type;
	}
	set
	{
		hit_type = value;
	}
}
// The MIME type, or null when not applicable.
public String MimeType
{
	get
	{
		return mimeType;
	}
	set
	{
		mimeType = value;
	}
}
// The source backend that generated this indexable.
public string Source
{
	get
	{
		return source;
	}
	set
	{
		source = value;
	}
}
// True only when the content is not scheduled for deletion, the content
// URI is a file URI, and there is no parent URI. The checks run in that
// order and stop at the first one that fails.
public bool IsNonTransient
{
	get
	{
		if (DeleteContent)
			return false;

		if (! ContentUri.IsFile)
			return false;

		return ParentUri == null;
	}
}
// Whether this is being indexed because of crawling or other
// background activity.
public bool Crawled
{
	get
	{
		return crawled;
	}
	set
	{
		crawled = value;
	}
}
// Whether this object is inherently contentless.
public bool NoContent
{
	get
	{
		return no_content;
	}
	set
	{
		no_content = value;
	}
}
// Whether this object's content should be cached if necessary.
public bool CacheContent
{
	get
	{
		return cache_content;
	}
	set
	{
		cache_content = value;
	}
}
// When filtering of this indexable should be attempted.
public IndexableFiltering Filtering
{
	get
	{
		return filtering;
	}
	set
	{
		filtering = value;
	}
}
// Read-only access to the local-state table (the same Hashtable
// instance held by this object, not a copy).
public IDictionary LocalState
{
	get
	{
		return local_state;
	}
}
278 //////////////////////////
280 public void Cleanup ()
283 if (contentUri
!= null) {
285 Logger
.Log
.Debug ("Cleaning up {0}", contentUri
.LocalPath
);
288 File
.Delete (contentUri
.LocalPath
);
290 // It might be gone already, so catch the exception.
296 if (hotContentUri
!= null) {
298 Logger
.Log
.Debug ("Cleaning up {0}", hotContentUri
.LocalPath
);
301 File
.Delete (hotContentUri
.LocalPath
);
306 hotContentUri
= null;
311 private Stream
StreamFromUri (Uri uri
)
313 Stream stream
= null;
315 if (uri
!= null && uri
.IsFile
&& ! no_content
) {
316 stream
= new FileStream (uri
.LocalPath
,
// Opens a text reader over the stream that StreamFromUri yields for
// the given URI. StreamFromUri starts from a null stream and only opens
// a file under certain conditions, so a null result is passed back to
// the caller as null rather than handed to StreamReader, whose
// constructor rejects a null stream.
private TextReader ReaderFromUri (Uri uri)
{
	Stream stream = StreamFromUri (uri);
	if (stream == null)
		return null;

	return new StreamReader (stream);
}
// Returns the reader for the content to index. If no reader has been
// set, one is created from ContentUri on first use and remembered, in
// the same way GetHotTextReader and GetBinaryStream handle their
// fields.
public TextReader GetTextReader ()
{
	if (textReader == null)
		textReader = ReaderFromUri (ContentUri);

	return textReader;
}
// Supplies the reader that GetTextReader will hand out, mirroring
// SetHotTextReader and SetBinaryStream.
public void SetTextReader (TextReader reader)
{
	textReader = reader;
}
// Returns the reader for the hot content. An existing reader is reused;
// otherwise one is created from HotContentUri, stored, and returned.
public TextReader GetHotTextReader ()
{
	if (hotTextReader != null)
		return hotTextReader;

	hotTextReader = ReaderFromUri (HotContentUri);
	return hotTextReader;
}
// Supplies the reader that GetHotTextReader will hand out.
public void SetHotTextReader (TextReader reader)
{
	this.hotTextReader = reader;
}
// Returns the binary stream to filter. An existing stream is reused;
// otherwise one is obtained from StreamFromUri (ContentUri), stored,
// and returned.
public Stream GetBinaryStream ()
{
	if (binary_stream != null)
		return binary_stream;

	binary_stream = StreamFromUri (ContentUri);
	return binary_stream;
}
// Supplies the stream that GetBinaryStream will hand out.
public void SetBinaryStream (Stream stream)
{
	this.binary_stream = stream;
}
// The list of Property objects attached to this indexable. Each item
// is serialized as a "Property" element. The getter returns the live
// list, not a copy.
[XmlArrayItem (ElementName = "Property", Type = typeof (Property))]
public ArrayList Properties
{
	get
	{
		return properties;
	}
}
// Adds a property to this indexable.
//
// Throws ArgumentException when this indexable is a PropertyChange and
// the property is not mutable.
//
// A mutable property replaces an existing mutable property with the
// same key in place; in that case nothing is appended, so the list
// never ends up holding the property twice.
public void AddProperty (Property prop)
{
	if (type == IndexableType.PropertyChange && ! prop.IsMutable)
		throw new ArgumentException ("Non-mutable properties aren't allowed in this indexable");

	// If this is a mutable property, make sure that
	// we don't already contain another mutable property
	// with the same name.  If we do, replace it.
	if (prop.IsMutable) {
		for (int i = 0; i < properties.Count; ++i) {
			// The list is a public ArrayList, so skip any
			// entry that is not a Property.
			Property other_prop = properties [i] as Property;
			if (other_prop == null)
				continue;

			if (other_prop.IsMutable && prop.Key == other_prop.Key) {
				properties [i] = prop;
				return;
			}
		}
	}

	properties.Add (prop);
}
// Reports whether any property in the list has a key equal to the
// given keyword. Returns false when no property matches.
public bool HasProperty (string keyword)
{
	foreach (Property property in properties) {
		if (property.Key == keyword)
			return true;
	}

	return false;
}
// Folds another indexable into this one without checking whether the
// merge makes sense: takes over the other's timestamp, adds each of its
// properties through AddProperty, and copies every local-state entry
// (overwriting entries with the same key).
public void Merge (Indexable other)
{
	Timestamp = other.Timestamp;

	foreach (Property incoming in other.Properties) {
		AddProperty (incoming);
	}

	foreach (DictionaryEntry pair in other.local_state) {
		local_state [pair.Key] = pair.Value;
	}
}
432 //////////////////////////
// Marks this indexable as a child of the given parent: records the
// parent's URI, takes over the parent's timestamp when this indexable
// does not have a valid one, and copies the parent's properties under
// "parent:"-prefixed keys.
public void SetChildOf (Indexable parent)
{
	ParentUri = parent.Uri;

	if (! ValidTimestamp)
		Timestamp = parent.Timestamp;

	// FIXME: Set all of the parent's properties on the
	// child so that we get matches against the child
	// that otherwise would match only the parent, at
	// least until we have proper RDF support.
	foreach (Property parent_prop in parent.Properties) {
		// Work on a clone so the parent's own property keeps its key.
		Property inherited = (Property) parent_prop.Clone ();
		inherited.Key = "parent:" + inherited.Key;
		AddProperty (inherited);
	}
}
452 //////////////////////////
// Returns this indexable serialized to XML text by the shared
// serializer.
public override string ToString ()
{
	StringWriter xml_out = new StringWriter ();
	our_serializer.Serialize (xml_out, this);

	string xml = xml_out.ToString ();
	return xml;
}
462 //////////////////////////
464 const int BUFFER_SIZE
= 8192;
466 private static char [] GetCharBuffer ()
468 LocalDataStoreSlot slot
;
469 slot
= Thread
.GetNamedDataSlot ("Char Buffer");
473 obj
= Thread
.GetData (slot
);
475 buffer
= new char [BUFFER_SIZE
];
476 Thread
.SetData (slot
, buffer
);
478 buffer
= (char []) obj
;
484 private static byte [] GetByteBuffer ()
486 LocalDataStoreSlot slot
;
487 slot
= Thread
.GetNamedDataSlot ("Byte Buffer");
491 obj
= Thread
.GetData (slot
);
493 buffer
= new byte [BUFFER_SIZE
];
494 Thread
.SetData (slot
, buffer
);
496 buffer
= (byte []) obj
;
502 //////////////////////////
504 private static Uri
TextReaderToTempFileUri (TextReader reader
)
509 string filename
= Path
.GetTempFileName ();
510 FileStream fileStream
= File
.OpenWrite (filename
);
512 // When we dump the contents of an indexable into a file, we
513 // expect to use it again soon.
514 FileAdvise
.PreLoad (fileStream
);
516 // Make sure the temporary file is only readable by the owner.
517 // FIXME: There is probably a race here. Could some malicious program
518 // do something to the file between creation and the chmod?
519 Mono
.Unix
.Native
.Syscall
.chmod (filename
, (Mono
.Unix
.Native
.FilePermissions
) 256);
521 BufferedStream bufferedStream
= new BufferedStream (fileStream
);
522 StreamWriter writer
= new StreamWriter (bufferedStream
);
526 buffer
= GetCharBuffer ();
530 read
= reader
.Read (buffer
, 0, buffer
.Length
);
532 writer
.Write (buffer
, 0, read
);
537 return UriFu
.PathToFileUri (filename
);
540 private static Uri
BinaryStreamToTempFileUri (Stream stream
)
545 string filename
= Path
.GetTempFileName ();
546 FileStream fileStream
= File
.OpenWrite (filename
);
548 // When we dump the contents of an indexable into a file, we
549 // expect to use it again soon.
550 FileAdvise
.PreLoad (fileStream
);
552 // Make sure the temporary file is only readable by the owner.
553 // FIXME: There is probably a race here. Could some malicious program
554 // do something to the file between creation and the chmod?
555 Mono
.Unix
.Native
.Syscall
.chmod (filename
, (Mono
.Unix
.Native
.FilePermissions
) 256);
557 BufferedStream bufferedStream
= new BufferedStream (fileStream
);
560 buffer
= GetByteBuffer ();
564 read
= stream
.Read (buffer
, 0, buffer
.Length
);
566 bufferedStream
.Write (buffer
, 0, read
);
569 bufferedStream
.Close ();
571 return UriFu
.PathToFileUri (filename
);
// Dumps in-memory content to temporary files and points the content
// URIs at them. The text reader takes precedence over the binary
// stream for ContentUri; the hot text reader is handled independently.
// Whenever a file is written, DeleteContent is switched on.
public void StoreStream ()
{
	if (textReader != null) {
		ContentUri = TextReaderToTempFileUri (textReader);
		Logger.Log.Debug ("Storing text content from {0} in {1}", Uri, ContentUri);
		DeleteContent = true;
	} else if (binary_stream != null) {
		ContentUri = BinaryStreamToTempFileUri (binary_stream);
		Logger.Log.Debug ("Storing binary content from {0} in {1}", Uri, ContentUri);
		DeleteContent = true;
	}

	if (hotTextReader != null) {
		HotContentUri = TextReaderToTempFileUri (hotTextReader);
		Logger.Log.Debug ("Storing hot content from {0} in {1}", Uri, HotContentUri);
		DeleteContent = true;
	}
}
601 //////////////////////////
// Hash code built from the URI's hash (0 when there is no URI) XORed
// with the hash of the operation type.
public override int GetHashCode ()
{
	int uri_hash = 0;
	if (uri != null)
		uri_hash = uri.GetHashCode ();

	return uri_hash ^ type.GetHashCode ();
}
// Orders indexables by Timestamp using DateTime.Compare.
//
// Follows the IComparable contract for unusual arguments: any instance
// sorts after null (returns 1), and an argument that is not an
// Indexable raises ArgumentException.
public int CompareTo (object obj)
{
	if (obj == null)
		return 1;

	Indexable other = obj as Indexable;
	if (other == null)
		throw new ArgumentException ("Object must be of type Indexable", "obj");

	return DateTime.Compare (this.Timestamp, other.Timestamp);
}