// Copyright (C) 2004 Novell, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
29 using System
.Collections
;
32 using System
.Threading
;
34 using System
.Xml
.Serialization
;
39 public enum IndexableType
{
// Policy that says when an indexable's content should be passed through
// a filter.
public enum IndexableFiltering {
	// Never try to filter this indexable, it contains no content
	Never,

	// The readers promise to return nice clean text, so do nothing
	AlreadyFiltered,

	// Try to determine automatically if this needs to be filtered
	Automatic,

	// Always try to filter this indexable
	Always
}
52 public class Indexable
: Versioned
, IComparable
{
// Debugging switch for this class; off by default.
private static bool Debug = false;

// The kind of indexing operation this object represents.
// Add is the default, for historical reasons.
private IndexableType type = IndexableType.Add;

// URI of the item being indexed.
private Uri uri = null;

// URI of the parent indexable, or null when there is none.
private Uri parent_uri = null;

// URI of the contents to index.
private Uri contentUri = null;

// URI of the hot contents to index.
private Uri hotContentUri = null;

// True when the content should be deleted once indexing is done.
private bool deleteContent = false;

// Hit type: File, WebLink, MailMessage, IMLog, etc.
private String hit_type = null;

// Mime type if applicable, otherwise null.
private String mimeType = null;

// Name of the source backend that generated this indexable.
private string source = null;

// List of Property objects.
private ArrayList properties = new ArrayList ();

// True when this is being indexed because of crawling or other
// background activity.
private bool crawled = true;

// True when this object is inherently contentless.
private bool no_content = false;

// Whether this object's content should be cached if necessary.
// The cached version is used to generate snippets.
private bool cache_content = true;

// Stream of the content to index.
private TextReader textReader;

// Stream of the hot content to index.
private TextReader hotTextReader;

// Stream of binary data to filter.
private Stream binary_stream;

// When we should try to filter this indexable.
private IndexableFiltering filtering = IndexableFiltering.Automatic;

// Local state: key/value pairs that never get serialized.
private Hashtable local_state = new Hashtable ();
115 //////////////////////////
// One serializer instance shared by every Indexable; used by both
// NewFromXml and ToString.
private static XmlSerializer our_serializer;

// Builds the shared serializer once, when the type is initialized.
static Indexable ()
{
	our_serializer = new XmlSerializer (typeof (Indexable));
}
124 //////////////////////////
// Creates an indexable for the given operation on the given URI.
// The hit type starts out as "File".
public Indexable (IndexableType type, Uri uri)
{
	this.type = type;
	this.uri = uri;
	this.hit_type = "File"; // FIXME: Why do we default to this?
}

// Shorthand for an IndexableType.Add operation on the given URI.
public Indexable (Uri uri) : this (IndexableType.Add, uri)
{
}

// Only used when reading from xml
public Indexable ()
{
}
// Deserializes an Indexable from its XML text form using the shared
// serializer.  The cast fails if the XML does not describe an Indexable.
public static Indexable NewFromXml (string xml)
{
	object deserialized = our_serializer.Deserialize (new StringReader (xml));
	return (Indexable) deserialized;
}
148 //////////////////////////
// The indexing operation this object represents.
// Serialized as the "Type" attribute.
[XmlAttribute ("Type")]
public IndexableType Type {
	get {
		return type;
	}
	set {
		type = value;
	}
}
// String form of the item's URI, serialized as the "Uri" attribute.
// Conversion in both directions is delegated to UriFu.
[XmlAttribute ("Uri")]
public string UriString {
	get {
		return UriFu.UriToSerializableString (uri);
	}
	set {
		uri = UriFu.UriStringToUri (value);
	}
}
// URI of the parent indexable, or null when there is none.
public Uri ParentUri {
	get {
		return parent_uri;
	}
	set {
		parent_uri = value;
	}
}
// String form of the parent URI, serialized as the "ParentUri" attribute.
// A missing parent is represented by null in both directions: the getter
// returns null when there is no parent, and assigning null clears it.
[XmlAttribute ("ParentUri")]
public string ParentUriString {
	get {
		if (parent_uri == null)
			return null;

		return UriFu.UriToSerializableString (parent_uri);
	}

	set {
		// Mirror the getter: never hand a null string to UriFu.
		if (value == null)
			parent_uri = null;
		else
			parent_uri = UriFu.UriStringToUri (value);
	}
}
// URI of the contents to index.  Falls back to the item's own Uri
// when no separate content URI has been set.
public Uri ContentUri {
	get {
		if (contentUri != null)
			return contentUri;

		return Uri;
	}
	set {
		contentUri = value;
	}
}
// String form of ContentUri, serialized as the "ContentUri" attribute.
// The getter reads the ContentUri property, so it reflects the fallback
// to Uri; the setter writes the backing field directly.
[XmlAttribute ("ContentUri")]
public string ContentUriString {
	get {
		return UriFu.UriToSerializableString (ContentUri);
	}
	set {
		contentUri = UriFu.UriStringToUri (value);
	}
}
// URI of the hot contents to index; null when there is none.
private Uri HotContentUri {
	get {
		return hotContentUri;
	}
	set {
		hotContentUri = value;
	}
}
// String form of the hot content URI, serialized as the "HotContentUri"
// attribute.  "No hot content" is written as the empty string; on the
// way in, both null and the empty string mean "no hot content".
[XmlAttribute ("HotContentUri")]
public string HotContentUriString {
	get {
		if (HotContentUri == null)
			return "";

		return UriFu.UriToSerializableString (HotContentUri);
	}
	set {
		// new Uri (null) throws, so treat null like "".
		if (value == null || value == "")
			hotContentUri = null;
		else
			hotContentUri = new Uri (value);
	}
}
// URI to show for this item: the content URI when the item's own URI
// uses the GuidFu scheme, the item's own URI otherwise.
public Uri DisplayUri {
	get {
		if (uri.Scheme == GuidFu.UriScheme)
			return ContentUri;

		return Uri;
	}
}
// Whether the content should be deleted after indexing.
public bool DeleteContent {
	get {
		return deleteContent;
	}
	set {
		deleteContent = value;
	}
}
// Hit type of the item: File, WebLink, MailMessage, IMLog, etc.
public String HitType {
	get {
		return hit_type;
	}
	set {
		hit_type = value;
	}
}
// Mime type of the item if applicable, otherwise null.
public String MimeType {
	get {
		return mimeType;
	}
	set {
		mimeType = value;
	}
}
// The source backend that generated this indexable.
public string Source {
	get {
		return source;
	}
	set {
		source = value;
	}
}
// True when the content is not flagged for deletion, the content URI is
// a file URI, and there is no parent.  The checks run in that order.
public bool IsNonTransient {
	get {
		if (DeleteContent)
			return false;

		if (! ContentUri.IsFile)
			return false;

		return ParentUri == null;
	}
}
// Whether this is being indexed because of crawling or other
// background activity.
public bool Crawled {
	get {
		return crawled;
	}
	set {
		crawled = value;
	}
}
// Whether this object is inherently contentless.
public bool NoContent {
	get {
		return no_content;
	}
	set {
		no_content = value;
	}
}
// Whether this object's content should be cached if necessary.
// The cached version is used to generate snippets.
public bool CacheContent {
	get {
		return cache_content;
	}
	set {
		cache_content = value;
	}
}
// When we should try to filter this indexable.
public IndexableFiltering Filtering {
	get {
		return filtering;
	}
	set {
		filtering = value;
	}
}
// Local key/value state attached to this indexable.  Exposes the
// backing table itself, so callers modify it in place.
public IDictionary LocalState {
	get {
		return local_state;
	}
}
278 //////////////////////////
// Removes the temporary content files belonging to this indexable.
// Does nothing unless DeleteContent is set, so content that was not
// flagged for deletion is never touched.  Both content URIs are
// cleared afterwards so they no longer point at removed files.
public void Cleanup ()
{
	if (! DeleteContent)
		return;

	DeleteContentFile (contentUri);
	contentUri = null;

	DeleteContentFile (hotContentUri);
	hotContentUri = null;
}

// Best-effort removal of the file behind the given URI.  A null URI is
// ignored.  Failures never propagate, but they are logged instead of
// being dropped silently.
private static void DeleteContentFile (Uri file_uri)
{
	if (file_uri == null)
		return;

	string path = file_uri.LocalPath;

	if (Debug)
		Logger.Log.Debug ("Cleaning up {0}", path);

	try {
		File.Delete (path);
	} catch (Exception ex) {
		// It might be gone already, so don't let this escape.
		Logger.Log.Debug ("Could not delete {0}: {1}", path, ex.Message);
	}
}
// Opens the file behind the given URI.  Returns null when the URI is
// null, is not a file URI, or this indexable is marked as having no
// content.
private Stream StreamFromUri (Uri uri)
{
	if (uri == null || ! uri.IsFile || no_content)
		return null;

	// NOTE(review): the mode/access/share arguments were cut off in the
	// reviewed listing; Open/Read/Read is assumed here -- confirm.
	return new FileStream (uri.LocalPath,
			       FileMode.Open,
			       FileAccess.Read,
			       FileShare.Read);
}
// Wraps the stream obtained from StreamFromUri in a StreamReader.
// Returns null when no stream could be obtained.
private TextReader ReaderFromUri (Uri uri)
{
	Stream source_stream = StreamFromUri (uri);

	if (source_stream != null)
		return new StreamReader (source_stream);

	return null;
}
// Returns the reader for the content to index, opening one from
// ContentUri on first use.  Returns null for contentless indexables.
public TextReader GetTextReader ()
{
	if (NoContent)
		return null;

	if (textReader != null)
		return textReader;

	textReader = ReaderFromUri (ContentUri);
	return textReader;
}
// Supplies the reader that GetTextReader will hand back.
public void SetTextReader (TextReader reader)
{
	this.textReader = reader;
}
// Returns the reader for the hot content to index, opening one from
// HotContentUri on first use.  Returns null for contentless indexables.
public TextReader GetHotTextReader ()
{
	if (NoContent)
		return null;

	if (hotTextReader != null)
		return hotTextReader;

	hotTextReader = ReaderFromUri (HotContentUri);
	return hotTextReader;
}
// Supplies the reader that GetHotTextReader will hand back.
public void SetHotTextReader (TextReader reader)
{
	this.hotTextReader = reader;
}
// Returns the binary stream to filter, opening one from ContentUri on
// first use.  Returns null for contentless indexables.
public Stream GetBinaryStream ()
{
	if (NoContent)
		return null;

	if (binary_stream != null)
		return binary_stream;

	binary_stream = StreamFromUri (ContentUri);
	return binary_stream;
}
// Supplies the stream that GetBinaryStream will hand back.
public void SetBinaryStream (Stream stream)
{
	this.binary_stream = stream;
}
// The list of Property objects attached to this indexable.  Each entry
// is serialized as a "Property" element.  The backing list itself is
// returned, so callers see and modify the live collection.
[XmlArrayItem (ElementName="Property", Type=typeof (Property))]
public ArrayList Properties {
	get {
		return properties;
	}
}
// Attaches a property to this indexable.
//
// A null property is ignored.  For a PropertyChange indexable only
// mutable properties are accepted; anything else raises
// ArgumentException.  A mutable property replaces an existing mutable
// property with the same key instead of being added a second time.
public void AddProperty (Property prop) {
	if (prop == null)
		return;

	if (type == IndexableType.PropertyChange && ! prop.IsMutable)
		throw new ArgumentException ("Non-mutable properties aren't allowed in this indexable");

	// If this is a mutable property, make sure that
	// we don't already contain another mutable property
	// with the same name.  If we do, replace it.
	if (prop.IsMutable) {
		for (int i = 0; i < properties.Count; ++i) {
			Property other_prop = properties [i] as Property;

			// The list is publicly reachable through Properties, so
			// an entry may be null or not a Property at all; skip it
			// rather than dereference a null.
			if (other_prop == null)
				continue;

			if (other_prop.IsMutable && prop.Key == other_prop.Key) {
				properties [i] = prop;
				return;
			}
		}
	}

	properties.Add (prop);
}
// Returns true when at least one attached property has the given key.
public bool HasProperty (string keyword) {
	for (int i = 0; i < properties.Count; ++i) {
		Property candidate = (Property) properties [i];
		if (candidate.Key == keyword)
			return true;
	}

	return false;
}
// Folds another indexable into this one: takes over its timestamp,
// adds each of its properties through AddProperty, and copies its
// local state, overwriting entries that share a key.
//
// No check is made that merging the two indexables actually makes
// sense: it is simply done.
public void Merge (Indexable other)
{
	this.Timestamp = other.Timestamp;

	foreach (Property incoming in other.Properties) {
		this.AddProperty (incoming);
	}

	foreach (DictionaryEntry pair in other.local_state) {
		this.local_state [pair.Key] = pair.Value;
	}
}
431 //////////////////////////
// Marks this indexable as a child of the given parent: records the
// parent's URI, inherits the parent's timestamp when this one has no
// valid timestamp, and copies the parent's properties under
// "parent:"-prefixed keys.
public void SetChildOf (Indexable parent)
{
	this.ParentUri = parent.Uri;

	if (! this.ValidTimestamp) {
		this.Timestamp = parent.Timestamp;
	}

	// FIXME: Set all of the parent's properties on the
	// child so that we get matches against the child
	// that otherwise would match only the parent, at
	// least until we have proper RDF support.
	foreach (Property parent_prop in parent.Properties) {
		// Work on a clone so the parent's own property keeps its key.
		Property inherited = (Property) parent_prop.Clone ();
		inherited.Key = "parent:" + inherited.Key;
		this.AddProperty (inherited);
	}
}
451 //////////////////////////
// Returns the XML serialization of this indexable, produced with the
// shared serializer.
public override string ToString ()
{
	StringWriter xml_out = new StringWriter ();

	our_serializer.Serialize (xml_out, this);

	return xml_out.ToString ();
}
461 //////////////////////////
// Length, in elements, of the per-thread copy buffers handed out by
// GetCharBuffer and GetByteBuffer.
private const int BUFFER_SIZE = 8192;
// Returns the calling thread's char buffer, kept in the named data
// slot "Char Buffer".  The buffer is created and stored in the slot the
// first time a thread asks for it.
private static char [] GetCharBuffer ()
{
	LocalDataStoreSlot slot = Thread.GetNamedDataSlot ("Char Buffer");

	object cached = Thread.GetData (slot);
	if (cached != null)
		return (char []) cached;

	char [] fresh = new char [BUFFER_SIZE];
	Thread.SetData (slot, fresh);
	return fresh;
}
// Returns the calling thread's byte buffer, kept in the named data
// slot "Byte Buffer".  The buffer is created and stored in the slot the
// first time a thread asks for it.
private static byte [] GetByteBuffer ()
{
	LocalDataStoreSlot slot = Thread.GetNamedDataSlot ("Byte Buffer");

	object cached = Thread.GetData (slot);
	if (cached != null)
		return (byte []) cached;

	byte [] fresh = new byte [BUFFER_SIZE];
	Thread.SetData (slot, fresh);
	return fresh;
}
501 //////////////////////////
// Copies everything that can be read from the given reader into a new
// temporary file and returns that file's URI.  Returns null when the
// reader is null.  The reader itself is not closed.
//
// The output file is closed even if reading or writing fails part way
// through, so a failure no longer leaks the file handle.
private static Uri TextReaderToTempFileUri (TextReader reader)
{
	if (reader == null)
		return null;

	string filename = Path.GetTempFileName ();

	using (FileStream fileStream = File.OpenWrite (filename)) {
		// When we dump the contents of an indexable into a file, we
		// expect to use it again soon.
		FileAdvise.PreLoad (fileStream);

		// Make sure the temporary file is only readable by the owner.
		// This has to happen after the file is opened for writing: once
		// the mode is owner-read-only it could not be opened for write.
		// FIXME: There is probably a race here.  Could some malicious program
		// do something to the file between creation and the chmod?
		Mono.Unix.Native.Syscall.chmod (filename, Mono.Unix.Native.FilePermissions.S_IRUSR);

		// Disposing the writer flushes it and closes the wrapped streams.
		using (StreamWriter writer = new StreamWriter (new BufferedStream (fileStream))) {
			char [] buffer = GetCharBuffer ();

			int read;
			while ((read = reader.Read (buffer, 0, buffer.Length)) > 0)
				writer.Write (buffer, 0, read);
		}
	}

	return UriFu.PathToFileUri (filename);
}
// Copies everything that can be read from the given stream into a new
// temporary file and returns that file's URI.  Returns null when the
// stream is null.  The input stream itself is not closed.
//
// The output file is closed even if reading or writing fails part way
// through, so a failure no longer leaks the file handle.
private static Uri BinaryStreamToTempFileUri (Stream stream)
{
	if (stream == null)
		return null;

	string filename = Path.GetTempFileName ();

	using (FileStream fileStream = File.OpenWrite (filename)) {
		// When we dump the contents of an indexable into a file, we
		// expect to use it again soon.
		FileAdvise.PreLoad (fileStream);

		// Make sure the temporary file is only readable by the owner.
		// This has to happen after the file is opened for writing: once
		// the mode is owner-read-only it could not be opened for write.
		// FIXME: There is probably a race here.  Could some malicious program
		// do something to the file between creation and the chmod?
		Mono.Unix.Native.Syscall.chmod (filename, Mono.Unix.Native.FilePermissions.S_IRUSR);

		// Disposing the buffered stream flushes it and closes the file.
		using (BufferedStream bufferedStream = new BufferedStream (fileStream)) {
			byte [] buffer = GetByteBuffer ();

			int read;
			while ((read = stream.Read (buffer, 0, buffer.Length)) > 0)
				bufferedStream.Write (buffer, 0, read);
		}
	}

	return UriFu.PathToFileUri (filename);
}
// Writes any in-memory content out to temporary files so it can be read
// again through the content URIs.  The text reader takes precedence
// over the binary stream; hot content is handled independently of
// both.  Every file written here flags the content for deletion.
public void StoreStream () {
	if (textReader != null) {
		ContentUri = TextReaderToTempFileUri (textReader);
		DeleteContent = true;

		if (Debug)
			Logger.Log.Debug ("Storing text content from {0} in {1}", Uri, ContentUri);
	} else if (binary_stream != null) {
		ContentUri = BinaryStreamToTempFileUri (binary_stream);
		DeleteContent = true;

		if (Debug)
			Logger.Log.Debug ("Storing binary content from {0} in {1}", Uri, ContentUri);
	}

	if (hotTextReader != null) {
		HotContentUri = TextReaderToTempFileUri (hotTextReader);
		DeleteContent = true;

		if (Debug)
			Logger.Log.Debug ("Storing hot content from {0} in {1}", Uri, HotContentUri);
	}
}
600 //////////////////////////
// Combines the hash of the item's URI (0 when there is no URI) with
// the hash of the operation type.
public override int GetHashCode ()
{
	int uri_hash = 0;
	if (uri != null)
		uri_hash = uri.GetHashCode ();

	return uri_hash ^ type.GetHashCode ();
}
// Orders indexables by timestamp.
//
// Following the IComparable convention, any instance sorts after null,
// so a null argument yields a positive result.  An argument that is
// not an Indexable raises ArgumentException.
public int CompareTo (object obj)
{
	if (obj == null)
		return 1;

	Indexable other = obj as Indexable;
	if (other == null)
		throw new ArgumentException ("Object must be of type Indexable", "obj");

	return DateTime.Compare (this.Timestamp, other.Timestamp);
}