4 // Copyright (C) 2004 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
29 using System
.Collections
;
32 using System
.Reflection
;
36 namespace Beagle
.Daemon
{
// Derived classes must always have a constructor that
// takes no arguments.
44 //////////////////////////
// Backing store for the Identifier property.
private string identifier;

// Identifier string for this object. The value is stored and
// returned as-is; no validation is applied.
public string Identifier
{
	get {
		return this.identifier;
	}
	set {
		this.identifier = value;
	}
}
53 //////////////////////////
// Backing store for the DeleteContent property.
private bool delete_content;

// Flag read by Close (): when true, Close () calls Delete ()
// on the current file system entry.
public bool DeleteContent
{
	get {
		return delete_content;
	}
	set {
		delete_content = value;
	}
}
62 //////////////////////////
// Flavors registered through AddSupportedFlavor, kept in the
// order in which they were added.
private ArrayList supported_flavors = new ArrayList ();

// Registers a flavor. No duplicate or null check is made.
protected void AddSupportedFlavor (FilterFlavor flavor)
{
	this.supported_flavors.Add (flavor);
}

// Exposes the live flavor list (not a copy) as an ICollection.
public ICollection SupportedFlavors
{
	get {
		return this.supported_flavors;
	}
}
75 //////////////////////////
77 // Filters are versioned. This allows us to automatically re-index
78 // files when a newer filter is available.
81 get { return this.GetType ().Name; }
84 private int version
= -1;
87 get { return version < 0 ? 0 : version; }
// Sets the version number of this filter to v.
// Two failure paths are visible below; each formats a message with
// String.Format and throws a plain Exception:
//   - v is rejected as an invalid version;
//   - a version is already recorded and v would re-set it.
// NOTE(review): the conditions guarding each throw are not visible
// in this block -- confirm the exact checks before relying on them.
90 protected void SetVersion (int v
)
94 msg
= String
.Format ("Attempt to set invalid version {0} on Filter {1}", v
, Name
);
95 throw new Exception (msg
);
100 msg
= String
.Format ("Attempt to re-set version from {0} to {1} on Filter {2}", version
, v
, Name
);
101 throw new Exception (msg
);
109 //////////////////////////
// Backing stores for MimeType and Extension; both start out null.
private string this_mime_type = null;
private string this_extension = null;

// MIME type string associated with this object. Plain accessor:
// the value is stored and returned unchanged.
public string MimeType
{
	get {
		return this.this_mime_type;
	}
	set {
		this.this_mime_type = value;
	}
}

// Extension string associated with this object. Plain accessor:
// the value is stored and returned unchanged.
public string Extension
{
	get {
		return this.this_extension;
	}
	set {
		this.this_extension = value;
	}
}
124 //////////////////////////
126 private bool crawl_mode
= false;
128 public void EnableCrawlMode ()
// Read-only view of the crawl_mode flag for derived classes.
protected bool CrawlMode
{
	get {
		return this.crawl_mode;
	}
}
137 //////////////////////////
147 public void HotDown ()
154 get { return hotCount > 0; }
157 public void FreezeUp ()
162 public void FreezeDown ()
// True while freezeCount is positive.
public bool IsFrozen
{
	get {
		bool frozen = freezeCount > 0;
		return frozen;
	}
}
172 //////////////////////////
174 private bool snippetMode
= false;
175 private bool originalIsText
= false;
176 private TextWriter snippetWriter
= null;
// Gets or sets the snippetMode flag. Plain accessor with no
// side effects.
public bool SnippetMode
{
	get {
		return this.snippetMode;
	}
	set {
		this.snippetMode = value;
	}
}
// Gets or sets the originalIsText flag. Plain accessor with no
// side effects.
public bool OriginalIsText
{
	get {
		return this.originalIsText;
	}
	set {
		this.originalIsText = value;
	}
}
// Registers writer as the destination for snippet output by
// assigning it to snippetWriter. Other members write text to
// snippetWriter and close it in Close ().
// NOTE(review): any precondition on the assignment is not visible
// in this block -- confirm whether it is unconditional.
188 public void AttachSnippetWriter (TextWriter writer
)
191 snippetWriter
= writer
;
194 //////////////////////////
196 private ArrayList textPool
;
197 private ArrayList hotPool
;
198 private ArrayList propertyPool
;
200 private bool last_was_structural_break
= true;
// This two-argument AppendText() lets a filter separate the
// hot content of a paragraph from its normal content and pass
// each to this method.
//
// str   : holds both the normal content and the hot content.
// strHot: holds only the hot content.
//
// Example: if the actual content is "one <b>two</b> three", then
// str = "one two three"
//
// NOTE: HotUp() and HotDown() have no effect on this variant.
// Appends normal text and hot text in a single call.
// When the filter is not frozen and str is neither null nor empty,
// str is split on '\n'; each line is trimmed, and every non-empty
// line is passed to ReallyAppendText as normal text (hot argument
// null) and followed by AppendStructuralBreak ().
// strHot is passed to ReallyAppendText separately, with null as the
// normal-text argument.
217 public void AppendText (string str
, string strHot
)
219 if (!IsFrozen
&& str
!= null && str
!= "") {
221 // FIXME: Do we need to handle '\n' in any
225 string[] lines
= str
.Split ('\n');
226 for (i
= 0; i
< lines
.Length
; i
++) {
227 line
= lines
[i
].Trim();
228 if (line
.Length
> 0) {
229 ReallyAppendText (line
, null);
230 AppendStructuralBreak ();
234 ReallyAppendText (null, strHot
);
// Single-argument convenience overload. Forwards to the
// two-argument AppendText, passing str as the hot text as well
// when IsHot is true and null otherwise.
public void AppendText (string str)
{
	// Nothing to append while frozen, or for null/empty input.
	if (IsFrozen || str == null || str == "")
		return;

	string hot_text = IsHot ? str : null;
	AppendText (str, hot_text);
}
244 // Adds text to the text and hot pools respectively.
// Visible behavior: when not frozen and strHot is neither null nor
// empty, the trimmed strHot plus one trailing space is added to
// hotPool. str is written to snippetWriter when a writer is attached,
// and last_was_structural_break is cleared.
// NOTE(review): the handling of str for the text pool is not visible
// in this block -- confirm against the full definition.
245 private void ReallyAppendText (string str
, string strHot
)
247 if (!IsFrozen
&& strHot
!= null && strHot
!= "")
248 hotPool
.Add (strHot
.Trim()+" ");
253 if (snippetWriter
!= null)
254 snippetWriter
.Write (str
);
256 last_was_structural_break
= false;
// Decides whether whitespace should be appended after the current
// contents of array. An empty array is handled as a special case;
// otherwise the last element is read as a string and its final
// character is tested with char.IsWhiteSpace.
// NOTE(review): the values returned on each path are not visible in
// this block -- confirm before relying on them.
259 private bool NeedsWhiteSpace (ArrayList array
)
261 if (array
.Count
== 0)
264 string last
= (string) array
[array
.Count
-1];
266 && char.IsWhiteSpace (last
[last
.Length
-1]))
// Appends a single whitespace separator.
// Checks last_was_structural_break first, then consults
// NeedsWhiteSpace (textPool). On the path shown, a single space is
// written to snippetWriter when a writer is attached and
// last_was_structural_break is cleared.
// NOTE(review): the action taken when last_was_structural_break is
// set, and the text-pool update, are not visible here -- confirm.
272 public void AppendWhiteSpace ()
274 if (last_was_structural_break
)
277 //Logger.Log.Debug ("AppendWhiteSpace ()");
278 if (NeedsWhiteSpace (textPool
)) {
280 if (snippetWriter
!= null)
281 snippetWriter
.Write (" ");
282 last_was_structural_break
= false;
// Adds prop to the property pool. A null property, or a property
// whose Value is null, is silently ignored.
public void AddProperty (Property prop)
{
	if (prop == null || prop.Value == null)
		return;

	propertyPool.Add (prop);
}
// Marks a structural break (for example the end of a line) in the
// output. When a snippet writer is attached and the previous
// operation was not already a structural break, a newline is written
// to it and last_was_structural_break is set, so consecutive breaks
// produce only one newline in the snippet.
// NeedsWhiteSpace (textPool) is then consulted for the text pool.
// NOTE(review): the statement executed when NeedsWhiteSpace returns
// true is not visible in this block -- confirm.
292 public void AppendStructuralBreak ()
294 if (snippetWriter
!= null && ! last_was_structural_break
) {
295 snippetWriter
.WriteLine ();
296 last_was_structural_break
= true;
298 // When adding a "newline" to the textCache, we need to
299 // append a "Whitespace" to the text pool.
300 if (NeedsWhiteSpace (textPool
))
304 //////////////////////////
// Backing flag for IsFinished; starts out false.
private bool isFinished = false;

// Read-only view of the isFinished flag.
public bool IsFinished
{
	get {
		return this.isFinished;
	}
}
312 protected void Finished ()
// Backing flag for HasError; starts out false.
private bool has_error = false;

// Read-only view of the has_error flag.
public bool HasError
{
	get {
		return this.has_error;
	}
}
323 protected void Error ()
328 //////////////////////////
// Dispatches to the DoOpen overload matching the runtime type of
// info: FileInfo first, then DirectoryInfo. Any other value
// (including null) is ignored.
protected virtual void DoOpen (FileSystemInfo info)
{
	FileInfo file = info as FileInfo;
	if (file != null) {
		DoOpen (file);
		return;
	}

	DirectoryInfo directory = info as DirectoryInfo;
	if (directory != null)
		DoOpen (directory);
}
// Overridable hook for opening a file. Does nothing by default.
protected virtual void DoOpen (FileInfo info)
{
}

// Overridable hook for opening a directory. Does nothing by default.
protected virtual void DoOpen (DirectoryInfo info)
{
}

// Overridable hook for extracting properties. Does nothing by default.
protected virtual void DoPullProperties ()
{
}

// Overridable hook for pull setup. Does nothing by default.
protected virtual void DoPullSetup ()
{
}

// Overridable hook for pulling content. The default implementation
// produces nothing and immediately calls Finished ().
protected virtual void DoPull ()
{
	Finished ();
}

// Overridable hook for cleanup. Does nothing by default.
protected virtual void DoClose ()
{
}
349 //////////////////////////
353 (1) DoOpen (FileInfo info) or DoOpen (Stream)
354 (2) DoPullProperties ()
356 At this point all properties must be in place
358 Once someone starts reading from the TextReader,
359 the following are called:
360 DoPull () [until Finished() is called]
361 DoClose () [when finished]
365 private string tempFile
= null;
366 private FileSystemInfo currentInfo
= null;
367 private FileStream currentStream
= null;
368 private StreamReader currentReader
= null;
// Opens the filter on the contents of a TextReader.
// The text is copied, BUFFER_SIZE characters per Read call, into a
// file obtained from Path.GetTempFileName (its path is remembered in
// tempFile so Close () can delete it). The method then returns the
// result of Open on a FileInfo for that temporary file.
370 public bool Open (TextReader reader
)
372 tempFile
= Path
.GetTempFileName ();
373 FileStream file_stream
= File
.OpenWrite (tempFile
);
375 // When we dump the contents of a reader into a file, we
376 // expect to use it again soon.
377 FileAdvise
.PreLoad (file_stream
);
379 // Make sure the temporary file is only readable by the owner.
380 // FIXME: There is probably a race here. Could some malicious program
381 // do something to the file between creation and the chmod?
// 256 decimal is 0400 octal, i.e. the owner-read permission bit.
382 Mono
.Posix
.Syscall
.chmod (tempFile
, (Mono
.Posix
.FileMode
) 256);
384 BufferedStream buffered_stream
= new BufferedStream (file_stream
);
385 StreamWriter writer
= new StreamWriter (buffered_stream
);
387 const int BUFFER_SIZE
= 8192;
388 char [] buffer
= new char [BUFFER_SIZE
];
392 read
= reader
.Read (buffer
, 0, BUFFER_SIZE
);
394 writer
.Write (buffer
, 0, read
);
399 return Open (new FileInfo (tempFile
));
// Opens the filter on the contents of an arbitrary Stream.
// The bytes are copied, BUFFER_SIZE bytes per Read call, through a
// BufferedStream into a file obtained from Path.GetTempFileName (its
// path is remembered in tempFile so Close () can delete it). The
// buffered stream is closed, and the method then returns the result
// of Open on a FileInfo for that temporary file.
402 public bool Open (Stream stream
)
404 tempFile
= Path
.GetTempFileName ();
405 FileStream file_stream
= File
.OpenWrite (tempFile
);
407 // When we dump the contents of a reader into a file, we
408 // expect to use it again soon.
409 FileAdvise
.PreLoad (file_stream
);
411 // Make sure the temporary file is only readable by the owner.
412 // FIXME: There is probably a race here. Could some malicious program
413 // do something to the file between creation and the chmod?
// 256 decimal is 0400 octal, i.e. the owner-read permission bit.
414 Mono
.Posix
.Syscall
.chmod (tempFile
, (Mono
.Posix
.FileMode
) 256);
416 BufferedStream buffered_stream
= new BufferedStream (file_stream
);
418 const int BUFFER_SIZE
= 8192;
419 byte [] buffer
= new byte [BUFFER_SIZE
];
423 read
= stream
.Read (buffer
, 0, BUFFER_SIZE
);
425 buffered_stream
.Write (buffer
, 0, read
);
428 buffered_stream
.Close ();
430 return Open (new FileInfo (tempFile
));
// Opens the filter on a file system entry.
// textPool, hotPool and propertyPool are replaced with fresh, empty
// lists. For a FileInfo, a FileStream is opened on info.FullName and
// handed to FileAdvise.IncreaseReadAhead and FileAdvise.PreLoad.
// Any existing currentReader is closed and cleared, and the stream is
// rewound to its beginning. An Exception caught here is logged as a
// warning with the path and the exception message.
// NOTE(review): the value returned on the success and failure paths
// is not visible in this block -- confirm.
433 public bool Open (FileSystemInfo info
)
436 textPool
= new ArrayList ();
437 hotPool
= new ArrayList ();
438 propertyPool
= new ArrayList ();
442 if (info
is FileInfo
) {
443 // Open a stream for this file.
444 currentStream
= new FileStream (info
.FullName
,
449 // Our default assumption is sequential reads.
450 // FIXME: Is this the right thing to do here?
451 FileAdvise
.IncreaseReadAhead (currentStream
);
453 // Give the OS a hint that we will be reading this
455 FileAdvise
.PreLoad (currentStream
);
473 // Close and reset our TextReader
474 if (currentReader
!= null) {
475 currentReader
.Close ();
476 currentReader
= null;
479 // Seek back to the beginning of our stream
480 currentStream
.Seek (0, SeekOrigin
.Begin
);
486 } catch (Exception e
) {
487 Logger
.Log
.Warn ("Unable to filter {0}: {1}", info
.FullName
, e
.Message
);
// Opens the filter on a path given as a string.
// An existing file is wrapped in a FileInfo and an existing directory
// in a DirectoryInfo; in both cases the result of the corresponding
// Open call is returned.
// NOTE(review): the result for a path that is neither a file nor a
// directory is not visible in this block -- confirm.
494 public bool Open (string path
)
496 if (File
.Exists (path
))
497 return Open (new FileInfo (path
));
498 else if (Directory
.Exists (path
))
499 return Open (new DirectoryInfo (path
));
// The current entry as a FileInfo, or null when it is not a file
// (or when nothing is open).
public FileInfo FileInfo
{
	get {
		return this.currentInfo as FileInfo;
	}
}

// The current entry as a DirectoryInfo, or null when it is not a
// directory (or when nothing is open).
public DirectoryInfo DirectoryInfo
{
	get {
		return this.currentInfo as DirectoryInfo;
	}
}

// The stream currently held in currentStream; may be null.
public Stream Stream
{
	get {
		return this.currentStream;
	}
}
// Text view of the current stream.
// A StreamReader over currentStream is created lazily on first
// access and cached in currentReader; later accesses return the same
// reader. Returns null when no reader exists and there is no
// current stream.
516 public TextReader TextReader
{
518 if (currentReader
== null
519 && currentStream
!= null) {
520 currentReader
= new StreamReader (currentStream
);
523 return currentReader
;
// Releases everything acquired while the filter was open.
// Steps visible below: checks for a missing currentStream; passes the
// stream to FileAdvise.FlushCache; closes currentReader if one was
// created; closes currentStream and clears the field; closes the
// snippet writer if attached; deletes the temporary file if one was
// created; and, when DeleteContent is set, deletes the current entry,
// logging (at debug level) any exception raised by the deletion.
// NOTE(review): the action taken when currentStream is null, and the
// condition guarding FlushCache, are not visible here -- confirm.
539 private void Close ()
541 if (currentStream
== null)
546 // When crawling, give the OS a hint that we don't
547 // need to keep this file around in the page cache.
549 FileAdvise
.FlushCache (currentStream
);
551 if (currentReader
!= null)
552 currentReader
.Close ();
554 currentStream
.Close ();
555 currentStream
= null;
557 if (snippetWriter
!= null)
558 snippetWriter
.Close ();
560 if (tempFile
!= null)
561 File
.Delete (tempFile
);
563 if (currentInfo
!= null && this.DeleteContent
) {
565 currentInfo
.Delete ();
566 } catch (Exception e
) {
567 Logger
.Log
.Debug ("Caught exception trying to delete {0} in filter '{1}'",
568 currentInfo
.FullName
, Name
);
569 Logger
.Log
.Debug (e
);
// Drains pending strings from array, with sb as the destination
// buffer.
// Pull () is called repeatedly until the array holds at least one
// item or Pull () returns false. If the array then has content, its
// strings are iterated.
// NOTE(review): the loop body and the return values are not visible
// in this block -- confirm.
574 private bool PullFromArray (ArrayList array
, StringBuilder sb
)
576 while (array
.Count
== 0 && Pull ()) { }
578 if (array
.Count
> 0) {
579 foreach (string str
in array
)
// Exception-safe wrapper around PullFromArray.
// The result of PullFromArray (array, sb) is captured in pulled; any
// Exception it raises is caught and logged at debug level together
// with the filter name, so it does not propagate to the caller.
// NOTE(review): the value returned after an exception is not visible
// in this block -- confirm.
588 private bool PullTextCarefully (ArrayList array
, StringBuilder sb
)
592 pulled
= PullFromArray (array
, sb
);
593 } catch (Exception ex
) {
594 Logger
.Log
.Debug ("Caught exception while pulling text in filter '{0}'", Name
);
595 Logger
.Log
.Debug (ex
);
// Pull callback for normal text: drains textPool into sb via
// PullTextCarefully and returns its result.
private bool PullText (StringBuilder sb)
{
	bool pulled = PullTextCarefully (this.textPool, sb);
	return pulled;
}

// Pull callback for hot text: drains hotPool into sb via
// PullTextCarefully and returns its result.
private bool PullHotText (StringBuilder sb)
{
	bool pulled = PullTextCarefully (this.hotPool, sb);
	return pulled;
}
// Builds a reader over the filter's normal text.
// A PullingReader is created with PullText as its pull callback, and
// this object's Identifier is copied onto it.
// NOTE(review): the return statement is not visible in this block;
// presumably the PullingReader is returned -- confirm.
611 public TextReader
GetTextReader ()
613 PullingReader pr
= new PullingReader (new PullingReader
.Pull (PullText
));
614 pr
.Identifier
= Identifier
;
// Builds a reader over the filter's hot text: a new PullingReader
// whose pull callback is PullHotText.
public TextReader GetHotTextReader ()
{
	PullingReader.Pull hot_puller = new PullingReader.Pull (PullHotText);
	return new PullingReader (hot_puller);
}
// Enumerable view over the live property pool (not a copy).
public IEnumerable Properties
{
	get {
		return this.propertyPool;
	}
}
627 //////////////////////////////
629 // This is used primarily for the generation of URIs for the
630 // child indexables that can be created as a result of the
631 // filtering process.
633 private Uri uri
= null;
640 //////////////////////////////
// Child indexables collected so far, in the order they were added.
private ArrayList child_indexables = new ArrayList ();

// Appends a single child indexable. No null or duplicate check is made.
protected void AddChildIndexable (Indexable indexable)
{
	child_indexables.Add (indexable);
}

// Appends every element of indexables, in enumeration order.
// ArrayList.AddRange throws ArgumentNullException for a null
// collection.
protected void AddChildIndexables (ICollection indexables)
{
	child_indexables.AddRange (indexables);
}
654 public ArrayList ChildIndexables
{
655 get { return this.child_indexables; }