Thumbnail file hits. Based on a patch from D Bera
[beagle.git] / beagled / Filter.cs
blob7285273875ac761e6dba9055739ff0a106fe98af
1 //
2 // Filter.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
28 using System;
29 using System.Collections;
30 using System.IO;
31 using System.Text;
32 using System.Reflection;
34 using Beagle.Util;
36 namespace Beagle.Daemon {
38 public class Filter {
// Every derived filter is required to offer a constructor with an
// empty parameter list. The base constructor itself does no work.
public Filter ()
{
}
44 //////////////////////////
// Backing store for Identifier.
private string identifier;

// Caller-assigned identifier for this filter. GetTextReader ()
// copies it onto the reader it hands out.
public string Identifier {
	get {
		return this.identifier;
	}
	set {
		this.identifier = value;
	}
}
53 //////////////////////////
// Backing store for DeleteContent.
private bool delete_content;

// When true, Close () tries to delete the file or directory that
// was being filtered once it has released its resources.
public bool DeleteContent {
	get {
		return delete_content;
	}
	set {
		delete_content = value;
	}
}
62 //////////////////////////
// Flavors registered through AddSupportedFlavor ().
private ArrayList supported_flavors = new ArrayList ();

// Registers one more flavor that this filter can handle.
protected void AddSupportedFlavor (FilterFlavor flavor)
{
	this.supported_flavors.Add (flavor);
}

// All flavors registered so far.
public ICollection SupportedFlavors {
	get {
		return this.supported_flavors;
	}
}
75 //////////////////////////
77 // Filters are versioned. This allows us to automatically re-index
78 // files when a newer filter is available.
// The filter's name, taken from the simple type name of the
// concrete class.
public string Name {
	get {
		Type concrete = this.GetType ();
		return concrete.Name;
	}
}
// -1 marks "SetVersion () has not been called yet".
private int version = -1;

// The filter's version. Reports 0 for as long as no version has
// been set.
public int Version {
	get {
		if (version < 0)
			return 0;

		return version;
	}
}
// Assigns the filter's version. May be called at most once.
//
// v: the version number; must not be negative.
//
// Throws ArgumentOutOfRangeException when v is negative and
// InvalidOperationException when a version was already set. Both
// derive from Exception, so callers that catch Exception keep
// working.
protected void SetVersion (int v)
{
	if (v < 0) {
		string msg;
		msg = String.Format ("Attempt to set invalid version {0} on Filter {1}", v, Name);
		throw new ArgumentOutOfRangeException ("v", msg);
	}

	// -1 is the "never set" sentinel; anything else means a
	// version has already been assigned.
	if (version != -1) {
		string msg;
		msg = String.Format ("Attempt to re-set version from {0} to {1} on Filter {2}", version, v, Name);
		throw new InvalidOperationException (msg);
	}

	version = v;
}
109 //////////////////////////
// Backing stores for MimeType and Extension; both start out null.
private string this_mime_type = null;
private string this_extension = null;

// MIME type associated with this filter instance.
public string MimeType {
	get {
		return this.this_mime_type;
	}
	set {
		this.this_mime_type = value;
	}
}

// File extension associated with this filter instance.
public string Extension {
	get {
		return this.this_extension;
	}
	set {
		this.this_extension = value;
	}
}
124 //////////////////////////
// Set once by EnableCrawlMode (); nothing in this class clears it.
private bool crawl_mode = false;

// Switches the filter into crawl mode. Close () then hints to the
// OS that the file need not stay in the page cache.
public void EnableCrawlMode ()
{
	this.crawl_mode = true;
}

// True after EnableCrawlMode () has been called.
protected bool CrawlMode {
	get {
		return this.crawl_mode;
	}
}
137 //////////////////////////
// Nesting counters for the "hot" and "frozen" states. Neither is
// ever decremented below zero.
int hotCount = 0;
int freezeCount = 0;

// Enters one more level of "hot" text.
public void HotUp ()
{
	hotCount++;
}

// Leaves one level of "hot" text; a no-op when not hot.
public void HotDown ()
{
	if (hotCount <= 0)
		return;

	hotCount--;
}

// True while at least one HotUp () is unmatched.
public bool IsHot {
	get {
		return 0 < hotCount;
	}
}

// Enters one more level of "frozen" state, in which appended text
// is ignored.
public void FreezeUp ()
{
	freezeCount++;
}

// Leaves one level of "frozen" state; a no-op when not frozen.
public void FreezeDown ()
{
	if (freezeCount <= 0)
		return;

	freezeCount--;
}

// True while at least one FreezeUp () is unmatched.
public bool IsFrozen {
	get {
		return 0 < freezeCount;
	}
}
172 //////////////////////////
// Snippet-related state.
private bool snippetMode = false;
private bool originalIsText = false;
private TextWriter snippetWriter = null;

// Whether a snippet writer may be attached to this filter.
public bool SnippetMode {
	get {
		return this.snippetMode;
	}
	set {
		this.snippetMode = value;
	}
}

// Caller-maintained flag; this class only stores it.
public bool OriginalIsText {
	get {
		return this.originalIsText;
	}
	set {
		this.originalIsText = value;
	}
}

// Remembers the writer that receives a copy of the appended text.
// The request is silently ignored unless SnippetMode is on.
public void AttachSnippetWriter (TextWriter writer)
{
	if (!snippetMode)
		return;

	snippetWriter = writer;
}
194 //////////////////////////
// Pools that collect the filter's output. Open (FileSystemInfo)
// replaces all three with fresh, empty lists.
196 private ArrayList textPool;
197 private ArrayList hotPool;
198 private ArrayList propertyPool;
// Starts out true; cleared whenever text is appended and set again by
// AppendStructuralBreak () when it writes a newline to the snippet
// writer.
200 private bool last_was_structural_break = true;
202 // This two-arg AppendText() will give flexibility to
203 // filters to segregate hot-contents and
204 // normal-contents of a para and call this method with
205 // respective contents.
207 // str : Holds both the normal-contents and hot contents.
208 // strHot: Holds only hot-contents.
210 // Ex:- suppose the actual-content is "one <b>two</b> three"
211 // str = "one two three"
212 // strHot = "two"
214 // NOTE: HotUp() or HotDown() has NO-EFFECT on this variant
215 // of AppendText ()
// Appends str to the text pool one line at a time and strHot to the
// hot pool. Nothing from str is appended while the filter is frozen
// or when str is null or empty.
// NOTE(review): the brace-only lines are missing from this listing,
// so it cannot be told from here whether the final strHot call sits
// inside or outside the guard -- confirm against the repository.
217 public void AppendText (string str, string strHot)
219 if (!IsFrozen && str != null && str != "") {
221 // FIXME: Do we need to handle '\n' in any
222 // other way?
223 int i = 0;
224 string line;
// Split on newlines; each piece is trimmed and empty pieces are
// skipped.
225 string[] lines = str.Split ('\n');
226 for (i = 0; i < lines.Length; i++) {
227 line = lines[i].Trim();
228 if (line.Length > 0) {
// Text only (no hot part), followed by a structural break per line.
229 ReallyAppendText (line, null);
230 AppendStructuralBreak ();
// Hot text only; ReallyAppendText () ignores a null or empty strHot.
234 ReallyAppendText (null, strHot);
// Single-argument variant: appends str and, while the filter is
// hot, also treats the whole of str as hot text. Does nothing when
// the filter is frozen or str is null or empty.
public void AppendText (string str)
{
	//Logger.Log.Debug ("AppendText (\"{0}\")", str);
	if (IsFrozen || str == null || str == "")
		return;

	string hot = null;
	if (IsHot)
		hot = str;

	AppendText (str, hot);
}
// Does the actual adding of text to the text pool and the hot pool.
//
// strHot: when non-empty and the filter is not frozen, it is trimmed,
//         given a trailing space and added to the hot pool.
// str:    when non-null, it is added to the text pool, echoed to the
//         snippet writer (if one is attached), and the
//         "last was a structural break" flag is cleared.
private void ReallyAppendText (string str, string strHot)
{
	bool have_hot = strHot != null && strHot != "";
	if (have_hot && !IsFrozen)
		hotPool.Add (strHot.Trim () + " ");

	if (str == null)
		return;

	textPool.Add (str);

	if (snippetWriter != null)
		snippetWriter.Write (str);

	last_was_structural_break = false;
}
// Reports whether a separating blank should be added to the given
// pool: true when the pool is empty or its last string does not end
// in a whitespace character (an empty last string counts as
// "does not end in whitespace").
private bool NeedsWhiteSpace (ArrayList array)
{
	int count = array.Count;
	if (count == 0)
		return true;

	string tail = (string) array [count - 1];
	bool ends_with_space = tail.Length > 0
		&& char.IsWhiteSpace (tail, tail.Length - 1);

	return !ends_with_space;
}
// Adds a single blank to the text pool (and to the snippet writer,
// if attached). Skipped right after a structural break and when the
// text pool already ends in whitespace.
public void AppendWhiteSpace ()
{
	//Logger.Log.Debug ("AppendWhiteSpace ()");
	if (last_was_structural_break || !NeedsWhiteSpace (textPool))
		return;

	textPool.Add (" ");

	if (snippetWriter != null)
		snippetWriter.Write (" ");

	last_was_structural_break = false;
}
// Adds a property to the property pool. Null properties and
// properties whose Value is null are dropped silently.
public void AddProperty (Property prop)
{
	if (prop == null || prop.Value == null)
		return;

	propertyPool.Add (prop);
}
// Marks a structural break (such as the end of a line or
// paragraph). The snippet writer, when attached, receives a newline
// unless the previous thing written was already a break; the text
// pool receives a blank unless it already ends in whitespace.
public void AppendStructuralBreak ()
{
	bool snippet_needs_newline = snippetWriter != null
		&& !last_was_structural_break;

	if (snippet_needs_newline) {
		snippetWriter.WriteLine ();
		last_was_structural_break = true;
	}

	// A "newline" in the snippet text corresponds to plain
	// whitespace in the text pool.
	if (NeedsWhiteSpace (textPool))
		textPool.Add (" ");
}
304 //////////////////////////
// Set by Finished (); reset at the start of Open (FileSystemInfo).
private bool isFinished = false;

// True once the filter has signalled that it has no more content.
public bool IsFinished {
	get {
		return this.isFinished;
	}
}

// Called by the filter to signal that it has no more content.
protected void Finished ()
{
	this.isFinished = true;
}

// Set by Error (); nothing in this class clears it again.
private bool has_error = false;

// True once the filter has reported a failure via Error ().
public bool HasError {
	get {
		return this.has_error;
	}
}

// Called by the filter to report that it failed.
protected void Error ()
{
	this.has_error = true;
}
328 //////////////////////////
// Dispatches to the FileInfo or DirectoryInfo overload depending on
// the runtime type of info; any other type is ignored.
protected virtual void DoOpen (FileSystemInfo info)
{
	FileInfo file = info as FileInfo;
	if (file != null) {
		DoOpen (file);
		return;
	}

	DirectoryInfo dir = info as DirectoryInfo;
	if (dir != null)
		DoOpen (dir);
}

// Hook: called when a file is opened. Does nothing by default.
protected virtual void DoOpen (FileInfo info)
{
}

// Hook: called when a directory is opened. Does nothing by default.
protected virtual void DoOpen (DirectoryInfo info)
{
}

// Hook: called by Open () after DoOpen (). Does nothing by default.
protected virtual void DoPullProperties ()
{
}

// Hook: called by Open () after DoPullProperties (). Does nothing
// by default.
protected virtual void DoPullSetup ()
{
}

// Hook: called whenever more content is needed. The default
// produces nothing and immediately declares the filter finished.
protected virtual void DoPull ()
{
	Finished ();
}

// Hook: called by Close () before resources are released. Does
// nothing by default.
protected virtual void DoClose ()
{
}
349 //////////////////////////
/*
  Open () calls:
  (1) DoOpen (FileInfo info) or DoOpen (DirectoryInfo info)
  (2) DoPullProperties ()
  (3) DoPullSetup ()
  At this point all properties must be in place.

  Once someone starts reading from the TextReader,
  the following are called:
  DoPull () [repeatedly, until Finished () is called]
  DoClose () [when finished]
*/
// Path of the temporary file created by Open (TextReader) or
// Open (Stream); Close () deletes it when it is non-null.
365 private string tempFile = null;
// The file or directory passed to the most recent Open (FileSystemInfo).
366 private FileSystemInfo currentInfo = null;
// Read stream over the current file; only opened when the current
// item is a FileInfo, and reset to null by Close ().
367 private FileStream currentStream = null;
// Reader created on demand by the TextReader property on top of
// currentStream.
368 private StreamReader currentReader = null;
// Opens the filter on the contents of a TextReader. The text is
// first spooled into a temporary file, which is then opened through
// Open (FileSystemInfo); Close () deletes the temporary file.
//
// Returns whatever Open (FileSystemInfo) returns. Exceptions thrown
// while spooling propagate to the caller; in that case the temporary
// file is closed and removed instead of being left behind.
public bool Open (TextReader reader)
{
	tempFile = Path.GetTempFileName ();
	FileStream file_stream = File.OpenWrite (tempFile);
	StreamWriter writer = null;
	bool spooled = false;

	try {
		// When we dump the contents of a reader into a file, we
		// expect to use it again soon.
		FileAdvise.PreLoad (file_stream);

		// Make sure the temporary file is only readable by the owner
		// (256 decimal == 0400 octal).
		// FIXME: There is probably a race here. Could some malicious program
		// do something to the file between creation and the chmod?
		Mono.Posix.Syscall.chmod (tempFile, (Mono.Posix.FileMode) 256);

		writer = new StreamWriter (new BufferedStream (file_stream));

		const int BUFFER_SIZE = 8192;
		char [] buffer = new char [BUFFER_SIZE];

		int read;
		do {
			read = reader.Read (buffer, 0, BUFFER_SIZE);
			if (read > 0)
				writer.Write (buffer, 0, read);
		} while (read > 0);

		spooled = true;
	} finally {
		// Closing the writer flushes it and closes the file stream
		// underneath; fall back to the raw stream if the writer was
		// never created.
		if (writer != null)
			writer.Close ();
		else
			file_stream.Close ();

		// Do not leave a half-written temp file behind on failure.
		if (!spooled) {
			File.Delete (tempFile);
			tempFile = null;
		}
	}

	return Open (new FileInfo (tempFile));
}
// Opens the filter on the contents of a Stream. The bytes are first
// spooled into a temporary file, which is then opened through
// Open (FileSystemInfo); Close () deletes the temporary file.
//
// Returns whatever Open (FileSystemInfo) returns. Exceptions thrown
// while spooling propagate to the caller; in that case the temporary
// file is closed and removed instead of being left behind.
public bool Open (Stream stream)
{
	tempFile = Path.GetTempFileName ();
	FileStream file_stream = File.OpenWrite (tempFile);
	BufferedStream buffered_stream = null;
	bool spooled = false;

	try {
		// When we dump the contents of a stream into a file, we
		// expect to use it again soon.
		FileAdvise.PreLoad (file_stream);

		// Make sure the temporary file is only readable by the owner
		// (256 decimal == 0400 octal).
		// FIXME: There is probably a race here. Could some malicious program
		// do something to the file between creation and the chmod?
		Mono.Posix.Syscall.chmod (tempFile, (Mono.Posix.FileMode) 256);

		buffered_stream = new BufferedStream (file_stream);

		const int BUFFER_SIZE = 8192;
		byte [] buffer = new byte [BUFFER_SIZE];

		int read;
		do {
			read = stream.Read (buffer, 0, BUFFER_SIZE);
			if (read > 0)
				buffered_stream.Write (buffer, 0, read);
		} while (read > 0);

		spooled = true;
	} finally {
		// Closing the buffered stream flushes it and closes the file
		// stream underneath; fall back to the raw stream if the
		// wrapper was never created.
		if (buffered_stream != null)
			buffered_stream.Close ();
		else
			file_stream.Close ();

		// Do not leave a half-written temp file behind on failure.
		if (!spooled) {
			File.Delete (tempFile);
			tempFile = null;
		}
	}

	return Open (new FileInfo (tempFile));
}
// Prepares the filter for the given file or directory: resets the
// output pools, opens a read stream when info is a file, and then
// runs DoOpen (), DoPullProperties () and DoPullSetup () in that
// order.
//
// Returns true when the filter is ready (or already finished), and
// false when the filter reported an error or one of the hooks threw;
// in the latter case a warning is logged. Exceptions raised while
// opening the file stream itself are not caught here.
public bool Open (FileSystemInfo info)
{
	isFinished = false;
	textPool = new ArrayList ();
	hotPool = new ArrayList ();
	propertyPool = new ArrayList ();

	currentInfo = info;

	if (info is FileInfo) {
		// Open a stream for this file.
		currentStream = new FileStream (info.FullName,
						FileMode.Open,
						FileAccess.Read,
						FileShare.Read);

		// Our default assumption is sequential reads.
		// FIXME: Is this the right thing to do here?
		FileAdvise.IncreaseReadAhead (currentStream);

		// Give the OS a hint that we will be reading this
		// file soon.
		FileAdvise.PreLoad (currentStream);
	}

	try {
		DoOpen (info);

		if (IsFinished)
			return true;
		else if (HasError)
			return false;

		DoPullProperties ();

		if (IsFinished)
			return true;
		else if (HasError)
			return false;

		// Forget any reader handed out so far, so that the next
		// access to TextReader starts from a fresh one. The reader
		// is deliberately NOT closed: closing a StreamReader also
		// closes the stream underneath it, which would make the
		// Seek below (and all later reads) fail.
		currentReader = null;

		// Seek back to the beginning of our stream. There is no
		// stream when a directory is being filtered.
		if (currentStream != null)
			currentStream.Seek (0, SeekOrigin.Begin);

		DoPullSetup ();

		if (HasError)
			return false;
	} catch (Exception e) {
		Logger.Log.Warn ("Unable to filter {0}: {1}", info.FullName, e.Message);
		return false;
	}

	return true;
}
// Opens the filter on a path. Existing files are tried first, then
// existing directories; returns false when the path is neither.
public bool Open (string path)
{
	FileSystemInfo target;

	if (File.Exists (path))
		target = new FileInfo (path);
	else if (Directory.Exists (path))
		target = new DirectoryInfo (path);
	else
		return false;

	return Open (target);
}
// The item being filtered as a FileInfo, or null when it is not a
// file.
public FileInfo FileInfo {
	get {
		return this.currentInfo as FileInfo;
	}
}

// The item being filtered as a DirectoryInfo, or null when it is
// not a directory.
public DirectoryInfo DirectoryInfo {
	get {
		return this.currentInfo as DirectoryInfo;
	}
}

// The read stream over the current file; may be null.
public Stream Stream {
	get {
		return this.currentStream;
	}
}

// A reader over the current stream, created on first use. Stays
// null while there is no stream to read from.
public TextReader TextReader {
	get {
		bool must_create = currentReader == null && currentStream != null;
		if (must_create)
			currentReader = new StreamReader (currentStream);

		return currentReader;
	}
}
// Asks the filter for one more batch of content. Returns true after
// calling DoPull (); once the filter has finished, closes it and
// returns false instead.
private bool Pull ()
{
	if (!IsFinished) {
		DoPull ();
		return true;
	}

	Close ();
	return false;
}
// Releases everything the filter holds for the current item: runs
// the DoClose () hook, closes the reader, the stream and the snippet
// writer, removes the temporary spool file, and finally deletes the
// filtered item itself when DeleteContent is set.
//
// Does nothing when there is no open stream, which also makes a
// second call harmless. References to closed or deleted resources
// are cleared so that a later Open () cannot pick up a disposed
// reader or writer.
private void Close ()
{
	if (currentStream == null)
		return;

	DoClose ();

	// When crawling, give the OS a hint that we don't
	// need to keep this file around in the page cache.
	if (CrawlMode)
		FileAdvise.FlushCache (currentStream);

	if (currentReader != null) {
		currentReader.Close ();
		currentReader = null;
	}

	currentStream.Close ();
	currentStream = null;

	if (snippetWriter != null) {
		snippetWriter.Close ();
		snippetWriter = null;
	}

	if (tempFile != null) {
		File.Delete (tempFile);
		tempFile = null;
	}

	if (currentInfo != null && this.DeleteContent) {
		try {
			currentInfo.Delete ();
		} catch (Exception e) {
			// Deleting the content is best-effort only.
			Logger.Log.Debug ("Caught exception trying to delete {0} in filter '{1}'",
					  currentInfo.FullName, Name);
			Logger.Log.Debug (e);
		}
	}
}
// Moves the accumulated strings of a pool into sb. While the pool
// is empty the filter is asked for more content; gives up when the
// filter has nothing left.
//
// Returns true when something was appended to sb (the pool is
// emptied in that case), false otherwise.
private bool PullFromArray (ArrayList array, StringBuilder sb)
{
	while (array.Count == 0) {
		if (!Pull ())
			break;
	}

	if (array.Count == 0)
		return false;

	for (int i = 0; i < array.Count; ++i)
		sb.Append ((string) array [i]);

	array.Clear ();
	return true;
}
// Same as PullFromArray (), except that an exception thrown while
// pulling is logged at debug level and reported as "nothing pulled"
// instead of reaching the caller.
private bool PullTextCarefully (ArrayList array, StringBuilder sb)
{
	try {
		return PullFromArray (array, sb);
	} catch (Exception ex) {
		Logger.Log.Debug ("Caught exception while pulling text in filter '{0}'", Name);
		Logger.Log.Debug (ex);
		return false;
	}
}
// Pull callback for the regular text pool.
private bool PullText (StringBuilder sb)
{
	bool got_text = PullTextCarefully (this.textPool, sb);
	return got_text;
}

// Pull callback for the hot text pool.
private bool PullHotText (StringBuilder sb)
{
	bool got_text = PullTextCarefully (this.hotPool, sb);
	return got_text;
}
// Returns a reader fed from the regular text pool. The reader
// carries this filter's Identifier.
public TextReader GetTextReader ()
{
	PullingReader.Pull source = new PullingReader.Pull (PullText);
	PullingReader text_reader = new PullingReader (source);
	text_reader.Identifier = Identifier;
	return text_reader;
}

// Returns a reader fed from the hot text pool.
public TextReader GetHotTextReader ()
{
	PullingReader.Pull source = new PullingReader.Pull (PullHotText);
	return new PullingReader (source);
}
// The properties collected through AddProperty () since the last
// Open ().
public IEnumerable Properties {
	get {
		return this.propertyPool;
	}
}
627 //////////////////////////////
629 // This is used primarily for the generation of URIs for the
630 // child indexables that can be created as a result of the
631 // filtering process.
// Backing store for Uri; null until a caller assigns one.
private Uri uri = null;

// The Uri assigned to this filter by its caller.
public Uri Uri {
	get {
		return this.uri;
	}
	set {
		this.uri = value;
	}
}
640 //////////////////////////////
// Indexables produced as a by-product of filtering.
private ArrayList child_indexables = new ArrayList ();

// Records a single child indexable.
protected void AddChildIndexable (Indexable indexable)
{
	child_indexables.Add (indexable);
}

// Records a whole collection of child indexables.
protected void AddChildIndexables (ICollection indexables)
{
	child_indexables.AddRange (indexables);
}

// The child indexables recorded so far. This is the live list, not
// a copy.
public ArrayList ChildIndexables {
	get {
		return child_indexables;
	}
}