FilterHtml.cs: Add ExtractText(string) command to extract text out of large html...
[beagle.git] / beagled / StaticQueryable.cs
blob8c94a337e3b8efd6665c246e4389ae3c60e2edbb
1 //
2 // StaticQueryable.cs
3 //
4 // Copyright (C) 2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.IO;
29 using System.Collections;
30 using System.Threading;
32 using System.Xml;
33 using System.Xml.Serialization;
35 using Beagle.Util;
namespace Beagle.Daemon {

	// A LuceneQueryable built over an index located at a caller-supplied
	// path.  If the index directory contains a "TextCache" subdirectory,
	// a TextCache is opened on it and used to produce snippets for hits.
	public class StaticQueryable : LuceneQueryable {

		// Text cache used by GetSnippet.  Stays null when the index has
		// no "TextCache" directory or when opening it was not permitted.
		protected TextCache text_cache;

		// index_name:     not referenced by this constructor.
		// index_path:     directory of the index; forwarded to the base
		//                 class and probed for a "TextCache" subdirectory.
		// read_only_mode: forwarded unchanged to the base class.
		public StaticQueryable (string index_name, string index_path, bool read_only_mode) : base (index_path, read_only_mode)
		{
			Logger.Log.Debug ("Initializing static queryable: {0}", index_path);

			if (Directory.Exists (Path.Combine (index_path, "TextCache"))) {
				try {
					// NOTE(review): the second argument presumably requests a
					// read-only text cache -- confirm against TextCache.
					text_cache = new TextCache (index_path, true);
				} catch (UnauthorizedAccessException) {
					// Not fatal: without a text cache GetSnippet simply
					// returns null.
					Logger.Log.Warn ("Unable to purge static queryable text cache in {0}. Will run without it.", index_path);
				}
			}
		}

		// Builds a snippet for the given hit from the cached text.
		// Returns null when there is no text cache or when the cache has
		// no reader for hit.Uri; otherwise returns whatever
		// SnippetFu.GetSnippet produces for the query terms.
		override public string GetSnippet (string[] query_terms, Hit hit)
		{
			if (text_cache == null)
				return null;

			// Look up the hit in our local text cache.
			TextReader reader = text_cache.GetReader (hit.Uri);
			if (reader == null)
				return null;

			// Close the reader even if snippet generation throws, so the
			// underlying cache entry is not left open.
			try {
				return SnippetFu.GetSnippet (query_terms, reader);
			} finally {
				reader.Close ();
			}
		}

		// Decides whether a hit with the given uri is still valid.
		// Non-file uris cannot be checked and are always reported valid.
		// For file uris the local path, cut at its last '#' if it has
		// one, must exist as a file.  Any exception raised during the
		// check is logged and the hit is reported as invalid.
		override protected bool HitIsValid (Uri uri)
		{
			// We can't check anything else than file uris
			if (! uri.IsFile)
				return true;

			// FIXME: This is a hack, we need to support parent Uri's in some sane way
			try {
				string path = uri.LocalPath;
				int j = path.LastIndexOf ('#');
				return File.Exists ((j == -1) ? path : path.Substring (0, j));
			} catch (Exception e) {
				// Include the exception message so the cause of the
				// failure is not lost.
				Logger.Log.Warn ("Exception executing HitIsValid on {0}: {1}", uri.LocalPath, e.Message);
				return false;
			}
		}
	}
}