Thumbnail file hits. Based on a patch from D Bera
[beagle.git] / beagled / SnippetFu.cs
blobb3ad2a58b76c241074980027f66cf4cd36927f26
1 //
2 // SnippetFu.cs
3 //
4 // Copyright (C) 2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
31 using Beagle.Util;
33 // FIXME: Hack. Use Lucene.Net highlighting.
35 namespace Beagle.Daemon {
37 public class SnippetFu {
// Callback that supplies text one chunk at a time.  GetSnippet calls it
// repeatedly and treats a null return value as the end of the text.
public delegate string StringSource ();
// Returns true when c cannot be part of a token, i.e. when it is
// whitespace, a Unicode separator, or punctuation.
static private bool IsTokenSeparator (char c)
{
	if (Char.IsWhiteSpace (c))
		return true;

	if (Char.IsSeparator (c))
		return true;

	return Char.IsPunctuation (c);
}
// Scans text token by token and appends an HTML-highlighted excerpt to
// matches for every token whose stem equals one of stemmed_terms.
//
//   stemmed_terms  Terms to look for.  Each one is compared as-is against
//                  LuceneCommon.Stem () of the lower-cased token, so the
//                  terms must already be in that form.  Null entries and
//                  stop words are ignored.
//   text           The text to scan.  A null text produces no matches.
//   matches        Receives the excerpts (strings) in order of appearance.
//
// An excerpt is the matching token wrapped in <font color="..."><b> markup
// plus a few words of context on either side.  When the leading context
// of a new match would overlap the previous excerpt, the new match is
// appended to that excerpt instead of starting a new one.
//
// NOTE: the text is copied into the markup without any HTML escaping.
static private void HighlightTerms (string [] stemmed_terms, string text, ref ArrayList matches)
{
	// Nothing to search for, or nothing to search in.
	if (stemmed_terms == null || text == null)
		return;

	int pos = 0, prev_stop_pos = 0, prev_end_pos = 0;
	string prev_match = "";

	while (pos < text.Length) {

		// Find the beginning of the next token.
		if (IsTokenSeparator (text [pos])) {
			++pos;
			continue;
		}

		// Find the end of the next token
		int next_pos = pos+1;
		while (next_pos < text.Length && !IsTokenSeparator (text [next_pos]))
			++next_pos;

		string stemmed_token = null;

		// Iterate through the stemmed terms and match the token
		for (int i = 0; i < stemmed_terms.Length; i++) {

			// A null term can never match; skip it instead of crashing.
			if (stemmed_terms [i] == null)
				continue;

			// If this term is longer than the token in question, give up.
			if (next_pos - pos < stemmed_terms [i].Length)
				continue;

			// Make sure this isn't a stop word.
			if (LuceneCommon.IsStopWord (stemmed_terms [i]))
				continue;

			// We cache the token, so as to avoid stemming it more than once
			// when considering multiple terms.
			if (stemmed_token == null) {
				string token = text.Substring (pos, next_pos - pos).ToLower ();
				stemmed_token = LuceneCommon.Stem (token);
			}

			if (stemmed_terms [i] != stemmed_token)
				continue;

			// We have a match!

			int start_pos = pos;
			int stop_pos = next_pos;

			// FIXME: This is a hack, I should be shot.
			// Walk left until three spaces have been passed or the start
			// of the text is reached.
			for (int count = 0; count < 3 && start_pos > 0; start_pos--) {
				if ((text[start_pos] == ' '))
					count++;
			}

			// The loop stops one character before the last space it
			// counted; step forward past that space.
			if (start_pos != 0)
				start_pos += 2;

			// Walk right until three spaces have been passed or the end
			// of the text is reached.
			for (int count = 0; count < 3 && stop_pos < text.Length; stop_pos++) {
				if (text[stop_pos] == ' ')
					count++;
			}

			// Step back onto the last counted space so that it is left
			// out of the trailing context.
			if (stop_pos != text.Length)
				stop_pos--;

			bool append_to_prev_match = false;

			// The previous excerpt's trailing context reaches into this
			// excerpt's leading context: drop that trailing context and
			// continue the previous excerpt from the end of its token.
			if (prev_stop_pos > start_pos) {
				start_pos = prev_end_pos;
				prev_match = prev_match.Substring (0, prev_match.Length - (prev_stop_pos - prev_end_pos));
				append_to_prev_match = true;
			}

			// The color is chosen by the index of the matching term, so a
			// given term is always highlighted in the same color.
			string new_match = String.Concat (text.Substring (start_pos, pos - start_pos),
							  "<font color=\"",
							  colors [i % colors.Length],
							  "\"><b>",
							  text.Substring (pos, next_pos-pos),
							  "</b></font>",
							  text.Substring (next_pos, stop_pos-next_pos));

			if (append_to_prev_match) {
				prev_match += new_match;
			} else {
				if (prev_match != "")
					matches.Add (prev_match);
				prev_match = new_match;
			}

			prev_stop_pos = stop_pos;
			prev_end_pos = next_pos;

			// A token is highlighted at most once.
			break;
		}

		pos = next_pos;
	}

	// Add trailing match
	if (prev_match != "")
		matches.Add (prev_match);
}
// Highlight colors.  HighlightTerms picks one by the index of the matching
// query term, wrapping around when there are more terms than colors.
static string[] colors = {
	"red", "blue", "green", "orange", "purple", "brown"
};

// GetSnippet stops adding matches once the snippet is at least this many
// characters long; the final snippet may therefore exceed the limit.
const int soft_snippet_limit = 400;
// Builds a snippet out of the highlighted matches for query_terms found in
// the text delivered by string_source.
//
//   query_terms    Terms to highlight.  They are handed to HighlightTerms
//                  unchanged, which compares them against stemmed,
//                  lower-cased tokens.
//   string_source  Called repeatedly for the next chunk of text; a null
//                  return value marks the end of the text.
//
// Returns null when string_source is null.  Otherwise returns the matches,
// each followed by " ... ", or an empty string when there are no query
// terms or nothing matched.  Matches stop being added once the snippet has
// reached soft_snippet_limit characters, and string_source is not called
// again after that point.
static public string GetSnippet (string[] query_terms,
				 StringSource string_source)
{
	// FIXME: If the query doesn't have search text (or is null), we should
	// generate a 'summary snippet'.

	if (string_source == null)
		return null;

	// Without terms nothing can match, so there is no need to read the text.
	if (query_terms == null || query_terms.Length == 0)
		return "";

	ArrayList matches = new ArrayList ();
	string snippet = "";
	int used = 0;

	// The length check comes first so that no further chunk is requested
	// once the snippet is long enough.
	string str;
	while (snippet.Length < soft_snippet_limit && (str = string_source ()) != null) {
		HighlightTerms (query_terms, str, ref matches);

		// Append the matches this chunk contributed.
		for (; used < matches.Count && snippet.Length < soft_snippet_limit; used++)
			snippet += String.Concat ((string) matches [used], " ... ");
	}

	return snippet;
}
// Builds a snippet from the text of reader, which is read line by line
// through reader.ReadLine.  Returns null when reader is null, as the
// StringSource overload does for a null source.  The reader is not closed.
static public string GetSnippet (string[] query_terms,
				 TextReader reader)
{
	if (reader == null)
		return null;

	return GetSnippet (query_terms, new StringSource (reader.ReadLine));
}
// Builds a snippet from the contents of the file named filename.  The file
// is opened with a StreamReader, and any exception raised while opening or
// reading it propagates to the caller.
static public string GetSnippetFromFile (string[] query_terms,
					 string filename)
{
	// GetSnippet finishes all of its reading before it returns, so the
	// reader can be disposed as soon as the call completes.
	using (StreamReader reader = new StreamReader (filename))
		return GetSnippet (query_terms, reader);
}