4 // Copyright (C) 2005 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
28 using System
.Collections
;
33 // FIXME: Hack. Use Lucene.Net highlighting.
35 namespace Beagle
.Daemon
{
37 public class SnippetFu
{
// Callback that supplies text one string at a time. GetSnippet calls it
// repeatedly and stops as soon as it returns null.
39 public delegate string StringSource ();
// Returns true when c is white space, a separator character or a
// punctuation character (as classified by the Char helpers below).
// HighlightTerms uses this to decide where a token starts and ends.
41 static private bool IsTokenSeparator (char c
)
43 return Char
.IsWhiteSpace (c
)
44 || Char
.IsSeparator (c
)
45 || Char
.IsPunctuation (c
);
// Scans text for tokens whose stem equals one of stemmed_terms and records
// each hit, together with some surrounding context, in matches.
//
// stemmed_terms: terms to look for; each is compared with != against the
//                result of LuceneCommon.Stem applied to the lower-cased token.
// text:          the text to scan; a token is a run of characters for which
//                IsTokenSeparator returns false.
// matches:       list that receives the finished match strings via Add.
//
// NOTE(review): several statements of this method are not visible in this
// view (for example the declarations of next_pos, start_pos and hl_offset,
// and the bodies of most of the if statements). The comments below describe
// only the code that is visible.
48 static private void HighlightTerms (string [] stemmed_terms
, string text
, ref ArrayList matches
)
// pos is the scan position. prev_stop_pos and prev_end_pos are set at the
// bottom of the match handling from stop_pos and next_pos of the latest
// match, and are used to detect a context window that overlaps the previous
// match. prev_match holds the match string that has not yet been added to
// matches.
50 int pos
= 0, prev_stop_pos
= 0, prev_end_pos
= 0;
51 string prev_match
= "";
53 while (pos
< text
.Length
) {
55 // Find the beginning of the next token.
56 if (IsTokenSeparator (text
[pos
])) {
61 // Find the end of the next token
// next_pos advances until it hits the end of text or a separator, so
// [pos, next_pos) is the current token.
63 while (next_pos
< text
.Length
&& !IsTokenSeparator (text
[next_pos
]))
// Stays null until the token has been stemmed for the first time.
66 string stemmed_token
= null;
69 // Iterate through the stemmed terms and match the token
70 for (int i
= 0; i
< stemmed_terms
.Length
; i
++) {
72 // If this term is longer than the token in question, give up.
73 if (next_pos
- pos
< stemmed_terms
[i
].Length
)
76 // Make sure this isn't a stop word.
77 if (LuceneCommon
.IsStopWord (stemmed_terms
[i
]))
80 // We cache the token, so as to avoid stemming it more than once
81 // when considering multiple terms.
82 if (stemmed_token
== null) {
83 string token
= text
.Substring (pos
, next_pos
- pos
).ToLower ();
84 stemmed_token
= LuceneCommon
.Stem (token
);
// Term and token stems differ (the action taken is not visible here).
87 if (stemmed_terms
[i
] != stemmed_token
)
// stop_pos starts at the end of the token and is pushed forward below to
// take in trailing context.
93 int stop_pos
= next_pos
;
95 // FIXME: This is a hack, I should be shot.
// Walk start_pos backwards, testing each character for a space; the loop
// ends once count reaches 3 or the start of text is reached.
// NOTE(review): the statement that updates count is not visible here --
// presumably it counts spaces to bound the leading context; confirm.
96 for (int count
= 0; count
< 3 && start_pos
> 0; start_pos
--) {
97 if ((text
[start_pos
] == ' '))
// Same walk in the forward direction for the trailing context, bounded by
// count < 3 and the end of text.
104 for (int count
= 0; count
< 3 && stop_pos
< text
.Length
; stop_pos
++) {
105 if (text
[stop_pos
] == ' ')
109 if (stop_pos
!= text
.Length
)
112 bool append_to_prev_match
= false;
// The leading context of this match would start before the point where the
// previous match's context stopped. Start this match where the previous
// token ended instead, cut (prev_stop_pos - prev_end_pos) characters off
// the end of prev_match, and remember to join the two matches.
114 if (prev_stop_pos
> start_pos
) {
115 start_pos
= prev_end_pos
;
116 prev_match
= prev_match
.Substring (0, prev_match
.Length
- (prev_stop_pos
- prev_end_pos
));
117 append_to_prev_match
= true;
// Assemble the new match from the leading context, an entry of colors
// picked by (i - hl_offset) modulo the number of colors, the token text
// and the trailing context.
// NOTE(review): further Concat arguments may sit on lines that are not
// visible in this view.
120 string new_match
= String
.Concat (text
.Substring (start_pos
, pos
- start_pos
),
122 colors
[(i
- hl_offset
) % colors
.Length
],
124 text
.Substring (pos
, next_pos
-pos
),
126 text
.Substring (next_pos
, stop_pos
-next_pos
));
// Either extend the pending match with the new one, or add the pending
// match (if it is not empty) to matches and make the new match the pending
// one. NOTE(review): the branch separating the two cases is not visible
// here -- presumably an else; confirm.
128 if (append_to_prev_match
) {
129 prev_match
+= new_match
;
131 if (prev_match
!= "")
132 matches
.Add (prev_match
);
133 prev_match
= new_match
;
// Remember where this match's context and token ended for the overlap
// check on the next match.
136 prev_stop_pos
= stop_pos
;
137 prev_end_pos
= next_pos
;
145 // Add trailing match
146 if (prev_match
!= "")
147 matches
.Add (prev_match
);
// Color names used by HighlightTerms; it indexes this array modulo its
// length, so the colors repeat when the index runs past the end.
150 static string[] colors
= new string [] {"red", "blue", "green", "orange", "purple", "brown"}
;
// Snippet length at which GetSnippet stops appending further matches. The
// limit is only tested before a match is appended, so the finished snippet
// can be longer than this value.
152 const int soft_snippet_limit
= 400;
// Builds a snippet string for query_terms out of the text produced by
// string_source.
//
// query_terms:   the query's terms; each one is run through
//                LuceneCommon.Stem and lower-cased into stemmed_terms.
// string_source: called repeatedly until it returns null; every string it
//                returns is handed to HighlightTerms. A null string_source
//                is checked for first (the action taken is not visible in
//                this view).
//
// The snippet is made of the collected matches, each followed by " ... ",
// appended until the matches run out or the snippet length reaches
// soft_snippet_limit.
//
// NOTE(review): in the visible code the local term is assigned but the
// stemming call reads query_terms [i] directly, and HighlightTerms is
// handed query_terms rather than stemmed_terms, although its first
// parameter is named stemmed_terms and is compared against stemmed tokens.
// This looks unintended -- confirm against the complete file.
154 static public string GetSnippet (string[] query_terms
,
155 StringSource string_source
)
157 // FIXME: If the query doesn't have search text (or is null), we should
158 // generate a 'summary snippet'.
160 if (string_source
== null)
164 int N
= query_terms
.Length
;
165 string[] stemmed_terms
= new string [N
];
166 for (int i
= 0; i
< N
; ++i
) {
167 string term
= query_terms
[i
];
170 stemmed_terms
[i
] = LuceneCommon
.Stem (query_terms
[i
]).ToLower ();
// Collects the match strings produced by HighlightTerms across all input
// strings.
174 ArrayList matches
= new ArrayList ();
// Drain the source: keep pulling strings until it signals the end with null.
177 while ( (str
= string_source ()) != null) {
178 HighlightTerms (query_terms
, str
, ref matches
);
// The length test runs before each append, so the last appended match may
// push the snippet past soft_snippet_limit.
183 for (int i
= 0; i
< matches
.Count
&& snippet
.Length
< soft_snippet_limit
; i
++)
184 snippet
+= String
.Concat((string)matches
[i
], " ... ");
// Overload that produces the snippet from a reader: it wraps reader.ReadLine
// in a StringSource and forwards to the StringSource-based GetSnippet.
// NOTE(review): the declaration of the reader parameter is not visible in
// this view.
190 static public string GetSnippet (string[] query_terms
,
193 return GetSnippet (query_terms
, new StringSource (reader
.ReadLine
));
// Produces the snippet for query_terms from a file: creates a StreamReader
// on filename and forwards it to GetSnippet.
// NOTE(review): the declaration of the filename parameter is not visible in
// this view. The StreamReader created below is not disposed anywhere in the
// visible code -- confirm whether it should be wrapped in a using statement.
196 static public string GetSnippetFromFile (string[] query_terms
,
199 return GetSnippet (query_terms
, new StreamReader (filename
));