Oops, fix a broken part of the patch
[beagle.git] / beagled / QueryStringParser.cs
blob90177f242968f7f8d63c6c6d03657dec86d4dba0
1 //
2 // QueryStringParser.cs
3 //
4 // Copyright (C) 2004-2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
30 using Beagle.Util;
31 using FSQ=Beagle.Daemon.FileSystemQueryable;
33 namespace Beagle.Daemon {
35 public class QueryStringParser {
// This class only offers static members; keeping the constructor
// private prevents it from being instantiated.
private QueryStringParser ()
{
}
// Breaks query_string up into tokens and converts them into query parts.
// Returns an ICollection of QueryPart objects.
public static ICollection Parse (string query_string)
{
	ArrayList tokens = new ArrayList ();

	// Each call to ExtractToken strips one token off the front of
	// query_string; it returns null once nothing is left to extract.
	for (Token current = ExtractToken (ref query_string);
	     current != null;
	     current = ExtractToken (ref query_string))
		tokens.Add (current);

	return TokensToQueryParts (tokens);
}
55 /////////////////////////////////////////////////////////
// The kinds of token that ExtractToken can produce.
private enum TokenType {
	// Not classified yet; the initial value of Token.Type.
	Unknown = 0,
	// A plain or quoted term without a '+' or '-' prefix.
	StandAlone = 1,
	// A term prefixed by '+'.
	Plus = 2,
	// A term prefixed by '-'.
	Minus = 3,
	// The "OR" keyword.
	Operator = 4,
	// Not a term at all, e.g. a lone '+', '-' or '"' ending the query.
	Meta = 5
}
// One lexical unit of a query string, as produced by ExtractToken.
private class Token {
	// The term itself (without its '+', '-' or quote prefix), or a
	// symbolic name such as "Or" or "DanglingPlus" for Operator and
	// Meta tokens.
	public string Text = null;

	// True when the token was introduced by a double quote.
	public bool IsQuoted = false;

	// What kind of token this is; Unknown until it has been classified.
	public TokenType Type = TokenType.Unknown;
}
// Our tiny query language:
// prefix terms by + or - to require or prohibit them,
// wrap a phrase in double quotes to keep it together as one term,
// and put OR between terms to combine them.
//
// ExtractToken removes the first token from the front of in_string and
// returns it.  in_string is updated to hold whatever follows the token,
// or set to null when the string has been used up.  Returns null (and
// leaves in_string untouched) if in_string is null, empty or contains
// only whitespace.
//
// A '+', '-' or '"' that has no term attached to it is reported as a
// Meta token instead of a term with empty text.
//
// NOTE: a quote that follows '+' or '-' is not treated as a phrase
// delimiter; it simply becomes part of the term.
static private Token ExtractToken (ref string in_string)
{
	if (in_string == null || in_string.Length == 0)
		return null;

	// Find the first non-whitespace character.
	int first_pos = 0;
	while (first_pos < in_string.Length && Char.IsWhiteSpace (in_string [first_pos]))
		++first_pos;

	// This is only whitespace, and thus doesn't contain any tokens.
	if (first_pos == in_string.Length)
		return null;

	char first = in_string [first_pos];
	bool first_is_singleton = (first_pos == in_string.Length - 1);

	Token token = new Token ();

	// Name of the meta-token to report if the special character turns
	// out to have no term attached.  Stays null for ordinary terms.
	string dangling_name = null;

	// Based on the first character, decide what kind of a token this is.
	switch (first) {

	case '+':
		token.Type = TokenType.Plus;
		dangling_name = "DanglingPlus";
		break;

	case '-':
		token.Type = TokenType.Minus;
		dangling_name = "DanglingMinus";
		break;

	case '"':
		token.Type = TokenType.StandAlone;
		token.IsQuoted = true;
		dangling_name = "DanglingQuote";
		break;

	default:
		token.Type = TokenType.StandAlone;
		break;
	}

	if (dangling_name != null) {
		// The special character is the very last character of the
		// string, so there is nothing it could apply to.
		if (first_is_singleton) {
			in_string = null;
			return MakeMetaToken (dangling_name);
		}

		// Skip over the special character itself.
		++first_pos;
	}

	// Find where the token ends: at the closing quote for quoted
	// tokens, otherwise at the next whitespace character.  The same
	// whitespace test is used as for skipping leading whitespace, so
	// tabs and newlines separate terms just like spaces do.
	int last_pos;
	if (token.IsQuoted) {
		last_pos = in_string.IndexOf ('"', first_pos);
	} else {
		last_pos = first_pos;
		while (last_pos < in_string.Length && ! Char.IsWhiteSpace (in_string [last_pos]))
			++last_pos;
		if (last_pos == in_string.Length)
			last_pos = -1;
	}

	if (last_pos == -1) {
		// We don't worry about missing close-quotes.
		// FIXME: Maybe we should, or at least return a meta-token
		token.Text = in_string.Substring (first_pos);
		in_string = null;
	} else {
		token.Text = in_string.Substring (first_pos, last_pos - first_pos);
		if (last_pos == in_string.Length - 1)
			in_string = null;
		else
			in_string = in_string.Substring (last_pos + 1);
	}

	// "+ foo", "- foo" and "" all yield an empty term.  Report those
	// as meta-tokens; a term with empty text is of no use to the
	// query and would trip up the code that inspects the term's
	// first character.
	if (token.Text.Length == 0)
		return MakeMetaToken (token.IsQuoted ? "EmptyQuote" : dangling_name);

	// Trap the OR operator
	if (token.Type == TokenType.StandAlone && token.Text == "OR") {
		token.Type = TokenType.Operator;
		token.Text = "Or";
	}

	// Ah, the dreaded "internal error".
	if (token.Type == TokenType.Unknown)
		throw new InvalidOperationException ("Internal QueryStringParser.ExtractToken Error");

	return token;
}

// Builds a Meta token carrying the given symbolic name as its text.
static private Token MakeMetaToken (string name)
{
	Token meta = new Token ();
	meta.Type = TokenType.Meta;
	meta.Text = name;
	return meta;
}
// FIXME support Date queries
//
// Converts a single token into a QueryPart:
//   ".ext"        -> property query on the filename extension
//   "name:value"  -> property query, if "name" is a known property keyword
//   anything else -> plain text query
// The Logic of the returned part is left for the caller to set.
static private QueryPart TokenToQueryPart (Token token)
{
	// Treat a missing text like an empty one so the checks below
	// never dereference null.
	string query_text = token.Text;
	if (query_text == null)
		query_text = String.Empty;

	// also support extension query of form
	// beagle .pdf
	// i.e. the extension is just given as a query word
	// The length is tested first so that an empty text is never indexed.
	bool is_extension_query = (query_text.Length > 1 && query_text [0] == '.');
	if (is_extension_query) {
		// Lower-case with the invariant culture: the result must not
		// depend on the user's locale (e.g. Turkish maps 'I' to a
		// dotless 'i').
		string extension = query_text.ToLower (System.Globalization.CultureInfo.InvariantCulture);

		QueryPart_Property query_part = new QueryPart_Property ();
		query_part.Key = FSQ.FileSystemQueryable.FilenameExtensionPropKey;
		query_part.Value = extension; // the whole .abc part
		query_part.Type = PropertyType.Keyword;
		Logger.Log.Debug ("Extension query:" + extension);
		return query_part;
	}

	int pos = query_text.IndexOf (':');
	if (pos == -1) {
		QueryPart_Text query_part = new QueryPart_Text ();
		query_part.Text = query_text;
		Logger.Log.Debug ("Parsed query '" + query_text + "' as text_query");
		return query_part;
	}

	string prop_name = query_text.Substring (0, pos);
	string prop_string = null;
	bool is_present;
	PropertyType prop_type;
	is_present = PropertyKeywordFu.GetPropertyDetails (prop_name, out prop_string, out prop_type);
	// if prop_name is not present in the mapping, assume the query is a text query
	// i.e. if token is foo:bar and there is no mappable property named foo,
	// assume "foo:bar" as text query
	// FIXME the analyzer changes the text query "foo:bar" to "foo bar"
	// which might not be the right thing to do
	if (!is_present) {
		QueryPart_Text query_part = new QueryPart_Text ();
		query_part.Text = query_text;
		Logger.Log.Debug ("Could not find property, parsed query '" + query_text + "' as text_query");
		return query_part;
	}

	QueryPart_Property query_part_prop = new QueryPart_Property ();
	query_part_prop.Key = prop_string;
	query_part_prop.Value = query_text.Substring (pos + 1);
	// if query was of type ext:mp3 or extension:mp3
	// change value to .mp3
	// if query was of type ext:
	// change value to ""
	// Change extension query value to lowercase - thats how they are stored on disk
	if (query_part_prop.Key == FSQ.FileSystemQueryable.FilenameExtensionPropKey &&
	    query_part_prop.Value != String.Empty)
		query_part_prop.Value = "." + query_part_prop.Value.ToLower (System.Globalization.CultureInfo.InvariantCulture);
	query_part_prop.Type = prop_type;
	Logger.Log.Debug ("Parsed query '" + query_text +
			  "' as prop query:key=" + query_part_prop.Key +
			  ", value=" + query_part_prop.Value +
			  " and property type=" + query_part_prop.Type);
	return query_part_prop;
}
// Turns a list of Token objects into a collection of QueryPart objects.
//
// Terms are Required unless they carry a '-' prefix, in which case they
// are Prohibited.  Terms joined by the Or operator are gathered into a
// single Required QueryPart_Or.  Meta tokens and operators that don't
// directly follow a term are ignored.
static private ICollection TokensToQueryParts (ArrayList token_list)
{
	ArrayList parts = new ArrayList ();

	// Sub-parts of the "a OR b OR ..." chain currently being collected,
	// or null when we are not inside such a chain.
	ArrayList or_list = null;

	int i = 0;
	while (i < token_list.Count) {
		Token token = (Token) token_list [i];

		// Meta tokens carry no term, and any operator we meet here is
		// an extra one (a properly placed Or is consumed below
		// together with the term preceding it).
		if (token.Type == TokenType.Meta || token.Type == TokenType.Operator) {
			++i;
			continue;
		}

		// Assemble a part for this token.
		QueryPart query_part = TokenToQueryPart (token);
		if (token.Type == TokenType.Minus)
			query_part.Logic = QueryPartLogic.Prohibited;
		else
			query_part.Logic = QueryPartLogic.Required;

		// Inside an or-chain the part belongs to the chain instead of
		// the top-level list.
		if (or_list != null) {
			or_list.Add (query_part);
			query_part = null;
		}

		Token next_token = null;
		if (i < token_list.Count - 1)
			next_token = (Token) token_list [i + 1];

		// If the next token is an or, start an or_list
		// (if we don't have one already) and skip
		// ahead to the next part.
		if (next_token != null
		    && next_token.Type == TokenType.Operator
		    && next_token.Text == "Or") {
			if (or_list == null) {
				or_list = new ArrayList ();
				or_list.Add (query_part);
			}
			i += 2;
			continue;
		}

		// The or-chain (if any) ends with this token: emit it.
		if (or_list != null) {
			parts.Add (MakeOrPart (or_list));
			or_list = null;
		}

		// Add the next text part
		if (query_part != null)
			parts.Add (query_part);

		++i;
	}

	// The query ended while an or-chain was still open (for example
	// "foo OR").  Emit what was collected so those terms are not lost.
	if (or_list != null)
		parts.Add (MakeOrPart (or_list));

	return parts;
}

// Wraps the given QueryPart objects in a single Required QueryPart_Or.
static private QueryPart_Or MakeOrPart (ArrayList or_list)
{
	QueryPart_Or or_part = new QueryPart_Or ();
	or_part.Logic = QueryPartLogic.Required;
	foreach (QueryPart sub_part in or_list)
		or_part.Add (sub_part);
	return or_part;
}