Tokenize 001234 as 1234. Include a testing function in NoiseFilter to figure out...
[beagle.git] / beagled / ThunderbirdQueryable / LuceneAccess.cs
blob5a04364e6c4ba9352d856039430699dc6b2b5a0e
1 //
2 // LuceneAccess.cs: Provides low level access to the underlying Lucene database
3 //
4 // Copyright (C) 2006 Pierre Östlund
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
27 using System;
28 using System.Collections;
30 using Lucene.Net.Documents;
31 using Lucene.Net.Index;
32 using LNS = Lucene.Net.Search;
34 using Beagle.Daemon;
35 using Beagle.Util;
37 namespace Beagle.Daemon.ThunderbirdQueryable {
39 public class LuceneAccess : LuceneQueryingDriver {
// Plain data holder describing what the index has recorded about one document.
public class StoredInfo
{
	// Parsed from the document's "fixme:indexDateTime" property.
	public DateTime LastIndex;

	// Parsed from the document's "fixme:fullyIndexed" property.
	public bool FullyIndexed;

	// Uri of the document, as returned by GetUriFromDocument.
	public Uri Uri;
}
// Creates the accessor by handing all three arguments, unchanged, to the
// LuceneQueryingDriver base constructor. No additional setup is done here.
public LuceneAccess (string index_name, int minor_version, bool read_only)
	: base (index_name, minor_version, read_only)
{
}
// Builds a StoredInfo from a Lucene document: the Uri comes from
// GetUriFromDocument, while LastIndex and FullyIndexed are read from the
// "fixme:indexDateTime" and "fixme:fullyIndexed" properties.
// Fields that do not map to a property are skipped.
public StoredInfo DocumentToStoredInfo (Document doc)
{
	StoredInfo result = new StoredInfo ();
	result.Uri = GetUriFromDocument (doc);

	// Number of wanted properties consumed so far; the scan stops at two.
	int found = 0;

	foreach (Field field in doc.Fields ()) {
		Property property = GetPropertyFromDocument (field, doc, false);
		if (property == null)
			continue;

		string key = property.Key;

		if (key == "fixme:indexDateTime") {
			result.LastIndex = StringFu.StringToDateTime (property.Value);
			found++;
		} else if (key == "fixme:fullyIndexed") {
			result.FullyIndexed = Convert.ToBoolean (property.Value);
			found++;
		}

		// Nothing else in the document is of interest once both were seen.
		if (found == 2)
			break;
	}

	return result;
}
// Hit collector that only remembers a single document id.
// MatchId stays at -1 until Collect is called; if Collect is called more
// than once, the id from the most recent call is the one kept.
private class SingletonCollector : LNS.HitCollector
{
	public int MatchId = -1;

	// Stores the id of the reported hit; the score is ignored.
	public override void Collect (int id, float score)
	{
		this.MatchId = id;
	}
}
// Looks up the document whose "Uri" field matches the given uri in the
// primary store and converts it with DocumentToStoredInfo.
// When no document matches, a freshly constructed StoredInfo (all fields at
// their defaults) is returned instead of null.
public StoredInfo GetStoredInfo (Uri uri)
{
	StoredInfo info = new StoredInfo ();

	LNS.Query query = UriQuery ("Uri", uri);
	SingletonCollector collector = new SingletonCollector ();

	LNS.IndexSearcher searcher = LuceneCommon.GetSearcher (PrimaryStore);
	try {
		searcher.Search (query, null, collector);

		// -1 is the collector's initial value, i.e. no hit was reported.
		if (collector.MatchId != -1) {
			Document doc = searcher.Doc (collector.MatchId);
			info = DocumentToStoredInfo (doc);
		}
	} finally {
		// Always hand the searcher back, even if the search or the
		// document conversion throws; otherwise it is never released.
		LuceneCommon.ReleaseSearcher (searcher);
	}

	return info;
}
// Collects the indexed documents whose "fixme:account" keyword equals
// server, filtered to those whose "fixme:file" keyword equals file.
// Returns a Hashtable mapping each document's Uri string to its
// FullyIndexed flag (boxed bool).
// Hashtable.Add throws ArgumentException if two hits share the same Uri
// string; that behavior is kept as-is.
public Hashtable GetStoredUriStrings (string server, string file)
{
	Hashtable uris = new Hashtable ();

	Term term = new Term (PropertyToFieldName (PropertyType.Keyword, "fixme:file"), file);
	LNS.QueryFilter filter = new LNS.QueryFilter (new LNS.TermQuery (term));

	term = new Term (PropertyToFieldName (PropertyType.Keyword, "fixme:account"), server);
	LNS.TermQuery query = new LNS.TermQuery (term);

	LNS.IndexSearcher searcher = LuceneCommon.GetSearcher (PrimaryStore);
	try {
		LNS.Hits hits = searcher.Search (query, filter);

		for (int i = 0; i < hits.Length (); i++) {
			StoredInfo info = DocumentToStoredInfo (hits.Doc (i));
			uris.Add (info.Uri.ToString (), info.FullyIndexed);
		}
	} finally {
		// Release the searcher on every path; an exception while reading
		// hits or filling the table must not leave it checked out.
		LuceneCommon.ReleaseSearcher (searcher);
	}

	return uris;
}