Tokenize 001234 as 1234. Include a testing function in NoiseFilter to figure out...
[beagle.git] / beagled / LuceneFileQueryable.cs
blob3e29adf696e4ef8a1686fc02988457a3d6a27d11
1 //
2 // LuceneFileQueryable.cs
3 //
4 // Copyright (C) 2005 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
27 using System;
28 using System.Collections;
29 using System.IO;
31 using Beagle.Util;
33 namespace Beagle.Daemon {
/**
 * This queryable just takes the LuceneQueryable and adds some sane
 * default behavior for indexing files containing multiple indexables.
 * Suitable for feed files or mbox-style mail files.
 * Use this only if the backend generates multiple indexables from one
 * physical source file; _do not_ use this if multiple indexables
 * are created from one indexable while filtering (aka child indexables).
 */
43 public abstract class LuceneFileQueryable : LuceneQueryable {
// Primary constructor: hands every argument straight to the
// LuceneQueryable base class.
public LuceneFileQueryable (string index_name, int minor_version, bool disable_locking)
	: base (index_name, minor_version, disable_locking)
{
}

// Convenience overload: minor version -1, locking left enabled.
public LuceneFileQueryable (string index_name)
	: this (index_name, -1, false)
{
}

// Convenience overload: minor version -1, caller decides about locking.
public LuceneFileQueryable (string index_name, bool disable_locking)
	: this (index_name, -1, disable_locking)
{
}

// Convenience overload: caller supplies the minor version, locking left enabled.
public LuceneFileQueryable (string index_name, int minor_version)
	: this (index_name, minor_version, false)
{
}
55 ///////////////////////////////////////////////////////////////////////////
// Maps the path of a shared source file to the number of
// outstanding references (boxed int) held on it.
private Hashtable file_references_count = new Hashtable ();

// Records one more reference to the file at `path`.
// A path that has no entry yet starts out at a count of 1.
private void IncrementReferenceCount (string path)
{
	// The Hashtable indexer yields null for a missing key, which
	// we treat as "zero references so far".
	object boxed_count = file_references_count [path];
	int previous = (boxed_count == null) ? 0 : (int) boxed_count;

	file_references_count [path] = previous + 1;
}
// Releases one reference to the file at `path`.
//
// Returns:
//   true  - at least one reference to the file is still outstanding
//   false - that was the last reference; the entry has been dropped
//
// Throws an Exception if the path is not currently tracked.
private bool DecrementReferenceCount (string path)
{
	object boxed_count = file_references_count [path];
	if (boxed_count == null)
		throw new Exception ("Shared file is not referenced: " + path);

	int remaining = (int) boxed_count - 1;
	bool still_referenced = (remaining != 0);

	if (still_referenced)
		file_references_count [path] = remaining;
	else
		file_references_count.Remove (path);

	return still_referenced;
}
88 ///////////////////////////////////////////////////////////////////////////
// What PreAddIndexableHook remembers about an indexable so that
// PostAddHook can update the file attributes afterwards.
private class CachedFileInfo {
	// Uri of the indexable; also the key in file_info_cache.
	public Uri Uri;

	// Local path of the file the indexable is backed by.
	public string Path;

	// Last write time (UTC) of Path, sampled in PreAddIndexableHook.
	public DateTime Mtime;

	// Set when Path was taken from the indexable's parent Uri;
	// such paths are reference counted.
	public bool Shared;
}

// Indexable Uri -> CachedFileInfo, filled in by PreAddIndexableHook.
private Hashtable file_info_cache = UriFu.NewHashtable ();
// Called before an indexable is added.  Works out which local file
// (if any) backs the indexable, samples that file's mtime and caches
// the result so PostAddHook can update the file attributes.
//
// Returns false when the backing file's mtime cannot be read because
// the file was not found; returns true in every other case.
override protected bool PreAddIndexableHook (Indexable indexable)
{
	// None of this applies for Removes
	if (indexable.Type == IndexableType.Remove)
		return true;

	CachedFileInfo info = (CachedFileInfo) file_info_cache [indexable.Uri];

	if (info == null)
		info = new CachedFileInfo ();

	info.Uri = indexable.Uri;

	// Set when this call resolved the path through the parent Uri,
	// i.e. when this indexable has to take a reference on a shared file.
	bool takes_shared_reference = false;

	if (indexable.Uri.IsFile && indexable.IsNonTransient)
		info.Path = indexable.Uri.LocalPath;
	else if (indexable.ContentUri.IsFile && indexable.IsNonTransient)
		info.Path = indexable.ContentUri.LocalPath;
	else if (indexable.ParentUri != null && indexable.ParentUri.IsFile) {
		info.Path = indexable.ParentUri.LocalPath;
		info.Shared = true;
		takes_shared_reference = true;
	}

	// The path could be null in certain cases:
	//    * The indexable is a non-file URI and no
	//      parent URI is set.
	//    * The indexable is a child indexable and the
	//      parent URI is not a file URI.
	if (info.Path == null)
		return true;

	try {
		info.Mtime = FileSystem.GetLastWriteTimeUtc (info.Path);
	} catch (FileNotFoundException) {
		// If we can't get an mtime for the file, it must
		// have disappeared out from under us.  In that case,
		// don't bother adding anything.
		return false;
	}

	// Take the reference only once we know we are going ahead.
	// Counting it before the mtime lookup would leave a reference
	// that is never released when we bail out above, and the count
	// for the shared file could then never drop back to zero.
	if (takes_shared_reference)
		IncrementReferenceCount (info.Path);

	file_info_cache [info.Uri] = info;

	return true;
}
// Called after an indexable has been added.  Uses the information
// cached by PreAddIndexableHook to stamp the backing file's
// attributes with the mtime that was sampled before indexing.
//
// For a file shared by several indexables the attributes are only
// written once the last outstanding reference has been released.
override protected void PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
{
	// Retrieve our cached info about the file.
	CachedFileInfo info;
	info = file_info_cache [receipt.Uri] as CachedFileInfo;
	if (info == null)
		return;

	// We are done with the cache entry whichever way we leave this
	// method.  Removing it here (rather than at the very end) keeps
	// entries of shared files from piling up in the cache when we
	// return early below.
	file_info_cache.Remove (info.Uri);

	// If it's a file that's shared across multiple indexables, only
	// tag it once the last indexable has been indexed.
	if (info.Shared && DecrementReferenceCount (info.Path))
		return;

	// Since we know that the file has been successfully
	// indexed, update the file attributes accordingly.
	FileAttributes attr;
	attr = FileAttributesStore.ReadOrCreate (info.Path);

	attr.LastWriteTime = info.Mtime;

	// Don't set filter information on a file if multiple indexables
	// have been created from it.
	if (! info.Shared) {
		attr.FilterName = receipt.FilterName;
		attr.FilterVersion = receipt.FilterVersion;
	}

	if (! FileAttributesStore.Write (attr))
		Logger.Log.Warn ("Couldn't write attributes for {0}", info.Path);
}
// Called after an indexable has been removed: forget anything we
// had cached for its Uri.
override protected void PostRemoveHook (Indexable indexable, IndexerRemovedReceipt receipt)
{
	Uri removed_uri = indexable.Uri;
	file_info_cache.Remove (removed_uri);
}
// Decides whether a hit should still be reported.
//
// Non-file Uris are always accepted.  For file Uris the hit is valid
// only if FileSystem.Exists reports that the local path exists; if
// that check throws, a warning is logged and the hit is rejected.
override protected bool HitIsValid (Uri uri)
{
	// Do the right thing if the Uri is a file.
	// If the file Uri we need is the ContentUri, this won't work.
	if (! uri.IsFile)
		return true;

	try {
		return FileSystem.Exists (uri.LocalPath);
	} catch (Exception e) {
		// Say what went wrong instead of discarding the exception;
		// otherwise the warning gives no hint about the cause.
		Logger.Log.Warn ("Exception executing HitIsValid on {0}: {1}", uri.LocalPath, e.Message);
		return false;
	}
}
199 ///////////////////////////////////////////////////////////////////////////
201 // Convenience functions
// Convenience wrapper: forwards to FileAttributesStore.IsUpToDate
// for the given path and filter and returns its answer unchanged.
public bool IsUpToDate (string path, Filter filter)
{
	bool up_to_date = FileAttributesStore.IsUpToDate (path, filter);
	return up_to_date;
}
// Convenience wrapper: forwards to FileAttributesStore.IsUpToDate
// for the given path and returns its answer unchanged.
public bool IsUpToDate (string path)
{
	bool up_to_date = FileAttributesStore.IsUpToDate (path);
	return up_to_date;
}