//
// LuceneIndexingDriver.cs
//
// Copyright (C) 2004-2005 Novell, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//

// This should be the only piece of source code that knows anything
// about Lucene's internals.
using System;
using System.Collections;
using System.Diagnostics;
using System.Globalization;
using System.Threading;
using System.Xml.Serialization;

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using LNS = Lucene.Net.Search;

using Beagle.Util;
namespace Beagle.Daemon {

	public class LuceneIndexingDriver : LuceneCommon, IIndexer {

		object flush_lock = new object ();
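
		// Note: this driver actually manages a pair of Lucene indexes.
		// The primary store holds the full documents built from each
		// Indexable, while the secondary store holds small property-only
		// documents, so a PropertyChange can be applied by rewriting just
		// the secondary document instead of re-filtering the content
		// (see Flush_Unlocked below).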

		public LuceneIndexingDriver (string index_name, int minor_version, bool build_usercache)
			: base (index_name, minor_version)
		{
			if (build_usercache)
				text_cache = TextCache.UserCache;
		}

		public LuceneIndexingDriver (string index_name, int minor_version)
			: this (index_name, minor_version, true) { }

		public LuceneIndexingDriver (string index_name, bool build_usercache)
			: this (index_name, 0, build_usercache) { }

		public LuceneIndexingDriver (string index_name)
			: this (index_name, 0, true) { }

		////////////////////////////////////////////////////////////////

		// Implementation of the IIndexer interface
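
		// Rough usage sketch (hypothetical caller code; the real
		// IndexerRequest and Indexable APIs live elsewhere in
		// Beagle.Daemon and may differ in detail):
		//
		//	LuceneIndexingDriver driver = new LuceneIndexingDriver ("MyIndex");
		//	IndexerRequest request = new IndexerRequest ();
		//	request.Add (new Indexable (IndexableType.Add, uri));
		//	IndexerReceipt [] receipts = driver.Flush (request);
		//
		// The receipts describe what actually happened to each item:
		// IndexerAddedReceipt, IndexerRemovedReceipt, or
		// IndexerChildIndexablesReceipt for children produced by a filter.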

		public IndexerReceipt [] Flush (IndexerRequest request)
		{
			// This is just to keep a big block of code from being
			// indented an extra eight spaces.
			lock (flush_lock)
				return Flush_Unlocked (request);
		}

		private IndexerReceipt [] Flush_Unlocked (IndexerRequest request)
		{
			ArrayList receipt_queue;
			receipt_queue = new ArrayList ();

			IndexReader primary_reader, secondary_reader;
			primary_reader = IndexReader.Open (PrimaryStore);
			secondary_reader = IndexReader.Open (SecondaryStore);

			// Step #1: Make our first pass over the list of
			// indexables that make up our request.  For each add
			// or remove in the request, delete the associated
			// items from the index.  Assemble a query that will
			// be used to find the secondary documents for any
			// property change requests.

			LNS.BooleanQuery prop_change_query = null;
			int delete_count = 0;

			foreach (Indexable indexable in request.Indexables) {

				switch (indexable.Type) {

				case IndexableType.Add:
				case IndexableType.Remove:

					string uri_str;
					uri_str = UriFu.UriToEscapedString (indexable.Uri);

					Logger.Log.Debug ("-{0}", indexable.DisplayUri);

					Term term;
					term = new Term ("Uri", uri_str);
					delete_count += primary_reader.Delete (term);
					if (secondary_reader != null)
						secondary_reader.Delete (term);

					// When we delete an indexable, also delete any children.
					// FIXME: Shouldn't we also delete any children of children, etc.?
					term = new Term ("ParentUri", uri_str);
					delete_count += primary_reader.Delete (term);
					if (secondary_reader != null)
						secondary_reader.Delete (term);

					// If this is a strict removal (and not a deletion that
					// we are doing in anticipation of adding something back),
					// queue up a removed receipt.
					if (indexable.Type == IndexableType.Remove) {
						IndexerRemovedReceipt r;
						r = new IndexerRemovedReceipt (indexable.Uri);
						receipt_queue.Add (r);
					}

					break;

				case IndexableType.PropertyChange:
					if (prop_change_query == null)
						prop_change_query = new LNS.BooleanQuery ();
					// Add (query, required, prohibited): neither required nor
					// prohibited, i.e. OR this Uri clause into the query.
					prop_change_query.Add (UriQuery ("Uri", indexable.Uri), false, false);
					break;
				}
			}

			AdjustItemCount (-delete_count);

			SetItemCount (primary_reader);

			// Step #2: If we are doing any property changes,
			// we read in the current secondary documents and
			// store them in a hash table for use later.  Then we
			// delete the current secondary documents.
			Hashtable prop_change_docs = null;
			if (prop_change_query != null) {
				prop_change_docs = UriFu.NewHashtable ();

				LNS.IndexSearcher secondary_searcher;
				secondary_searcher = new LNS.IndexSearcher (secondary_reader);

				LNS.Hits hits;
				hits = secondary_searcher.Search (prop_change_query);

				// Collect the terms to delete and apply them only after the
				// searcher has been closed, rather than deleting out from
				// under a live Hits object.
				ArrayList delete_terms;
				delete_terms = new ArrayList ();

				int N = hits.Length ();
				for (int i = 0; i < N; ++i) {

					Document doc;
					doc = hits.Doc (i);

					string uri_str;
					uri_str = doc.Get ("Uri");

					Uri uri;
					uri = UriFu.EscapedStringToUri (uri_str);
					prop_change_docs [uri] = doc;

					Term term;
					term = new Term ("Uri", uri_str);
					delete_terms.Add (term);
				}

				secondary_searcher.Close ();

				foreach (Term term in delete_terms)
					secondary_reader.Delete (term);
			}
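
			// At this point all of the deletions for this request have been
			// applied: Add/Remove items are gone from both indexes, and the
			// old secondary documents for property changes have been pulled
			// out so they can be rewritten in Step #3.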

			// We are now done with the readers, so we close them.
			primary_reader.Close ();
			secondary_reader.Close ();

			// FIXME: If we crash at exactly this point, we are in
			// trouble.  Items will have been dropped from the index
			// without the proper replacements being added.

			// Step #3: Make another pass across our list of indexables
			// and write out any new documents.

			if (text_cache != null)
				text_cache.BeginTransaction ();

			IndexWriter primary_writer, secondary_writer;
			primary_writer = new IndexWriter (PrimaryStore, IndexingAnalyzer, false);
			secondary_writer = null;

			foreach (Indexable indexable in request.Indexables) {

				// Removals were fully handled in Step #1.
				if (indexable.Type == IndexableType.Remove)
					continue;

				IndexerAddedReceipt r;
				r = new IndexerAddedReceipt (indexable.Uri);
				receipt_queue.Add (r);

				if (indexable.Type == IndexableType.PropertyChange) {

					Logger.Log.Debug ("+{0} (props only)", indexable.DisplayUri);
					r.PropertyChangesOnly = true;

					Document doc;
					doc = prop_change_docs [indexable.Uri] as Document;

					Document new_doc;
					new_doc = RewriteDocument (doc, indexable);

					// Write out the new document...
					if (secondary_writer == null)
						secondary_writer = new IndexWriter (SecondaryStore, IndexingAnalyzer, false);
					secondary_writer.AddDocument (new_doc);

					continue; // ...and proceed to the next Indexable
				}

				// If we reach this point we know we are dealing with an IndexableType.Add

				if (indexable.Type != IndexableType.Add)
					throw new Exception ("When I said it was an IndexableType.Add, I meant it!");

				Logger.Log.Debug ("+{0}", indexable.DisplayUri);

				Filter filter = null;

				// If we have content, try to find a filter
				// which we can use to process the indexable.
				try {
					FilterFactory.FilterIndexable (indexable, text_cache, out filter);
				} catch (Exception e) {
					Logger.Log.Error (e, "Unable to filter {0} (mimetype={1})", indexable.DisplayUri, indexable.MimeType);
					indexable.NoContent = true;
				}

				Document primary_doc = null, secondary_doc = null;

				try {
					BuildDocuments (indexable, out primary_doc, out secondary_doc);
					primary_writer.AddDocument (primary_doc);
				} catch (Exception ex) {

					// If an exception was thrown, something bad probably happened
					// while we were filtering the content.  Set NoContent to true
					// and try again -- that way it will at least end up in the index,
					// even if we don't manage to extract the fulltext.

					Logger.Log.Debug (ex, "First attempt to index {0} failed", indexable.DisplayUri);

					indexable.NoContent = true;

					try {
						BuildDocuments (indexable, out primary_doc, out secondary_doc);
						primary_writer.AddDocument (primary_doc);
					} catch (Exception ex2) {
						Logger.Log.Debug (ex2, "Second attempt to index {0} failed, giving up...", indexable.DisplayUri);
					}
				}

				if (filter != null) {

					// Force the clean-up of temporary files, just in case.
					filter.Cleanup ();

					r.FilterName = filter.GetType ().ToString ();
					r.FilterVersion = filter.Version;

					// Create a receipt containing any child indexables.
					if (filter.ChildIndexables.Count > 0) {
						IndexerChildIndexablesReceipt cr;
						cr = new IndexerChildIndexablesReceipt (indexable, filter.ChildIndexables);
						receipt_queue.Add (cr);
					}
				}

				if (secondary_doc != null) {
					if (secondary_writer == null)
						secondary_writer = new IndexWriter (SecondaryStore, IndexingAnalyzer, false);

					secondary_writer.AddDocument (secondary_doc);
				}

				// Clean up any temporary files associated with filtering this indexable.
				indexable.Cleanup ();
			}
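
			// All of the text-cache writes made while filtering above happen
			// inside the single transaction opened at the start of Step #3;
			// it is committed only once every indexable has been processed.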
			if (text_cache != null)
				text_cache.CommitTransaction ();

			if (request.OptimizeIndex) {
				Stopwatch watch = new Stopwatch ();
				Logger.Log.Debug ("Optimizing {0}", IndexName);
				watch.Start ();
				primary_writer.Optimize ();
				if (secondary_writer == null)
					secondary_writer = new IndexWriter (SecondaryStore, IndexingAnalyzer, false);
				secondary_writer.Optimize ();
				watch.Stop ();
				Logger.Log.Debug ("{0} optimized in {1}", IndexName, watch);
			}

			// Step #4: Close our writers and return the events to
			// indicate what has happened.

			primary_writer.Close ();
			if (secondary_writer != null)
				secondary_writer.Close ();

			IndexerReceipt [] receipt_array;
			receipt_array = new IndexerReceipt [receipt_queue.Count];
			for (int i = 0; i < receipt_queue.Count; ++i)
				receipt_array [i] = (IndexerReceipt) receipt_queue [i];

			return receipt_array;
		}

		////////////////////////////////////////////////////////////////

		public void OptimizeNow ()
		{
			IndexWriter writer;

			writer = new IndexWriter (PrimaryStore, null, false);
			writer.Optimize ();
			writer.Close ();

			if (SecondaryStore != null) {
				writer = new IndexWriter (SecondaryStore, null, false);
				writer.Optimize ();
				writer.Close ();
			}
		}

		public void Merge (LuceneCommon index_to_merge)
		{
			// FIXME: Error recovery

			// Merge the primary index.  IndexWriter.AddIndexes folds every
			// segment of the given directories into this writer's index.
			IndexWriter primary_writer;
			Lucene.Net.Store.Directory [] primary_store = { index_to_merge.PrimaryStore };
			primary_writer = new IndexWriter (PrimaryStore, null, false);

			primary_writer.AddIndexes (primary_store);
			primary_writer.Close ();

			// Merge the secondary index
			IndexWriter secondary_writer;
			Lucene.Net.Store.Directory [] secondary_store = { index_to_merge.SecondaryStore };
			secondary_writer = new IndexWriter (SecondaryStore, null, false);

			secondary_writer.AddIndexes (secondary_store);
			secondary_writer.Close ();
		}
	}
}