2 // LuceneIndexingDriver.cs
4 // Copyright (C) 2004-2005 Novell, Inc.
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
28 // This should be the only piece of source code that knows anything
29 // about Lucene's internals.
33 using System
.Collections
;
34 using System
.Diagnostics
;
35 using System
.Globalization
;
38 using System
.Threading
;
40 using System
.Xml
.Serialization
;
42 using Lucene
.Net
.Analysis
;
43 using Lucene
.Net
.Analysis
.Standard
;
44 using Lucene
.Net
.Documents
;
45 using Lucene
.Net
.Index
;
46 using Lucene
.Net
.QueryParsers
;
47 using LNS
= Lucene
.Net
.Search
;
51 namespace Beagle
.Daemon
{
53 public class LuceneIndexingDriver
: LuceneCommon
, IIndexer
{
// Monitor object for flush operations. It is private and readonly so the
// object being locked on can never be swapped out from under a thread
// that is waiting on it.
// NOTE(review): the name suggests it serializes calls to Flush -- confirm
// that every flush path actually takes this lock.
private readonly object flush_lock = new object ();
// Primary constructor; the other overloads chain to this one.
// Forwards index_name and minor_version to the LuceneCommon base
// constructor and points text_cache at the static TextCache.UserCache.
// NOTE(review): build_usercache is not referenced in the lines visible
// here and no braces are shown, so this listing looks truncated; the
// assignment below is presumably guarded by that flag, and further
// set-up statements may be missing -- TODO confirm against the full file.
57 public LuceneIndexingDriver (string index_name
, int minor_version
, bool build_usercache
)
58 : base (index_name
, minor_version
)
66 text_cache
= TextCache
.UserCache
;
// Convenience overload: same as the three-argument constructor with
// build_usercache passed as true.
public LuceneIndexingDriver (string index_name, int minor_version)
	: this (index_name, minor_version, true)
{
}
// Convenience overload: same as the three-argument constructor with
// minor_version passed as 0 and the caller's build_usercache choice.
public LuceneIndexingDriver (string index_name, bool build_usercache)
	: this (index_name, 0, build_usercache)
{
}
// Convenience overload: same as the three-argument constructor with
// minor_version passed as 0 and build_usercache passed as true.
public LuceneIndexingDriver (string index_name)
	: this (index_name, 0, true)
{
}
78 ////////////////////////////////////////////////////////////////
81 // Implementation of the IIndexer interface
// Processes an indexing request and returns the receipts describing
// what was removed from and added to the index.
//
// Flushes are serialized on flush_lock: Flush_Unlocked opens readers and
// writers on the index stores and must not run concurrently with itself.
public IndexerReceipt [] Flush (IndexerRequest request)
{
	// The real work lives in Flush_Unlocked.  This is just to keep a
	// big block of code from being indented an extra eight spaces.
	lock (flush_lock)
		return Flush_Unlocked (request);
}
// Does the actual work for Flush, which calls this method.
//
// Processes one IndexerRequest in the four steps marked below:
//   1. delete stale entries through index readers and build a query
//      covering every property-change request,
//   2. stash (and then delete) the secondary documents matched by that
//      query,
//   3. write out new documents through index writers,
//   4. close the writers and return the queued receipts as an array.
//
// NOTE(review): several locals used below (uri_str, term, hits, doc, uri,
// new_doc) have no visible declaration, some catch clauses have no
// visible try, and most closing braces are absent; this listing appears
// to omit lines -- TODO confirm every note below against the full file.
92 private IndexerReceipt
[] Flush_Unlocked (IndexerRequest request
)
// Receipts accumulate here in processing order and are copied into a
// typed array just before returning.
94 ArrayList receipt_queue
;
95 receipt_queue
= new ArrayList ();
// One reader is opened on each store.
97 IndexReader primary_reader
, secondary_reader
;
98 primary_reader
= IndexReader
.Open (PrimaryStore
);
99 secondary_reader
= IndexReader
.Open (SecondaryStore
);
101 // Step #1: Make our first pass over the list of
102 // indexables that make up our request. For each add
103 // or remove in the request, delete the associated
104 // items from the index. Assemble a query that will
105 // be used to find the secondary documents for any
106 // property change requests.
// prop_change_query stays null unless the request contains at least one
// property change; step #2 is skipped in that case.
108 LNS
.BooleanQuery prop_change_query
= null;
109 int delete_count
= 0;
111 foreach (Indexable indexable
in request
.Indexables
) {
113 switch (indexable
.Type
) {
// Adds and removes share the same deletion logic.
115 case IndexableType
.Add
:
116 case IndexableType
.Remove
:
119 uri_str
= UriFu
.UriToSerializableString (indexable
.Uri
);
121 Logger
.Log
.Debug ("-{0}", indexable
.DisplayUri
);
// Only deletions from the primary index are added to delete_count.
124 term
= new Term ("Uri", uri_str
);
125 delete_count
+= primary_reader
.Delete (term
);
// NOTE(review): secondary_reader is null-checked here but is used
// unconditionally further down (IndexSearcher construction, Close) --
// confirm whether it can really be null.
126 if (secondary_reader
!= null)
127 secondary_reader
.Delete (term
);
129 // When we delete an indexable, also delete any children.
130 // FIXME: Shouldn't we also delete any children of children, etc.?
131 term
= new Term ("ParentUri", uri_str
);
132 delete_count
+= primary_reader
.Delete (term
);
133 if (secondary_reader
!= null)
134 secondary_reader
.Delete (term
);
136 // If this is a strict removal (and not a deletion that
137 // we are doing in anticipation of adding something back),
138 // queue up a removed receipt.
139 if (indexable
.Type
== IndexableType
.Remove
) {
140 IndexerRemovedReceipt r
;
141 r
= new IndexerRemovedReceipt (indexable
.Uri
);
142 receipt_queue
.Add (r
);
// Property changes: create the query lazily, then add one clause per Uri.
// NOTE(review): the two boolean arguments are presumably
// (required, prohibited) = (false, false), i.e. an optional clause --
// TODO confirm against the Lucene.Net BooleanQuery API in use.
147 case IndexableType
.PropertyChange
:
148 if (prop_change_query
== null)
149 prop_change_query
= new LNS
.BooleanQuery ();
150 prop_change_query
.Add (UriQuery ("Uri", indexable
.Uri
), false, false);
// NOTE(review): AdjustItemCount and SetItemCount appear back-to-back
// here; a guarding condition may be missing from this listing --
// TODO confirm.
156 AdjustItemCount (-delete_count
);
158 SetItemCount (primary_reader
);
160 // Step #2: If we are doing any property changes,
161 // we read in the current secondary documents and
162 // store them in a hash table for use later. Then we
163 // delete the current secondary documents.
// prop_change_docs is only allocated when there is a property-change
// query; otherwise it stays null.
164 Hashtable prop_change_docs
= null;
165 if (prop_change_query
!= null) {
166 prop_change_docs
= UriFu
.NewHashtable ();
168 LNS
.IndexSearcher secondary_searcher
;
169 secondary_searcher
= new LNS
.IndexSearcher (secondary_reader
);
172 hits
= secondary_searcher
.Search (prop_change_query
);
174 ArrayList delete_terms
;
175 delete_terms
= new ArrayList ();
// Record each hit's document keyed by its Uri, and remember a Term that
// will be used to delete it once the search is finished.
// NOTE(review): the assignment of doc is not visible in this listing.
177 int N
= hits
.Length ();
178 for (int i
= 0; i
< N
; ++i
) {
183 uri_str
= doc
.Get ("Uri");
186 uri
= UriFu
.UriStringToUri (uri_str
);
187 prop_change_docs
[uri
] = doc
;
190 term
= new Term ("Uri", uri_str
);
191 delete_terms
.Add (term
);
// The deletions are applied only after the searcher has been closed.
194 secondary_searcher
.Close ();
196 foreach (Term term
in delete_terms
)
197 secondary_reader
.Delete (term
);
200 // We are now done with the readers, so we close them.
201 primary_reader
.Close ();
202 secondary_reader
.Close ();
204 // FIXME: If we crash at exactly this point, we are in
205 // trouble. Items will have been dropped from the index
206 // without the proper replacements being added.
208 // Step #3: Make another pass across our list of indexables
209 // and write out any new documents.
// text_cache may be null; when it is set, the writes below are bracketed
// by BeginTransaction / CommitTransaction.
211 if (text_cache
!= null)
212 text_cache
.BeginTransaction ();
// The primary writer is opened up front; the secondary writer is created
// lazily, the first time something needs it.
214 IndexWriter primary_writer
, secondary_writer
;
215 primary_writer
= new IndexWriter (PrimaryStore
, IndexingAnalyzer
, false);
216 secondary_writer
= null;
218 foreach (Indexable indexable
in request
.Indexables
) {
// NOTE(review): the statement controlled by this if is not visible;
// given the Add check further down, Remove indexables are presumably
// skipped here -- TODO confirm.
220 if (indexable
.Type
== IndexableType
.Remove
)
// Every indexable that gets this far is given an added receipt, queued
// before any indexing is attempted.
223 IndexerAddedReceipt r
;
224 r
= new IndexerAddedReceipt (indexable
.Uri
);
225 receipt_queue
.Add (r
);
227 if (indexable
.Type
== IndexableType
.PropertyChange
) {
229 Logger
.Log
.Debug ("+{0} (props only)", indexable
.DisplayUri
);
230 r
.PropertyChangesOnly
= true;
// Look up the secondary document stashed in step #2; `as` yields null
// when nothing was stored for this Uri.
// NOTE(review): a null doc would be passed straight to RewriteDocument
// -- confirm that it tolerates that.
233 doc
= prop_change_docs
[indexable
.Uri
] as Document
;
236 new_doc
= RewriteDocument (doc
, indexable
);
238 // Write out the new document...
239 if (secondary_writer
== null)
240 secondary_writer
= new IndexWriter (SecondaryStore
, IndexingAnalyzer
, false);
241 secondary_writer
.AddDocument (new_doc
);
243 continue; // ...and proceed to the next Indexable
246 // If we reach this point we know we are dealing with an IndexableType.Add
248 if (indexable
.Type
!= IndexableType
.Add
)
249 throw new Exception ("When I said it was an IndexableType.Add, I meant it!");
251 Logger
.Log
.Debug ("+{0}", indexable
.DisplayUri
);
253 Filter filter
= null;
255 // If we have content, try to find a filter
256 // which we can use to process the indexable.
258 FilterFactory
.FilterIndexable (indexable
, text_cache
, out filter
);
// A filtering failure is logged and the indexable is marked NoContent.
259 } catch (Exception e
) {
260 Logger
.Log
.Error ("Unable to filter {0} (mimetype={1})", indexable
.DisplayUri
, indexable
.MimeType
);
261 Logger
.Log
.Error (e
);
262 indexable
.NoContent
= true;
265 Document primary_doc
= null, secondary_doc
= null;
// First attempt: build both documents and add the primary one.
268 BuildDocuments (indexable
, out primary_doc
, out secondary_doc
);
269 primary_writer
.AddDocument (primary_doc
);
270 } catch (Exception ex
) {
272 // If an exception was thrown, something bad probably happened
273 // while we were filtering the content. Set NoContent to true
274 // and try again -- that way it will at least end up in the index,
275 // even if we don't manage to extract the fulltext.
277 Logger
.Log
.Debug ("First attempt to index {0} failed", indexable
.DisplayUri
);
278 Logger
.Log
.Debug (ex
);
280 indexable
.NoContent
= true;
// Second attempt, this time with NoContent set.  A second failure is
// only logged at debug level; the added receipt queued above stays in
// the queue regardless.
283 BuildDocuments (indexable
, out primary_doc
, out secondary_doc
);
284 primary_writer
.AddDocument (primary_doc
);
285 } catch (Exception ex2
) {
286 Logger
.Log
.Debug ("Second attempt to index {0} failed, giving up...", indexable
.DisplayUri
);
287 Logger
.Log
.Debug (ex2
);
291 if (filter
!= null) {
// NOTE(review): no statement is visible after the comment below; the
// clean-up call it describes may be missing from this listing.
293 // Force the clean-up of temporary files, just in case.
// Record which filter handled this indexable on its receipt.
296 r
.FilterName
= filter
.GetType ().ToString ();
297 r
.FilterVersion
= filter
.Version
;
299 // Create a receipt containing any child indexables.
300 if (filter
.ChildIndexables
.Count
> 0) {
301 IndexerChildIndexablesReceipt cr
;
302 cr
= new IndexerChildIndexablesReceipt (indexable
, filter
.ChildIndexables
);
303 receipt_queue
.Add (cr
);
// The secondary document is optional; it is written only when
// BuildDocuments produced one.
307 if (secondary_doc
!= null) {
308 if (secondary_writer
== null)
309 secondary_writer
= new IndexWriter (SecondaryStore
, IndexingAnalyzer
, false);
311 secondary_writer
.AddDocument (secondary_doc
);
316 // Clean up any temporary files associated with filtering this indexable.
317 indexable
.Cleanup ();
320 if (text_cache
!= null)
321 text_cache
.CommitTransaction ();
// Optimization is done only when the request asks for it, and covers
// both stores, so the secondary writer is opened here if nothing above
// needed it.
// NOTE(review): no call that starts or stops `watch` is visible before
// it is logged -- TODO confirm.
323 if (request
.OptimizeIndex
) {
324 Stopwatch watch
= new Stopwatch ();
325 Logger
.Log
.Debug ("Optimizing {0}", IndexName
);
327 primary_writer
.Optimize ();
328 if (secondary_writer
== null)
329 secondary_writer
= new IndexWriter (SecondaryStore
, IndexingAnalyzer
, false);
330 secondary_writer
.Optimize ();
332 Logger
.Log
.Debug ("{0} optimized in {1}", IndexName
, watch
);
335 // Step #4. Close our writers and return the events to
336 // indicate what has happened.
338 primary_writer
.Close ();
339 if (secondary_writer
!= null)
340 secondary_writer
.Close ();
// Copy the queued receipts into a typed array for the caller.
342 IndexerReceipt
[] receipt_array
;
343 receipt_array
= new IndexerReceipt
[receipt_queue
.Count
];
344 for (int i
= 0; i
< receipt_queue
.Count
; ++i
)
345 receipt_array
[i
] = (IndexerReceipt
) receipt_queue
[i
];
347 return receipt_array
;
350 ////////////////////////////////////////////////////////////////
// Optimizes the primary index and, when a secondary store exists, the
// secondary index as well.
//
// Each store gets its own short-lived IndexWriter, opened with a null
// analyzer and create=false.  The writer is closed in a finally block so
// that a failed Optimize cannot leave it open.
public void OptimizeNow ()
{
	IndexWriter writer;

	writer = new IndexWriter (PrimaryStore, null, false);
	try {
		writer.Optimize ();
	} finally {
		writer.Close ();
	}

	if (SecondaryStore != null) {
		writer = new IndexWriter (SecondaryStore, null, false);
		try {
			writer.Optimize ();
		} finally {
			writer.Close ();
		}
	}
}
// Merges the primary and secondary indexes of index_to_merge into this
// driver's own primary and secondary stores.
//
// index_to_merge: the index whose stores are appended; must not be null
// (an ArgumentNullException is thrown otherwise).
//
// Each writer is closed in a finally block so that a failing AddIndexes
// call cannot leave it open.
public void Merge (LuceneCommon index_to_merge)
{
	if (index_to_merge == null)
		throw new ArgumentNullException ("index_to_merge");

	// FIXME: Error recovery.  The writers are now always closed, but if
	// the secondary merge fails the primary merge has already been
	// applied and is not undone.

	// Merge the primary index
	Lucene.Net.Store.Directory[] primary_store = {index_to_merge.PrimaryStore};
	IndexWriter primary_writer = new IndexWriter (PrimaryStore, null, false);
	try {
		primary_writer.AddIndexes (primary_store);
	} finally {
		primary_writer.Close ();
	}

	// Merge the secondary index
	Lucene.Net.Store.Directory[] secondary_store = {index_to_merge.SecondaryStore};
	IndexWriter secondary_writer = new IndexWriter (SecondaryStore, null, false);
	try {
		secondary_writer.AddIndexes (secondary_store);
	} finally {
		secondary_writer.Close ();
	}
}