//
// LuceneIndexingDriver.cs
//
// Copyright (C) 2004-2005 Novell, Inc.
//

//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//

// This should be the only piece of source code that knows anything
// about Lucene's internals.

using System;
using System.Collections;
using System.Diagnostics;
using System.Globalization;
using System.Threading;
using System.Xml.Serialization;

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using LNS = Lucene.Net.Search;

using Beagle.Util;

namespace Beagle.Daemon {

	public class LuceneIndexingDriver : LuceneCommon, IIndexer {
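
		// Indexables queued up since the last flush, keyed by URI.  A null
		// value marks a pending removal.  Nothing touches the Lucene index
		// until FlushAndBlock applies this queue; optimize_during_next_flush
		// asks that flush to also optimize the index.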
		Hashtable pending_by_uri = UriFu.NewHashtable ();
		bool optimize_during_next_flush = false;

		public LuceneIndexingDriver (string index_name, int minor_version)
			: base (index_name, minor_version)
		{
			// Open the on-disk index if it already exists, otherwise create it.
			if (Exists ())
				Open ();
			else
				Create ();
		}

		public LuceneIndexingDriver (string index_name)
			: this (index_name, 0)
		{ }

		////////////////////////////////////////////////////////////////

		//
		// Implementation of the IIndexer interface
		//
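
		// Typical usage, as a sketch (the daemon code that actually drives
		// this lives elsewhere; the names below are illustrative):
		//
		//     LuceneIndexingDriver driver = new LuceneIndexingDriver ("MyIndex");
		//     driver.Add (indexable);   // queue an add or a property change
		//     driver.Remove (uri);      // queue a removal
		//     driver.Flush ();          // apply the queue, then raise FlushEvent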

		public void Add (Indexable indexable)
		{
			lock (pending_by_uri) {

				Indexable existing_indexable;
				existing_indexable = pending_by_uri [indexable.Uri] as Indexable;

				// If we already have an Indexable queued up and this is a property-change
				// only Indexable, just change the original Indexable's properties.
				if (existing_indexable != null && indexable.PropertyChangesOnly) {
					existing_indexable.MergeProperties (indexable);
					return;
				}

				pending_by_uri [indexable.Uri] = indexable;
			}
		}

		public void Remove (Uri uri)
		{
			lock (pending_by_uri) {
				// A null entry marks this URI for removal at the next flush.
				pending_by_uri [uri] = null;
			}
		}

		public void Optimize ()
		{
			optimize_during_next_flush = true;
		}
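
		// FlushAndBlock applies everything queued in pending_by_uri in three
		// steps: (1) delete the existing documents for every pending URI from
		// the primary and secondary indexes, (2) write out the new documents,
		// re-filtering content where needed, and (3) close the writers and
		// return receipts describing what happened.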
104 public IndexerReceipt
[] FlushAndBlock ()
106 ArrayList receipt_queue
;
108 lock (pending_by_uri
) {
110 receipt_queue
= new ArrayList ();

				// Step #1: Delete all items with the same URIs
				// as our pending items from the index.

				IndexReader primary_reader, secondary_reader;
				primary_reader = IndexReader.Open (PrimaryStore);
				secondary_reader = IndexReader.Open (SecondaryStore);

				LNS.BooleanQuery prop_change_query = null;

				int delete_count = 0;
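
				// Property-only changes are handled against the secondary
				// index: here we just collect a query for them, load the
				// affected secondary documents after this loop, and rewrite
				// them in Step #2.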

				foreach (DictionaryEntry entry in pending_by_uri) {
					Uri uri = entry.Key as Uri;
					Indexable indexable = entry.Value as Indexable;

					// If this indexable only contains property changes,
					// all we do at this point is assemble the query that we will
					// use to retrieve the current property values.  We'll ultimately
					// need to delete the existing secondary documents, but not
					// until we've loaded them...
					if (indexable != null && indexable.PropertyChangesOnly) {
						if (prop_change_query == null)
							prop_change_query = new LNS.BooleanQuery ();
						prop_change_query.Add (UriQuery ("Uri", uri), false, false);
						continue;
					}

					Logger.Log.Debug ("-{0}", uri);

					Term term;
					term = new Term ("Uri", UriFu.UriToSerializableString (uri));
					delete_count += primary_reader.Delete (term);
					if (secondary_reader != null)
						secondary_reader.Delete (term);

					// When we delete an indexable, also delete any children.
					// FIXME: Shouldn't we also delete any children of children, etc.?
					term = new Term ("ParentUri", UriFu.UriToSerializableString (uri));
					delete_count += primary_reader.Delete (term);
					if (secondary_reader != null)
						secondary_reader.Delete (term);

					// If this is a strict removal (and not a deletion that
					// we are doing in anticipation of adding something back),
					// queue up a removed event.
					if (indexable == null) {
						IndexerRemovedReceipt r;
						r = new IndexerRemovedReceipt (uri);
						receipt_queue.Add (r);
					}
				}

				// Keep the cached item count in sync: adjust it by the number
				// of deletions if we already have a count, otherwise recompute
				// it from the reader.
				if (HaveItemCount)
					AdjustItemCount (-delete_count);
				else
					SetItemCount (primary_reader);

				// If we are doing any property changes,
				// we read in the current secondary documents
				// and store them in a hash table for use
				// later.  Then we delete the current
				// secondary documents.
				Hashtable current_docs = null;
				if (prop_change_query != null) {
					current_docs = UriFu.NewHashtable ();

					LNS.IndexSearcher secondary_searcher;
					secondary_searcher = new LNS.IndexSearcher (secondary_reader);

					LNS.Hits hits;
					hits = secondary_searcher.Search (prop_change_query);

					ArrayList delete_terms;
					delete_terms = new ArrayList ();

					int N = hits.Length ();
					for (int i = 0; i < N; ++i) {
						Document doc;
						doc = hits.Doc (i);

						Uri doc_uri;
						doc_uri = GetUriFromDocument (doc);

						current_docs [doc_uri] = doc;

						Term term;
						term = new Term ("Uri", UriFu.UriToSerializableString (doc_uri));
						delete_terms.Add (term);
					}

					secondary_searcher.Close ();

					foreach (Term term in delete_terms)
						secondary_reader.Delete (term);
				}

				// FIXME: Would we gain more "transactionality" if we didn't close
				// the readers until later?  Would that even be possible, or will
				// it create locking problems?
				primary_reader.Close ();
				secondary_reader.Close ();

				// Step #2: Write out the pending adds.

				if (text_cache != null)
					text_cache.BeginTransaction ();

				IndexWriter primary_writer, secondary_writer;
				primary_writer = new IndexWriter (PrimaryStore, IndexingAnalyzer, false);
				secondary_writer = null;
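
				// Each remaining Indexable produces an IndexerAddedReceipt.
				// Property-only changes are rewritten from the secondary
				// documents cached above; everything else is run through the
				// filters and indexed from scratch.  The secondary writer is
				// only created once something actually needs it.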

				foreach (Indexable indexable in pending_by_uri.Values) {

					if (indexable == null)
						continue;

					IndexerAddedReceipt r;
					r = new IndexerAddedReceipt (indexable.Uri);
					r.Properties = indexable.Properties;

					// Handle property changes
					if (indexable.PropertyChangesOnly) {
						Logger.Log.Debug ("+{0} (props only)", indexable.DisplayUri);

						Document current_doc;
						current_doc = current_docs [indexable.Uri] as Document;

						Document new_doc;
						new_doc = RewriteDocument (current_doc, indexable);

						// Write out the new document...
						if (secondary_writer == null)
							secondary_writer = new IndexWriter (SecondaryStore, IndexingAnalyzer, false);
						secondary_writer.AddDocument (new_doc);

						r.PropertyChangesOnly = true;
						receipt_queue.Add (r);

						continue; // ...and proceed to the next Indexable
					}

					Logger.Log.Debug ("+{0}", indexable.DisplayUri);

					Filter filter = null;

					try {
						FilterFactory.FilterIndexable (indexable, text_cache, out filter);
					} catch (Exception e) {
						Logger.Log.Error ("Unable to filter {0} (mimetype={1})", indexable.DisplayUri, indexable.MimeType);
						Logger.Log.Error (e);
						indexable.NoContent = true;
					}

					Document primary_doc = null, secondary_doc = null;

					try {
						BuildDocuments (indexable, out primary_doc, out secondary_doc);
						primary_writer.AddDocument (primary_doc);
					} catch (Exception ex) {

						// If an exception was thrown, something bad probably happened
						// while we were filtering the content.  Set NoContent to true
						// and try again, so that the item at least ends up in the
						// index even if its content does not.

						Logger.Log.Debug ("First attempt to index {0} failed", indexable.DisplayUri);
						Logger.Log.Debug (ex);

						indexable.NoContent = true;

						try {
							BuildDocuments (indexable, out primary_doc, out secondary_doc);
							primary_writer.AddDocument (primary_doc);
						} catch (Exception ex2) {
							Logger.Log.Debug ("Second attempt to index {0} failed, giving up...", indexable.DisplayUri);
							Logger.Log.Debug (ex2);
						}
					}

					if (filter != null) {
						r.FilterName = filter.GetType ().ToString ();
						r.FilterVersion = filter.Version;
					}

					receipt_queue.Add (r);

					if (secondary_doc != null) {
						if (secondary_writer == null)
							secondary_writer = new IndexWriter (SecondaryStore, IndexingAnalyzer, false);

						secondary_writer.AddDocument (secondary_doc);
					}
				}

				if (text_cache != null)
					text_cache.CommitTransaction ();

				if (optimize_during_next_flush) {
					Logger.Log.Debug ("Optimizing");
					primary_writer.Optimize ();
					if (secondary_writer == null)
						secondary_writer = new IndexWriter (SecondaryStore, IndexingAnalyzer, false);
					secondary_writer.Optimize ();
					optimize_during_next_flush = false;
				}

				// Step #3: Close our writers and return the events to
				// indicate what has happened.

				primary_writer.Close ();
				if (secondary_writer != null)
					secondary_writer.Close ();

				pending_by_uri.Clear ();

				IndexerReceipt [] receipt_array;
				receipt_array = new IndexerReceipt [receipt_queue.Count];
				for (int i = 0; i < receipt_queue.Count; ++i)
					receipt_array [i] = (IndexerReceipt) receipt_queue [i];

				return receipt_array;
			}
		}

		public void Flush ()
		{
			// FIXME: Right now we don't support a non-blocking flush,
			// but it would be easy enough to do it in a thread.

			IndexerReceipt [] receipts;
			receipts = FlushAndBlock ();

			if (FlushEvent != null) {
				if (receipts != null)
					FlushEvent (this, receipts); // this returns the receipts to anyone who cares
				FlushEvent (this, null); // and this indicates that we are all done
			}
		}
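
		// Raised at the end of every Flush: once with the receipts for the
		// work that was done, and then once more with null to signal that the
		// flush is complete.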
		public event IIndexerFlushHandler FlushEvent;

		////////////////////////////////////////////////////////////////

		public void OptimizeNow ()
		{
			IndexWriter writer;

			writer = new IndexWriter (PrimaryStore, null, false);
			writer.Optimize ();
			writer.Close ();

			if (SecondaryStore != null) {
				writer = new IndexWriter (SecondaryStore, null, false);
				writer.Optimize ();
				writer.Close ();
			}
		}

		public void Merge (LuceneCommon index_to_merge)
		{
			// FIXME: Error recovery

			// Merge the primary index
			IndexWriter primary_writer;
			Lucene.Net.Store.Directory [] primary_store = { index_to_merge.PrimaryStore };
			primary_writer = new IndexWriter (PrimaryStore, null, false);

			primary_writer.AddIndexes (primary_store);
			primary_writer.Close ();

			// Merge the secondary index
			IndexWriter secondary_writer;
			Lucene.Net.Store.Directory [] secondary_store = { index_to_merge.SecondaryStore };
			secondary_writer = new IndexWriter (SecondaryStore, null, false);

			secondary_writer.AddIndexes (secondary_store);
			secondary_writer.Close ();
		}
	}
}