cvsimport
[beagle.git] / beagled / Lucene.Net / Index / ParallelReader.cs
blob641d5dd17026a8384ef91d6ca6d8c8a8fb66408b
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using System;
18 using Document = Lucene.Net.Documents.Document;
19 using Field = Lucene.Net.Documents.Field;
21 namespace Lucene.Net.Index
/// <summary>An IndexReader which reads multiple, parallel indexes.  Each index added
/// must have the same number of documents, but typically each contains
/// different fields.  Each document contains the union of the fields of all
/// documents with the same document number.  When searching, matches for a
/// query term are from the first index added that has the field.
///
/// <para>This is useful, e.g., with collections that have large fields which
/// change rarely and small fields that change more frequently.  The smaller
/// fields may be re-indexed in a new index and both indexes may be searched
/// together.</para>
///
/// <para><strong>Warning:</strong> It is up to you to make sure all indexes
/// are created and modified the same way.  For example, if you add
/// documents to one index, you need to add the same documents in the
/// same order to the other indexes.  <em>Failure to do so will result in
/// undefined behavior</em>.</para>
/// </summary>
41 public class ParallelReader : IndexReader
// Every sub-reader handed to Add, in the order it was added.
private System.Collections.ArrayList readers = new System.Collections.ArrayList();
// Field name -> the first sub-reader added that reported the field (keys are kept sorted).
private System.Collections.SortedList fieldToReader = new System.Collections.SortedList();
// Sub-readers added with ignoreStoredFields == false; Document(n) merges fields from these.
private System.Collections.ArrayList storedFieldReaders = new System.Collections.ArrayList();

// Captured from the first sub-reader added; hasDeletions is additionally
// updated by DoDelete and DoUndeleteAll.
private int maxDoc;
private int numDocs;
private bool hasDeletions;

/// <summary>Creates an empty ParallelReader; sub-readers are attached afterwards through Add.</summary>
public ParallelReader() : base(null)
{
}
/// <summary>Adds a sub-reader whose stored fields take part in <c>Document(n)</c>.
/// Shorthand for <c>Add(reader, false)</c>.</summary>
/// <param name="reader">the sub-reader to attach</param>
public virtual void Add(IndexReader reader)
{
    const bool ignoreStoredFields = false;
    Add(reader, ignoreStoredFields);
}
/// <summary>Adds an IndexReader, optionally excluding its stored fields from
/// the documents returned by this reader.  Ignoring stored fields can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.</summary>
/// <param name="reader">the sub-reader to attach; must not be null</param>
/// <param name="ignoreStoredFields">when true, the reader is searched but its
/// stored fields are not merged into documents</param>
/// <exception cref="System.ArgumentNullException">if <paramref name="reader"/> is null</exception>
/// <exception cref="System.ArgumentException">if the reader's MaxDoc() or NumDocs()
/// differs from that of the first reader added</exception>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    // Fail with a descriptive exception instead of a NullReferenceException below.
    if (reader == null)
    {
        throw new System.ArgumentNullException("reader");
    }

    if (readers.Count == 0)
    {
        // The first reader added defines the values every later reader must match.
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    // check compatibility
    if (reader.MaxDoc() != maxDoc)
    {
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    }
    if (reader.NumDocs() != numDocs)
    {
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
    }

    // update fieldToReader map: a field stays bound to the first reader that reported it
    foreach (System.Collections.DictionaryEntry fi in reader.GetFieldNames(IndexReader.FieldOption.ALL))
    {
        System.String field = fi.Key.ToString();
        if (fieldToReader[field] == null)
        {
            fieldToReader[field] = reader;
        }
    }

    if (!ignoreStoredFields)
    {
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    }
    readers.Add(reader);
}
/// <summary>Returns the document count captured from the first sub-reader added
/// (0 while no reader has been added).  The value is not adjusted by DoDelete.</summary>
public override int NumDocs()
{
    return this.numDocs;
}
/// <summary>Returns the MaxDoc value captured from the first sub-reader added
/// (0 while no reader has been added).</summary>
public override int MaxDoc()
{
    return this.maxDoc;
}
/// <summary>Returns the deletion flag: taken from the first sub-reader added,
/// set by DoDelete and cleared by DoUndeleteAll.</summary>
public override bool HasDeletions()
{
    return this.hasDeletions;
}
/// <summary>Reports whether document <paramref name="n"/> is deleted by asking
/// the first sub-reader only; returns false when no reader has been added.</summary>
public override bool IsDeleted(int n)
{
    if (readers.Count <= 0)
    {
        return false;
    }

    // check first reader
    IndexReader first = (IndexReader) readers[0];
    return first.IsDeleted(n);
}
/// <summary>Deletes document <paramref name="n"/> in every sub-reader and marks
/// this reader as having deletions.</summary>
protected internal override void DoDelete(int n)
{
    // delete in all readers
    foreach (IndexReader reader in readers)
    {
        reader.DoDelete(n);
    }
    hasDeletions = true;
}
/// <summary>Undeletes all documents in every sub-reader and clears the
/// deletion flag of this reader.</summary>
protected internal override void DoUndeleteAll()
{
    // undeleteAll in all readers
    foreach (IndexReader reader in readers)
    {
        reader.DoUndeleteAll();
    }
    hasDeletions = false;
}
/// <summary>Builds document <paramref name="n"/> by appending the fields of the
/// same-numbered document from each reader in storedFieldReaders, in the
/// order those readers were added.</summary>
public override Document Document(int n)
{
    Document merged = new Document();
    foreach (IndexReader reader in storedFieldReaders)
    {
        // append fields from this stored-field reader
        Document partial = reader.Document(n);
        foreach (Field field in partial.Fields())
        {
            merged.Add(field);
        }
    }
    return merged;
}
/// <summary>Collects the term frequency vectors of document <paramref name="n"/>
/// for every known field, asking the sub-reader that owns each field.</summary>
/// <param name="n">document number</param>
/// <returns>the non-null vectors, in the sorted field order of fieldToReader;
/// an empty array when no field yields a vector</returns>
public override TermFreqVector[] GetTermFreqVectors(int n)
{
    System.Collections.ArrayList results = new System.Collections.ArrayList();

    // fieldToReader maps field name (key) -> owning reader (value).  The previous
    // code cast the key to IndexReader and the value to String, which throws
    // InvalidCastException on the first entry.
    foreach (System.Collections.DictionaryEntry e in fieldToReader)
    {
        System.String field = (System.String) e.Key;
        IndexReader reader = (IndexReader) e.Value;
        TermFreqVector vector = reader.GetTermFreqVector(n, field);
        if (vector != null)
        {
            results.Add(vector);
        }
    }
    return (TermFreqVector[]) (results.ToArray(typeof(TermFreqVector)));
}
/// <summary>Returns the term frequency vector of <paramref name="field"/> in
/// document <paramref name="n"/>, taken from the sub-reader that owns the field.</summary>
/// <returns>the vector reported by the owning reader, or null when no
/// sub-reader knows the field</returns>
public override TermFreqVector GetTermFreqVector(int n, System.String field)
{
    IndexReader reader = (IndexReader) fieldToReader[field];
    // An unknown field has no owning reader; report "no vector" instead of
    // failing with a NullReferenceException.
    if (reader == null)
    {
        return null;
    }
    return reader.GetTermFreqVector(n, field);
}
/// <summary>Reports whether <paramref name="field"/> has norms, according to
/// the sub-reader that owns the field.</summary>
/// <returns>the owning reader's answer, or false when no sub-reader knows the field</returns>
public override bool HasNorms(System.String field)
{
    IndexReader reader = (IndexReader) fieldToReader[field];
    // Unknown field: answer false rather than dereferencing null.
    if (reader == null)
    {
        return false;
    }
    return reader.HasNorms(field);
}
/// <summary>Returns the norms of <paramref name="field"/> from the sub-reader
/// that owns the field.</summary>
/// <returns>the owning reader's norms, or null when no sub-reader knows the field</returns>
public override byte[] Norms(System.String field)
{
    IndexReader reader = (IndexReader) fieldToReader[field];
    // Unknown field: return null rather than dereferencing null.
    if (reader == null)
    {
        return null;
    }
    return reader.Norms(field);
}
/// <summary>Asks the sub-reader that owns <paramref name="field"/> to write the
/// field's norms into <paramref name="result"/> starting at
/// <paramref name="offset"/>.  Does nothing when no sub-reader knows the field.</summary>
public override void Norms(System.String field, byte[] result, int offset)
{
    IndexReader reader = (IndexReader) fieldToReader[field];
    // Unknown field: leave the buffer untouched rather than dereferencing null.
    if (reader != null)
    {
        reader.Norms(field, result, offset);
    }
}
/// <summary>Forwards a norm update for document <paramref name="n"/> to the
/// sub-reader that owns <paramref name="field"/>.  Does nothing when no
/// sub-reader knows the field.</summary>
protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed)
{
    IndexReader reader = (IndexReader) fieldToReader[field];
    // Unknown field: nothing to update; avoid a NullReferenceException.
    if (reader != null)
    {
        reader.DoSetNorm(n, field, value_Renamed);
    }
}
/// <summary>Returns a ParallelTermEnum over this reader, created without a
/// starting term.</summary>
public override TermEnum Terms()
{
    TermEnum merged = new ParallelTermEnum(this);
    return merged;
}
/// <summary>Returns a ParallelTermEnum over this reader, created for the given
/// starting <paramref name="term"/>.</summary>
public override TermEnum Terms(Term term)
{
    TermEnum merged = new ParallelTermEnum(this, term);
    return merged;
}
/// <summary>Returns the document frequency of <paramref name="term"/> as reported
/// by the sub-reader that owns the term's field.</summary>
/// <returns>the owning reader's count, or 0 when no sub-reader knows the field</returns>
public override int DocFreq(Term term)
{
    IndexReader reader = (IndexReader) fieldToReader[term.Field()];
    // A term in an unknown field occurs in no document; avoid a NullReferenceException.
    if (reader == null)
    {
        return 0;
    }
    return reader.DocFreq(term);
}
/// <summary>Returns a ParallelTermDocs created for <paramref name="term"/>
/// (its constructor seeks to the term).</summary>
public override TermDocs TermDocs(Term term)
{
    TermDocs cursor = new ParallelTermDocs(this, term);
    return cursor;
}
/// <summary>Returns a ParallelTermDocs that has not been seeked to any term yet.</summary>
public override TermDocs TermDocs()
{
    TermDocs cursor = new ParallelTermDocs(this);
    return cursor;
}
/// <summary>Returns a ParallelTermPositions created for <paramref name="term"/>
/// (its constructor seeks to the term).</summary>
public override TermPositions TermPositions(Term term)
{
    TermPositions cursor = new ParallelTermPositions(this, term);
    return cursor;
}
/// <summary>Returns a ParallelTermPositions that has not been seeked to any term yet.</summary>
public override TermPositions TermPositions()
{
    TermPositions cursor = new ParallelTermPositions(this);
    return cursor;
}
/// <summary>Commits every sub-reader, in the order the readers were added.</summary>
protected internal override void DoCommit()
{
    foreach (IndexReader reader in readers)
    {
        reader.Commit();
    }
}
/// <summary>Closes every sub-reader, in the order the readers were added,
/// while holding the monitor of this instance.</summary>
protected internal override void DoClose()
{
    lock (this)
    {
        foreach (IndexReader reader in readers)
        {
            reader.Close();
        }
    }
}
/// <summary>Returns the names of all fields known to this reader.</summary>
/// <returns>a Hashtable in which every field name from fieldToReader is
/// stored as both key and value</returns>
public override System.Collections.ICollection GetFieldNames()
{
    int fieldCount = fieldToReader.Count;
    System.Collections.Hashtable names = new System.Collections.Hashtable(fieldCount);
    for (int i = 0; i < fieldCount; i++)
    {
        object name = fieldToReader.GetKey(i);
        names.Add(name, name);
    }
    return names;
}
/// <summary>Returns the union of the field names that each sub-reader reports
/// for <c>GetFieldNames(indexed)</c>.</summary>
/// <returns>a Hashtable in which every distinct field name is stored as both
/// key and value</returns>
public override System.Collections.ICollection GetFieldNames(bool indexed)
{
    System.Collections.Hashtable union = new System.Collections.Hashtable();
    foreach (IndexReader reader in readers)
    {
        // Each element of the sub-reader's collection is read as a DictionaryEntry
        // whose key is the field name.
        foreach (System.Collections.DictionaryEntry entry in reader.GetFieldNames(indexed))
        {
            System.String name = entry.Key.ToString();
            if (!union.ContainsKey(name))
            {
                union.Add(name, name);
            }
        }
    }
    return union;
}
/// <summary>Returns the union of the field names that each sub-reader reports
/// for <c>GetIndexedFieldNames(tvSpec)</c>.</summary>
/// <returns>a Hashtable in which every distinct field name is stored as both
/// key and value</returns>
public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
{
    System.Collections.Hashtable union = new System.Collections.Hashtable();
    foreach (IndexReader reader in readers)
    {
        // Each element of the sub-reader's collection is read as a DictionaryEntry
        // whose key is the field name.
        foreach (System.Collections.DictionaryEntry entry in reader.GetIndexedFieldNames(tvSpec))
        {
            System.String name = entry.Key.ToString();
            if (!union.ContainsKey(name))
            {
                union.Add(name, name);
            }
        }
    }
    return union;
}
/// <summary>Returns the union of the field names that each sub-reader reports
/// for <c>GetFieldNames(fieldNames)</c>.</summary>
/// <returns>a Hashtable in which every distinct field name is stored as both
/// key and value</returns>
public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldNames)
{
    System.Collections.Hashtable union = new System.Collections.Hashtable();
    foreach (IndexReader reader in readers)
    {
        // Each element of the sub-reader's collection is read as a DictionaryEntry
        // whose key is the field name.
        foreach (System.Collections.DictionaryEntry entry in reader.GetFieldNames(fieldNames))
        {
            System.String name = entry.Key.ToString();
            if (!union.ContainsKey(name))
            {
                union.Add(name, name);
            }
        }
    }
    return union;
}
/// <summary>Term enumeration over a ParallelReader: walks the fields of
/// fieldToReader in sorted order and, for each field, enumerates the terms of
/// the sub-reader that owns it.</summary>
private class ParallelTermEnum : TermEnum
{
    private void InitBlock(ParallelReader enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }
    private ParallelReader enclosingInstance;
    public ParallelReader Enclosing_Instance
    {
        get
        {
            return enclosingInstance;
        }
    }

    // Field currently being enumerated; null when there is nothing (left) to enumerate.
    private System.String field;
    // Enumeration of the reader owning `field`; null when there is nothing (left) to enumerate.
    private TermEnum termEnum;

    /// <summary>Starts at the first (smallest) field known to the reader.  With
    /// no fields at all the enumeration is empty.</summary>
    public ParallelTermEnum(ParallelReader enclosingInstance)
    {
        InitBlock(enclosingInstance);
        System.Collections.SortedList fields = Enclosing_Instance.fieldToReader;
        // GetKey(0) throws on an empty SortedList, so test Count first.
        if (fields.Count > 0)
        {
            field = (System.String) fields.GetKey(0);
            termEnum = ((IndexReader) fields[field]).Terms();
        }
    }

    /// <summary>Starts at <paramref name="term"/> in the reader owning the term's
    /// field.  If no sub-reader knows the field the enumeration is empty.</summary>
    public ParallelTermEnum(ParallelReader enclosingInstance, Term term)
    {
        InitBlock(enclosingInstance);
        field = term.Field();
        IndexReader reader = (IndexReader) Enclosing_Instance.fieldToReader[field];
        if (reader != null)
        {
            termEnum = reader.Terms(term);
        }
    }

    /// <summary>Advances to the next term, moving on to the following field(s)
    /// once the current field is exhausted.</summary>
    /// <returns>true if positioned on a term, false when the enumeration is finished</returns>
    public override bool Next()
    {
        if (field == null || termEnum == null)
        {
            return false;
        }

        // still within field?  Compare by value: the keys of fieldToReader are not
        // guaranteed to be the same string instances as the names inside Term.
        if (termEnum.Next() && System.String.Equals(termEnum.Term().Field(), field))
        {
            return true; // yes, keep going
        }

        termEnum.Close(); // close old termEnum
        termEnum = null;

        // find the next field with terms, if any: continue with the keys that
        // sort strictly after the current field
        System.Collections.SortedList fields = Enclosing_Instance.fieldToReader;
        int current = fields.IndexOfKey(field);
        if (current >= 0)
        {
            for (int i = current + 1; i < fields.Count; i++)
            {
                System.String candidate = (System.String) fields.GetKey(i);
                // Seek to the first term of the candidate field; a plain Terms()
                // would start at the first term of the whole sub-reader instead.
                TermEnum candidateEnum = ((IndexReader) fields[candidate]).Terms(new Term(candidate, ""));
                Term first = candidateEnum.Term();
                if (first != null && System.String.Equals(first.Field(), candidate))
                {
                    field = candidate;
                    termEnum = candidateEnum;
                    return true;
                }
                candidateEnum.Close(); // this field has no terms (e.g. stored-only)
            }
        }

        field = null;
        return false; // no more fields
    }

    /// <summary>Returns the current term, or null when the enumeration is empty or finished.</summary>
    public override Term Term()
    {
        if (termEnum == null)
        {
            return null;
        }
        return termEnum.Term();
    }

    /// <summary>Returns the document frequency of the current term, or 0 when
    /// the enumeration is empty or finished.</summary>
    public override int DocFreq()
    {
        if (termEnum == null)
        {
            return 0;
        }
        return termEnum.DocFreq();
    }

    /// <summary>Closes the underlying enumeration, if one is open.</summary>
    public override void Close()
    {
        if (termEnum != null)
        {
            termEnum.Close();
        }
    }
}
/// <summary>Wraps a TermDocs of the sub-reader owning the sought term's field
/// in order to support Seek(Term) across the parallel readers.</summary>
private class ParallelTermDocs : TermDocs
{
    private void InitBlock(ParallelReader enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }
    private ParallelReader enclosingInstance;
    public ParallelReader Enclosing_Instance
    {
        get
        {
            return enclosingInstance;
        }
    }

    // Delegate of the most recent Seek; null before the first Seek or when the
    // sought field is unknown to every sub-reader.
    protected internal TermDocs termDocs;

    /// <summary>Creates an instance that has not been seeked to any term yet.</summary>
    public ParallelTermDocs(ParallelReader enclosingInstance)
    {
        InitBlock(enclosingInstance);
    }

    /// <summary>Creates an instance and seeks it to <paramref name="term"/>.</summary>
    public ParallelTermDocs(ParallelReader enclosingInstance, Term term)
    {
        InitBlock(enclosingInstance);
        Seek(term);
    }

    /// <summary>Returns the current document number of the delegate.  Only valid
    /// after Next() or SkipTo() has returned true.</summary>
    public virtual int Doc()
    {
        return termDocs.Doc();
    }

    /// <summary>Returns the term frequency in the current document of the
    /// delegate.  Only valid after Next() or SkipTo() has returned true.</summary>
    public virtual int Freq()
    {
        return termDocs.Freq();
    }

    /// <summary>Positions this instance on <paramref name="term"/> using the
    /// sub-reader that owns the term's field.  When no sub-reader knows the
    /// field, the instance becomes an empty enumeration.</summary>
    public virtual void Seek(Term term)
    {
        IndexReader reader = (IndexReader) Enclosing_Instance.fieldToReader[term.Field()];
        termDocs = (reader != null) ? reader.TermDocs(term) : null;
    }

    /// <summary>Seeks to the current term of <paramref name="termEnum"/>.</summary>
    public virtual void Seek(TermEnum termEnum)
    {
        Seek(termEnum.Term());
    }

    /// <summary>Advances to the next document; false when there is none or no
    /// delegate is open.</summary>
    public virtual bool Next()
    {
        if (termDocs == null)
        {
            return false;
        }
        return termDocs.Next();
    }

    /// <summary>Bulk-reads documents and frequencies through the delegate;
    /// returns 0 when no delegate is open.</summary>
    public virtual int Read(int[] docs, int[] freqs)
    {
        if (termDocs == null)
        {
            return 0;
        }
        return termDocs.Read(docs, freqs);
    }

    /// <summary>Skips to <paramref name="target"/> through the delegate; false
    /// when no delegate is open.</summary>
    public virtual bool SkipTo(int target)
    {
        if (termDocs == null)
        {
            return false;
        }
        return termDocs.SkipTo(target);
    }

    /// <summary>Closes the delegate, if one is open.</summary>
    public virtual void Close()
    {
        if (termDocs != null)
        {
            termDocs.Close();
        }
    }
}
/// <summary>ParallelTermDocs variant whose delegate is a TermPositions, so that
/// term positions can be read as well.</summary>
private class ParallelTermPositions : ParallelTermDocs, TermPositions
{
    private void InitBlock(ParallelReader enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }
    private ParallelReader enclosingInstance;
    public new ParallelReader Enclosing_Instance
    {
        get
        {
            return enclosingInstance;
        }
    }

    /// <summary>Creates an instance that has not been seeked to any term yet.</summary>
    public ParallelTermPositions(ParallelReader enclosingInstance) : base(enclosingInstance)
    {
        InitBlock(enclosingInstance);
    }

    /// <summary>Creates an instance and seeks it to <paramref name="term"/>.</summary>
    public ParallelTermPositions(ParallelReader enclosingInstance, Term term) : base(enclosingInstance)
    {
        // The single-argument base constructor is used on purpose: Seek reads this
        // class's own enclosingInstance, which is only set by InitBlock below.
        InitBlock(enclosingInstance);
        Seek(term);
    }

    /// <summary>Positions this instance on <paramref name="term"/> using the
    /// TermPositions of the sub-reader that owns the term's field.  When no
    /// sub-reader knows the field, the delegate is cleared instead of raising
    /// a NullReferenceException.</summary>
    public override void Seek(Term term)
    {
        IndexReader reader = (IndexReader) Enclosing_Instance.fieldToReader[term.Field()];
        termDocs = (reader != null) ? reader.TermPositions(term) : null;
    }

    /// <summary>Returns the next position of the term in the current document.
    /// Only valid after Next() or SkipTo() has returned true.</summary>
    public virtual int NextPosition()
    {
        return ((TermPositions) termDocs).NextPosition();
    }
}