2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using Document
= Lucene
.Net
.Documents
.Document
;
18 using Field
= Lucene
.Net
.Documents
.Field
;
19 using Directory
= Lucene
.Net
.Store
.Directory
;
20 namespace Lucene
.Net
.Index
23 /// <summary>An IndexReader which reads multiple indexes, appending their content.
26 /// <version> $Id: MultiReader.cs,v 1.2 2005/10/06 19:29:55 dsd Exp $
28 public class MultiReader
: IndexReader
// Readers aggregated by this MultiReader; assigned in Initialize.
30 private IndexReader
[] subReaders
;
// Offset table with subReaders.Length + 1 slots; the final slot receives
// the combined maxDoc in Initialize.
31 private int[] starts
; // 1st docno for each segment
// Cache of merged norm arrays keyed by field name, created through
// Hashtable.Synchronized.
32 private System
.Collections
.Hashtable normsCache
= System
.Collections
.Hashtable
.Synchronized(new System
.Collections
.Hashtable());
// Sum of MaxDoc() over all sub-readers, accumulated in Initialize.
33 private int maxDoc
= 0;
// Cached document count; DoDelete writes -1 here to invalidate the cache.
34 private int numDocs
= -1;
// Deletion flag; DoUndeleteAll resets it to false.
35 private bool hasDeletions
= false;
/// <summary>
/// <p>Creates a MultiReader that aggregates the supplied set of (sub)readers.
/// Directory locking for delete, undeleteAll, and setNorm operations is
/// left to the subreaders.</p>
/// <p>Note that all subreaders are closed if this MultiReader is closed.</p>
/// </summary>
/// <param name="subReaders">set of (sub)readers; the base reader receives the
/// directory of the first one, or null when the array is empty</param>
/// <throws> IOException </throws>
public MultiReader(IndexReader[] subReaders)
    : base(subReaders.Length > 0 ? subReaders[0].Directory() : null)
{
    Initialize(subReaders);
}
/// <summary>
/// Construct reading the named set of readers. The directory, segment infos
/// and close-directory flag are forwarded unchanged to the base reader.
/// </summary>
public /*internal*/ MultiReader(
    Directory directory,
    SegmentInfos sis,
    bool closeDirectory,
    IndexReader[] subReaders)
    : base(directory, sis, closeDirectory)
{
    Initialize(subReaders);
}
/// <summary>
/// Stores the sub-readers and builds the document-number offset table.
/// Afterwards starts[i] holds the first document number of sub-reader i,
/// starts[subReaders.Length] holds the combined MaxDoc(), and hasDeletions
/// is true when at least one sub-reader reports deletions.
/// </summary>
/// <param name="subReaders">readers to aggregate, in document-number order</param>
private void Initialize(IndexReader[] subReaders)
{
    this.subReaders = subReaders;
    starts = new int[subReaders.Length + 1]; // build starts array
    for (int i = 0; i < subReaders.Length; i++)
    {
        // A segment begins where all previous segments end, so the offset
        // has to be recorded before maxDoc is advanced.
        starts[i] = maxDoc;
        maxDoc += subReaders[i].MaxDoc(); // compute maxDocs

        if (subReaders[i].HasDeletions())
        {
            hasDeletions = true;
        }
    }
    starts[subReaders.Length] = maxDoc; // sentinel: one past the last document
}
/// <summary>
/// Return an array of term frequency vectors for the specified document.
/// The array contains a vector for each vectorized Field in the document.
/// Each vector contains term numbers and frequencies for all terms
/// in a given vectorized Field.
/// If no such fields existed, the method returns null.
/// </summary>
/// <param name="n">document number in this reader's numbering</param>
public override TermFreqVector[] GetTermFreqVectors(int n)
{
    int segment = ReaderIndex(n);              // which sub-reader owns the document
    IndexReader owner = subReaders[segment];
    int localDoc = n - starts[segment];        // translate to the segment's numbering
    return owner.GetTermFreqVectors(localDoc);
}
/// <summary>
/// Returns the term frequency vector of one field of the specified document
/// by delegating to the sub-reader that owns the document.
/// </summary>
/// <param name="n">document number in this reader's numbering</param>
/// <param name="field">name of the field whose vector is requested</param>
public override TermFreqVector GetTermFreqVector(int n, System.String field)
{
    int segment = ReaderIndex(n);              // which sub-reader owns the document
    IndexReader owner = subReaders[segment];
    int localDoc = n - starts[segment];        // translate to the segment's numbering
    return owner.GetTermFreqVector(localDoc, field);
}
/// <summary>
/// Document count across all sub-readers. The statements that remain add up
/// NumDocs() of every sub-reader into a local total on a cache miss.
/// </summary>
// NOTE(review): the cache test, the store into numDocs and the return
// statement are not present in this copy of the method (the embedded
// numbering jumps 90 -> 97 and 99 -> 106). Presumably numDocs == -1 marks a
// miss, matching the -1 written by DoDelete -- confirm against upstream.
90 public override int NumDocs()
97 int n
= 0; // cache miss--recompute
98 for (int i
= 0; i
< subReaders
.Length
; i
++)
99 n
+= subReaders
[i
].NumDocs(); // sum from readers
/// <summary>
/// Returns the combined MaxDoc() of all sub-readers, i.e. the total that
/// Initialize accumulates while building the starts table.
/// </summary>
public override int MaxDoc()
{
    return maxDoc;
}
/// <summary>
/// Fetches the stored document with the given number from the sub-reader
/// that owns it.
/// </summary>
/// <param name="n">document number in this reader's numbering</param>
public override Document Document(int n)
{
    int segment = ReaderIndex(n);              // which sub-reader owns the document
    IndexReader owner = subReaders[segment];
    int localDoc = n - starts[segment];        // translate to the segment's numbering
    return owner.Document(localDoc);
}
/// <summary>
/// Asks the sub-reader that owns document n whether that document is deleted.
/// </summary>
/// <param name="n">document number in this reader's numbering</param>
public override bool IsDeleted(int n)
{
    int segment = ReaderIndex(n);              // which sub-reader owns the document
    IndexReader owner = subReaders[segment];
    int localDoc = n - starts[segment];        // translate to the segment's numbering
    return owner.IsDeleted(localDoc);
}
/// <summary>
/// Returns the deletion flag kept by this reader (the hasDeletions field,
/// which DoUndeleteAll resets to false).
/// </summary>
public override bool HasDeletions()
{
    return hasDeletions;
}
/// <summary>
/// Deletes document n by forwarding the request to the sub-reader that owns
/// it, invalidating the cached document count and recording that this
/// reader now has deletions.
/// </summary>
/// <param name="n">document number in this reader's numbering</param>
protected internal override void DoDelete(int n)
{
    numDocs = -1; // invalidate cache
    int i = ReaderIndex(n); // find segment num
    subReaders[i].Delete(n - starts[i]); // dispatch to segment reader
    // Mirror of DoUndeleteAll, which clears the flag; set only after the
    // sub-reader accepted the delete.
    hasDeletions = true;
}
/// <summary>
/// Undeletes all documents in every sub-reader, clears the deletion flag
/// and invalidates the cached document count.
/// </summary>
protected internal override void DoUndeleteAll()
{
    for (int i = 0; i < subReaders.Length; i++)
    {
        subReaders[i].UndeleteAll();
    }
    hasDeletions = false;
    // Restored documents change the live count, so the cached value must be
    // dropped here just as DoDelete drops it.
    numDocs = -1; // invalidate cache
}
/// <summary>
/// Maps document number n to the index of the sub-reader that holds it by
/// searching the starts array between lo and hi.
/// </summary>
// NOTE(review): the loop header, the "n < midValue" branch, the updates of
// lo/hi and every return statement are missing from this copy; only
// fragments of the search remain. The midpoint computation suggests a
// binary search -- confirm against upstream.
143 private int ReaderIndex(int n
)
145 // find reader for doc n:
146 int lo
= 0; // search starts array
147 int hi
= subReaders
.Length
- 1; // for first element less
// midpoint of the current window, halved with a right shift
151 int mid
= (lo
+ hi
) >> 1;
152 int midValue
= starts
[mid
];
155 else if (n
> midValue
)
// several consecutive entries of starts can hold the same value; advance to
// the last of them
160 while (mid
+ 1 < subReaders
.Length
&& starts
[mid
+ 1] == midValue
)
162 mid
++; // scan to last match
/// <summary>
/// Norms of the given field for all documents of this reader. The visible
/// statements look the field up in normsCache and return the cached array;
/// otherwise they allocate MaxDoc() bytes, let every sub-reader write its
/// norms at offset starts[i], and store the array in normsCache.
/// </summary>
// NOTE(review): statements between the visible lines are missing in this
// copy (the embedded numbering skips 171-173, 175, 177 and 182-185), e.g.
// the condition guarding the "cache hit" return and the final return.
170 public override byte[] Norms(System
.String field
)
174 byte[] bytes
= (byte[]) normsCache
[field
];
176 return bytes
; // cache hit
// one slot per document of the combined reader
178 bytes
= new byte[MaxDoc()];
179 for (int i
= 0; i
< subReaders
.Length
; i
++)
180 subReaders
[i
].Norms(field
, bytes
, starts
[i
]);
181 normsCache
[field
] = bytes
; // update cache
/// <summary>
/// Writes the norms of the given field into result, starting at offset.
/// Two paths are visible: copying MaxDoc() bytes from the array cached in
/// normsCache, and asking every sub-reader to write at offset + starts[i].
/// </summary>
// NOTE(review): the branching that selects between the cached copy and the
// per-segment reads is not present in this copy (the embedded numbering
// skips 187-189, 191-192 and 194) -- confirm against upstream.
186 public override void Norms(System
.String field
, byte[] result
, int offset
)
190 byte[] bytes
= (byte[]) normsCache
[field
];
193 Array
.Copy(bytes
, 0, result
, offset
, MaxDoc());
195 for (int i
= 0; i
< subReaders
.Length
; i
++)
196 // read from segments
197 subReaders
[i
].Norms(field
, result
, offset
+ starts
[i
]);
/// <summary>
/// Sets the norm of one field of document n: drops the cached norms of that
/// field, then forwards the change to the sub-reader that owns the document.
/// </summary>
/// <param name="n">document number in this reader's numbering</param>
/// <param name="field">field whose norm is changed</param>
/// <param name="value_Renamed">new norm byte</param>
protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed)
{
    // The merged array cached for this field would be stale after the write.
    normsCache.Remove(field);

    int segment = ReaderIndex(n);              // which sub-reader owns the document
    IndexReader owner = subReaders[segment];
    int localDoc = n - starts[segment];        // translate to the segment's numbering
    owner.SetNorm(localDoc, field, value_Renamed);
}
/// <summary>
/// Returns a MultiTermEnum over all sub-readers, constructed with a null
/// start term.
/// </summary>
public override TermEnum Terms()
{
    TermEnum merged = new MultiTermEnum(subReaders, starts, null);
    return merged;
}
/// <summary>
/// Returns a MultiTermEnum over all sub-readers, constructed with the given
/// term.
/// </summary>
/// <param name="term">term handed to the enumeration of every sub-reader</param>
public override TermEnum Terms(Term term)
{
    TermEnum merged = new MultiTermEnum(subReaders, starts, term);
    return merged;
}
/// <summary>
/// Returns the document frequency of term t, computed as the sum of the
/// frequencies reported by every sub-reader.
/// </summary>
/// <param name="t">term to look up</param>
/// <returns>total of DocFreq(t) over all sub-readers; 0 when there are none</returns>
public override int DocFreq(Term t)
{
    int total = 0; // sum freqs in segments
    for (int i = 0; i < subReaders.Length; i++)
    {
        total += subReaders[i].DocFreq(t);
    }
    return total;
}
/// <summary>
/// Returns an unpositioned MultiTermDocs spanning all sub-readers.
/// </summary>
public override TermDocs TermDocs()
{
    TermDocs merged = new MultiTermDocs(subReaders, starts);
    return merged;
}
/// <summary>
/// Returns an unpositioned MultiTermPositions spanning all sub-readers.
/// </summary>
public override TermPositions TermPositions()
{
    TermPositions merged = new MultiTermPositions(subReaders, starts);
    return merged;
}
/// <summary>
/// Commits pending changes by calling Commit() on every sub-reader in order.
/// </summary>
protected internal override void DoCommit()
{
    foreach (IndexReader reader in subReaders)
    {
        reader.Commit();
    }
}
/// <summary>
/// Closes every sub-reader, in array order.
/// </summary>
// NOTE(review): the embedded numbering jumps 242 -> 246 and 247 -> 251, so a
// statement wrapping this loop may be missing from this copy -- confirm
// against upstream before reformatting.
242 protected internal override void DoClose()
246 for (int i
= 0; i
< subReaders
.Length
; i
++)
247 subReaders
[i
].Close();
/// <summary>
/// Collects the field names reported by GetFieldNames() of every sub-reader
/// into one Hashtable so that each name is considered only once.
/// </summary>
// NOTE(review): the statement executed when a name is not yet in fieldSet
// and the return statement are missing from this copy (nothing is visible
// after the ContainsKey test) -- confirm against upstream.
251 /// <seealso cref="IndexReader#GetFieldNames()">
253 public override System
.Collections
.ICollection
GetFieldNames()
255 // maintain a unique set of Field names
256 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
257 for (int i
= 0; i
< subReaders
.Length
; i
++)
259 IndexReader reader
= subReaders
[i
];
260 System
.Collections
.ICollection names
= reader
.GetFieldNames();
261 // iterate through the Field names and add them to the set
262 for (System
.Collections
.IEnumerator iterator
= names
.GetEnumerator(); iterator
.MoveNext(); )
// the cast requires each enumerated element to be a DictionaryEntry; the
// entry's key, converted to a string, is used as the field name
264 System
.Collections
.DictionaryEntry fi
= (System
.Collections
.DictionaryEntry
) iterator
.Current
;
265 System
.String s
= fi
.Key
.ToString();
266 if (fieldSet
.ContainsKey(s
) == false)
/// <summary>
/// Collects the field names reported by GetFieldNames(indexed) of every
/// sub-reader into one Hashtable so that each name is considered only once.
/// </summary>
/// <param name="indexed">passed through unchanged to every sub-reader</param>
// NOTE(review): the statement executed when a name is not yet in fieldSet
// and the return statement are missing from this copy (nothing is visible
// after the ContainsKey test) -- confirm against upstream.
275 /// <seealso cref="IndexReader#GetFieldNames(boolean)">
277 public override System
.Collections
.ICollection
GetFieldNames(bool indexed
)
279 // maintain a unique set of Field names
280 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
281 for (int i
= 0; i
< subReaders
.Length
; i
++)
283 IndexReader reader
= subReaders
[i
];
284 System
.Collections
.ICollection names
= reader
.GetFieldNames(indexed
);
285 for (System
.Collections
.IEnumerator iterator
= names
.GetEnumerator(); iterator
.MoveNext(); )
// the cast requires each enumerated element to be a DictionaryEntry; the
// entry's key, converted to a string, is used as the field name
287 System
.Collections
.DictionaryEntry fi
= (System
.Collections
.DictionaryEntry
) iterator
.Current
;
288 System
.String s
= fi
.Key
.ToString();
289 if (fieldSet
.ContainsKey(s
) == false)
/// <summary>
/// Merges the results of GetIndexedFieldNames(tvSpec) from every sub-reader
/// into one Hashtable; each enumerated item is added once, stored as both
/// key and value.
/// </summary>
/// <param name="tvSpec">passed through unchanged to every sub-reader</param>
// NOTE(review): the return statement is missing from this copy, so it is
// not visible whether fieldSet itself or a view of it is returned --
// confirm against upstream.
298 public override System
.Collections
.ICollection
GetIndexedFieldNames(Field
.TermVector tvSpec
)
300 // maintain a unique set of Field names
301 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
302 for (int i
= 0; i
< subReaders
.Length
; i
++)
304 IndexReader reader
= subReaders
[i
];
305 System
.Collections
.ICollection names
= reader
.GetIndexedFieldNames(tvSpec
);
306 foreach (object item
in names
)
// skip items an earlier sub-reader already contributed
308 if (fieldSet
.ContainsKey(item
) == false)
310 fieldSet
.Add(item
, item
);
/// <summary>
/// Merges the results of GetFieldNames(fieldNames) from every sub-reader
/// into one Hashtable; each enumerated item is added once, stored as both
/// key and value.
/// </summary>
/// <param name="fieldNames">field option passed through unchanged to every sub-reader</param>
// NOTE(review): the return statement is missing from this copy, so it is
// not visible whether fieldSet itself or a view of it is returned --
// confirm against upstream.
317 /// <seealso cref="IndexReader#GetFieldNames(IndexReader.FieldOption)">
319 public override System
.Collections
.ICollection
GetFieldNames(IndexReader
.FieldOption fieldNames
)
321 // maintain a unique set of field names
322 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
323 for (int i
= 0; i
< subReaders
.Length
; i
++)
325 IndexReader reader
= subReaders
[i
];
326 System
.Collections
.ICollection names
= reader
.GetFieldNames(fieldNames
);
327 foreach (object item
in names
)
// skip items an earlier sub-reader already contributed
329 if (fieldSet
.ContainsKey(item
) == false)
331 fieldSet
.Add(item
, item
);
/// <summary>
/// TermEnum over several readers that combines their per-reader term
/// enumerations through a SegmentMergeQueue.
/// </summary>
// NOTE(review): many statements of this class are missing from this copy
// (field declarations for term/docFreq, branch conditions, queue updates,
// returns and the bodies of Term(), DocFreq() and Close()). The comments
// below describe only what is visible.
339 class MultiTermEnum
: TermEnum
// holds one SegmentMergeInfo per reader that still has terms
341 private SegmentMergeQueue queue
;
/// <summary>
/// Builds the queue from one term enumeration per reader; t is the optional
/// term handed to reader.Terms(t).
/// </summary>
346 public MultiTermEnum(IndexReader
[] readers
, int[] starts
, Term t
)
348 queue
= new SegmentMergeQueue(readers
.Length
);
349 for (int i
= 0; i
< readers
.Length
; i
++)
351 IndexReader reader
= readers
[i
];
// NOTE(review): the test choosing between the two Terms overloads is not
// visible; presumably it is "t != null" -- confirm.
356 termEnum
= reader
.Terms(t
);
359 termEnum
= reader
.Terms();
// pairs the enumeration with the reader and its document-number offset
361 SegmentMergeInfo smi
= new SegmentMergeInfo(starts
[i
], termEnum
, reader
);
// without a start term the enumeration is advanced once via smi.Next();
// with one, it only has to be positioned on some term already
362 if (t
== null?smi
.Next():termEnum
.Term() != null)
369 if (t
!= null && queue
.Size() > 0)
/// <summary>
/// Advances the merged enumeration; the visible loop adds up DocFreq() of
/// every queue entry whose term equals the current term.
/// </summary>
375 public override bool Next()
377 SegmentMergeInfo top
= (SegmentMergeInfo
) queue
.Top();
387 while (top
!= null && term
.CompareTo(top
.term
) == 0)
390 docFreq
+= top
.termEnum
.DocFreq(); // increment freq
395 top
.Close(); // done with a segment
396 top
= (SegmentMergeInfo
) queue
.Top();
401 public override Term
Term()
406 public override int DocFreq()
411 public override void Close()
/// <summary>
/// TermDocs over several readers: walks the readers one after another and
/// reports document numbers shifted by the offset of the current reader.
/// </summary>
// NOTE(review): a number of statements are missing from this copy
// (constructor assignments, parts of Seek/Next/Read/SkipTo, returns). The
// comments below describe only what is visible.
417 class MultiTermDocs
: TermDocs
419 protected internal IndexReader
[] readers
;
// document-number offset per reader, indexed like readers
420 protected internal int[] starts
;
421 protected internal Term term
;
// offset added to the documents of the current reader
423 protected internal int base_Renamed
= 0;
// index of the next reader to open
424 protected internal int pointer
= 0;
// lazily filled cache, one TermDocs per reader
426 private TermDocs
[] readerTermDocs
;
427 protected internal TermDocs current
; // obtained via TermDocs(pointer++), i.e. readerTermDocs[pointer - 1] afterwards
/// <summary>Creates the enumerator for readers r with offsets s.</summary>
429 public MultiTermDocs(IndexReader
[] r
, int[] s
)
434 readerTermDocs
= new TermDocs
[r
.Length
];
/// <summary>Current document number: reader offset plus the reader-local number.</summary>
437 public virtual int Doc()
439 return base_Renamed
+ current
.Doc();
/// <summary>Frequency reported by the current reader's TermDocs.</summary>
441 public virtual int Freq()
443 return current
.Freq();
/// <summary>Positions on the given term; the visible statement resets the offset to 0.</summary>
446 public virtual void Seek(Term term
)
449 this.base_Renamed
= 0;
/// <summary>Positions on the term the given enumeration currently points at.</summary>
454 public virtual void Seek(TermEnum termEnum
)
456 Seek(termEnum
.Term());
/// <summary>
/// Advances to the next document, moving on to the next reader when the
/// current one has no more documents.
/// </summary>
459 public virtual bool Next()
461 if (current
!= null && current
.Next())
465 else if (pointer
< readers
.Length
)
// switch to the next reader: take its offset, then open its TermDocs
467 base_Renamed
= starts
[pointer
];
468 current
= TermDocs(pointer
++);
475 /// <summary>Optimized implementation. </summary>
476 public virtual int Read(int[] docs
, int[] freqs
)
// make sure a reader is open before reading from it
480 while (current
== null)
482 if (pointer
< readers
.Length
)
485 base_Renamed
= starts
[pointer
];
486 current
= TermDocs(pointer
++);
// end is the value returned by the current reader's Read
493 int end
= current
.Read(docs
, freqs
);
496 // none left in segment
502 int b
= base_Renamed
; // adjust doc numbers
503 for (int i
= 0; i
< end
; i
++)
510 /// <summary>As yet unoptimized implementation. </summary>
511 public virtual bool SkipTo(int target
)
// NOTE(review): looks like the tail of a do/while that advances until
// Doc() reaches target; the loop body is not visible -- confirm.
518 while (target
> Doc());
/// <summary>Returns the TermDocs for reader i, using readerTermDocs as a cache.</summary>
522 private TermDocs
TermDocs(int i
)
526 TermDocs result
= readerTermDocs
[i
];
// NOTE(review): presumably executed only when the cached entry is null;
// the guarding test is not visible -- confirm.
528 result
= readerTermDocs
[i
] = TermDocs(readers
[i
]);
/// <summary>Creates the per-reader enumerator; virtual so subclasses can supply another kind.</summary>
533 protected internal virtual TermDocs
TermDocs(IndexReader reader
)
535 return reader
.TermDocs();
/// <summary>Closes every per-reader TermDocs that has been created.</summary>
538 public virtual void Close()
540 for (int i
= 0; i
< readerTermDocs
.Length
; i
++)
542 if (readerTermDocs
[i
] != null)
543 readerTermDocs
[i
].Close();
/// <summary>
/// MultiTermDocs variant whose per-reader enumerators are TermPositions, so
/// that term positions can be read in addition to documents and frequencies.
/// </summary>
class MultiTermPositions : MultiTermDocs, TermPositions
{
    /// <summary>Creates the enumerator for readers r with offsets s.</summary>
    public MultiTermPositions(IndexReader[] r, int[] s)
        : base(r, s)
    {
    }

    /// <summary>
    /// Supplies the reader's TermPositions as the per-reader enumerator.
    /// </summary>
    protected internal override TermDocs TermDocs(IndexReader reader)
    {
        TermPositions positions = reader.TermPositions();
        return (TermDocs) positions;
    }

    /// <summary>
    /// Returns the next position from the current per-reader enumerator.
    /// </summary>
    public virtual int NextPosition()
    {
        TermPositions positions = (TermPositions) current;
        return positions.NextPosition();
    }
}