2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using Document
= Lucene
.Net
.Documents
.Document
;
18 using Directory
= Lucene
.Net
.Store
.Directory
;
19 namespace Lucene
.Net
.Index
/// <summary>An IndexReader which reads multiple indexes, appending their content.</summary>
/// <version> $Id: MultiReader.cs,v 1.1 2005/01/17 19:54:29 joeshaw Exp $ </version>
27 public class MultiReader
: IndexReader
/// <summary>The readers aggregated by this instance, as handed to Initialize.</summary>
private IndexReader[] subReaders;

/// <summary>1st docno for each segment; allocated with one extra trailing slot that receives the final maxDoc.</summary>
private int[] starts;

/// <summary>Norm byte arrays keyed by field name, held in a Hashtable wrapped by Hashtable.Synchronized.</summary>
private System.Collections.Hashtable normsCache =
    System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());

/// <summary>Sum of the sub-readers' MaxDoc() values, accumulated in Initialize.</summary>
private int maxDoc = 0;

/// <summary>Cached document count; DoDelete resets it to -1 to invalidate the cache.</summary>
private int numDocs = -1;

/// <summary>Deletion flag; DoUndeleteAll clears it.</summary>
private bool hasDeletions = false;
/// <summary>
/// <p>Construct a MultiReader aggregating the named set of (sub)readers.
/// Directory locking for delete, undeleteAll, and setNorm operations is
/// left to the subreaders.</p>
/// <p>Note that all subreaders are closed if this MultiReader is closed.</p>
/// The base class receives the Directory of the first sub-reader, or null
/// when the array is empty.
/// </summary>
/// <param name="subReaders">set of (sub)readers</param>
/// <throws> IOException </throws>
public MultiReader(IndexReader[] subReaders)
    : base(subReaders.Length != 0 ? subReaders[0].Directory() : null)
{
    Initialize(subReaders);
}
/// <summary>
/// Construct reading the named set of readers. The directory, segment infos and
/// closeDirectory flag are forwarded unchanged to the base constructor.
/// </summary>
/// <param name="directory">passed through to the base constructor</param>
/// <param name="sis">passed through to the base constructor</param>
/// <param name="closeDirectory">passed through to the base constructor</param>
/// <param name="subReaders">the readers to aggregate</param>
public /*internal*/ MultiReader(
    Directory directory,
    SegmentInfos sis,
    bool closeDirectory,
    IndexReader[] subReaders)
    : base(directory, sis, closeDirectory)
{
    Initialize(subReaders);
}
// Shared constructor logic. From the visible statements it:
//   - stores the sub-reader array,
//   - allocates starts with one slot per sub-reader plus one,
//   - adds each sub-reader's MaxDoc() into maxDoc and tests its HasDeletions(),
//   - writes the final maxDoc into the last starts slot.
// NOTE(review): this excerpt is missing the braces and at least two statements
// (inside the loop, before the maxDoc accumulation and after the HasDeletions()
// test) -- confirm against the full file before relying on this description.
55 private void Initialize(IndexReader
[] subReaders
)
57 this.subReaders
= subReaders
;
58 starts
= new int[subReaders
.Length
+ 1]; // build starts array
59 for (int i
= 0; i
< subReaders
.Length
; i
++)
62 maxDoc
+= subReaders
[i
].MaxDoc(); // compute maxDocs
64 if (subReaders
[i
].HasDeletions())
67 starts
[subReaders
.Length
] = maxDoc
;
/// <summary>
/// Return an array of term frequency vectors for the specified document.
/// The array contains a vector for each vectorized Field in the document.
/// Each vector contains term numbers and frequencies for all terms
/// in a given vectorized Field.
/// If no such fields existed, the method returns null.
/// </summary>
/// <param name="n">document number in this reader's numbering; it is rebased
/// by the owning segment's start before being forwarded</param>
public override TermFreqVector[] GetTermFreqVectors(int n)
{
    int segment = ReaderIndex(n); // find segment num
    IndexReader owner = subReaders[segment];
    int localDoc = n - starts[segment];
    return owner.GetTermFreqVectors(localDoc); // dispatch to segment
}
/// <summary>
/// Returns the term frequency vector of one field of a document by locating the
/// owning sub-reader and forwarding the call with the rebased document number.
/// </summary>
/// <param name="n">document number in this reader's numbering</param>
/// <param name="field">field name, passed through unchanged</param>
public override TermFreqVector GetTermFreqVector(int n, System.String field)
{
    int segment = ReaderIndex(n); // find segment num
    IndexReader owner = subReaders[segment];
    int localDoc = n - starts[segment];
    return owner.GetTermFreqVector(localDoc, field);
}
// Document count for the combined reader. The visible statements recompute the
// count by summing NumDocs() over every sub-reader.
// NOTE(review): the cache test, the store into the numDocs field and the return
// statement are not visible in this excerpt -- presumably the sum is cached in
// numDocs (DoDelete resets that field to -1); confirm against the full file.
89 public override int NumDocs()
96 int n
= 0; // cache miss--recompute
97 for (int i
= 0; i
< subReaders
.Length
; i
++)
98 n
+= subReaders
[i
].NumDocs(); // sum from readers
// NOTE(review): only the signature is visible in this excerpt. Presumably this
// returns the maxDoc total accumulated in Initialize -- confirm against the full file.
105 public override int MaxDoc()
/// <summary>
/// Fetches a document by locating the sub-reader that owns it and asking that
/// reader for the document at the rebased (segment-local) number.
/// </summary>
/// <param name="n">document number in this reader's numbering</param>
public override Document Document(int n)
{
    int segment = ReaderIndex(n); // find segment num
    IndexReader owner = subReaders[segment];
    int localDoc = n - starts[segment];
    return owner.Document(localDoc); // dispatch to segment reader
}
/// <summary>
/// Reports whether a document is deleted by delegating to the sub-reader that
/// owns it, using the rebased (segment-local) document number.
/// </summary>
/// <param name="n">document number in this reader's numbering</param>
public override bool IsDeleted(int n)
{
    int segment = ReaderIndex(n); // find segment num
    IndexReader owner = subReaders[segment];
    int localDoc = n - starts[segment];
    return owner.IsDeleted(localDoc); // dispatch to segment reader
}
// NOTE(review): only the signature is visible in this excerpt. Presumably this
// returns the hasDeletions field -- confirm against the full file.
122 public override bool HasDeletions()
// Deletes document n: resets the cached numDocs to -1, locates the owning
// sub-reader with ReaderIndex, and calls Delete on it with the document number
// rebased by that reader's start offset.
// NOTE(review): the braces and possibly a trailing statement are not visible in
// this excerpt -- confirm against the full file.
127 protected internal override void DoDelete(int n
)
129 numDocs
= - 1; // invalidate cache
130 int i
= ReaderIndex(n
); // find segment num
131 subReaders
[i
].Delete(n
- starts
[i
]); // dispatch to segment reader
/// <summary>
/// Undeletes all documents in every sub-reader and clears the deletion flag.
/// The cached document count is invalidated as well, mirroring DoDelete, so the
/// next NumDocs() call recomputes it instead of returning the pre-undelete value.
/// </summary>
protected internal override void DoUndeleteAll()
{
    // Invalidate first so the cache is not left stale if a sub-reader throws part-way.
    numDocs = -1; // invalidate cache

    for (int i = 0; i < subReaders.Length; i++)
    {
        subReaders[i].UndeleteAll();
    }

    hasDeletions = false;
}
// Maps document number n to an index into subReaders by searching the starts array.
// Visible logic: lo/hi bound the search, mid is their midpoint ((lo + hi) >> 1),
// and starts[mid] is compared with n; when several consecutive starts entries
// hold the same value, mid is advanced to the last of them.
// NOTE(review): this looks like a binary search, but the loop header, the
// "n < midValue" branch, the bound updates and the return statements are not
// visible in this excerpt -- confirm against the full file.
142 private int ReaderIndex(int n
)
144 // find reader for doc n:
145 int lo
= 0; // search starts array
146 int hi
= subReaders
.Length
- 1; // for first element less
150 int mid
= (lo
+ hi
) >> 1;
151 int midValue
= starts
[mid
];
154 else if (n
> midValue
)
159 while (mid
+ 1 < subReaders
.Length
&& starts
[mid
+ 1] == midValue
)
161 mid
++; // scan to last match
// Returns the norms for a field across all sub-readers. Visible logic: look the
// field up in normsCache and return the cached array on a hit; otherwise allocate
// a byte[MaxDoc()], let each sub-reader write its norms at offset starts[i], and
// store the array in normsCache under the field name.
// NOTE(review): the condition guarding the cache-hit return, any locking, and the
// final return are not visible in this excerpt -- confirm against the full file.
169 public override byte[] Norms(System
.String field
)
173 byte[] bytes
= (byte[]) normsCache
[field
];
175 return bytes
; // cache hit
177 bytes
= new byte[MaxDoc()];
178 for (int i
= 0; i
< subReaders
.Length
; i
++)
179 subReaders
[i
].Norms(field
, bytes
, starts
[i
]);
180 normsCache
[field
] = bytes
; // update cache
// Writes the norms for a field into result starting at offset. Visible logic:
// read the cached array from normsCache; copy MaxDoc() bytes from it into result
// at offset; and have each sub-reader write its norms at offset + starts[i].
// NOTE(review): the conditions choosing between the cached copy and the
// per-segment reads are not visible in this excerpt -- presumably the copy runs
// only on a cache hit; confirm against the full file.
185 public override void Norms(System
.String field
, byte[] result
, int offset
)
189 byte[] bytes
= (byte[]) normsCache
[field
];
192 Array
.Copy(bytes
, 0, result
, offset
, MaxDoc());
194 for (int i
= 0; i
< subReaders
.Length
; i
++)
195 // read from segments
196 subReaders
[i
].Norms(field
, result
, offset
+ starts
[i
]);
/// <summary>
/// Sets the norm of one field of a document. The cached norms for the field are
/// dropped first, then the call is forwarded to the sub-reader that owns the
/// document, using the rebased (segment-local) document number.
/// </summary>
/// <param name="n">document number in this reader's numbering</param>
/// <param name="field">field whose norm is changed</param>
/// <param name="value_Renamed">new norm byte, passed through unchanged</param>
protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed)
{
    normsCache.Remove(field); // clear cache

    int segment = ReaderIndex(n); // find segment num
    IndexReader owner = subReaders[segment];
    int localDoc = n - starts[segment];
    owner.SetNorm(localDoc, field, value_Renamed); // dispatch
}
/// <summary>
/// Returns a MultiTermEnum built over all sub-readers and their start offsets,
/// with no starting term (null is passed as the term).
/// </summary>
public override TermEnum Terms()
{
    MultiTermEnum merged = new MultiTermEnum(subReaders, starts, null);
    return merged;
}
/// <summary>
/// Returns a MultiTermEnum built over all sub-readers and their start offsets,
/// constructed with the given term.
/// </summary>
/// <param name="term">term handed to the MultiTermEnum constructor</param>
public override TermEnum Terms(Term term)
{
    MultiTermEnum merged = new MultiTermEnum(subReaders, starts, term);
    return merged;
}
// Document frequency of term t across the combined reader: the visible loop adds
// up DocFreq(t) from every sub-reader into total.
// NOTE(review): the return statement is not visible in this excerpt --
// presumably total is returned; confirm against the full file.
217 public override int DocFreq(Term t
)
219 int total
= 0; // sum freqs in segments
220 for (int i
= 0; i
< subReaders
.Length
; i
++)
221 total
+= subReaders
[i
].DocFreq(t
);
/// <summary>
/// Returns a MultiTermDocs built over all sub-readers and their start offsets.
/// </summary>
public override TermDocs TermDocs()
{
    MultiTermDocs merged = new MultiTermDocs(subReaders, starts);
    return merged;
}
/// <summary>
/// Returns a MultiTermPositions built over all sub-readers and their start offsets.
/// </summary>
public override TermPositions TermPositions()
{
    MultiTermPositions merged = new MultiTermPositions(subReaders, starts);
    return merged;
}
/// <summary>
/// Commits by calling Commit() on every sub-reader, in array order.
/// </summary>
protected internal override void DoCommit()
{
    foreach (IndexReader reader in subReaders)
    {
        reader.Commit();
    }
}
// Closes this reader by calling Close() on every sub-reader, in array order.
// NOTE(review): several lines between the signature and the loop are not visible
// in this excerpt (possibly a lock statement) -- confirm against the full file.
241 protected internal override void DoClose()
245 for (int i
= 0; i
< subReaders
.Length
; i
++)
246 subReaders
[i
].Close();
// Collects field names from every sub-reader into a single Hashtable. For each
// sub-reader, GetFieldNames() is enumerated; every element is cast to
// DictionaryEntry, its Key is converted to a string, and fieldSet is checked for
// that string with ContainsKey.
// NOTE(review): the statement executed when the key is absent and the return
// statement are not visible in this excerpt -- presumably the name is added to
// fieldSet and fieldSet is returned; confirm against the full file.
250 /// <seealso cref="IndexReader#GetFieldNames()">
252 public override System
.Collections
.ICollection
GetFieldNames()
254 // maintain a unique set of Field names
255 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
256 for (int i
= 0; i
< subReaders
.Length
; i
++)
258 IndexReader reader
= subReaders
[i
];
259 System
.Collections
.ICollection names
= reader
.GetFieldNames();
260 // iterate through the Field names and add them to the set
261 for (System
.Collections
.IEnumerator iterator
= names
.GetEnumerator(); iterator
.MoveNext(); )
263 System
.Collections
.DictionaryEntry fi
= (System
.Collections
.DictionaryEntry
) iterator
.Current
;
264 System
.String s
= fi
.Key
.ToString();
265 if (fieldSet
.ContainsKey(s
) == false)
// Same collection scheme as the parameterless overload, but each sub-reader is
// queried with GetFieldNames(indexed). Every enumerated element is cast to
// DictionaryEntry, its Key is converted to a string, and fieldSet is checked for
// that string with ContainsKey.
// NOTE(review): the statement executed when the key is absent and the return
// statement are not visible in this excerpt -- presumably the name is added to
// fieldSet and fieldSet is returned; confirm against the full file.
274 /// <seealso cref="IndexReader#GetFieldNames(boolean)">
276 public override System
.Collections
.ICollection
GetFieldNames(bool indexed
)
278 // maintain a unique set of Field names
279 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
280 for (int i
= 0; i
< subReaders
.Length
; i
++)
282 IndexReader reader
= subReaders
[i
];
283 System
.Collections
.ICollection names
= reader
.GetFieldNames(indexed
);
284 for (System
.Collections
.IEnumerator iterator
= names
.GetEnumerator(); iterator
.MoveNext(); )
286 System
.Collections
.DictionaryEntry fi
= (System
.Collections
.DictionaryEntry
) iterator
.Current
;
287 System
.String s
= fi
.Key
.ToString();
288 if (fieldSet
.ContainsKey(s
) == false)
// Collects the indexed field names from every sub-reader into one Hashtable:
// each item returned by GetIndexedFieldNames(storedTermVector) is added to
// fieldSet as both key and value.
// NOTE(review): Hashtable.Add throws ArgumentException when the key is already
// present, and unlike the GetFieldNames overloads no ContainsKey guard is visible
// here -- if two sub-readers can report the same item this will throw; consider
// the indexer (fieldSet[item] = item) or a ContainsKey check.
// NOTE(review): the return statement is not visible in this excerpt -- confirm
// against the full file.
297 public override System
.Collections
.ICollection
GetIndexedFieldNames(bool storedTermVector
)
299 // maintain a unique set of Field names
300 System
.Collections
.Hashtable fieldSet
= new System
.Collections
.Hashtable();
301 for (int i
= 0; i
< subReaders
.Length
; i
++)
303 IndexReader reader
= subReaders
[i
];
304 System
.Collections
.ICollection names
= reader
.GetIndexedFieldNames(storedTermVector
);
305 foreach (object item
in names
)
307 fieldSet
.Add(item
,item
);
314 class MultiTermEnum
:TermEnum
/// <summary>
/// Merge queue created in the constructor with room for one entry per reader;
/// Next() reads SegmentMergeInfo entries from its top.
/// </summary>
private SegmentMergeQueue queue;
// Builds the merged term enumeration. Visible logic: create a SegmentMergeQueue
// sized to readers.Length; for each reader obtain a TermEnum (reader.Terms(t) or
// reader.Terms()), wrap it with its start offset in a SegmentMergeInfo, and test
// either smi.Next() (when t is null) or termEnum.Term() != null (when t is given).
// A final test checks "t != null && queue.Size() > 0".
// NOTE(review): the termEnum declaration, the condition choosing between the two
// Terms calls, and the bodies of both if statements are not visible in this
// excerpt -- confirm against the full file.
321 public MultiTermEnum(IndexReader
[] readers
, int[] starts
, Term t
)
323 queue
= new SegmentMergeQueue(readers
.Length
);
324 for (int i
= 0; i
< readers
.Length
; i
++)
326 IndexReader reader
= readers
[i
];
331 termEnum
= reader
.Terms(t
);
334 termEnum
= reader
.Terms();
336 SegmentMergeInfo smi
= new SegmentMergeInfo(starts
[i
], termEnum
, reader
);
337 if (t
== null?smi
.Next():termEnum
.Term() != null)
344 if (t
!= null && queue
.Size() > 0)
// Advances the merged enumeration. Visible logic: read the top SegmentMergeInfo
// from the queue; while the top entry's term compares equal to the current term,
// add its termEnum.DocFreq() to docFreq and re-read the queue top. top.Close() is
// called when a segment is finished.
// NOTE(review): the term/docFreq members, the null-top handling, the queue
// pop/advance/re-insert steps and the return statements are not visible in this
// excerpt -- confirm against the full file.
350 public override bool Next()
352 SegmentMergeInfo top
= (SegmentMergeInfo
) queue
.Top();
362 while (top
!= null && term
.CompareTo(top
.term
) == 0)
365 docFreq
+= top
.termEnum
.DocFreq(); // increment freq
370 top
.Close(); // done with a segment
371 top
= (SegmentMergeInfo
) queue
.Top();
// NOTE(review): only the signature is visible in this excerpt. Presumably this
// returns the current term tracked by Next() -- confirm against the full file.
376 public override Term
Term()
// NOTE(review): only the signature is visible in this excerpt. Presumably this
// returns the docFreq accumulated by Next() -- confirm against the full file.
381 public override int DocFreq()
// NOTE(review): only the signature is visible in this excerpt. Presumably this
// closes the merge queue -- confirm against the full file.
386 public override void Close()
392 class MultiTermDocs
: TermDocs
/// <summary>The readers whose term docs are concatenated.</summary>
protected internal IndexReader[] readers;

/// <summary>Doc-number offsets indexed like readers; Next() and Read() copy starts[pointer] into base_Renamed.</summary>
protected internal int[] starts;

/// <summary>Term field of this enumerator.</summary>
protected internal Term term;

/// <summary>Offset that Doc() adds to the current reader's document number.</summary>
protected internal int base_Renamed = 0;

/// <summary>Index of the next reader to open; advanced with pointer++ when a reader is opened.</summary>
protected internal int pointer = 0;

/// <summary>Per-reader TermDocs instances, one slot per reader; Close() closes the non-null ones.</summary>
private TermDocs[] readerTermDocs;

/// <summary>The TermDocs currently being read.</summary>
protected internal TermDocs current; // == readerTermDocs[pointer]
// Creates the enumerator for readers r with start offsets s. The visible
// statement allocates readerTermDocs with one slot per reader.
// NOTE(review): the assignments of r and s to fields are not visible in this
// excerpt -- presumably readers = r and starts = s; confirm against the full file.
404 public MultiTermDocs(IndexReader
[] r
, int[] s
)
409 readerTermDocs
= new TermDocs
[r
.Length
];
/// <summary>
/// Returns the current document number: the current reader's Doc() shifted by
/// base_Renamed.
/// </summary>
public virtual int Doc()
{
    int offset = base_Renamed;
    int localDoc = current.Doc();
    return offset + localDoc;
}
/// <summary>
/// Returns the frequency reported by the current reader's TermDocs.
/// </summary>
public virtual int Freq()
{
    int frequency = current.Freq();
    return frequency;
}
// Positions the enumerator on a term. The visible statement resets base_Renamed to 0.
// NOTE(review): the remaining statements are not visible in this excerpt --
// presumably the term, pointer and current fields are reset as well; confirm
// against the full file.
421 public virtual void Seek(Term term
)
424 this.base_Renamed
= 0;
/// <summary>
/// Positions the enumerator on the term the given TermEnum currently reports,
/// by delegating to Seek(Term).
/// </summary>
/// <param name="termEnum">enumeration whose Term() is used as the seek target</param>
public virtual void Seek(TermEnum termEnum)
{
    Term target = termEnum.Term();
    Seek(target);
}
// Advances to the next document. Visible logic: first try current.Next() when a
// current reader exists; otherwise, while readers remain (pointer < readers.Length),
// load base_Renamed from starts[pointer] and open the next reader's TermDocs,
// advancing pointer.
// NOTE(review): the return statements and whatever follows opening the next
// reader are not visible in this excerpt -- confirm against the full file.
434 public virtual bool Next()
436 if (current
!= null && current
.Next())
440 else if (pointer
< readers
.Length
)
442 base_Renamed
= starts
[pointer
];
443 current
= TermDocs(pointer
++);
// Bulk read into docs/freqs. Visible logic: while there is no current reader, open
// the next one (setting base_Renamed from starts[pointer] and advancing pointer) if
// any remain; call current.Read(docs, freqs); then loop over the returned count
// with b = base_Renamed to adjust document numbers.
// NOTE(review): the outer loop, the handling of an exhausted segment or reader
// list, the body of the adjustment loop and the return statements are not visible
// in this excerpt -- confirm against the full file.
450 /// <summary>Optimized implementation. </summary>
451 public virtual int Read(int[] docs
, int[] freqs
)
455 while (current
== null)
457 if (pointer
< readers
.Length
)
460 base_Renamed
= starts
[pointer
];
461 current
= TermDocs(pointer
++);
468 int end
= current
.Read(docs
, freqs
);
471 // none left in segment
477 int b
= base_Renamed
; // adjust doc numbers
478 for (int i
= 0; i
< end
; i
++)
// Skips forward until Doc() is no longer less than target; the visible loop
// condition is "target > Doc()".
// NOTE(review): the loop body and the return statements are not visible in this
// excerpt -- presumably it steps with Next(); confirm against the full file.
485 /// <summary>As yet unoptimized implementation. </summary>
486 public virtual bool SkipTo(int target
)
493 while (target
> Doc());
// Returns the TermDocs for reader i. Visible logic: read readerTermDocs[i], and
// assign both result and the array slot from TermDocs(readers[i]).
// NOTE(review): the conditions around these statements, any seek on the result
// and the return statement are not visible in this excerpt -- presumably the slot
// is filled only when it is null; confirm against the full file.
497 private TermDocs
TermDocs(int i
)
501 TermDocs result
= readerTermDocs
[i
];
503 result
= readerTermDocs
[i
] = TermDocs(readers
[i
]);
/// <summary>
/// Factory hook that obtains a TermDocs from a single reader; this base
/// implementation returns reader.TermDocs().
/// </summary>
/// <param name="reader">the sub-reader to obtain the TermDocs from</param>
protected internal virtual TermDocs TermDocs(IndexReader reader)
{
    TermDocs docs = reader.TermDocs();
    return docs;
}
/// <summary>
/// Closes every per-reader TermDocs that has been created; empty (null) slots
/// are skipped.
/// </summary>
public virtual void Close()
{
    foreach (TermDocs opened in readerTermDocs)
    {
        if (opened == null)
        {
            continue;
        }

        opened.Close();
    }
}
523 class MultiTermPositions
:MultiTermDocs
, TermPositions
/// <summary>
/// Creates the enumerator; both arguments are forwarded unchanged to the
/// MultiTermDocs constructor.
/// </summary>
/// <param name="r">the readers to enumerate</param>
/// <param name="s">start offsets, passed through to the base constructor</param>
public MultiTermPositions(IndexReader[] r, int[] s)
    : base(r, s)
{
}
/// <summary>
/// Overrides the factory hook so that each reader supplies its TermPositions(),
/// cast to TermDocs.
/// </summary>
/// <param name="reader">the sub-reader to obtain the positions enumerator from</param>
protected internal override TermDocs TermDocs(IndexReader reader)
{
    TermDocs positions = (TermDocs) reader.TermPositions();
    return positions;
}
/// <summary>
/// Returns the next position by casting the current TermDocs to TermPositions
/// and calling NextPosition() on it.
/// </summary>
public virtual int NextPosition()
{
    TermPositions positions = (TermPositions) current;
    return positions.NextPosition();
}