2 using System
.Collections
;
4 using Lucene
.Net
.Store
;
7 namespace Lucene
.Net
.Index
9 /* ====================================================================
10 * The Apache Software License, Version 1.1
12 * Copyright (c) 2001 The Apache Software Foundation. All rights
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in
24 * the documentation and/or other materials provided with the
27 * 3. The end-user documentation included with the redistribution,
28 * if any, must include the following acknowledgment:
29 * "This product includes software developed by the
30 * Apache Software Foundation (http://www.apache.org/)."
31 * Alternately, this acknowledgment may appear in the software itself,
32 * if and wherever such third-party acknowledgments normally appear.
34 * 4. The names "Apache" and "Apache Software Foundation" and
35 * "Apache Lucene" must not be used to endorse or promote products
36 * derived from this software without prior written permission. For
37 * written permission, please contact apache@apache.org.
39 * 5. Products derived from this software may not be called "Apache",
40 * "Apache Lucene", nor may "Apache" appear in their name, without
41 * prior written permission of the Apache Software Foundation.
43 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
44 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
45 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
46 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
47 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
49 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
50 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
51 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
52 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
53 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * ====================================================================
57 * This software consists of voluntary contributions made by many
58 * individuals on behalf of the Apache Software Foundation. For more
59 * information on the Apache Software Foundation, please see
60 * <http://www.apache.org/>.
63 public sealed class SegmentMerger
// Copied from the constructor's `compoundFile` argument. The code that reads this
// flag is not visible in this listing.
51 private bool useCompoundFile
;
// Directory in which this class creates, writes and deletes the segment's files.
53 private Directory directory
;
// Segment name; used as the file-name prefix (segment + ".cfs", ".fnm", ".frq", ...).
55 private String segment
;
// Untyped list of readers; elements are cast to IndexReader wherever they are read.
57 private ArrayList readers
= new ArrayList();
// Field metadata for the merged segment; (re)created by MergeFields().
59 private FieldInfos fieldInfos
;
61 // File extensions of old-style index files
// CreateCompoundFile() appends each entry to segment + "." to name a file to bundle.
62 private static string[] COMPOUND_EXTENSIONS
= new string[]
74 {"fnm", "frq", "prx", "fdx", "fdt", "tii", "tis"}
;
// Creates a merger. The only statement visible in this listing copies `compoundFile`
// into the useCompoundFile field.
// NOTE(review): `dir` and `name` are presumably stored in `directory` and `segment`,
// but those statements are not present here - confirm against the full source.
76 public SegmentMerger(Directory dir
, String name
, bool compoundFile
)
80 useCompoundFile
= compoundFile
;
// NOTE(review): the body of this method is not visible in this listing. Presumably it
// appends `reader` to the `readers` list - confirm against the full source.
83 public void Add(IndexReader reader
)
// Returns the element at index `i` of the `readers` list, cast to IndexReader.
// No bounds check is performed in the visible code; an invalid index is left to the
// ArrayList indexer.
88 public IndexReader
SegmentReader(int i
)
90 return (IndexReader
)readers
[i
];
101 _value
= MergeNorms();
105 for (int i
= 0; i
< readers
.Count
; i
++)
107 IndexReader reader
= (IndexReader
)readers
[i
];
113 CreateCompoundFile();
// Bundles the segment's individual files into "<segment>.cfs":
//   1. builds a list of file names (one per COMPOUND_EXTENSIONS entry plus
//      "<segment>.f<i>" entries for fields),
//   2. adds every listed file to a CompoundFileWriter,
//   3. deletes every listed file from `directory`.
// NOTE(review): the declaration of `files`, and any statement that finalises or
// closes `cfsWriter`, are not visible in this listing.
118 private void CreateCompoundFile()
120 CompoundFileWriter cfsWriter
=
121 new CompoundFileWriter(directory
, segment
+ ".cfs");
// Capacity hint: one slot per fixed extension plus one per field.
124 new ArrayList(COMPOUND_EXTENSIONS
.Length
+ fieldInfos
.Size());
// Fixed-extension files: "<segment>.<ext>".
127 for (int i
=0; i
<COMPOUND_EXTENSIONS
.Length
; i
++)
129 files
.Add(segment
+ "." + COMPOUND_EXTENSIONS
[i
]);
// Per-field files: "<segment>.f<i>", where i is the field's index in fieldInfos.
133 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
// NOTE(review): `fi` is fetched but not used in the visible lines; a condition on it
// guarding the Add below is presumably missing from this listing - confirm.
135 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
138 files
.Add(segment
+ ".f" + i
);
142 // Now merge all added files
143 foreach(string file
in files
)
145 cfsWriter
.AddFile(file
);
151 // Now delete the source files
152 foreach(string file
in files
)
154 directory
.DeleteFile(file
);
// Merges field metadata and stored documents from all readers:
//   1. rebuilds `fieldInfos` from the field names reported by every reader and writes
//      it to "<segment>.fnm",
//   2. copies every non-deleted document of every reader into a FieldsWriter,
//   3. closes the FieldsWriter.
// NOTE(review): lines are missing from this listing between the copy loop and the
// Close() call, so whether Close() is protected by try/finally cannot be told here.
158 private void MergeFields()
160 fieldInfos
= new FieldInfos(); // merge field names
161 for (int i
= 0; i
< readers
.Count
; i
++)
163 IndexReader reader
= (IndexReader
)readers
[i
];
// The names returned by GetFieldNames(flag) are added with the same flag value.
// NOTE(review): presumably the flag means "indexed" - confirm against IndexReader/FieldInfos.
165 fieldInfos
.Add(reader
.GetFieldNames(true), true);
166 fieldInfos
.Add(reader
.GetFieldNames(false), false);
168 fieldInfos
.Write(directory
, segment
+ ".fnm");
170 FieldsWriter fieldsWriter
= // merge field values
171 new FieldsWriter(directory
, segment
, fieldInfos
);
174 for (int i
= 0; i
< readers
.Count
; i
++)
176 IndexReader reader
= (IndexReader
)readers
[i
];
// Visit every document number below MaxDoc(); deleted ones are filtered out below.
178 int maxDoc
= reader
.MaxDoc();
179 for (int j
= 0; j
< maxDoc
; j
++)
180 if (!reader
.IsDeleted(j
)) // skip deleted docs
181 fieldsWriter
.AddDocument(reader
.Document(j
));
186 fieldsWriter
.Close();
// State shared by the term-merging methods. All four start as null; MergeTerms()
// closes each one that is non-null.
// Output for "<segment>.frq" (doc/frequency data written by AppendPostings).
190 private OutputStream freqOutput
= null;
// Output for "<segment>.prx" (position deltas written by AppendPostings).
191 private OutputStream proxOutput
= null;
// Receives one (term, TermInfo) entry per call to MergeTermInfo().
192 private TermInfosWriter termInfosWriter
= null;
// Queue of SegmentMergeInfo objects, created in MergeTermInfos().
193 private SegmentMergeQueue queue
= null;
// Creates the "<segment>.frq" and "<segment>.prx" outputs and a TermInfosWriter, and
// closes every non-null output/writer/queue afterwards.
// NOTE(review): the call that performs the actual merge (presumably MergeTermInfos())
// and any try/finally around it are not visible in this listing - confirm.
195 private void MergeTerms()
199 freqOutput
= directory
.CreateFile(segment
+ ".frq");
200 proxOutput
= directory
.CreateFile(segment
+ ".prx");
// NOTE(review): the assignment target of this expression is not visible; presumably
// `termInfosWriter =` - confirm.
202 new TermInfosWriter(directory
, segment
, fieldInfos
);
// Null checks allow cleanup to run even if some resources were never created.
208 if (freqOutput
!= null) freqOutput
.Close();
209 if (proxOutput
!= null) proxOutput
.Close();
210 if (termInfosWriter
!= null) termInfosWriter
.Close();
211 if (queue
!= null) queue
.Close();
// Drives the term merge across all readers:
//   1. wraps each reader's term enumeration in a SegmentMergeInfo and puts it on `queue`,
//   2. repeatedly pops the top entry plus every following entry whose term compares
//      equal to it, and passes that group to MergeTermInfo(),
//   3. afterwards each entry of the group is either put back on the queue or closed.
// NOTE(review): the declaration of `_base` and the conditions that choose between
// Put() and Close() are not visible in this listing - confirm against the full source.
215 private void MergeTermInfos()
// Queue is sized to hold one entry per reader.
217 queue
= new SegmentMergeQueue(readers
.Count
);
219 for (int i
= 0; i
< readers
.Count
; i
++)
221 IndexReader reader
= (IndexReader
)readers
[i
];
222 TermEnum termEnum
= reader
.Terms();
223 SegmentMergeInfo smi
= new SegmentMergeInfo(_base
, termEnum
, reader
);
// Advance the running offset by this reader's NumDocs() for the next reader.
224 _base
+= reader
.NumDocs();
226 queue
.Put(smi
); // initialize queue
// Scratch array for one group of equal terms; at most one entry per reader.
231 SegmentMergeInfo
[] match
= new SegmentMergeInfo
[readers
.Count
];
233 while (queue
.Size() > 0)
235 int matchSize
= 0; // pop matching terms
236 match
[matchSize
++] = (SegmentMergeInfo
)queue
.Pop();
237 Term term
= match
[0].term
;
238 SegmentMergeInfo top
= (SegmentMergeInfo
)queue
.Top();
// Keep popping while the queue's top entry carries the same term as match[0].
240 while (top
!= null && term
.CompareTo(top
.term
) == 0)
242 match
[matchSize
++] = (SegmentMergeInfo
)queue
.Pop();
243 top
= (SegmentMergeInfo
)queue
.Top();
246 MergeTermInfo(match
, matchSize
); // add new TermInfo
// Walk the group backwards, returning entries to the queue or closing them.
248 while (matchSize
> 0)
250 SegmentMergeInfo smi
= match
[--matchSize
];
// NOTE(review): the condition selecting between the next two statements is missing
// from this listing; presumably it tests whether `smi` has another term - confirm.
252 queue
.Put(smi
); // restore queue
254 smi
.Close(); // done with a segment
// Single TermInfo instance reused by every MergeTermInfo() call (overwritten via Set())
// instead of allocating a new object per term.
259 private readonly TermInfo termInfo
= new TermInfo(); // minimize consing
// Writes one term of the merged segment.
//   smis - entries that all carry the same term; only the first `n` are used
//   n    - number of valid entries in `smis`
// Records the current positions of the freq and prox outputs, appends the postings of
// all `n` entries, then registers the term (taken from smis[0]) with those positions.
// NOTE(review): lines are missing from this listing between the AppendPostings call
// and the dictionary update, so any condition on `df` cannot be seen here - confirm.
261 private void MergeTermInfo(SegmentMergeInfo
[] smis
, int n
)
// Capture the file pointers before anything is written for this term.
263 long freqPointer
= freqOutput
.GetFilePointer();
264 long proxPointer
= proxOutput
.GetFilePointer();
266 int df
= AppendPostings(smis
, n
); // append posting data
270 // add an entry to the dictionary with pointers to prox and freq files
271 termInfo
.Set(df
, freqPointer
, proxPointer
);
272 termInfosWriter
.Add(smis
[0].term
, termInfo
);
// Appends the postings of one term from the first `n` entries of `smis` to the freq
// and prox outputs.
//   smis - entries positioned on the same term
//   n    - number of valid entries in `smis`
// Returns an int; per the inline comment on `df` it is the number of docs with the term.
// NOTE(review): the declaration and update of `lastDoc`, the increment of `df`, the
// return statement and several branch conditions are not visible in this listing.
276 private int AppendPostings(SegmentMergeInfo
[] smis
, int n
)
279 int df
= 0; // number of docs w/ term
280 for (int i
= 0; i
< n
; i
++)
282 SegmentMergeInfo smi
= smis
[i
];
283 TermPositions postings
= smi
.postings
;
284 int _base
= smi
._base
;
285 int[] docMap
= smi
.docMap
;
// Position this entry's postings using its term enumerator.
286 postings
.Seek(smi
.termEnum
);
287 while (postings
.Next())
289 int doc
= postings
.Doc();
// NOTE(review): any null check on docMap before this lookup is not visible here.
291 doc
= docMap
[doc
]; // map around deletions
292 doc
+= _base
; // convert to merged space
// NOTE(review): the condition guarding this throw is not visible; the message
// suggests `doc` is checked against `lastDoc` - confirm.
295 throw new InvalidOperationException("docs out of order");
// Delta from lastDoc, shifted left so that bit 0 is free to carry the freq=1 flag.
297 int docCode
= (doc
- lastDoc
) << 1; // use low bit to flag freq=1
300 int freq
= postings
.Freq();
// NOTE(review): the branch choosing between the two encodings below is not visible;
// per the inline comments the first form is the freq=1 case.
303 freqOutput
.WriteVInt(docCode
| 1); // write doc & freq=1
307 freqOutput
.WriteVInt(docCode
); // write doc
308 freqOutput
.WriteVInt(freq
); // write frequency in doc
// Each position is written as the difference from the previous one, starting at 0.
311 int lastPosition
= 0; // write position deltas
312 for (int j
= 0; j
< freq
; j
++)
314 int position
= postings
.NextPosition();
315 proxOutput
.WriteVInt(position
- lastPosition
);
316 lastPosition
= position
;
325 private int MergeNorms()
328 for (int i
= 0; i
< fieldInfos
.Size(); i
++)
330 FieldInfo fi
= fieldInfos
.FieldInfo(i
);
333 OutputStream output
= directory
.CreateFile(segment
+ ".f" + i
);
336 for (int j
= 0; j
< readers
.Count
; j
++)
338 IndexReader reader
= (IndexReader
)readers
[j
];
339 byte[] input
= reader
.Norms(fi
.name
);
341 int maxDoc
= reader
.MaxDoc();
342 for (int k
= 0; k
< maxDoc
; k
++)
344 byte norm
= input
!= null ? input
[k
] : (byte)0;
345 if (!reader
.IsDeleted(k
))
347 output
.WriteByte(norm
);