/**
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
// Class dependencies. $CFG is not defined in the visible part of this file;
// it has to be in scope already when this file is included.

/** Zend_Search_Lucene_Exception */
require_once $CFG->dirroot . '/search/Zend/Search/Lucene/Exception.php';

/** Zend_Search_Lucene_Index_SegmentInfo */
require_once $CFG->dirroot . '/search/Zend/Search/Lucene/Index/SegmentInfo.php';

/** Zend_Search_Lucene_Index_SegmentWriter_StreamWriter */
require_once $CFG->dirroot . '/search/Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';

/** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */
require_once $CFG->dirroot . '/search/Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';
/**
 * Merges a set of existing index segments into one newly written segment.
 *
 * Sources are registered with addSource(); merge() then copies field
 * definitions, norms, stored fields and the term dictionary into the target
 * segment writer. Norms and stored fields of documents flagged as deleted
 * are skipped.
 *
 * NOTE(review): this class was recovered from a listing in which short lines
 * (braces, blank lines and a few one-token statements) were missing.
 * Statements commented as "restored" were absent from that listing and were
 * reconstructed from the surrounding code; confirm them against the upstream
 * Zend Framework source before relying on them.
 *
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_SegmentMerger
{
    /**
     * Target segment writer
     *
     * @var Zend_Search_Lucene_Index_SegmentWriter_StreamWriter
     */
    private $_writer;

    /**
     * Number of docs in a new segment
     *
     * @var integer
     */
    private $_docCount;

    /**
     * A set of segments to be merged, keyed by segment name
     *
     * @var array Zend_Search_Lucene_Index_SegmentInfo
     */
    private $_segmentInfos = array();

    /**
     * Flag to signal, that merge is already done
     *
     * @var boolean
     */
    private $_mergeDone = false;

    /**
     * Field map
     * [<segment_name>][<field_number>] => <target_field_number>
     *
     * @var array
     */
    private $_fieldsMap = array();


    /**
     * Creates new segment merger with $directory as target to merge segments into
     * and $name as a name of new segment
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     */
    public function __construct($directory, $name)
    {
        $this->_writer = new Zend_Search_Lucene_Index_SegmentWriter_StreamWriter($directory, $name);
    }


    /**
     * Add segment to a collection of segments to be merged
     *
     * Segments are keyed by their name, so adding a segment with an already
     * registered name replaces the earlier entry.
     *
     * @param Zend_Search_Lucene_Index_SegmentInfo $segmentInfo
     */
    public function addSource(Zend_Search_Lucene_Index_SegmentInfo $segmentInfo)
    {
        $this->_segmentInfos[$segmentInfo->getName()] = $segmentInfo;
    }


    /**
     * Do merge.
     *
     * Runs the four merge phases (fields, norms, stored fields, terms) once
     * and returns whatever the target writer's close() returns.
     *
     * @return Zend_Search_Lucene_Index_SegmentInfo
     * @throws Zend_Search_Lucene_Exception if merge() was already run, or if
     *         no source segment has been added
     */
    public function merge()
    {
        if ($this->_mergeDone) {
            throw new Zend_Search_Lucene_Exception('Merge is already done.');
        }

        if (count($this->_segmentInfos) < 1) {
            // Tail of the message (closing parenthesis and full stop) restored.
            throw new Zend_Search_Lucene_Exception('Wrong number of segments to be merged ('
                                                 . count($this->_segmentInfos)
                                                 . ').');
        }

        $this->_mergeFields();
        $this->_mergeNorms();
        $this->_mergeStoredFields();
        $this->_mergeTerms();

        $this->_mergeDone = true;

        return $this->_writer->close();
    }


    /**
     * Merge fields information
     *
     * Registers every field of every source segment with the writer and
     * records, per segment, which target field number the writer returned
     * for each source field number.
     */
    private function _mergeFields()
    {
        foreach ($this->_segmentInfos as $segName => $segmentInfo) {
            foreach ($segmentInfo->getFieldInfos() as $fieldInfo) {
                $this->_fieldsMap[$segName][$fieldInfo->number] = $this->_writer->addFieldInfo($fieldInfo);
            }
        }
    }

    /**
     * Merge field's normalization factors
     *
     * For each indexed field known to the writer, appends the norm vector of
     * every source segment. When a segment has deletions, the vector is
     * rebuilt without the entries of deleted documents.
     */
    private function _mergeNorms()
    {
        foreach ($this->_writer->getFieldInfos() as $fieldInfo) {
            if ($fieldInfo->isIndexed) {
                foreach ($this->_segmentInfos as $segmentInfo) {
                    if ($segmentInfo->hasDeletions()) {
                        $srcNorm = $segmentInfo->normVector($fieldInfo->name);
                        $norm    = ''; // restored: accumulator for the filtered vector
                        $docs    = $segmentInfo->count();
                        for ($count = 0; $count < $docs; $count++) {
                            if (!$segmentInfo->isDeleted($count)) {
                                $norm .= $srcNorm[$count];
                            }
                        }
                        $this->_writer->addNorm($fieldInfo->name, $norm);
                    } else {
                        $this->_writer->addNorm($fieldInfo->name, $segmentInfo->normVector($fieldInfo->name));
                    }
                }
            }
        }
    }

    /**
     * Merge stored fields
     *
     * Reads each document record from every source segment's '.fdt' file and
     * passes the stored fields of non-deleted documents to the writer.
     */
    private function _mergeStoredFields()
    {
        $this->_docCount = 0;

        foreach ($this->_segmentInfos as $segmentInfo) {
            $fdtFile = $segmentInfo->openCompoundFile('.fdt');
            $docs    = $segmentInfo->count();

            for ($count = 0; $count < $docs; $count++) {
                $fieldCount   = $fdtFile->readVInt();
                $storedFields = array();

                // The record is read in full even for deleted documents, so
                // that the file position ends up at the next record.
                for ($count2 = 0; $count2 < $fieldCount; $count2++) {
                    $fieldNum  = $fdtFile->readVInt();
                    $bits      = $fdtFile->readByte();
                    $fieldInfo = $segmentInfo->getField($fieldNum);

                    // NOTE(review): the encoding, "stored", "tokenized" and
                    // "binary" arguments below were missing from the listing
                    // and are restored on the assumption that bit 1 of $bits
                    // flags a tokenized field - confirm against upstream.
                    if (!($bits & 2)) { // Text data
                        $storedFields[] =
                                 new Zend_Search_Lucene_Field($fieldInfo->name,
                                                              $fdtFile->readString(),
                                                              'UTF-8',
                                                              true,
                                                              $fieldInfo->isIndexed,
                                                              $bits & 1);
                    } else {            // Binary data
                        $storedFields[] =
                                 new Zend_Search_Lucene_Field($fieldInfo->name,
                                                              $fdtFile->readBinary(),
                                                              '',
                                                              true,
                                                              $fieldInfo->isIndexed,
                                                              $bits & 1,
                                                              true);
                    }
                }

                if (!$segmentInfo->isDeleted($count)) {
                    $this->_docCount++; // restored
                    $this->_writer->addStoredFields($storedFields);
                }
            }
        }
    }


    /**
     * Merge term dictionaries
     *
     * Pops segments from a priority queue one term at a time, accumulates the
     * positions of the current term across segments, and hands the combined
     * data to the writer once the next queued term has a different key.
     */
    private function _mergeTerms()
    {
        $segmentInfoQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();

        // restored: reset() receives the running value and returns the next
        // one; the starting value of 0 is an assumption - confirm upstream.
        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segmentInfo) {
            $segmentStartId = $segmentInfo->reset($segmentStartId, true);

            // Skip "empty" segments
            if ($segmentInfo->currentTerm() !== null) {
                $segmentInfoQueue->put($segmentInfo);
            }
        }

        $this->_writer->initializeDictionaryFiles();

        $termDocs = array(); // restored: positions collected for the current term
        while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
            // Merge positions array
            $termDocs += $segmentInfo->currentTermPositions();

            if ($segmentInfoQueue->top() === null ||
                $segmentInfoQueue->top()->currentTerm()->key() !=
                            $segmentInfo->currentTerm()->key()) {
                // The queue holds no further segment positioned on this term
                ksort($termDocs, SORT_NUMERIC);

                // Add term if it's contained in any document
                if (count($termDocs) > 0) {
                    $this->_writer->addTerm($segmentInfo->currentTerm(), $termDocs);
                }
                $termDocs = array(); // restored: start collecting the next term
            }

            $segmentInfo->nextTerm();
            // check, if segment dictionary is finished
            if ($segmentInfo->currentTerm() !== null) {
                // Put segment back into the priority queue
                $segmentInfoQueue->put($segmentInfo);
            }
        }

        $this->_writer->closeDictionaryFiles();
    }
}