adding some strings
[moodle-linuxchix.git] / search / Zend / Search / Lucene / Index / SegmentMerger.php
blob157489c492ecd263f5610a2c3a157a6f3c9a1171
1 <?php
2 /**
3 * Zend Framework
5 * LICENSE
7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
15 * @category Zend
16 * @package Zend_Search_Lucene
17 * @subpackage Index
18 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
23 /** Zend_Search_Lucene_Exception */
24 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
26 /** Zend_Search_Lucene_Index_SegmentInfo */
27 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfo.php';
29 /** Zend_Search_Lucene_Index_SegmentWriter_StreamWriter */
30 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';
32 /** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */
33 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';
/**
 * Merges a set of existing index segments into a single new segment.
 *
 * Source segments are registered with addSource(); merge() then copies field
 * definitions, norms, stored fields and the term dictionary into the target
 * segment writer. Norms and stored fields of documents that a source segment
 * reports as deleted are skipped.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_SegmentMerger
{
    /**
     * Target segment writer
     *
     * @var Zend_Search_Lucene_Index_SegmentWriter_StreamWriter
     */
    private $_writer;

    /**
     * Number of docs in a new segment
     * (counted while stored fields are copied; deleted documents are not counted)
     *
     * @var integer
     */
    private $_docCount;

    /**
     * A set of segments to be merged, keyed by segment name
     *
     * @var array Zend_Search_Lucene_Index_SegmentInfo
     */
    private $_segmentInfos = array();

    /**
     * Flag to signal, that merge is already done
     *
     * @var boolean
     */
    private $_mergeDone = false;

    /**
     * Field map
     * [<segment_name>][<field_number>] => <target_field_number>
     *
     * @var array
     */
    private $_fieldsMap = array();


    /**
     * Object constructor.
     *
     * Creates new segment merger with $directory as target to merge segments into
     * and $name as a name of new segment
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     */
    public function __construct($directory, $name)
    {
        $this->_writer = new Zend_Search_Lucene_Index_SegmentWriter_StreamWriter($directory, $name);
    }


    /**
     * Add segment to a collection of segments to be merged
     *
     * Segments are stored under their name, so adding a segment whose name is
     * already registered replaces the earlier entry.
     *
     * @param Zend_Search_Lucene_Index_SegmentInfo $segmentInfo
     */
    public function addSource(Zend_Search_Lucene_Index_SegmentInfo $segmentInfo)
    {
        $this->_segmentInfos[$segmentInfo->getName()] = $segmentInfo;
    }


    /**
     * Do merge.
     *
     * Runs the four merge phases (fields, norms, stored fields, terms), marks
     * the merger as done and returns the result of closing the target writer.
     * NOTE(review): the declared return type is a segment info object for the
     * newly created segment; the writer's close() is not visible here - confirm.
     *
     * @return Zend_Search_Lucene_Index_SegmentInfo
     * @throws Zend_Search_Lucene_Exception if the merge was already performed
     *                                      or no source segment has been added
     */
    public function merge()
    {
        if ($this->_mergeDone) {
            throw new Zend_Search_Lucene_Exception('Merge is already done.');
        }

        $sourceCount = count($this->_segmentInfos);
        if ($sourceCount < 1) {
            throw new Zend_Search_Lucene_Exception('Wrong number of segments to be merged ('
                                                 . $sourceCount
                                                 . ').');
        }

        $this->_mergeFields();
        $this->_mergeNorms();
        $this->_mergeStoredFields();
        $this->_mergeTerms();

        $this->_mergeDone = true;

        return $this->_writer->close();
    }


    /**
     * Merge fields information
     *
     * Registers every field of every source segment with the target writer and
     * records, per segment, which target field number each source field
     * number was mapped to.
     */
    private function _mergeFields()
    {
        foreach ($this->_segmentInfos as $segName => $segmentInfo) {
            foreach ($segmentInfo->getFieldInfos() as $fieldInfo) {
                $this->_fieldsMap[$segName][$fieldInfo->number] = $this->_writer->addFieldInfo($fieldInfo);
            }
        }
    }

    /**
     * Merge field's normalization factors
     *
     * For each indexed field of the target segment, the norm vectors of all
     * source segments are appended in segment order. Entries belonging to
     * deleted documents are dropped.
     */
    private function _mergeNorms()
    {
        foreach ($this->_writer->getFieldInfos() as $fieldInfo) {
            if (!$fieldInfo->isIndexed) {
                continue;
            }

            foreach ($this->_segmentInfos as $segName => $segmentInfo) {
                if ($segmentInfo->hasDeletions()) {
                    // Copy the vector entry by entry, skipping deleted documents
                    $srcNorm = $segmentInfo->normVector($fieldInfo->name);
                    $norm    = '';
                    $docs    = $segmentInfo->count();
                    for ($count = 0; $count < $docs; $count++) {
                        if (!$segmentInfo->isDeleted($count)) {
                            $norm .= $srcNorm[$count];
                        }
                    }
                    $this->_writer->addNorm($fieldInfo->name, $norm);
                } else {
                    // Nothing to filter out: pass the source vector through unchanged
                    $this->_writer->addNorm($fieldInfo->name, $segmentInfo->normVector($fieldInfo->name));
                }
            }
        }
    }

    /**
     * Merge stored fields
     *
     * Reads the stored fields of every document from each source segment's
     * '.fdt' file and hands them to the target writer, unless the document is
     * deleted. Also (re)counts the documents of the new segment.
     */
    private function _mergeStoredFields()
    {
        $this->_docCount = 0;

        foreach ($this->_segmentInfos as $segName => $segmentInfo) {
            $fdtFile = $segmentInfo->openCompoundFile('.fdt');

            // Hoisted out of the loop condition, as _mergeNorms() already does
            $docs = $segmentInfo->count();

            for ($count = 0; $count < $docs; $count++) {
                // Records are read sequentially, so deleted documents have to be
                // parsed as well to keep the file position in step.
                $fieldCount   = $fdtFile->readVInt();
                $storedFields = array();

                for ($count2 = 0; $count2 < $fieldCount; $count2++) {
                    $fieldNum  = $fdtFile->readVInt();
                    $bits      = $fdtFile->readByte();
                    $fieldInfo = $segmentInfo->getField($fieldNum);

                    // Bit 2 distinguishes binary from text values; bit 1 is passed
                    // on to the field constructor (presumably the "tokenized" flag).
                    if (!($bits & 2)) { // Text data
                        $storedFields[] =
                                 new Zend_Search_Lucene_Field($fieldInfo->name,
                                                              $fdtFile->readString(),
                                                              'UTF-8',
                                                              true,
                                                              $fieldInfo->isIndexed,
                                                              $bits & 1 );
                    } else {            // Binary data
                        // NOTE(review): the third (encoding) argument is missing from
                        // the scraped listing; an empty string is assumed because a
                        // binary value has no text encoding - confirm against upstream.
                        $storedFields[] =
                                 new Zend_Search_Lucene_Field($fieldInfo->name,
                                                              $fdtFile->readBinary(),
                                                              '',
                                                              true,
                                                              $fieldInfo->isIndexed,
                                                              $bits & 1,
                                                              true);
                    }
                }

                if (!$segmentInfo->isDeleted($count)) {
                    $this->_docCount++;
                    $this->_writer->addStoredFields($storedFields);
                }
            }
        }
    }


    /**
     * Merge term dictionaries
     *
     * Walks the terms of all source segments through a priority queue. The
     * positions of a term that occurs in several segments are accumulated and
     * written to the target writer once, when the next queued term differs.
     */
    private function _mergeTerms()
    {
        $segmentInfoQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();

        // Each reset() receives the running start id and returns the value to be
        // used for the following segment.
        // NOTE(review): the meaning of the second argument (true) is defined by
        // SegmentInfo::reset(), which is not visible here - confirm.
        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segName => $segmentInfo) {
            $segmentStartId = $segmentInfo->reset($segmentStartId, true);

            // Skip "empty" segments
            if ($segmentInfo->currentTerm() !== null) {
                $segmentInfoQueue->put($segmentInfo);
            }
        }

        $this->_writer->initializeDictionaryFiles();

        $termDocs = array();
        while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
            // Merge positions array (array union: keys already present are kept)
            $termDocs += $segmentInfo->currentTermPositions();

            if ($segmentInfoQueue->top() === null ||
                $segmentInfoQueue->top()->currentTerm()->key() !=
                            $segmentInfo->currentTerm()->key()) {
                // We got new term
                ksort($termDocs, SORT_NUMERIC);

                // Add term if it's contained in any document
                if (count($termDocs) > 0) {
                    $this->_writer->addTerm($segmentInfo->currentTerm(), $termDocs);
                }
                $termDocs = array();
            }

            $segmentInfo->nextTerm();
            // check, if segment dictionary is finished
            if ($segmentInfo->currentTerm() !== null) {
                // Put segment back into the priority queue
                $segmentInfoQueue->put($segmentInfo);
            }
        }

        $this->_writer->closeDictionaryFiles();
    }
}