7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Search_Lucene
18 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
23 /** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
24 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';
26 /** Zend_Search_Lucene_Index_SegmentInfo */
27 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Index/SegmentInfo.php';
29 /** Zend_Search_Lucene_Index_SegmentMerger */
30 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Index/SegmentMerger.php';
36 * @package Zend_Search_Lucene
38 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
39 * @license http://framework.zend.com/license/new-bsd New BSD License
41 class Zend_Search_Lucene_Index_Writer
44 * @todo Implement Analyzer substitution
45 * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
46 * temporary index files
47 * @todo Directory lock processing
/**
 * Number of documents required before the buffered in-memory
 * documents are written into a new segment.
 *
 * The merge policy also uses this value as the size of its smallest
 * merge level.
 *
 * @var integer
 */
public $maxBufferedDocs = 10;

/**
 * Largest number of documents ever merged by addDocument().
 *
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX.
 *
 * @var integer
 */
public $maxMergeDocs = PHP_INT_MAX;

/**
 * Determines how often segment indices are merged by addDocument().
 *
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 *
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 *
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * @var integer
 */
public $mergeFactor = 10;

/**
 * File system adapter.
 *
 * @var Zend_Search_Lucene_Storage_Directory
 */
private $_directory = null;

/**
 * Number of addDocument() calls made since the segments file was last
 * rewritten. It is added to the stored index version and reset to zero
 * by _updateSegments().
 *
 * @var integer
 */
private $_versionUpdate = 0;

/**
 * Segments created by this writer that are not yet listed in the
 * segments file, keyed by segment name.
 * Array of Zend_Search_Lucene_Index_SegmentInfo objects.
 *
 * @var array
 */
private $_newSegments = array();

/**
 * Names of the segments to be deleted on commit (name => name).
 *
 * @var array
 */
private $_segmentsToDelete = array();

/**
 * Current segment to add documents to; null when no segment is open.
 *
 * @var Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter
 */
private $_currentSegment = null;

/**
 * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
 *
 * It's a reference to the corresponding Zend_Search_Lucene::$_segmentInfos array.
 *
 * @var array Zend_Search_Lucene_Index_SegmentInfo
 */
private $_segmentInfos;
139 * List of indexfiles extensions
143 private static $_indexExtensions = array('.cfs' => '.cfs',
/**
 * Opens the index for writing.
 *
 * When $create is true, every index file found in the directory
 * ('segments', 'deletable', files with a known index extension and
 * <segment_name>.f<decimal_number> files) is deleted, and empty
 * 'segments' and 'deletable' files are written. Otherwise the existing
 * 'segments' file is opened and its format marker is checked.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory  storage that holds the index files
 * @param array   $segmentInfos  Zend_Search_Lucene_Index_SegmentInfo objects of the index;
 *                               kept by reference, so the writer's updates reach the caller's array
 * @param boolean $create        true to create the index or overwrite the existing one
 * @throws Zend_Search_Lucene_Exception  if an existing segments file does not start with the expected format marker
 */
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, &$segmentInfos, $create = false)
{
    $this->_directory    = $directory;
    $this->_segmentInfos = &$segmentInfos;

    if ($create) {
        // Remove everything that belongs to a previous index
        foreach ($this->_directory->fileList() as $file) {
            if ($file == 'deletable' ||
                $file == 'segments'  ||
                isset(self::$_indexExtensions[ substr($file, strlen($file)-4)]) ||
                preg_match('/\.f\d+$/i', $file) /* matches <segment_name>.f<decimal_number> file names */) {
                $this->_directory->deleteFile($file);
            }
        }

        $segmentsFile = $this->_directory->createFile('segments');
        // write format marker
        $segmentsFile->writeInt((int)0xFFFFFFFF);

        // write version (it is initialized by the current time) as two 32-bit words
        // $segmentsFile->writeLong((int)microtime(true));
        //
        // floor() drops the fractional part explicitly while keeping a float:
        // the bitwise AND below would otherwise perform an implicit lossy
        // float-to-int conversion, which PHP 8.1+ reports as deprecated.
        $version = floor(microtime(true));
        $segmentsFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
        $segmentsFile->writeInt((int)($version & 0xFFFFFFFF));

        // write name counter
        $segmentsFile->writeInt(0);
        // write segment counter
        $segmentsFile->writeInt(0);

        $deletableFile = $this->_directory->createFile('deletable');
        // write counter
        $deletableFile->writeInt(0);
    } else {
        $segmentsFile = $this->_directory->getFileObject('segments');
        $format = $segmentsFile->readInt();
        if ($format != (int)0xFFFFFFFF) {
            throw new Zend_Search_Lucene_Exception('Wrong segments file format');
        }
    }
}
/**
 * Adds a document to this index.
 *
 * The document goes into the current in-memory segment, which is opened
 * on demand under a newly reserved segment name. Once that segment holds
 * at least $maxBufferedDocs documents it is committed. Every call also
 * bumps the pending version counter and gives the merge policy a chance
 * to run.
 *
 * @param Zend_Search_Lucene_Document $document
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    if ($this->_currentSegment === null) {
        $this->_currentSegment = new Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter(
            $this->_directory,
            $this->_newSegmentName()
        );
    }
    $this->_currentSegment->addDocument($document);

    // Flush the buffered documents once the threshold is reached
    if ($this->_currentSegment->count() >= $this->maxBufferedDocs) {
        $this->commit();
    }

    $this->_versionUpdate++;

    $this->_maybeMergeSegments();
}
/**
 * Merge segments if necessary.
 *
 * Segments are visited in ascending order of document count and grouped
 * into levels: the first level ends at $maxBufferedDocs documents and each
 * following level is $mergeFactor times larger. Whenever the segments
 * collected for a level hold at least that level's size in total, they are
 * merged. Processing stops as soon as a level would exceed $maxMergeDocs.
 */
private function _maybeMergeSegments()
{
    $segmentSizes = array();
    foreach ($this->_segmentInfos as $segId => $segmentInfo) {
        $segmentSizes[$segId] = $segmentInfo->count();
    }
    asort($segmentSizes, SORT_NUMERIC);

    // Both settings are public and may hold values for which the level
    // size never grows (factor <= 1) or stays at zero (threshold <= 0);
    // the level loop below would then never terminate. Fall back to the
    // smallest values that still make progress.
    $sizeToMerge = ($this->maxBufferedDocs > 0) ? $this->maxBufferedDocs : 1;
    $levelFactor = ($this->mergeFactor > 1)     ? $this->mergeFactor     : 2;

    $mergePool = array();
    $poolSize  = 0;
    foreach ($segmentSizes as $segId => $size) {
        // Check, if segment comes into a new merging block
        while ($size >= $sizeToMerge) {
            // Merge previous block if it's large enough
            if ($poolSize >= $sizeToMerge) {
                $this->_mergeSegments($mergePool);
            }
            $mergePool = array();
            $poolSize  = 0;

            $sizeToMerge *= $levelFactor;

            if ($sizeToMerge > $this->maxMergeDocs) {
                return;
            }
        }

        $mergePool[] = $this->_segmentInfos[$segId];
        $poolSize   += $size;
    }

    // Merge the last collected block if it's large enough
    if ($poolSize >= $sizeToMerge) {
        $this->_mergeSegments($mergePool);
    }
}
/**
 * Merge specified segments into one new segment and commit the result.
 *
 * The merge runs under an exclusive, non-blocking lock on
 * 'index.optimization.lock'; if the lock can't be taken the method
 * returns without doing anything. The lock is released on every exit
 * path, including when the merge or the commit throws.
 *
 * @param array $segments  array of Zend_Search_Lucene_Index_SegmentInfo objects
 * @throws Exception  whatever the merger or commit() throws is passed on unchanged
 */
private function _mergeSegments($segments)
{
    // Try to get exclusive non-blocking lock to the 'index.optimization.lock'
    // Skip optimization if it's performed by other process right now
    $optimizationLock = $this->_directory->createFile('index.optimization.lock');
    if (!$optimizationLock->lock(LOCK_EX, true)) {
        return;
    }

    try {
        $newName = $this->_newSegmentName();
        $merger  = new Zend_Search_Lucene_Index_SegmentMerger($this->_directory, $newName);

        $mergedNames = array();
        foreach ($segments as $segmentInfo) {
            $merger->addSource($segmentInfo);
            $mergedNames[$segmentInfo->getName()] = $segmentInfo->getName();
        }

        $newSegment = $merger->merge();

        // Schedule the sources for deletion only now that the merge has
        // succeeded, so a failed merge can't cause a later commit to drop
        // segments that were never replaced.
        foreach ($mergedNames as $segName) {
            $this->_segmentsToDelete[$segName] = $segName;
        }
        if ($newSegment !== null) {
            $this->_newSegments[$newSegment->getName()] = $newSegment;
        }

        $this->commit();
    } catch (Exception $e) {
        // Don't leave the optimization lock behind on failure
        $optimizationLock->unlock();
        throw $e;
    }

    // optimization is finished
    $optimizationLock->unlock();
}
/**
 * Update segments file by adding the new segments to the list and
 * dropping the segments scheduled for deletion.
 *
 * Under an exclusive lock on 'index.lock' a replacement file
 * 'segments.new' is written from the current 'segments' file and then
 * renamed over it. Afterwards the lock is switched to shared mode, the
 * files of the deleted segments are removed, and segments listed in the
 * file but unknown to this writer (created by other processes) are loaded
 * into $_segmentInfos.
 *
 * @throws Zend_Search_Lucene_Exception  if the exclusive index lock can't be obtained
 */
private function _updateSegments()
{
    // Get an exclusive index lock.
    // Wait until all parallel searchers or indexers have stopped, and keep
    // new searchers out while we are updating the segments file.
    $lock = $this->_directory->getFileObject('index.lock');
    if (!$lock->lock(LOCK_EX)) {
        throw new Zend_Search_Lucene_Exception('Can\'t obtain exclusive index lock');
    }

    // Do not share file handlers to get file updates from other sessions.
    $segmentsFile   = $this->_directory->getFileObject('segments', false);
    $newSegmentFile = $this->_directory->createFile('segments.new', false);

    // Write format marker
    $newSegmentFile->writeInt((int)0xFFFFFFFF);

    // Write index version
    $segmentsFile->seek(4, SEEK_CUR); // skip the format marker of the old file
    // $version = $segmentsFile->readLong() + $this->_versionUpdate;
    // Process version on 32-bit platforms: read it as two 32-bit words and
    // rebuild it as a double; a negative low word is mapped back to its
    // unsigned value.
    $versionHigh = $segmentsFile->readInt();
    $versionLow  = $segmentsFile->readInt();
    $version = $versionHigh * ((double)0xFFFFFFFF + 1) +
               (($versionLow < 0)? (double)0xFFFFFFFF - (-1 - $versionLow) : $versionLow);
    $version += $this->_versionUpdate;
    $this->_versionUpdate = 0;
    $newSegmentFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
    $newSegmentFile->writeInt((int)($version & 0xFFFFFFFF));

    // Write segment name counter
    $newSegmentFile->writeInt($segmentsFile->readInt());

    // Remember where the number of segments is stored
    $numOfSegmentsOffset = $newSegmentFile->tell();
    // Write number of segments
    $segmentsCount = $segmentsFile->readInt();
    $newSegmentFile->writeInt(0);  // Write dummy data (segment counter)

    // Copy every segment entry that is not scheduled for deletion
    $segments = array();
    for ($count = 0; $count < $segmentsCount; $count++) {
        $segName = $segmentsFile->readString();
        $segSize = $segmentsFile->readInt();

        // $_segmentsToDelete is keyed by segment name, so a key lookup is enough
        if (!isset($this->_segmentsToDelete[$segName])) {
            $newSegmentFile->writeString($segName);
            $newSegmentFile->writeInt($segSize);

            $segments[$segName] = $segSize;
        }
    }
    $segmentsFile->close();

    $segmentsCount = count($segments) + count($this->_newSegments);

    // Remove segments, not listed in $segments (deleted)
    // Load segments, not listed in $this->_segmentInfos
    foreach ($this->_segmentInfos as $segId => $segInfo) {
        if (isset($segments[$segInfo->getName()])) {
            // Segment is already included into $this->_segmentInfos
            unset($segments[$segInfo->getName()]);
        } else {
            // remove deleted segment from a list
            unset($this->_segmentInfos[$segId]);
        }
    }
    // $segments now contains the list of segments to load;
    // that is done at the end of this method

    foreach ($this->_newSegments as $segName => $segmentInfo) {
        $newSegmentFile->writeString($segName);
        $newSegmentFile->writeInt($segmentInfo->count());

        $this->_segmentInfos[] = $segmentInfo;
    }
    $this->_newSegments = array();

    $newSegmentFile->seek($numOfSegmentsOffset);
    $newSegmentFile->writeInt($segmentsCount);  // Update segments count
    $newSegmentFile->close();
    $this->_directory->renameFile('segments.new', 'segments');

    // Segments file update is finished
    // Switch back to shared lock mode
    $lock->lock(LOCK_SH);

    // Remove the files of the deleted segments
    $fileList = $this->_directory->fileList();
    foreach ($this->_segmentsToDelete as $nameToDelete) {
        foreach (self::$_indexExtensions as $ext) {
            if ($this->_directory->fileExists($nameToDelete . $ext)) {
                $this->_directory->deleteFile($nameToDelete . $ext);
            }
        }

        // <segment_name>.f<decimal_number> files
        foreach ($fileList as $file) {
            if (substr($file, 0, strlen($nameToDelete) + 2) == ($nameToDelete . '.f') &&
                ctype_digit( substr($file, strlen($nameToDelete) + 2) )) {
                $this->_directory->deleteFile($file);
            }
        }
    }
    $this->_segmentsToDelete = array();

    // Load segments, created by other process
    foreach ($segments as $segName => $segSize) {
        // NOTE(review): argument order (name, document count, directory) is
        // assumed - confirm against Zend_Search_Lucene_Index_SegmentInfo.
        $this->_segmentInfos[] = new Zend_Search_Lucene_Index_SegmentInfo($segName,
                                                                          $segSize,
                                                                          $this->_directory);
    }
}
/**
 * Commit current changes.
 *
 * Closes the segment that is currently being filled (if any) and queues
 * the resulting segment as new. The segments file is then rewritten, but
 * only when there is at least one new segment or one segment to delete.
 */
public function commit()
{
    if ($this->_currentSegment !== null) {
        $closedSegment = $this->_currentSegment->close();
        if ($closedSegment !== null) {
            $this->_newSegments[$closedSegment->getName()] = $closedSegment;
        }
        $this->_currentSegment = null;
    }

    // Nothing pending - leave the segments file untouched
    if (count($this->_newSegments) == 0 && count($this->_segmentsToDelete) == 0) {
        return;
    }

    $this->_updateSegments();
}
/**
 * Merges the provided indexes into this index.
 *
 * Not implemented yet: the method currently does nothing.
 *
 * @param array $readers
 */
public function addIndexes($readers)
{
    /**
     * @todo implementation
     */
}
/**
 * Merges all segments together into a single segment, optimizing
 * an index for search.
 *
 * Every segment currently listed in $_segmentInfos is handed to
 * _mergeSegments().
 *
 * @throws Zend_Search_Lucene_Exception
 */
public function optimize()
{
    $allSegments = $this->_segmentInfos;

    $this->_mergeSegments($allSegments);
}
/**
 * Get name for new segment.
 *
 * Reads the segment name counter from the segments file, stores the
 * incremented value back and builds the name from the value that was read:
 * an underscore followed by the counter in base 36.
 *
 * @return string
 * @throws Zend_Search_Lucene_Exception  if the segments file can't be locked exclusively
 */
private function _newSegmentName()
{
    // Do not share file handler to get file updates from other sessions.
    $segmentsFile = $this->_directory->getFileObject('segments', false);

    // Get exclusive segments file lock.
    // This can't intersect with an _updateSegments() call of another process,
    // because that needs the exclusive index lock and waits until all other
    // searchers have stopped.
    if (!$segmentsFile->lock(LOCK_EX)) {
        throw new Zend_Search_Lucene_Exception('Can\'t obtain exclusive index lock');
    }

    // 12 = 4 (int, file format marker) + 8 (long, index version)
    $counterOffset = 12;

    $segmentsFile->seek($counterOffset);
    $segmentNameCounter = $segmentsFile->readInt();

    $segmentsFile->seek($counterOffset);
    $segmentsFile->writeInt($segmentNameCounter + 1);

    // Flush output to guarantee that a wrong value will not be loaded between
    // unlock and return (which calls the $segmentsFile destructor)
    $segmentsFile->flush();

    $segmentsFile->unlock();

    return '_' . base_convert($segmentNameCounter, 10, 36);
}