7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Search_Lucene
17 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
18 * @license http://framework.zend.com/license/new-bsd New BSD License
23 /** Zend_Search_Lucene_Interface */
24 require_once 'Zend/Search/Lucene/Interface.php';
27 * Multisearcher allows to search through several independent indexes.
30 * @package Zend_Search_Lucene
31 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
32 * @license http://framework.zend.com/license/new-bsd New BSD License
34 class Zend_Search_Lucene_Interface_MultiSearcher
implements Zend_Search_Lucene_Interface
37 * List of indices for searching.
38 * Array of Zend_Search_Lucene_Interface objects
/**
 * Object constructor.
 *
 * Stores the supplied list of sub-indices and then verifies every element.
 *
 * @param array $indices   Arrays of indices for search
 * @throws Zend_Search_Lucene_Exception  if an element does not implement
 *                                       Zend_Search_Lucene_Interface
 */
public function __construct($indices = array())
{
    $this->_indices = $indices;

    foreach ($this->_indices as $subIndex) {
        if ($subIndex instanceof Zend_Search_Lucene_Interface) {
            continue;
        }

        // First offending element aborts construction.
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('sub-index objects have to implement Zend_Search_Lucene_Interface.');
    }
}
/**
 * Add index for searching.
 *
 * The index is appended to the end of the sub-index list.
 *
 * @param Zend_Search_Lucene_Interface $index
 */
public function addIndex(Zend_Search_Lucene_Interface $index)
{
    $this->_indices[] = $index;
}
/**
 * Get current generation number
 *
 * Interface contract:
 *   0 means pre-2.1 index format
 *  -1 means there are no segments files.
 *
 * A multi-searcher cannot answer this question, so the call always fails.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @throws Zend_Search_Lucene_Exception  always
 */
public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
{
    require_once 'Zend/Search/Lucene/Exception.php';

    $message = "Generation number can't be retrieved for multi-searcher";
    throw new Zend_Search_Lucene_Exception($message);
}
/**
 * Get segments file name
 *
 * Delegates to the static implementation in Zend_Search_Lucene.
 *
 * @param integer $generation
 * @return mixed  whatever Zend_Search_Lucene::getSegmentFileName() returns
 */
public static function getSegmentFileName($generation)
{
    $segmentFileName = Zend_Search_Lucene::getSegmentFileName($generation);

    return $segmentFileName;
}
/**
 * Get index format version
 *
 * Not available on a multi-searcher; the call always fails.
 *
 * @throws Zend_Search_Lucene_Exception  always
 */
public function getFormatVersion()
{
    require_once 'Zend/Search/Lucene/Exception.php';

    $message = "Format version can't be retrieved for multi-searcher";
    throw new Zend_Search_Lucene_Exception($message);
}
/**
 * Set index format version.
 * Index is converted to this format at the nearest update time
 *
 * The value is forwarded to every sub-index.
 *
 * @param int $formatVersion
 */
public function setFormatVersion($formatVersion)
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->setFormatVersion($formatVersion);
    }
}
/**
 * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
 *
 * Not available on a multi-searcher; the call always fails.
 *
 * @return Zend_Search_Lucene_Storage_Directory
 * @throws Zend_Search_Lucene_Exception  always
 */
public function getDirectory()
{
    require_once 'Zend/Search/Lucene/Exception.php';

    $message = "Index directory can't be retrieved for multi-searcher";
    throw new Zend_Search_Lucene_Exception($message);
}
/**
 * Returns the total number of documents in this index (including deleted documents).
 *
 * The result is the sum of count() over all sub-indices; it is 0 when no
 * sub-index is registered.
 *
 * @return integer
 */
public function count()
{
    $count = 0;

    foreach ($this->_indices as $index) {
        // Ask the sub-index itself: $this->_indices is a plain array and has
        // no count() method, so calling it there is a fatal error.
        $count += $index->count();
    }

    return $count;
}
/**
 * Returns one greater than the largest possible document number.
 * This may be used to, e.g., determine how big to allocate a structure which will have
 * an element for every document number in an index.
 *
 * Implemented as an alias of count().
 *
 * @return integer
 */
public function maxDoc()
{
    $documentsTotal = $this->count();

    return $documentsTotal;
}
/**
 * Returns the total number of non-deleted documents in this index.
 *
 * The result is the sum of numDocs() over all sub-indices; it is 0 when no
 * sub-index is registered.
 *
 * @return integer
 */
public function numDocs()
{
    $docs = 0;

    foreach ($this->_indices as $index) {
        // Ask the sub-index itself: $this->_indices is a plain array and has
        // no numDocs() method, so calling it there is a fatal error.
        $docs += $index->numDocs();
    }

    return $docs;
}
/**
 * Checks, that document is deleted
 *
 * $id is a multi-searcher-wide document number. It is translated into a
 * sub-index local number by walking the sub-indices in order.
 *
 * @param integer $id
 * @return boolean
 * @throws Zend_Search_Lucene_Exception  Exception is thrown if $id is out of the range
 */
public function isDeleted($id)
{
    foreach ($this->_indices as $subIndex) {
        $size = $subIndex->count();

        if ($id < $size) {
            return $subIndex->isDeleted($id);
        }

        // Not in this sub-index: make $id relative to the next one.
        $id -= $size;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
/**
 * Set default search field.
 *
 * Null means, that search is performed through all fields by default
 *
 * Default value is null
 *
 * This method is static, so there is no $this and the sub-index list cannot
 * be reached from here (touching $this in a static method is a fatal error).
 * The setting is therefore delegated to the static implementation in
 * Zend_Search_Lucene, the same class getSegmentFileName() delegates to.
 *
 * NOTE(review): assumes Zend_Search_Lucene::setDefaultSearchField() is the
 * static, process-wide implementation of this interface method - confirm.
 *
 * @param string $fieldName
 */
public static function setDefaultSearchField($fieldName)
{
    require_once 'Zend/Search/Lucene.php';

    Zend_Search_Lucene::setDefaultSearchField($fieldName);
}
/**
 * Get default search field.
 *
 * Null means, that search is performed through all fields by default
 *
 * This method is static, so there is no $this and the sub-index list cannot
 * be inspected here (touching $this in a static method is a fatal error).
 * The value is read from the static implementation in Zend_Search_Lucene.
 *
 * NOTE(review): assumes Zend_Search_Lucene::getDefaultSearchField() is the
 * static, process-wide implementation of this interface method - confirm.
 *
 * @return string
 */
public static function getDefaultSearchField()
{
    require_once 'Zend/Search/Lucene.php';

    return Zend_Search_Lucene::getDefaultSearchField();
}
/**
 * Set result set limit.
 *
 * 0 (default) means no limit
 *
 * This method is static, so there is no $this and the sub-index list cannot
 * be reached from here (touching $this in a static method is a fatal error).
 * The setting is delegated to the static implementation in Zend_Search_Lucene.
 *
 * NOTE(review): assumes Zend_Search_Lucene::setResultSetLimit() is the
 * static, process-wide implementation of this interface method - confirm.
 *
 * @param integer $limit
 */
public static function setResultSetLimit($limit)
{
    require_once 'Zend/Search/Lucene.php';

    Zend_Search_Lucene::setResultSetLimit($limit);
}
/**
 * Get result set limit.
 *
 * 0 means no limit
 *
 * This method is static, so there is no $this and the sub-index list cannot
 * be inspected here (touching $this in a static method is a fatal error).
 * The value is read from the static implementation in Zend_Search_Lucene.
 *
 * NOTE(review): assumes Zend_Search_Lucene::getResultSetLimit() is the
 * static, process-wide implementation of this interface method - confirm.
 *
 * @return integer
 */
public static function getResultSetLimit()
{
    require_once 'Zend/Search/Lucene.php';

    return Zend_Search_Lucene::getResultSetLimit();
}
/**
 * Retrieve index maxBufferedDocs option
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment
 *
 * Default value is 10
 *
 * The value is only returned when every sub-index reports the same one.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception  if there are no sub-indices or they disagree
 */
public function getMaxBufferedDocs()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $maxBufferedDocs = reset($this->_indices)->getMaxBufferedDocs();

    foreach ($this->_indices as $index) {
        if ($index->getMaxBufferedDocs() !== $maxBufferedDocs) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Message names the option actually compared (was a copy of the
            // "default search field" text).
            throw new Zend_Search_Lucene_Exception('Indices have different maxBufferedDocs option.');
        }
    }

    return $maxBufferedDocs;
}
/**
 * Set index maxBufferedDocs option
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment
 *
 * Default value is 10
 *
 * The value is forwarded to every sub-index.
 *
 * @param integer $maxBufferedDocs
 */
public function setMaxBufferedDocs($maxBufferedDocs)
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->setMaxBufferedDocs($maxBufferedDocs);
    }
}
/**
 * Retrieve index maxMergeDocs option
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * The value is only returned when every sub-index reports the same one.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception  if there are no sub-indices or they disagree
 */
public function getMaxMergeDocs()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $maxMergeDocs = reset($this->_indices)->getMaxMergeDocs();

    foreach ($this->_indices as $index) {
        if ($index->getMaxMergeDocs() !== $maxMergeDocs) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Message names the option actually compared (was a copy of the
            // "default search field" text).
            throw new Zend_Search_Lucene_Exception('Indices have different maxMergeDocs option.');
        }
    }

    return $maxMergeDocs;
}
/**
 * Set index maxMergeDocs option
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * The value is forwarded to every sub-index.
 *
 * @param integer $maxMergeDocs
 */
public function setMaxMergeDocs($maxMergeDocs)
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->setMaxMergeDocs($maxMergeDocs);
    }
}
/**
 * Retrieve index mergeFactor option
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10
 *
 * The value is only returned when every sub-index reports the same one.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception  if there are no sub-indices or they disagree
 */
public function getMergeFactor()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $mergeFactor = reset($this->_indices)->getMergeFactor();

    foreach ($this->_indices as $index) {
        if ($index->getMergeFactor() !== $mergeFactor) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Message names the option actually compared (was a copy of the
            // "default search field" text).
            throw new Zend_Search_Lucene_Exception('Indices have different mergeFactor option.');
        }
    }

    return $mergeFactor;
}
/**
 * Set index mergeFactor option
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10
 *
 * The value is forwarded to every sub-index.
 *
 * @param integer $mergeFactor
 */
public function setMergeFactor($mergeFactor)
{
    foreach ($this->_indices as $index) {
        // Must hit the mergeFactor setter; the previous code called
        // setMaxMergeDocs() and silently overwrote a different option.
        $index->setMergeFactor($mergeFactor);
    }
}
/**
 * Performs a query against the index and returns an array
 * of Zend_Search_Lucene_Search_QueryHit objects.
 * Input is a string or Zend_Search_Lucene_Search_Query.
 *
 * Each sub-index is queried in turn. Hit ids coming from the second and later
 * sub-indices are shifted (in place, on the hit objects) by the number of
 * documents in the preceding sub-indices, then all hit lists are concatenated.
 *
 * @param mixed $query
 * @return array Zend_Search_Lucene_Search_QueryHit
 * @throws Zend_Search_Lucene_Exception
 */
public function find($query)
{
    if (count($this->_indices) == 0) {
        return array();
    }

    $perIndexHits = array();
    $offset       = 0;

    foreach ($this->_indices as $subIndex) {
        $subHits = $subIndex->find($query);

        if ($offset != 0) {
            foreach ($subHits as $subHit) {
                $subHit->id += $offset;
            }
        }

        $perIndexHits[] = $subHits;
        $offset += $subIndex->count();
    }

    /** @todo Implement advanced sorting */

    return call_user_func_array('array_merge', $perIndexHits);
}
/**
 * Returns a list of all unique field names that exist in this index.
 *
 * The field name lists of all sub-indices are merged and de-duplicated.
 * An empty array is returned when no sub-index is registered.
 *
 * @param boolean $indexed
 * @return array
 */
public function getFieldNames($indexed = false)
{
    // Same guard as find(): array_merge must not be invoked without arguments.
    if (count($this->_indices) == 0) {
        return array();
    }

    $fieldNamesList = array();

    foreach ($this->_indices as $index) {
        $fieldNamesList[] = $index->getFieldNames($indexed);
    }

    return array_unique(call_user_func_array('array_merge', $fieldNamesList));
}
/**
 * Returns a Zend_Search_Lucene_Document object for the document
 * number $id in this index.
 *
 * $id is a multi-searcher-wide document number (or a hit carrying one). It is
 * translated into a sub-index local number by walking the sub-indices in order.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @return Zend_Search_Lucene_Document
 * @throws Zend_Search_Lucene_Exception  Exception is thrown if $id is out of the range
 */
public function getDocument($id)
{
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        $id = $id->id;
    }

    foreach ($this->_indices as $subIndex) {
        $size = $subIndex->count();

        if ($id < $size) {
            return $subIndex->getDocument($id);
        }

        // Not in this sub-index: make $id relative to the next one.
        $id -= $size;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
/**
 * Returns true if index contain documents with specified term.
 *
 * Is used for query optimization.
 *
 * Stops at the first sub-index that reports the term.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return boolean
 */
public function hasTerm(Zend_Search_Lucene_Index_Term $term)
{
    $found = false;

    foreach ($this->_indices as $subIndex) {
        if ($subIndex->hasTerm($term)) {
            $found = true;
            break;
        }
    }

    return $found;
}
/**
 * Returns IDs of all the documents containing term.
 *
 * Ids reported by the second and later sub-indices are shifted by the number
 * of documents in the preceding sub-indices. An empty array is returned when
 * no sub-index is registered.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 * @throws Zend_Search_Lucene_Exception  if a documents filter is supplied
 */
public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    // Same guard as find(): array_merge must not be invoked without arguments.
    if (count($this->_indices) == 0) {
        return array();
    }

    $docsList   = array();
    $indexShift = 0;

    foreach ($this->_indices as $index) {
        $docs = $index->termDocs($term);

        if ($indexShift != 0) {
            foreach ($docs as $id => $docId) {
                $docs[$id] += $indexShift;
            }
        }

        $indexShift += $index->count();
        $docsList[] = $docs;
    }

    return call_user_func_array('array_merge', $docsList);
}
/**
 * Returns documents filter for all documents containing term.
 *
 * It performs the same operation as termDocs, but return result as
 * Zend_Search_Lucene_Index_DocsFilter object
 *
 * Document filters are not supported by the multi-searcher; the call always fails.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return Zend_Search_Lucene_Index_DocsFilter
 * @throws Zend_Search_Lucene_Exception  always
 */
public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    require_once 'Zend/Search/Lucene/Exception.php';

    $message = 'Document filters could not used with multi-searcher';
    throw new Zend_Search_Lucene_Exception($message);
}
/**
 * Returns an array of all term freqs.
 * Return array structure: array( docId => freq, ...)
 *
 * Document ids reported by each sub-index are shifted by the number of
 * documents in the preceding sub-indices. An empty array is returned when no
 * sub-index is registered.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 * @throws Zend_Search_Lucene_Exception  if a documents filter is supplied
 */
public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    $freqs      = array();
    $indexShift = 0;

    foreach ($this->_indices as $index) {
        // Assign by key instead of array_merge(): array_merge renumbers
        // integer keys from 0, which would throw the document ids away.
        foreach ($index->termFreqs($term) as $docId => $freq) {
            $freqs[$docId + $indexShift] = $freq;
        }

        $indexShift += $index->count();
    }

    return $freqs;
}
/**
 * Returns an array of all term positions in the documents.
 * Return array structure: array( docId => array( pos1, pos2, ...), ...)
 *
 * Document ids reported by each sub-index are shifted by the number of
 * documents in the preceding sub-indices. An empty array is returned when no
 * sub-index is registered.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 * @throws Zend_Search_Lucene_Exception  if a documents filter is supplied
 */
public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    $termPositions = array();
    $indexShift    = 0;

    foreach ($this->_indices as $index) {
        // Collect under the shifted id directly. The previous code wrote the
        // shifted entries back into the array being iterated, replaced it with
        // an empty array, and finally merged the last sub-result instead of
        // the collected list; array_merge would also have renumbered the ids.
        foreach ($index->termPositions($term) as $docId => $positions) {
            $termPositions[$docId + $indexShift] = $positions;
        }

        $indexShift += $index->count();
    }

    return $termPositions;
}
/**
 * Returns the number of documents in this index containing the $term.
 *
 * The result is the sum of docFreq() over all sub-indices.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return integer
 */
public function docFreq(Zend_Search_Lucene_Index_Term $term)
{
    $total = 0;

    foreach ($this->_indices as $subIndex) {
        $total += $subIndex->docFreq($term);
    }

    return $total;
}
/**
 * Retrieve similarity used by index reader
 *
 * The similarity is only returned when every sub-index reports the identical
 * (===) one.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 * @throws Zend_Search_Lucene_Exception  if there are no sub-indices or they disagree
 */
public function getSimilarity()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $expected = reset($this->_indices)->getSimilarity();

    foreach ($this->_indices as $subIndex) {
        if ($subIndex->getSimilarity() === $expected) {
            continue;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices have different similarity.');
    }

    return $expected;
}
/**
 * Returns a normalization factor for "field, document" pair.
 *
 * $id is a multi-searcher-wide document number. It is translated into a
 * sub-index local number by walking the sub-indices in order. Unlike the
 * other id based methods, an out-of-range $id yields null, not an exception.
 *
 * @param integer $id
 * @param string $fieldName
 * @return float|null
 */
public function norm($id, $fieldName)
{
    foreach ($this->_indices as $subIndex) {
        $size = $subIndex->count();

        if ($id < $size) {
            return $subIndex->norm($id, $fieldName);
        }

        // Not in this sub-index: make $id relative to the next one.
        $id -= $size;
    }

    return null;
}
/**
 * Returns true if any documents have been deleted from this index.
 *
 * Stops at the first sub-index that reports deletions.
 *
 * @return boolean
 */
public function hasDeletions()
{
    $anyDeleted = false;

    foreach ($this->_indices as $subIndex) {
        if ($subIndex->hasDeletions()) {
            $anyDeleted = true;
            break;
        }
    }

    return $anyDeleted;
}
/**
 * Deletes a document from the index.
 * $id is an internal document id
 *
 * $id is a multi-searcher-wide document number (or a hit carrying one). It is
 * translated into a sub-index local number by walking the sub-indices in order.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @throws Zend_Search_Lucene_Exception  if $id is out of the range
 */
public function delete($id)
{
    // Accept a hit object as documented (getDocument() does the same);
    // without this the object itself was compared against the counts.
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        $id = $id->id;
    }

    foreach ($this->_indices as $index) {
        $indexCount = $index->count();

        if ($indexCount > $id) {
            $index->delete($id);
            return;
        }

        // Not in this sub-index: make $id relative to the next one.
        $id -= $indexCount;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
/**
 * Callback used to choose target index for new documents
 *
 * Function/method signature:
 *    Zend_Search_Lucene_Interface  callbackFunction(Zend_Search_Lucene_Document $document, array $indices);
 *
 * null means "default documents distributing algorithm"
 *
 * @var callback
 */
protected $_documentDistributorCallBack = null;
/**
 * Set callback for choosing target index.
 *
 * null is accepted and restores the default distributing algorithm.
 *
 * @param callback $callback
 * @throws Zend_Search_Lucene_Exception  if $callback is neither null nor callable
 */
public function setDocumentDistributorCallback($callback)
{
    $acceptable = ($callback === null) || is_callable($callback);

    if (!$acceptable) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('$callback parameter must be a valid callback.');
    }

    $this->_documentDistributorCallBack = $callback;
}
/**
 * Get callback for choosing target index.
 *
 * @return callback|null  null when the default distributing algorithm is in use
 */
public function getDocumentDistributorCallback()
{
    $callback = $this->_documentDistributorCallBack;

    return $callback;
}
/**
 * Adds a document to this index.
 *
 * The target sub-index is chosen by the distributor callback when one is set,
 * otherwise it is picked at random.
 *
 * @param Zend_Search_Lucene_Document $document
 * @throws Zend_Search_Lucene_Exception  if no target sub-index can be determined
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    if ($this->_documentDistributorCallBack !== null) {
        $index = call_user_func($this->_documentDistributorCallBack, $document, $this->_indices);

        // The callback contract is to return a sub-index; fail with a catchable
        // exception instead of a fatal "member function on a non-object".
        if (!$index instanceof Zend_Search_Lucene_Interface) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Document distributor callback has to return Zend_Search_Lucene_Interface object.');
        }
    } else {
        // array_rand() cannot pick from an empty array.
        if (count($this->_indices) == 0) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Indices list is empty');
        }

        $index = $this->_indices[array_rand($this->_indices)];
    }

    $index->addDocument($document);
}
/**
 * Commit changes resulting from delete() or undeleteAll() operations.
 *
 * The call is forwarded to every sub-index.
 */
public function commit()
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->commit();
    }
}
/**
 * Merges all segments into one
 *
 * The call is forwarded to every sub-index.
 */
public function optimize()
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->optimize();
    }
}
/**
 * Returns an array of all terms in this index.
 *
 * The term lists of all sub-indices are merged and de-duplicated.
 * An empty array is returned when no sub-index is registered.
 *
 * NOTE(review): array_unique() compares elements as strings by default;
 * confirm the term objects returned by the sub-indices support that.
 *
 * @return array
 */
public function terms()
{
    // Same guard as find(): array_merge must not be invoked without arguments.
    if (count($this->_indices) == 0) {
        return array();
    }

    $termsList = array();

    foreach ($this->_indices as $index) {
        $termsList[] = $index->terms();
    }

    return array_unique(call_user_func_array('array_merge', $termsList));
}
/**
 * Terms stream priority queue object
 *
 * null until resetTermsStream() creates it; set back to null by closeTermsStream().
 *
 * @var Zend_Search_Lucene_TermStreamsPriorityQueue
 */
private $_termsStream = null;
/**
 * Reset terms stream.
 *
 * An existing stream is reset; otherwise a new priority queue is created
 * over the sub-indices.
 */
public function resetTermsStream()
{
    if ($this->_termsStream !== null) {
        $this->_termsStream->resetTermsStream();
        return;
    }

    /** Zend_Search_Lucene_TermStreamsPriorityQueue */
    require_once 'Zend/Search/Lucene/TermStreamsPriorityQueue.php';

    $this->_termsStream = new Zend_Search_Lucene_TermStreamsPriorityQueue($this->_indices);
}
/**
 * Skip terms stream up to specified term prefix.
 *
 * Prefix contains fully specified field info and portion of searched term
 *
 * Forwards to the terms stream, which has to exist already
 * (see resetTermsStream()).
 *
 * @param Zend_Search_Lucene_Index_Term $prefix
 */
public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
{
    $stream = $this->_termsStream;
    $stream->skipTo($prefix);
}
/**
 * Scans terms dictionary and returns next term
 *
 * Forwards to the terms stream, which has to exist already
 * (see resetTermsStream()).
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function nextTerm()
{
    $stream = $this->_termsStream;

    return $stream->nextTerm();
}
/**
 * Returns term in current position
 *
 * Forwards to the terms stream, which has to exist already
 * (see resetTermsStream()).
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function currentTerm()
{
    $stream = $this->_termsStream;

    return $stream->currentTerm();
}
/**
 * Close terms stream
 *
 * Should be used for resources clean up if stream is not read up to the end
 *
 * The stream is closed and then discarded, so the next resetTermsStream()
 * call builds a new one.
 */
public function closeTermsStream()
{
    $stream = $this->_termsStream;
    $stream->closeTermsStream();

    $this->_termsStream = null;
}
/**
 * Undeletes all documents currently marked as deleted in this index.
 *
 * The call is forwarded to every sub-index.
 */
public function undeleteAll()
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->undeleteAll();
    }
}
/**
 * Add reference to the index object
 *
 * Intentionally a no-op for the multi-searcher.
 */
public function addReference()
{
    // Do nothing, since it's never referenced by indices
}
/**
 * Remove reference from the index object
 *
 * When reference count becomes zero, index is closed and resources are cleaned up
 *
 * Intentionally a no-op for the multi-searcher.
 */
public function removeReference()
{
    // Do nothing, since it's never referenced by indices
}