*prechod na novsiu verziu ZF
[sport-group.git] / library / Zend / Search / Lucene / MultiSearcher.php
blobb78a877109618ad9642d141766e009b5b58f07e0
1 <?php
2 /**
3 * Zend Framework
5 * LICENSE
7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
15 * @category Zend
16 * @package Zend_Search_Lucene
17 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
18 * @license http://framework.zend.com/license/new-bsd New BSD License
19 * @version $Id: MultiSearcher.php 16971 2009-07-22 18:05:45Z mikaelkael $
22 /** Zend_Search_Lucene_TermStreamsPriorityQueue */
23 require_once 'Zend/Search/Lucene/TermStreamsPriorityQueue.php';
25 /** Zend_Search_Lucene_Interface */
26 require_once 'Zend/Search/Lucene/Interface.php';
28 /**
29 * Multisearcher allows to search through several independent indexes.
31 * @category Zend
32 * @package Zend_Search_Lucene
33 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
34 * @license http://framework.zend.com/license/new-bsd New BSD License
36 class Zend_Search_Lucene_Interface_MultiSearcher implements Zend_Search_Lucene_Interface
38 /**
39 * List of indices for searching.
40 * Array of Zend_Search_Lucene_Interface objects
42 * @var array
44 protected $_indices;
/**
 * Object constructor.
 *
 * Stores the supplied list and then checks that every element implements
 * Zend_Search_Lucene_Interface.
 *
 * @param array $indices Index objects to search through
 * @throws Zend_Search_Lucene_Exception If an element does not implement Zend_Search_Lucene_Interface
 */
public function __construct($indices = array())
{
    $this->_indices = $indices;

    foreach ($this->_indices as $candidate) {
        if ($candidate instanceof Zend_Search_Lucene_Interface) {
            continue;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('sub-index objects have to implement Zend_Search_Lucene_Interface.');
    }
}
/**
 * Register one more index with the multi-searcher.
 *
 * The index is appended to the end of the internal list.
 *
 * @param Zend_Search_Lucene_Interface $index Index to append
 */
public function addIndex(Zend_Search_Lucene_Interface $index)
{
    $this->_indices[] = $index;
}
/**
 * Get current generation number.
 *
 * Not supported by the multi-searcher: this implementation always throws.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory Unused
 * @return integer Never returns
 * @throws Zend_Search_Lucene_Exception Always
 */
public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
{
    $message = "Generation number can't be retrieved for multi-searcher";

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception($message);
}
/**
 * Get segments file name.
 *
 * Delegates to Zend_Search_Lucene::getSegmentFileName().
 *
 * @param integer $generation
 * @return string
 */
public static function getSegmentFileName($generation)
{
    $fileName = Zend_Search_Lucene::getSegmentFileName($generation);

    return $fileName;
}
/**
 * Get index format version.
 *
 * Not supported by the multi-searcher: this implementation always throws.
 *
 * @return integer Never returns
 * @throws Zend_Search_Lucene_Exception Always
 */
public function getFormatVersion()
{
    $message = "Format version can't be retrieved for multi-searcher";

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception($message);
}
/**
 * Set index format version.
 *
 * The requested version is forwarded to every sub-index.
 * Index is converted to this format at the nearest update time.
 *
 * @param int $formatVersion
 */
public function setFormatVersion($formatVersion)
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->setFormatVersion($formatVersion);
    }
}
/**
 * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
 *
 * Not supported by the multi-searcher: this implementation always throws.
 *
 * @return Zend_Search_Lucene_Storage_Directory Never returns
 * @throws Zend_Search_Lucene_Exception Always
 */
public function getDirectory()
{
    $message = "Index directory can't be retrieved for multi-searcher";

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception($message);
}
/**
 * Returns the total number of documents in this index (including deleted documents).
 *
 * Computed as the sum of count() over all sub-indices.
 *
 * @return integer
 */
public function count()
{
    $count = 0;

    foreach ($this->_indices as $index) {
        // Ask each sub-index; $this->_indices itself is a plain array and has no count() method.
        $count += $index->count();
    }

    return $count;
}
/**
 * Returns one greater than the largest possible document number.
 *
 * This may be used to, e.g., determine how big to allocate a structure which
 * will have an element for every document number in an index.
 * Implemented as an alias of count().
 *
 * @return integer
 */
public function maxDoc()
{
    $total = $this->count();

    return $total;
}
/**
 * Returns the total number of non-deleted documents in this index.
 *
 * Computed as the sum of numDocs() over all sub-indices.
 *
 * @return integer
 */
public function numDocs()
{
    $docs = 0;

    foreach ($this->_indices as $index) {
        // Ask each sub-index; $this->_indices itself is a plain array and has no numDocs() method.
        $docs += $index->numDocs();
    }

    return $docs;
}
/**
 * Checks whether a document is deleted.
 *
 * The global id is mapped to a sub-index by walking the indices in order and
 * subtracting each sub-index's count() until the id falls inside one of them.
 *
 * @param integer $id
 * @return boolean
 * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
 */
public function isDeleted($id)
{
    $localId = $id;

    foreach ($this->_indices as $subIndex) {
        $size = $subIndex->count();

        if ($localId < $size) {
            return $subIndex->isDeleted($localId);
        }

        // Not in this sub-index: make the id relative to the next one.
        $localId -= $size;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
/**
 * Set default search field.
 *
 * Null means, that search is performed through all fields by default.
 *
 * Default value is null.
 *
 * This method is static, so there is no $this and the sub-index list cannot
 * be walked (the previous body did so and could only fail). The call is
 * delegated to the static setter on Zend_Search_Lucene instead.
 * NOTE(review): assumes sub-indices read this setting from Zend_Search_Lucene - confirm.
 *
 * @param string $fieldName
 */
public static function setDefaultSearchField($fieldName)
{
    require_once 'Zend/Search/Lucene.php';
    Zend_Search_Lucene::setDefaultSearchField($fieldName);
}
/**
 * Get default search field.
 *
 * Null means, that search is performed through all fields by default.
 *
 * This method is static, so there is no $this and the sub-index list cannot
 * be inspected (the previous body did so and could only fail). The value is
 * read from the static getter on Zend_Search_Lucene instead.
 * NOTE(review): assumes sub-indices share the Zend_Search_Lucene setting - confirm.
 *
 * @return string
 * @throws Zend_Search_Lucene_Exception
 */
public static function getDefaultSearchField()
{
    require_once 'Zend/Search/Lucene.php';

    return Zend_Search_Lucene::getDefaultSearchField();
}
/**
 * Set result set limit.
 *
 * 0 (default) means no limit.
 *
 * This method is static, so there is no $this and the sub-index list cannot
 * be walked (the previous body did so and could only fail). The call is
 * delegated to the static setter on Zend_Search_Lucene instead.
 * NOTE(review): assumes sub-indices read this setting from Zend_Search_Lucene - confirm.
 *
 * @param integer $limit
 */
public static function setResultSetLimit($limit)
{
    require_once 'Zend/Search/Lucene.php';
    Zend_Search_Lucene::setResultSetLimit($limit);
}
/**
 * Get result set limit.
 *
 * 0 means no limit.
 *
 * This method is static, so there is no $this and the sub-index list cannot
 * be inspected (the previous body did so and could only fail). The value is
 * read from the static getter on Zend_Search_Lucene instead.
 * NOTE(review): assumes sub-indices share the Zend_Search_Lucene setting - confirm.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception
 */
public static function getResultSetLimit()
{
    require_once 'Zend/Search/Lucene.php';

    return Zend_Search_Lucene::getResultSetLimit();
}
/**
 * Retrieve index maxBufferedDocs option.
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment.
 *
 * Default value is 10.
 *
 * The value is only returned when every sub-index reports the same setting.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception If there are no indices or they disagree on the value
 */
public function getMaxBufferedDocs()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $maxBufferedDocs = reset($this->_indices)->getMaxBufferedDocs();

    foreach ($this->_indices as $index) {
        if ($index->getMaxBufferedDocs() !== $maxBufferedDocs) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Message names the option actually being compared (it used to mention the default search field).
            throw new Zend_Search_Lucene_Exception('Indices have different maxBufferedDocs values.');
        }
    }

    return $maxBufferedDocs;
}
/**
 * Set index maxBufferedDocs option.
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment.
 *
 * Default value is 10.
 *
 * The value is forwarded to every sub-index.
 *
 * @param integer $maxBufferedDocs
 */
public function setMaxBufferedDocs($maxBufferedDocs)
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->setMaxBufferedDocs($maxBufferedDocs);
    }
}
/**
 * Retrieve index maxMergeDocs option.
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX.
 *
 * The value is only returned when every sub-index reports the same setting.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception If there are no indices or they disagree on the value
 */
public function getMaxMergeDocs()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $maxMergeDocs = reset($this->_indices)->getMaxMergeDocs();

    foreach ($this->_indices as $index) {
        if ($index->getMaxMergeDocs() !== $maxMergeDocs) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Message names the option actually being compared (it used to mention the default search field).
            throw new Zend_Search_Lucene_Exception('Indices have different maxMergeDocs values.');
        }
    }

    return $maxMergeDocs;
}
/**
 * Set index maxMergeDocs option.
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX.
 *
 * The value is forwarded to every sub-index.
 *
 * @param integer $maxMergeDocs
 */
public function setMaxMergeDocs($maxMergeDocs)
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->setMaxMergeDocs($maxMergeDocs);
    }
}
/**
 * Retrieve index mergeFactor option.
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10.
 *
 * The value is only returned when every sub-index reports the same setting.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception If there are no indices or they disagree on the value
 */
public function getMergeFactor()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $mergeFactor = reset($this->_indices)->getMergeFactor();

    foreach ($this->_indices as $index) {
        if ($index->getMergeFactor() !== $mergeFactor) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Message names the option actually being compared (it used to mention the default search field).
            throw new Zend_Search_Lucene_Exception('Indices have different mergeFactor values.');
        }
    }

    return $mergeFactor;
}
/**
 * Set index mergeFactor option.
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10.
 *
 * The value is forwarded to every sub-index.
 *
 * @param integer $mergeFactor
 */
public function setMergeFactor($mergeFactor)
{
    foreach ($this->_indices as $index) {
        // Forward the merge factor itself; the old body called setMaxMergeDocs() with an undefined variable.
        $index->setMergeFactor($mergeFactor);
    }
}
/**
 * Performs a query against the index and returns an array
 * of Zend_Search_Lucene_Search_QueryHit objects.
 * Input is a string or Zend_Search_Lucene_Search_Query.
 *
 * Each sub-index is queried in turn. Hit ids from the second sub-index onwards
 * are shifted by the combined count() of the preceding sub-indices so that they
 * are valid multi-searcher document ids. Results are concatenated in sub-index
 * order; they are not re-sorted across indices.
 *
 * Returns an empty array when no sub-indices are registered.
 *
 * @param mixed $query
 * @return array Zend_Search_Lucene_Search_QueryHit
 * @throws Zend_Search_Lucene_Exception
 */
public function find($query)
{
    $result = array();

    $indexShift = 0;
    foreach ($this->_indices as $index) {
        foreach ($index->find($query) as $hit) {
            if ($indexShift != 0) {
                $hit->id += $indexShift;
            }

            $result[] = $hit;
        }

        $indexShift += $index->count();
    }

    /** @todo Implement advanced sorting */

    return $result;
}
/**
 * Returns a list of all unique field names that exist in this index.
 *
 * The per-index lists are merged and de-duplicated. Returns an empty array
 * when no sub-indices are registered.
 *
 * @param boolean $indexed Passed through to each sub-index
 * @return array
 */
public function getFieldNames($indexed = false)
{
    $fieldNames = array();

    foreach ($this->_indices as $index) {
        // Merging step by step avoids calling array_merge() with no arguments on an empty index list.
        $fieldNames = array_merge($fieldNames, $index->getFieldNames($indexed));
    }

    return array_unique($fieldNames);
}
/**
 * Returns a Zend_Search_Lucene_Document object for the document
 * number $id in this index.
 *
 * A QueryHit argument is replaced by its id. The global id is then mapped to a
 * sub-index by walking the indices in order and subtracting each one's count().
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @return Zend_Search_Lucene_Document
 * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
 */
public function getDocument($id)
{
    $localId = ($id instanceof Zend_Search_Lucene_Search_QueryHit) ? $id->id : $id;

    foreach ($this->_indices as $subIndex) {
        $size = $subIndex->count();

        if ($localId < $size) {
            return $subIndex->getDocument($localId);
        }

        // Not in this sub-index: make the id relative to the next one.
        $localId -= $size;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
/**
 * Returns true if index contain documents with specified term.
 *
 * Is used for query optimization. Stops at the first sub-index that reports the term.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return boolean
 */
public function hasTerm(Zend_Search_Lucene_Index_Term $term)
{
    foreach ($this->_indices as $subIndex) {
        if (!$subIndex->hasTerm($term)) {
            continue;
        }

        return true;
    }

    return false;
}
/**
 * Returns IDs of all the documents containing term.
 *
 * Ids from each sub-index are shifted by the combined count() of the preceding
 * sub-indices so that they are valid multi-searcher document ids.
 * Returns an empty array when no sub-indices are registered.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter Must be null; filters are not supported
 * @return array
 * @throws Zend_Search_Lucene_Exception If a documents filter is supplied
 */
public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    $result = array();

    $indexShift = 0;
    foreach ($this->_indices as $index) {
        foreach ($index->termDocs($term) as $docId) {
            $result[] = $docId + $indexShift;
        }

        $indexShift += $index->count();
    }

    return $result;
}
/**
 * Returns documents filter for all documents containing term.
 *
 * It performs the same operation as termDocs, but return result as
 * Zend_Search_Lucene_Index_DocsFilter object.
 *
 * Not supported by the multi-searcher: this implementation always throws.
 *
 * @param Zend_Search_Lucene_Index_Term $term Unused
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter Unused
 * @return Zend_Search_Lucene_Index_DocsFilter Never returns
 * @throws Zend_Search_Lucene_Exception Always
 */
public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    $message = 'Document filters could not used with multi-searcher';

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception($message);
}
/**
 * Returns an array of all term freqs.
 * Return array structure: array( docId => freq, ...)
 *
 * Document ids from each sub-index are shifted by the combined count() of the
 * preceding sub-indices so that the keys are valid multi-searcher document ids.
 * Returns an empty array when no sub-indices are registered.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter Must be null; filters are not supported
 * @return array
 * @throws Zend_Search_Lucene_Exception If a documents filter is supplied
 */
public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    $result = array();

    $indexShift = 0;
    foreach ($this->_indices as $index) {
        // Assign by key: array_merge() would renumber the integer docId keys from zero
        // and destroy the docId => freq mapping.
        foreach ($index->termFreqs($term) as $docId => $freq) {
            $result[$docId + $indexShift] = $freq;
        }

        $indexShift += $index->count();
    }

    return $result;
}
/**
 * Returns an array of all term positions in the documents.
 * Return array structure: array( docId => array( pos1, pos2, ...), ...)
 *
 * Document ids from each sub-index are shifted by the combined count() of the
 * preceding sub-indices so that the keys are valid multi-searcher document ids.
 * Returns an empty array when no sub-indices are registered.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter Must be null; filters are not supported
 * @return array
 * @throws Zend_Search_Lucene_Exception If a documents filter is supplied
 */
public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    $result = array();

    $indexShift = 0;
    foreach ($this->_indices as $index) {
        // Collect into one result keyed by the shifted docId. The old body wrote the shifted
        // entries into the wrong array, merged only the last sub-index's data and let
        // array_merge() renumber the docId keys.
        foreach ($index->termPositions($term) as $docId => $positions) {
            $result[$docId + $indexShift] = $positions;
        }

        $indexShift += $index->count();
    }

    return $result;
}
/**
 * Returns the number of documents in this index containing the $term.
 *
 * Computed as the sum of docFreq() over all sub-indices.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return integer
 */
public function docFreq(Zend_Search_Lucene_Index_Term $term)
{
    $total = 0;

    foreach ($this->_indices as $subIndex) {
        $total = $total + $subIndex->docFreq($term);
    }

    return $total;
}
/**
 * Retrieve similarity used by index reader.
 *
 * The similarity of the first sub-index is returned, provided every sub-index
 * reports the identical value.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 * @throws Zend_Search_Lucene_Exception If there are no indices or they report different similarities
 */
public function getSimilarity()
{
    if (count($this->_indices) === 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $firstIndex = reset($this->_indices);
    $expected   = $firstIndex->getSimilarity();

    foreach ($this->_indices as $subIndex) {
        if ($expected === $subIndex->getSimilarity()) {
            continue;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices have different similarity.');
    }

    return $expected;
}
/**
 * Returns a normalization factor for "field, document" pair.
 *
 * The global id is mapped to a sub-index by walking the indices in order and
 * subtracting each one's count(). Unlike the other id-based methods, an id
 * beyond the last sub-index yields null rather than an exception.
 *
 * @param integer $id
 * @param string $fieldName
 * @return float|null
 */
public function norm($id, $fieldName)
{
    $localId = $id;

    foreach ($this->_indices as $subIndex) {
        $size = $subIndex->count();

        if ($localId < $size) {
            return $subIndex->norm($localId, $fieldName);
        }

        // Not in this sub-index: make the id relative to the next one.
        $localId -= $size;
    }

    return null;
}
/**
 * Returns true if any documents have been deleted from this index.
 *
 * Stops at the first sub-index that reports deletions.
 *
 * @return boolean
 */
public function hasDeletions()
{
    foreach ($this->_indices as $subIndex) {
        if (!$subIndex->hasDeletions()) {
            continue;
        }

        return true;
    }

    return false;
}
/**
 * Deletes a document from the index.
 * $id is an internal document id.
 *
 * A QueryHit argument is replaced by its id (same handling as getDocument()).
 * The global id is then mapped to a sub-index by walking the indices in order
 * and subtracting each one's count().
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
 */
public function delete($id)
{
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        // Unwrap first: the range arithmetic below needs a plain integer id.
        $id = $id->id;
    }

    foreach ($this->_indices as $index) {
        $indexCount = $index->count();

        if ($indexCount > $id) {
            $index->delete($id);
            return;
        }

        $id -= $indexCount;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
780 * Callback used to choose target index for new documents
782 * Function/method signature:
783 * Zend_Search_Lucene_Interface callbackFunction(Zend_Search_Lucene_Document $document, array $indices);
785 * null means "default documents distributing algorithm"
787 * @var callback
789 protected $_documentDistributorCallBack = null;
/**
 * Set callback for choosing target index.
 *
 * Passing null restores the default distribution used by addDocument().
 *
 * @param callback $callback Null or a valid PHP callback
 * @throws Zend_Search_Lucene_Exception If $callback is neither null nor callable
 */
public function setDocumentDistributorCallback($callback)
{
    // Reject invalid values. The old brace-less "if" guarded the assignment itself,
    // so only INVALID callbacks were ever stored and valid ones were silently ignored.
    if ($callback !== null && !is_callable($callback)) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('$callback parameter must be null or valid callback.');
    }

    $this->_documentDistributorCallBack = $callback;
}
/**
 * Get callback for choosing target index.
 *
 * @return callback Currently stored callback, or null when none is set
 */
public function getDocumentDistributorCallback()
{
    $callback = $this->_documentDistributorCallBack;

    return $callback;
}
/**
 * Adds a document to this index.
 *
 * When a distributor callback is set it is called with the document and the
 * list of sub-indices and must return the target index. Otherwise a sub-index
 * is picked at random.
 *
 * @param Zend_Search_Lucene_Document $document
 * @throws Zend_Search_Lucene_Exception If no target index can be determined
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    if ($this->_documentDistributorCallBack !== null) {
        $index = call_user_func($this->_documentDistributorCallBack, $document, $this->_indices);
    } else {
        // array_rand() cannot pick from an empty list; fail with a clear exception instead.
        if (count($this->_indices) == 0) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Indices list is empty');
        }

        $index = $this->_indices[ array_rand($this->_indices) ];
    }

    // Guard against a callback that returns something other than an index,
    // which would otherwise end in a fatal "call to a member function on a non-object".
    if (!$index instanceof Zend_Search_Lucene_Interface) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document distributor callback must return a Zend_Search_Lucene_Interface object.');
    }

    $index->addDocument($document);
}
/**
 * Commit changes resulting from delete() or undeleteAll() operations.
 *
 * The commit is forwarded to every sub-index.
 */
public function commit()
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->commit();
    }
}
/**
 * Optimize index.
 *
 * Merges all segments into one. The request is forwarded to every sub-index.
 */
public function optimize()
{
    foreach ($this->_indices as $index) {
        // Call the public optimize() of the sub-index; the old body called "_optimise()",
        // a name that does not match this method or anything else in the class.
        $index->optimize();
    }
}
/**
 * Returns an array of all terms in this index.
 *
 * The per-index term lists are merged and passed through array_unique().
 * Returns an empty array when no sub-indices are registered.
 *
 * NOTE(review): array_unique() compares elements as strings by default; if the
 * sub-indices return term objects that cannot be cast to string this will fail - confirm.
 *
 * @return array
 */
public function terms()
{
    $terms = array();

    foreach ($this->_indices as $index) {
        // Merging step by step avoids calling array_merge() with no arguments on an empty index list.
        $terms = array_merge($terms, $index->terms());
    }

    return array_unique($terms);
}
869 * Terms stream priority queue object
871 * @var Zend_Search_Lucene_TermStreamsPriorityQueue
873 private $_termsStream = null;
/**
 * Reset terms stream.
 *
 * Creates the priority queue over all sub-indices on first use; afterwards
 * the existing queue is reset.
 */
public function resetTermsStream()
{
    if ($this->_termsStream !== null) {
        $this->_termsStream->resetTermsStream();
        return;
    }

    $this->_termsStream = new Zend_Search_Lucene_TermStreamsPriorityQueue($this->_indices);
}
/**
 * Skip terms stream up to specified term prefix.
 *
 * Prefix contains fully specified field info and portion of searched term.
 * Delegates to the terms stream object held by this instance.
 *
 * @param Zend_Search_Lucene_Index_Term $prefix
 */
public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
{
    $stream = $this->_termsStream;
    $stream->skipTo($prefix);
}
/**
 * Scans terms dictionary and returns next term.
 *
 * Delegates to the terms stream object held by this instance.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function nextTerm()
{
    $term = $this->_termsStream->nextTerm();

    return $term;
}
/**
 * Returns term in current position.
 *
 * Delegates to the terms stream object held by this instance.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function currentTerm()
{
    $term = $this->_termsStream->currentTerm();

    return $term;
}
/**
 * Close terms stream.
 *
 * Should be used for resources clean up if stream is not read up to the end.
 * The stream object is closed and the reference to it is dropped.
 */
public function closeTermsStream()
{
    $stream = $this->_termsStream;
    $stream->closeTermsStream();

    $this->_termsStream = null;
}
/**
 * Undeletes all documents currently marked as deleted in this index.
 *
 * The request is forwarded to every sub-index.
 */
public function undeleteAll()
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->undeleteAll();
    }
}
/**
 * Add reference to the index object.
 *
 * Intentionally empty for the multi-searcher.
 *
 * @internal
 */
public function addReference()
{
    // No-op: the multi-searcher is never referenced by indices.
}
/**
 * Remove reference from the index object.
 *
 * When reference count becomes zero, index is closed and resources are cleaned up.
 * Intentionally empty for the multi-searcher.
 *
 * @internal
 */
public function removeReference()
{
    // No-op: the multi-searcher is never referenced by indices.
}