adding some strings
[moodle-linuxchix.git] / search / Zend / Search / Lucene / Index / SegmentWriter / DocumentWriter.php
blob7dd2bf91587f611a5a276a33796864675016357b
1 <?php
2 /**
3 * Zend Framework
5 * LICENSE
7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
15 * @category Zend
16 * @package Zend_Search_Lucene
17 * @subpackage Index
18 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
23 /** Zend_Search_Lucene_Exception */
24 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
26 /** Zend_Search_Lucene_Analysis_Analyzer */
27 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Analysis/Analyzer.php';
29 /** Zend_Search_Lucene_Index_SegmentWriter */
30 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/SegmentWriter.php';
33 /**
34 * @category Zend
35 * @package Zend_Search_Lucene
36 * @subpackage Index
37 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
38 * @license http://framework.zend.com/license/new-bsd New BSD License
40 class Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter extends Zend_Search_Lucene_Index_SegmentWriter
42 /**
43 * Term Dictionary
44 * Array of the Zend_Search_Lucene_Index_Term objects
45 * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
47 * @var array
49 protected $_termDictionary;
51 /**
 * Postings collected so far: term key => document number => list of term positions
54 * @var array
56 protected $_termDocs;
/**
 * Object constructor.
 *
 * Delegates to the parent writer, then starts with an empty term
 * dictionary and an empty postings map.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param string $name
 */
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
{
    parent::__construct($directory, $name);

    // Arrays are copied by value, so both properties get their own empty array.
    $this->_termDictionary = $this->_termDocs = array();
}
/**
 * Adds a document to this segment.
 *
 * Each field of the document is registered through addField(). Indexed
 * fields contribute postings (term => document => positions) to the
 * in-memory dictionary and a one-byte norm for this document; stored fields
 * are collected and passed to addStoredFields() after all fields have been
 * processed.
 *
 * Note: $_fields, $_norms and $_docCount are not declared in this class
 * (presumably inherited from Zend_Search_Lucene_Index_SegmentWriter).
 *
 * @param Zend_Search_Lucene_Document $document
 * @throws Zend_Search_Lucene_Exception if a field requests term vector storage
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    $storedFields = array();
    $docNorms     = array();
    $similarity   = Zend_Search_Lucene_Search_Similarity::getDefault();

    foreach ($document->getFieldNames() as $fieldName) {
        $field = $document->getField($fieldName);
        $this->addField($field);

        if ($field->storeTermVector) {
            /**
             * @todo term vector storing support
             */
            throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
        }

        if ($field->isIndexed) {
            if ($field->isTokenized) {
                $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
                $analyzer->setInput($field->value, $field->encoding);

                // Positions accumulate the tokens' position increments.
                $position     = 0;
                $tokenCounter = 0;
                while (($token = $analyzer->nextToken()) !== null) {
                    $tokenCounter++;

                    $term      = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
                    $position += $token->getPositionIncrement();

                    $this->_addTermPosition($term, $position);
                }
            } else {
                // An untokenized field is indexed as a single term at position 0.
                $tokenCounter = 1;

                $term = new Zend_Search_Lucene_Index_Term($field->getUtf8Value(), $field->name);
                $this->_addTermPosition($term, 0);
            }

            $docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name,
                                                                                           $tokenCounter)*
                                                                   $document->boost*
                                                                   $field->boost ));
        }

        if ($field->isStored) {
            $storedFields[] = $field;
        }
    }

    // Append one norm byte for this document to every indexed field known to the segment.
    foreach ($this->_fields as $fieldName => $field) {
        if (!$field->isIndexed) {
            continue;
        }

        if (!isset($this->_norms[$fieldName])) {
            // Field seen for the first time: back-fill the norm of a zero-length
            // field for all documents added before this one.
            $this->_norms[$fieldName] = str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
                                                   $this->_docCount);
        }

        if (isset($docNorms[$fieldName])){
            $this->_norms[$fieldName] .= $docNorms[$fieldName];
        } else {
            // This document does not index the field: use the zero-length norm.
            $this->_norms[$fieldName] .= chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) ));
        }
    }

    $this->addStoredFields($storedFields);
}

/**
 * Records one occurrence of a term in the document currently being added.
 *
 * Registers the term in the dictionary on first sight, makes sure a position
 * list exists for the current document number ($this->_docCount), and
 * appends the position to it.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param integer $position
 */
private function _addTermPosition(Zend_Search_Lucene_Index_Term $term, $position)
{
    $termKey = $term->key();

    if (!isset($this->_termDictionary[$termKey])) {
        // New term
        $this->_termDictionary[$termKey] = $term;
        $this->_termDocs[$termKey]       = array();
    }

    if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
        // First occurrence of the term in this document
        $this->_termDocs[$termKey][$this->_docCount] = array();
    }

    $this->_termDocs[$termKey][$this->_docCount][] = $position;
}
/**
 * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
 *
 * Terms are emitted in string order of their keys, each together with the
 * postings collected for it in $_termDocs.
 */
protected function _dumpDictionary()
{
    // Order the dictionary by term key, compared as strings.
    ksort($this->_termDictionary, SORT_STRING);

    $this->initializeDictionaryFiles();

    foreach ($this->_termDictionary as $termKey => $dictionaryTerm) {
        $postings = $this->_termDocs[$termKey];
        $this->addTerm($dictionaryTerm, $postings);
    }

    $this->closeDictionaryFiles();
}
/**
 * Close segment, write it to disk and return segment info
 *
 * Nothing is written and null is returned when no documents were added.
 *
 * @return Zend_Search_Lucene_Index_SegmentInfo|null
 */
public function close()
{
    if ($this->_docCount == 0) {
        // Empty segment: nothing to write.
        return null;
    }

    $this->_dumpFNM();
    $this->_dumpDictionary();

    $this->_generateCFS();

    $segmentInfo = new Zend_Search_Lucene_Index_SegmentInfo(
        $this->_name,
        $this->_docCount,
        $this->_directory
    );

    return $segmentInfo;
}