7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Search_Lucene
18 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
23 /** Zend_Search_Lucene_Exception */
24 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Exception.php';
26 /** Zend_Search_Lucene_Analysis_Analyzer */
27 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Analysis/Analyzer.php';
29 /** Zend_Search_Lucene_Index_SegmentWriter */
30 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Index/SegmentWriter.php';
/**
 * @package Zend_Search_Lucene
 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
 * @license http://framework.zend.com/license/new-bsd New BSD License
 */
40 class Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter
extends Zend_Search_Lucene_Index_SegmentWriter
/**
 * Array of the Zend_Search_Lucene_Index_Term objects.
 * The corresponding Zend_Search_Lucene_Index_TermInfo object is stored in $_termDictionaryInfos.
 */
49 protected $_termDictionary;
52 * Documents, which contain the term
/**
 * Object constructor.
 *
 * Delegates to the parent segment writer constructor, then starts the writer
 * off with an empty term dictionary and empty per-term document lists.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param mixed $name  forwarded unchanged to the parent constructor
 */
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
{
    parent::__construct($directory, $name);

    // Both maps are keyed by term key and filled by addDocument().
    $this->_termDictionary = array();
    $this->_termDocs       = array();
}
/**
 * Adds a document to this segment.
 *
 * @param Zend_Search_Lucene_Document $document
 * @throws Zend_Search_Lucene_Exception
 */
79 public function addDocument(Zend_Search_Lucene_Document
$document)
// Indexes one document: passes every field to addField(), records term
// positions for indexed fields, collects stored fields, and appends one
// norm byte per indexed field to $this->_norms.
// NOTE(review): this listing appears to have lost several original lines
// (brace-only lines, comment delimiters and some statements such as the
// initialisation of $docNorms and $position) - confirm against the upstream
// file before changing any logic here.
81 $storedFields = array();
83 $similarity = Zend_Search_Lucene_Search_Similarity
::getDefault();
// Walk the document's fields by name.
85 foreach ($document->getFieldNames() as $fieldName) {
86 $field = $document->getField($fieldName);
87 $this->addField($field);
// Term vectors cannot be stored: a field that requests them aborts the call
// with an exception.
89 if ($field->storeTermVector
) {
91 * @todo term vector storing support
93 throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
// Only indexed fields contribute terms and norms.
96 if ($field->isIndexed
) {
// Tokenized field: feed the value to the default analyzer and record the
// position of every token it yields.
97 if ($field->isTokenized
) {
98 $analyzer = Zend_Search_Lucene_Analysis_Analyzer
::getDefault();
99 $analyzer->setInput($field->value
, $field->encoding
);
// NOTE(review): $position is incremented inside this loop but its
// initialisation is not visible in this listing - presumably reset per
// field; confirm.
103 while (($token = $analyzer->nextToken()) !== null) {
106 $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name
);
107 $termKey = $term->key();
// Term not seen in this segment yet: register it and open an empty position
// list for the current document.
109 if (!isset($this->_termDictionary
[$termKey])) {
111 $this->_termDictionary
[$termKey] = $term;
112 $this->_termDocs
[$termKey] = array();
113 $this->_termDocs
[$termKey][$this->_docCount
] = array();
114 } else if (!isset($this->_termDocs
[$termKey][$this->_docCount
])) {
115 // Existing term, but new term entry
116 $this->_termDocs
[$termKey][$this->_docCount
] = array();
// The running position advances by the token's position increment and is
// appended to this term's list for the current document.
118 $position +
= $token->getPositionIncrement();
119 $this->_termDocs
[$termKey][$this->_docCount
][] = $position;
// Norm of this field for the current document, encoded into a single byte
// via chr().
// NOTE(review): the remaining arguments of lengthNorm()/encodeNorm() are
// cut off in this listing - confirm.
122 $docNorms[$field->name
] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name
,
// Untokenized field: the whole UTF-8 value is indexed as one term.
127 $term = new Zend_Search_Lucene_Index_Term($field->getUtf8Value(), $field->name
);
128 $termKey = $term->key();
// Same registration logic as in the tokenized branch above.
130 if (!isset($this->_termDictionary
[$termKey])) {
132 $this->_termDictionary
[$termKey] = $term;
133 $this->_termDocs
[$termKey] = array();
134 $this->_termDocs
[$termKey][$this->_docCount
] = array();
135 } else if (!isset($this->_termDocs
[$termKey][$this->_docCount
])) {
136 // Existing term, but new term entry
137 $this->_termDocs
[$termKey][$this->_docCount
] = array();
139 $this->_termDocs
[$termKey][$this->_docCount
][] = 0; // position
// Norm for a field treated as having length 1.
// NOTE(review): the factors multiplied in after `*` are cut off in this
// listing - confirm.
141 $docNorms[$field->name
] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name
, 1)*
// Stored fields are collected here and handed over together at the end.
147 if ($field->isStored
) {
148 $storedFields[] = $field;
// Append one norm byte for the current document to every indexed field the
// segment knows about, not only the fields present in this document.
153 foreach ($this->_fields
as $fieldName => $field) {
154 if (!$field->isIndexed
) {
// Field has no norms string yet: build one from the zero-length norm.
// NOTE(review): str_repeat()'s count argument is not visible in this
// listing - presumably the number of documents already added; confirm.
158 if (!isset($this->_norms
[$fieldName])) {
159 $this->_norms
[$fieldName] = str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
// Use the norm computed above when this document contained the field,
// otherwise fall back to the zero-length norm.
163 if (isset($docNorms[$fieldName])){
164 $this->_norms
[$fieldName] .= $docNorms[$fieldName];
166 $this->_norms
[$fieldName] .= chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) ));
// Hand the collected stored fields to addStoredFields().
170 $this->addStoredFields($storedFields);
/**
 * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
 *
 * Terms are passed to addTerm() in ascending string order of their
 * dictionary keys, each together with the per-document position lists
 * collected for it in $this->_termDocs.
 */
protected function _dumpDictionary()
{
    // Order the dictionary by key, comparing keys as strings.
    ksort($this->_termDictionary, SORT_STRING);

    $this->initializeDictionaryFiles();

    foreach ($this->_termDictionary as $termKey => $termObject) {
        $postings = $this->_termDocs[$termKey];
        $this->addTerm($termObject, $postings);
    }

    $this->closeDictionaryFiles();
}
/**
 * Close segment, write it to disk and return segment info
 *
 * @return Zend_Search_Lucene_Index_SegmentInfo
 */
196 public function close()
198 if ($this->_docCount
== 0) {
203 $this->_dumpDictionary();
205 $this->_generateCFS();
207 return new Zend_Search_Lucene_Index_SegmentInfo($this->_name
,