7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Search_Lucene
18 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
23 /** Zend_Search_Lucene_Index_Term */
24 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Index/Term.php';
26 /** Zend_Search_Lucene_Search_Query_Term */
27 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Search/Query/Term.php';
29 /** Zend_Search_Lucene_Search_Query_MultiTerm */
30 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Search/Query/MultiTerm.php';
32 /** Zend_Search_Lucene_Search_Query_Boolean */
33 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Search/Query/Boolean.php';
35 /** Zend_Search_Lucene_Search_Query_Phrase */
36 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Search/Query/Phrase.php';
38 /** Zend_Search_Lucene_Search_Query_Empty */
39 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Search/Query/Empty.php';
42 /** Zend_Search_Lucene_Search_QueryLexer */
43 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Search/QueryLexer.php';
45 /** Zend_Search_Lucene_Search_QueryParserContext */
46 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Search/QueryParserContext.php';
49 /** Zend_Search_Lucene_FSM */
50 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/FSM.php';
52 /** Zend_Search_Lucene_Exception */
53 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Exception.php';
55 /** Zend_Search_Lucene_Search_QueryParserException */
56 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
61 * @package Zend_Search_Lucene
63 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
64 * @license http://framework.zend.com/license/new-bsd New BSD License
66 class Zend_Search_Lucene_Search_QueryParser
extends Zend_Search_Lucene_FSM
71 * @var Zend_Search_Lucene_Search_QueryParser
73 private static $_instance = null;
79 * @var Zend_Search_Lucene_Search_QueryLexer
85 * Array of Zend_Search_Lucene_Search_QueryToken objects
96 private $_currentToken;
101 * It can be processed within FSM states, but this additional state simplifies FSM
103 * @var Zend_Search_Lucene_Search_QueryToken
105 private $_lastToken = null;
108 * Range query first term
112 private $_rqFirstTerm = null;
115 * Current query parser context
117 * @var Zend_Search_Lucene_Search_QueryParserContext
126 private $_contextStack;
129 * Query string encoding
136 * Query string default encoding
140 private $_defaultEncoding = '';
144 * Boolean operators constants
150 * Default boolean queries operator
154 private $_defaultOperator = self
::B_OR
;
157 /** Query parser State Machine states */
158 const ST_COMMON_QUERY_ELEMENT
= 0; // Terms, phrases, operators
159 const ST_CLOSEDINT_RQ_START
= 1; // Range query start (closed interval) - '['
160 const ST_CLOSEDINT_RQ_FIRST_TERM
= 2; // First term in '[term1 to term2]' construction
161 const ST_CLOSEDINT_RQ_TO_TERM
= 3; // 'TO' lexeme in '[term1 to term2]' construction
162 const ST_CLOSEDINT_RQ_LAST_TERM
= 4; // Second term in '[term1 to term2]' construction
163 const ST_CLOSEDINT_RQ_END
= 5; // Range query end (closed interval) - ']'
164 const ST_OPENEDINT_RQ_START
= 6; // Range query start (opened interval) - '{'
165 const ST_OPENEDINT_RQ_FIRST_TERM
= 7; // First term in '{term1 to term2}' construction
166 const ST_OPENEDINT_RQ_TO_TERM
= 8; // 'TO' lexeme in '{term1 to term2}' construction
167 const ST_OPENEDINT_RQ_LAST_TERM
= 9; // Second term in '{term1 to term2}' construction
168 const ST_OPENEDINT_RQ_END
= 10; // Range query end (opened interval) - '}'
/**
 * Builds the query parser state machine.
 *
 * Registers the parser states together with the token types reported by
 * Zend_Search_Lucene_Search_QueryToken::getTypes(), declares the transition
 * rules, binds the action callbacks and finally creates the query lexer.
 */
public function __construct()
{
    $states = array(
        self::ST_COMMON_QUERY_ELEMENT,
        self::ST_CLOSEDINT_RQ_START,
        self::ST_CLOSEDINT_RQ_FIRST_TERM,
        self::ST_CLOSEDINT_RQ_TO_TERM,
        self::ST_CLOSEDINT_RQ_LAST_TERM,
        self::ST_CLOSEDINT_RQ_END,
        self::ST_OPENEDINT_RQ_START,
        self::ST_OPENEDINT_RQ_FIRST_TERM,
        self::ST_OPENEDINT_RQ_TO_TERM,
        self::ST_OPENEDINT_RQ_LAST_TERM,
        self::ST_OPENEDINT_RQ_END,
    );
    parent::__construct($states, Zend_Search_Lucene_Search_QueryToken::getTypes());

    $common = self::ST_COMMON_QUERY_ELEMENT;

    // Transitions leaving the common state, as (token type, target state) pairs.
    // Only the two range-start tokens leave the common state.
    $commonTransitions = array(
        array(Zend_Search_Lucene_Search_QueryToken::TT_WORD,             $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_PHRASE,           $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_FIELD,            $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED,         $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED,       $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK,  $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_BOOSTING_MARK,    $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_RANGE_INCL_START, self::ST_CLOSEDINT_RQ_START),
        array(Zend_Search_Lucene_Search_QueryToken::TT_RANGE_EXCL_START, self::ST_OPENEDINT_RQ_START),
        array(Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_START,   $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_END,     $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME,       $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME,        $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME,       $common),
        array(Zend_Search_Lucene_Search_QueryToken::TT_NUMBER,           $common),
    );
    $commonRules = array();
    foreach ($commonTransitions as $transition) {
        $commonRules[] = array($common, $transition[0], $transition[1]);
    }
    $this->addRules($commonRules);

    // Range query chains: start -> first term -> 'TO' -> last term -> back to common.
    // Closed interval ('[...]') is registered first, then opened interval ('{...}').
    $rangeChains = array(
        array(
            self::ST_CLOSEDINT_RQ_START,
            self::ST_CLOSEDINT_RQ_FIRST_TERM,
            self::ST_CLOSEDINT_RQ_TO_TERM,
            self::ST_CLOSEDINT_RQ_LAST_TERM,
            Zend_Search_Lucene_Search_QueryToken::TT_RANGE_INCL_END,
        ),
        array(
            self::ST_OPENEDINT_RQ_START,
            self::ST_OPENEDINT_RQ_FIRST_TERM,
            self::ST_OPENEDINT_RQ_TO_TERM,
            self::ST_OPENEDINT_RQ_LAST_TERM,
            Zend_Search_Lucene_Search_QueryToken::TT_RANGE_EXCL_END,
        ),
    );
    foreach ($rangeChains as $chain) {
        list($start, $firstTerm, $toTerm, $lastTerm, $closingToken) = $chain;

        $this->addRules(array(
            array($start,     Zend_Search_Lucene_Search_QueryToken::TT_WORD,      $firstTerm),
            array($firstTerm, Zend_Search_Lucene_Search_QueryToken::TT_TO_LEXEME, $toTerm),
            array($toTerm,    Zend_Search_Lucene_Search_QueryToken::TT_WORD,      $lastTerm),
            array($lastTerm,  $closingToken,                                      $common),
        ));
    }

    // One action object per handler method; handlers bound to several
    // tokens (setSign, logicalOperator) share a single instance.
    $handlerMethods = array(
        'addTermEntry',
        'addPhraseEntry',
        'setField',
        'setSign',
        'processFuzzyProximityModifier',
        'processModifierParameter',
        'subqueryStart',
        'subqueryEnd',
        'logicalOperator',
        'openedRQFirstTerm',
        'openedRQLastTerm',
        'closedRQFirstTerm',
        'closedRQLastTerm',
    );
    $actions = array();
    foreach ($handlerMethods as $method) {
        $actions[$method] = new Zend_Search_Lucene_FSMAction($this, $method);
    }

    // Input actions fired in the common state, as (token type, handler) pairs.
    $inputBindings = array(
        array(Zend_Search_Lucene_Search_QueryToken::TT_WORD,            'addTermEntry'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_PHRASE,          'addPhraseEntry'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_FIELD,           'setField'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED,        'setSign'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED,      'setSign'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK, 'processFuzzyProximityModifier'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_NUMBER,          'processModifierParameter'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_START,  'subqueryStart'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_END,    'subqueryEnd'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME,      'logicalOperator'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME,       'logicalOperator'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME,      'logicalOperator'),
    );
    foreach ($inputBindings as $binding) {
        $this->addInputAction($common, $binding[0], $actions[$binding[1]]);
    }

    // Entry actions fired when a range-term state is entered, as (state, handler) pairs.
    $entryBindings = array(
        array(self::ST_OPENEDINT_RQ_FIRST_TERM, 'openedRQFirstTerm'),
        array(self::ST_OPENEDINT_RQ_LAST_TERM,  'openedRQLastTerm'),
        array(self::ST_CLOSEDINT_RQ_FIRST_TERM, 'closedRQFirstTerm'),
        array(self::ST_CLOSEDINT_RQ_LAST_TERM,  'closedRQLastTerm'),
    );
    foreach ($entryBindings as $binding) {
        $this->addEntryAction($binding[0], $actions[$binding[1]]);
    }

    $this->_lexer = new Zend_Search_Lucene_Search_QueryLexer();
}
/**
 * Set query string default encoding
 *
 * Creates the shared parser instance on first use and stores the
 * encoding on it.
 *
 * @param string $encoding
 */
public static function setDefaultEncoding($encoding)
{
    if (null === self::$_instance) {
        self::$_instance = new self();
    }

    $parser = self::$_instance;
    $parser->_defaultEncoding = $encoding;
}
/**
 * Get query string default encoding
 *
 * Creates the shared parser instance on first use.
 *
 * @return string
 */
public static function getDefaultEncoding()
{
    if (null === self::$_instance) {
        self::$_instance = new self();
    }

    $parser = self::$_instance;

    return $parser->_defaultEncoding;
}
/**
 * Set default boolean operator
 *
 * Creates the shared parser instance on first use and stores the
 * operator on it. Until this is called the default is self::B_OR.
 *
 * @param integer $operator
 */
public static function setDefaultOperator($operator)
{
    if (null === self::$_instance) {
        self::$_instance = new self();
    }

    $parser = self::$_instance;
    $parser->_defaultOperator = $operator;
}
/**
 * Get default boolean operator
 *
 * Creates the shared parser instance on first use.
 *
 * @return integer
 */
public static function getDefaultOperator()
{
    if (null === self::$_instance) {
        self::$_instance = new self();
    }

    $parser = self::$_instance;

    return $parser->_defaultOperator;
}
/**
 * Parses a query string
 *
 * Resets the per-query state of the shared parser instance, tokenizes the
 * query string and feeds every token type through the state machine.
 *
 * @param string $strQuery
 * @param string $encoding  Query encoding; the default encoding is used when null
 * @return Zend_Search_Lucene_Search_Query
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public static function parse($strQuery, $encoding = null)
{
    if (null === self::$_instance) {
        self::$_instance = new self();
    }
    $parser = self::$_instance;

    // Reset per-query state.
    if ($encoding !== null) {
        $parser->_encoding = $encoding;
    } else {
        $parser->_encoding = $parser->_defaultEncoding;
    }
    $parser->_lastToken    = null;
    $parser->_context      = new Zend_Search_Lucene_Search_QueryParserContext($parser->_encoding);
    $parser->_contextStack = array();
    $parser->_tokens       = $parser->_lexer->tokenize($strQuery, $parser->_encoding);

    // Empty query
    if (count($parser->_tokens) == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    foreach ($parser->_tokens as $token) {
        try {
            $parser->_currentToken = $token;
            $parser->process($token->type);

            $parser->_lastToken = $token;
        } catch (Exception $e) {
            // Anything other than a missing transition rule is passed on untouched.
            if (strpos($e->getMessage(), 'There is no any rule for') === false) {
                throw $e;
            }

            throw new Zend_Search_Lucene_Search_QueryParserException( 'Syntax error at char position ' . $token->position . '.' );
        }
    }

    // A non-empty stack means a subquery was opened and never closed.
    if (count($parser->_contextStack) != 0) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.' );
    }

    return $parser->_context->getQuery();
}
365 /*********************************************************************
366 * Actions implementation
368 * Actions operate on the list of recognized lexemes
369 *********************************************************************/
/**
 * Add term to a query
 *
 * Builds a term entry from the current token text and the field
 * reported by the current context, then adds it to that context.
 */
public function addTermEntry()
{
    $termText = $this->_currentToken->text;
    $field    = $this->_context->getField();

    $this->_context->addEntry(
        new Zend_Search_Lucene_Search_QueryEntry_Term($termText, $field)
    );
}
/**
 * Add phrase to a query
 *
 * Builds a phrase entry from the current token text and the field
 * reported by the current context, then adds it to that context.
 */
public function addPhraseEntry()
{
    $phraseText = $this->_currentToken->text;
    $field      = $this->_context->getField();

    $this->_context->addEntry(
        new Zend_Search_Lucene_Search_QueryEntry_Phrase($phraseText, $field)
    );
}
/**
 * Passes the current token text to the context as the field
 * for the next entry.
 */
public function setField()
{
    $fieldName = $this->_currentToken->text;

    $this->_context->setNextEntryField($fieldName);
}
/**
 * Passes the current token type to the context as the sign
 * for the next entry.
 *
 * Bound to both the "required" and the "prohibited" tokens.
 */
public function setSign()
{
    $signTokenType = $this->_currentToken->type;

    $this->_context->setNextEntrySign($signTokenType);
}
/**
 * Process fuzzy search/proximity modifier - '~'
 *
 * Delegates to the current context without a parameter; a numeric
 * parameter, if one follows, is handled by processModifierParameter().
 */
public function processFuzzyProximityModifier()
{
    $context = $this->_context;

    $context->processFuzzyProximityModifier();
}
/**
 * Process modifier parameter
 *
 * Applies the text of the current token as the parameter of the modifier
 * recorded as the previous token: a fuzzy/proximity mark or a boosting mark.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException  when no token precedes the parameter
 * @throws Zend_Search_Lucene_Exception  when the previous token is not a modifier mark
 */
public function processModifierParameter()
{
    if ($this->_lastToken === null) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Lexeme modifier parameter must follow lexeme modifier. Char position 0.' );
    }

    switch ($this->_lastToken->type) {
        case Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK:
            $this->_context->processFuzzyProximityModifier($this->_currentToken->text);
            break;

        case Zend_Search_Lucene_Search_QueryToken::TT_BOOSTING_MARK:
            $this->_context->boost($this->_currentToken->text);
            break;

        default:
            // It's not a user input exception
            // The message previously ended in "Char position ." with no position;
            // report where the offending parameter token sits.
            throw new Zend_Search_Lucene_Exception('Lexeme modifier parameter must follow lexeme modifier. Char position ' . $this->_currentToken->position . '.' );
    }
}
/**
 * Opens a subquery.
 *
 * Saves the current context on the context stack and replaces it with a
 * fresh context that uses the query encoding and the field reported by
 * the saved context.
 */
public function subqueryStart()
{
    $outerContext = $this->_context;

    $this->_contextStack[] = $outerContext;
    $this->_context        = new Zend_Search_Lucene_Search_QueryParserContext(
        $this->_encoding,
        $outerContext->getField()
    );
}
/**
 * Closes a subquery.
 *
 * Takes the query built by the current context, restores the enclosing
 * context from the stack and adds the query to it as a subquery entry.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException  when there is no open subquery
 */
public function subqueryEnd()
{
    $hasOpenSubquery = count($this->_contextStack) != 0;
    if (!$hasOpenSubquery) {
        $message = 'Syntax Error: mismatched parentheses, every opening must have closing. Char position '
                 . $this->_currentToken->position . '.';

        throw new Zend_Search_Lucene_Search_QueryParserException($message);
    }

    $subquery      = $this->_context->getQuery();
    $parentContext = array_pop($this->_contextStack);

    $this->_context = $parentContext;
    $parentContext->addEntry(new Zend_Search_Lucene_Search_QueryEntry_Subquery($subquery));
}
/**
 * Process logical operator
 *
 * Passes the current token type to the context. Bound to the
 * AND, OR and NOT lexemes.
 */
public function logicalOperator()
{
    $operatorTokenType = $this->_currentToken->type;

    $this->_context->addLogicalOperator($operatorTokenType);
}
/**
 * Process first range query term (opened interval)
 *
 * Stores the current token text as the first range term.
 */
public function openedRQFirstTerm()
{
    $termText = $this->_currentToken->text;

    $this->_rqFirstTerm = $termText;
}
/**
 * Process last range query term (opened interval)
 *
 * Always fails: range queries are not implemented. The disabled
 * implementation built a Zend_Search_Lucene_Search_Query_Range (exclusive
 * bounds) from the stored first term and the current token text, both as
 * Zend_Search_Lucene_Index_Term objects on the context's field, and added
 * it to the context.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function openedRQLastTerm()
{
    $message = 'Range queries are not supported yet.';

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
}
/**
 * Process first range query term (closed interval)
 *
 * Stores the current token text as the first range term.
 */
public function closedRQFirstTerm()
{
    $termText = $this->_currentToken->text;

    $this->_rqFirstTerm = $termText;
}
/**
 * Process last range query term (closed interval)
 *
 * Always fails: range queries are not implemented. The disabled
 * implementation built a Zend_Search_Lucene_Search_Query_Range (inclusive
 * bounds) from the stored first term and the current token text, both as
 * Zend_Search_Lucene_Index_Term objects on the context's field, and added
 * it to the context.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function closedRQLastTerm()
{
    $message = 'Range queries are not supported yet.';

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
}