Merge commit 'catalyst/MOODLE_19_STABLE' into mdl19-linuxchix
[moodle-linuxchix.git] / search / Zend / Search / Lucene / Search / QueryParser.php
blob1a3d5712de820f444c47178013910078765980af
1 <?php
2 /**
3 * Zend Framework
5 * LICENSE
7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
15 * @category Zend
16 * @package Zend_Search_Lucene
17 * @subpackage Search
18 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
23 /** Zend_Search_Lucene_Index_Term */
24 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Index/Term.php';
26 /** Zend_Search_Lucene_Search_Query_Term */
27 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Term.php';
29 /** Zend_Search_Lucene_Search_Query_MultiTerm */
30 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/MultiTerm.php';
32 /** Zend_Search_Lucene_Search_Query_Boolean */
33 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Boolean.php';
35 /** Zend_Search_Lucene_Search_Query_Phrase */
36 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Phrase.php';
38 /** Zend_Search_Lucene_Search_Query_Empty */
39 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/Query/Empty.php';
42 /** Zend_Search_Lucene_Search_QueryLexer */
43 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryLexer.php';
45 /** Zend_Search_Lucene_Search_QueryParserContext */
46 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserContext.php';
49 /** Zend_Search_Lucene_FSM */
50 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/FSM.php';
52 /** Zend_Search_Lucene_Exception */
53 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Exception.php';
55 /** Zend_Search_Lucene_Search_QueryParserException */
56 require_once $CFG->dirroot.'/search/Zend/Search/Lucene/Search/QueryParserException.php';
59 /**
60 * @category Zend
61 * @package Zend_Search_Lucene
62 * @subpackage Search
63 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
64 * @license http://framework.zend.com/license/new-bsd New BSD License
66 class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
68 /**
69 * Parser instance
71 * @var Zend_Search_Lucene_Search_QueryParser
73 private static $_instance = null;
76 /**
77 * Query lexer
79 * @var Zend_Search_Lucene_Search_QueryLexer
81 private $_lexer;
83 /**
84 * Tokens list
85 * Array of Zend_Search_Lucene_Search_QueryToken objects
87 * @var array
89 private $_tokens;
91 /**
92 * Current token
94 * @var Zend_Search_Lucene_Search_QueryToken
96 private $_currentToken;
98 /**
99 * Last token
101 * It can be processed within FSM states, but this additional state simplifies FSM
103 * @var Zend_Search_Lucene_Search_QueryToken
105 private $_lastToken = null;
108 * Range query first term
110 * @var string
112 private $_rqFirstTerm = null;
115 * Current query parser context
117 * @var Zend_Search_Lucene_Search_QueryParserContext
119 private $_context;
122 * Context stack
124 * @var array
126 private $_contextStack;
129 * Query string encoding
131 * @var string
133 private $_encoding;
136 * Query string default encoding
138 * @var string
140 private $_defaultEncoding = '';
144 * Boolean operators constants
146 const B_OR = 0;
147 const B_AND = 1;
150 * Default boolean queries operator
152 * @var integer
154 private $_defaultOperator = self::B_OR;
157 /** Query parser State Machine states */
158 const ST_COMMON_QUERY_ELEMENT = 0; // Terms, phrases, operators
159 const ST_CLOSEDINT_RQ_START = 1; // Range query start (closed interval) - '['
160 const ST_CLOSEDINT_RQ_FIRST_TERM = 2; // First term in '[term1 to term2]' construction
161 const ST_CLOSEDINT_RQ_TO_TERM = 3; // 'TO' lexeme in '[term1 to term2]' construction
162 const ST_CLOSEDINT_RQ_LAST_TERM = 4; // Second term in '[term1 to term2]' construction
163 const ST_CLOSEDINT_RQ_END = 5; // Range query end (closed interval) - ']'
164 const ST_OPENEDINT_RQ_START = 6; // Range query start (opened interval) - '{'
165 const ST_OPENEDINT_RQ_FIRST_TERM = 7; // First term in '{term1 to term2}' construction
166 const ST_OPENEDINT_RQ_TO_TERM = 8; // 'TO' lexeme in '{term1 to term2}' construction
167 const ST_OPENEDINT_RQ_LAST_TERM = 9; // Second term in '{term1 to term2}' construction
168 const ST_OPENEDINT_RQ_END = 10; // Range query end (opened interval) - '}'
/**
 * Parser constructor
 *
 * Declares the state machine (states and input symbols), registers the
 * transition rules and the actions bound to them, and creates the lexer
 * used to tokenize query strings.
 */
public function __construct()
{
    $states = array(
        self::ST_COMMON_QUERY_ELEMENT,
        self::ST_CLOSEDINT_RQ_START,
        self::ST_CLOSEDINT_RQ_FIRST_TERM,
        self::ST_CLOSEDINT_RQ_TO_TERM,
        self::ST_CLOSEDINT_RQ_LAST_TERM,
        self::ST_CLOSEDINT_RQ_END,
        self::ST_OPENEDINT_RQ_START,
        self::ST_OPENEDINT_RQ_FIRST_TERM,
        self::ST_OPENEDINT_RQ_TO_TERM,
        self::ST_OPENEDINT_RQ_LAST_TERM,
        self::ST_OPENEDINT_RQ_END,
    );
    parent::__construct($states, Zend_Search_Lucene_Search_QueryToken::getTypes());

    // Transitions leaving the common state: (input token type, target state).
    // Only the two range-start tokens leave the common state.
    $commonTransitions = array(
        array(Zend_Search_Lucene_Search_QueryToken::TT_WORD,             self::ST_COMMON_QUERY_ELEMENT),
        array(Zend_Search_Lucene_Search_QueryToken::TT_PHRASE,           self::ST_COMMON_QUERY_ELEMENT),
        array(Zend_Search_Lucene_Search_QueryToken::TT_FIELD,            self::ST_COMMON_QUERY_ELEMENT),
        array(Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED,         self::ST_COMMON_QUERY_ELEMENT),
        array(Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED,       self::ST_COMMON_QUERY_ELEMENT),
        array(Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK,  self::ST_COMMON_QUERY_ELEMENT),
        array(Zend_Search_Lucene_Search_QueryToken::TT_BOOSTING_MARK,    self::ST_COMMON_QUERY_ELEMENT),
        array(Zend_Search_Lucene_Search_QueryToken::TT_RANGE_INCL_START, self::ST_CLOSEDINT_RQ_START),
        array(Zend_Search_Lucene_Search_QueryToken::TT_RANGE_EXCL_START, self::ST_OPENEDINT_RQ_START),
        array(Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_START,   self::ST_COMMON_QUERY_ELEMENT),
        array(Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_END,     self::ST_COMMON_QUERY_ELEMENT),
        array(Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME,       self::ST_COMMON_QUERY_ELEMENT),
        array(Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME,        self::ST_COMMON_QUERY_ELEMENT),
        array(Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME,       self::ST_COMMON_QUERY_ELEMENT),
        array(Zend_Search_Lucene_Search_QueryToken::TT_NUMBER,           self::ST_COMMON_QUERY_ELEMENT),
    );
    $commonRules = array();
    foreach ($commonTransitions as $transition) {
        $commonRules[] = array(self::ST_COMMON_QUERY_ELEMENT, $transition[0], $transition[1]);
    }
    $this->addRules($commonRules);

    // '[term1 TO term2]' construction (closed interval)
    $closedRangeRules = array(
        array(self::ST_CLOSEDINT_RQ_START,      Zend_Search_Lucene_Search_QueryToken::TT_WORD,           self::ST_CLOSEDINT_RQ_FIRST_TERM),
        array(self::ST_CLOSEDINT_RQ_FIRST_TERM, Zend_Search_Lucene_Search_QueryToken::TT_TO_LEXEME,      self::ST_CLOSEDINT_RQ_TO_TERM),
        array(self::ST_CLOSEDINT_RQ_TO_TERM,    Zend_Search_Lucene_Search_QueryToken::TT_WORD,           self::ST_CLOSEDINT_RQ_LAST_TERM),
        array(self::ST_CLOSEDINT_RQ_LAST_TERM,  Zend_Search_Lucene_Search_QueryToken::TT_RANGE_INCL_END, self::ST_COMMON_QUERY_ELEMENT),
    );
    $this->addRules($closedRangeRules);

    // '{term1 TO term2}' construction (opened interval)
    $openedRangeRules = array(
        array(self::ST_OPENEDINT_RQ_START,      Zend_Search_Lucene_Search_QueryToken::TT_WORD,           self::ST_OPENEDINT_RQ_FIRST_TERM),
        array(self::ST_OPENEDINT_RQ_FIRST_TERM, Zend_Search_Lucene_Search_QueryToken::TT_TO_LEXEME,      self::ST_OPENEDINT_RQ_TO_TERM),
        array(self::ST_OPENEDINT_RQ_TO_TERM,    Zend_Search_Lucene_Search_QueryToken::TT_WORD,           self::ST_OPENEDINT_RQ_LAST_TERM),
        array(self::ST_OPENEDINT_RQ_LAST_TERM,  Zend_Search_Lucene_Search_QueryToken::TT_RANGE_EXCL_END, self::ST_COMMON_QUERY_ELEMENT),
    );
    $this->addRules($openedRangeRules);

    // One FSM action object per handler method, keyed by the method name.
    // Some handlers (setSign, logicalOperator) are shared by several tokens.
    $handlerNames = array(
        'addTermEntry',
        'addPhraseEntry',
        'setField',
        'setSign',
        'processFuzzyProximityModifier',
        'processModifierParameter',
        'subqueryStart',
        'subqueryEnd',
        'logicalOperator',
        'openedRQFirstTerm',
        'openedRQLastTerm',
        'closedRQFirstTerm',
        'closedRQLastTerm',
    );
    $actions = array();
    foreach ($handlerNames as $handlerName) {
        $actions[$handlerName] = new Zend_Search_Lucene_FSMAction($this, $handlerName);
    }

    // Input actions of the common state: (input token type, handler method).
    // TT_BOOSTING_MARK has a rule but intentionally no input action here.
    $inputBindings = array(
        array(Zend_Search_Lucene_Search_QueryToken::TT_WORD,            'addTermEntry'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_PHRASE,          'addPhraseEntry'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_FIELD,           'setField'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED,        'setSign'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED,      'setSign'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK, 'processFuzzyProximityModifier'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_NUMBER,          'processModifierParameter'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_START,  'subqueryStart'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_SUBQUERY_END,    'subqueryEnd'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME,      'logicalOperator'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME,       'logicalOperator'),
        array(Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME,      'logicalOperator'),
    );
    foreach ($inputBindings as $binding) {
        $this->addInputAction(self::ST_COMMON_QUERY_ELEMENT, $binding[0], $actions[$binding[1]]);
    }

    // Entry actions of the range query term states: (state, handler method).
    $entryBindings = array(
        array(self::ST_OPENEDINT_RQ_FIRST_TERM, 'openedRQFirstTerm'),
        array(self::ST_OPENEDINT_RQ_LAST_TERM,  'openedRQLastTerm'),
        array(self::ST_CLOSEDINT_RQ_FIRST_TERM, 'closedRQFirstTerm'),
        array(self::ST_CLOSEDINT_RQ_LAST_TERM,  'closedRQLastTerm'),
    );
    foreach ($entryBindings as $binding) {
        $this->addEntryAction($binding[0], $actions[$binding[1]]);
    }

    $this->_lexer = new Zend_Search_Lucene_Search_QueryLexer();
}
/**
 * Set query string default encoding
 *
 * The value is stored on the shared parser instance, which is created on
 * first use.
 *
 * @param string $encoding
 */
public static function setDefaultEncoding($encoding)
{
    if (null === self::$_instance) {
        self::$_instance = new self();
    }

    $parser = self::$_instance;
    $parser->_defaultEncoding = $encoding;
}
/**
 * Get query string default encoding
 *
 * The value is read from the shared parser instance, which is created on
 * first use.
 *
 * @return string
 */
public static function getDefaultEncoding()
{
    if (null === self::$_instance) {
        self::$_instance = new self();
    }

    $parser = self::$_instance;

    return $parser->_defaultEncoding;
}
/**
 * Set default boolean operator
 *
 * The value is stored on the shared parser instance, which is created on
 * first use. The class declares the B_OR and B_AND constants for it.
 *
 * @param integer $operator
 */
public static function setDefaultOperator($operator)
{
    if (null === self::$_instance) {
        self::$_instance = new self();
    }

    $parser = self::$_instance;
    $parser->_defaultOperator = $operator;
}
/**
 * Get default boolean operator
 *
 * The value is read from the shared parser instance, which is created on
 * first use.
 *
 * @return integer
 */
public static function getDefaultOperator()
{
    if (null === self::$_instance) {
        self::$_instance = new self();
    }

    $parser = self::$_instance;

    return $parser->_defaultOperator;
}
/**
 * Parses a query string
 *
 * The query is tokenized by the lexer and each token is fed to the state
 * machine, whose actions build up the query in the current parser context.
 *
 * @param string $strQuery
 * @param string $encoding  Query encoding; the default encoding is used when null
 * @return Zend_Search_Lucene_Search_Query
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public static function parse($strQuery, $encoding = null)
{
    if (self::$_instance === null) {
        self::$_instance = new Zend_Search_Lucene_Search_QueryParser();
    }

    $parser = self::$_instance;

    // The parser is a shared instance, so state left behind by a previous
    // call must not leak into this one. A query that stopped inside a range
    // construction (e.g. '[a'), or one that threw part-way, leaves the state
    // machine outside the common state and would make the next, perfectly
    // valid query fail with a syntax error.
    // NOTE(review): relies on reset() inherited from Zend_Search_Lucene_FSM
    // returning the machine to its initial state - confirm against FSM.php.
    $parser->reset();
    $parser->_rqFirstTerm = null;

    $parser->_encoding     = ($encoding !== null) ? $encoding : $parser->_defaultEncoding;
    $parser->_lastToken    = null;
    $parser->_context      = new Zend_Search_Lucene_Search_QueryParserContext($parser->_encoding);
    $parser->_contextStack = array();
    $parser->_tokens       = $parser->_lexer->tokenize($strQuery, $parser->_encoding);

    // Empty query
    if (count($parser->_tokens) == 0) {
        return new Zend_Search_Lucene_Search_Query_Empty();
    }

    foreach ($parser->_tokens as $token) {
        try {
            $parser->_currentToken = $token;
            $parser->process($token->type);

            $parser->_lastToken = $token;
        } catch (Exception $e) {
            // A missing transition rule means the token is not allowed at
            // this point of the query: report it as a user syntax error.
            if (strpos($e->getMessage(), 'There is no any rule for') !== false) {
                throw new Zend_Search_Lucene_Search_QueryParserException( 'Syntax error at char position ' . $token->position . '.' );
            }

            throw $e;
        }
    }

    // Every subquery start pushes a context; anything left means an
    // unclosed parenthesis.
    if (count($parser->_contextStack) != 0) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.' );
    }

    return $parser->_context->getQuery();
}
365 /*********************************************************************
366 * Actions implementation
368 * Actions operate on the list of recognized lexemes
369 *********************************************************************/
/**
 * Add term to a query
 *
 * Builds a term entry from the current token text and the current context
 * field, and appends it to the current context.
 */
public function addTermEntry()
{
    $context = $this->_context;

    $context->addEntry(
        new Zend_Search_Lucene_Search_QueryEntry_Term($this->_currentToken->text, $context->getField())
    );
}
/**
 * Add phrase to a query
 *
 * Builds a phrase entry from the current token text and the current context
 * field, and appends it to the current context.
 */
public function addPhraseEntry()
{
    $context = $this->_context;

    $context->addEntry(
        new Zend_Search_Lucene_Search_QueryEntry_Phrase($this->_currentToken->text, $context->getField())
    );
}
/**
 * Set entry field
 *
 * Passes the current token text to the context as the field of the next entry.
 */
public function setField()
{
    $fieldName = $this->_currentToken->text;

    $this->_context->setNextEntryField($fieldName);
}
/**
 * Set entry sign
 *
 * Passes the current token type (registered for the 'required' and
 * 'prohibited' tokens) to the context as the sign of the next entry.
 */
public function setSign()
{
    $signType = $this->_currentToken->type;

    $this->_context->setNextEntrySign($signType);
}
/**
 * Process fuzzy search/proximity modifier - '~'
 *
 * Delegates to the current context without a parameter; a following number
 * token is handled by processModifierParameter().
 */
public function processFuzzyProximityModifier()
{
    $context = $this->_context;

    $context->processFuzzyProximityModifier();
}
/**
 * Process modifier parameter
 *
 * Applies the number held by the current token to the modifier that
 * precedes it: the fuzzy/proximity mark or the boosting mark.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException  if the number is the very first token
 * @throws Zend_Search_Lucene_Exception                    if the previous token is not a modifier
 */
public function processModifierParameter()
{
    if ($this->_lastToken === null) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Lexeme modifier parameter must follow lexeme modifier. Char position 0.' );
    }

    switch ($this->_lastToken->type) {
        case Zend_Search_Lucene_Search_QueryToken::TT_FUZZY_PROX_MARK:
            $this->_context->processFuzzyProximityModifier($this->_currentToken->text);
            break;

        case Zend_Search_Lucene_Search_QueryToken::TT_BOOSTING_MARK:
            $this->_context->boost($this->_currentToken->text);
            break;

        default:
            // It's not a user input exception.
            // The message used to end with "Char position ." and never
            // carried the position; report where the parameter was found.
            throw new Zend_Search_Lucene_Exception('Lexeme modifier parameter must follow lexeme modifier. Char position ' . $this->_currentToken->position . '.' );
    }
}
/**
 * Start subquery
 *
 * Saves the current context on the stack and continues in a fresh context
 * that receives the saved context's field.
 */
public function subqueryStart()
{
    $outerContext = $this->_context;

    $this->_contextStack[] = $outerContext;
    $this->_context        = new Zend_Search_Lucene_Search_QueryParserContext(
        $this->_encoding,
        $outerContext->getField()
    );
}
/**
 * End subquery
 *
 * Builds the query of the subquery context, restores the enclosing context
 * from the stack and adds the built query to it as a subquery entry.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException  if there is no enclosing context to return to
 */
public function subqueryEnd()
{
    if (count($this->_contextStack) < 1) {
        throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing. Char position ' . $this->_currentToken->position . '.' );
    }

    $subquery       = $this->_context->getQuery();
    $this->_context = array_pop($this->_contextStack);

    $subqueryEntry = new Zend_Search_Lucene_Search_QueryEntry_Subquery($subquery);
    $this->_context->addEntry($subqueryEntry);
}
/**
 * Process logical operator
 *
 * Passes the current token type (registered for the AND, OR and NOT
 * lexemes) to the current context.
 */
public function logicalOperator()
{
    $operatorType = $this->_currentToken->type;

    $this->_context->addLogicalOperator($operatorType);
}
/**
 * Process first range query term (opened interval)
 *
 * Remembers the current token text as the first term of the range.
 */
public function openedRQFirstTerm()
{
    $token = $this->_currentToken;

    $this->_rqFirstTerm = $token->text;
}
/**
 * Process last range query term (opened interval)
 *
 * Range queries are not implemented: this always throws.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function openedRQLastTerm()
{
    $message = 'Range queries are not supported yet.';

    throw new Zend_Search_Lucene_Search_QueryParserException($message);

    // Sketch of the intended implementation, kept for reference:
    //
    // $firstTerm = new Zend_Search_Lucene_Index_Term($this->_rqFirstTerm,        $this->_context->getField());
    // $lastTerm  = new Zend_Search_Lucene_Index_Term($this->_currentToken->text, $this->_context->getField());
    //
    // $query = new Zend_Search_Lucene_Search_Query_Range($firstTerm, $lastTerm, false);
    // $this->_context->addentry($query);
}
/**
 * Process first range query term (closed interval)
 *
 * Remembers the current token text as the first term of the range.
 */
public function closedRQFirstTerm()
{
    $token = $this->_currentToken;

    $this->_rqFirstTerm = $token->text;
}
/**
 * Process last range query term (closed interval)
 *
 * Range queries are not implemented: this always throws.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function closedRQLastTerm()
{
    $message = 'Range queries are not supported yet.';

    throw new Zend_Search_Lucene_Search_QueryParserException($message);

    // Sketch of the intended implementation, kept for reference:
    //
    // $firstTerm = new Zend_Search_Lucene_Index_Term($this->_rqFirstTerm,        $this->_context->getField());
    // $lastTerm  = new Zend_Search_Lucene_Index_Term($this->_currentToken->text, $this->_context->getField());
    //
    // $query = new Zend_Search_Lucene_Search_Query_Range($firstTerm, $lastTerm, true);
    // $this->_context->addentry($query);
}