7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Search_Lucene
17 * @subpackage Analysis
18 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
23 /** Zend_Search_Lucene_Analysis_Analyzer_Common */
24 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php';
29 * @package Zend_Search_Lucene
30 * @subpackage Analysis
31 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
32 * @license http://framework.zend.com/license/new-bsd New BSD License
35 class Zend_Search_Lucene_Analysis_Analyzer_Common_Text
extends Zend_Search_Lucene_Analysis_Analyzer_Common
38 * Current position in a stream
47 public function reset()
51 if ($this->_input
=== null) {
55 // convert input into ascii
56 $this->_input
= iconv($this->_encoding
, 'ASCII//TRANSLIT', $this->_input
);
57 $this->_encoding
= 'ASCII';
61 * Tokenization stream API
63 * Returns null at the end of stream
65 * @return Zend_Search_Lucene_Analysis_Token|null
67 public function nextToken()
69 if ($this->_input
=== null) {
75 if (! preg_match('/[a-zA-Z]+/', $this->_input
, $match, PREG_OFFSET_CAPTURE
, $this->_position
)) {
76 // This covers both cases: a) there are no matches (preg_match(...) === 0)
77 // b) an error occurred (preg_match(...) === FALSE)
83 $endpos = $pos +
strlen($str);
85 $this->_position
= $endpos;
87 $token = $this->normalize(new Zend_Search_Lucene_Analysis_Token($str, $pos, $endpos));
88 } while ($token === null); // try again if token is skipped