search/Zend/Search/Lucene/Analysis/Token.php

   1 <?php
   2 /**
   3  * Zend Framework
   4  *
   5  * LICENSE
   6  *
   7  * This source file is subject to the new BSD license that is bundled
   8  * with this package in the file LICENSE.txt.
   9  * It is also available through the world-wide-web at this URL:
  10  * http://framework.zend.com/license/new-bsd
  11  * If you did not receive a copy of the license and are unable to
  12  * obtain it through the world-wide-web, please send an email
  13  * to license@zend.com so we can send you a copy immediately.
  14  *
  15  * @category   Zend
  16  * @package    Zend_Search_Lucene
  17  * @subpackage Analysis
  18  * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  20  */
  21
  22
  23 /**
  24  * @category   Zend
  25  * @package    Zend_Search_Lucene
  26  * @subpackage Analysis
  27  * @copyright  Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
  28  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  29  */
  30 class Zend_Search_Lucene_Analysis_Token
  31 {
  32     /**
  33      * The text of the term.
  34      *
  35      * @var string
  36      */
  37     private $_termText;
  38
  39     /**
  40      * Start in source text.
  41      *
  42      * @var integer
  43      */
  44     private $_startOffset;
  45
  46     /**
  47      * End in source text
  48      *
  49      * @var integer
  50      */
  51     private $_endOffset;
  52
  53     /**
  54      * The position of this token relative to the previous Token.
  55      *
  56      * The default value is one.
  57      *
  58      * Some common uses for this are:
  59      * Set it to zero to put multiple terms in the same position.  This is
  60      * useful if, e.g., a word has multiple stems.  Searches for phrases
  61      * including either stem will match.  In this case, all but the first stem's
  62      * increment should be set to zero: the increment of the first instance
  63      * should be one.  Repeating a token with an increment of zero can also be
  64      * used to boost the scores of matches on that token.
  65      *
  66      * Set it to values greater than one to inhibit exact phrase matches.
  67      * If, for example, one does not want phrases to match across removed stop
  68      * words, then one could build a stop word filter that removes stop words and
  69      * also sets the increment to the number of stop words removed before each
  70      * non-stop word.  Then exact phrase queries will only match when the terms
  71      * occur with no intervening stop words.
  72      *
  73      * @var integer
  74      */
  75     private $_positionIncrement;
  76
  77
  78     /**
  79      * Object constructor
  80      *
  81      * @param string  $text
  82      * @param integer $start
  83      * @param integer $end
  84      * @param string  $type
  85      */
  86     public function __construct($text, $start, $end)
  87     {
  88         $this->_termText    = $text;
  89         $this->_startOffset = $start;
  90         $this->_endOffset   = $end;
  91
  92         $this->_positionIncrement = 1;
  93     }
  94
  95
  96     /**
  97      * positionIncrement setter
  98      *
  99      * @param integer $positionIncrement
 100      */
 101     public function setPositionIncrement($positionIncrement)
 102     {
 103         $this->_positionIncrement = $positionIncrement;
 104     }
 105
 106     /**
 107      * Returns the position increment of this Token.
 108      *
 109      * @return integer
 110      */
 111     public function getPositionIncrement()
 112     {
 113         return $this->_positionIncrement;
 114     }
 115
 116     /**
 117      * Returns the Token's term text.
 118      *
 119      * @return string
 120      */
 121     public function getTermText()
 122     {
 123         return $this->_termText;
 124     }
 125
 126     /**
 127      * Returns this Token's starting offset, the position of the first character
 128      * corresponding to this token in the source text.
 129      *
 130      * Note:
 131      * The difference between getEndOffset() and getStartOffset() may not be equal
 132      * to strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), as the term text may have been altered
 133      * by a stemmer or some other filter.
 134      *
 135      * @return integer
 136      */
 137     public function getStartOffset()
 138     {
 139         return $this->_startOffset;
 140     }
 141
 142     /**
 143      * Returns this Token's ending offset, one greater than the position of the
 144      * last character corresponding to this token in the source text.
 145      *
 146      * @return integer
 147      */
 148     public function getEndOffset()
 149     {
 150         return $this->_endOffset;
 151     }
 152 }
 153