beagled/Lucene.Net/Analysis/Analyzer.cs

   1 /*
   2  * Copyright 2004 The Apache Software Foundation
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  * http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 using System;
  18
  19 namespace Lucene.Net.Analysis
  20 {
  21
  22         /// <summary>An Analyzer builds TokenStreams, which analyze text.  It thus represents a
  23         /// policy for extracting index terms from text.
  24         /// <p>
  25         /// Typical implementations first build a Tokenizer, which breaks the stream of
  26         /// characters from the Reader into raw Tokens.  One or more TokenFilters may
  27         /// then be applied to the output of the Tokenizer.
  28         /// <p>
  29         /// WARNING: You must override one of the methods defined by this class in your
  30         /// subclass or the Analyzer will enter an infinite loop.
  31         /// </summary>
  32         public abstract class Analyzer
  33         {
  34                 /// <summary>Creates a TokenStream which tokenizes all the text in the provided
  35                 /// Reader.  Default implementation forwards to tokenStream(Reader) for
  36                 /// compatibility with older version.  Override to allow Analyzer to choose
  37                 /// strategy based on document and/or field.  Must be able to handle null
  38                 /// field name for backward compatibility.
  39                 /// </summary>
  40                 public virtual TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
  41                 {
  42                         // implemented for backward compatibility
  43                         return TokenStream(reader);
  44                 }
  45
  46                 /// <summary>Creates a TokenStream which tokenizes all the text in the provided
  47                 /// Reader.  Provided for backward compatibility only.
  48                 /// </summary>
  49                 /// <deprecated> use tokenStream(String, Reader) instead.
  50                 /// </deprecated>
  51                 /// <seealso cref="TokenStream(String, Reader)">
  52                 /// </seealso>
  53                 public virtual TokenStream TokenStream(System.IO.TextReader reader)
  54                 {
  55                         return TokenStream(null, reader);
  56                 }
  57
  58                 /// <summary> Invoked before indexing a Field instance if
  59                 /// terms have already been added to that field.  This allows custom
  60                 /// analyzers to place an automatic position increment gap between
  61                 /// Field instances using the same field name.  The default value
  62                 /// position increment gap is 0.  With a 0 position increment gap and
  63                 /// the typical default token position increment of 1, all terms in a field,
  64                 /// including across Field instances, are in successive positions, allowing
  65                 /// exact PhraseQuery matches, for instance, across Field instance boundaries.
  66                 ///
  67                 /// </summary>
  68                 /// <param name="fieldName">Field name being indexed.
  69                 /// </param>
  70                 /// <returns> position increment gap, added to the next token emitted from {@link #TokenStream(String,Reader)}
  71                 /// </returns>
  72                 public virtual int GetPositionIncrementGap(System.String fieldName)
  73                 {
  74                         return 0;
  75                 }
  76         }
  77 }