cvsimport
[beagle.git] / beagled / Lucene.Net / Analysis / Analyzer.cs
blob9d34acd434af4a9f36a48c94ba35ff094d600413
/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 using System;
namespace Lucene.Net.Analysis
{
    /// <summary>
    /// An Analyzer builds TokenStreams, which analyze text. It thus represents a
    /// policy for extracting index terms from text.
    /// <para>
    /// Typical implementations first build a Tokenizer, which breaks the stream of
    /// characters from the reader into raw Tokens. One or more TokenFilters may
    /// then be applied to the output of the Tokenizer.
    /// </para>
    /// <para>
    /// WARNING: by default the two <c>TokenStream</c> overloads forward to each
    /// other, so a subclass must override at least one of them. If neither is
    /// overridden, calling either overload recurses without ever terminating.
    /// </para>
    /// </summary>
    public abstract class Analyzer
    {
        /// <summary>
        /// Creates a TokenStream which tokenizes all the text in the provided
        /// reader. The default implementation forwards to
        /// <see cref="TokenStream(System.IO.TextReader)"/> for compatibility with
        /// older versions. Override to allow the Analyzer to choose a strategy
        /// based on document and/or field. Implementations must be able to handle
        /// a null field name for backward compatibility.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; may be null.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>The stream returned by <see cref="TokenStream(System.IO.TextReader)"/>.</returns>
        public virtual TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            // Implemented for backward compatibility: the field name is ignored
            // here and the call is routed to the legacy single-argument overload.
            return TokenStream(reader);
        }

        /// <summary>
        /// Creates a TokenStream which tokenizes all the text in the provided
        /// reader. Provided for backward compatibility only.
        /// </summary>
        /// <remarks>
        /// Deprecated: use <see cref="TokenStream(string, System.IO.TextReader)"/> instead.
        /// </remarks>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>
        /// The stream returned by <see cref="TokenStream(string, System.IO.TextReader)"/>
        /// when called with a null field name.
        /// </returns>
        /// <seealso cref="TokenStream(string, System.IO.TextReader)"/>
        public virtual TokenStream TokenStream(System.IO.TextReader reader)
        {
            // Forwards with a null field name; together with the overload above
            // this forms the mutual forwarding described in the class warning.
            return TokenStream(null, reader);
        }

        /// <summary>
        /// Invoked before indexing a Field instance if terms have already been
        /// added to that field. This allows custom analyzers to place an automatic
        /// position increment gap between Field instances using the same field
        /// name. The default position increment gap is 0. With a 0 position
        /// increment gap and the typical default token position increment of 1,
        /// all terms in a field, including across Field instances, are in
        /// successive positions, allowing exact PhraseQuery matches, for instance,
        /// across Field instance boundaries.
        /// </summary>
        /// <param name="fieldName">Field name being indexed.</param>
        /// <returns>
        /// The position increment gap, added to the next token emitted from
        /// <see cref="TokenStream(string, System.IO.TextReader)"/>. This base
        /// implementation always returns 0.
        /// </returns>
        public virtual int GetPositionIncrementGap(string fieldName)
        {
            return 0;
        }
    }
}