/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 namespace Lucene
.Net
.Analysis
/// <summary>
/// An Analyzer builds TokenStreams, which analyze text. It thus represents a
/// policy for extracting index terms from text.
/// <para>
/// Typical implementations first build a Tokenizer, which breaks the stream of
/// characters from the Reader into raw Tokens. One or more TokenFilters may
/// then be applied to the output of the Tokenizer.
/// </para>
/// <para>
/// WARNING: You must override one of the methods defined by this class in your
/// subclass or the Analyzer will enter an infinite loop.
/// </para>
/// </summary>
32 public abstract class Analyzer
/// <summary>
/// Creates a TokenStream which tokenizes all the text in the provided
/// reader. The default implementation forwards to
/// <see cref="TokenStream(System.IO.TextReader)"/> for compatibility with
/// older versions. Override to allow the Analyzer to choose a strategy based
/// on document and/or field. Must be able to handle a null field name for
/// backward compatibility.
/// </summary>
/// <remarks>
/// The default <see cref="TokenStream(System.IO.TextReader)"/> forwards back
/// to this overload, so a subclass must override at least one of the two;
/// if neither is overridden the calls never terminate.
/// </remarks>
/// <param name="fieldName">
/// Name of the field being analyzed; may be null. This default
/// implementation does not use it.
/// </param>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>
/// The stream produced by <see cref="TokenStream(System.IO.TextReader)"/>
/// for <paramref name="reader"/>.
/// </returns>
public virtual TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
    // implemented for backward compatibility
    return TokenStream(reader);
}
/// <summary>
/// Creates a TokenStream which tokenizes all the text in the provided
/// reader. Provided for backward compatibility only.
/// </summary>
/// <remarks>
/// Deprecated: use
/// <see cref="TokenStream(System.String, System.IO.TextReader)"/> instead.
/// This default implementation forwards to that overload with a null field
/// name. Because that overload's default implementation forwards back here,
/// a subclass must override at least one of the two; if neither is
/// overridden the calls never terminate.
/// </remarks>
/// <param name="reader">Source of the text to tokenize.</param>
/// <returns>
/// The stream produced by
/// <see cref="TokenStream(System.String, System.IO.TextReader)"/> when called
/// with a null field name and <paramref name="reader"/>.
/// </returns>
/// <seealso cref="TokenStream(System.String, System.IO.TextReader)"/>
public virtual TokenStream TokenStream(System.IO.TextReader reader)
{
    // A null field name selects the two-argument overload, which is required
    // to accept null for backward compatibility.
    return TokenStream(null, reader);
}
/// <summary>
/// Invoked before indexing a Field instance if
/// terms have already been added to that field. This allows custom
/// analyzers to place an automatic position increment gap between
/// Field instances using the same field name. The default
/// position increment gap is 0. With a 0 position increment gap and
/// the typical default token position increment of 1, all terms in a field,
/// including across Field instances, are in successive positions, allowing
/// exact PhraseQuery matches, for instance, across Field instance boundaries.
/// </summary>
/// <param name="fieldName">Field name being indexed.</param>
/// <returns>
/// Position increment gap, added to the next token emitted from
/// <see cref="TokenStream(System.String, System.IO.TextReader)"/>.
/// </returns>
72 public virtual int GetPositionIncrementGap(System
.String fieldName
)