beagled/Lucene.Net/Analysis/Standard/StandardTokenizer.cs
/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Generated By:JavaCC: Do not edit this line. StandardTokenizer.java */
using System;

namespace Lucene.Net.Analysis.Standard
{

    /// <summary>A grammar-based tokenizer constructed with JavaCC.
    ///
    /// <p> This should be a good tokenizer for most European-language documents:
    ///
    /// <ul>
    /// <li>Splits words at punctuation characters, removing punctuation. However, a
    /// dot that's not followed by whitespace is considered part of a token.
    /// <li>Splits words at hyphens, unless there's a number in the token, in which case
    /// the whole token is interpreted as a product number and is not split.
    /// <li>Recognizes email addresses and internet hostnames as one token.
    /// </ul>
    ///
    /// <p>Many applications have specific tokenizer needs. If this tokenizer does
    /// not suit your application, please consider copying this source code
    /// directory to your project and maintaining your own grammar-based tokenizer.
    /// </summary>
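    /// <example>
    /// A minimal usage sketch (illustrative only, not part of the generated source).
    /// The sample text and the TermText()/Type() accessors on the returned
    /// Lucene.Net.Analysis.Token are assumed from the surrounding Lucene.Net API:
    /// <code>
    /// System.IO.TextReader reader = new System.IO.StringReader("Visit lucene.apache.org or mail dev@example.com");
    /// StandardTokenizer tokenizer = new StandardTokenizer(reader);
    /// for (Lucene.Net.Analysis.Token t = tokenizer.Next(); t != null; t = tokenizer.Next())
    /// {
    ///     System.Console.WriteLine(t.TermText() + "\t" + t.Type());
    /// }
    /// tokenizer.Close();
    /// </code>
    /// </example>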
    public class StandardTokenizer : Lucene.Net.Analysis.Tokenizer
    {

        /// <summary>Constructs a tokenizer for this Reader. </summary>
        public StandardTokenizer(System.IO.TextReader reader) : this(new FastCharStream(reader))
        {
            this.input = reader;
        }

        /// <summary>Returns the next token in the stream, or null at EOS.
        /// <p>The returned token's type is set to an element of {@link
        /// StandardTokenizerConstants#tokenImage}.
        /// </summary>
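        /// <example>
        /// A hedged sketch of checking the returned token's type against the
        /// constants table (the Type() accessor is assumed from the Lucene.Net Token API):
        /// <code>
        /// Lucene.Net.Analysis.Token t = tokenizer.Next();   // null once the stream is exhausted
        /// if (t != null &amp;&amp; t.Type() == StandardTokenizerConstants.tokenImage[StandardTokenizerConstants.EMAIL])
        /// {
        ///     // the token was recognized as an email address
        /// }
        /// </code>
        /// </example>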
        public override Lucene.Net.Analysis.Token Next()
        {
            Token token = null;
            switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk)
            {
                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ALPHANUM);
                    break;
                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.APOSTROPHE);
                    break;
                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.ACRONYM);
                    break;
                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.COMPANY);
                    break;
                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EMAIL);
                    break;
                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.HOST);
                    break;
                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM);
                    break;
                case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ:
                    token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ);
                    break;
                case 0: // token kind 0 is EOF in JavaCC-generated parsers
                    token = Jj_consume_token(0);
                    break;
                default:
                    jj_la1[0] = jj_gen;
                    Jj_consume_token(-1);
                    throw new ParseException();
            }
            if (token.kind == Lucene.Net.Analysis.Standard.StandardTokenizerConstants.EOF)
            {
                // The "if (true)" wrappers are artifacts of the JavaCC code generator;
                // they exist only to keep the trailing throw from being flagged as unreachable.
                if (true)
                    return null;
            }
            else
            {
                if (true)
                    return new Lucene.Net.Analysis.Token(token.image, token.beginColumn, token.endColumn, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage[token.kind]);
            }
            // Unreachable; generated safeguard.
            throw new System.ApplicationException("Missing return statement in function");
        }

        /// <summary>By default, closes the input Reader. </summary>
        public override void Close()
        {
            token_source.Close();
            base.Close();
        }

        public StandardTokenizerTokenManager token_source;
        public Token token, jj_nt;
        private int jj_ntk;
        private int jj_gen;
        private int[] jj_la1 = new int[1];
        private static int[] jj_la1_0_Renamed_Field;
        private static void jj_la1_0()
        {
            // JavaCC lookahead bitmask over token kinds; the set bits of 0x10ff
            // correspond to the kinds accepted by the switch in Next().
            jj_la1_0_Renamed_Field = new int[]{0x10ff};
        }

        public StandardTokenizer(CharStream stream)
        {
            token_source = new StandardTokenizerTokenManager(stream);
            token = new Token();
            jj_ntk = -1;
            jj_gen = 0;
            for (int i = 0; i < 1; i++)
                jj_la1[i] = -1;
        }

        public virtual void ReInit(CharStream stream)
        {
            token_source.ReInit(stream);
            token = new Token();
            jj_ntk = -1;
            jj_gen = 0;
            for (int i = 0; i < 1; i++)
                jj_la1[i] = -1;
        }

        public StandardTokenizer(StandardTokenizerTokenManager tm)
        {
            token_source = tm;
            token = new Token();
            jj_ntk = -1;
            jj_gen = 0;
            for (int i = 0; i < 1; i++)
                jj_la1[i] = -1;
        }

        public virtual void ReInit(StandardTokenizerTokenManager tm)
        {
            token_source = tm;
            token = new Token();
            jj_ntk = -1;
            jj_gen = 0;
            for (int i = 0; i < 1; i++)
                jj_la1[i] = -1;
        }

        private Token Jj_consume_token(int kind)
        {
            Token oldToken;
            if ((oldToken = token).next != null)
                token = token.next;
            else
                token = token.next = token_source.GetNextToken();
            jj_ntk = -1;
            if (token.kind == kind)
            {
                jj_gen++;
                return token;
            }
            token = oldToken;
            jj_kind = kind;
            throw GenerateParseException();
        }

        public Token GetNextToken()
        {
            if (token.next != null)
                token = token.next;
            else
                token = token.next = token_source.GetNextToken();
            jj_ntk = -1;
            jj_gen++;
            return token;
        }

        public Token GetToken(int index)
        {
            Token t = token;
            for (int i = 0; i < index; i++)
            {
                if (t.next != null)
                    t = t.next;
                else
                    t = t.next = token_source.GetNextToken();
            }
            return t;
        }

        private int Jj_ntk()
        {
            if ((jj_nt = token.next) == null)
                return (jj_ntk = (token.next = token_source.GetNextToken()).kind);
            else
                return (jj_ntk = jj_nt.kind);
        }

        private System.Collections.ArrayList jj_expentries = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
        private int[] jj_expentry;
        private int jj_kind = -1;

        public virtual ParseException GenerateParseException()
        {
            jj_expentries.Clear();
            bool[] la1tokens = new bool[16];
            for (int i = 0; i < 16; i++)
            {
                la1tokens[i] = false;
            }
            if (jj_kind >= 0)
            {
                la1tokens[jj_kind] = true;
                jj_kind = -1;
            }
            for (int i = 0; i < 1; i++)
            {
                if (jj_la1[i] == jj_gen)
                {
                    for (int j = 0; j < 32; j++)
                    {
                        if ((jj_la1_0_Renamed_Field[i] & (1 << j)) != 0)
                        {
                            la1tokens[j] = true;
                        }
                    }
                }
            }
            for (int i = 0; i < 16; i++)
            {
                if (la1tokens[i])
                {
                    jj_expentry = new int[1];
                    jj_expentry[0] = i;
                    jj_expentries.Add(jj_expentry);
                }
            }
            int[][] exptokseq = new int[jj_expentries.Count][];
            for (int i = 0; i < jj_expentries.Count; i++)
            {
                exptokseq[i] = (int[]) jj_expentries[i];
            }
            return new ParseException(token, exptokseq, Lucene.Net.Analysis.Standard.StandardTokenizerConstants.tokenImage);
        }

        public void Enable_tracing()
        {
        }

        public void Disable_tracing()
        {
        }

        static StandardTokenizer()
        {
            jj_la1_0();
        }
    }
}