2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* Generated By:JavaCC: Do not edit this line. StandardTokenizer.java */
20 namespace Lucene
.Net
.Analysis
.Standard
23 /// <summary>A grammar-based tokenizer constructed with JavaCC.
25 /// <p> This should be a good tokenizer for most European-language documents:</p>
/// <ul>
28 /// <li>Splits words at punctuation characters, removing punctuation. However, a
29 /// dot that's not followed by whitespace is considered part of a token.</li>
30 /// <li>Splits words at hyphens, unless there's a number in the token, in which case
31 /// the whole token is interpreted as a product number and is not split.</li>
32 /// <li>Recognizes email addresses and internet hostnames as one token.</li>
/// </ul>
35 /// <p>Many applications have specific tokenizer needs. If this tokenizer does
36 /// not suit your application, please consider copying this source code
37 /// directory to your project and maintaining your own grammar-based tokenizer.</p>
/// </summary>
39 public class StandardTokenizer
: Lucene
.Net
.Analysis
.Tokenizer
42 /// <summary>Constructs a tokenizer that reads its input from the given reader. </summary>
/// <param name="reader">Text source; it is wrapped in a new FastCharStream.</param>
// The wrapped stream is passed to another constructor of this class through this(...).
// NOTE(review): presumably that is the StandardTokenizer(CharStream) overload; confirm that
// FastCharStream is a CharStream.
43 public StandardTokenizer(System
.IO
.TextReader reader
) : this(new FastCharStream(reader
))
48 /// <summary>Returns the next token in the stream, or null at EOS.
49 /// <p>The returned token's type is set to an element of
50 /// <see cref="StandardTokenizerConstants.tokenImage"/>.</p>
/// </summary>
/// <returns>A Lucene.Net.Analysis.Token built from the parsed token's image, beginColumn,
/// endColumn and the tokenImage entry indexed by the token's kind.</returns>
/// <exception cref="ParseException">Thrown on the fallback path that follows the listed token kinds.</exception>
52 public override Lucene
.Net
.Analysis
.Token
Next()
// Select a branch by the kind of the upcoming token. When the cached jj_ntk is -1,
// Jj_ntk() is called to obtain the kind; otherwise the cached value is used directly.
55 switch ((jj_ntk
== - 1) ? Jj_ntk() : jj_ntk
)
// Each case below consumes one token of the matching kind and stores it in 'token'.
58 case Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.ALPHANUM
:
59 token
= Jj_consume_token(Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.ALPHANUM
);
62 case Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.APOSTROPHE
:
63 token
= Jj_consume_token(Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.APOSTROPHE
);
66 case Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.ACRONYM
:
67 token
= Jj_consume_token(Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.ACRONYM
);
70 case Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.COMPANY
:
71 token
= Jj_consume_token(Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.COMPANY
);
74 case Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.EMAIL
:
75 token
= Jj_consume_token(Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.EMAIL
);
78 case Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.HOST
:
79 token
= Jj_consume_token(Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.HOST
);
82 case Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.NUM
:
83 token
= Jj_consume_token(Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.NUM
);
86 case Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.CJ
:
87 token
= Jj_consume_token(Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.CJ
);
// Consumes a token of kind 0.
// NOTE(review): the case label for this branch is not visible in this view; kind 0 is
// presumably the end-of-input kind - confirm against StandardTokenizerConstants.
91 token
= Jj_consume_token(0);
// Fallback path: request kind -1 and then raise ParseException.
// NOTE(review): presumably the switch's default branch; its label is not visible here.
96 Jj_consume_token(- 1);
97 throw new ParseException();
// End-of-stream check on the token that was just consumed.
// NOTE(review): the body of this 'if' is not visible in this view; per the summary above it
// is expected to return null - confirm.
100 if (token
.kind
== Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.EOF
)
// Convert the parser token into an analysis Token: text, begin/end column, and the
// type string looked up in tokenImage by the token's kind.
111 return new Lucene
.Net
.Analysis
.Token(token
.image
, token
.beginColumn
, token
.endColumn
, Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.tokenImage
[token
.kind
]);
// NOTE(review): looks like a generator-emitted guard that is not expected to be reached - confirm.
114 throw new System
.ApplicationException("Missing return statement in function");
117 /// <summary>By default, closes the input Reader. </summary>
118 public override void Close()
// Forwards the close request to the token manager.
// NOTE(review): the embedded line numbering skips lines around this statement, so further
// statements (for example a base-class call) may exist outside this view.
120 token_source
.Close();
// Token manager that supplies tokens through GetNextToken(); it is assigned in the
// CharStream constructor and re-targeted by ReInit(CharStream).
124 public StandardTokenizerTokenManager token_source
;
// token: the current token, advanced by Jj_consume_token() and GetNextToken().
// jj_nt: receives token.next when the upcoming token kind is looked up.
125 public Token token
, jj_nt
;
// One-element array; GenerateParseException() compares each entry with jj_gen.
128 private int[] jj_la1
= new int[1];
// Static table of bitmasks, filled by jj_la1_0() and read bit by bit in GenerateParseException().
129 private static int[] jj_la1_0_Renamed_Field
;
// Initializes the static mask table with a single entry, 0x10ff (bits 0-7 and bit 12 set).
130 private static void jj_la1_0()
132 jj_la1_0_Renamed_Field
= new int[]{0x10ff}
;
/// <summary>Constructs a tokenizer that reads from the given character stream.</summary>
/// <param name="stream">Stream handed to a newly created StandardTokenizerTokenManager.</param>
135 public StandardTokenizer(CharStream stream
)
// Create the token manager that will produce tokens from the stream.
137 token_source
= new StandardTokenizerTokenManager(stream
);
// NOTE(review): the loop body is not visible in this view. The bound of 1 matches the length
// of jj_la1 (new int[1]), so it presumably resets that array - confirm.
141 for (int i
= 0; i
< 1; i
++)
/// <summary>Re-targets this tokenizer at a different character stream.</summary>
/// <param name="stream">Stream forwarded to token_source.ReInit.</param>
145 public virtual void ReInit(CharStream stream
)
// Reuse the existing token manager instead of allocating a new one.
147 token_source
.ReInit(stream
);
// NOTE(review): the loop body is not visible in this view. The bound of 1 matches the length
// of jj_la1 (new int[1]), so it presumably resets that array - confirm.
151 for (int i
= 0; i
< 1; i
++)
/// <summary>Constructs a tokenizer on top of an existing token manager.</summary>
/// <param name="tm">Token manager to use.</param>
// NOTE(review): the statements that store 'tm' are not visible in this view; presumably it is
// assigned to token_source - confirm.
155 public StandardTokenizer(StandardTokenizerTokenManager tm
)
// NOTE(review): the loop body is not visible in this view. The bound of 1 matches the length
// of jj_la1 (new int[1]), so it presumably resets that array - confirm.
161 for (int i
= 0; i
< 1; i
++)
/// <summary>Re-targets this tokenizer at a different token manager.</summary>
/// <param name="tm">Token manager to use from now on.</param>
// NOTE(review): the statements that store 'tm' are not visible in this view; presumably it is
// assigned to token_source - confirm.
165 public virtual void ReInit(StandardTokenizerTokenManager tm
)
// NOTE(review): the loop body is not visible in this view. The bound of 1 matches the length
// of jj_la1 (new int[1]), so it presumably resets that array - confirm.
171 for (int i
= 0; i
< 1; i
++)
// Advances to the next token and checks it against the expected kind.
// kind: the token kind the caller expects to consume.
// Throws the exception produced by GenerateParseException() on the failure path.
175 private Token
Jj_consume_token(int kind
)
// Remember the current token in oldToken and test whether a successor is already linked.
// NOTE(review): the branch taken when a successor exists is not visible in this view.
178 if ((oldToken
= token
).next
!= null)
// Fetch a fresh token from the token manager, link it as token.next and make it current.
181 token
= token
.next
= token_source
.GetNextToken();
// Kind check on the new current token.
// NOTE(review): the body of this 'if' is not visible; presumably it returns the token - confirm.
183 if (token
.kind
== kind
)
190 throw GenerateParseException();
/// <summary>Advances the parser to the following token.</summary>
// NOTE(review): the return statement is not visible in this view; presumably the new current
// token is returned - confirm.
193 public Token
GetNextToken()
// Test whether a successor token is already linked.
// NOTE(review): the branch taken when one exists is not visible in this view.
195 if (token
.next
!= null)
// Fetch a fresh token from the token manager, link it as token.next and make it current.
198 token
= token
.next
= token_source
.GetNextToken();
/// <summary>Walks forward along the token chain.</summary>
/// <param name="index">Number of steps the loop below performs.</param>
// NOTE(review): the declaration and initial value of 't' and the return statement are not
// visible in this view; presumably 't' starts at the current token and is returned - confirm.
204 public Token
GetToken(int index
)
207 for (int i
= 0; i
< index
; i
++)
// Fetch a token from the token manager, link it as t.next and step onto it.
// NOTE(review): the embedded numbering skips lines before this statement, so it may be only
// one branch of a conditional.
212 t
= t
.next
= token_source
.GetNextToken();
// NOTE(review): the method header for these statements is not visible in this view; they
// appear to be the body of Jj_ntk(), which Next() calls when jj_ntk is -1 - confirm.
// Store token.next in jj_nt and test whether a successor is linked yet.
219 if ((jj_nt
= token
.next
) == null)
// No successor: fetch one from the token manager, link it as token.next, then cache and
// return its kind through jj_ntk.
220 return (jj_ntk
= (token
.next
= token_source
.GetNextToken()).kind
);
// Successor already linked: cache and return its kind through jj_ntk.
222 return (jj_ntk
= jj_nt
.kind
);
// List of int[] entries collected by GenerateParseException(); created through
// ArrayList.Synchronized around an ArrayList with initial capacity 10.
225 private System
.Collections
.ArrayList jj_expentries
= System
.Collections
.ArrayList
.Synchronized(new System
.Collections
.ArrayList(10));
// Scratch entry allocated in GenerateParseException() before being added to jj_expentries.
226 private int[] jj_expentry
;
// Index into la1tokens that GenerateParseException() marks as true; starts at -1.
227 private int jj_kind
= - 1;
/// <summary>Builds a ParseException from the current token, the collected
/// token-kind entries and StandardTokenizerConstants.tokenImage.</summary>
/// <returns>A new ParseException; this method returns it rather than throwing it.</returns>
229 public virtual ParseException
GenerateParseException()
// Start from an empty entry list and an all-false table of 16 token kinds.
231 jj_expentries
.Clear();
232 bool[] la1tokens
= new bool[16];
233 for (int i
= 0; i
< 16; i
++)
235 la1tokens
[i
] = false;
// Mark the kind recorded in jj_kind.
// NOTE(review): jj_kind is initialized to -1 and the embedded numbering skips lines before
// this statement, so a range guard presumably precedes it - confirm.
239 la1tokens
[jj_kind
] = true;
// For each jj_la1 entry equal to jj_gen, scan the 32 bits of the matching mask.
242 for (int i
= 0; i
< 1; i
++)
244 if (jj_la1
[i
] == jj_gen
)
246 for (int j
= 0; j
< 32; j
++)
// True when bit j is set in mask i.
// NOTE(review): the body is not visible; presumably it sets la1tokens[j] = true - confirm.
248 if ((jj_la1_0_Renamed_Field
[i
] & (1 << j
)) != 0)
// Walk the 16 kinds and append a one-element int[] entry to jj_expentries.
// NOTE(review): the condition on la1tokens[i] and the statement that fills the entry are not
// visible in this view.
255 for (int i
= 0; i
< 16; i
++)
259 jj_expentry
= new int[1];
261 jj_expentries
.Add(jj_expentry
);
// Copy the collected entries into a jagged array; the cast is needed because the
// non-generic ArrayList indexer yields object.
264 int[][] exptokseq
= new int[jj_expentries
.Count
][];
265 for (int i
= 0; i
< jj_expentries
.Count
; i
++)
267 exptokseq
[i
] = (int[]) jj_expentries
[i
];
269 return new ParseException(token
, exptokseq
, Lucene
.Net
.Analysis
.Standard
.StandardTokenizerConstants
.tokenImage
);
// Tracing switch (enable side), paired with Disable_tracing().
// NOTE(review): no body is visible in this view; presumably a no-op - confirm.
272 public void Enable_tracing()
// Tracing switch (disable side), paired with Enable_tracing().
// NOTE(review): no body is visible in this view; presumably a no-op - confirm.
276 public void Disable_tracing()
279 static StandardTokenizer()