Initial revision
[beagle.git] / Lucene.Net / Analysis / Standard / FastCharStream.cs
blob6fd718252dbf954e62259f7bfb1ac6c5e0c2e2b0
1 using System;
2 using System.IO;
4 namespace Lucene.Net.Analysis.Standard
6 /* ====================================================================
7 * The Apache Software License, Version 1.1
9 * Copyright (c) 2001 The Apache Software Foundation. All rights
10 * reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
24 * 3. The end-user documentation included with the redistribution,
25 * if any, must include the following acknowledgment:
26 * "This product includes software developed by the
27 * Apache Software Foundation (http://www.apache.org/)."
28 * Alternately, this acknowledgment may appear in the software itself,
29 * if and wherever such third-party acknowledgments normally appear.
31 * 4. The names "Apache" and "Apache Software Foundation" and
32 * "Apache Lucene" must not be used to endorse or promote products
33 * derived from this software without prior written permission. For
34 * written permission, please contact apache@apache.org.
36 * 5. Products derived from this software may not be called "Apache",
37 * "Apache Lucene", nor may "Apache" appear in their name, without
38 * prior written permission of the Apache Software Foundation.
40 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
41 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
44 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
47 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
48 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
49 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
50 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 * ====================================================================
54 * This software consists of voluntary contributions made by many
55 * individuals on behalf of the Apache Software Foundation. For more
56 * information on the Apache Software Foundation, please see
57 * <http://www.apache.org/>.
/// <summary>
/// An efficient implementation of JavaCC's CharStream interface.
/// <p>Note that
/// this does not do line-number counting, but instead keeps track of the
/// character position of the token in the input, as required by Lucene's
/// Lucene.Net.Analysis.Token API.
/// </p>
/// </summary>
public sealed class FastCharStream : CharStream
{
    /// <summary>Size, in chars, of the buffer allocated on the first refill.</summary>
    private const int InitialBufferSize = 2048;

    char[] buffer = null;

    int bufferLength = 0;       // end of valid chars
    int bufferPosition = 0;     // next char to read

    int tokenStart = 0;         // offset in buffer
    int bufferStart = 0;        // position in file of buffer

    TextReader input;           // source of chars

    /// <summary>
    /// Constructs from a TextReader.
    /// </summary>
    /// <param name="r">The reader that supplies the characters; it is closed by <see cref="Done"/>.</param>
    public FastCharStream(TextReader r)
    {
        input = r;
    }

    /// <summary>
    /// Returns the next character, refilling the buffer from the reader
    /// when every buffered character has already been consumed.
    /// </summary>
    /// <returns>The character at the current read position.</returns>
    /// <exception cref="IOException">
    /// The reader has no more characters, or reading from it failed.
    /// </exception>
    public char ReadChar()
    {
        if (bufferPosition >= bufferLength)
        {
            Refill();
        }
        return buffer[bufferPosition++];
    }

    /// <summary>
    /// Makes room in the buffer while keeping the characters of the current
    /// token, then reads more characters from the reader.
    /// </summary>
    /// <exception cref="IOException">
    /// The reader returned no characters ("read past eof"), or the read itself failed.
    /// </exception>
    private void Refill()
    {
        // Number of buffered chars that belong to the current token and must be kept.
        int newPosition = bufferLength - tokenStart;

        if (tokenStart == 0)
        {   // token won't fit in buffer
            if (buffer == null)
            {   // first time: alloc buffer
                buffer = new char[InitialBufferSize];
            }
            else if (bufferLength == buffer.Length)
            {   // grow buffer
                char[] newBuffer = new char[buffer.Length * 2];
                Array.Copy(buffer, 0, newBuffer, 0, bufferLength);
                buffer = newBuffer;
            }
        }
        else
        {   // shift token to front
            Array.Copy(buffer, tokenStart, buffer, 0, newPosition);
        }

        bufferLength = newPosition;     // update state
        bufferPosition = newPosition;
        bufferStart += tokenStart;
        tokenStart = 0;

        int charsRead;
        try
        {
            charsRead =                 // fill space in buffer
                input.Read(buffer, newPosition, buffer.Length - newPosition);
        }
        catch (IOException)
        {
            // A genuine I/O failure must not be disguised as end of input.
            throw;
        }
        catch (Exception e)
        {
            // Callers of this stream handle IOException, so keep that type
            // but preserve the real cause instead of silently discarding it.
            throw new IOException("read failed: " + e.Message, e);
        }

        if (charsRead == 0)
        {
            throw new IOException("read past eof");
        }

        bufferLength += charsRead;
    }

    /// <summary>
    /// Marks the current read position as the start of a new token and
    /// returns the token's first character.
    /// </summary>
    /// <returns>The first character of the new token.</returns>
    /// <exception cref="IOException">
    /// The reader has no more characters, or reading from it failed.
    /// </exception>
    public char BeginToken()
    {
        tokenStart = bufferPosition;
        return ReadChar();
    }

    /// <summary>
    /// Moves the read position back by the given number of characters.
    /// </summary>
    /// <param name="amount">How many characters to step back; not validated here.</param>
    public void Backup(int amount)
    {
        bufferPosition -= amount;
    }

    /// <summary>
    /// Returns the text from the start of the current token up to the read position.
    /// </summary>
    /// <returns>A new string holding the current token's characters.</returns>
    public String GetImage()
    {
        return new String(buffer, tokenStart, bufferPosition - tokenStart);
    }

    /// <summary>
    /// Returns the last <paramref name="len"/> characters that precede the read position.
    /// </summary>
    /// <param name="len">Number of trailing characters to copy.</param>
    /// <returns>A new array of length <paramref name="len"/>.</returns>
    public char[] GetSuffix(int len)
    {
        char[] value = new char[len];
        Array.Copy(buffer, bufferPosition - len, value, 0, len);
        return value;
    }

    /// <summary>
    /// Closes the underlying reader. An <see cref="IOException"/> raised while
    /// closing is written to standard error and otherwise ignored.
    /// </summary>
    public void Done()
    {
        try
        {
            input.Close();
        }
        catch (IOException e)
        {
            Console.Error.WriteLine("Caught: " + e + "; ignoring.");
        }
    }

    /// <summary>Returns the position in the input of the next character to read.</summary>
    public int GetColumn()
    {
        return bufferStart + bufferPosition;
    }

    /// <summary>Always returns 1; this stream does not count lines.</summary>
    public int GetLine()
    {
        return 1;
    }

    /// <summary>Returns the position in the input of the next character to read.</summary>
    public int GetEndColumn()
    {
        return bufferStart + bufferPosition;
    }

    /// <summary>Always returns 1; this stream does not count lines.</summary>
    public int GetEndLine()
    {
        return 1;
    }

    /// <summary>Returns the position in the input at which the current token starts.</summary>
    public int GetBeginColumn()
    {
        return bufferStart + tokenStart;
    }

    /// <summary>Always returns 1; this stream does not count lines.</summary>
    public int GetBeginLine()
    {
        return 1;
    }
}