1 From: Daniel Drake <dsd@gentoo.org>
3 Tracing shows that the "read past eof" IOException raised inside Refill() is
4 thrown hundreds of times when indexing even a small number of files.
6 Reduce overhead by reporting this particular condition through return values instead of exceptions.
8 Note: During the migration to 1.9.1 there was an error in porting this patch.
9 As a result, after applying this patch, Analysis/Standard/StandardTokenizerTokenManager.cs
10 won't compile. Apply patch #21, which fixes this issue.
12 Index: Analysis/Standard/CharStream.cs
13 ===================================================================
14 RCS file: /cvs/gnome/beagle/beagled/Lucene.Net/Analysis/Standard/CharStream.cs,v
15 retrieving revision 1.3.4.5
16 diff -u -3 -p -r1.3.4.5 CharStream.cs
17 --- Analysis/Standard/CharStream.cs 25 Sep 2006 22:36:28 -0000 1.3.4.5
18 +++ Analysis/Standard/CharStream.cs 30 Sep 2006 02:39:59 -0000
19 @@ -40,7 +40,7 @@ namespace Lucene.Net.Analysis.Standard
20 /// of selecting the input is the responsibility of the class
21 /// implementing this interface. Can throw any java.io.IOException.
26 /// <summary> Returns the column position of the character last read.</summary>
28 @@ -87,7 +87,7 @@ namespace Lucene.Net.Analysis.Standard
29 /// All characters must remain in the buffer between two successive calls
30 /// to this method to implement backup correctly.
35 /// <summary> Returns a string made up of characters from the marked token beginning
36 /// to the current buffer position. Implementations have the choice of returning
37 @@ -115,4 +115,4 @@ namespace Lucene.Net.Analysis.Standard
42 \ No newline at end of file
44 Index: Analysis/Standard/FastCharStream.cs
45 ===================================================================
46 RCS file: /cvs/gnome/beagle/beagled/Lucene.Net/Analysis/Standard/FastCharStream.cs,v
47 retrieving revision 1.3.4.5
48 diff -u -3 -p -r1.3.4.5 FastCharStream.cs
49 --- Analysis/Standard/FastCharStream.cs 25 Sep 2006 22:36:28 -0000 1.3.4.5
50 +++ Analysis/Standard/FastCharStream.cs 30 Sep 2006 02:39:59 -0000
51 @@ -41,14 +41,15 @@ namespace Lucene.Net.Analysis.Standard
55 - public char ReadChar()
56 + public int ReadChar()
58 if (bufferPosition >= bufferLength)
62 return buffer[bufferPosition++];
65 - private void Refill()
66 + private bool Refill()
68 int newPosition = bufferLength - tokenStart;
70 @@ -81,12 +82,13 @@ namespace Lucene.Net.Analysis.Standard
72 int charsRead = input.Read(buffer, newPosition, buffer.Length - newPosition);
74 - throw new System.IO.IOException("read past eof");
76 - bufferLength += charsRead;
79 + bufferLength += charsRead;
83 - public char BeginToken()
84 + public int BeginToken()
86 tokenStart = bufferPosition;
88 @@ -146,4 +148,4 @@ namespace Lucene.Net.Analysis.Standard
93 \ No newline at end of file
95 Index: Analysis/Standard/StandardTokenizerTokenManager.cs
96 ===================================================================
97 RCS file: /cvs/gnome/beagle/beagled/Lucene.Net/Analysis/Standard/StandardTokenizerTokenManager.cs,v
98 retrieving revision 1.7.4.7
99 diff -u -3 -p -r1.7.4.7 StandardTokenizerTokenManager.cs
100 --- Analysis/Standard/StandardTokenizerTokenManager.cs 25 Sep 2006 22:36:28 -0000 1.7.4.7
101 +++ Analysis/Standard/StandardTokenizerTokenManager.cs 30 Sep 2006 02:40:00 -0000
102 @@ -1180,14 +1180,11 @@ MatchLoop1:
104 if ((i = jjnewStateCnt) == (startsAt = 75 - (jjnewStateCnt = startsAt)))
108 - curChar = input_stream.ReadChar();
110 - catch (System.IO.IOException)
112 + int ret = input_stream.ReadChar();
114 + curChar = (char) ret;
120 internal static readonly int[] jjnextStates = new int[]{30, 31, 32, 34, 38, 39, 41, 42, 46, 47, 53, 54, 5, 6, 10, 11, 19, 20, 3, 4, 8, 9, 17, 18, 31, 32, 34, 32, 33, 34, 65, 66, 68, 69, 72, 73, 5, 6, 19, 20, 25, 26, 50, 51, 70, 71, 12, 13, 14, 15, 23, 24, 43, 44, 48, 49, 55, 56, 59, 60, 61, 62};
121 @@ -1366,11 +1363,11 @@ MatchLoop1:
127 - curChar = input_stream.BeginToken();
129 - catch (System.IO.IOException)
131 + int ret = input_stream.BeginToken();
133 + curChar = (char) ret;
137 matchedToken = JjFillToken();
138 @@ -1402,11 +1399,7 @@ MatchLoop1:
139 int error_column = input_stream.GetEndColumn();
140 System.String error_after = null;
141 bool EOFSeen = false;
144 - input_stream.ReadChar(); input_stream.Backup(1);
146 - catch (System.IO.IOException)
147 + if (input_stream.ReadChar() == -1)
150 error_after = curPos <= 1?"":input_stream.GetImage();
151 @@ -1418,6 +1411,9 @@ MatchLoop1:
156 + input_stream.Backup(1);
160 input_stream.Backup(1);
161 @@ -1429,4 +1425,4 @@ EOFLoop: ;
166 \ No newline at end of file