Add --enable-deletion option to buildindex. If used, buildindex will remove deleted...
[beagle.git] / beagled / Lucene.Net / Search / TermScorer.cs
blob3df2554409e379b349a654c7c57d71aceeb1d43d
/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 using System;
17 using TermDocs = Lucene.Net.Index.TermDocs;
namespace Lucene.Net.Search
{
    /// <summary>Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.</summary>
    /// <remarks>
    /// Document numbers and term frequencies are pulled from the underlying
    /// <code>TermDocs</code> in batches into <c>docs</c>/<c>freqs</c>;
    /// <c>pointer</c> indexes the current entry of that batch.
    /// </remarks>
    sealed class TermScorer : Scorer
    {
        /// <summary>Number of term frequencies whose raw score is precomputed.</summary>
        private const int SCORE_CACHE_SIZE = 32;

        /// <summary>Weight handed to the constructor; source of the query and of <c>weightValue</c>.</summary>
        private readonly Weight weight;

        /// <summary>Iterator over the documents matching the term.</summary>
        private readonly TermDocs termDocs;

        /// <summary>Encoded field norms, indexed by document number.</summary>
        private readonly byte[] norms;

        /// <summary>Cached result of <c>weight.GetValue()</c>.</summary>
        private readonly float weightValue;

        /// <summary>Current document number; <c>int.MaxValue</c> once the iterator is exhausted.</summary>
        private int doc;

        private readonly int[] docs = new int[32];  // buffered doc numbers
        private readonly int[] freqs = new int[32]; // buffered term freqs

        /// <summary>Index of the current entry in <c>docs</c>/<c>freqs</c>.</summary>
        private int pointer;

        /// <summary>Number of valid entries currently held in <c>docs</c>/<c>freqs</c>.</summary>
        private int pointerMax;

        /// <summary><c>scoreCache[f]</c> holds <c>Tf(f) * weightValue</c> for <c>f &lt; SCORE_CACHE_SIZE</c>.</summary>
        private readonly float[] scoreCache = new float[SCORE_CACHE_SIZE];

        /// <summary>Construct a <code>TermScorer</code>.</summary>
        /// <param name="weight">The weight of the <code>Term</code> in the query.
        /// </param>
        /// <param name="td">An iterator over the documents matching the <code>Term</code>.
        /// </param>
        /// <param name="similarity">The <code>Similarity</code> implementation to be used for score computations.
        /// </param>
        /// <param name="norms">The field norms of the document fields for the <code>Term</code>.
        /// </param>
        internal TermScorer(Weight weight, TermDocs td, Similarity similarity, byte[] norms) : base(similarity)
        {
            this.weight = weight;
            this.termDocs = td;
            this.norms = norms;
            this.weightValue = weight.GetValue();

            // Precompute the un-normalized score for small term frequencies.
            for (int i = 0; i < SCORE_CACHE_SIZE; i++)
            {
                scoreCache[i] = GetSimilarity().Tf(i) * weightValue;
            }
        }

        /// <summary>Scores and collects all remaining matching documents.</summary>
        /// <param name="hc">The collector that receives each (document, score) pair.
        /// </param>
        public override void Score(HitCollector hc)
        {
            Next(); // position on the first match (or on the sentinel if there is none)
            Score(hc, int.MaxValue);
        }

        /// <summary>Scores and collects matching documents, starting with the current one,
        /// while the current document number is below <paramref name="end"/>.
        /// </summary>
        /// <param name="c">The collector that receives each (document, score) pair.
        /// </param>
        /// <param name="end">Exclusive upper bound on the document numbers to collect.
        /// </param>
        /// <returns> false if the underlying stream was exhausted (and closed) while
        /// advancing; true if scoring stopped because the current document reached
        /// <paramref name="end"/>.
        /// </returns>
        protected internal override bool Score(HitCollector c, int end)
        {
            Similarity similarity = GetSimilarity(); // cache sim in local
            float[] normDecoder = Similarity.GetNormDecoder();

            while (doc < end)
            {
                // for docs in window
                int f = freqs[pointer];
                float score = f < SCORE_CACHE_SIZE
                    ? scoreCache[f]
                    : similarity.Tf(f) * weightValue; // cache miss

                score *= normDecoder[norms[doc] & 0xFF]; // normalize for field

                c.Collect(doc, score); // collect score

                if (++pointer >= pointerMax)
                {
                    pointerMax = termDocs.Read(docs, freqs); // refill buffers
                    if (pointerMax != 0)
                    {
                        pointer = 0;
                    }
                    else
                    {
                        termDocs.Close(); // close stream
                        doc = int.MaxValue; // set to sentinel value
                        return false;
                    }
                }

                doc = docs[pointer];
            }

            return true;
        }

        /// <summary>Returns the current document number matching the query.
        /// Initially invalid, until <see cref="Next"/> is called the first time.
        /// </summary>
        public override int Doc()
        {
            return doc;
        }

        /// <summary>Advances to the next document matching the query.
        /// <br/>The iterator over the matching documents is buffered using
        /// <c>TermDocs.Read(int[], int[])</c>.
        /// </summary>
        /// <returns> true iff there is another document matching the query.
        /// </returns>
        public override bool Next()
        {
            pointer++;
            if (pointer >= pointerMax)
            {
                pointerMax = termDocs.Read(docs, freqs); // refill buffer
                if (pointerMax != 0)
                {
                    pointer = 0;
                }
                else
                {
                    termDocs.Close(); // close stream
                    doc = int.MaxValue; // set to sentinel value
                    return false;
                }
            }

            doc = docs[pointer];
            return true;
        }

        /// <summary>Returns the score of the current document: the term-frequency
        /// factor times the weight value, multiplied by the decoded field norm.
        /// </summary>
        public override float Score()
        {
            int f = freqs[pointer];
            float raw = f < SCORE_CACHE_SIZE
                ? scoreCache[f]
                : GetSimilarity().Tf(f) * weightValue; // cache miss

            return raw * Similarity.DecodeNorm(norms[doc]); // normalize for field
        }

        /// <summary>Skips to the first match beyond the current whose document number is
        /// greater than or equal to a given target.
        /// <br/>The implementation uses <c>TermDocs.SkipTo(int)</c>.
        /// </summary>
        /// <param name="target">The target document number.
        /// </param>
        /// <returns> true iff there is such a match.
        /// </returns>
        public override bool SkipTo(int target)
        {
            // first scan in cache
            for (pointer++; pointer < pointerMax; pointer++)
            {
                if (docs[pointer] >= target)
                {
                    doc = docs[pointer];
                    return true;
                }
            }

            // not found in cache, seek underlying stream
            bool result = termDocs.SkipTo(target);
            if (result)
            {
                // Re-seed the buffer with the single entry the stream is positioned on.
                pointerMax = 1;
                pointer = 0;
                docs[pointer] = doc = termDocs.Doc();
                freqs[pointer] = termDocs.Freq();
            }
            else
            {
                doc = int.MaxValue;
            }

            return result;
        }

        /// <summary>Returns an explanation of the score for a document.
        /// <br/>When this method is used, the <see cref="Next"/> method
        /// and the <see cref="Score(HitCollector)"/> method should not be used:
        /// this method consumes the buffered entries and closes the underlying stream.
        /// </summary>
        /// <param name="doc">The document number for the explanation.
        /// </param>
        /// <returns> an explanation whose value is <c>Tf(tf)</c>, where <c>tf</c> is the
        /// term frequency found for <paramref name="doc"/> (0 if the document does not match).
        /// </returns>
        public override Explanation Explain(int doc)
        {
            TermQuery query = (TermQuery) weight.GetQuery();
            Explanation tfExplanation = new Explanation();
            int tf = 0;

            // Look through whatever is still buffered first.
            while (pointer < pointerMax)
            {
                if (docs[pointer] == doc)
                {
                    tf = freqs[pointer];
                }
                pointer++;
            }

            // Not buffered: seek the stream directly instead of walking every
            // remaining posting. SkipTo stops at the first entry >= doc, so the
            // document matches only if the stream landed exactly on it.
            if (tf == 0 && termDocs.SkipTo(doc) && termDocs.Doc() == doc)
            {
                tf = termDocs.Freq();
            }

            termDocs.Close();
            tfExplanation.SetValue(GetSimilarity().Tf(tf));
            tfExplanation.SetDescription("tf(termFreq(" + query.GetTerm() + ")=" + tf + ")");

            return tfExplanation;
        }

        /// <summary>Returns a string of the form <c>scorer(weight)</c>.</summary>
        public override string ToString()
        {
            return "scorer(" + weight + ")";
        }
    }
}