cvsimport
[beagle.git] / beagled / Lucene.Net / Search / TermScorer.cs
blob6ec81a8b9d06b44fdf7b99bca1846044b1613c03
/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 using System;
18 using TermDocs = Lucene.Net.Index.TermDocs;
20 namespace Lucene.Net.Search
23 /// <summary>Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.</summary>
24 sealed class TermScorer : Scorer
// Query weight this scorer was built from; supplies the weight value, the
// query reported by Explain and the text of ToString.
private readonly Weight weight;
// Posting iterator for the term; read in batches into docs/freqs.
private readonly TermDocs termDocs;
// Encoded field norms, indexed by document number.
private readonly byte[] norms;
// Cached result of weight.GetValue().
private readonly float weightValue;
// Current document number; set to Int32.MaxValue once the postings are exhausted.
private int doc;

private readonly int[] docs = new int[32]; // buffered doc numbers
private readonly int[] freqs = new int[32]; // buffered term freqs
// Index of the current entry in docs/freqs.
private int pointer;
// Number of valid entries in docs/freqs (as reported by the last refill).
private int pointerMax;

// Scores for term frequencies below this bound are precomputed in the constructor.
private const int SCORE_CACHE_SIZE = 32;
// scoreCache[f] holds Tf(f) * weightValue for 0 <= f < SCORE_CACHE_SIZE.
private readonly float[] scoreCache = new float[SCORE_CACHE_SIZE];
/// <summary>Creates a <code>TermScorer</code> and precomputes the scores of small term frequencies.</summary>
/// <param name="weight">The weight of the <code>Term</code> in the query.</param>
/// <param name="td">An iterator over the documents matching the <code>Term</code>.</param>
/// <param name="similarity">The <code>Similarity</code> implementation to be used for score computations.</param>
/// <param name="norms">The field norms of the document fields for the <code>Term</code>.</param>
internal TermScorer(Weight weight, TermDocs td, Similarity similarity, byte[] norms) : base(similarity)
{
    this.weight = weight;
    termDocs = td;
    this.norms = norms;
    weightValue = weight.GetValue();

    // Fill the cache with tf(freq) * weight for every freq below SCORE_CACHE_SIZE.
    int freq = 0;
    while (freq < SCORE_CACHE_SIZE)
    {
        scoreCache[freq] = GetSimilarity().Tf(freq) * weightValue;
        freq++;
    }
}
/// <summary>Scores every remaining matching document and passes each one to the collector.</summary>
/// <param name="hc">The collector that receives each document number and its score.</param>
public override void Score(HitCollector hc)
{
    // Position on the first buffered document, then score with no upper bound.
    Next();
    const int noLimit = System.Int32.MaxValue;
    Score(hc, noLimit);
}
/// <summary>Collects scores for the current document and those following it
/// while the document number stays below <paramref name="end"/>.</summary>
/// <param name="c">The collector that receives each document number and its score.</param>
/// <param name="end">Exclusive upper bound on the document numbers to score.</param>
/// <returns>true if scoring stopped because a document at or beyond
/// <paramref name="end"/> was reached; false if the postings were exhausted.</returns>
protected internal override bool Score(HitCollector c, int end)
{
    Similarity sim = GetSimilarity(); // fetched once for the whole window
    float[] decoder = Similarity.GetNormDecoder();

    while (doc < end)
    {
        int freq = freqs[pointer];

        float score;
        if (freq < SCORE_CACHE_SIZE)
        {
            score = scoreCache[freq];
        }
        else
        {
            // frequency too large for the cache: compute on the fly
            score = sim.Tf(freq) * weightValue;
        }

        // apply the field norm of this document
        score *= decoder[norms[doc] & 0xFF];

        c.Collect(doc, score);

        pointer++;
        if (pointer >= pointerMax)
        {
            // buffer consumed: pull the next batch of postings
            pointerMax = termDocs.Read(docs, freqs);
            if (pointerMax == 0)
            {
                termDocs.Close();
                doc = System.Int32.MaxValue; // sentinel: no more documents
                return false;
            }
            pointer = 0;
        }
        doc = docs[pointer];
    }

    return true;
}
/// <summary>Returns the document number the scorer is currently positioned on.
/// The value is not meaningful before the first call to <see cref="Next()"/>.
/// </summary>
public override int Doc()
{
    return this.doc;
}
/// <summary>Moves to the next document matching the query.
/// Postings are consumed from an internal buffer that is refilled through
/// <code>TermDocs.Read(int[], int[])</code> whenever it runs out.
/// </summary>
/// <returns>true iff there is another document matching the query.</returns>
public override bool Next()
{
    // Fast path: the next entry is still in the buffer.
    if (++pointer < pointerMax)
    {
        doc = docs[pointer];
        return true;
    }

    // Buffer consumed: pull the next batch of postings.
    pointerMax = termDocs.Read(docs, freqs);
    if (pointerMax == 0)
    {
        termDocs.Close();
        doc = System.Int32.MaxValue; // sentinel: no more documents
        return false;
    }

    pointer = 0;
    doc = docs[pointer];
    return true;
}
/// <summary>Computes the score of the current document.</summary>
/// <returns>The term-frequency score (taken from the cache when the frequency
/// is below <code>SCORE_CACHE_SIZE</code>) multiplied by the decoded field norm.</returns>
public override float Score()
{
    int freq = freqs[pointer];

    float raw;
    if (freq < SCORE_CACHE_SIZE)
    {
        raw = scoreCache[freq];
    }
    else
    {
        // frequency too large for the cache: compute on the fly
        raw = GetSimilarity().Tf(freq) * weightValue;
    }

    float norm = Similarity.DecodeNorm(norms[doc]);
    return raw * norm;
}
/// <summary>Skips to the first match beyond the current one whose document
/// number is greater than or equal to <paramref name="target"/>.
/// The buffered postings are searched first; if none qualifies the request
/// is forwarded to <code>TermDocs.SkipTo(int)</code>.
/// </summary>
/// <param name="target">The target document number.</param>
/// <returns>true iff there is such a match.</returns>
public override bool SkipTo(int target)
{
    // Scan the entries that are still in the buffer.
    pointer++;
    while (pointer < pointerMax)
    {
        int candidate = docs[pointer];
        if (candidate >= target)
        {
            doc = candidate;
            return true;
        }
        pointer++;
    }

    // Nothing buffered qualifies: let the underlying stream seek.
    if (!termDocs.SkipTo(target))
    {
        doc = System.Int32.MaxValue;
        return false;
    }

    // The buffer now holds exactly the entry the stream is positioned on.
    pointerMax = 1;
    pointer = 0;
    doc = termDocs.Doc();
    docs[pointer] = doc;
    freqs[pointer] = termDocs.Freq();
    return true;
}
/// <summary>Returns an explanation of the term-frequency part of the score for a document.
/// When this method is used, the <see cref="Next()"/> method
/// and the <see cref="Score(HitCollector)"/> method should not be used:
/// it consumes the buffered postings and closes the underlying stream.
/// </summary>
/// <param name="doc">The document number for the explanation.</param>
/// <returns>An explanation whose value is <code>Tf(tf)</code>, where <code>tf</code> is the
/// frequency found for <paramref name="doc"/>, or 0 if the document was not found.</returns>
public override Explanation Explain(int doc)
{
    TermQuery query = (TermQuery) weight.GetQuery();
    Explanation tfExplanation = new Explanation();
    int tf = 0;

    // Look through whatever is left in the buffer first.
    while (pointer < pointerMax)
    {
        if (docs[pointer] == doc)
        {
            tf = freqs[pointer];
        }
        pointer++;
    }

    // Not buffered: seek straight to the document instead of walking every
    // remaining posting. SkipTo stops at the first entry >= doc, so the
    // frequency is only taken when that entry is the requested document.
    if (tf == 0 && termDocs.SkipTo(doc) && termDocs.Doc() == doc)
    {
        tf = termDocs.Freq();
    }

    termDocs.Close();
    tfExplanation.SetValue(GetSimilarity().Tf(tf));
    tfExplanation.SetDescription("tf(termFreq(" + query.GetTerm() + ")=" + tf + ")");

    return tfExplanation;
}
/// <summary>Returns a string of the form <code>scorer(weight)</code> describing this <code>TermScorer</code>.</summary>
public override System.String ToString()
{
    return System.String.Concat("scorer(", weight, ")");
}