Thumbnail file hits. Based on a patch from D Bera
[beagle.git] / beagled / Lucene.Net / Search / PhrasePrefixQuery.cs
blobba01ca2c6b09ab59447082dbb7768afd0951b76a
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 using System;
17 using IndexReader = Lucene.Net.Index.IndexReader;
18 using MultipleTermPositions = Lucene.Net.Index.MultipleTermPositions;
19 using Term = Lucene.Net.Index.Term;
20 using TermPositions = Lucene.Net.Index.TermPositions;
21 namespace Lucene.Net.Search
/// <summary> PhrasePrefixQuery is a generalized version of PhraseQuery, with an added
/// method <see cref="Add(Term[])"/>.
/// To use this class to search for the phrase "Microsoft app*", first call
/// Add(Term) with the term "Microsoft", then find all terms that have "app" as a
/// prefix using IndexReader.terms(Term), and pass them to PhrasePrefixQuery.Add(Term[]
/// terms) to add them to the query.
///
/// </summary>
/// <author> Anders Nielsen
/// </author>
/// <version> 1.0
/// </version>
36 [Serializable]
37 public class PhrasePrefixQuery:Query
/// <summary>Field name taken from the first term of the first term array that is added.</summary>
private string field;

/// <summary>One <c>Term[]</c> per phrase position, in the order the arrays were added.</summary>
private System.Collections.ArrayList termArrays = new System.Collections.ArrayList();

/// <summary>Boxed relative position of each entry in <c>termArrays</c>, kept in a synchronized list wrapper.</summary>
private System.Collections.ArrayList positions = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));

/// <summary>Phrase slop; starts at zero (the default value of an int field).</summary>
private int slop;
/// <summary>Sets the phrase slop for this query.</summary>
/// <param name="s">The slop value to store.</param>
/// <seealso cref="PhraseQuery.SetSlop(int)">
/// </seealso>
public virtual void SetSlop(int s)
{
    this.slop = s;
}
/// <summary>Gets the phrase slop currently configured for this query.</summary>
/// <returns>The value last passed to <c>SetSlop</c>, or zero if it was never called.</returns>
/// <seealso cref="PhraseQuery.GetSlop()">
/// </seealso>
public virtual int GetSlop()
{
    return this.slop;
}
/// <summary>Adds a single term at the next position in the phrase.</summary>
/// <param name="term">The term to append; it is wrapped in a one-element array and
/// forwarded to <c>Add(Term[])</c>.</param>
/// <seealso cref="PhraseQuery.Add(Term)">
/// </seealso>
public virtual void Add(Term term)
{
    Term[] single = new Term[1];
    single[0] = term;
    Add(single);
}
/// <summary>Adds several alternative terms at the next position in the phrase. Any of
/// the terms may match.
/// </summary>
/// <param name="terms">The alternatives for the new position.</param>
/// <remarks>The new position is one past the last recorded position, or zero when no
/// position has been recorded yet.</remarks>
/// <seealso cref="PhraseQuery.Add(Term)">
/// </seealso>
public virtual void Add(Term[] terms)
{
    int recorded = positions.Count;
    int position = recorded > 0 ? ((int) positions[recorded - 1]) + 1 : 0;

    Add(terms, position);
}
/// <summary>Adds alternative terms at an explicitly chosen relative position within
/// the phrase.
/// </summary>
/// <param name="terms">The alternatives for this position; must contain at least one term.</param>
/// <param name="position">The relative position of these terms within the phrase.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="terms"/> is null.</exception>
/// <exception cref="System.ArgumentException"><paramref name="terms"/> is empty, or one
/// of its terms belongs to a different field than the phrase.</exception>
/// <seealso cref="PhraseQuery.Add(Term, int)">
/// </seealso>
public virtual void Add(Term[] terms, int position)
{
    if (terms == null)
    {
        throw new System.ArgumentNullException("terms");
    }
    if (terms.Length == 0)
    {
        // An empty array would be stored silently and fail later, when the scorer
        // and ToString read terms[0].
        throw new System.ArgumentException("At least one term is required for each phrase position", "terms");
    }

    // The first array ever added decides which field the whole phrase lives in.
    if (termArrays.Count == 0)
    {
        field = terms[0].Field();
    }

    for (int i = 0; i < terms.Length; i++)
    {
        // Reference comparison, kept from the original code.
        // NOTE(review): presumably Term hands out interned field names, which is what
        // makes a reference check sufficient here - confirm against Term.
        if ((System.Object) terms[i].Field() != (System.Object) field)
        {
            throw new System.ArgumentException("All phrase terms must be in the same field (" + field + "): " + terms[i]);
        }
    }

    termArrays.Add(terms);
    positions.Add((System.Int32) position);
}
/// <summary>Returns the relative positions of the terms in this phrase.</summary>
/// <returns>A newly allocated array with one entry per recorded position, in insertion order.</returns>
public virtual int[] GetPositions()
{
    // ToArray(Type) unboxes each stored Int32 into a fresh int[].
    return (int[]) positions.ToArray(typeof(int));
}
/// <summary>
/// Weight for a <c>PhrasePrefixQuery</c>: sums the idf of every term of the enclosing
/// query, applies the query boost and normalization factor, and creates the exact or
/// sloppy phrase scorer depending on the query's slop.
/// </summary>
[Serializable]
private class PhrasePrefixWeight : Weight
{
    private PhrasePrefixQuery enclosingInstance;
    private Searcher searcher;
    private float value_Renamed;
    private float idf;
    private float queryNorm;
    private float queryWeight;

    /// <summary>Creates a weight bound to the given query and searcher.</summary>
    /// <param name="enclosingInstance">The query this weight belongs to.</param>
    /// <param name="searcher">The searcher passed to the similarity when computing idf.</param>
    public PhrasePrefixWeight(PhrasePrefixQuery enclosingInstance, Searcher searcher)
    {
        InitBlock(enclosingInstance);
        this.searcher = searcher;
    }

    /// <summary>Stores the reference to the enclosing query.</summary>
    private void InitBlock(PhrasePrefixQuery enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }

    /// <summary>The query this weight was created for.</summary>
    public virtual Query Query
    {
        get
        {
            return Enclosing_Instance;
        }
    }

    /// <summary>The weight value computed by the last call to <c>Normalize</c>.</summary>
    public virtual float Value
    {
        get
        {
            return value_Renamed;
        }
    }

    /// <summary>The enclosing <c>PhrasePrefixQuery</c>.</summary>
    public PhrasePrefixQuery Enclosing_Instance
    {
        get
        {
            return enclosingInstance;
        }
    }

    /// <summary>
    /// Adds the idf of every term of the query to the running idf total, multiplies the
    /// total by the query boost to get the query weight, and returns that weight squared.
    /// </summary>
    /// <remarks>The idf total is accumulated in a field and is not reset here, so a
    /// second call adds the term idfs again.</remarks>
    public virtual float SumOfSquaredWeights()
    {
        foreach (Term[] terms in Enclosing_Instance.termArrays)
        {
            foreach (Term term in terms)
            {
                idf += Enclosing_Instance.GetSimilarity(searcher).Idf(term, searcher);
            }
        }

        queryWeight = idf * Enclosing_Instance.GetBoost(); // compute query weight
        return queryWeight * queryWeight; // square it
    }

    /// <summary>Applies the normalization factor to the query weight and derives the
    /// final weight value.</summary>
    /// <param name="queryNorm">The normalization factor; also kept for <c>Explain</c>.</param>
    public virtual void Normalize(float queryNorm)
    {
        this.queryNorm = queryNorm;
        queryWeight *= queryNorm; // normalize query weight
        value_Renamed = queryWeight * idf; // idf for document
    }

    /// <summary>Builds the phrase scorer for the given reader.</summary>
    /// <param name="reader">The index reader to obtain term positions and norms from.</param>
    /// <returns>An exact phrase scorer when the slop is zero, a sloppy phrase scorer
    /// otherwise, or null when the query has no term arrays or the reader yields no
    /// positions for one of them.</returns>
    public virtual Scorer Scorer(IndexReader reader)
    {
        PhrasePrefixQuery query = Enclosing_Instance;

        // optimize zero-term case
        if (query.termArrays.Count == 0)
        {
            return null;
        }

        TermPositions[] tps = new TermPositions[query.termArrays.Count];
        for (int i = 0; i < tps.Length; i++)
        {
            Term[] terms = (Term[]) query.termArrays[i];

            // Several alternatives at one position are merged into a single position stream.
            TermPositions p;
            if (terms.Length > 1)
            {
                p = new MultipleTermPositions(reader, terms);
            }
            else
            {
                p = reader.TermPositions(terms[0]);
            }

            if (p == null)
            {
                return null;
            }

            tps[i] = p;
        }

        if (query.slop == 0)
        {
            return new ExactPhraseScorer(this, tps, query.GetPositions(), query.GetSimilarity(searcher), reader.Norms(query.field));
        }

        return new SloppyPhraseScorer(this, tps, query.GetPositions(), query.GetSimilarity(searcher), query.slop, reader.Norms(query.field));
    }

    /// <summary>Explains how the score of a document is computed for this weight.</summary>
    /// <param name="reader">The index reader the document lives in.</param>
    /// <param name="doc">The document number to explain.</param>
    /// <returns>The product of query weight and field weight; only the field weight
    /// explanation when the query weight is exactly 1; or a zero-valued "no matching
    /// docs" explanation when no scorer can be built.</returns>
    public virtual Explanation Explain(IndexReader reader, int doc)
    {
        Explanation result = new Explanation();
        result.SetDescription("weight(" + Query + " in " + doc + "), product of:");

        Explanation idfExpl = new Explanation(idf, "idf(" + Query + ")");

        // explain query weight
        Explanation queryExpl = new Explanation();
        queryExpl.SetDescription("queryWeight(" + Query + "), product of:");

        Explanation boostExpl = new Explanation(Enclosing_Instance.GetBoost(), "boost");
        if (Enclosing_Instance.GetBoost() != 1.0f)
        {
            queryExpl.AddDetail(boostExpl);
        }

        queryExpl.AddDetail(idfExpl);

        Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
        queryExpl.AddDetail(queryNormExpl);

        queryExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue());

        result.AddDetail(queryExpl);

        // explain Field weight
        Explanation fieldExpl = new Explanation();
        fieldExpl.SetDescription("fieldWeight(" + Query + " in " + doc + "), product of:");

        // Scorer() returns null for a zero-term query or when the reader has no
        // positions for a term; report that instead of dereferencing null.
        Scorer scorer = Scorer(reader);
        if (scorer == null)
        {
            return new Explanation(0.0f, "no matching docs");
        }

        Explanation tfExpl = scorer.Explain(doc);
        fieldExpl.AddDetail(tfExpl);
        fieldExpl.AddDetail(idfExpl);

        Explanation fieldNormExpl = new Explanation();
        byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);
        // A field without norms contributes a norm of zero.
        float fieldNorm = fieldNorms != null ? Similarity.DecodeNorm(fieldNorms[doc]) : 0.0f;
        fieldNormExpl.SetValue(fieldNorm);
        fieldNormExpl.SetDescription("fieldNorm(Field=" + Enclosing_Instance.field + ", doc=" + doc + ")");
        fieldExpl.AddDetail(fieldNormExpl);

        fieldExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue());

        result.AddDetail(fieldExpl);

        // combine them
        result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue());

        // A query weight of exactly 1 adds nothing, so the field weight alone is returned.
        if (queryExpl.GetValue() == 1.0f)
        {
            return fieldExpl;
        }

        return result;
    }
}
/// <summary>Creates the weight used to score this query.</summary>
/// <param name="searcher">The searcher the weight is created for.</param>
/// <returns>For a query with exactly one term array, the weight of a BooleanQuery that
/// holds one TermQuery per alternative and carries this query's boost; otherwise a
/// PhrasePrefixWeight.</returns>
protected internal override Weight CreateWeight(Searcher searcher)
{
    if (termArrays.Count != 1)
    {
        return new PhrasePrefixWeight(this, searcher);
    }

    // optimize one-term case: a single position needs no phrase matching
    Term[] alternatives = (Term[]) termArrays[0];
    BooleanQuery disjunction = new BooleanQuery();
    foreach (Term alternative in alternatives)
    {
        disjunction.Add(new TermQuery(alternative), false, false);
    }
    disjunction.SetBoost(GetBoost());
    return disjunction.CreateWeight(searcher);
}
/// <summary>Prints a user-readable version of this query.</summary>
/// <param name="f">The default field; the query's own field name is printed as a
/// prefix only when it differs from this one.</param>
/// <returns>The quoted phrase, one entry per position (the first term of each position,
/// followed by "*" when the position has several alternatives), then "~slop" when the
/// slop is non-zero and "^boost" when the boost is not 1.</returns>
public override System.String ToString(System.String f)
{
    System.Text.StringBuilder buffer = new System.Text.StringBuilder();

    // field stays null until the first term array is added; print no prefix then.
    if (field != null && !field.Equals(f))
    {
        buffer.Append(field);
        buffer.Append(":");
    }

    buffer.Append("\"");
    // Indexed loop: calling MoveNext() to test for a following element would advance
    // the enumerator and skip every second position.
    for (int i = 0; i < termArrays.Count; i++)
    {
        if (i > 0)
        {
            buffer.Append(" ");
        }

        Term[] terms = (Term[]) termArrays[i];
        buffer.Append(terms[0].Text());
        // Only a position with several alternatives is a prefix expansion.
        if (terms.Length > 1)
        {
            buffer.Append("*");
        }
    }
    buffer.Append("\"");

    if (slop != 0)
    {
        buffer.Append("~");
        buffer.Append(slop);
    }

    if (GetBoost() != 1.0f)
    {
        buffer.Append("^");
        buffer.Append(GetBoost().ToString());
    }

    return buffer.ToString();
}
/// <summary>Creates a copy of this query.</summary>
/// <returns>A member-wise copy of this instance that owns its own term-array and
/// position lists, so adding terms to the copy does not change this query. The
/// <c>Term[]</c> entries themselves are shared between both instances.</returns>
public override System.Object Clone()
{
    PhrasePrefixQuery copy = (PhrasePrefixQuery) MemberwiseClone();
    copy.termArrays = new System.Collections.ArrayList(termArrays);
    // Keep the synchronized wrapper the field initializer uses.
    copy.positions = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(positions));
    return copy;
}