QueryResponses.cs, DumpIndex.cs, IQueryResult.cs, QueryExecutor.cs, QueryResult.cs...
[beagle.git] / beagled / Lucene.Net / Search / PhraseQuery.cs
blob10e5900303963ad9b10d263aaebf27755cc8aed8
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using System;
18 using IndexReader = Lucene.Net.Index.IndexReader;
19 using Term = Lucene.Net.Index.Term;
20 using TermPositions = Lucene.Net.Index.TermPositions;
21 using ToStringUtils = Lucene.Net.Util.ToStringUtils;
23 namespace Lucene.Net.Search
26 /// <summary>A Query that matches documents containing a particular sequence of terms.
27 /// A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
28 ///
29 /// <p>This query may be combined with other terms or queries with a {@link BooleanQuery}.
30 /// </summary>
31 [Serializable]
32 public class PhraseQuery : Query
// Field shared by every term of the phrase; assigned when the first term is added.
private string field;

// Terms of the phrase, in the order they were added.
private System.Collections.ArrayList terms = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));

// Relative position of each term; kept parallel to the terms list.
private System.Collections.ArrayList positions = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));

// Permitted slop; zero means only exact phrase matches are accepted.
private int slop = 0;

/// <summary>Creates a phrase query that contains no terms yet.</summary>
public PhraseQuery()
{
}
/// <summary>
/// Sets how many other words may appear between the words of the query phrase.
/// A value of zero requests an exact phrase search; larger values behave like a
/// <code>WITHIN</code> or <code>NEAR</code> operator.
/// <para>The slop is an edit distance whose unit is one move of a term out of its
/// position in the query phrase. Swapping two words costs two moves (the first move
/// places the words atop one another), so a slop of at least two is needed to
/// permit re-ordering.</para>
/// <para>More exact matches score higher than sloppier ones, so results are sorted
/// by exactness.</para>
/// <para>The default slop is zero, which requires exact matches.</para>
/// </summary>
/// <param name="s">The new slop value.</param>
public virtual void SetSlop(int s)
{
    this.slop = s;
}
/// <summary>Gets the current slop; see <c>SetSlop</c> for its meaning.</summary>
/// <returns>The slop value last set, or zero if it was never set.</returns>
public virtual int GetSlop()
{
    return this.slop;
}
/// <summary>
/// Appends a term to the end of the query phrase. The term is placed at the
/// position immediately after that of the most recently added term, or at
/// position zero when the phrase is still empty.
/// </summary>
/// <param name="term">The term to append.</param>
public virtual void Add(Term term)
{
    int lastIndex = positions.Count - 1;
    int nextPosition = lastIndex >= 0
        ? ((System.Int32) positions[lastIndex]) + 1
        : 0;

    Add(term, nextPosition);
}
/// <summary>
/// Appends a term to the query phrase at an explicitly given relative position.
/// This makes it possible to build phrases with several terms at the same
/// position, or phrases with gaps (e.g. in connection with stopwords).
/// </summary>
/// <param name="term">The term to append; the first term added fixes the field of the phrase.</param>
/// <param name="position">The relative position of the term within the phrase.</param>
/// <exception cref="System.ArgumentException">
/// The term's field differs from the field of the terms already added.
/// </exception>
public virtual void Add(Term term, int position)
{
    string termField = term.Field();

    if (terms.Count == 0)
    {
        // The first term decides which field the whole phrase lives in.
        field = termField;
    }
    else if (termField != field)
    {
        throw new System.ArgumentException("All phrase terms must be in the same field: " + term);
    }

    terms.Add(term);
    positions.Add(position);
}
/// <summary>Gets the terms of this phrase, in the order they were added.</summary>
/// <returns>A newly allocated array holding the phrase terms.</returns>
public virtual Term[] GetTerms()
{
    System.Array snapshot = terms.ToArray(typeof(Term));
    return (Term[]) snapshot;
}
/// <summary>Gets the relative positions of the terms in this phrase.</summary>
/// <returns>A newly allocated array with one position per term, in insertion order.</returns>
public virtual int[] GetPositions()
{
    // Every element was stored as a boxed Int32, so the typed copy unboxes each one.
    return (int[]) positions.ToArray(typeof(int));
}
/// <summary>
/// Weight for a <see cref="PhraseQuery"/>: holds the idf-based query weight and
/// creates the exact or sloppy phrase scorer for an index reader.
/// </summary>
[Serializable]
private class PhraseWeight : Weight
{
    private void InitBlock(PhraseQuery enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }

    // The query this weight was created for.
    private PhraseQuery enclosingInstance;

    /// <summary>Gets the phrase query this weight belongs to.</summary>
    public PhraseQuery Enclosing_Instance
    {
        get
        {
            return enclosingInstance;
        }
    }

    private Similarity similarity;
    private float value_Renamed;   // queryWeight * idf, set by Normalize
    private float idf;             // computed once in the constructor
    private float queryNorm;       // last value passed to Normalize
    private float queryWeight;     // idf * boost, later scaled by queryNorm

    /// <summary>Creates the weight and computes the idf of the phrase terms for the given searcher.</summary>
    /// <param name="enclosingInstance">The phrase query being weighted.</param>
    /// <param name="searcher">The searcher used to obtain the similarity and the idf.</param>
    public PhraseWeight(PhraseQuery enclosingInstance, Searcher searcher)
    {
        InitBlock(enclosingInstance);
        this.similarity = Enclosing_Instance.GetSimilarity(searcher);

        idf = similarity.Idf(Enclosing_Instance.terms, searcher);
    }

    /// <summary>Returns <c>weight(query)</c> for the enclosing query.</summary>
    public override System.String ToString()
    {
        return "weight(" + Enclosing_Instance + ")";
    }

    /// <summary>Returns the phrase query this weight belongs to.</summary>
    public virtual Query GetQuery()
    {
        return Enclosing_Instance;
    }

    /// <summary>Returns the value computed by the last call to <c>Normalize</c>.</summary>
    public virtual float GetValue()
    {
        return value_Renamed;
    }

    /// <summary>Computes the query weight (idf times boost) and returns its square.</summary>
    public virtual float SumOfSquaredWeights()
    {
        queryWeight = idf * Enclosing_Instance.GetBoost(); // compute query weight
        return queryWeight * queryWeight; // square it
    }

    /// <summary>Scales the query weight by the given norm and derives the final weight value.</summary>
    /// <param name="queryNorm">The normalization factor to apply.</param>
    public virtual void Normalize(float queryNorm)
    {
        this.queryNorm = queryNorm;
        queryWeight *= queryNorm; // normalize query weight
        value_Renamed = queryWeight * idf; // idf for document
    }

    /// <summary>
    /// Creates the scorer for the phrase on the given reader.
    /// </summary>
    /// <param name="reader">The index reader supplying term positions and norms.</param>
    /// <returns>
    /// An exact scorer when the slop is zero, otherwise a sloppy scorer;
    /// <c>null</c> when the phrase has no terms or the reader returns no
    /// positions for one of them.
    /// </returns>
    public virtual Scorer Scorer(IndexReader reader)
    {
        if (Enclosing_Instance.terms.Count == 0)
        {
            // optimize zero-term case
            return null;
        }

        TermPositions[] tps = new TermPositions[Enclosing_Instance.terms.Count];
        for (int i = 0; i < Enclosing_Instance.terms.Count; i++)
        {
            TermPositions p = reader.TermPositions((Term) Enclosing_Instance.terms[i]);
            if (p == null)
            {
                return null;
            }
            tps[i] = p;
        }

        if (Enclosing_Instance.slop == 0)
        {
            // optimize exact case
            return new ExactPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, reader.Norms(Enclosing_Instance.field));
        }

        return new SloppyPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, Enclosing_Instance.slop, reader.Norms(Enclosing_Instance.field));
    }

    /// <summary>
    /// Builds an explanation of how the given document is scored by this weight:
    /// the query weight (boost, idf, queryNorm) multiplied by the field weight
    /// (tf, idf, fieldNorm).
    /// </summary>
    /// <param name="reader">The index reader containing the document.</param>
    /// <param name="doc">The document number to explain.</param>
    /// <returns>
    /// The combined explanation, or only the field-weight explanation when the
    /// query weight is exactly 1; a zero-valued explanation when no scorer can be
    /// created for the reader.
    /// </returns>
    public virtual Explanation Explain(IndexReader reader, int doc)
    {
        Explanation result = new Explanation();
        result.SetDescription("weight(" + GetQuery() + " in " + doc + "), product of:");

        System.Text.StringBuilder docFreqs = new System.Text.StringBuilder();
        System.Text.StringBuilder query = new System.Text.StringBuilder();
        query.Append('\"');
        for (int i = 0; i < Enclosing_Instance.terms.Count; i++)
        {
            if (i != 0)
            {
                docFreqs.Append(" ");
                query.Append(" ");
            }

            Term term = (Term) Enclosing_Instance.terms[i];

            docFreqs.Append(term.Text());
            docFreqs.Append("=");
            docFreqs.Append(reader.DocFreq(term));

            query.Append(term.Text());
        }
        query.Append('\"');

        Explanation idfExpl = new Explanation(idf, "idf(" + Enclosing_Instance.field + ": " + docFreqs + ")");

        // explain query weight
        Explanation queryExpl = new Explanation();
        queryExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:");

        Explanation boostExpl = new Explanation(Enclosing_Instance.GetBoost(), "boost");
        if (Enclosing_Instance.GetBoost() != 1.0f)
        {
            queryExpl.AddDetail(boostExpl);
        }
        queryExpl.AddDetail(idfExpl);

        Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
        queryExpl.AddDetail(queryNormExpl);

        queryExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue());

        result.AddDetail(queryExpl);

        // explain field weight
        Explanation fieldExpl = new Explanation();
        fieldExpl.SetDescription("fieldWeight(" + Enclosing_Instance.field + ":" + query + " in " + doc + "), product of:");

        // Scorer returns null for an empty phrase or when a term has no positions;
        // report that as a non-match instead of dereferencing null.
        Scorer scorer = Scorer(reader);
        if (scorer == null)
        {
            return new Explanation(0.0f, "no matching docs");
        }

        Explanation tfExpl = scorer.Explain(doc);
        fieldExpl.AddDetail(tfExpl);
        fieldExpl.AddDetail(idfExpl);

        Explanation fieldNormExpl = new Explanation();
        byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);
        float fieldNorm = fieldNorms != null ? Similarity.DecodeNorm(fieldNorms[doc]) : 0.0f;
        fieldNormExpl.SetValue(fieldNorm);
        fieldNormExpl.SetDescription("fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")");
        fieldExpl.AddDetail(fieldNormExpl);

        fieldExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue());

        result.AddDetail(fieldExpl);

        // combine them
        result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue());

        // A query weight of exactly 1 contributes nothing, so only the field part is reported.
        if (queryExpl.GetValue() == 1.0f)
        {
            return fieldExpl;
        }

        return result;
    }
}
/// <summary>
/// Creates the weight for this query. A phrase with exactly one term is handed
/// to an equivalent <c>TermQuery</c> carrying the same boost; every other phrase
/// gets a <c>PhraseWeight</c>.
/// </summary>
/// <param name="searcher">The searcher the weight is created for.</param>
/// <returns>The weight used to score this query.</returns>
protected internal override Weight CreateWeight(Searcher searcher)
{
    if (terms.Count != 1)
    {
        return new PhraseWeight(this, searcher);
    }

    // optimize one-term case
    Query singleTermQuery = new TermQuery((Term) terms[0]);
    singleTermQuery.SetBoost(GetBoost());
    return singleTermQuery.CreateWeight(searcher);
}
/// <summary>
/// Adds every term of this phrase to <paramref name="queryTerms"/>, using the
/// term as both key and value. Terms that are already present are left untouched.
/// </summary>
/// <param name="queryTerms">The table that collects the terms.</param>
/// <seealso cref="Query.ExtractTerms(System.Collections.Hashtable)"/>
public override void ExtractTerms(System.Collections.Hashtable queryTerms)
{
    foreach (Term term in terms)
    {
        // Hashtable.Add throws ArgumentException for a key that already exists,
        // which would happen for a phrase repeating a term or for a term that
        // another clause has already contributed.
        if (!queryTerms.ContainsKey(term))
        {
            queryTerms.Add(term, term);
        }
    }
}
/// <summary>Prints a user-readable version of this query.</summary>
/// <param name="f">
/// The default field; the field prefix is omitted when the phrase's field equals it.
/// </param>
/// <returns>
/// The quoted phrase, optionally prefixed with <c>field:</c> and followed by
/// <c>~slop</c> (when the slop is non-zero) and the boost suffix.
/// </returns>
public override System.String ToString(System.String f)
{
    System.Text.StringBuilder buffer = new System.Text.StringBuilder();

    // field stays null until the first term is added, so an empty phrase
    // query must not dereference it.
    if (field != null && !field.Equals(f))
    {
        buffer.Append(field);
        buffer.Append(":");
    }

    buffer.Append("\"");
    for (int i = 0; i < terms.Count; i++)
    {
        buffer.Append(((Term) terms[i]).Text());
        if (i != terms.Count - 1)
        {
            buffer.Append(" ");
        }
    }
    buffer.Append("\"");

    if (slop != 0)
    {
        buffer.Append("~");
        buffer.Append(slop);
    }

    buffer.Append(ToStringUtils.Boost(GetBoost()));

    return buffer.ToString();
}
/// <summary>
/// Returns true iff <code>o</code> is a phrase query with the same boost, the
/// same slop, and equal terms and positions in the same order.
/// </summary>
/// <param name="o">The object to compare with.</param>
public override bool Equals(System.Object o)
{
    if (!(o is PhraseQuery))
    {
        return false;
    }

    PhraseQuery other = (PhraseQuery) o;

    // ArrayList does not override Equals, so comparing the lists directly would
    // only test reference identity; compare their contents instead.
    return (this.GetBoost() == other.GetBoost())
        && (this.slop == other.slop)
        && ListEquals(this.terms, other.terms)
        && ListEquals(this.positions, other.positions);
}

/// <summary>
/// Returns a hash code derived from the boost, the slop and the contents of the
/// term and position lists, consistent with <c>Equals</c>.
/// </summary>
public override int GetHashCode()
{
    unchecked
    {
        return BitConverter.ToInt32(BitConverter.GetBytes(GetBoost()), 0)
            ^ slop
            ^ ListHashCode(terms)
            ^ ListHashCode(positions);
    }
}

/// <summary>Compares two lists element by element, in order.</summary>
private static bool ListEquals(System.Collections.IList left, System.Collections.IList right)
{
    if (left.Count != right.Count)
    {
        return false;
    }

    for (int i = 0; i < left.Count; i++)
    {
        if (!System.Object.Equals(left[i], right[i]))
        {
            return false;
        }
    }

    return true;
}

/// <summary>Computes an order-sensitive hash code from the elements of a list.</summary>
private static int ListHashCode(System.Collections.IList list)
{
    unchecked
    {
        int hash = 1;
        for (int i = 0; i < list.Count; i++)
        {
            System.Object item = list[i];
            hash = 31 * hash + (item == null ? 0 : item.GetHashCode());
        }
        return hash;
    }
}