Initial revision
[beagle.git] / Lucene.Net / Search / PhrasePrefixQuery.cs
blob8b2463a6cbbd57907546a79550de3f00ebbd30b7
1 using System;
2 using System.Text;
3 using System.Collections;
5 using Lucene.Net.Index;
6 using Lucene.Net.Search;
7 using Lucene.Net.Util;
9 namespace Lucene.Net.Search
11 /* ====================================================================
12 * The Apache Software License, Version 1.1
14 * Copyright (c) 2001 The Apache Software Foundation. All rights
15 * reserved.
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
24 * 2. Redistributions in binary form must reproduce the above copyright
25 * notice, this list of conditions and the following disclaimer in
26 * the documentation and/or other materials provided with the
27 * distribution.
29 * 3. The end-user documentation included with the redistribution,
30 * if any, must include the following acknowledgment:
31 * "This product includes software developed by the
32 * Apache Software Foundation (http://www.apache.org/)."
33 * Alternately, this acknowledgment may appear in the software itself,
34 * if and wherever such third-party acknowledgments normally appear.
36 * 4. The names "Apache" and "Apache Software Foundation" and
37 * "Apache Lucene" must not be used to endorse or promote products
38 * derived from this software without prior written permission. For
39 * written permission, please contact apache@apache.org.
41 * 5. Products derived from this software may not be called "Apache",
42 * "Apache Lucene", nor may "Apache" appear in their name, without
43 * prior written permission of the Apache Software Foundation.
45 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
46 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
47 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
48 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
49 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
51 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
52 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
53 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
54 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
55 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 * ====================================================================
59 * This software consists of voluntary contributions made by many
60 * individuals on behalf of the Apache Software Foundation. For more
61 * information on the Apache Software Foundation, please see
62 * <http://www.apache.org/>.
65 /// <summary>
66 /// PhrasePrefixQuery is a generalized version of PhraseQuery, with an added
67 /// method Add(Term[]).
/// To search for the phrase "Microsoft app*" with this class, first use
/// Add(Term) on the term "Microsoft", then find all terms that have "app" as
/// a prefix using IndexReader.Terms(Term), and use PhrasePrefixQuery.Add(Term[]
/// terms) to add them to the query.
72 /// <author>Anders Nielsen</author>
/// <version>1.0</version>
74 /// </summary>
75 [Serializable]
76 public class PhrasePrefixQuery : Query
// Field shared by every term of the phrase; assigned by Add(Term[]) and
// left null until then.
private string field;

// One Term[] per phrase position, in the order the positions were added.
private ArrayList termArrays = new ArrayList();

// Phrase slop; zero selects the exact phrase scorer.
private int slop;
/// <summary>
/// Stores the phrase slop to use for this query.
/// <see cref="PhraseQuery.SetSlop(int)"/>
/// </summary>
/// <param name="s">The new slop value.</param>
public void SetSlop(int s)
{
    this.slop = s;
}
/// <summary>
/// Gets the phrase slop currently configured for this query.
/// <see cref="PhraseQuery.GetSlop()"/>
/// </summary>
/// <returns>The slop value last passed to SetSlop, or 0 if never set.</returns>
public int GetSlop()
{
    return this.slop;
}
/// <summary>
/// Appends a single term as the next position of the phrase by wrapping it
/// in a one-element array and delegating to Add(Term[]).
/// <see cref="PhraseQuery.Add(Term)"/>
/// </summary>
/// <param name="term">The only term allowed at the new position.</param>
public void Add(Term term)
{
    Term[] single = new Term[] { term };
    Add(single);
}
/// <summary>
/// Add multiple terms at the next position in the phrase. Any of the terms
/// may match.
/// <see cref="PhraseQuery.Add(Term)"/>
/// </summary>
/// <param name="terms">
/// The alternative terms for the new position. Must be non-null and contain
/// at least one term; every term must belong to the field of the query
/// (the field of the first term ever added).
/// </param>
/// <exception cref="ArgumentNullException">terms is null.</exception>
/// <exception cref="ArgumentException">
/// terms is empty, or one of the terms is in a different field.
/// </exception>
public void Add(Term[] terms)
{
    if (terms == null)
        throw new ArgumentNullException("terms");

    // An empty position can never match, and the scorer and ToString both
    // read terms[0], so reject it here instead of failing later.
    if (terms.Length == 0)
        throw new ArgumentException
            ("At least one term is required for each phrase position", "terms");

    // The first position added decides the field for the whole query.
    String expectedField = (termArrays.Count == 0) ? terms[0].Field() : field;

    for (int i = 0; i < terms.Length; i++)
    {
        if (terms[i].Field() != expectedField)
        {
            throw new ArgumentException
                ("All phrase terms must be in the same field (" + expectedField + "): "
                + terms[i]);
        }
    }

    // Commit only after validation so a rejected call leaves the query untouched.
    field = expectedField;
    termArrays.Add(terms);
}
/// <summary>
/// Weight implementation for <see cref="PhrasePrefixQuery"/>. It sums the idf
/// of every term at every phrase position, combines that with the query boost
/// and the query norm, and creates the exact or sloppy phrase scorer
/// depending on the slop of the query.
/// </summary>
[Serializable]
private class PhrasePrefixWeight : Weight
{
    private Searcher searcher;
    private float value;
    private float idf;
    private float queryNorm;
    private float queryWeight;
    private PhrasePrefixQuery phrasePrefixQuery;

    /// <summary>
    /// Creates a weight for the given query, evaluated against the given searcher.
    /// </summary>
    /// <param name="searcher">Searcher supplying the Similarity and idf statistics.</param>
    /// <param name="phrasePrefixQuery">The query this weight belongs to.</param>
    public PhrasePrefixWeight(Searcher searcher, PhrasePrefixQuery phrasePrefixQuery)
    {
        this.searcher = searcher;
        this.phrasePrefixQuery = phrasePrefixQuery;
    }

    /// <summary>Returns the query this weight was created for.</summary>
    public Query GetQuery() { return phrasePrefixQuery; }

    /// <summary>Returns the value computed by the last call to Normalize.</summary>
    public float GetValue() { return value; }

    /// <summary>
    /// Computes the idf as the sum of the idf of every term in every phrase
    /// position, multiplies it by the query boost and returns the square of
    /// that query weight.
    /// </summary>
    /// <returns>The squared, not yet normalized, query weight.</returns>
    public float SumOfSquaredWeights()
    {
        // Accumulate in a local so that calling this method more than once
        // does not keep adding to the idf of the previous call.
        float idfSum = 0.0f;
        foreach (Term[] terms in phrasePrefixQuery.termArrays)
        {
            for (int j = 0; j < terms.Length; j++)
                idfSum += searcher.GetSimilarity().Idf(terms[j], searcher);
        }
        idf = idfSum;

        queryWeight = idf * phrasePrefixQuery.GetBoost();   // compute query weight
        return queryWeight * queryWeight;                   // square it
    }

    /// <summary>
    /// Applies the query norm to the query weight and derives the final value.
    /// </summary>
    /// <param name="queryNorm">Normalization factor to multiply into the query weight.</param>
    public void Normalize(float queryNorm)
    {
        this.queryNorm = queryNorm;
        queryWeight *= queryNorm;       // normalize query weight
        value = queryWeight * idf;      // idf for document
    }

    /// <summary>
    /// Builds the scorer for this weight over the given reader.
    /// </summary>
    /// <param name="reader">Index reader supplying term positions and norms.</param>
    /// <returns>
    /// An ExactPhraseScorer when the slop is zero, otherwise a
    /// SloppyPhraseScorer; null when the query has no positions or when the
    /// reader returns no positions for a single-term position.
    /// </returns>
    public Scorer Scorer(IndexReader reader)
    {
        if (phrasePrefixQuery.termArrays.Count == 0)    // optimize zero-term case
            return null;

        TermPositions[] tps = new TermPositions[phrasePrefixQuery.termArrays.Count];
        for (int i = 0; i < tps.Length; i++)
        {
            Term[] terms = (Term[])phrasePrefixQuery.termArrays[i];

            // Several alternatives at one position are merged into one
            // positions enumerator; a single term uses the reader directly.
            TermPositions p;
            if (terms.Length > 1)
                p = new MultipleTermPositions(reader, terms);
            else
                p = reader.TermPositions(terms[0]);

            if (p == null)
                return null;

            tps[i] = p;
        }

        if (phrasePrefixQuery.slop == 0)
            return new ExactPhraseScorer(this, tps, searcher.GetSimilarity(),
                reader.Norms(phrasePrefixQuery.field));
        else
            return new SloppyPhraseScorer(this, tps, searcher.GetSimilarity(),
                phrasePrefixQuery.slop, reader.Norms(phrasePrefixQuery.field));
    }

    /// <summary>
    /// Explains how the score of a document is built from the query weight
    /// (boost * idf * queryNorm) and the field weight (tf * idf * fieldNorm).
    /// </summary>
    /// <param name="reader">Index reader the document lives in.</param>
    /// <param name="doc">Document number to explain.</param>
    /// <returns>
    /// The combined explanation; only the field weight explanation when the
    /// query weight is exactly 1; a zero-valued explanation when no scorer
    /// can be created for the reader.
    /// </returns>
    public Explanation Explain(IndexReader reader, int doc)
    {
        // Scorer() legitimately returns null (no positions available), so
        // report "no match" instead of failing with a null dereference.
        Lucene.Net.Search.Scorer phraseScorer = this.Scorer(reader);
        if (phraseScorer == null)
            return new Explanation(0.0f, "no matching docs");

        Explanation result = new Explanation();
        result.SetDescription("weight("+GetQuery()+" in "+doc+"), product of:");

        Explanation idfExpl = new Explanation(idf, "idf("+GetQuery()+")");

        // explain query weight
        Explanation queryExpl = new Explanation();
        queryExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:");

        // The boost only appears as a detail when it differs from the default,
        // but it always takes part in the product below.
        Explanation boostExpl = new Explanation(phrasePrefixQuery.GetBoost(), "boost");
        if (phrasePrefixQuery.GetBoost() != 1.0f)
            queryExpl.AddDetail(boostExpl);

        queryExpl.AddDetail(idfExpl);

        Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
        queryExpl.AddDetail(queryNormExpl);

        queryExpl.SetValue(boostExpl.GetValue() *
            idfExpl.GetValue() *
            queryNormExpl.GetValue());

        result.AddDetail(queryExpl);

        // explain field weight
        Explanation fieldExpl = new Explanation();
        fieldExpl.SetDescription("fieldWeight("+GetQuery()+" in "+doc+
            "), product of:");

        Explanation tfExpl = phraseScorer.Explain(doc);
        fieldExpl.AddDetail(tfExpl);
        fieldExpl.AddDetail(idfExpl);

        Explanation fieldNormExpl = new Explanation();
        fieldNormExpl.SetValue(Similarity.DecodeNorm(reader.Norms(phrasePrefixQuery.field)[doc]));
        fieldNormExpl.SetDescription("fieldNorm(field="+phrasePrefixQuery.field+", doc="+doc+")");
        fieldExpl.AddDetail(fieldNormExpl);

        fieldExpl.SetValue(tfExpl.GetValue() *
            idfExpl.GetValue() *
            fieldNormExpl.GetValue());

        result.AddDetail(fieldExpl);

        // combine them
        result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue());

        // A query weight of exactly 1 adds nothing, so only the field part is returned.
        if (queryExpl.GetValue() == 1.0f)
            return fieldExpl;

        return result;
    }
}
/// <summary>
/// Creates the weight used to score this query with the given searcher.
/// A query with exactly one phrase position is rewritten into a BooleanQuery
/// holding one TermQuery per alternative term; every other case gets a
/// PhrasePrefixWeight.
/// </summary>
/// <param name="searcher">Searcher the weight is computed against.</param>
/// <returns>The weight for this query.</returns>
public override Weight CreateWeight(Searcher searcher)
{
    if (termArrays.Count != 1)
        return new PhrasePrefixWeight(searcher, this);

    // optimize one-term case: a lone position needs no phrase matching
    BooleanQuery alternatives = new BooleanQuery();
    foreach (Term candidate in (Term[])termArrays[0])
    {
        // both flags false -- presumably "not required, not prohibited"
        alternatives.Add(new TermQuery(candidate), false, false);
    }
    alternatives.SetBoost(GetBoost());
    return alternatives.CreateWeight(searcher);
}
/// <summary>
/// Prints a user-readable version of this query: an optional "field:"
/// prefix, the quoted phrase with positions separated by a space, "~slop"
/// when the slop is non-zero and "^boost" when the boost is not 1.
/// A position that holds several alternative terms is shown as its first
/// term followed by "*".
/// </summary>
/// <param name="f">
/// Field assumed by the caller; the field prefix is omitted when it equals
/// the field of this query.
/// </param>
/// <returns>The textual form of the query.</returns>
public override String ToString(String f)
{
    StringBuilder buffer = new StringBuilder();

    // field stays null until the first Add, so guard before comparing.
    if (field != null && !field.Equals(f))
    {
        buffer.Append(field);
        buffer.Append(":");
    }

    buffer.Append("\"");
    bool firstPosition = true;
    foreach (Term[] terms in termArrays)
    {
        // Keep the phrase positions apart in the output.
        if (!firstPosition)
            buffer.Append(" ");
        firstPosition = false;

        buffer.Append(terms[0].Text());

        // Only a position with more than one alternative is a prefix match.
        if (terms.Length > 1)
            buffer.Append("*");
    }
    buffer.Append("\"");

    if (slop != 0)
    {
        buffer.Append("~");
        buffer.Append(slop);
    }

    if (GetBoost() != 1.0f)
    {
        buffer.Append("^");
        buffer.Append(Number.ToString(GetBoost()));
    }

    return buffer.ToString();
}