cvsimport
[beagle.git] / beagled / Lucene.Net / Search / BooleanQuery.cs
blob1cd63f895c9f3a7b70149851fc36972820cb0dcd
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using System;
18 using IndexReader = Lucene.Net.Index.IndexReader;
19 using ToStringUtils = Lucene.Net.Util.ToStringUtils;
21 namespace Lucene.Net.Search
24 /// <summary>A Query that matches documents matching boolean combinations of other
25 /// queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
26 /// BooleanQuerys.
27 /// </summary>
28 [Serializable]
29 public class BooleanQuery : Query, System.ICloneable
/// <summary>
/// Similarity wrapper returned by <c>BooleanQuery.GetSimilarity</c> when coord
/// scoring is disabled. It hands the wrapped similarity to the
/// <c>SimilarityDelegator</c> base constructor and overrides <c>Coord</c> so the
/// coordination factor is always 1.
/// </summary>
/// <remarks>
/// Marked serializable because the serializable <c>BooleanWeight</c> keeps the
/// similarity it obtains from the query in a field; without the attribute,
/// serializing such a weight for a coord-disabled query would fail.
/// NOTE(review): this presumes <c>SimilarityDelegator</c> is itself serializable; confirm.
/// </remarks>
[Serializable]
private class AnonymousClassSimilarityDelegator : SimilarityDelegator
{
    // The query that created this delegator.
    private BooleanQuery enclosingInstance;

    /// <summary>Creates a delegator around <paramref name="Param1"/> on behalf of the given query.</summary>
    /// <param name="enclosingInstance">the query that created this delegator</param>
    /// <param name="Param1">the similarity passed to the base delegator</param>
    internal AnonymousClassSimilarityDelegator(BooleanQuery enclosingInstance, Lucene.Net.Search.Similarity Param1) : base(Param1)
    {
        InitBlock(enclosingInstance);
    }

    // Stores the owning query; kept as a separate step to mirror the other nested classes.
    private void InitBlock(BooleanQuery enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }

    /// <summary>The query that created this delegator.</summary>
    public BooleanQuery Enclosing_Instance
    {
        get
        {
            return enclosingInstance;
        }
    }

    /// <summary>Always returns 1, which neutralises the coordination factor.</summary>
    /// <param name="overlap">ignored</param>
    /// <param name="maxOverlap">ignored</param>
    /// <returns>the constant <c>1.0f</c></returns>
    public override float Coord(int overlap, int maxOverlap)
    {
        return 1.0f;
    }
}
/// <summary>
/// Upper bound on the number of clauses a single query may hold; starts at 1024.
/// </summary>
/// <remarks>
/// Deprecated as a direct access point: change it through
/// <see cref="SetMaxClauseCount(int)"/>, which validates the value, and read it
/// through <see cref="GetMaxClauseCount()"/>.
/// </remarks>
public static int maxClauseCount = 1024;
/// <summary>
/// Thrown when an attempt is made to add more clauses than
/// <see cref="GetMaxClauseCount()"/> allows. This typically happens when a
/// PrefixQuery, FuzzyQuery, WildcardQuery, or RangeQuery is expanded to many
/// terms during search.
/// </summary>
[Serializable]
public class TooManyClauses : System.SystemException
{
    /// <summary>
    /// Creates the exception with a message that reports the clause limit in
    /// force at the time it is thrown.
    /// </summary>
    public TooManyClauses() : base("maxClauseCount is set to " + BooleanQuery.maxClauseCount)
    {
    }

    /// <summary>
    /// Deserialization constructor. A serializable exception needs it so that
    /// an instance can be rebuilt from serialized data.
    /// </summary>
    /// <param name="info">serialized object data</param>
    /// <param name="context">source/destination context of the serialized stream</param>
    protected TooManyClauses(System.Runtime.Serialization.SerializationInfo info, System.Runtime.Serialization.StreamingContext context) : base(info, context)
    {
    }
}
/// <summary>
/// Reports the current clause limit (1024 unless changed). Adding clauses
/// beyond this limit makes <see cref="Add(BooleanClause)"/> throw
/// <see cref="TooManyClauses"/>.
/// </summary>
/// <returns>the current value of the static clause limit</returns>
/// <seealso cref="SetMaxClauseCount(int)"/>
public static int GetMaxClauseCount()
{
    return BooleanQuery.maxClauseCount;
}
/// <summary>
/// Changes the maximum number of clauses allowed per BooleanQuery (1024 by default).
/// </summary>
/// <remarks>
/// <para>TermQuery clauses are generated from, for example, prefix queries and
/// fuzzy queries. Each TermQuery needs some buffer space during search, so this
/// limit indirectly bounds the buffer requirements of a query search.</para>
/// <para>When the limit becomes a bottleneck for a query, a filter can be used
/// instead; for example a <c>RangeFilter</c> in place of a <c>RangeQuery</c>.</para>
/// <para>Normally the buffers are allocated by the runtime. With, for example,
/// <c>MMapDirectory</c> the buffering is left to the operating system.</para>
/// </remarks>
/// <param name="maxClauseCount">the new limit; must be at least 1</param>
/// <exception cref="System.ArgumentException">if <paramref name="maxClauseCount"/> is below 1</exception>
public static void SetMaxClauseCount(int maxClauseCount)
{
    bool tooSmall = maxClauseCount < 1;
    if (tooSmall)
    {
        throw new System.ArgumentException("maxClauseCount must be >= 1");
    }

    // The parameter shadows the static field, so qualify the assignment target.
    BooleanQuery.maxClauseCount = maxClauseCount;
}
// Clauses of this query, held in a synchronized ArrayList wrapper (initial capacity 10).
private System.Collections.ArrayList clauses = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));

// True when the coord factor is to be neutralised for this query; see GetSimilarity.
private bool disableCoord;

/// <summary>Creates a boolean query with no clauses and coord scoring left enabled.</summary>
public BooleanQuery()
{
    // Nothing to do: the field initializers above set up the empty state.
}
/// <summary>
/// Creates a boolean query with no clauses, optionally switching off the
/// coord factor in scoring.
/// </summary>
/// <remarks>
/// The coord factor does not make sense for most automatically generated
/// queries, such as <c>WildcardQuery</c> and <c>FuzzyQuery</c>, which is when
/// disabling it is appropriate.
/// </remarks>
/// <param name="disableCoord"><c>true</c> to disable the coord factor for this query</param>
public BooleanQuery(bool disableCoord)
{
    // The parameter shadows the field of the same name.
    this.disableCoord = disableCoord;
}
/// <summary>Tells whether the coord factor is disabled in scoring for this query instance.</summary>
/// <returns>the flag given to the <c>BooleanQuery(bool)</c> constructor, or <c>false</c> if the default constructor was used</returns>
public virtual bool IsCoordDisabled()
{
    return this.disableCoord;
}
/// <summary>
/// Returns the similarity to score this query with. When coord is disabled
/// for this query, the inherited similarity is wrapped in a delegator whose
/// coord factor is always 1; otherwise the inherited similarity is returned as is.
/// </summary>
/// <param name="searcher">the searcher passed through to the base implementation</param>
/// <returns>the inherited similarity, possibly wrapped</returns>
public override Similarity GetSimilarity(Searcher searcher)
{
    Similarity inherited = base.GetSimilarity(searcher);
    if (!disableCoord)
    {
        return inherited;
    }

    // Coord was disabled on request: wrap so that Coord() becomes a no-op.
    return new AnonymousClassSimilarityDelegator(this, inherited);
}
/// <summary>
/// Sets how many of the optional clauses must be satisfied for a document to match.
/// </summary>
/// <remarks>
/// <para>By default no optional clause is necessary for a match (unless there
/// are no required clauses). Once this method is used, the given number of
/// optional clauses is required.</para>
/// <para>This setting is independent of marking specific clauses as required or
/// prohibited; the number is only compared against the count of matching
/// optional clauses.</para>
/// <para>EXPERT NOTE: a value above zero makes <c>CreateWeight</c> use
/// <c>BooleanWeight2</c>, regardless of whether <c>SetUseScorer14(true)</c>
/// has been called.</para>
/// </remarks>
/// <param name="min">the number of optional clauses that must match</param>
public virtual void SetMinimumNumberShouldMatch(int min)
{
    minNrShouldMatch = min;
}
// Minimum number of optional clauses that must match; 0 means no such requirement.
protected internal int minNrShouldMatch = 0;

/// <summary>Reports how many of the optional clauses must be satisfied.</summary>
/// <returns>the value last given to <c>SetMinimumNumberShouldMatch</c>, or 0 if it was never called</returns>
public virtual int GetMinimumNumberShouldMatch()
{
    return this.minNrShouldMatch;
}
/// <summary>
/// Adds a clause described by two flags. A clause may be:
/// <list type="bullet">
/// <item><description><c>required</c>: documents that do not match the
/// sub-query will not match the boolean query;</description></item>
/// <item><description><c>prohibited</c>: documents that do match the
/// sub-query will not match the boolean query; or</description></item>
/// <item><description>neither: matching the sub-query is optional, but a
/// document must match at least one sub-query to match the boolean
/// query.</description></item>
/// </list>
/// It is an error to mark a clause as both required and prohibited.
/// </summary>
/// <remarks>
/// Deprecated; use the overload taking a <c>BooleanClause.Occur</c> instead:
/// <c>Add(query, true, false)</c> becomes <c>Occur.MUST</c>,
/// <c>Add(query, false, false)</c> becomes <c>Occur.SHOULD</c>, and
/// <c>Add(query, false, true)</c> becomes <c>Occur.MUST_NOT</c>.
/// </remarks>
/// <param name="query">the sub-query to add</param>
/// <param name="required">whether the sub-query must match</param>
/// <param name="prohibited">whether the sub-query must not match</param>
public virtual void Add(Query query, bool required, bool prohibited)
{
    BooleanClause clause = new BooleanClause(query, required, prohibited);
    Add(clause);
}
/// <summary>Wraps the sub-query in a new clause with the given occurrence and adds it.</summary>
/// <param name="query">the sub-query to add</param>
/// <param name="occur">how the sub-query takes part in matching</param>
/// <exception cref="TooManyClauses">if the query already holds the maximum number of clauses</exception>
/// <seealso cref="GetMaxClauseCount()"/>
public virtual void Add(Query query, BooleanClause.Occur occur)
{
    BooleanClause clause = new BooleanClause(query, occur);
    Add(clause);
}
/// <summary>Appends a ready-made clause to this query.</summary>
/// <param name="clause">the clause to append</param>
/// <exception cref="TooManyClauses">if the query already holds the maximum number of clauses</exception>
/// <seealso cref="GetMaxClauseCount()"/>
public virtual void Add(BooleanClause clause)
{
    // Refuse to grow once the static clause limit has been reached.
    bool atCapacity = clauses.Count >= maxClauseCount;
    if (atCapacity)
    {
        throw new TooManyClauses();
    }

    clauses.Add(clause);
}
/// <summary>Copies the clauses of this query into a new array.</summary>
/// <returns>a new array holding the clauses in list order</returns>
public virtual BooleanClause[] GetClauses()
{
    System.Array copy = clauses.ToArray(typeof(BooleanClause));
    return (BooleanClause[]) copy;
}
/// <summary>
/// Weight for a BooleanQuery. It creates one sub-weight per clause of the
/// owning query at construction time and combines them for normalisation,
/// scoring and explanation. Sub-weights are looked up by the same index as
/// the owning query's clauses.
/// </summary>
[Serializable]
private class BooleanWeight : Weight
{
    // The query this weight was created for.
    private BooleanQuery enclosingInstance;

    // Similarity obtained from the owning query at construction time.
    protected internal Similarity similarity;

    // One sub-weight per clause, in clause order (synchronized ArrayList wrapper).
    protected internal System.Collections.ArrayList weights = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));

    /// <summary>Builds the weight and one sub-weight for every clause of the query.</summary>
    /// <param name="enclosingInstance">the query being weighted</param>
    /// <param name="searcher">the searcher used to obtain the similarity and the sub-weights</param>
    public BooleanWeight(BooleanQuery enclosingInstance, Searcher searcher)
    {
        this.enclosingInstance = enclosingInstance;

        BooleanQuery owner = Enclosing_Instance;
        this.similarity = owner.GetSimilarity(searcher);
        for (int i = 0; i < owner.clauses.Count; i++)
        {
            BooleanClause clause = (BooleanClause) owner.clauses[i];
            Weight subWeight = clause.GetQuery().CreateWeight(searcher);
            weights.Add(subWeight);
        }
    }

    /// <summary>The query this weight was created for.</summary>
    public BooleanQuery Enclosing_Instance
    {
        get
        {
            return enclosingInstance;
        }
    }

    // Fetches the clause of the owning query that sits at the given index.
    private BooleanClause ClauseAt(int index)
    {
        return (BooleanClause) Enclosing_Instance.clauses[index];
    }

    /// <summary>Returns the query this weight was created for.</summary>
    public virtual Query GetQuery()
    {
        return Enclosing_Instance;
    }

    /// <summary>Returns the boost of the owning query.</summary>
    public virtual float GetValue()
    {
        return Enclosing_Instance.GetBoost();
    }

    /// <summary>
    /// Adds up the squared weights of all non-prohibited sub-weights and
    /// multiplies the total by the square of the owning query's boost.
    /// </summary>
    public virtual float SumOfSquaredWeights()
    {
        float total = 0.0f;
        for (int i = 0; i < weights.Count; i++)
        {
            BooleanClause clause = ClauseAt(i);
            Weight subWeight = (Weight) weights[i];
            if (clause.IsProhibited())
            {
                continue; // prohibited clauses contribute nothing
            }
            total += subWeight.SumOfSquaredWeights(); // sum sub weights
        }

        total *= Enclosing_Instance.GetBoost() * Enclosing_Instance.GetBoost(); // boost each sub-weight

        return total;
    }

    /// <summary>
    /// Multiplies the given norm by the owning query's boost and passes the
    /// result to every non-prohibited sub-weight.
    /// </summary>
    /// <param name="norm">the normalisation factor to apply</param>
    public virtual void Normalize(float norm)
    {
        norm *= Enclosing_Instance.GetBoost(); // incorporate boost
        for (int i = 0; i < weights.Count; i++)
        {
            BooleanClause clause = ClauseAt(i);
            Weight subWeight = (Weight) weights[i];
            if (clause.IsProhibited())
            {
                continue;
            }
            subWeight.Normalize(norm);
        }
    }

    /// <summary>Builds the 1.4-style scorer for this weight.</summary>
    /// <param name="reader">the index reader handed to every sub-weight</param>
    /// <returns>
    /// a ConjunctionScorer when every clause is required and none wraps a
    /// BooleanQuery; otherwise a BooleanScorer; <c>null</c> when a required
    /// clause yields no scorer
    /// </returns>
    public virtual Scorer Scorer(IndexReader reader)
    {
        // First see if the (faster) ConjunctionScorer will work. It can be
        // used when all clauses are required. A BooleanScorer cannot be
        // embedded in a ConjunctionScorer, because its hits are not always
        // sorted by document number, so it cannot implement skipTo()
        // correctly, which ConjunctionScorer requires.
        bool allRequired = true;
        bool noneBoolean = true;
        for (int i = 0; i < weights.Count; i++)
        {
            BooleanClause clause = ClauseAt(i);
            allRequired &= clause.IsRequired();
            noneBoolean &= !(clause.GetQuery() is BooleanQuery);
        }

        if (allRequired && noneBoolean)
        {
            // ConjunctionScorer is okay
            ConjunctionScorer conjunction = new ConjunctionScorer(similarity);
            for (int i = 0; i < weights.Count; i++)
            {
                Weight subWeight = (Weight) weights[i];
                Scorer subScorer = subWeight.Scorer(reader);
                if (subScorer == null)
                {
                    return null;
                }
                conjunction.Add(subScorer);
            }
            return conjunction;
        }

        // Use good-old BooleanScorer instead.
        BooleanScorer disjunction = new BooleanScorer(similarity);
        for (int i = 0; i < weights.Count; i++)
        {
            BooleanClause clause = ClauseAt(i);
            Weight subWeight = (Weight) weights[i];
            Scorer subScorer = subWeight.Scorer(reader);
            if (subScorer != null)
            {
                disjunction.Add(subScorer, clause.IsRequired(), clause.IsProhibited());
            }
            else if (clause.IsRequired())
            {
                return null; // a required clause cannot match anything
            }
        }

        return disjunction;
    }

    /// <summary>Explains how the given document is scored by this weight.</summary>
    /// <param name="reader">the index reader handed to every sub-weight</param>
    /// <param name="doc">the document number to explain</param>
    /// <returns>
    /// a zero-valued explanation when a prohibited clause matches or a
    /// required clause does not; otherwise the sum of the matching clauses,
    /// wrapped in a product with the coord factor unless that factor is 1
    /// </returns>
    public virtual Explanation Explain(IndexReader reader, int doc)
    {
        Explanation sumExpl = new Explanation();
        sumExpl.SetDescription("sum of:");
        int matched = 0;   // non-prohibited clauses that matched
        int possible = 0;  // all non-prohibited clauses
        float sum = 0.0f;
        for (int i = 0; i < weights.Count; i++)
        {
            BooleanClause clause = ClauseAt(i);
            Weight subWeight = (Weight) weights[i];
            Explanation detail = subWeight.Explain(reader, doc);
            bool prohibited = clause.IsProhibited();
            if (!prohibited)
            {
                possible++;
            }

            if (detail.GetValue() > 0)
            {
                if (prohibited)
                {
                    return new Explanation(0.0f, "match prohibited");
                }
                sumExpl.AddDetail(detail);
                sum += detail.GetValue();
                matched++;
            }
            else if (clause.IsRequired())
            {
                return new Explanation(0.0f, "match required");
            }
        }
        sumExpl.SetValue(sum);

        if (matched == 1)
        {
            // only one clause matched: eliminate the "sum of" wrapper
            sumExpl = sumExpl.GetDetails()[0];
        }

        float coordFactor = similarity.Coord(matched, possible);
        if (coordFactor == 1.0f)
        {
            // coord is a no-op: no "product of" wrapper needed
            return sumExpl;
        }

        Explanation product = new Explanation();
        product.SetDescription("product of:");
        product.AddDetail(sumExpl);
        product.AddDetail(new Explanation(coordFactor, "coord(" + matched + "/" + possible + ")"));
        product.SetValue(sum * coordFactor);
        return product;
    }
}
/// <summary>
/// Variant of BooleanWeight whose scorer is a BooleanScorer2 built with the
/// owning query's minimum-should-match setting.
/// </summary>
[Serializable]
private class BooleanWeight2 : BooleanWeight
{
    // The query this weight was created for (kept separately from the base class's copy).
    private BooleanQuery enclosingInstance;

    /* Merge into BooleanWeight in case the 1.4 BooleanScorer is dropped */
    /// <summary>Builds the weight; the sub-weights are created by the base constructor.</summary>
    /// <param name="enclosingInstance">the query being weighted</param>
    /// <param name="searcher">the searcher passed on to the base constructor</param>
    public BooleanWeight2(BooleanQuery enclosingInstance, Searcher searcher) : base(enclosingInstance, searcher)
    {
        this.enclosingInstance = enclosingInstance;
    }

    /// <summary>The query this weight was created for.</summary>
    public new BooleanQuery Enclosing_Instance
    {
        get
        {
            return enclosingInstance;
        }
    }

    /// <summary>
    /// Builds an alternative scorer that uses and provides skipTo() and
    /// scores documents in document number order.
    /// </summary>
    /// <param name="reader">the index reader handed to every sub-weight</param>
    /// <returns>the BooleanScorer2, or <c>null</c> when a required clause yields no scorer</returns>
    public override Scorer Scorer(IndexReader reader)
    {
        BooleanQuery owner = Enclosing_Instance;
        BooleanScorer2 combined = new BooleanScorer2(similarity, owner.minNrShouldMatch);

        for (int i = 0; i < weights.Count; i++)
        {
            BooleanClause clause = (BooleanClause) owner.clauses[i];
            Weight subWeight = (Weight) weights[i];
            Scorer subScorer = subWeight.Scorer(reader);
            if (subScorer != null)
            {
                combined.Add(subScorer, clause.IsRequired(), clause.IsProhibited());
            }
            else if (clause.IsRequired())
            {
                return null; // a required clause cannot match anything
            }
        }

        return combined;
    }
}
// Whether CreateWeight should prefer the old 1.4 BooleanScorer; off by default.
private static bool useScorer14 = false;

/// <summary>Chooses whether queries should be scored with the old 1.4 BooleanScorer.</summary>
/// <param name="use14"><c>true</c> to prefer the 1.4 scorer</param>
public static void SetUseScorer14(bool use14)
{
    BooleanQuery.useScorer14 = use14;
}
/// <summary>Tells whether the old 1.4 BooleanScorer is currently preferred.</summary>
/// <returns>the value last given to <c>SetUseScorer14</c>, or <c>false</c> if it was never called</returns>
public static bool GetUseScorer14()
{
    return BooleanQuery.useScorer14;
}
/// <summary>
/// Creates the weight for this query. A BooleanWeight2 is used whenever a
/// minimum number of optional clauses is set, or when the 1.4 scorer has not
/// been requested; otherwise the 1.4-style BooleanWeight is used.
/// </summary>
/// <param name="searcher">the searcher handed to the weight</param>
/// <returns>the weight for this query</returns>
protected internal override Weight CreateWeight(Searcher searcher)
{
    // :TODO: should we throw an exception if getUseScorer14 ?
    bool minimumShouldMatchSet = minNrShouldMatch > 0;
    if (minimumShouldMatchSet || !GetUseScorer14())
    {
        return new BooleanWeight2(this, searcher);
    }

    return new BooleanWeight(this, searcher);
}
/// <summary>
/// Rewrites this query into a simpler or more primitive form.
/// </summary>
/// <remarks>
/// <para>A query with exactly one non-prohibited clause and no
/// minimum-should-match requirement is replaced by that clause's rewritten
/// sub-query, with this query's boost multiplied in.</para>
/// <para>The shortcut is skipped when a minimum number of optional clauses is
/// set: returning the bare sub-query would silently drop that requirement.</para>
/// <para>Otherwise every clause is rewritten; if any sub-query changed, a clone
/// of this query carrying the rewritten clauses is returned.</para>
/// </remarks>
/// <param name="reader">the index reader handed to the sub-queries' rewrite</param>
/// <returns>the rewritten query, or this instance when nothing changed</returns>
public override Query Rewrite(IndexReader reader)
{
    if (minNrShouldMatch == 0 && clauses.Count == 1)
    {
        // optimize 1-clause queries
        BooleanClause only = (BooleanClause) clauses[0];
        if (!only.IsProhibited())
        {
            // just return clause
            Query rewritten = only.GetQuery().Rewrite(reader); // rewrite first

            if (GetBoost() != 1.0f)
            {
                // incorporate boost
                if (rewritten == only.GetQuery())
                {
                    // rewrite was a no-op: clone so the clause's own query is not modified
                    rewritten = (Query) rewritten.Clone();
                }
                rewritten.SetBoost(GetBoost() * rewritten.GetBoost());
            }

            return rewritten;
        }
    }

    BooleanQuery clone = null; // created lazily, only if some clause rewrites
    for (int i = 0; i < clauses.Count; i++)
    {
        BooleanClause c = (BooleanClause) clauses[i];
        Query query = c.GetQuery().Rewrite(reader);
        if (query != c.GetQuery())
        {
            // clause rewrote: must clone
            if (clone == null)
            {
                clone = (BooleanQuery) this.Clone();
            }
            clone.clauses[i] = new BooleanClause(query, c.GetOccur());
        }
    }

    if (clone != null)
    {
        return clone; // some clauses rewrote
    }
    return this; // no clauses rewrote
}
/// <summary>Asks the sub-query of every clause to add its terms to the given table.</summary>
/// <param name="terms">the table handed to each sub-query's <c>ExtractTerms</c></param>
public override void ExtractTerms(System.Collections.Hashtable terms)
{
    foreach (BooleanClause clause in clauses)
    {
        Query subQuery = clause.GetQuery();
        subQuery.ExtractTerms(terms);
    }
}
/// <summary>
/// Returns a copy of this query with its own clause list, so that replacing
/// clauses in the copy does not affect this instance. The list is copied via
/// <c>ArrayList.Clone</c>.
/// </summary>
/// <returns>the copy, as an object</returns>
public override System.Object Clone()
{
    BooleanQuery copy = (BooleanQuery) base.Clone();
    System.Object clauseListCopy = this.clauses.Clone();
    copy.clauses = (System.Collections.ArrayList) clauseListCopy;
    return copy;
}
/// <summary>
/// Renders this query in a user-readable form. Clauses are separated by
/// spaces; prohibited clauses are prefixed with "-" and required ones with
/// "+". Nested boolean queries are wrapped in parentheses. The whole query is
/// parenthesised when it has a non-default boost or a minimum-should-match
/// value, which are then appended as "~N" and the boost suffix.
/// </summary>
/// <param name="field">the field name passed on to each sub-query's <c>ToString</c></param>
/// <returns>the textual form of this query</returns>
public override System.String ToString(System.String field)
{
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    bool needParens = (GetBoost() != 1.0) || (GetMinimumNumberShouldMatch() > 0);
    if (needParens)
    {
        text.Append("(");
    }

    for (int i = 0; i < clauses.Count; i++)
    {
        BooleanClause clause = (BooleanClause) clauses[i];
        if (clause.IsProhibited())
        {
            text.Append("-");
        }
        else if (clause.IsRequired())
        {
            text.Append("+");
        }

        Query subQuery = clause.GetQuery();
        bool nested = subQuery is BooleanQuery;
        if (nested)
        {
            // wrap sub-bools in parens
            text.Append("(");
        }
        text.Append(clause.GetQuery().ToString(field));
        if (nested)
        {
            text.Append(")");
        }

        bool isLast = i == clauses.Count - 1;
        if (!isLast)
        {
            text.Append(" ");
        }
    }

    if (needParens)
    {
        text.Append(")");
    }

    if (GetMinimumNumberShouldMatch() > 0)
    {
        text.Append('~');
        text.Append(GetMinimumNumberShouldMatch());
    }

    if (GetBoost() != 1.0f)
    {
        text.Append(ToStringUtils.Boost(GetBoost()));
    }

    return text.ToString();
}
/// <summary>
/// Compares this query with another object. Two boolean queries are equal
/// when they have the same boost, the same number of clauses, pairwise equal
/// clauses in the same order, and the same minimum-should-match value.
/// </summary>
/// <param name="o">the object to compare with</param>
/// <returns><c>true</c> if <paramref name="o"/> is an equal BooleanQuery; otherwise <c>false</c></returns>
public override bool Equals(System.Object o)
{
    if (!(o is BooleanQuery))
    {
        return false;
    }

    BooleanQuery other = (BooleanQuery) o;
    bool sameBoostAndSize = this.GetBoost() == other.GetBoost()
        && this.clauses.Count == other.clauses.Count;
    if (!sameBoostAndSize)
    {
        return false;
    }

    int index = 0;
    while (index < this.clauses.Count)
    {
        if (!this.clauses[index].Equals(other.clauses[index]))
        {
            return false;
        }
        index++;
    }

    return this.GetMinimumNumberShouldMatch() == other.GetMinimumNumberShouldMatch();
}
/// <summary>
/// Returns a hash code derived from the boost, the clauses (in order) and
/// the minimum-should-match value, i.e. the same state that <c>Equals</c> compares.
/// </summary>
/// <remarks>
/// The clause list is hashed element by element. <c>ArrayList.GetHashCode</c>
/// is identity-based, so hashing the list object itself gave two queries that
/// are equal under <c>Equals</c> different hash codes.
/// NOTE(review): this relies on <c>BooleanClause.GetHashCode</c> being consistent
/// with <c>BooleanClause.Equals</c>; confirm against that class.
/// </remarks>
/// <returns>the hash code for this query</returns>
public override int GetHashCode()
{
    int clausesHash = 1;
    for (int i = 0; i < clauses.Count; i++)
    {
        System.Object clause = clauses[i];
        int clauseHash = (clause == null) ? 0 : clause.GetHashCode();
        // Order-sensitive combination; overflow is intended and simply wraps.
        clausesHash = unchecked(31 * clausesHash + clauseHash);
    }

    // Raw bit pattern of the boost.
    int boostBits = BitConverter.ToInt32(BitConverter.GetBytes(GetBoost()), 0);

    // Same grouping as before: '+' binds tighter than '^'.
    return unchecked(boostBits ^ (clausesHash + GetMinimumNumberShouldMatch()));
}