/*
 * Copyright 2005 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using System;

using IndexReader = Lucene.Net.Index.IndexReader;
namespace Lucene.Net.Search
{
    /// <summary> A query that generates the union of the documents produced by its subqueries, and that scores each
    /// document as the maximum score for that document produced by any subquery, plus a tie-breaking increment for
    /// any additional matching subqueries.
    /// This is useful to search for a word in multiple fields with different boost factors (so that the fields
    /// cannot be combined equivalently into a single search field). We want the primary score to be the one
    /// associated with the highest boost, not the sum of the field scores (as BooleanQuery would give).
    /// If the query is "albino elephant" this ensures that "albino" matching one field and "elephant" matching
    /// another gets a higher score than "albino" matching both fields.
    /// To get this result, use both BooleanQuery and DisjunctionMaxQuery: for each term a DisjunctionMaxQuery
    /// searches for it in each field, while the set of these DisjunctionMaxQuery's is combined into a BooleanQuery.
    /// The tie breaker capability allows results that include the same term in multiple fields to be judged better
    /// than results that include this term in only the best of those multiple fields, without confusing this with
    /// the better case of two different terms in the multiple fields.
    /// </summary>
    /// <author> Chuck Williams </author>
    public class DisjunctionMaxQuery : Query, System.ICloneable
    {
        /* The subqueries, kept in the order in which they were added. */
        private System.Collections.ArrayList disjuncts = new System.Collections.ArrayList();

        /* Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. */
        private float tieBreakerMultiplier = 0.0f;

        /// <summary>Creates a new empty DisjunctionMaxQuery. Use Add() to add the subqueries.</summary>
        /// <param name="tieBreakerMultiplier">the score of each non-maximum disjunct for a document is multiplied by
        /// this weight and added into the final score. If non-zero, the value should be small, on the order of 0.1,
        /// which says that 10 occurrences of a word in a lower-scored field that is also in a higher-scored field is
        /// just as good as a unique word in the lower-scored field (i.e., one that is not in any higher-scored field).
        /// </param>
        public DisjunctionMaxQuery(float tieBreakerMultiplier)
        {
            this.tieBreakerMultiplier = tieBreakerMultiplier;
        }

        /// <summary>Creates a new DisjunctionMaxQuery that starts out with the given subqueries.</summary>
        /// <param name="disjuncts">a collection of Query objects; every element is added as a disjunct</param>
        /// <param name="tieBreakerMultiplier">the weight to give to each matching non-maximum disjunct</param>
        public DisjunctionMaxQuery(System.Collections.ICollection disjuncts, float tieBreakerMultiplier)
        {
            this.tieBreakerMultiplier = tieBreakerMultiplier;
            // Route through Add so the collection is actually stored.
            Add(disjuncts);
        }

        /// <summary>Add a subquery to this disjunction.</summary>
        /// <param name="query">the disjunct added</param>
        public virtual void Add(Query query)
        {
            disjuncts.Add(query);
        }

        /// <summary>Add a collection of disjuncts to this disjunction.</summary>
        /// <param name="disjuncts">a collection of Query objects, appended in enumeration order</param>
        public virtual void Add(System.Collections.ICollection disjuncts)
        {
            this.disjuncts.AddRange(disjuncts);
        }

        /// <summary>An enumerator over the disjuncts (each element is a Query).</summary>
        /// <returns>an enumerator positioned before the first disjunct</returns>
        public virtual System.Collections.IEnumerator Iterator()
        {
            return disjuncts.GetEnumerator();
        }

        /* The Weight for DisjunctionMaxQuery's, used to normalize, score and explain these queries */
        private class DisjunctionMaxWeight : Weight
        {
            private void InitBlock(DisjunctionMaxQuery enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
            }

            private DisjunctionMaxQuery enclosingInstance;

            /* The DisjunctionMaxQuery this weight was created for. */
            public DisjunctionMaxQuery Enclosing_Instance
            {
                get
                {
                    return enclosingInstance;
                }
            }

            private Searcher searcher; // The searcher with which we are associated.
            private System.Collections.ArrayList weights = new System.Collections.ArrayList(); // The Weight's for our subqueries, in 1-1 correspondence with disjuncts

            /* Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */
            public DisjunctionMaxWeight(DisjunctionMaxQuery enclosingInstance, Searcher searcher)
            {
                InitBlock(enclosingInstance);
                this.searcher = searcher;
                for (int i = 0; i < Enclosing_Instance.disjuncts.Count; i++)
                {
                    weights.Add(((Query) Enclosing_Instance.disjuncts[i]).CreateWeight(searcher));
                }
            }

            /* Return our associated DisjunctionMaxQuery */
            public virtual Query GetQuery()
            {
                return Enclosing_Instance;
            }

            /* Return our boost */
            public virtual float GetValue()
            {
                return Enclosing_Instance.GetBoost();
            }

            /* Compute the sum of squared weights of us applied to our subqueries. Used for normalization. */
            public virtual float SumOfSquaredWeights()
            {
                float max = 0.0f, sum = 0.0f;
                for (int i = 0; i < weights.Count; i++)
                {
                    float sub = ((Weight) weights[i]).SumOfSquaredWeights();
                    // Track both the total and the largest contribution: the largest counts in full,
                    // the remainder (sum - max) is scaled by the squared tie breaker below.
                    sum += sub;
                    max = System.Math.Max(max, sub);
                }
                float tieBreaker = Enclosing_Instance.tieBreakerMultiplier;
                float boost = Enclosing_Instance.GetBoost();
                return (((sum - max) * tieBreaker * tieBreaker) + max) * boost * boost;
            }

            /* Apply the computed normalization factor to our subqueries */
            public virtual void Normalize(float norm)
            {
                norm *= Enclosing_Instance.GetBoost(); // Incorporate our boost
                for (int i = 0; i < weights.Count; i++)
                {
                    ((Weight) weights[i]).Normalize(norm);
                }
            }

            /* Create the scorer used to score our associated DisjunctionMaxQuery */
            public virtual Scorer Scorer(IndexReader reader)
            {
                DisjunctionMaxScorer result = new DisjunctionMaxScorer(Enclosing_Instance.tieBreakerMultiplier, Enclosing_Instance.GetSimilarity(searcher));
                for (int i = 0; i < weights.Count; i++)
                {
                    Weight w = (Weight) weights[i];
                    Scorer subScorer = w.Scorer(reader);
                    // NOTE(review): a null sub-scorer is taken to mean "this subquery matches nothing in this
                    // reader" -- confirm against the Weight.Scorer contract. Under that reading it contributes
                    // nothing to the union, so it is skipped and the remaining disjuncts can still match.
                    if (subScorer == null)
                    {
                        continue;
                    }
                    result.Add(subScorer);
                }
                return result;
            }

            /* Explain the score we computed for doc */
            public virtual Explanation Explain(IndexReader reader, int doc)
            {
                // With a single disjunct the score is exactly that subquery's score; delegate directly.
                if (Enclosing_Instance.disjuncts.Count == 1)
                {
                    return ((Weight) weights[0]).Explain(reader, doc);
                }
                Explanation result = new Explanation();
                float max = 0.0f, sum = 0.0f;
                result.SetDescription(Enclosing_Instance.tieBreakerMultiplier == 0.0f ? "max of:" : "max plus " + Enclosing_Instance.tieBreakerMultiplier + " times others of:");
                for (int i = 0; i < weights.Count; i++)
                {
                    Explanation e = ((Weight) weights[i]).Explain(reader, doc);
                    // Only subqueries with a positive value are listed and counted.
                    if (e.GetValue() > 0)
                    {
                        result.AddDetail(e);
                        sum += e.GetValue();
                        max = System.Math.Max(max, e.GetValue());
                    }
                }
                result.SetValue(max + (sum - max) * Enclosing_Instance.tieBreakerMultiplier);
                return result;
            }
        } // end of DisjunctionMaxWeight inner class

        /* Create the Weight used to score us */
        protected internal override Weight CreateWeight(Searcher searcher)
        {
            return new DisjunctionMaxWeight(this, searcher);
        }

        /// <summary>Optimize our representation and our subqueries' representations.</summary>
        /// <param name="reader">the IndexReader we query</param>
        /// <returns>an optimized copy of us (which may not be a copy if there is nothing to optimize)</returns>
        public override Query Rewrite(IndexReader reader)
        {
            if (disjuncts.Count == 1)
            {
                // A disjunction of one is just that subquery; fold our boost into it.
                Query singleton = (Query) disjuncts[0];
                Query result = singleton.Rewrite(reader);
                if (GetBoost() != 1.0f)
                {
                    // Never modify the caller's subquery in place: copy it first if Rewrite handed it back.
                    if (result == singleton)
                    {
                        result = (Query) result.Clone();
                    }
                    result.SetBoost(GetBoost() * result.GetBoost());
                }
                return result;
            }

            DisjunctionMaxQuery clone = null;
            for (int i = 0; i < disjuncts.Count; i++)
            {
                Query clause = (Query) disjuncts[i];
                Query rewrite = clause.Rewrite(reader);
                if (rewrite != clause)
                {
                    // Clone lazily, and only once, so earlier replacements are preserved.
                    if (clone == null)
                    {
                        clone = (DisjunctionMaxQuery) this.Clone();
                    }
                    clone.disjuncts[i] = rewrite;
                }
            }

            if (clone != null)
            {
                return clone;
            }
            return this;
        }

        /// <summary>Create a shallow copy of us -- used in rewriting if necessary.</summary>
        /// <returns>a copy of us (but reuse, don't copy, our subqueries)</returns>
        public override System.Object Clone()
        {
            DisjunctionMaxQuery clone = (DisjunctionMaxQuery) base.Clone();
            // Give the copy its own list so replacing a disjunct in the copy leaves this query untouched.
            clone.disjuncts = (System.Collections.ArrayList) this.disjuncts.Clone();
            return clone;
        }

        /// <summary>Prettyprint us.</summary>
        /// <param name="field">the field to which we are applied</param>
        /// <returns>a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost";
        /// a non-zero tie breaker is shown as "~tieBreaker" before the boost</returns>
        public override System.String ToString(System.String field)
        {
            System.Text.StringBuilder buffer = new System.Text.StringBuilder();
            buffer.Append("(");
            for (int i = 0; i < disjuncts.Count; i++)
            {
                Query subquery = (Query) disjuncts[i];
                if (subquery is BooleanQuery)
                {
                    // wrap sub-bools in parens
                    buffer.Append("(");
                    buffer.Append(subquery.ToString(field));
                    buffer.Append(")");
                }
                else
                {
                    buffer.Append(subquery.ToString(field));
                }
                if (i != disjuncts.Count - 1)
                {
                    buffer.Append(" | ");
                }
            }
            buffer.Append(")");
            if (tieBreakerMultiplier != 0.0f)
            {
                buffer.Append("~");
                buffer.Append(tieBreakerMultiplier);
            }
            if (GetBoost() != 1.0)
            {
                buffer.Append("^");
                buffer.Append(GetBoost());
            }
            return buffer.ToString();
        }

        /// <summary>Return true iff we represent the same query as o.</summary>
        /// <param name="o">another object</param>
        /// <returns>true iff o is a DisjunctionMaxQuery with the same boost, the same tie breaker and the same
        /// subqueries, in the same order, as us</returns>
        public override bool Equals(System.Object o)
        {
            if (!(o is DisjunctionMaxQuery))
            {
                return false;
            }
            DisjunctionMaxQuery other = (DisjunctionMaxQuery) o;
            if (this.GetBoost() != other.GetBoost() || this.tieBreakerMultiplier != other.tieBreakerMultiplier)
            {
                return false;
            }
            // ArrayList does not override Equals, so comparing the lists themselves would only test for the
            // same list instance. Compare the subqueries one by one instead.
            if (this.disjuncts.Count != other.disjuncts.Count)
            {
                return false;
            }
            for (int i = 0; i < this.disjuncts.Count; i++)
            {
                if (!System.Object.Equals(this.disjuncts[i], other.disjuncts[i]))
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>Compute a hash code for hashing us.</summary>
        /// <returns>a hash code built from the boost, the tie breaker and the subqueries in order, so that
        /// queries which are Equals produce the same value</returns>
        public override int GetHashCode()
        {
            // Wrap-around on overflow is intended; unchecked keeps that true under /checked builds.
            unchecked
            {
                int hash = BitConverter.ToInt32(BitConverter.GetBytes(GetBoost()), 0)
                    + BitConverter.ToInt32(BitConverter.GetBytes(tieBreakerMultiplier), 0);
                for (int i = 0; i < disjuncts.Count; i++)
                {
                    System.Object disjunct = disjuncts[i];
                    hash = 31 * hash + (disjunct == null ? 0 : disjunct.GetHashCode());
                }
                return hash;
            }
        }
    }
}