2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 using IndexReader
= Lucene
.Net
.Index
.IndexReader
;
19 using Term
= Lucene
.Net
.Index
.Term
;
20 using TermPositions
= Lucene
.Net
.Index
.TermPositions
;
21 using ToStringUtils
= Lucene
.Net
.Util
.ToStringUtils
;
23 namespace Lucene
.Net
.Search
26 /// <summary>A Query that matches documents containing a particular sequence of terms.
27 /// A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
/// <p>This query may be combined with other terms or queries with a <see cref="BooleanQuery"/>.</p>
32 public class PhraseQuery
: Query
34 private System
.String field
;
35 private System
.Collections
.ArrayList terms
= System
.Collections
.ArrayList
.Synchronized(new System
.Collections
.ArrayList(10));
36 private System
.Collections
.ArrayList positions
= System
.Collections
.ArrayList
.Synchronized(new System
.Collections
.ArrayList(10));
39 /// <summary>Constructs an empty phrase query. </summary>
44 /// <summary>Sets the number of other words permitted between words in query phrase.
45 /// If zero, then this is an exact phrase search. For larger values this works
46 /// like a <code>WITHIN</code> or <code>NEAR</code> operator.
47 /// <p>The slop is in fact an edit-distance, where the units correspond to
48 /// moves of terms in the query phrase out of position. For example, to switch
49 /// the order of two words requires two moves (the first move places the words
/// atop one another), so to permit re-orderings of phrases, the slop must be at least two.
52 /// <p>More exact matches are scored higher than sloppier matches, thus search
53 /// results are sorted by exactness.
54 /// <p>The slop is zero by default, requiring exact matches.
56 public virtual void SetSlop(int s
)
/// <summary>Returns the slop. See <see cref="SetSlop"/>. </summary>
61 public virtual int GetSlop()
66 /// <summary> Adds a term to the end of the query phrase.
67 /// The relative position of the term is the one immediately after the last term added.
69 public virtual void Add(Term term
)
72 if (positions
.Count
> 0)
73 position
= ((System
.Int32
) positions
[positions
.Count
- 1]) + 1;
78 /// <summary> Adds a term to the end of the query phrase.
79 /// The relative position of the term within the phrase is specified explicitly.
80 /// This allows e.g. phrases with more than one term at the same position
81 /// or phrases with gaps (e.g. in connection with stopwords).
84 /// <param name="term">
86 /// <param name="position">
88 public virtual void Add(Term term
, int position
)
92 else if (term
.Field() != field
)
94 throw new System
.ArgumentException("All phrase terms must be in the same field: " + term
);
98 positions
.Add((System
.Int32
) position
);
/// <summary>Returns the terms in this phrase as a newly allocated array, in list order.</summary>
/// <returns>a <c>Term[]</c> copy of the internal term list</returns>
public virtual Term[] GetTerms()
{
    // ToArray copies the list contents into a fresh array of the requested element type,
    // so callers never receive the backing list itself.
    System.Array copy = terms.ToArray(typeof(Term));
    return (Term[]) copy;
}
/// <summary>Returns the relative positions of the terms in this phrase as a newly allocated array.</summary>
/// <returns>an <c>int[]</c> copy of the internal position list, in list order</returns>
public virtual int[] GetPositions()
{
    // The list stores boxed Int32 values; ToArray(typeof(int)) unboxes them into an int[].
    // Taking the copy in one call on the synchronized list (the same way GetTerms does)
    // avoids sizing the array from one read of Count and then indexing against later reads,
    // which could overrun the array if the list grew in between.
    return (int[]) positions.ToArray(typeof(int));
}
117 private class PhraseWeight
: Weight
119 private void InitBlock(PhraseQuery enclosingInstance
)
121 this.enclosingInstance
= enclosingInstance
;
123 private PhraseQuery enclosingInstance
;
124 public PhraseQuery Enclosing_Instance
128 return enclosingInstance
;
132 private Similarity similarity
;
133 private float value_Renamed
;
135 private float queryNorm
;
136 private float queryWeight
;
/// <summary>
/// Creates the weight for the given phrase query: records the owning query, obtains the
/// query's <c>Similarity</c> for the searcher, and caches the idf computed for the phrase's terms.
/// </summary>
/// <param name="enclosingInstance">the phrase query this weight belongs to</param>
/// <param name="searcher">the searcher passed to <c>GetSimilarity</c> and <c>Idf</c></param>
public PhraseWeight(PhraseQuery enclosingInstance, Searcher searcher)
{
    // Must run first: Enclosing_Instance reads the field that InitBlock assigns.
    InitBlock(enclosingInstance);

    PhraseQuery outer = Enclosing_Instance;
    this.similarity = outer.GetSimilarity(searcher);
    this.idf = this.similarity.Idf(outer.terms, searcher);
}
/// <summary>Returns <c>weight(</c> + the owning query's string form + <c>)</c>.</summary>
public override System.String ToString()
{
    System.Text.StringBuilder text = new System.Text.StringBuilder("weight(");
    // Append(object) uses the query's own ToString(), exactly like string concatenation does.
    text.Append((System.Object) Enclosing_Instance);
    text.Append(")");
    return text.ToString();
}
/// <summary>Returns the phrase query that this weight was created for.</summary>
public virtual Query GetQuery()
{
    PhraseQuery owner = Enclosing_Instance;
    return owner;
}
/// <summary>Returns the stored weight value (the field that <c>Normalize</c> assigns).</summary>
public virtual float GetValue()
{
    return this.value_Renamed;
}
/// <summary>
/// Computes the raw query weight as idf multiplied by the owning query's boost, stores it,
/// and returns its square.
/// </summary>
/// <returns>the square of the freshly computed query weight</returns>
public virtual float SumOfSquaredWeights()
{
    // Side effect: queryWeight is overwritten here and later scaled by Normalize.
    this.queryWeight = this.idf * Enclosing_Instance.GetBoost();
    return this.queryWeight * this.queryWeight;
}
/// <summary>
/// Records the normalization factor, scales the stored query weight by it, and derives the
/// final weight value as the normalized query weight multiplied by idf.
/// </summary>
/// <param name="queryNorm">normalization factor applied to the query weight</param>
public virtual void Normalize(float queryNorm)
{
    this.queryNorm = queryNorm;

    // Scale the weight computed by SumOfSquaredWeights.
    this.queryWeight = this.queryWeight * queryNorm;

    // idf is applied a second time here, on top of the one already inside queryWeight.
    this.value_Renamed = this.queryWeight * this.idf;
}
173 public virtual Scorer
Scorer(IndexReader reader
)
175 if (Enclosing_Instance
.terms
.Count
== 0)
176 // optimize zero-term case
179 TermPositions
[] tps
= new TermPositions
[Enclosing_Instance
.terms
.Count
];
180 for (int i
= 0; i
< Enclosing_Instance
.terms
.Count
; i
++)
182 TermPositions p
= reader
.TermPositions((Term
) Enclosing_Instance
.terms
[i
]);
188 if (Enclosing_Instance
.slop
== 0)
189 // optimize exact case
190 return new ExactPhraseScorer(this, tps
, Enclosing_Instance
.GetPositions(), similarity
, reader
.Norms(Enclosing_Instance
.field
));
192 return new SloppyPhraseScorer(this, tps
, Enclosing_Instance
.GetPositions(), similarity
, Enclosing_Instance
.slop
, reader
.Norms(Enclosing_Instance
.field
));
195 public virtual Explanation
Explain(IndexReader reader
, int doc
)
198 Explanation result
= new Explanation();
199 result
.SetDescription("weight(" + GetQuery() + " in " + doc
+ "), product of:");
201 System
.Text
.StringBuilder docFreqs
= new System
.Text
.StringBuilder();
202 System
.Text
.StringBuilder query
= new System
.Text
.StringBuilder();
204 for (int i
= 0; i
< Enclosing_Instance
.terms
.Count
; i
++)
208 docFreqs
.Append(" ");
212 Term term
= (Term
) Enclosing_Instance
.terms
[i
];
214 docFreqs
.Append(term
.Text());
215 docFreqs
.Append("=");
216 docFreqs
.Append(reader
.DocFreq(term
));
218 query
.Append(term
.Text());
222 Explanation idfExpl
= new Explanation(idf
, "idf(" + Enclosing_Instance
.field
+ ": " + docFreqs
+ ")");
224 // explain query weight
225 Explanation queryExpl
= new Explanation();
226 queryExpl
.SetDescription("queryWeight(" + GetQuery() + "), product of:");
228 Explanation boostExpl
= new Explanation(Enclosing_Instance
.GetBoost(), "boost");
229 if (Enclosing_Instance
.GetBoost() != 1.0f
)
230 queryExpl
.AddDetail(boostExpl
);
231 queryExpl
.AddDetail(idfExpl
);
233 Explanation queryNormExpl
= new Explanation(queryNorm
, "queryNorm");
234 queryExpl
.AddDetail(queryNormExpl
);
236 queryExpl
.SetValue(boostExpl
.GetValue() * idfExpl
.GetValue() * queryNormExpl
.GetValue());
238 result
.AddDetail(queryExpl
);
240 // explain field weight
241 Explanation fieldExpl
= new Explanation();
242 fieldExpl
.SetDescription("fieldWeight(" + Enclosing_Instance
.field
+ ":" + query
+ " in " + doc
+ "), product of:");
244 Explanation tfExpl
= Scorer(reader
).Explain(doc
);
245 fieldExpl
.AddDetail(tfExpl
);
246 fieldExpl
.AddDetail(idfExpl
);
248 Explanation fieldNormExpl
= new Explanation();
249 byte[] fieldNorms
= reader
.Norms(Enclosing_Instance
.field
);
250 float fieldNorm
= fieldNorms
!= null?Similarity
.DecodeNorm(fieldNorms
[doc
]):0.0f
;
251 fieldNormExpl
.SetValue(fieldNorm
);
252 fieldNormExpl
.SetDescription("fieldNorm(field=" + Enclosing_Instance
.field
+ ", doc=" + doc
+ ")");
253 fieldExpl
.AddDetail(fieldNormExpl
);
255 fieldExpl
.SetValue(tfExpl
.GetValue() * idfExpl
.GetValue() * fieldNormExpl
.GetValue());
257 result
.AddDetail(fieldExpl
);
260 result
.SetValue(queryExpl
.GetValue() * fieldExpl
.GetValue());
262 if (queryExpl
.GetValue() == 1.0f
)
/// <summary>
/// Builds the weight for this query. A phrase holding exactly one term is delegated to an
/// equivalent <c>TermQuery</c> carrying the same boost; every other case gets a <c>PhraseWeight</c>.
/// </summary>
/// <param name="searcher">the searcher the weight is created for</param>
/// <returns>the weight used to score this query</returns>
protected internal override Weight CreateWeight(Searcher searcher)
{
    if (terms.Count != 1)
    {
        return new PhraseWeight(this, searcher);
    }

    // Single-term shortcut: a one-term phrase is handled as a plain term query.
    Query singleTermQuery = new TermQuery((Term) terms[0]);
    singleTermQuery.SetBoost(GetBoost());
    return singleTermQuery.CreateWeight(searcher);
}
/// <summary>Adds every term of this phrase to <paramref name="queryTerms"/>, keyed by the term itself.</summary>
/// <param name="queryTerms">table receiving the terms; each term is stored as both key and value</param>
/// <seealso cref="Lucene.Net.Search.Query.ExtractTerms(System.Collections.Hashtable)"/>
public override void ExtractTerms(System.Collections.Hashtable queryTerms)
{
    foreach (Term term in terms)
    {
        // Hashtable.Add throws ArgumentException when the key is already present. That happens
        // if the phrase repeats a term or the table already holds it from an earlier call,
        // so only insert keys that are missing.
        if (!queryTerms.Contains(term))
        {
            queryTerms.Add(term, term);
        }
    }
}
292 /// <summary>Prints a user-readable version of this query. </summary>
293 public override System
.String
ToString(System
.String f
)
295 System
.Text
.StringBuilder buffer
= new System
.Text
.StringBuilder();
296 if (!field
.Equals(f
))
298 buffer
.Append(field
);
303 for (int i
= 0; i
< terms
.Count
; i
++)
305 buffer
.Append(((Term
) terms
[i
]).Text());
306 if (i
!= terms
.Count
- 1)
317 buffer
.Append(ToStringUtils
.Boost(GetBoost()));
319 return buffer
.ToString();
/// <summary>
/// Returns true iff <code>o</code> is a PhraseQuery with the same boost, the same slop, and
/// element-wise equal term and position lists.
/// </summary>
/// <param name="o">the object to compare with</param>
/// <returns>true when both queries are equal as described above; false otherwise</returns>
public override bool Equals(System.Object o)
{
    if (!(o is PhraseQuery))
    {
        return false;
    }

    PhraseQuery other = (PhraseQuery) o;

    // ArrayList does not override Equals, so comparing the lists directly would only test
    // reference identity; the contents are compared element by element instead.
    return (this.GetBoost() == other.GetBoost())
        && (this.slop == other.slop)
        && ListsEqual(this.terms, other.terms)
        && ListsEqual(this.positions, other.positions);
}

/// <summary>
/// Returns a hash code built from the boost bits, the slop, and the contents of the term and
/// position lists, so that queries considered equal by <c>Equals</c> hash alike.
/// </summary>
public override int GetHashCode()
{
    int boostBits = System.BitConverter.ToInt32(System.BitConverter.GetBytes(GetBoost()), 0);
    return boostBits ^ slop ^ ListHashCode(terms) ^ ListHashCode(positions);
}

/// <summary>Compares two lists by length and then element by element using Object.Equals.</summary>
private static bool ListsEqual(System.Collections.IList left, System.Collections.IList right)
{
    int count = left.Count;
    if (count != right.Count)
    {
        return false;
    }

    for (int i = 0; i < count; i++)
    {
        if (!System.Object.Equals(left[i], right[i]))
        {
            return false;
        }
    }

    return true;
}

/// <summary>Combines the hash codes of a list's elements in order; a null element counts as 0.</summary>
private static int ListHashCode(System.Collections.IList items)
{
    int hash = 1;
    unchecked
    {
        // Overflow is expected and harmless when mixing hash codes.
        for (int i = 0; i < items.Count; i++)
        {
            System.Object item = items[i];
            hash = 31 * hash + (item == null ? 0 : item.GetHashCode());
        }
    }

    return hash;
}