cvsimport
[beagle.git] / beagled / Lucene.Net / Search / FuzzyQuery.cs
blobbb2cd26d4132c98f8fb80b792ac16162d369b67b
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using System;
18 using IndexReader = Lucene.Net.Index.IndexReader;
19 using Term = Lucene.Net.Index.Term;
20 using PriorityQueue = Lucene.Net.Util.PriorityQueue;
21 using ToStringUtils = Lucene.Net.Util.ToStringUtils;
23 namespace Lucene.Net.Search
26 /// <summary>Implements the fuzzy search query. The similiarity measurement
27 /// is based on the Levenshtein (edit distance) algorithm.
28 /// </summary>
29 [Serializable]
30 public sealed class FuzzyQuery : MultiTermQuery
33 public const float defaultMinSimilarity = 0.5f;
34 public const int defaultPrefixLength = 0;
36 private float minimumSimilarity;
37 private int prefixLength;
39 /// <summary> Create a new FuzzyQuery that will match terms with a similarity
40 /// of at least <code>minimumSimilarity</code> to <code>term</code>.
41 /// If a <code>prefixLength</code> &gt; 0 is specified, a common prefix
42 /// of that length is also required.
43 ///
44 /// </summary>
45 /// <param name="term">the term to search for
46 /// </param>
47 /// <param name="minimumSimilarity">a value between 0 and 1 to set the required similarity
48 /// between the query term and the matching terms. For example, for a
49 /// <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
50 /// as the query term is considered similar to the query term if the edit distance
51 /// between both terms is less than <code>length(term)*0.5</code>
52 /// </param>
53 /// <param name="prefixLength">length of common (non-fuzzy) prefix
54 /// </param>
55 /// <throws> IllegalArgumentException if minimumSimilarity is &gt;= 1 or &lt; 0 </throws>
56 /// <summary> or if prefixLength &lt; 0
57 /// </summary>
58 public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength):base(term)
61 if (minimumSimilarity >= 1.0f)
62 throw new System.ArgumentException("minimumSimilarity >= 1");
63 else if (minimumSimilarity < 0.0f)
64 throw new System.ArgumentException("minimumSimilarity < 0");
65 if (prefixLength < 0)
66 throw new System.ArgumentException("prefixLength < 0");
68 this.minimumSimilarity = minimumSimilarity;
69 this.prefixLength = prefixLength;
72 /// <summary> Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}.</summary>
73 public FuzzyQuery(Term term, float minimumSimilarity):this(term, minimumSimilarity, defaultPrefixLength)
77 /// <summary> Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}.</summary>
78 public FuzzyQuery(Term term):this(term, defaultMinSimilarity, defaultPrefixLength)
82 /// <summary> Returns the minimum similarity that is required for this query to match.</summary>
83 /// <returns> float value between 0.0 and 1.0
84 /// </returns>
85 public float GetMinSimilarity()
87 return minimumSimilarity;
90 /// <summary> Returns the non-fuzzy prefix length. This is the number of characters at the start
91 /// of a term that must be identical (not fuzzy) to the query term if the query
92 /// is to match that term.
93 /// </summary>
94 public int GetPrefixLength()
96 return prefixLength;
99 protected internal override FilteredTermEnum GetEnum(IndexReader reader)
101 return new FuzzyTermEnum(reader, GetTerm(), minimumSimilarity, prefixLength);
104 public override Query Rewrite(IndexReader reader)
106 FilteredTermEnum enumerator = GetEnum(reader);
107 int maxClauseCount = BooleanQuery.GetMaxClauseCount();
108 ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
114 float minScore = 0.0f;
115 float score = 0.0f;
116 Term t = enumerator.Term();
117 if (t != null)
119 score = enumerator.Difference();
120 // terms come in alphabetical order, therefore if queue is full and score
121 // not bigger than minScore, we can skip
122 if (stQueue.Size() < maxClauseCount || score > minScore)
124 stQueue.Insert(new ScoreTerm(t, score));
125 minScore = ((ScoreTerm) stQueue.Top()).score; // maintain minScore
129 while (enumerator.Next());
131 finally
133 enumerator.Close();
136 BooleanQuery query = new BooleanQuery(true);
137 int size = stQueue.Size();
138 for (int i = 0; i < size; i++)
140 ScoreTerm st = (ScoreTerm) stQueue.Pop();
141 TermQuery tq = new TermQuery(st.term); // found a match
142 tq.SetBoost(GetBoost() * st.score); // set the boost
143 query.Add(tq, BooleanClause.Occur.SHOULD); // add to query
146 return query;
149 public override System.String ToString(System.String field)
151 System.Text.StringBuilder buffer = new System.Text.StringBuilder();
152 Term term = GetTerm();
153 if (!term.Field().Equals(field))
155 buffer.Append(term.Field());
156 buffer.Append(":");
158 buffer.Append(term.Text());
159 buffer.Append('~');
160 buffer.Append(minimumSimilarity.ToString());
161 buffer.Append(ToStringUtils.Boost(GetBoost()));
162 return buffer.ToString();
165 private class ScoreTerm
167 public Term term;
168 public float score;
170 public ScoreTerm(Term term, float score)
172 this.term = term;
173 this.score = score;
177 private class ScoreTermQueue : PriorityQueue
180 public ScoreTermQueue(int size)
182 Initialize(size);
185 /* (non-Javadoc)
186 * @see Lucene.Net.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object)
188 public override bool LessThan(System.Object a, System.Object b)
190 ScoreTerm termA = (ScoreTerm) a;
191 ScoreTerm termB = (ScoreTerm) b;
192 if (termA.score == termB.score)
193 return termA.term.CompareTo(termB.term) > 0;
194 else
195 return termA.score < termB.score;
199 public override bool Equals(System.Object o)
201 if (this == o)
202 return true;
203 if (!(o is FuzzyQuery))
204 return false;
205 if (!base.Equals(o))
206 return false;
208 FuzzyQuery fuzzyQuery = (FuzzyQuery) o;
210 if (minimumSimilarity != fuzzyQuery.minimumSimilarity)
211 return false;
212 if (prefixLength != fuzzyQuery.prefixLength)
213 return false;
215 return true;
218 public override int GetHashCode()
220 int result = base.GetHashCode();
221 result = 29 * result + minimumSimilarity != + 0.0f ? BitConverter.ToInt32(BitConverter.GetBytes(minimumSimilarity), 0) : 0;
222 result = 29 * result + prefixLength;
223 return result;