2006-09-10 Francisco Javier F. Serrador <serrador@openshine.com>
[beagle.git] / beagled / Lucene.Net / Search / FuzzyQuery.cs
blob8f06bcdc5141ec6793090e41fd0a8d91898fb59f
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 using System;
17 using IndexReader = Lucene.Net.Index.IndexReader;
18 using Term = Lucene.Net.Index.Term;
19 using PriorityQueue = Lucene.Net.Util.PriorityQueue;
20 namespace Lucene.Net.Search
23 /// <summary>Implements the fuzzy search query. The similiarity measurement
24 /// is based on the Levenshtein (edit distance) algorithm.
25 /// </summary>
26 [Serializable]
27 public sealed class FuzzyQuery:MultiTermQuery
30 public const float defaultMinSimilarity = 0.5f;
31 public const int defaultPrefixLength = 0;
33 private float minimumSimilarity;
34 private int prefixLength;
36 /// <summary> Create a new FuzzyQuery that will match terms with a similarity
37 /// of at least <code>minimumSimilarity</code> to <code>term</code>.
38 /// If a <code>prefixLength</code> &gt; 0 is specified, a common prefix
39 /// of that length is also required.
40 ///
41 /// </summary>
42 /// <param name="term">the term to search for
43 /// </param>
44 /// <param name="minimumSimilarity">a value between 0 and 1 to set the required similarity
45 /// between the query term and the matching terms. For example, for a
46 /// <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
47 /// as the query term is considered similar to the query term if the edit distance
48 /// between both terms is less than <code>length(term)*0.5</code>
49 /// </param>
50 /// <param name="prefixLength">length of common (non-fuzzy) prefix
51 /// </param>
52 /// <throws> IllegalArgumentException if minimumSimilarity is &gt;= 1 or &lt; 0 </throws>
53 /// <summary> or if prefixLength &lt; 0
54 /// </summary>
55 public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength):base(term)
58 if (minimumSimilarity >= 1.0f)
59 throw new System.ArgumentException("minimumSimilarity >= 1");
60 else if (minimumSimilarity < 0.0f)
61 throw new System.ArgumentException("minimumSimilarity < 0");
62 if (prefixLength < 0)
63 throw new System.ArgumentException("prefixLength < 0");
65 this.minimumSimilarity = minimumSimilarity;
66 this.prefixLength = prefixLength;
69 /// <summary> Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}.</summary>
70 public FuzzyQuery(Term term, float minimumSimilarity):this(term, minimumSimilarity, defaultPrefixLength)
74 /// <summary> Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}.</summary>
75 public FuzzyQuery(Term term):this(term, defaultMinSimilarity, defaultPrefixLength)
79 /// <summary> Returns the minimum similarity that is required for this query to match.</summary>
80 /// <returns> float value between 0.0 and 1.0
81 /// </returns>
82 public float GetMinSimilarity()
84 return minimumSimilarity;
87 /// <summary> Returns the non-fuzzy prefix length. This is the number of characters at the start
88 /// of a term that must be identical (not fuzzy) to the query term if the query
89 /// is to match that term.
90 /// </summary>
91 public int GetPrefixLength()
93 return prefixLength;
96 protected internal override FilteredTermEnum GetEnum(IndexReader reader)
98 return new FuzzyTermEnum(reader, GetTerm(), minimumSimilarity, prefixLength);
101 public override Query Rewrite(IndexReader reader)
103 FilteredTermEnum enumerator = GetEnum(reader);
104 int maxClauseCount = BooleanQuery.GetMaxClauseCount();
105 ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
111 float minScore = 0.0f;
112 float score = 0.0f;
113 Term t = enumerator.Term();
114 if (t != null)
116 score = enumerator.Difference();
117 // terms come in alphabetical order, therefore if queue is full and score
118 // not bigger than minScore, we can skip
119 if (stQueue.Size() < maxClauseCount || score > minScore)
121 stQueue.Insert(new ScoreTerm(t, score));
122 minScore = ((ScoreTerm) stQueue.Top()).score; // maintain minScore
126 while (enumerator.Next());
128 finally
130 enumerator.Close();
133 BooleanQuery query = new BooleanQuery(true);
134 int size = stQueue.Size();
135 for (int i = 0; i < size; i++)
137 ScoreTerm st = (ScoreTerm) stQueue.Pop();
138 TermQuery tq = new TermQuery(st.term); // found a match
139 tq.SetBoost(GetBoost() * st.score); // set the boost
140 query.Add(tq, BooleanClause.Occur.SHOULD); // add to query
143 return query;
146 public override System.String ToString(System.String field)
148 return base.ToString(field) + '~' + minimumSimilarity.ToString();
151 private class ScoreTerm
153 public Term term;
154 public float score;
156 public ScoreTerm(Term term, float score)
158 this.term = term;
159 this.score = score;
163 private class ScoreTermQueue:PriorityQueue
166 public ScoreTermQueue(int size)
168 Initialize(size);
171 /* (non-Javadoc)
172 * @see Lucene.Net.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object)
174 public override bool LessThan(System.Object a, System.Object b)
176 ScoreTerm termA = (ScoreTerm) a;
177 ScoreTerm termB = (ScoreTerm) b;
178 if (termA.score == termB.score)
179 return termA.term.CompareTo(termB.term) > 0;
180 else
181 return termA.score < termB.score;
185 public override bool Equals(System.Object o)
187 if (this == o)
188 return true;
189 if (!(o is FuzzyQuery))
190 return false;
191 if (!base.Equals(o))
192 return false;
194 FuzzyQuery fuzzyQuery = (FuzzyQuery) o;
196 if (minimumSimilarity != fuzzyQuery.minimumSimilarity)
197 return false;
198 if (prefixLength != fuzzyQuery.prefixLength)
199 return false;
201 return true;
204 public override int GetHashCode()
206 int result = base.GetHashCode();
207 result = 29 * result + minimumSimilarity != + 0.0f ? BitConverter.ToInt32(BitConverter.GetBytes(minimumSimilarity), 0) : 0;
208 result = 29 * result + prefixLength;
209 return result;