2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 using IndexReader
= Lucene
.Net
.Index
.IndexReader
;
19 using Term
= Lucene
.Net
.Index
.Term
;
20 using PriorityQueue
= Lucene
.Net
.Util
.PriorityQueue
;
21 using ToStringUtils
= Lucene
.Net
.Util
.ToStringUtils
;
23 namespace Lucene
.Net
.Search
26 /// <summary>Implements the fuzzy search query. The similiarity measurement
27 /// is based on the Levenshtein (edit distance) algorithm.
30 public sealed class FuzzyQuery
: MultiTermQuery
33 public const float defaultMinSimilarity
= 0.5f
;
34 public const int defaultPrefixLength
= 0;
36 private float minimumSimilarity
;
37 private int prefixLength
;
39 /// <summary> Create a new FuzzyQuery that will match terms with a similarity
40 /// of at least <code>minimumSimilarity</code> to <code>term</code>.
41 /// If a <code>prefixLength</code> > 0 is specified, a common prefix
42 /// of that length is also required.
45 /// <param name="term">the term to search for
47 /// <param name="minimumSimilarity">a value between 0 and 1 to set the required similarity
48 /// between the query term and the matching terms. For example, for a
49 /// <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
50 /// as the query term is considered similar to the query term if the edit distance
51 /// between both terms is less than <code>length(term)*0.5</code>
53 /// <param name="prefixLength">length of common (non-fuzzy) prefix
55 /// <throws> IllegalArgumentException if minimumSimilarity is >= 1 or < 0 </throws>
56 /// <summary> or if prefixLength < 0
58 public FuzzyQuery(Term term
, float minimumSimilarity
, int prefixLength
):base(term
)
61 if (minimumSimilarity
>= 1.0f
)
62 throw new System
.ArgumentException("minimumSimilarity >= 1");
63 else if (minimumSimilarity
< 0.0f
)
64 throw new System
.ArgumentException("minimumSimilarity < 0");
66 throw new System
.ArgumentException("prefixLength < 0");
68 this.minimumSimilarity
= minimumSimilarity
;
69 this.prefixLength
= prefixLength
;
72 /// <summary> Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}.</summary>
73 public FuzzyQuery(Term term
, float minimumSimilarity
):this(term
, minimumSimilarity
, defaultPrefixLength
)
77 /// <summary> Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}.</summary>
78 public FuzzyQuery(Term term
):this(term
, defaultMinSimilarity
, defaultPrefixLength
)
82 /// <summary> Returns the minimum similarity that is required for this query to match.</summary>
83 /// <returns> float value between 0.0 and 1.0
85 public float GetMinSimilarity()
87 return minimumSimilarity
;
90 /// <summary> Returns the non-fuzzy prefix length. This is the number of characters at the start
91 /// of a term that must be identical (not fuzzy) to the query term if the query
92 /// is to match that term.
94 public int GetPrefixLength()
99 protected internal override FilteredTermEnum
GetEnum(IndexReader reader
)
101 return new FuzzyTermEnum(reader
, GetTerm(), minimumSimilarity
, prefixLength
);
104 public override Query
Rewrite(IndexReader reader
)
106 FilteredTermEnum enumerator
= GetEnum(reader
);
107 int maxClauseCount
= BooleanQuery
.GetMaxClauseCount();
108 ScoreTermQueue stQueue
= new ScoreTermQueue(maxClauseCount
);
114 float minScore
= 0.0f
;
116 Term t
= enumerator
.Term();
119 score
= enumerator
.Difference();
120 // terms come in alphabetical order, therefore if queue is full and score
121 // not bigger than minScore, we can skip
122 if (stQueue
.Size() < maxClauseCount
|| score
> minScore
)
124 stQueue
.Insert(new ScoreTerm(t
, score
));
125 minScore
= ((ScoreTerm
) stQueue
.Top()).score
; // maintain minScore
129 while (enumerator
.Next());
136 BooleanQuery query
= new BooleanQuery(true);
137 int size
= stQueue
.Size();
138 for (int i
= 0; i
< size
; i
++)
140 ScoreTerm st
= (ScoreTerm
) stQueue
.Pop();
141 TermQuery tq
= new TermQuery(st
.term
); // found a match
142 tq
.SetBoost(GetBoost() * st
.score
); // set the boost
143 query
.Add(tq
, BooleanClause
.Occur
.SHOULD
); // add to query
149 public override System
.String
ToString(System
.String field
)
151 System
.Text
.StringBuilder buffer
= new System
.Text
.StringBuilder();
152 Term term
= GetTerm();
153 if (!term
.Field().Equals(field
))
155 buffer
.Append(term
.Field());
158 buffer
.Append(term
.Text());
160 buffer
.Append(minimumSimilarity
.ToString());
161 buffer
.Append(ToStringUtils
.Boost(GetBoost()));
162 return buffer
.ToString();
165 private class ScoreTerm
170 public ScoreTerm(Term term
, float score
)
177 private class ScoreTermQueue
: PriorityQueue
180 public ScoreTermQueue(int size
)
186 * @see Lucene.Net.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object)
188 public override bool LessThan(System
.Object a
, System
.Object b
)
190 ScoreTerm termA
= (ScoreTerm
) a
;
191 ScoreTerm termB
= (ScoreTerm
) b
;
192 if (termA
.score
== termB
.score
)
193 return termA
.term
.CompareTo(termB
.term
) > 0;
195 return termA
.score
< termB
.score
;
199 public override bool Equals(System
.Object o
)
203 if (!(o
is FuzzyQuery
))
208 FuzzyQuery fuzzyQuery
= (FuzzyQuery
) o
;
210 if (minimumSimilarity
!= fuzzyQuery
.minimumSimilarity
)
212 if (prefixLength
!= fuzzyQuery
.prefixLength
)
218 public override int GetHashCode()
220 int result
= base.GetHashCode();
221 result
= 29 * result
+ minimumSimilarity
!= + 0.0f
? BitConverter
.ToInt32(BitConverter
.GetBytes(minimumSimilarity
), 0) : 0;
222 result
= 29 * result
+ prefixLength
;