2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using IndexReader
= Lucene
.Net
.Index
.IndexReader
;
18 using Term
= Lucene
.Net
.Index
.Term
;
19 using PriorityQueue
= Lucene
.Net
.Util
.PriorityQueue
;
20 namespace Lucene
.Net
.Search
23 /// <summary>Implements the fuzzy search query. The similiarity measurement
24 /// is based on the Levenshtein (edit distance) algorithm.
27 public sealed class FuzzyQuery
:MultiTermQuery
30 public const float defaultMinSimilarity
= 0.5f
;
31 public const int defaultPrefixLength
= 0;
33 private float minimumSimilarity
;
34 private int prefixLength
;
36 /// <summary> Create a new FuzzyQuery that will match terms with a similarity
37 /// of at least <code>minimumSimilarity</code> to <code>term</code>.
38 /// If a <code>prefixLength</code> > 0 is specified, a common prefix
39 /// of that length is also required.
42 /// <param name="term">the term to search for
44 /// <param name="minimumSimilarity">a value between 0 and 1 to set the required similarity
45 /// between the query term and the matching terms. For example, for a
46 /// <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
47 /// as the query term is considered similar to the query term if the edit distance
48 /// between both terms is less than <code>length(term)*0.5</code>
50 /// <param name="prefixLength">length of common (non-fuzzy) prefix
52 /// <throws> IllegalArgumentException if minimumSimilarity is >= 1 or < 0 </throws>
53 /// <summary> or if prefixLength < 0
55 public FuzzyQuery(Term term
, float minimumSimilarity
, int prefixLength
):base(term
)
58 if (minimumSimilarity
>= 1.0f
)
59 throw new System
.ArgumentException("minimumSimilarity >= 1");
60 else if (minimumSimilarity
< 0.0f
)
61 throw new System
.ArgumentException("minimumSimilarity < 0");
63 throw new System
.ArgumentException("prefixLength < 0");
65 this.minimumSimilarity
= minimumSimilarity
;
66 this.prefixLength
= prefixLength
;
69 /// <summary> Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}.</summary>
70 public FuzzyQuery(Term term
, float minimumSimilarity
):this(term
, minimumSimilarity
, defaultPrefixLength
)
74 /// <summary> Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}.</summary>
75 public FuzzyQuery(Term term
):this(term
, defaultMinSimilarity
, defaultPrefixLength
)
79 /// <summary> Returns the minimum similarity that is required for this query to match.</summary>
80 /// <returns> float value between 0.0 and 1.0
82 public float GetMinSimilarity()
84 return minimumSimilarity
;
87 /// <summary> Returns the non-fuzzy prefix length. This is the number of characters at the start
88 /// of a term that must be identical (not fuzzy) to the query term if the query
89 /// is to match that term.
91 public int GetPrefixLength()
96 protected internal override FilteredTermEnum
GetEnum(IndexReader reader
)
98 return new FuzzyTermEnum(reader
, GetTerm(), minimumSimilarity
, prefixLength
);
101 public override Query
Rewrite(IndexReader reader
)
103 FilteredTermEnum enumerator
= GetEnum(reader
);
104 int maxClauseCount
= BooleanQuery
.GetMaxClauseCount();
105 ScoreTermQueue stQueue
= new ScoreTermQueue(maxClauseCount
);
111 float minScore
= 0.0f
;
113 Term t
= enumerator
.Term();
116 score
= enumerator
.Difference();
117 // terms come in alphabetical order, therefore if queue is full and score
118 // not bigger than minScore, we can skip
119 if (stQueue
.Size() < maxClauseCount
|| score
> minScore
)
121 stQueue
.Insert(new ScoreTerm(t
, score
));
122 minScore
= ((ScoreTerm
) stQueue
.Top()).score
; // maintain minScore
126 while (enumerator
.Next());
133 BooleanQuery query
= new BooleanQuery(true);
134 int size
= stQueue
.Size();
135 for (int i
= 0; i
< size
; i
++)
137 ScoreTerm st
= (ScoreTerm
) stQueue
.Pop();
138 TermQuery tq
= new TermQuery(st
.term
); // found a match
139 tq
.SetBoost(GetBoost() * st
.score
); // set the boost
140 query
.Add(tq
, BooleanClause
.Occur
.SHOULD
); // add to query
146 public override System
.String
ToString(System
.String field
)
148 return base.ToString(field
) + '~' + minimumSimilarity
.ToString();
151 private class ScoreTerm
156 public ScoreTerm(Term term
, float score
)
163 private class ScoreTermQueue
:PriorityQueue
166 public ScoreTermQueue(int size
)
172 * @see Lucene.Net.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object)
174 public override bool LessThan(System
.Object a
, System
.Object b
)
176 ScoreTerm termA
= (ScoreTerm
) a
;
177 ScoreTerm termB
= (ScoreTerm
) b
;
178 if (termA
.score
== termB
.score
)
179 return termA
.term
.CompareTo(termB
.term
) > 0;
181 return termA
.score
< termB
.score
;
185 public override bool Equals(System
.Object o
)
189 if (!(o
is FuzzyQuery
))
194 FuzzyQuery fuzzyQuery
= (FuzzyQuery
) o
;
196 if (minimumSimilarity
!= fuzzyQuery
.minimumSimilarity
)
198 if (prefixLength
!= fuzzyQuery
.prefixLength
)
204 public override int GetHashCode()
206 int result
= base.GetHashCode();
207 result
= 29 * result
+ minimumSimilarity
!= + 0.0f
? BitConverter
.ToInt32(BitConverter
.GetBytes(minimumSimilarity
), 0) : 0;
208 result
= 29 * result
+ prefixLength
;