2 using Lucene
.Net
.Index
;
4 namespace Lucene
.Net
.Search
6 /* ====================================================================
7 * The Apache Software License, Version 1.1
9 * Copyright (c) 2001 The Apache Software Foundation. All rights
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
24 * 3. The end-user documentation included with the redistribution,
25 * if any, must include the following acknowledgment:
26 * "This product includes software developed by the
27 * Apache Software Foundation (http://www.apache.org/)."
28 * Alternately, this acknowledgment may appear in the software itself,
29 * if and wherever such third-party acknowledgments normally appear.
31 * 4. The names "Apache" and "Apache Software Foundation" and
32 * "Apache Lucene" must not be used to endorse or promote products
33 * derived from this software without prior written permission. For
34 * written permission, please contact apache@apache.org.
36 * 5. Products derived from this software may not be called "Apache",
37 * "Apache Lucene", nor may "Apache" appear in their name, without
38 * prior written permission of the Apache Software Foundation.
40 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
41 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
44 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
47 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
48 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
49 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
50 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * ====================================================================
54 * This software consists of voluntary contributions made by many
55 * individuals on behalf of the Apache Software Foundation. For more
56 * information on the Apache Software Foundation, please see
57 * <http://www.apache.org/>.
61 /// Subclass of FilteredTermEnum for enumerating all terms that are
62 /// similiar to the specified filter term.
63 /// <p>Term enumerations are always ordered by Term.CompareTo(). Each term in
64 /// the enumeration is greater than all that precede it.</p>
66 public sealed class FuzzyTermEnum
: FilteredTermEnum
68 internal double distance
;
69 internal bool fieldMatch
= false;
70 internal bool endEnum
= false;
72 internal Term searchTerm
= null;
73 internal String field
= "";
74 internal String text
= "";
77 public FuzzyTermEnum(IndexReader reader
, Term term
) : base(reader
, term
)
80 field
= searchTerm
.Field();
81 text
= searchTerm
.Text();
82 textlen
= text
.Length
;
83 SetEnum(reader
.Terms(new Term(searchTerm
.Field(), "")));
87 /// The termCompare method in FuzzyTermEnum uses Levenshtein distance to
88 /// calculate the distance between the given term and the comparing term.
90 /// <param name="term"></param>
91 /// <returns></returns>
92 protected override bool TermCompare(Term term
)
94 if (field
== term
.Field())
96 String target
= term
.Text();
97 int targetlen
= target
.Length
;
98 int dist
= EditDistance(text
, target
, textlen
, targetlen
);
99 distance
= 1 - ((double)dist
/ (double)Math
.Min(textlen
, targetlen
));
100 return (distance
> FUZZY_THRESHOLD
);
106 public override float Difference()
108 return (float)((distance
- FUZZY_THRESHOLD
) * SCALE_FACTOR
);
111 public override bool EndEnum()
116 /******************************
117 * Compute Levenshtein distance
118 ******************************/
120 public const double FUZZY_THRESHOLD
= 0.5;
121 public const double SCALE_FACTOR
= 1.0f
/ (1.0f
- FUZZY_THRESHOLD
);
124 /// Finds and returns the smallest of three integers
126 /// <param name="a"></param>
127 /// <param name="b"></param>
128 /// <param name="c"></param>
129 /// <returns></returns>
130 private int Min(int a
, int b
, int c
)
132 int t
= (a
< b
) ? a
: b
;
133 return (t
< c
) ? t
: c
;
137 /// This static array saves us from the time required to create a new array
138 /// everytime editDistance is called.
141 private int[][] e
= null;
144 /// Levenshtein distance also known as edit distance is a measure of similiarity
145 /// between two strings where the distance is measured as the number of character
146 /// deletions, insertions or substitutions required to transform one string to
147 /// the other string.
148 /// <p>This method takes in four parameters; two strings and their respective
149 /// lengths to compute the Levenshtein distance between the two strings.
150 /// The result is returned as an integer.</p>
152 /// <param name="s"></param>
153 /// <param name="t"></param>
154 /// <param name="n"></param>
155 /// <param name="m"></param>
156 /// <returns></returns>
157 private int EditDistance(String s
, String t
, int n
, int m
)
159 if (e
== null || e
.Length
<= n
|| e
[0].Length
<= m
)
170 l1
= Math
.Max(e
.Length
, n
+1);
171 l2
= Math
.Max(e
[0].Length
, m
+1);
175 for (int i1
= 0; i1
< l1
; i1
++)
180 int[][] d
= e
; // matrix
181 int i
; // iterates through s
182 int j
; // iterates through t
183 char s_i
; // ith character of s
185 if (n
== 0) return m
;
186 if (m
== 0) return n
;
189 for (i
= 0; i
<= n
; i
++) d
[i
][0] = i
;
190 for (j
= 0; j
<= m
; j
++) d
[0][j
] = j
;
192 // start computing edit distance
193 for (i
= 1; i
<= n
; i
++)
196 for (j
= 1; j
<= m
; j
++)
199 d
[i
][j
] = Min(d
[i
-1][j
], d
[i
][j
-1], d
[i
-1][j
-1])+1;
200 else d
[i
][j
] = Min(d
[i
-1][j
]+1, d
[i
][j
-1]+1, d
[i
-1][j
-1]);
204 // we got the result!
208 public override void Close()