1 package CXGN
::String
::FuzzyMatch
;
5 CXGN::String::FuzzyMatch - an object to find approximate matches to a query string in an array of strings.
9 my $fuzzy_string_search = CXGN::String::FuzzyMatch->new();
10 my @string_matches = @{$fuzzy_string_search->get_matches($query_string, \@string_array, $max_distance)};
17 Jeremy D. Edwards (jde22@cornell.edu)
23 use String
::Approx
'adistr';
# Boolean attribute: when true, the query and the candidate strings are upper-cased before they are compared.
27 has
'case_insensitive' => ( isa
=> 'Bool',
# Arguments taken from @_ in order: the query string, a reference to the array of
# candidate strings, and the maximum distance a candidate may have and still match.
33 my $query_string = shift;
34 my $string_array_ref = shift;
35 my $max_distance = shift;
# Work on a copy so the caller's array is not modified (the copy may be upper-cased below).
37 my @string_array = @
{$string_array_ref};
# candidate string => distance, for candidates accepted by the $max_distance test.
# Keyed by string, so repeated candidates collapse into one entry.
39 my %string_distance_lookup;
# candidate string => (candidate length - query length); these candidates are later reported with distance 0.
40 my %string_length_difference_lookup;
41 my @strings_sorted_distance;
42 my @strings_sorted_length;
43 my $query_length = length $query_string;
# NOTE(review): after this point @string_array holds upper-cased copies, so the strings stored in
# the lookups (and pushed onto @matches) are upper case, not the caller's original spelling -- confirm this is intended.
45 if ($self->case_insensitive()) {
46 $query_string = uc($query_string);
47 @string_array = map { uc($_) } @string_array;
50 #no fuzzy search if max distance is 0: only candidates equal to the query are recorded
# NOTE(review): the numeric comparison warns if $max_distance is undef -- confirm callers always pass it.
51 if ($max_distance == 0) {
52 for my $i (0 .. $#string_array) {
53 my $string_match = $string_array[$i];
54 if ($query_string eq $string_match) {
55 $string_length_difference_lookup{$string_match} = 0;
# Compute the distance from the query to every candidate; $distances[$i] belongs to $string_array[$i].
# NOTE(review): per the String::Approx documentation adistr returns relative distances that may be
# negative, which appears to be why abs() is used below; $max_distance is then presumably a fraction -- confirm.
60 @distances = adistr
($query_string, @string_array);
62 for my $i (0 .. $#string_array) {
63 my $distance = $distances[$i];
64 my $string_match = $string_array[$i];
# Positive when the candidate is longer than the query, negative when it is shorter.
66 $string_length_difference_lookup{$string_match} = (length $string_match) - $query_length;
67 } elsif (abs($distance) <= $max_distance) {
# Within the allowed distance: remember the signed distance for this candidate.
68 $string_distance_lookup{$string_match}=$distance;
75 #get the strings recorded in %string_length_difference_lookup, sorted by the absolute difference of their length from the query
76 @strings_sorted_length = sort { abs($string_length_difference_lookup{$a}) <=> abs($string_length_difference_lookup{$b}) } keys(%string_length_difference_lookup);
78 #get the strings recorded in %string_distance_lookup, sorted by the absolute value of their distance from the query
79 @strings_sorted_distance = sort { abs($string_distance_lookup{$a}) <=> abs($string_distance_lookup{$b}) } keys(%string_distance_lookup);
# Each result is a hash reference with the keys 'string' and 'distance'.
# The length-sorted strings are emitted first and are always reported with distance 0.
81 foreach my $sorted_string (@strings_sorted_length) {
82 my %string_distance_result;
83 $string_distance_result{'string'} = $sorted_string;
84 $string_distance_result{'distance'} = 0;
85 push (@matches, \
%string_distance_result);
# The distance-sorted strings follow, each reported with the signed distance stored for it.
88 foreach my $sorted_string (@strings_sorted_distance) {
89 my %string_distance_result;
90 $string_distance_result{'string'} = $sorted_string;
91 $string_distance_result{'distance'} = $string_distance_lookup{$sorted_string};
92 push (@matches, \
%string_distance_result);