for git v1.5.2 (and below): chdir to the directory of the target file before executin...
[translate_toolkit.git] / search / test_lshtein.py
blobc4d557d7cbb95beaf3c00d3b788d2804fbbbee11
1 from translate.search import lshtein
class TestLevenshtein:
    """Test whether Levenshtein distance calculations are correct."""

    def test_basic_distance(self):
        """Check the raw edit distance for a few basic values."""
        # distance() is a module-level function, so no comparer instance is
        # needed here.
        assert lshtein.distance("word", "word") == 0
        assert lshtein.distance("word", "") == 4
        assert lshtein.distance("", "word") == 4
        assert lshtein.distance("word", "word 2") == 2
        assert lshtein.distance("words", "word") == 1
        assert lshtein.distance("word", "woord") == 1

    def test_basic_similarity(self):
        """Check the similarity score for a few basic values."""
        levenshtein = lshtein.LevenshteinComparer()
        assert levenshtein.similarity("word", "word") == 100
        assert levenshtein.similarity("word", "words") == 80
        assert levenshtein.similarity("word", "wood") == 75
        # NOTE(review): the third argument is presumably a minimum-similarity
        # cutoff -- confirm against LevenshteinComparer.similarity.
        assert levenshtein.similarity("aaa", "bbb", 0) == 0

    def test_long_similarity(self):
        """Check that very long strings are handled well."""
        # A sentence with 240 characters. The text (typos included) must stay
        # exactly as it is: its length determines the expected scores below.
        sentence = "A long, dreary sentence about a cow that never new his mother. Actually it didn't known its father either. One day he decided that enough is enough, and that he would stop making long, dreary sentences just for the sake of making sentences."
        levenshtein = lshtein.LevenshteinComparer()
        assert levenshtein.similarity("Cow", sentence, 10) < 10
        assert levenshtein.similarity(sentence, "Cow", 10) < 10
        # The similarity in the next comparison is supposed to be 25.83
        # (62 of 240 characters match), but since the sentence is long it
        # might be chopped and report a higher value.
        prefix_similarity = levenshtein.similarity(sentence, sentence[0:62], 0)
        assert prefix_similarity > 25
        assert prefix_similarity < 50