2 (* Correctness of a program computing the minimal distance between
3 two words (code by Claude Marché).
5 This program computes a variant of the Levenshtein distance. Given
6 two strings [w1] and [w2] of respective lengths [n1] and [n2], it
7 computes the minimal numbers of insertions and deletions to
8 perform in one of the strings to get the other one. (The
9 traditional edit distance also includes substitutions.)
11 The nice point about this algorithm, due to Claude Marché, is to
12 work in linear space, in an array of min(n1,n2) integers. Time
13 complexity is O(n1 * n2), as usual.
21 use export list.Length
25 val eq (c1 c2:char) : bool
26 ensures { result <-> c1 = c2 }
30 inductive dist word word int =
34 forall w1 w2: word, n: int.
35 dist w1 w2 n -> forall a: char. dist (Cons a w1) w2 (n + 1)
37 forall w1 w2: word, n: int.
38 dist w1 w2 n -> forall a: char. dist w1 (Cons a w2) (n + 1)
40 forall w1 w2:word, n: int.
41 dist w1 w2 n -> forall a: char. dist (Cons a w1) (Cons a w2) n
43 predicate min_dist (w1 w2: word) (n: int) =
44 dist w1 w2 n /\ forall m: int. dist w1 w2 m -> n <= m
46 (* intermediate lemmas *)
48 use export list.Append
50 function last_char (a: char) (u: word) : char = match u with
52 | Cons c u' -> last_char c u'
55 function but_last (a: char) (u: word) : word = match u with
57 | Cons c u' -> Cons a (but_last c u')
60 lemma first_last_explicit:
61 forall u: word, a: char.
62 but_last a u ++ Cons (last_char a u) Nil = Cons a u
65 forall a: char, u: word. exists v: word, b: char.
66 v ++ Cons b Nil = Cons a u /\ length v = length u
68 lemma key_lemma_right:
69 forall w1 w'2: word, m: int, a: char.
71 forall w2: word. w'2 = Cons a w2 ->
72 exists u1 v1: word, k: int.
73 w1 = u1 ++ v1 /\ dist v1 w2 k /\ k + length u1 <= m + 1
76 forall w1 w2: word, n: int. dist w1 w2 n -> dist w2 w1 n
79 forall w1 w2: word, m: int, a: char.
80 dist (Cons a w1) w2 m ->
81 exists u2 v2: word, k: int.
82 w2 = u2 ++ v2 /\ dist w1 v2 k /\ k + length u2 <= m + 1
84 lemma dist_concat_left:
85 forall u v w: word, n: int.
86 dist v w n -> dist (u ++ v) w (length u + n)
88 lemma dist_concat_right:
89 forall u v w: word, n: int.
90 dist v w n -> dist v (u ++ w) (length u + n)
95 forall w1 w2: word, a: char, n: int.
96 min_dist w1 w2 n -> min_dist (Cons a w1) (Cons a w2) n
99 forall w1 w2: word, a b: char, m p: int.
101 min_dist (Cons a w1) w2 p ->
102 min_dist w1 (Cons b w2) m ->
103 min_dist (Cons a w1) (Cons b w2) (min m p + 1)
106 forall w: word, a: char, n: int.
107 min_dist w Nil n -> min_dist (Cons a w) Nil (n + 1)
109 lemma min_dist_eps_length:
110 forall w: word. min_dist Nil w (length w)
121 (* Auxiliary definitions for the program and its specification. *)
123 function suffix (a: array char) (i: int) : word
126 forall a: array char. suffix a a.length = Nil
129 forall a: array char, i: int.
130 0 <= i < a.length -> suffix a i = Cons a[i] (suffix a (i+1))
133 forall a: array char, i: int.
134 0 <= i <= a.length -> L.length (suffix a i) = (a.length - i)
136 predicate min_suffix (a1 a2: array char) (i j n: int) =
137 min_dist (suffix a1 i) (suffix a2 j) n
139 function word_of (a: array char) : word = suffix a 0
143 let distance (w1: array char) (w2: array char)
144 ensures { min_dist (word_of w1) (word_of w2) result }
145 = let n1 = length w1 in
146 let n2 = length w2 in
147 let t = Array.make (n2+1) 0 in
148 (* initialization of t *)
150 invariant { forall j:int. 0 <= j < i -> t[j] = n2 - j }
154 for i = n1 - 1 downto 0 do
155 invariant { forall j:int. 0 <= j <= n2 -> min_suffix w1 w2 (i+1) j t[j] }
156 let oldt = ref t[n2] in
159 for j = n2 - 1 downto 0 do
160 invariant { forall k:int. j < k <= n2 -> min_suffix w1 w2 i k t[k] }
161 invariant { forall k:int. 0 <= k <= j -> min_suffix w1 w2 (i+1) k t[k] }
162 invariant { min_suffix w1 w2 (i+1) (j+1) !oldt }
165 if eq w1[i] w2[j] then
168 t[j] <- (min t[j] t[j + 1]) + 1