3 {1 VerifyThis@fm2012 competition, Challenge 1: Longest Repeated Substring}
6 The following is the original description of the verification task,
7 reproduced verbatim. A <a
8 href="http://fm2012.verifythis.org/challenges">reference
9 implementation in Java</a> (based on code by Robert Sedgewick and
10 Kevin Wayne) was also given. The Why3 implementation below follows the
11 Java implementation as faithfully as possible.
14 Longest Common Prefix (LCP) - 45 minutes
19 Longest Common Prefix (LCP) is an algorithm used for text querying. In
20 the following, we model text as an integer array. You may use other
21 representations (e.g., Java Strings), if your system supports them.
23 LCP can be implemented with the pseudocode given below. Formalize and
24 verify the following informal specification for LCP.
26 Input: an integer array a, and two indices x and y into this array
27 Output: length of the longest common prefix of the subarrays of a
28 starting at x and y respectively.
32 int lcp(int[] a, int x, int y) {
34 while (x+l<a.length && y+l<a.length && a[x+l]==a[y+l]) {
47 Together with a suffix array, LCP can be used to solve interesting text
48 problems, such as finding the longest repeated substring (LRS) in a text.
50 A suffix array (for a given text) is an array of all suffixes of the
51 text. For the text [7,8,8,6], the suffix array is
57 Typically, the suffixes are not stored explicitly as above but
58 represented as pointers into the original text. The suffixes in a suffix
59 array are sorted in lexicographical order. This way, occurrences of
60 repeated substrings in the original text are neighbors in the suffix
63 For the above, example (assuming pointers are 0-based integers), the
64 sorted suffix array is:
71 The attached Java code contains an implementation of a suffix array
72 (SuffixArray.java), consisting essentially of a lexicographical
73 comparison on arrays, a sorting routine, and LCP.
75 The client code (LRS.java) uses these to solve the LRS problem. Verify
76 that it does so correctly.
83 (** {2 First module: longest common prefix}
85 it corresponds to the basic part of the challenge
94 (** `is_common_prefix a x y l` is true when the prefixes of length `l`
95 at respective positions `x` and `y` in array `a` are identical. In
96 other words, the array parts `a[x..x+l-1]` and `a[y..y+l-1]` are equal
98 predicate is_common_prefix (a:array int) (x y l:int) =
99 0 <= l /\ x+l <= a.length /\ y+l <= a.length /\
100 (forall i:int. 0 <= i < l -> a[x+i] = a[y+i])
102 (** This lemma helps for the proof of `lcp` (but is not mandatory) and
103 is needed later (for `le_trans`) *)
104 lemma not_common_prefix_if_last_char_are_different:
105 forall a:array int, x y:int, l:int.
106 0 <= l /\ x+l < a.length /\ y+l < a.length /\ a[x+l] <> a[y+l] ->
107 not is_common_prefix a x y (l+1)
109 (** `is_longest_common_prefix a x y l` is true when `l` is the maximal
110 length such that prefixes at positions `x` and `y` in array `a`
112 predicate is_longest_common_prefix (a:array int) (x y l:int) =
113 is_common_prefix a x y l /\
114 forall m:int. l < m -> not (is_common_prefix a x y m)
116 (** This lemma helps to proving `lcp` (but again is not mandatory),
117 and is needed for proving `lcp_sym` and `le_trans` lemmas, and
118 the post-condition of `compare` function in the "absurd" case *)
120 lemma longest_common_prefix_succ:
121 forall a:array int, x y l:int.
122 is_common_prefix a x y l /\ not (is_common_prefix a x y (l+1)) ->
123 is_longest_common_prefix a x y l
125 use export ref.Refint
127 (** the first function, that computes the longest common prefix *)
128 let lcp (a:array int) (x y:int) : int
129 requires { 0 <= x <= a.length }
130 requires { 0 <= y <= a.length }
131 ensures { is_longest_common_prefix a x y result }
132 = let n = a.length in
134 while x + !l < n && y + !l < n && a[x + !l] = a[y + !l] do
135 invariant { is_common_prefix a x y !l }
145 (** test harness for lcp *)
152 let arr = Array.make 4 0 in
153 arr[0]<-1; arr[1]<-2; arr[2]<-2; arr[3]<-5;
154 let x = lcp arr 1 2 in
155 assert { is_common_prefix arr 1 2 1};
156 assert { not is_common_prefix arr 1 2 2};
158 (* int[] brr = {1,2,3,5}; *)
159 let brr = Array.make 4 0 in
160 brr[0]<-1; brr[1]<-2; brr[2]<-3; brr[3]<-5;
161 let x = lcp brr 1 2 in
162 assert { is_common_prefix brr 1 2 0};
163 assert { not is_common_prefix brr 1 2 1};
165 (* int[] crr = {1,2,3,5}; *)
166 let crr = Array.make 4 0 in
167 crr[0]<-1; crr[1]<-2; crr[2]<-3; crr[3]<-5;
168 let x = lcp crr 2 3 in
169 assert { is_common_prefix crr 2 3 0};
170 assert { not is_common_prefix crr 2 3 1};
172 (* int[] drr = {1,2,3,3}; *)
173 let drr = Array.make 4 0 in
174 drr[0]<-1; drr[1]<-2; drr[2]<-3; drr[3]<-3;
175 let x = lcp drr 2 3 in
176 assert { is_common_prefix drr 2 3 1};
177 assert { not is_common_prefix drr 2 3 2};
181 exception BenchFailure
183 let bench () raises { BenchFailure -> true } =
184 let arr = Array.make 4 0 in
185 arr[0]<-1; arr[1]<-2; arr[2]<-2; arr[3]<-5;
186 let x = lcp arr 1 2 in
187 if x <> 1 then raise BenchFailure;
193 (** {2 Second module: sorting suffixes } *)
201 (** order by prefix *)
202 predicate lt (a : array int) (x y:int) =
204 0 <= x <= n /\ 0 <= y <= n /\
205 exists l:int. LCP.is_common_prefix a x y l /\
206 (y+l < n /\ (x+l = n \/ a[x+l] < a[y+l]))
210 (** comparison function, that decides the order of prefixes *)
211 let compare (a:array int) (x y:int) : int
212 requires { 0 <= x <= a.length }
213 requires { 0 <= y <= a.length }
214 ensures { result = 0 -> x = y }
215 ensures { result < 0 -> lt a x y }
216 ensures { result > 0 -> lt a y x }
220 let l = LCP.lcp a x y in
221 if x + l = n then -1 else
222 if y + l = n then 1 else
223 if a[x + l] < a[y + l] then -1 else
224 if a[x + l] > a[y + l] then 1 else
227 use map.MapInjection as Inj
228 (** `range a` is true when for each `i`, `a[i]` is between `0` and
230 predicate range (a:array int) = Inj.range a.elts a.length
232 (** for the `permut` predicate (permutation of elements) *)
233 use array.ArrayPermut
235 predicate le (a : array int) (x y:int) = x = y \/ lt a x y
237 (* needed for le_trans (for cases x = y /\ le y z and le x y /\ y = z) *)
238 lemma lcp_same_index :
239 forall a:array int, x:int.
240 0 <= x <= a.length -> LCP.is_longest_common_prefix a x x (a.length - x)
242 lemma le_trans : forall a:array int, x y z:int.
243 le a x y /\ le a y z -> le a x z
245 (** spec of sorted in increasing prefix order *)
248 predicate sorted_sub (a:array int) (data:Map.map int int) (l u:int) =
249 forall i1 i2 : int. l <= i1 <= i2 < u ->
250 le a (Map.get data i1) (Map.get data i2)
252 predicate sorted (a : array int) (data:array int) =
253 sorted_sub a data.elts 0 data.length
255 let sort (a:array int) (data : array int)
256 requires { data.length = a.length }
257 requires { range data }
258 ensures { sorted a data }
259 ensures { permut_all (old data) data }
261 for i = 0 to data.length - 1 do
262 invariant { permut_all (old data) data }
263 invariant { sorted_sub a data.elts 0 i }
264 invariant { range data }
266 while !j > 0 && compare a data[!j-1] data[!j] > 0 do
267 invariant { 0 <= !j <= i }
268 invariant { range data }
269 invariant { permut_all (old data) data }
270 invariant { sorted_sub a data.elts 0 !j }
271 invariant { sorted_sub a data.elts !j (i+1) }
272 invariant { forall k1 k2:int. 0 <= k1 < !j /\ !j+1 <= k2 <= i ->
273 le a data[k1] data[k2] }
280 assert { exchange (data at L) data (!j-1) !j };
283 (* needed to prove the invariant `sorted_sub a data.elts 0 i` *)
284 assert { !j > 0 -> le a data[!j-1] data[!j] }
291 module SuffixSort_test
298 exception BenchFailure
300 let bench () raises { BenchFailure -> true } =
301 let arr = Array.make 4 0 in
303 arr[0]<-7; arr[1]<-8; arr[2]<-8; arr[3]<-6;
304 let suf = Array.make 4 0 in
306 invariant { forall j:int. 0 <= j < i -> suf[j] = j }
309 SuffixSort.sort arr suf;
310 (* should be [3,0,2,1] *)
311 if suf[0] <> 3 then raise BenchFailure;
312 if suf[1] <> 0 then raise BenchFailure;
313 if suf[2] <> 2 then raise BenchFailure;
314 if suf[3] <> 1 then raise BenchFailure;
322 (** {2 Third module: Suffix Array Data Structure } *)
332 use map.MapInjection as Inj
334 predicate permutation (m:Map.map int int) (u : int) =
335 Inj.range m u /\ Inj.injective m u
339 suffixes : array int;
341 invariant { values.length = suffixes.length /\
342 permutation suffixes.elts suffixes.length /\
343 SuffixSort.sorted values suffixes }
344 by { values = make 0 0; suffixes = make 0 0 }
346 let select (s:suffixArray) (i:int) : int
347 requires { 0 <= i < s.values.length }
348 ensures { result = s.suffixes[i] }
351 use array.ArrayPermut
353 (** needed to establish invariant in function `create` *)
354 lemma permut_permutation : forall a1 a2:array int.
355 permut_all a1 a2 /\ permutation a1.elts a1.length ->
356 permutation a2.elts a2.length
358 (** constructor of suffixArray structure *)
359 let create (a:array int) : suffixArray
360 ensures { result.values = a }
361 = let n = a.length in
362 let suf = Array.make n 0 in
364 invariant { forall j:int. 0 <= j < i -> suf[j] = j }
366 SuffixSort.sort a suf;
367 { values = a; suffixes = suf }
369 let lcp (s:suffixArray) (i:int) : int
370 requires { 0 < i < s.values.length }
371 ensures { LCP.is_longest_common_prefix s.values
372 s.suffixes[i-1] s.suffixes[i] result }
373 = LCP.lcp s.values s.suffixes[i] s.suffixes[i-1]
380 module SuffixArray_test
388 let arr = Array.make 4 0 in
389 arr[0]<-1; arr[1]<-2; arr[2]<-2; arr[3]<-5;
390 let sa = create arr in
391 assert { sa.suffixes[0] = 0 };
392 assert { sa.suffixes[1] = 1 };
393 assert { sa.suffixes[2] = 2 };
394 assert { sa.suffixes[3] = 3 };
397 (* int[] brr = {1,2,3,5}; *)
398 let brr = Array.make 4 0 in
399 brr[0]<-1; brr[1]<-2; brr[2]<-3; brr[3]<-5;
400 let sa = create brr in
402 check {x = 0 (* TODO *)};
403 (* int[] crr = {1,2,3,5}; *)
404 let crr = Array.make 4 0 in
405 crr[0]<-1; crr[1]<-2; crr[2]<-3; crr[3]<-5;
406 let sa = create crr in
408 check {x = 0 (* TODO *)};
409 (* int[] drr = {1,2,3,3}; *)
410 let drr = Array.make 4 0 in
411 drr[0]<-1; drr[1]<-2; drr[2]<-3; drr[3]<-3;
412 let sa = create drr in
414 check {x = 0 (* TODO *)}
417 exception BenchFailure
419 let bench () raises { BenchFailure -> true } =
420 let arr = Array.make 4 0 in
422 arr[0]<-7; arr[1]<-8; arr[2]<-8; arr[3]<-6;
423 let sa = create arr in
424 if sa.suffixes[0] <> 3 then raise BenchFailure;
425 if sa.suffixes[1] <> 0 then raise BenchFailure;
426 if sa.suffixes[2] <> 2 then raise BenchFailure;
427 if sa.suffixes[3] <> 1 then raise BenchFailure;
433 (** {2 Fourth module: Longest Repeated Substring} *)
440 use map.MapInjection as Inj
446 (** additional properties relating `le` and `longest_common_prefix` *)
448 (* needed for `lrs` function, last before last assert *)
450 forall a:array int, x y l:int.
451 0 <= x <= a.length /\ 0 <= y <= a.length ->
452 LCP.is_longest_common_prefix a x y l ->
453 LCP.is_longest_common_prefix a y x l
456 (** allows CVC to prove the next lemma *)
457 lemma le_le_common_prefix:
458 forall a:array int, x y z l:int.
459 SuffixSort.le a x y /\ SuffixSort.le a y z ->
460 LCP.is_common_prefix a x z l -> LCP.is_common_prefix a y z l
462 (** proved by Alt-Ergo and CVC. But only by Alt-Ergo if previous lemma is removed *)
463 lemma le_le_longest_common_prefix:
464 forall a:array int, x y z l m :int.
465 SuffixSort.le a x y /\ SuffixSort.le a y z ->
466 LCP.is_longest_common_prefix a x z l /\
467 LCP.is_longest_common_prefix a y z m
471 val solStart : ref int
472 val solLength : ref int
473 val ghost solStart2 : ref int
475 let lrs (a:array int) : unit
476 requires { a.length > 0 }
477 ensures { 0 <= !solLength <= a.length }
478 ensures { 0 <= !solStart <= a.length }
479 ensures { 0 <= !solStart2 <= a.length /\ !solStart <> !solStart2 /\
480 LCP.is_longest_common_prefix a !solStart !solStart2 !solLength }
481 ensures { forall x y l:int.
482 0 <= x < y < a.length /\
483 LCP.is_longest_common_prefix a x y l ->
486 let sa = SuffixArray.create a in
489 solStart2 := a.length;
490 for i=1 to a.length - 1 do
491 invariant { 0 <= !solLength <= a.length }
492 invariant { 0 <= !solStart <= a.length }
494 0 <= !solStart2 <= a.length /\ !solStart <> !solStart2 /\
495 LCP.is_longest_common_prefix a !solStart !solStart2 !solLength }
496 invariant { forall j k l:int.
498 LCP.is_longest_common_prefix a
499 sa.SuffixArray.suffixes[j] sa.SuffixArray.suffixes[k] l ->
501 let l = SuffixArray.lcp sa i in
502 if l > !solLength then begin
503 solStart := SuffixArray.select sa i;
504 solStart2 := SuffixArray.select sa (i-1);
508 (** the following assert needs `lcp_sym` lemma *)
509 assert { forall j k l:int.
510 0 <= j < a.length /\ 0 <= k < a.length /\ j <> k /\
511 LCP.is_longest_common_prefix a
512 sa.SuffixArray.suffixes[j] sa.SuffixArray.suffixes[k] l ->
514 (* we state explicitly that sa.suffixes is surjective *)
515 assert { forall x y:int.
516 0 <= x < y < a.length ->
518 0 <= j < a.length /\ 0 <= k < a.length /\ j <> k /\
519 x = sa.SuffixArray.suffixes[j] /\
520 y = sa.SuffixArray.suffixes[k] };
533 exception BenchFailure
535 let bench () raises { BenchFailure -> true } =
536 let arr = Array.make 4 0 in
538 arr[0]<-7; arr[1]<-8; arr[2]<-8; arr[3]<-6;
540 if !solStart <> 1 then raise BenchFailure;
541 if !solLength <> 1 then raise BenchFailure;
542 !solStart, !solLength