1 (** {1 Theory of strings}
3 This file provides a theory over strings. It contains all the
4 predicates and functions currently supported by the smt-solvers CVC4
9 (** {2 String operations} *)
15 (** The type `string` is built-in. Indexes for strings start at `0`. *)
17 (** The following functions/predicates are available in this theory:
19 <style type="text/css">td{padding:0 15px 0 15px;}</style>
20 <table cellpadding="5" cellspacing="0" border="1" style="border-collapse:collapse">
21 <tr><th> Description </th> <th> Name </th> <th> Arguments </th> <th> Result </th> </tr>
24 <td/> <code> concat <code> <td/> string string <td/> string
27 <td/><code> length </code> <td/> string <td/> int
29 <td/> Get string of length 1
30 <td/><code> s_at </code> <td/> string int <td/> string
33 <td/><code> substring </code> <td/> string int int <td/> string
35 <td/> Index of substring
36 <td/><code> indexof </code> <td/> string string int <td/> int
38 <td/> Replace first occurrence
39 <td/><code> replace </code> <td/> string string string <td/> string
41 <td/> Replace all occurrences
42 <td/><code> replaceall </code><td/> string string string <td/> string
45 <td/><code> to_int </code> <td/> string <td/> int
48 <td/><code> from_int </code> <td/> int <td/> string
50 <td/> Comparison less than
51 <td/><code> lt </code> <td/> string string <td/>
53 <td/> Comparison less or equal than
54 <td/><code> le </code> <td/> string string <td/>
56 <td/> Check if it a prefix
57 <td/><code> prefixof </code> <td/> string string <td/>
59 <td/> Check if it is a suffix
60 <td/><code> suffixof </code> <td/> string string <td/>
62 <td/> Check if contains
63 <td/><code> contains </code> <td/> string string <td/>
65 <td/> Check if it is a digit
66 <td/><code> is_digit </code> <td/> string <td/>
74 constant empty : string = ""
75 (** the empty string. *)
77 function concat string string : string
78 (** `concat s1 s2` is the concatenation of `s1` and `s2`. *)
80 axiom concat_assoc: forall s1 s2 s3.
81 concat (concat s1 s2) s3 = concat s1 (concat s2 s3)
83 axiom concat_empty: forall s.
84 concat s empty = concat empty s = s
86 function length string : int
87 (** `length s` is the length of the string `s`. *)
89 (* axiom length_nonneg: forall s. length s >= 0 *)
91 axiom length_empty: length "" = 0
93 axiom length_concat: forall s1 s2.
94 length (concat s1 s2) = length s1 + length s2
96 predicate lt string string
97 (** `lt s1 s2` returns `True` iff `s1` is lexicographically smaller
100 axiom lt_empty: forall s.
101 s <> empty -> lt empty s
103 axiom lt_not_com: forall s1 s2.
104 lt s1 s2 -> not (lt s2 s1)
106 axiom lt_ref: forall s1. not (lt s1 s1)
108 axiom lt_trans: forall s1 s2 s3.
109 lt s1 s2 && lt s2 s3 -> lt s1 s3
111 predicate le string string
112 (** `le s1 s2` returns `True` iff `s1` is lexicographically smaller
113 or equal than `s2`. *)
115 axiom le_empty: forall s.
118 axiom le_ref: forall s1.
121 axiom lt_le: forall s1 s2.
124 axiom lt_le_eq: forall s1 s2.
125 le s1 s2 -> lt s1 s2 || s1 = s2
127 axiom le_trans: forall s1 s2 s3.
128 le s1 s2 && le s2 s3 -> le s1 s3
130 function s_at string int : string
133 (1) `empty`, if either `i < 0` or `i >= length s`;
135 (2) the string of length `1` containing the character of
136 position `i` in string `s`, if `0 <= i < length s`. *)
138 axiom at_out_of_range: forall s i.
139 i < 0 || i >= length s -> s_at s i = empty
141 axiom at_empty: forall i.
144 axiom at_length: forall s i.
146 if 0 <= i < length s then length j = 1 else length j = 0
148 axiom concat_at: forall s1 s2.
149 let s = concat s1 s2 in
150 forall i. (0 <= i < length s1 -> s_at s i = s_at s1 i) &&
151 (length s1 <= i < length s -> s_at s i = s_at s2 (i - length s1))
153 function substring string int int : string
154 (** `substring s i x` is:
156 (1) the `empty` string if `i < 0`, `i >= length s`, or `x <= 0`;
158 (2) the substring of `s` starting at `i` and of length
159 `min x (length s - i)`. *)
161 axiom substring_out_of_range: forall s i x.
162 i < 0 || i >= length s -> substring s i x = empty
164 axiom substring_of_length_zero_or_less: forall s i x.
165 x <= 0 -> substring s i x = ""
167 axiom substring_of_empty: forall i x.
168 substring "" i x = ""
170 axiom substring_smaller: forall s i x.
171 length (substring s i x) <= length s
173 axiom substring_smaller_x: forall s i x.
174 x >= 0 -> length (substring s i x) <= x
176 axiom substring_length: forall s i x.
177 x >= 0 && 0 <= i < length s ->
178 if i + x > length s then
179 length (substring s i x) = length s - i
180 else length (substring s i x) = x
182 axiom substring_at: forall s i.
183 s_at s i = substring s i 1
185 axiom substring_substring:
186 forall s ofs len ofs' len'.
187 0 <= ofs <= length s -> 0 <= len -> ofs + len <= length s ->
188 0 <= ofs' <= len -> 0 <= len' -> ofs' + len' <= len ->
189 substring (substring s ofs len) ofs' len' = substring s (ofs + ofs') len'
191 axiom concat_substring:
192 forall s ofs len len'.
193 0 <= ofs <= length s -> 0 <= len -> ofs + len <= length s ->
194 0 <= len' -> 0 <= ofs + len + len' <= length s ->
195 concat (substring s ofs len) (substring s (ofs+len) len') =
196 substring s ofs (len + len')
198 predicate prefixof string string
199 (** `prefixof s1 s2` is `True` iff `s1` is a prefix of `s2`. *)
201 axiom prefixof_substring: forall s1 s2.
202 prefixof s1 s2 <-> s1 = substring s2 0 (length s1)
204 axiom prefixof_concat: forall s1 s2.
205 prefixof s1 (concat s1 s2)
207 axiom prefixof_empty: forall s2.
210 axiom prefixof_empty2: forall s1.
211 s1 <> empty -> not (prefixof s1 "")
213 predicate suffixof string string
214 (** `suffixof s1 s2` is `True` iff `s1` is a suffix of `s2`. *)
216 axiom suffixof_substring: forall s1 s2.
217 suffixof s1 s2 <-> s1 = substring s2 (length s2 - length s1) (length s1)
219 axiom suffixof_concat: forall s1 s2.
220 suffixof s2 (concat s1 s2)
222 axiom suffixof_empty: forall s2.
225 axiom suffixof_empty2: forall s1.
226 s1 <> empty -> not (suffixof s1 "")
228 predicate contains string string
229 (** `contains s1 s2` is `True` iff `s1` contains `s2`. *)
231 axiom contains_prefixof: forall s1 s2.
232 prefixof s1 s2 -> contains s2 s1
234 axiom contains_suffixof: forall s1 s2.
235 suffixof s1 s2 -> contains s2 s1
237 axiom contains_empty: forall s2.
238 contains "" s2 <-> s2 = empty
240 axiom contains_empty2: forall s1.
243 axiom contains_substring: forall s1 s2 i.
244 substring s1 i (length s2) = s2 -> contains s1 s2
246 (*** The following should hold, but are not proved by SMT provers *)
247 (*** axiom substring_contains: forall s1 s2.
248 contains s1 s2 -> exists i. substring s1 i (length s2) = s2 *)
250 axiom contains_concat: forall s1 s2.
251 contains (concat s1 s2) s1 && contains (concat s1 s2) s2
253 axiom contains_at: forall s1 s2 i.
254 s_at s1 i = s2 -> contains s1 s2
256 (*** The following should hold, but are not proved by SMT provers *)
257 (*** axiom at_contains: forall s1 s2.
258 contains s1 s2 && length s2 = 1 -> exists i. s_at s1 i = s2 *)
260 function indexof string string int : int
261 (** `indexof s1 s2 i` is:
263 (1) the first occurrence of `s2` in `s1` after `i`, if `0 <= i <=
264 length s1` (note: the result is `i`, if `s2 = empty` and `0
267 (2) `-1`, if `i < 0`, `i > length s1`, or `0 <= i <= length s1`
268 and `s2` does not occur in `s1` after `i`. *)
270 axiom indexof_empty: forall s i.
271 0 <= i <= length s -> indexof s "" i = i
273 axiom indexof_empty1: forall s i.
274 let j = indexof "" s i in
275 j = -1 || (s = "" && i = j = 0)
277 axiom indexof_contains: forall s1 s2.
278 let j = indexof s1 s2 0 in
280 0 <= j <= length s1 && substring s1 j (length s2) = s2
282 axiom contains_indexof: forall s1 s2 i.
283 indexof s1 s2 i >= 0 -> contains s1 s2
285 axiom not_contains_indexof: forall s1 s2 i.
286 not (contains s1 s2) -> indexof s1 s2 i = -1
288 axiom substring_indexof: forall s1 s2 i.
289 let j = indexof s1 s2 i in
290 j >= 0 -> substring s1 j (length s2) = s2
292 axiom indexof_out_of_range: forall i s1 s2.
293 not (0 <= i <= length s1) -> indexof s1 s2 i = -1
295 axiom indexof_in_range: forall s1 s2 i.
296 let j = indexof s1 s2 i in
297 0 <= i <= length s1 -> j = -1 || i <= j <= length s1
299 axiom indexof_contains_substring: forall s1 s2 i.
300 0 <= i <= length s1 && contains (substring s1 i (length s1 - i)) s2 ->
301 i <= indexof s1 s2 i <= length s1
303 function replace string string string : string
304 (** `replace s1 s2 s3` is:
306 (1) `concat s3 s1`, if `s2 = empty`;
308 (2) the string obtained by replacing the first occurrence of `s2`
309 (if any) by `s3` in `s1`. *)
311 axiom replace_empty: forall s1 s3.
312 replace s1 "" s3 = concat s3 s1
314 axiom replace_not_contains: forall s1 s2 s3.
315 not (contains s1 s2) -> replace s1 s2 s3 = s1
317 axiom replace_empty2: forall s2 s3.
318 let s4 = replace empty s2 s3 in
319 if s2 = empty then s4 = s3 else s4 = empty
321 axiom replace_substring_indexof: forall s1 s2 s3.
322 let j = indexof s1 s2 0 in
324 if j < 0 then s1 else
325 concat (concat (substring s1 0 j) s3)
326 (substring s1 (j + length s2) (length s1 - j - length s2))
328 (*** Not in SMT-LIB standard, but in CVC4 *)
329 function replaceall string string string : string
330 (** `replaceall s1 s2 s3` is:
332 (1) `s1`, if `s2 = empty`;
334 (2) the string obtained by replacing all occurrences of `s2` by
337 axiom replaceall_empty1: forall s1 s3.
338 replaceall s1 "" s3 = s1
340 axiom not_contains_replaceall: forall s1 s2 s3.
341 not (contains s1 s2) -> replaceall s1 s2 s3 = s1
343 function to_int string : int
346 (1) an `int` consisting on the digits of `s`, if `s` contains
347 exclusively ascii characters in the range 0x30 ... 0x39;
349 (2) `-1`, if `s` contains a character that is not in the range
352 axiom to_int_gt_minus_1: forall s. to_int s >= -1
354 axiom to_int_empty: to_int "" = -1
356 (*** The following lemmas should hold, but CVC4 is not able to prove them *)
357 (*** axiom to_int_only_digits: forall s.
359 (forall i. 0 <= i < length s -> le "0" (s_at s i) && le (s_at s i) "9")
362 (*** axiom to_int_lt_0: forall s i.
363 0 <= i < length s && lt (s_at s i) "0"
366 (*** In SMT-Lib now *)
367 predicate is_digit (s:string) = 0 <= to_int s <= 9 && length s = 1
368 (** `is_digit s` returns `True` iff `s` is of length `1` and
369 corresponds to a decimal digit, that is, to a code point in the
370 range 0x30 ... 0x39 *)
372 function from_int int : string
375 (1) the corresponding string in the decimal notation if `i >= 0`;
377 (2) `empty`, if `i < 0`. *)
379 axiom from_int_negative: forall i.
380 i < 0 <-> from_int i = empty
382 axiom from_int_to_int: forall i.
383 if i >= 0 then to_int (from_int i) = i else to_int (from_int i) = -1
387 (** {2 Characters} *)
394 (** to be mapped into the OCaml char type. *)
395 type char = abstract {
401 axiom char_eq: forall c1 c2.
402 c1.contents = c2.contents -> c1 = c2
404 function code char : int
406 axiom code: forall c. 0 <= code c < 256
408 function chr (n: int) : char
410 axiom code_chr: forall n. 0 <= n < 256 -> code (chr n) = n
412 axiom chr_code: forall c. chr (code c) = c
414 function get (s: string) (i: int) : char
416 axiom get: forall s i.
417 0 <= i < length s -> (get s i).contents = s_at s i
421 0 <= ofs <= length s -> 0 <= len -> ofs + len <= length s -> 0 <= i < len ->
422 get (substring s ofs len) i = get s (ofs + i)
424 lemma concat_first: forall s1 s2.
425 let s3 = concat s1 s2 in
426 forall i. 0 <= i < length s1 ->
429 lemma concat_second: forall s1 s2.
430 let s3 = concat s1 s2 in
431 forall i. length s1 <= i < length s1 + length s2 ->
432 get s3 i = get s2 (i - length s1)
434 function ([]) (s: string) (i: int) : char = get s i
436 predicate eq_string (s1 s2: string) = length s1 = length s2 &&
437 (forall i. 0 <= i < length s1 -> get s1 i = get s2 i)
439 axiom extensionality [@W:non_conservative_extension:N]:
440 forall s1 s2. eq_string s1 s2 -> s1 = s2
442 meta extensionality predicate eq_string
444 function make (size: int) (v: char) : string
446 axiom make_length: forall size v. size >= 0 ->
447 length (make size v) = size
449 axiom make_contents: forall size v. size >= 0 ->
450 (forall i. 0 <= i < size -> get (make size v) i = v)
454 (** {3 Programming API}
456 The following program functions are mapped to OCaml's functions.
457 See also module `io.StdIO`. *)
466 (* In OCaml max_string_length is 144_115_188_075_855_863 *)
468 val eq_char (c1 c2: char) : bool
469 ensures { result <-> c1 = c2 }
471 val get (s: string) (i: int63) : char
472 requires { 0 <= i < length s }
473 ensures { result = get s i }
475 let ([]) (s: string) (i: int63) : char
476 requires { 0 <= i < length s }
477 ensures { result = get s i }
480 val code (c: char) : int63
481 ensures { result = code c }
483 val chr (n: int63) : char
484 requires { 0 <= n < 256 }
485 ensures { result = chr n }
487 val (=) (x y: string) : bool
488 ensures { result <-> x = y }
490 val partial length (s: string) : int63
491 ensures { result = length s >= 0 }
493 val sub (s: string) (start: int63) (len: int63) : string
494 requires { 0 <= start <= length s }
495 requires { 0 <= len <= length s - start }
496 ensures { result = substring s start len }
498 val concat (s1 s2: string) : string
499 ensures { result = concat s1 s2 }
501 val make (size: int63) (v: char) : string
502 requires { 0 <= size }
503 ensures { result = make size v }
507 (** The following module is extracted to OCaml's Buffer *)
517 type buffer = abstract {
520 meta coercion function str
522 val create (_: int63) : buffer
523 ensures { result.str = "" }
525 val length (b: buffer) : int63
526 ensures { result = length b.str }
528 val contents (b: buffer) : string
529 ensures { result = b.str }
531 val clear (b: buffer) : unit
533 ensures { b.str = "" }
535 val reset (b: buffer) : unit
537 ensures { b.str = "" }
539 val sub (b: buffer) (ofs len: int63) : string
540 requires { 0 <= ofs /\ 0 <= len /\ ofs + len <= length b.str }
541 ensures { result = substring b.str ofs len }
543 val add_char (b: buffer) (c: char) : unit
545 ensures { b.str = concat (old b.str) c.Char.contents }
547 val add_string (b: buffer) (s: string) : unit
549 ensures { b.str = concat (old b.str) s }
551 val truncate (b: buffer) (n: int63) : unit
552 requires { 0 <= n <= length b.str }
554 ensures { b.str = substring (old b.str) 0 n }
558 (** {2 String realization}
560 This module is intended for string realization. It clones the `String`
561 module replacing axioms by goals.
565 module StringRealization
569 (** trick to remove axioms from the `String` theory. See file
570 `examples/stdlib/stringCheck.mlw` *)
574 (** {2 Regular expressions} *)
581 (** type for regular expressions *)
583 function to_re string : re
584 (** string to regular expression injection *)
586 predicate in_re string re
587 (** regular expression membership *)
589 function concat re re : re
590 (** regular expressions concatenation, left associativity *)
592 function union re re : re
593 (** regular expressions union, left associativity *)
595 function inter re re : re
596 (** regular expressions intersection, left associativity *)
598 function star re : re
599 (** Kleene closure *)
601 function plus re : re
603 (*** forall re. plus re = concat re (star re) *)
606 (** the empty set of strings *)
608 constant allchar : re
609 (** the set of all strings of length 1 *)
611 constant all : re = star allchar
612 (** the set of all strings *)
615 (** regular expression option *)
616 (*** forall re. opt re = union re (to_re "") *)
618 function range string string : re
619 (** `range s1 s2` is the set of singleton strings such that all
620 element `s` of `range s1 s2` satisfies the condition `Str.<= s1 s`
621 and `Str.<= s s2`. *)
623 function power int re : re
624 (** `power n r` is the `n`th power of `r`; `n` must be an
626 (*** power 0 e = to_re ""
627 power n e = concat e (power (n-1) e) *)
629 function loop int int re : re
630 (** `loop n1 n2 r = if n1 > ne then none else
631 if n1 = n2 then power n1 r else
632 union (power n1 e) (loop (n1+1) n2 e)` *)
633 (*** `n1` and `n2` must be literals. *)