2 (** {1 Formalization of Floating-Point Arithmetic}
4 This formalization follows the {h <a
5 href="http://ieeexplore.ieee.org/servlet/opac?punumber=4610933">IEEE-754
10 (** {2 Definition of IEEE-754 rounding modes} *)
14 type mode = NearestTiesToEven | ToZero | Up | Down | NearestTiesToAway
15 (** nearest ties to even, to zero, upward, downward, nearest ties to away *)
19 (** {2 Handling of IEEE-754 special values}
21 Special values are +infinity, -infinity, NaN, +0, -0. These are
22 handled as described in {h <a
23 href="http://www.lri.fr/~marche/ayad10ijcar.pdf">[Ayad, Marché, IJCAR,
30 type class = Finite | Infinite | NaN
36 inductive same_sign_real sign real =
37 | Neg_case: forall x:real. x < 0.0 -> same_sign_real Neg x
38 | Pos_case: forall x:real. x > 0.0 -> same_sign_real Pos x
42 lemma sign_not_pos_neg : forall x:t.
43 sign(x) <> Pos -> sign(x) = Neg
44 lemma sign_not_neg_pos : forall x:t.
45 sign(x) <> Neg -> sign(x) = Pos
47 lemma sign_not_pos_neg : forall x:double.
48 sign(x) <> Pos -> sign(x) = Neg
49 lemma sign_not_neg_pos : forall x:double.
50 sign(x) <> Neg -> sign(x) = Pos
53 lemma same_sign_real_zero1 :
54 forall b:sign. not same_sign_real b 0.0
56 lemma same_sign_real_zero2 :
58 same_sign_real Neg x /\ same_sign_real Pos x -> false
60 lemma same_sign_real_zero3 :
61 forall b:sign, x:real. same_sign_real b x -> x <> 0.0
63 lemma same_sign_real_correct2 :
64 forall b:sign, x:real.
65 same_sign_real b x -> (x < 0.0 <-> b = Neg)
67 lemma same_sign_real_correct3 :
68 forall b:sign, x:real.
69 same_sign_real b x -> (x > 0.0 <-> b = Pos)
74 (** {2 Generic theory of floats}
76 The theory is parametrized by the real constant `max` which denotes
77 the maximal representable number, the real constant `min` which denotes
78 the minimal number whose rounding is not null, and the integer constant
79 `max_representable_integer` which is the maximal integer `n` such that
80 every integer between `0` and `n` are representable.
94 function round mode real : real
96 function value t : real
97 function exact t : real
98 function model t : real
100 function round_error (x : t) : real = abs (value x - exact x)
101 function total_error (x : t) : real = abs (value x - model x)
105 predicate no_overflow (m:mode) (x:real) = abs (round m x) <= max
107 axiom Bounded_real_no_overflow :
108 forall m:mode, x:real. abs x <= max -> no_overflow m x
110 axiom Round_monotonic :
111 forall m:mode, x y:real. x <= y -> round m x <= round m y
113 axiom Round_idempotent :
114 forall m1 m2:mode, x:real. round m1 (round m2 x) = round m2 x
117 forall m:mode, x:t. round m (value x) = value x
119 axiom Bounded_value :
120 forall x:t. abs (value x) <= max
122 constant max_representable_integer : int
124 axiom Exact_rounding_for_integers:
126 Int.(<=) (Int.(-_) max_representable_integer) i /\
127 Int.(<=) i max_representable_integer ->
128 round m (from_int i) = from_int i
130 (** rounding up and down *)
133 forall x:real. round Down x <= x
135 forall x:real. round Up x >= x
136 axiom Round_down_neg:
137 forall x:real. round Down (-x) = - round Up x
139 forall x:real. round Up (-x) = - round Down x
141 (** rounding into a float instead of a real *)
143 function round_logic mode real : t
144 axiom Round_logic_def:
145 forall m:mode, x:real.
147 value (round_logic m x) = round m x
152 (** {2 Format of Single precision floats}
154 A.k.a. binary32 numbers.
162 constant max_single : real = 0x1.FFFFFEp127
163 constant max_int : int = 16777216 (* 2^24 *)
166 clone export GenFloatSpecStrict with
168 constant max = max_single,
169 constant max_representable_integer = max_int
174 (** {2 Format of Double precision floats}
176 A.k.a. binary64 numbers.
184 constant max_double : real = 0x1.FFFFFFFFFFFFFp1023
185 constant max_int : int = 9007199254740992 (* 2^53 *)
188 clone export GenFloatSpecStrict with
190 constant max = max_double,
191 constant max_representable_integer = max_int
196 module GenFloatSpecStrict
200 clone export GenFloat with axiom .
203 predicate of_real_post (m:mode) (x:real) (res:t) =
204 value res = round m x /\ exact res = x /\ model res = x
206 (** {3 binary operations} *)
208 predicate add_post (m:mode) (x y res:t) =
209 value res = round m (value x + value y) /\
210 exact res = exact x + exact y /\
211 model res = model x + model y
213 predicate sub_post (m:mode) (x y res:t) =
214 value res = round m (value x - value y) /\
215 exact res = exact x - exact y /\
216 model res = model x - model y
218 predicate mul_post (m:mode) (x y res:t) =
219 value res = round m (value x * value y) /\
220 exact res = exact x * exact y /\
221 model res = model x * model y
223 predicate div_post (m:mode) (x y res:t) =
224 value res = round m (value x / value y) /\
225 exact res = exact x / exact y /\
226 model res = model x / model y
228 predicate neg_post (x res:t) =
229 value res = - value x /\
230 exact res = - exact x /\
231 model res = - model x
233 (** {3 Comparisons} *)
235 predicate lt (x:t) (y:t) = value x < value y
237 predicate gt (x:t) (y:t) = value x > value y
245 use export SingleFormat
247 clone export GenFloatSpecStrict with
249 constant max = max_single,
250 constant max_representable_integer = max_int,
251 lemma Bounded_real_no_overflow,
252 axiom . (* TODO: "lemma"? "goal"? *)
259 use export DoubleFormat
261 clone export GenFloatSpecStrict with
263 constant max = max_double,
264 constant max_representable_integer = max_int,
265 lemma Bounded_real_no_overflow,
266 axiom . (* TODO: "lemma"? "goal"? *)
272 (** {2 Generic Full theory of floats}
274 This theory extends the generic theory above by adding the special values.
284 clone export GenFloat with axiom .
286 (** {3 special values} *)
288 function class t : class
290 predicate is_finite (x:t) = class x = Finite
291 predicate is_infinite (x:t) = class x = Infinite
292 predicate is_NaN (x:t) = class x = NaN
293 predicate is_not_NaN (x:t) = is_finite x \/ is_infinite x
295 lemma is_not_NaN: forall x:t. is_not_NaN x <-> not (is_NaN x)
297 function sign t : sign
299 predicate same_sign_real (x:t) (y:real) =
300 SpecialValues.same_sign_real (sign x) y
301 predicate same_sign (x:t) (y:t) = sign x = sign y
302 predicate diff_sign (x:t) (y:t) = sign x <> sign y
304 predicate sign_zero_result (m:mode) (x:t) =
307 | Down -> sign x = Neg
311 predicate is_minus_infinity (x:t) = is_infinite x /\ sign x = Neg
312 predicate is_plus_infinity (x:t) = is_infinite x /\ sign x = Pos
313 predicate is_gen_zero (x:t) = is_finite x /\ value x = 0.0
314 predicate is_gen_zero_plus (x:t) = is_gen_zero x /\ sign x = Pos
315 predicate is_gen_zero_minus (x:t) = is_gen_zero x /\ sign x = Neg
317 (** {3 Useful lemmas on sign} *)
319 (* non-zero finite gen_float has the same sign as its float_value *)
322 class x = Finite /\ value x <> 0.0 -> same_sign_real x (value x)
324 lemma finite_sign_pos1:
326 class x = Finite /\ value x > 0.0 -> sign x = Pos
328 lemma finite_sign_pos2:
330 class x = Finite /\ value x <> 0.0 /\ sign x = Pos -> value x > 0.0
332 lemma finite_sign_neg1:
333 forall x:t. class x = Finite /\ value x < 0.0 -> sign x = Neg
335 lemma finite_sign_neg2:
337 class x = Finite /\ value x <> 0.0 /\ sign x = Neg -> value x < 0.0
339 lemma diff_sign_trans:
340 forall x y z:t. diff_sign x y /\ diff_sign y z -> same_sign x z
342 lemma diff_sign_product:
344 class x = Finite /\ class y = Finite /\ value x * value y < 0.0
347 lemma same_sign_product:
349 class x = Finite /\ class y = Finite /\ same_sign x y
350 -> value x * value y >= 0.0
354 (** This predicate tells what is the result of a rounding in case
356 predicate overflow_value (m:mode) (x:t) =
358 | Down, Neg -> is_infinite x
359 | Down, Pos -> is_finite x /\ value x = max
360 | Up, Neg -> is_finite x /\ value x = - max
361 | Up, Pos -> is_infinite x
362 | ToZero, Neg -> is_finite x /\ value x = - max
363 | ToZero, Pos -> is_finite x /\ value x = max
364 | (NearestTiesToAway | NearestTiesToEven), _ -> is_infinite x
368 (** rounding in logic *)
370 forall m:mode, x:real.
372 is_finite (round_logic m x) /\ value(round_logic m x) = round m x
375 forall m:mode, x:real.
376 not no_overflow m x ->
377 same_sign_real (round_logic m x) x /\
378 overflow_value m (round_logic m x)
380 lemma round3 : forall m:mode, x:real. exact (round_logic m x) = x
382 lemma round4 : forall m:mode, x:real. model (round_logic m x) = x
384 (** rounding of zero *)
386 lemma round_of_zero : forall m:mode. is_gen_zero (round_logic m 0.0)
390 lemma round_logic_le : forall m:mode, x:real.
391 is_finite (round_logic m x) -> abs (value (round_logic m x)) <= max
393 lemma round_no_overflow : forall m:mode, x:real.
395 is_finite (round_logic m x) /\ value (round_logic m x) = round m x
399 lemma positive_constant : forall m:mode, x:real.
401 is_finite (round_logic m x) /\ value (round_logic m x) > 0.0 /\
402 sign (round_logic m x) = Pos
404 lemma negative_constant : forall m:mode, x:real.
405 - max <= x <= - min ->
406 is_finite (round_logic m x) /\ value (round_logic m x) < 0.0 /\
407 sign (round_logic m x) = Neg
409 (** lemmas on `gen_zero` *)
411 lemma is_gen_zero_comp1 : forall x y:t.
412 is_gen_zero x /\ value x = value y /\ is_finite y -> is_gen_zero y
414 lemma is_gen_zero_comp2 : forall x y:t.
415 is_finite x /\ not (is_gen_zero x) /\ value x = value y
416 -> not (is_gen_zero y)
420 module GenFloatSpecFull
426 clone export GenFloatFull with axiom .
428 (** {3 binary operations} *)
430 predicate add_post (m:mode) (x:t) (y:t) (r:t) =
431 (is_NaN x \/ is_NaN y -> is_NaN r)
433 (is_finite x /\ is_infinite y -> is_infinite r /\ same_sign r y)
435 (is_infinite x /\ is_finite y -> is_infinite r /\ same_sign r x)
437 (is_infinite x /\ is_infinite y ->
438 if same_sign x y then is_infinite r /\ same_sign r x
441 (is_finite x /\ is_finite y /\ no_overflow m (value x + value y)
443 value r = round m (value x + value y) /\
444 (if same_sign x y then same_sign r x
445 else sign_zero_result m r))
447 (is_finite x /\ is_finite y /\ not no_overflow m (value x + value y)
448 -> SpecialValues.same_sign_real (sign r) (value x + value y)
449 /\ overflow_value m r)
450 /\ exact r = exact x + exact y
451 /\ model r = model x + model y
453 predicate sub_post (m:mode) (x:t) (y:t) (r:t) =
454 (is_NaN x \/ is_NaN y -> is_NaN r)
456 (is_finite x /\ is_infinite y -> is_infinite r /\ diff_sign r y)
458 (is_infinite x /\ is_finite y -> is_infinite r /\ same_sign r x)
460 (is_infinite x /\ is_infinite y ->
461 if diff_sign x y then is_infinite r /\ same_sign r x
464 (is_finite x /\ is_finite y /\ no_overflow m (value x - value y)
466 value r = round m (value x - value y) /\
467 (if diff_sign x y then same_sign r x
468 else sign_zero_result m r))
470 (is_finite x /\ is_finite y /\ not no_overflow m (value x - value y)
471 -> SpecialValues.same_sign_real (sign r) (value x - value y)
472 /\ overflow_value m r)
473 /\ exact r = exact x - exact y
474 /\ model r = model x - model y
476 predicate product_sign (z x y: t) =
477 (same_sign x y -> sign z = Pos) /\ (diff_sign x y -> sign z = Neg)
479 predicate mul_post (m:mode) (x:t) (y:t) (r:t) =
480 (is_NaN x \/ is_NaN y -> is_NaN r)
481 /\ (is_gen_zero x /\ is_infinite y -> is_NaN r)
482 /\ (is_finite x /\ is_infinite y /\ value x <> 0.0
484 /\ (is_infinite x /\ is_gen_zero y -> is_NaN r)
485 /\ (is_infinite x /\ is_finite y /\ value y <> 0.0
487 /\ (is_infinite x /\ is_infinite y -> is_infinite r)
488 /\ (is_finite x /\ is_finite y /\ no_overflow m (value x * value y)
489 -> is_finite r /\ value r = round m (value x * value y))
490 /\ (is_finite x /\ is_finite y /\ not no_overflow m (value x * value y)
491 -> overflow_value m r)
492 /\ (not is_NaN r -> product_sign r x y)
493 /\ exact r = exact x * exact y
494 /\ model r = model x * model y
496 predicate neg_post (x:t) (r:t) =
497 (is_NaN x -> is_NaN r)
498 /\ (is_infinite x -> is_infinite r)
499 /\ (is_finite x -> is_finite r /\ value r = - value x)
500 /\ (not is_NaN r -> diff_sign r x)
501 /\ exact r = - exact x
502 /\ model r = - model x
504 predicate div_post (m:mode) (x:t) (y:t) (r:t) =
505 (is_NaN x \/ is_NaN y -> is_NaN r)
506 /\ (is_finite x /\ is_infinite y -> is_gen_zero r)
507 /\ (is_infinite x /\ is_finite y -> is_infinite r)
508 /\ (is_infinite x /\ is_infinite y -> is_NaN r)
509 /\ (is_finite x /\ is_finite y /\ value y <> 0.0 /\
510 no_overflow m (value x / value y)
511 -> is_finite r /\ value r = round m (value x / value y))
512 /\ (is_finite x /\ is_finite y /\ value y <> 0.0 /\
513 not no_overflow m (value x / value y)
514 -> overflow_value m r)
515 /\ (is_finite x /\ is_gen_zero y /\ value x <> 0.0
517 /\ (is_gen_zero x /\ is_gen_zero y -> is_NaN r)
518 /\ (not is_NaN r -> product_sign r x y)
519 /\ exact r = exact x / exact y
520 /\ model r = model x / model y
522 predicate fma_post (m:mode) (x y z:t) (r:t) =
523 (is_NaN x \/ is_NaN y \/ is_NaN z -> is_NaN r)
524 /\ (is_gen_zero x /\ is_infinite y -> is_NaN r)
525 /\ (is_infinite x /\ is_gen_zero y -> is_NaN r)
526 /\ (is_finite x /\ value x <> 0.0 /\ is_infinite y /\ is_finite z
527 -> is_infinite r /\ product_sign r x y)
528 /\ (is_finite x /\ value x <> 0.0 /\ is_infinite y /\ is_infinite z
529 -> (if product_sign z x y then is_infinite r /\ same_sign r z
531 /\ (is_infinite x /\ is_finite y /\ value y <> 0.0 /\ is_finite z
532 -> is_infinite r /\ product_sign r x y)
533 /\ (is_infinite x /\ is_finite y /\ value y <> 0.0 /\ is_infinite z
534 -> (if product_sign z x y then is_infinite r /\ same_sign r z
536 /\ (is_infinite x /\ is_infinite y /\ is_finite z
537 -> is_infinite r /\ product_sign r x y)
538 /\ (is_finite x /\ is_finite y /\ is_infinite z
539 -> is_infinite r /\ same_sign r z)
540 /\ (is_infinite x /\ is_infinite y /\ is_infinite z
541 -> (if product_sign z x y then is_infinite r /\ same_sign r z
543 /\ (is_finite x /\ is_finite y /\ is_finite z /\
544 no_overflow m (value x * value y + value z)
546 value r = round m (value x * value y + value z) /\
547 (if product_sign z x y then same_sign r z
548 else (value x * value y + value z = 0.0 ->
549 if m = Down then sign r = Neg else sign r = Pos)))
550 /\ (is_finite x /\ is_finite y /\ is_finite z /\
551 not no_overflow m (value x * value y + value z)
552 -> SpecialValues.same_sign_real (sign r) (value x * value y + value z)
553 /\ overflow_value m r)
554 /\ exact r = exact x * exact y + exact z
555 /\ model r = model x * model y + model z
557 predicate sqrt_post (m:mode) (x:t) (r:t) =
558 (is_NaN x -> is_NaN r)
559 /\ (is_plus_infinity x -> is_plus_infinity r)
560 /\ (is_minus_infinity x -> is_NaN r)
561 /\ (is_finite x /\ value x < 0.0 -> is_NaN r)
562 /\ (is_finite x /\ value x = 0.0
563 -> is_finite r /\ value r = 0.0 /\ same_sign r x)
564 /\ (is_finite x /\ value x > 0.0
565 -> is_finite r /\ value r = round m (sqrt (value x)) /\ sign r = Pos)
566 /\ exact r = sqrt (exact x)
567 /\ model r = sqrt (model x)
569 predicate of_real_exact_post (x:real) (r:t) =
570 is_finite r /\ value r = x /\ exact r = x /\ model r = x
572 (** {3 Comparisons} *)
574 predicate le (x:t) (y:t) =
575 (is_finite x /\ is_finite y /\ value x <= value y)
576 \/ (is_minus_infinity x /\ is_not_NaN y)
577 \/ (is_not_NaN x /\ is_plus_infinity y)
579 predicate lt (x:t) (y:t) =
580 (is_finite x /\ is_finite y /\ value x < value y)
581 \/ (is_minus_infinity x /\ is_not_NaN y /\ not (is_minus_infinity y))
582 \/ (is_not_NaN x /\ not (is_plus_infinity x) /\ is_plus_infinity y)
584 predicate ge (x:t) (y:t) = le y x
586 predicate gt (x:t) (y:t) = lt y x
588 predicate eq (x:t) (y:t) =
589 (is_finite x /\ is_finite y /\ value x = value y) \/
590 (is_infinite x /\ is_infinite y /\ same_sign x y)
592 predicate ne (x:t) (y:t) = not (eq x y)
595 forall x y z:t. le x y /\ lt y z -> lt x z
598 forall x y z:t. lt x y /\ le y z -> lt x z
601 forall x y:t. le x y /\ ge x y -> eq x y
603 lemma not_lt_ge: forall x y:t.
604 not (lt x y) /\ is_not_NaN x /\ is_not_NaN y -> ge x y
606 lemma not_gt_le: forall x y:t.
607 not (gt x y) /\ is_not_NaN x /\ is_not_NaN y -> le x y
612 (** {2 Full theory of single precision floats} *)
616 use export SingleFormat
618 constant min_single : real = 0x1p-149
620 clone export GenFloatSpecFull with
622 constant min = min_single,
623 constant max = max_single,
624 constant max_representable_integer = max_int,
625 lemma Bounded_real_no_overflow,
626 axiom . (* TODO: "lemma"? "goal"? *)
630 (** {2 Full theory of double precision floats} *)
634 use export DoubleFormat
636 constant min_double : real = 0x1p-1074
638 clone export GenFloatSpecFull with
640 constant min = min_double,
641 constant max = max_double,
642 constant max_representable_integer = max_int,
643 lemma Bounded_real_no_overflow,
644 axiom . (* TODO: "lemma"? "goal"? *)
650 module GenFloatSpecMultiRounding
655 clone export GenFloat with axiom .
657 (** {3 binary operations} *)
659 constant min_normalized : real
660 constant eps_normalized : real
661 constant eta_normalized : real
663 predicate add_post (_m:mode) (x y res:t) =
664 ((* CASE 1 : normalized numbers *)
665 (abs(value x + value y) >= min_normalized ->
666 abs(value res - (value x + value y)) <= eps_normalized * abs(value x + value y))
668 (*CASE 2: denormalized numbers *)
669 (abs(value x + value y) <= min_normalized ->
670 abs(value res - (value x + value y)) <= eta_normalized))
672 exact res = exact x + exact y
674 model res = model x + model y
677 predicate sub_post (_m:mode) (x y res:t) =
678 ((* CASE 1 : normalized numbers *)
679 (abs(value x - value y) >= min_normalized ->
680 abs(value res - (value x - value y)) <= eps_normalized * abs(value x - value y))
682 (*CASE 2: denormalized numbers *)
683 (abs(value x - value y) <= min_normalized ->
684 abs(value res - (value x - value y)) <= eta_normalized))
686 exact res = exact x - exact y
688 model res = model x - model y
691 predicate mul_post (_m:mode) (x y res:t) =
692 ((* CASE 1 : normalized numbers *)
693 (abs(value x * value y) >= min_normalized ->
694 abs(value res - (value x * value y)) <= eps_normalized * abs(value x * value y))
696 (*CASE 2: denormalized numbers *)
697 (abs(value x * value y) <= min_normalized ->
698 abs(value res - (value x * value y)) <= eta_normalized))
699 /\ exact res = exact x * exact y
700 /\ model res = model x * model y
703 predicate neg_post (_m:mode) (x res:t) =
704 ((abs(value x) >= min_normalized ->
705 abs(value res - (- value x)) <= eps_normalized * abs(- value x))
707 (abs(value x) <= min_normalized ->
708 abs(value res - (- value x)) <= eta_normalized))
709 /\ exact res = - exact x
710 /\ model res = - model x
712 predicate of_real_post (m:mode) (x:real) (res:t) =
713 value res = round m x /\ exact res = x /\ model res = x
715 predicate of_real_exact_post (x:real) (r:t) =
716 value r = x /\ exact r = x /\ model r = x
718 (** {3 Comparisons} *)
720 predicate lt (x:t) (y:t) = value x < value y
722 predicate gt (x:t) (y:t) = value x > value y
727 (*** TODO: find constants for type single *)
730 module SingleMultiRounding
732 constant min_normalized_single : real =
733 constant eps_normalized_single : real =
734 constant eta_normalized_single : real =
736 clone export GenFloatSpecMultiRounding with
738 constant max = max_single,
739 constant max_representable_integer = max_int,
740 constant min_normalized = min_normalized_single,
741 constant eps_normalized = eps_normalized_single,
742 constant eta_normalized = eta_normalized_single,
743 lemma Bounded_real_no_overflow,
744 axiom . (* TODO: "lemma"? "goal"? *)
751 module DoubleMultiRounding
753 use export DoubleFormat
755 constant min_normalized_double : real = 0x1p-1022
756 constant eps_normalized_double : real = 0x1.004p-53
757 constant eta_normalized_double : real = 0x1.002p-1075
759 clone export GenFloatSpecMultiRounding with
761 constant max = max_double,
762 constant max_representable_integer = max_int,
763 constant min_normalized = min_normalized_double,
764 constant eps_normalized = eps_normalized_double,
765 constant eta_normalized = eta_normalized_double,
766 lemma Bounded_real_no_overflow,
767 axiom . (* TODO: "lemma"? "goal"? *)