bench/check-ce-bench

   1 #!/bin/bash
   2 set -euo pipefail
   3
   4
   5 ################################################################################
   6 # Global variables & utility functions
   7
   8 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
   9 readonly SCRIPT_DIR
  10
  11 # Currently the script only works if it is run in this directory.
  12 # TODO Change this dirty workaround.
  13 cd "$SCRIPT_DIR/.."
  14
  15 # Suffix that is appened on the Why3 excecutable name
  16 SUFFIX=.opt
  17
  18 updateoracle=false
  19 removeoutfile=true
  20 debug=false
  21 files=""
  22 petiot=1
  23 failed=""
  24
  25 function usage() {
  26   cat <<EOF
  27 Usage: $(basename "$0") [OPTIONS...] <files>"
  28
  29 Run the check-ce tests on the given <files>.
  30 <files> must be given without the '.mlw' suffix.
  31 If <files> is empty, use all files from directory 'check-ce'.
  32
  33 Options:
  34   -h | -help | --help
  35     Prints this help text.
  36
  37   --update-oracle
  38     Update the oracles during the run of tests suite.
  39
  40   --keep-out-files
  41     Remove out files.
  42
  43   --suffix <s>
  44     The suffix <s> is appended on the Why3 executable name, e.g. '.opt'.
  45     Default: $SUFFIX
  46 EOF
  47 }
  48
  49 ################################################################################
  50 # Parse options
  51
  52 while test $# != 0; do
  53   case "$1" in
  54     -h | -help | --help)
  55       usage
  56       exit 0
  57       ;;
  58     --update-oracle)
  59       updateoracle=true
  60       shift 1
  61       ;;
  62     --keep-out-files)
  63       removeoutfile=false
  64       shift 1
  65       ;;
  66     --suffix)
  67       SUFFIX="$2"
  68       shift 2
  69       ;;
  70     "-"*)
  71       echo "unknown option: $1"
  72       usage
  73       exit 2
  74       ;;
  75     "petiot")
  76         files="$files "
  77         petiot=2
  78         shift 1
  79         ;;
  80     *)
  81         files="$files $1"
  82         if test "$petiot" = 1; then
  83             petiot=0
  84         fi
  85       shift 1
  86   esac
  87 done
  88
  89 # If files is empty, use all files from directory 'check-ce'.
  90 if test "$files" = "" ; then
  91   files="$SCRIPT_DIR/check-ce/*.mlw"
  92 fi
  93
  94 readonly SUFFIX
  95 readonly removeoutfile
  96 readonly updateoracle
  97 readonly files
  98 readonly petiot
  99
 100 # Path to the why3 binary
 101 WHY3_BIN="$SCRIPT_DIR"/../bin/why3"$SUFFIX"
 102 whydata=$("$WHY3_BIN" --print-datadir)
 103 whylib=$("$WHY3_BIN" --print-libdir)
 104 readonly WHY3_BIN
 105 readonly whydata
 106 readonly whylib
 107
 108
 109 colorize() {
 110   if command -v pygmentize &> /dev/null; then
 111     pygmentize -ldiff
 112   else
 113     cat
 114   fi
 115 }
 116
 117 remove_solver_details () {
 118    sed -e "s|$whydata|WHY3DATA|g" -e "s|$whylib|WHY3LIB|g" \
 119    | sed 's/ ([0-9.]\+\.[0-9]\+s, / (/' \
 120 #  | sed 's/ ([0-9.]\+\.[0-9]\+s, [0-9]\+ steps)\.$/\./' \
 121 #  | sed 's/Prover result is: \(Timeout\.\|Unknown (\(unknown\|incomplete\|unknown + \(incomplete\|interrupted\)\))\.\|Out of memory.\|Step limit exceeded\.\|Step limit exceeded ([^)]*)\.\)$/Prover result is: Unknown or time\/memory\/step limit./'
 122 }
 123
 124
 125 record_time () {
 126     elapsed=$(date +%s%N)
 127 }
 128
 129 # print elapsed time
 130 elapsed_time () {
 131     t=$(date +%s%N)
 132     d=$(expr $t - $elapsed)
 133 #    printf "d=${d}\n"
 134     l=$(expr length $d)
 135 #    printf "l=${l}\n"
 136     lmn=$(expr $l - 9 || true)
 137     if [ "$lmn" = "0" ] ; then
 138         d=0"$d"
 139         lmn=1
 140     fi
 141 #    printf "lmn=${lmn}\n"
 142     s=$(expr substr "${d}" 1 ${lmn} || true)
 143 #    printf "s=${s}\n"
 144     c=$(expr substr "${d}" ${lmn} 2 || true)
 145 #    printf "c=${c}\n"
 146     elapsed="$s"."$c"
 147 #    printf "${elapsed}\n"
 148 }
 149
 150
 151
 152
 153 # $1 = prover
 154 # $2 = dir
 155 # $3 = filename
 156 # $4 = true for WP, false for SP
 157 # $5 = steps
 158 # $6 = racsteps
 159 run () {
 160   file_path="$2/$3"
 161   debug_vc_sp=""
 162   if $4; then
 163     f="${file_path}_$1_WP"
 164     oracle_file="$2/oracles/$3_$1_WP.oracle"
 165     printf "${file_path},WP,$1... "
 166   else
 167     f="${file_path}_$1_SP"
 168     oracle_file="$2/oracles/$3_$1_SP.oracle"
 169     debug_vc_sp=",vc_sp"
 170     printf "${file_path},SP,$1... "
 171   fi
 172   steps=$5
 173   racsteps=$6
 174   if [ "$3" = "strings" ] && [[ $1 == CVC* ]] ; then
 175       prover="$1,strings+counterexamples"
 176   else
 177     prover="$1,counterexamples"
 178   fi
 179   # one may add reduction_cont_size,stack_trace, in
 180   # the debug flags below to test cont_invariant in reduction engine
 181   # (note that stack trace may be more detailed with bytecode)
 182   record_time
 183   cmd="$WHY3_BIN prove -a split_vc -P $prover \
 184       --stepslimit=$steps --timelimit=10 \
 185       --rac-steplimit=$steps --rac-timelimit=10 \
 186       --check-ce --rac-prover=$1 --ce-log-verbosity=5 \
 187       --library=$2 ${file_path}.mlw \
 188       --debug=check_ce:categorization${debug_vc_sp}"
 189   record_time
 190   if $debug; then echo $cmd ; fi
 191   ($cmd || true) 2>&1 \
 192     | remove_solver_details \
 193           > "$f.out"
 194   elapsed_time
 195   str_out=$(cat "$f.out")
 196   if [ -e "$oracle_file" ]; then
 197     str_oracle=$(cat "$oracle_file")
 198   else
 199     str_oracle=""
 200   fi
 201   if [ "$str_oracle" = "$str_out" ] ; then
 202     echo "OK (in ${elapsed}s)"
 203   else
 204     if $updateoracle; then
 205       echo "Updating oracle (in ${elapsed}s)"
 206       cp "$f.out" "${oracle_file}"
 207     else
 208       echo "FAILED"
 209       echo "diff is the following:"
 210             (diff -u "$oracle_file" "$f.out" \
 211         || [ $? -eq 1 ])|colorize
 212       failed="$failed$f\n"
 213     fi
 214   fi
 215   if $removeoutfile; then
 216     rm "$f.out"
 217   fi
 218 }
 219
 220
 221
 222 start=$(date +%s)
 223 for file in $files; do
 224   filedir="$(realpath --relative-to="$PWD" $(dirname $file))"
 225   filebase=`basename $file .mlw`
 226     printf "# Running provers on $filedir/$filebase.mlw\n";
 227     altergosteps=10000
 228     altergostepsrac=$altergosteps
 229     skipae=no
 230     skipcvc4=no
 231     cvc4steps=150000
 232     cvc4stepsrac=$cvc4steps
 233     cvc5steps=150000
 234     cvc5stepsrac=$cvc5steps
 235     z3steps=3000000
 236     z3stepsrac=$z3steps
 237     case "$filebase" in
 238       "640_no_loc_failure")
 239           z3steps=500000
 240           ;;
 241       "657")
 242           z3steps=500000
 243           ;;
 244       "668_projection")
 245           z3steps=500000
 246           ;;
 247       "array_records_poly")
 248           skipae=yes
 249           skipcvc4=yes
 250           ;;
 251       "array_records_mono")
 252           z3stepsrac=100
 253           skipae=yes
 254           ;;
 255       "array_mono")
 256           z3steps=100000
 257           ;;
 258       "bv32")
 259           z3steps=2500000
 260           ;;
 261       "bv32_mono")
 262           z3steps=100000
 263           ;;
 264       "bv32_toBig")
 265           z3steps=100000
 266           ;;
 267       "floats")
 268           cvc5stepsrac=10000
 269           z3steps=100000
 270           z3stepsrac=10000
 271           ;;
 272       "int_overflow")
 273           z3steps=400000
 274           ;;
 275       "jlamp0_mono")
 276           skipae=yes
 277           ;;
 278       "jlamp0_poly")
 279           skipae=yes
 280           ;;
 281       "loop_inv_int_mono")
 282           skipae=yes
 283           ;;
 284       "loop_inv_real")
 285           z3steps=1000000
 286           ;;
 287       "maps_mono")
 288           altergosteps=100
 289           altergostepsrac=$altergosteps
 290           ;;
 291       "maps_poly")
 292           cvc5steps=300000
 293           ;;
 294       "strings")
 295           z3steps=3000
 296           ;;
 297       "test_result_ce_value2")
 298           skipae=yes
 299           ;;
 300       "threshold")
 301           z3steps=500000
 302           ;;
 303       "while_mono")
 304           skipae=yes
 305           ;;
 306       *)
 307           ;;
 308     esac
 309     if [ "$skipae" = "yes" ] ; then
 310           echo "Skipping unreproducible run of Alt-Ergo 2.5.4 on $filedir/$filebase.mlw";
 311     else
 312       run Alt-Ergo,2.5.4 $filedir $filebase true  $altergosteps $altergostepsrac
 313       run Alt-Ergo,2.5.4 $filedir $filebase false  $altergosteps $altergostepsrac
 314     fi
 315     if [ "$skipcvc4" = "yes" ] ; then
 316           echo "Skipping unreproducible run of CVC4 1.8 on $filedir/$filebase.mlw";
 317     else
 318         run CVC4,1.8       $filedir $filebase true  $cvc4steps $cvc4stepsrac
 319         run CVC4,1.8       $filedir $filebase false $cvc4steps $cvc4stepsrac
 320     fi
 321     run CVC5,1.0.5     $filedir $filebase true  $cvc5steps $cvc5stepsrac
 322     run CVC5,1.0.5     $filedir $filebase false $cvc5steps $cvc5stepsrac
 323     run Z3,4.8.10      $filedir $filebase true  $z3steps $z3stepsrac
 324     run Z3,4.8.10      $filedir $filebase false $z3steps $z3stepsrac
 325   # fi
 326 done
 327
 328 if $updateoracle; then
 329   updatearg="--updateoracle"
 330 else
 331   updatearg=""
 332 fi
 333
 334 # Check the reproduction of the experiments by Petiot (2018).
 335 # $1 = prover
 336 # $2 = stepslimit
 337 petiot () {
 338 bench/check-ce/petiot2018/experiments.sh \
 339   --prover $1 --rac-prover $1 \
 340   --ce-prover $1,counterexamples --stepslimit $2 \
 341   $updatearg
 342 if [ $? = 1 ]; then failed="${failed}Petiot experiments with $1.\n"; fi
 343 }
 344
 345 if test "$petiot" -gt 0; then
 346     petiot Alt-Ergo,2.5.4  10000
 347     petiot Z3,4.8.10  500000
 348     petiot CVC4,1.8 10000
 349     petiot CVC5,1.0.5 10000
 350 fi
 351
 352 end=$(date +%s)
 353
 354 runtime=$((end-start))
 355 if [ "$failed" = "" ]; then
 356   echo "check-ce-bench: success (runtime = $runtime seconds)"
 357   exit 0
 358 else
 359   printf "\ncheck-ce-bench: failed for the following files (runtime = $runtime seconds)\n$failed\n"
 360   exit 1
 361 fi