examples/maximum_subarray.mlw

   1
   2 (** Maximum subarray problem
   3
   4    Given an array of integers, find the contiguous subarray with the
   5    largest sum. Subarrays of length 0 are allowed (which means that an
   6    array with negative values only has a maximal sum of 0).
   7
   8    Authors: Jean-Christophe Filliâtre (CNRS)
   9             Guillaume Melquiond       (Inria)
  10             Andrei Paskevich          (U-PSUD)
  11 *)
  12
  13 module Spec
  14   use int.Int
  15   use export array.Array
  16
  17   use export array.ArraySum
  18   (* provides [sum a l h] = the sum of a[l..h[ and suitable lemmas *)
  19
  20   (* s is no smaller than sums of subarrays a[l..h[ with 0 <= l < maxlo *)
  21   predicate maxsublo (a: array int) (maxlo: int) (s: int) =
  22     forall l h. 0 <= l < maxlo -> l <= h <= length a -> sum a l h <= s
  23
  24   (* s is no smaller than sums of subarrays of a *)
  25   predicate maxsub (a: array int) (s: int) =
  26     forall l h. 0 <= l <= h <= length a -> sum a l h <= s
  27
  28 end
  29
  30 (** In all codes below, reference ms stands for the maximal sum found so far
  31    and ghost references lo and hi hold the bounds for this sum *)
  32
  33 (** Naive solution, in O(N^3) *)
  34 module Algo1
  35
  36   use int.Int
  37   use ref.Refint
  38   use Spec
  39
  40   let maximum_subarray (a: array int) (ghost ref lo hi: int): int
  41     ensures { 0 <= lo <= hi <= length a && result = sum a lo hi }
  42     ensures { maxsub a result }
  43   = lo <- 0;
  44     hi <- 0;
  45     let n = length a in
  46     let ref ms = 0 in
  47     for l = 0 to n-1 do
  48       invariant { 0 <= lo <= l && lo <= hi <= n && ms = sum a lo hi }
  49       invariant { maxsublo a l ms }
  50       for h = l to n do
  51         invariant { 0 <= lo <= l && lo <= hi <= n && ms = sum a lo hi }
  52         invariant { maxsublo a l ms }
  53         invariant { forall h'. l <= h' < h -> sum a l h' <= ms }
  54         (* compute the sum of a[l..h[ *)
  55         let ref s = 0 in
  56         for i = l to h-1 do
  57           invariant { s = sum a l i }
  58           invariant { 0 <= lo <= l && lo <= hi <= n && ms = sum a lo hi }
  59           s += a[i]
  60         done;
  61         assert { s = sum a l h };
  62         if s > ms then begin ms <- s; lo <- l; hi <- h end
  63       done
  64     done;
  65     ms
  66
  67 end
  68
  69 (** Slightly less naive solution, in O(N^2)
  70    Do not recompute the sum, simply update it *)
  71
  72 module Algo2
  73
  74   use int.Int
  75   use ref.Refint
  76   use Spec
  77
  78   let maximum_subarray (a: array int) (ghost ref lo hi: int): int
  79     ensures { 0 <= lo <= hi <= length a && result = sum a lo hi }
  80     ensures { maxsub a result }
  81   = lo <- 0;
  82     hi <- 0;
  83     let n = length a in
  84     let ref ms = 0 in
  85     for l = 0 to n-1 do
  86       invariant { 0 <= lo <= l && lo <= hi <= n && 0 <= ms = sum a lo hi }
  87       invariant { maxsublo a l ms }
  88       let ref s = 0 in
  89       for h = l+1 to n do
  90         invariant
  91                 { 0 <= lo <= l && lo <= hi <= n && 0 <= ms = sum a lo hi }
  92         invariant { maxsublo a l ms }
  93         invariant { forall h'. l <= h' < h -> sum a l h' <= ms }
  94         invariant { s = sum a l (h-1) }
  95         s += a[h-1]; (* update the sum *)
  96         assert { s = sum a l h };
  97         if s > ms then begin ms <- s; lo <- l; hi <- h end
  98       done
  99     done;
 100     ms
 101
 102 end
 103
 104 (** Divide-and-conqueer solution, in O(N log N) *)
 105
 106 module Algo3
 107
 108   use int.Int
 109   use ref.Refint
 110   use int.ComputerDivision
 111   use Spec
 112
 113   let rec maximum_subarray_rec (a: array int) (l h: int) (ghost ref lo hi: int)
 114     : int
 115     requires { 0 <= l <= h <= length a }
 116     ensures  { l <= lo <= hi <= h && result = sum a lo hi }
 117     ensures  { forall l' h'. l <= l' <= h' <= h -> sum a l' h' <= result }
 118     variant  { h - l }
 119   = if h = l then begin
 120       (* base case: no element at all *)
 121       lo <- l; hi <- h; 0
 122     end else begin
 123       (* at least one element *)
 124       let mid = l + div (h - l) 2 in
 125       (* first consider all sums that include a[mid] *)
 126       lo <- mid; hi <- mid;
 127       let ref ms = 0 in
 128       let ref s  = ms in
 129       for i = mid-1 downto l do
 130         invariant { l <= lo <= mid = hi && ms = sum a lo hi }
 131         invariant { forall l'. i < l' <= mid -> sum a l' mid <= ms }
 132         invariant { s = sum a (i+1) mid }
 133         s += a[i];
 134         assert { s = sum a i mid };
 135         if s > ms then begin ms <- s; lo <- i end
 136       done;
 137       assert { forall l'. l <= l' <= mid ->
 138                sum a l' mid <= sum a lo mid };
 139       s <- ms;
 140       for i = mid to h-1 do
 141         invariant { l <= lo <= mid <= hi <= h && ms = sum a lo hi }
 142         invariant { forall l' h'. l <= l' <= mid <= h' <= i ->
 143                     sum a l' h' <= ms }
 144         invariant { s = sum a lo i }
 145         s += a[i];
 146         assert { s = sum a lo (i+1) };
 147         assert { s = sum a lo mid + sum a mid (i+1) };
 148         if s > ms then begin ms <- s; hi <- (i+1) end
 149       done;
 150       (* then consider sums in a[l..mid[ and a[mid+1..h[, recursively *)
 151       begin
 152          let ghost ref lo' = 0 in
 153          let ghost ref hi' = 0 in
 154          let left = maximum_subarray_rec a l mid lo' hi' in
 155          if left > ms then begin ms <- left; lo <- lo'; hi <- hi' end
 156       end;
 157       begin
 158          let ghost ref lo' = 0 in
 159          let ghost ref hi' = 0 in
 160          let right = maximum_subarray_rec a (mid+1) h lo' hi' in
 161          if right > ms then begin ms <- right; lo <- lo'; hi <- hi' end
 162       end;
 163       ms
 164     end
 165
 166  let maximum_subarray (a: array int) (ghost ref lo hi: int): int
 167     ensures { 0 <= lo <= hi <= length a && result = sum a lo hi }
 168     ensures { maxsub a result }
 169   = maximum_subarray_rec a 0 (length a) lo hi
 170
 171 end
 172
 173 (** Optimal solution, in O(N)
 174     Known as Kadane's algorithm
 175
 176     The key idea is to maintain, in addition to the best sum found so far,
 177     the best sum that ends at the current point.
 178
 179                                      i
 180      [ 1 | 7 | -3 | 4 | -7 | 1 | 2 | ...
 181       <-------------->             |
 182     max sum so far is 9     <----->
 183                     max sum ending at i is 3
 184
 185     Then, for each new value a[i], we
 186     1. update the sum ending at i (in particular, setting it to 0 if a[i]<0);
 187     2. update the maximal sum.
 188 *)
 189
 190 module Algo4
 191
 192   use int.Int
 193   use ref.Refint
 194   use Spec
 195
 196   let maximum_subarray (a: array int) (ghost ref lo hi: int): int
 197     ensures { 0 <= lo <= hi <= length a && result = sum a lo hi }
 198     ensures { maxsub a result }
 199   = lo <- 0;
 200     hi <- 0;
 201     let n = length a in
 202     let ref ms = 0 in
 203     let ghost ref l = 0 in
 204     let ref s = 0 in
 205     for i = 0 to n-1 do
 206       invariant { 0 <= lo <= hi <= i && 0 <= ms = sum a lo hi }
 207       invariant { forall l' h'. 0 <= l' <= h' <= i -> sum a l' h' <= ms }
 208       invariant { 0 <= l <= i && s = sum a l i }
 209       invariant { forall l'. 0 <= l' < i -> sum a l' i <= s }
 210       if s < 0 then begin s <- a[i]; l <- i end else s += a[i];
 211       if s > ms then begin ms <- s; lo <- l; hi <- (i+1) end
 212     done;
 213     ms
 214
 215 end
 216
 217 (** A slightly different implementation of Kadane's algorithm *)
 218
 219 module Algo5
 220
 221   use int.Int
 222   use ref.Refint
 223   use export array.Array
 224   use export array.ArraySum
 225
 226 (*
 227     [| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |]
 228      ......|###### maxsum #######|..............
 229      ............................. |## curmax ##
 230 *)
 231
 232   let maximum_subarray (a: array int): int
 233     ensures { forall l h. 0 <= l <= h <= length a -> sum a l h <= result }
 234     ensures { exists l h. 0 <= l <= h <= length a /\ sum a l h  = result }
 235   =
 236     let ref maxsum = 0 in
 237     let ref curmax = 0 in
 238     let ghost ref lo = 0 in
 239     let ghost ref hi = 0 in
 240     let ghost ref cl = 0 in
 241     for i = 0 to a.length - 1 do
 242       invariant { forall l. 0 <= l <= i -> sum a l i <= curmax }
 243       invariant { 0 <= cl <= i /\ sum a cl i  = curmax }
 244       invariant { forall l h. 0 <= l <= h <= i -> sum a l h <= maxsum }
 245       invariant { 0 <= lo <= hi <= i /\ sum a lo hi = maxsum }
 246       curmax += a[i];
 247       if curmax < 0 then begin curmax <- 0; cl <- i+1 end;
 248       if curmax > maxsum then begin maxsum <- curmax; lo <- cl; hi <- i+1 end
 249     done;
 250     maxsum
 251
 252 end
 253
 254 (** Kadane's algorithm with 63-bit integers
 255
 256    Interestingly, we only have to require all sums to be no greater
 257    than max_int.  There is no need to require the sums to be no
 258    smaller than min_int, since whenever the sum becomes negative it is
 259    replaced by the next element. *)
 260
 261 module BoundedIntegers
 262
 263   use int.Int
 264   use mach.int.Int63
 265   use mach.int.Refint63
 266   use seq.Seq
 267   use mach.array.Array63
 268   use int.Sum
 269
 270   function sum (a: array int63) (lo hi: int) : int =
 271     Sum.sum (fun i -> (a[i] : int)) lo hi
 272
 273   let maximum_subarray (a: array int63) (ghost ref lo hi: int): int63
 274     requires { [@no overflow] forall l h. 0 <= l <= h <= length a ->
 275                sum a l h <= max_int }
 276     ensures { 0 <= lo <= hi <= length a && result  = sum a lo hi }
 277     ensures { forall l h. 0 <= l <= h <= length a -> result >= sum a lo hi }
 278   = lo <- 0;
 279     hi <- 0;
 280     let n = length a in
 281     let ref ms = zero in
 282     let ghost ref l = 0 in
 283     let ref s = zero in
 284     let ref i = zero in
 285     while i < n do
 286       invariant { 0 <= lo <= hi <= i <= n && 0 <= ms = sum a lo hi }
 287       invariant { forall l' h'. 0 <= l' <= h' <= i -> sum a l' h' <= ms }
 288       invariant { 0 <= l <= i && s = sum a l i }
 289       invariant { forall l'. 0 <= l' < i -> sum a l' i <= s }
 290       variant   { n - i }
 291       if s < zero then begin s <- a[i]; l <- to_int i end
 292       else begin assert { sum a l (i + 1) <= max_int }; s += a[i] end;
 293       if s > ms then begin
 294         ms <- s; lo <- l; hi <- to_int i + 1 end;
 295       incr i
 296     done;
 297     ms
 298
 299 end
 300
 301 (** Variant where we seek for the maximal product instead of the
 302    maximal sum.
 303
 304    This is an exercise in Jeff Erickson's book "Algorithms", and the
 305    author reports that most solutions he could find online were
 306    incorrect. Indeed, this happens to be subtle to get right.
 307
 308    The idea is to maintain *two* maximal products ending at position i,
 309    one positive and one negative.
 310
 311              maximum so far is 10
 312              <---->                          i
 313     3   0    5    2    -1    2    4    1  |  ...
 314                              <---------->
 315                        maximum positive product so far is 6
 316              <-------------------------->
 317              maximum negative product so far is -60
 318
 319 *)
 320
 321 module MaxProd
 322
 323   use int.Int
 324   use ref.Refint
 325   use export array.Array
 326
 327   (** the product of a[lo..hi[ *)
 328   let rec function prod (a: array int) (lo hi: int) : int
 329     requires { 0 <= lo <= hi <= length a }
 330     variant  { hi-lo }
 331   = if lo = hi then 1 else prod a lo (hi-1) * a[hi-1]
 332
 333   let maximum_subarray (a: array int): int
 334     ensures { forall l h. 0 <= l <= h <= length a -> prod a l h <= result }
 335     ensures { exists l h. 0 <= l <= h <= length a /\ prod a l h  = result }
 336   =
 337     let ref maxprd = 1 in
 338     let ref curmaxp = 1 in
 339     let ref curmaxn = 0 in
 340     let ghost ref lo = 0 in
 341     let ghost ref hi = 0 in
 342     let ghost ref clp = 0 in
 343     let ghost ref cln = 0 in
 344     for i = 0 to a.length - 1 do
 345       invariant { 0 <= clp <= i /\ prod a clp i = curmaxp >= 1 }
 346       invariant { forall l. 0 <= l <= i -> 0 <= prod a l i ->
 347                     prod a l i <= curmaxp }
 348       invariant { curmaxn <= 0 }
 349       invariant { curmaxn < 0 -> 0 <= cln <= i /\ prod a cln i = curmaxn < 0 }
 350       invariant { curmaxn < 0 -> forall l. 0 <= l <= i -> prod a l i < 0 ->
 351                     curmaxn <= prod a l i }
 352       invariant { curmaxn = 0 -> forall l. 0 <= l <= i -> prod a l i >= 0 }
 353       invariant { forall l h. 0 <= l <= h <= i -> prod a l h <= maxprd }
 354       invariant { 0 <= lo <= hi <= i /\ prod a lo hi = maxprd >= 1 }
 355       if a[i] = 0 then (
 356         curmaxp <- 1; clp <- i+1; curmaxn <- 0; cln <- i+1 )
 357       else if a[i] > 0 then (
 358         curmaxp <- curmaxp * a[i];
 359         curmaxn <- curmaxn * a[i]; )
 360       else (* a[i] < 0 *)
 361         if curmaxn < 0 then (
 362           curmaxp, curmaxn <- curmaxn * a[i], curmaxp * a[i];
 363           clp, cln <- cln, clp )
 364         else ( (* curmaxn = 0 i.e. no negative product *)
 365           curmaxp, curmaxn <- 1, curmaxp * a[i];
 366           clp, cln <- i+1, clp; )
 367       ;
 368       if curmaxp > maxprd then (
 369         maxprd <- curmaxp; lo <- clp; hi <- i+1
 370       )
 371     done;
 372     maxprd
 373
 374 end