llvm/test/Transforms/LoopVectorize/if-reduction.ll

   1 ; RUN: opt -S -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
   2
   3 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
   4
   5 ; Float pattern:
   6 ;   Check vectorization of reduction code which has an fadd instruction after
   7 ;   an fcmp instruction which compares an array element and 0.
   8 ;
   9 ; float fcmp_0_fadd_select1(ptr restrict x, const int N) {
  10 ;   float sum = 0.
  11 ;   for (int i = 0; i < N; ++i)
  12 ;     if (x[i] > (float)0.)
  13 ;       sum += x[i];
  14 ;   return sum;
  15 ; }
  16
  17 ; CHECK-LABEL: @fcmp_0_fadd_select1(
  18 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
  19 ; CHECK: %[[V3:.*]] = fadd fast <4 x float> %[[V0]], %[[V2:.*]]
  20 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
  21 define float @fcmp_0_fadd_select1(ptr noalias %x, i32 %N) nounwind readonly {
  22 entry:                                            ; loop guard: the body only runs when %N > 0
  23   %cmp.1 = icmp sgt i32 %N, 0
  24   br i1 %cmp.1, label %for.header, label %for.end
  25
  26 for.header:                                       ; preds = %entry
  27   %zext = zext i32 %N to i64
  28   br label %for.body
  29
  30 for.body:                                         ; preds = %for.header, %for.body
  31   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  32   %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running sum, starts at 0.0
  33   %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
  34   %0 = load float, ptr %arrayidx, align 4
  35   %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
  36   %add = fadd fast float %0, %sum.1
  37   %sum.2 = select i1 %cmp.2, float %add, float %sum.1 ; keep the old sum when the compare is false
  38   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  39   %exitcond = icmp eq i64 %indvars.iv.next, %zext
  40   br i1 %exitcond, label %for.end, label %for.body
  41
  42 for.end:                                          ; preds = %for.body, %entry
  43   %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  44   ret float %1
  45 }
  46
  47 ; Double pattern:
  48 ;   Check vectorization of reduction code which has an fadd instruction after
  49 ;   an fcmp instruction which compares an array element and 0.
  50 ;
  51 ; double fcmp_0_fadd_select2(ptr restrict x, const int N) {
  52 ;   double sum = 0.
  53 ;   for (int i = 0; i < N; ++i)
  54 ;     if (x[i] > 0.)
  55 ;       sum += x[i];
  56 ;   return sum;
  57 ; }
  58
  59 ; CHECK-LABEL: @fcmp_0_fadd_select2(
  60 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
  61 ; CHECK: %[[V3:.*]] = fadd fast <4 x double> %[[V0]], %[[V2:.*]]
  62 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
  63 define double @fcmp_0_fadd_select2(ptr noalias %x, i32 %N) nounwind readonly {
  64 entry:                                            ; loop guard: the body only runs when %N > 0
  65   %cmp.1 = icmp sgt i32 %N, 0
  66   br i1 %cmp.1, label %for.header, label %for.end
  67
  68 for.header:                                       ; preds = %entry
  69   %zext = zext i32 %N to i64
  70   br label %for.body
  71
  72 for.body:                                         ; preds = %for.header, %for.body
  73   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  74   %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running sum, starts at 0.0
  75   %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
  76   %0 = load double, ptr %arrayidx, align 4
  77   %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
  78   %add = fadd fast double %0, %sum.1
  79   %sum.2 = select i1 %cmp.2, double %add, double %sum.1 ; keep the old sum when the compare is false
  80   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  81   %exitcond = icmp eq i64 %indvars.iv.next, %zext
  82   br i1 %exitcond, label %for.end, label %for.body
  83
  84 for.end:                                          ; preds = %for.body, %entry
  85   %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  86   ret double %1
  87 }
  88
  89 ; Float pattern:
  90 ;   Check vectorization of reduction code which has an fadd instruction after
  91 ;   an fcmp instruction which compares an array element and a floating-point
  92 ;   value.
  93 ;
  94 ; float fcmp_val_fadd_select1(ptr restrict x, float y, const int N) {
  95 ;   float sum = 0.
  96 ;   for (int i = 0; i < N; ++i)
  97 ;     if (x[i] > y)
  98 ;       sum += x[i];
  99 ;   return sum;
 100 ; }
 101
 102 ; CHECK-LABEL: @fcmp_val_fadd_select1(
 103 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], %broadcast.splat
 104 ; CHECK: %[[V3:.*]] = fadd fast <4 x float> %[[V0]], %[[V2:.*]]
 105 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
 106 define float @fcmp_val_fadd_select1(ptr noalias %x, float %y, i32 %N) nounwind readonly {
 107 entry:                                            ; loop guard: the body only runs when %N > 0
 108   %cmp.1 = icmp sgt i32 %N, 0
 109   br i1 %cmp.1, label %for.header, label %for.end
 110
 111 for.header:                                       ; preds = %entry
 112   %zext = zext i32 %N to i64
 113   br label %for.body
 114
 115 for.body:                                         ; preds = %for.header, %for.body
 116   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 117   %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running sum, starts at 0.0
 118   %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
 119   %0 = load float, ptr %arrayidx, align 4
 120   %cmp.2 = fcmp fast ogt float %0, %y ; compares against the function argument %y
 121   %add = fadd fast float %0, %sum.1
 122   %sum.2 = select i1 %cmp.2, float %add, float %sum.1 ; keep the old sum when the compare is false
 123   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 124   %exitcond = icmp eq i64 %indvars.iv.next, %zext
 125   br i1 %exitcond, label %for.end, label %for.body
 126
 127 for.end:                                          ; preds = %for.body, %entry
 128   %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
 129   ret float %1
 130 }
 131
 132 ; Double pattern:
 133 ;   Check vectorization of reduction code which has an fadd instruction after
 134 ;   an fcmp instruction which compares an array element and a floating-point
 135 ;   value.
 136 ;
 137 ; double fcmp_val_fadd_select2(ptr restrict x, double y, const int N) {
 138 ;   double sum = 0.
 139 ;   for (int i = 0; i < N; ++i)
 140 ;     if (x[i] > y)
 141 ;       sum += x[i];
 142 ;   return sum;
 143 ; }
 144
 145 ; CHECK-LABEL: @fcmp_val_fadd_select2(
 146 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], %broadcast.splat
 147 ; CHECK: %[[V3:.*]] = fadd fast <4 x double> %[[V0]], %[[V2:.*]]
 148 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
 149 define double @fcmp_val_fadd_select2(ptr noalias %x, double %y, i32 %N) nounwind readonly {
 150 entry:                                            ; loop guard: the body only runs when %N > 0
 151   %cmp.1 = icmp sgt i32 %N, 0
 152   br i1 %cmp.1, label %for.header, label %for.end
 153
 154 for.header:                                       ; preds = %entry
 155   %zext = zext i32 %N to i64
 156   br label %for.body
 157
 158 for.body:                                         ; preds = %for.header, %for.body
 159   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 160   %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running sum, starts at 0.0
 161   %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
 162   %0 = load double, ptr %arrayidx, align 4
 163   %cmp.2 = fcmp fast ogt double %0, %y ; compares against the function argument %y
 164   %add = fadd fast double %0, %sum.1
 165   %sum.2 = select i1 %cmp.2, double %add, double %sum.1 ; keep the old sum when the compare is false
 166   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 167   %exitcond = icmp eq i64 %indvars.iv.next, %zext
 168   br i1 %exitcond, label %for.end, label %for.body
 169
 170 for.end:                                          ; preds = %for.body, %entry
 171   %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
 172   ret double %1
 173 }
 174
 175 ; Float pattern:
 176 ;   Check vectorization of reduction code which has an fadd instruction after
 177 ;   an fcmp instruction which compares an array element and another array
 178 ;   element.
 179 ;
 180 ; float fcmp_array_elm_fadd_select1(ptr restrict x, ptr restrict y,
 181 ;                                   const int N) {
 182 ;   float sum = 0.
 183 ;   for (int i = 0; i < N; ++i)
 184 ;     if (x[i] > y[i])
 185 ;       sum += x[i];
 186 ;   return sum;
 187 ; }
 188
 189 ; CHECK-LABEL: @fcmp_array_elm_fadd_select1(
 190 ; CHECK: %[[V2:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], %[[V1:.*]]
 191 ; CHECK: %[[V4:.*]] = fadd fast <4 x float> %[[V0]], %[[V3:.*]]
 192 ; CHECK: select <4 x i1> %[[V2]], <4 x float> %[[V4]], <4 x float> %[[V3]]
 193 define float @fcmp_array_elm_fadd_select1(ptr noalias %x, ptr noalias %y, i32 %N) nounwind readonly {
 194 entry:                                            ; loop guard: the body only runs when %N > 0
 195   %cmp.1 = icmp sgt i32 %N, 0
 196   br i1 %cmp.1, label %for.header, label %for.end
 197
 198 for.header:                                       ; preds = %entry
 199   %zext = zext i32 %N to i64
 200   br label %for.body
 201
 202 for.body:                                         ; preds = %for.body, %for.header
 203   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 204   %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running sum, starts at 0.0
 205   %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
 206   %0 = load float, ptr %arrayidx.1, align 4
 207   %arrayidx.2 = getelementptr inbounds float, ptr %y, i64 %indvars.iv
 208   %1 = load float, ptr %arrayidx.2, align 4 ; element of %y at the same index as %0
 209   %cmp.2 = fcmp fast ogt float %0, %1
 210   %add = fadd fast float %0, %sum.1
 211   %sum.2 = select i1 %cmp.2, float %add, float %sum.1 ; keep the old sum when the compare is false
 212   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 213   %exitcond = icmp eq i64 %indvars.iv.next, %zext
 214   br i1 %exitcond, label %for.end, label %for.body
 215
 216 for.end:                                          ; preds = %for.body, %entry
 217   %2 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
 218   ret float %2
 219 }
 220
 221 ; Double pattern:
 222 ;   Check vectorization of reduction code which has an fadd instruction after
 223 ;   an fcmp instruction which compares an array element and another array
 224 ;   element.
 225 ;
 226 ; double fcmp_array_elm_fadd_select2(ptr restrict x, ptr restrict y,
 227 ;                                    const int N) {
 228 ;   double sum = 0.
 229 ;   for (int i = 0; i < N; ++i)
 230 ;     if (x[i] > y[i])
 231 ;       sum += x[i];
 232 ;   return sum;
 233 ; }
 234
 235 ; CHECK-LABEL: @fcmp_array_elm_fadd_select2(
 236 ; CHECK: %[[V2:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], %[[V1:.*]]
 237 ; CHECK: %[[V4:.*]] = fadd fast <4 x double> %[[V0]], %[[V3:.*]]
 238 ; CHECK: select <4 x i1> %[[V2]], <4 x double> %[[V4]], <4 x double> %[[V3]]
 239 define double @fcmp_array_elm_fadd_select2(ptr noalias %x, ptr noalias %y, i32 %N) nounwind readonly {
 240 entry:                                            ; loop guard: the body only runs when %N > 0
 241   %cmp.1 = icmp sgt i32 %N, 0
 242   br i1 %cmp.1, label %for.header, label %for.end
 243
 244 for.header:                                       ; preds = %entry
 245   %zext = zext i32 %N to i64
 246   br label %for.body
 247
 248 for.body:                                         ; preds = %for.body, %for.header
 249   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 250   %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running sum, starts at 0.0
 251   %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
 252   %0 = load double, ptr %arrayidx.1, align 4
 253   %arrayidx.2 = getelementptr inbounds double, ptr %y, i64 %indvars.iv
 254   %1 = load double, ptr %arrayidx.2, align 4 ; element of %y at the same index as %0
 255   %cmp.2 = fcmp fast ogt double %0, %1
 256   %add = fadd fast double %0, %sum.1
 257   %sum.2 = select i1 %cmp.2, double %add, double %sum.1 ; keep the old sum when the compare is false
 258   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 259   %exitcond = icmp eq i64 %indvars.iv.next, %zext
 260   br i1 %exitcond, label %for.end, label %for.body
 261
 262 for.end:                                          ; preds = %for.body, %entry
 263   %2 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
 264   ret double %2
 265 }
 266
 267 ; Float pattern:
 268 ;   Check vectorization of reduction code which has an fsub instruction after
 269 ;   an fcmp instruction which compares an array element and 0.
 270 ;
 271 ; float fcmp_0_fsub_select1(ptr restrict x, const int N) {
 272 ;   float sum = 0.
 273 ;   for (int i = 0; i < N; ++i)
 274 ;     if (x[i] > (float)0.)
 275 ;       sum -= x[i];
 276 ;   return sum;
 277 ; }
 278
 279 ; CHECK-LABEL: @fcmp_0_fsub_select1(
 280 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
 281 ; CHECK: %[[V3:.*]] = fsub fast <4 x float> %[[V2:.*]], %[[V0]]
 282 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
 283 define float @fcmp_0_fsub_select1(ptr noalias %x, i32 %N) nounwind readonly {
 284 entry:                                            ; loop guard: the body only runs when %N > 0
 285   %cmp.1 = icmp sgt i32 %N, 0
 286   br i1 %cmp.1, label %for.header, label %for.end
 287
 288 for.header:                                       ; preds = %entry
 289   %zext = zext i32 %N to i64
 290   br label %for.body
 291
 292 for.body:                                         ; preds = %for.body, %for.header
 293   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 294   %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running value, starts at 0.0
 295   %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
 296   %0 = load float, ptr %arrayidx, align 4
 297   %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
 298   %sub = fsub fast float %sum.1, %0 ; the accumulator is the left operand of the subtraction
 299   %sum.2 = select i1 %cmp.2, float %sub, float %sum.1 ; keep the old value when the compare is false
 300   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 301   %exitcond = icmp eq i64 %indvars.iv.next, %zext
 302   br i1 %exitcond, label %for.end, label %for.body
 303
 304 for.end:                                          ; preds = %for.body, %entry
 305   %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
 306   ret float %1
 307 }
 308
 309 ; Float pattern:
 310 ;   Check that the loop is not vectorized if the fp-instruction has no fast-math property.
 311 ; float fcmp_0_fsub_select1_novectorize(ptr restrict x, const int N) {
 312 ;   float sum = 0.
 313 ;   for (int i = 0; i < N; ++i)
 314 ;     if (x[i] > (float)0.)
 315 ;       sum -= x[i];
 316 ;   return sum;
 317 ; }
 318
 319 ; CHECK-LABEL: @fcmp_0_fsub_select1_novectorize(
 320 ; CHECK-NOT: <4 x float>
 321 define float @fcmp_0_fsub_select1_novectorize(ptr noalias %x, i32 %N) nounwind readonly {
 322 entry:                                            ; loop guard: the body only runs when %N > 0
 323   %cmp.1 = icmp sgt i32 %N, 0
 324   br i1 %cmp.1, label %for.header, label %for.end
 325
 326 for.header:                                       ; preds = %entry
 327   %zext = zext i32 %N to i64
 328   br label %for.body
 329
 330 for.body:                                         ; preds = %for.body, %for.header
 331   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 332   %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running value, starts at 0.0
 333   %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
 334   %0 = load float, ptr %arrayidx, align 4
 335   %cmp.2 = fcmp ogt float %0, 0.000000e+00
 336   %sub = fsub float %sum.1, %0 ; no fast-math flags on this fsub or on the fcmp above
 337   %sum.2 = select i1 %cmp.2, float %sub, float %sum.1 ; keep the old value when the compare is false
 338   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 339   %exitcond = icmp eq i64 %indvars.iv.next, %zext
 340   br i1 %exitcond, label %for.end, label %for.body
 341
 342 for.end:                                          ; preds = %for.body, %entry
 343   %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
 344   ret float %1
 345 }
 346
 347 ; Double pattern:
 348 ;   Check vectorization of reduction code which has an fsub instruction after
 349 ;   an fcmp instruction which compares an array element and 0.
 350 ;
 351 ; double fcmp_0_fsub_select2(ptr restrict x, const int N) {
 352 ;   double sum = 0.
 353 ;   for (int i = 0; i < N; ++i)
 354 ;     if (x[i] > 0.)
 355 ;       sum -= x[i];
 356 ;   return sum;
 357 ; }
 358
 359 ; CHECK-LABEL: @fcmp_0_fsub_select2(
 360 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
 361 ; CHECK: %[[V3:.*]] = fsub fast <4 x double> %[[V2:.*]], %[[V0]]
 362 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
 363 define double @fcmp_0_fsub_select2(ptr noalias %x, i32 %N) nounwind readonly {
 364 entry:                                            ; loop guard: the body only runs when %N > 0
 365   %cmp.1 = icmp sgt i32 %N, 0
 366   br i1 %cmp.1, label %for.header, label %for.end
 367
 368 for.header:                                       ; preds = %entry
 369   %zext = zext i32 %N to i64
 370   br label %for.body
 371
 372 for.body:                                         ; preds = %for.body, %for.header
 373   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 374   %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running value, starts at 0.0
 375   %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
 376   %0 = load double, ptr %arrayidx, align 4
 377   %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
 378   %sub = fsub fast double %sum.1, %0 ; the accumulator is the left operand of the subtraction
 379   %sum.2 = select i1 %cmp.2, double %sub, double %sum.1 ; keep the old value when the compare is false
 380   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 381   %exitcond = icmp eq i64 %indvars.iv.next, %zext
 382   br i1 %exitcond, label %for.end, label %for.body
 383
 384 for.end:                                          ; preds = %for.body, %entry
 385   %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
 386   ret double %1
 387 }
 388
 389 ; Double pattern:
 390 ;   Check that the loop is not vectorized if the fp-instruction has no fast-math property.
 391 ;
 392 ; double fcmp_0_fsub_select2_notvectorize(ptr restrict x, const int N) {
 393 ;   double sum = 0.
 394 ;   for (int i = 0; i < N; ++i)
 395 ;     if (x[i] > 0.)
 396 ;       sum -= x[i];
 397 ;   return sum;
 398 ; }
 399
 400 ; CHECK-LABEL: @fcmp_0_fsub_select2_notvectorize(
 401 ; CHECK-NOT: <4 x double>
 402 define double @fcmp_0_fsub_select2_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
 403 entry:                                            ; loop guard: the body only runs when %N > 0
 404   %cmp.1 = icmp sgt i32 %N, 0
 405   br i1 %cmp.1, label %for.header, label %for.end
 406
 407 for.header:                                       ; preds = %entry
 408   %zext = zext i32 %N to i64
 409   br label %for.body
 410
 411 for.body:                                         ; preds = %for.body, %for.header
 412   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 413   %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running value, starts at 0.0
 414   %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
 415   %0 = load double, ptr %arrayidx, align 4
 416   %cmp.2 = fcmp ogt double %0, 0.000000e+00
 417   %sub = fsub double %sum.1, %0 ; no fast-math flags on this fsub or on the fcmp above
 418   %sum.2 = select i1 %cmp.2, double %sub, double %sum.1 ; keep the old value when the compare is false
 419   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 420   %exitcond = icmp eq i64 %indvars.iv.next, %zext
 421   br i1 %exitcond, label %for.end, label %for.body
 422
 423 for.end:                                          ; preds = %for.body, %entry
 424   %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
 425   ret double %1
 426 }
 427
 428 ; Float pattern:
 429 ;   Check vectorization of reduction code which has an fmul instruction after
 430 ;   an fcmp instruction which compares an array element and 0.
 431 ;
 432 ; float fcmp_0_fmult_select1(ptr restrict x, const int N) {
 433 ;   float sum = 0.
 434 ;   for (int i = 0; i < N; ++i)
 435 ;     if (x[i] > (float)0.)
 436 ;       sum *= x[i];
 437 ;   return sum;
 438 ; }
 439
 440 ; CHECK-LABEL: @fcmp_0_fmult_select1(
 441 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
 442 ; CHECK: %[[V3:.*]] = fmul fast <4 x float> %[[V2:.*]], %[[V0]]
 443 ; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
 444 define float @fcmp_0_fmult_select1(ptr noalias %x, i32 %N) nounwind readonly {
 445 entry:                                            ; loop guard: the body only runs when %N > 0
 446   %cmp.1 = icmp sgt i32 %N, 0
 447   br i1 %cmp.1, label %for.header, label %for.end
 448
 449 for.header:                                       ; preds = %entry
 450   %zext = zext i32 %N to i64
 451   br label %for.body
 452
 453 for.body:                                         ; preds = %for.body, %for.header
 454   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 455   %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running product, starts at 0.0
 456   %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
 457   %0 = load float, ptr %arrayidx, align 4
 458   %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
 459   %mult = fmul fast float %sum.1, %0
 460   %sum.2 = select i1 %cmp.2, float %mult, float %sum.1 ; keep the old value when the compare is false
 461   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 462   %exitcond = icmp eq i64 %indvars.iv.next, %zext
 463   br i1 %exitcond, label %for.end, label %for.body
 464
 465 for.end:                                          ; preds = %for.body, %entry
 466   %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
 467   ret float %1
 468 }
 469
 470 ; Float pattern:
 471 ;   Check that the loop is not vectorized if the fp-instruction has no fast-math property.
 472 ;
 473 ; float fcmp_0_fmult_select1_notvectorize(ptr restrict x, const int N) {
 474 ;   float sum = 0.
 475 ;   for (int i = 0; i < N; ++i)
 476 ;     if (x[i] > (float)0.)
 477 ;       sum *= x[i];
 478 ;   return sum;
 479 ; }
 480
 481 ; CHECK-LABEL: @fcmp_0_fmult_select1_notvectorize(
 482 ; CHECK-NOT: <4 x float>
 483 define float @fcmp_0_fmult_select1_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
 484 entry:                                            ; loop guard: the body only runs when %N > 0
 485   %cmp.1 = icmp sgt i32 %N, 0
 486   br i1 %cmp.1, label %for.header, label %for.end
 487
 488 for.header:                                       ; preds = %entry
 489   %zext = zext i32 %N to i64
 490   br label %for.body
 491
 492 for.body:                                         ; preds = %for.body, %for.header
 493   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 494   %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running product, starts at 0.0
 495   %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
 496   %0 = load float, ptr %arrayidx, align 4
 497   %cmp.2 = fcmp ogt float %0, 0.000000e+00
 498   %mult = fmul float %sum.1, %0 ; no fast-math flags on this fmul or on the fcmp above
 499   %sum.2 = select i1 %cmp.2, float %mult, float %sum.1 ; keep the old value when the compare is false
 500   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 501   %exitcond = icmp eq i64 %indvars.iv.next, %zext
 502   br i1 %exitcond, label %for.end, label %for.body
 503
 504 for.end:                                          ; preds = %for.body, %entry
 505   %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
 506   ret float %1
 507 }
 508
 509 ; Double pattern:
 510 ;   Check vectorization of reduction code which has an fmul instruction after
 511 ;   an fcmp instruction which compares an array element and 0.
 512 ;
 513 ; double fcmp_0_fmult_select2(ptr restrict x, const int N) {
 514 ;   double sum = 0.
 515 ;   for (int i = 0; i < N; ++i)
 516 ;     if (x[i] > 0.)
 517 ;       sum *= x[i];
 518 ;   return sum;
 519 ; }
 520
 521 ; CHECK-LABEL: @fcmp_0_fmult_select2(
 522 ; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
 523 ; CHECK: %[[V3:.*]] = fmul fast <4 x double> %[[V2:.*]], %[[V0]]
 524 ; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
 525 define double @fcmp_0_fmult_select2(ptr noalias %x, i32 %N) nounwind readonly {
 526 entry:                                            ; loop guard: the body only runs when %N > 0
 527   %cmp.1 = icmp sgt i32 %N, 0
 528   br i1 %cmp.1, label %for.header, label %for.end
 529
 530 for.header:                                       ; preds = %entry
 531   %zext = zext i32 %N to i64
 532   br label %for.body
 533
 534 for.body:                                         ; preds = %for.body, %for.header
 535   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 536   %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running product, starts at 0.0
 537   %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
 538   %0 = load double, ptr %arrayidx, align 4
 539   %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
 540   %mult = fmul fast double %sum.1, %0
 541   %sum.2 = select i1 %cmp.2, double %mult, double %sum.1 ; keep the old value when the compare is false
 542   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 543   %exitcond = icmp eq i64 %indvars.iv.next, %zext
 544   br i1 %exitcond, label %for.end, label %for.body
 545
 546 for.end:                                          ; preds = %for.body, %entry
 547   %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
 548   ret double %1
 549 }
 550
 551 ; Double pattern:
 552 ;   Check that the loop is not vectorized if the fp-instruction has no fast-math property.
 553 ;
 554 ; double fcmp_0_fmult_select2_notvectorize(ptr restrict x, const int N) {
 555 ;   double sum = 0.
 556 ;   for (int i = 0; i < N; ++i)
 557 ;     if (x[i] > 0.)
 558 ;       sum *= x[i];
 559 ;   return sum;
 560 ; }
 561
 562 ; CHECK-LABEL: @fcmp_0_fmult_select2_notvectorize(
 563 ; CHECK-NOT: <4 x double>
 564 define double @fcmp_0_fmult_select2_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
 565 entry:                                            ; loop guard: the body only runs when %N > 0
 566   %cmp.1 = icmp sgt i32 %N, 0
 567   br i1 %cmp.1, label %for.header, label %for.end
 568
 569 for.header:                                       ; preds = %entry
 570   %zext = zext i32 %N to i64
 571   br label %for.body
 572
 573 for.body:                                         ; preds = %for.body, %for.header
 574   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 575   %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running product, starts at 0.0
 576   %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
 577   %0 = load double, ptr %arrayidx, align 4
 578   %cmp.2 = fcmp ogt double %0, 0.000000e+00
 579   %mult = fmul double %sum.1, %0 ; no fast-math flags on this fmul or on the fcmp above
 580   %sum.2 = select i1 %cmp.2, double %mult, double %sum.1 ; keep the old value when the compare is false
 581   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 582   %exitcond = icmp eq i64 %indvars.iv.next, %zext
 583   br i1 %exitcond, label %for.end, label %for.body
 584
 585 for.end:                                          ; preds = %for.body, %entry
 586   %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
 587   ret double %1
 588 }
 589
 590 ; Float multi pattern
 591 ;   Check vectorisation of reduction code with a pair of selects to different
 592 ;   fadd patterns.
 593 ;
 594 ; float fcmp_multi(ptr a, int n) {
 595 ;   float sum=0.0;
 596 ;   for (int i=0;i<n;i++) {
 597 ;     if (a[i]>1.0)
 598 ;       sum+=a[i];
 599 ;     else if (a[i]<3.0)
 600 ;       sum+=2*a[i];
 601 ;     else
 602 ;       sum+=3*a[i];
 603 ;   }
 604 ;   return sum;
 605 ; }
 606
 607 ; CHECK-LABEL: @fcmp_multi(
 608 ; CHECK: %[[C1:.*]] = fcmp ogt <4 x float> %[[V0:.*]], <float 1.000000e+00,
 609 ; CHECK: %[[C11:.*]] = xor <4 x i1> %[[C1]], <i1 true,
 610 ; CHECK: %[[C2:.*]] = fcmp olt <4 x float> %[[V0]], <float 3.000000e+00,
 611 ; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], <i1 true,
 612 ; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer
 613 ; CHECK-DAG: %[[M1:.*]] = fmul fast <4 x float> %[[V0]], <float 3.000000e+00,
 614 ; CHECK-DAG: %[[M2:.*]] = fmul fast <4 x float> %[[V0]], <float 2.000000e+00,
 615 ; CHECK-DAG: %[[C12:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer
 616 ; CHECK: %[[S1:.*]] = select <4 x i1> %[[C22]], <4 x float> %[[M1]], <4 x float> %[[M2]]
 617 ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C1]], <4 x float> %[[V0]], <4 x float> %[[S1]]
 618 ; CHECK: fadd fast <4 x float> %[[S2]],
 619 define float @fcmp_multi(ptr nocapture readonly %a, i32 %n) nounwind readonly {
 620 entry:                                            ; loop guard: the body only runs when %n > 0
 621   %cmp10 = icmp sgt i32 %n, 0
 622   br i1 %cmp10, label %for.body.preheader, label %for.end
 623
 624 for.body.preheader:                               ; preds = %entry
 625   %wide.trip.count = zext i32 %n to i64
 626   br label %for.body
 627
 628 for.body:                                         ; preds = %for.inc, %for.body.preheader
 629   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
 630   %sum.011 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ] ; running sum, starts at 0.0
 631   %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
 632   %0 = load float, ptr %arrayidx, align 4
 633   %cmp1 = fcmp ogt float %0, 1.000000e+00 ; the compares carry no fast-math flags; only fmul/fadd do
 634   br i1 %cmp1, label %for.inc, label %if.else
 635
 636 if.else:                                          ; preds = %for.body
 637   %cmp8 = fcmp olt float %0, 3.000000e+00
 638   br i1 %cmp8, label %if.then10, label %if.else14
 639
 640 if.then10:                                        ; preds = %if.else
 641   %mul = fmul fast float %0, 2.000000e+00
 642   br label %for.inc
 643
 644 if.else14:                                        ; preds = %if.else
 645   %mul17 = fmul fast float %0, 3.000000e+00
 646   br label %for.inc
 647
 648 for.inc:                                          ; preds = %for.body, %if.else14, %if.then10
 649   %.pn = phi float [ %mul, %if.then10 ], [ %mul17, %if.else14 ], [ %0, %for.body ] ; addend: 2*%0, 3*%0 or %0 itself
 650   %sum.1 = fadd fast float %.pn, %sum.011 ; single fadd feeds the reduction on every path
 651   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 652   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
 653   br i1 %exitcond, label %for.end, label %for.body
 654
 655 for.end:                                          ; preds = %for.inc, %entry
 656   %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ]
 657   ret float %sum.0.lcssa
 658 }
 659
 660 ; Float fadd + fsub patterns
 661 ;   Check vectorisation of reduction code with a pair of selects to different
 662 ;   instructions { fadd, fsub } but equivalent (change in constant).
 663 ;
 664 ; float fcmp_fadd_fsub(ptr a, int n) {
 665 ;   float sum=0.0;
 666 ;   for (int i=0;i<n;i++) {
 667 ;     if (a[i]>1.0)
 668 ;       sum+=a[i];
 669 ;     else if (a[i]<3.0)
 670 ;       sum-=a[i];
 671 ;   }
 672 ;   return sum;
 673 ; }
 674
 675 ; CHECK-LABEL: @fcmp_fadd_fsub(
 676 ; CHECK: %[[C1:.*]] = fcmp ogt <4 x float> %[[V0:.*]], <float 1.000000e+00,
 677 ; CHECK: %[[C11:.*]] = xor <4 x i1> %[[C1]], <i1 true,
 678 ; CHECK: %[[C2:.*]] = fcmp olt <4 x float> %[[V0]], <float 3.000000e+00,
 679 ; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], <i1 true,
 680 ; CHECK-DAG: %[[SUB:.*]] = fsub fast <4 x float>
 681 ; CHECK-DAG: %[[ADD:.*]] = fadd fast <4 x float>
 682 ; CHECK-DAG: %[[C12:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer
 683 ; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer
 684 ; CHECK: %[[S1:.*]] = select <4 x i1> %[[C12]], <4 x float> %[[SUB]], <4 x float> %[[ADD]]
 685 ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C22]], {{.*}} <4 x float> %[[S1]]
 686 define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonly {
 687 entry:                                            ; loop guard: the body only runs when %n > 0
 688   %cmp9 = icmp sgt i32 %n, 0
 689   br i1 %cmp9, label %for.body.preheader, label %for.end
 690
 691 for.body.preheader:                               ; preds = %entry
 692   %wide.trip.count = zext i32 %n to i64
 693   br label %for.body
 694
 695 for.body:                                         ; preds = %for.inc, %for.body.preheader
 696   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
 697   %sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ] ; running sum, starts at 0.0
 698   %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
 699   %0 = load float, ptr %arrayidx, align 4
 700   %cmp1 = fcmp ogt float %0, 1.000000e+00 ; the compares carry no fast-math flags; only fadd/fsub do
 701   br i1 %cmp1, label %if.then, label %if.else
 702
 703 if.then:                                          ; preds = %for.body
 704   %add = fadd fast float %0, %sum.010
 705   br label %for.inc
 706
 707 if.else:                                          ; preds = %for.body
 708   %cmp8 = fcmp olt float %0, 3.000000e+00
 709   br i1 %cmp8, label %if.then10, label %for.inc
 710
 711 if.then10:                                        ; preds = %if.else
 712   %sub = fsub fast float %sum.010, %0
 713   br label %for.inc
 714
 715 for.inc:                                          ; preds = %if.then, %if.then10, %if.else
 716   %sum.1 = phi float [ %add, %if.then ], [ %sub, %if.then10 ], [ %sum.010, %if.else ] ; sum + %0, sum - %0, or the unchanged sum
 717   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 718   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
 719   br i1 %exitcond, label %for.end, label %for.body
 720
 721 for.end:                                          ; preds = %for.inc, %entry
 722   %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ]
 723   ret float %sum.0.lcssa
 724 }
 725
 726 ; Float fadd + fmul patterns
 727 ;   Check lack of vectorisation of reduction code with a pair of non-compatible
 728 ;   instructions { fadd, fmul }.
 729 ;
 730 ; float fcmp_multi(float *a, int n) {
 731 ;   float sum=0.0;
 732 ;   for (int i=0;i<n;i++) {
 733 ;     if (a[i]>1.0)
 734 ;       sum+=a[i];
 735 ;     else if (a[i]<3.0)
 736 ;       sum*=a[i];
 737 ;   }
 738 ;   return sum;
 739 ; }
 740
 741 ; CHECK-LABEL: @fcmp_fadd_fmul(
 742 ; CHECK-NOT: <4 x float>
     ; Conditional float update over %a[0 .. %n-1] that mixes two operations on
     ; the same accumulator:
     ;   - element ogt 1.0            -> sum = element + sum   (fadd fast)
     ;   - otherwise, element olt 3.0 -> sum = element * sum   (fmul fast)
     ;   - otherwise                  -> sum is carried over unchanged
     ; The directives above expect no <4 x float> value in the output for this
     ; function. Returns the final sum, or 0.0 when %n is not greater than 0.
 743 define float @fcmp_fadd_fmul(ptr nocapture readonly %a, i32 %n) nounwind readonly {
 744 entry:
       ; Skip the loop entirely for a non-positive trip count.
 745   %cmp9 = icmp sgt i32 %n, 0
 746   br i1 %cmp9, label %for.body.preheader, label %for.end
 747
 748 for.body.preheader:                               ; preds = %entry
 749   %wide.trip.count = zext i32 %n to i64
 750   br label %for.body
 751
 752 for.body:                                         ; preds = %for.inc, %for.body.preheader
       ; %indvars.iv is the element index; %sum.010 is the accumulator (starts at 0.0).
 753   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
 754   %sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ]
 755   %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
 756   %0 = load float, ptr %arrayidx, align 4
 757   %cmp1 = fcmp ogt float %0, 1.000000e+00
 758   br i1 %cmp1, label %if.then, label %if.else
 759
 760 if.then:                                          ; preds = %for.body
 761   %add = fadd fast float %0, %sum.010
 762   br label %for.inc
 763
 764 if.else:                                          ; preds = %for.body
 765   %cmp8 = fcmp olt float %0, 3.000000e+00
 766   br i1 %cmp8, label %if.then10, label %for.inc
 767
 768 if.then10:                                        ; preds = %if.else
       ; The multiply is what makes this accumulator update differ from the fadd path.
 769   %mul = fmul fast float %0, %sum.010
 770   br label %for.inc
 771
 772 for.inc:                                          ; preds = %if.then, %if.then10, %if.else
       ; Merge the add, multiply and pass-through values of the accumulator.
 773   %sum.1 = phi float [ %add, %if.then ], [ %mul, %if.then10 ], [ %sum.010, %if.else ]
 774   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 775   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
 776   br i1 %exitcond, label %for.end, label %for.body
 777
 778 for.end:                                          ; preds = %for.inc, %entry
 779   %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ]
 780   ret float %sum.0.lcssa
 781 }
 782
 783 ; Float fadd + store patterns
 784 ;   Check lack of vectorisation of reduction code with a store back, given it
 785 ;   has loop dependency on a[i].
 786 ;
 787 ; float fcmp_store_back(float a[], int LEN) {
 788 ;     float sum = 0.0;
 789 ;     for (int i = 0; i < LEN; i++) {
 790 ;       sum += a[i];
 791 ;       a[i] = sum;
 792 ;     }
 793 ;     return sum;
 794 ; }
 795
 796 define float @fcmp_store_back(ptr nocapture %a, i32 %LEN) nounwind readonly {
 797 ; CHECK-LABEL: @fcmp_store_back(
 798 ; CHECK-NOT:     <4 x float>
 799 ;
     ; Running float sum over %a[0 .. %LEN-1] where each partial sum is written
     ; back into the element that was just read. Returns the final sum, or 0.0
     ; when %LEN is not greater than 0. The directives above expect no
     ; <4 x float> value in the output for this function.
     ; NOTE(review): the function carries the readonly attribute although the
     ; loop body contains a store through %a -- confirm this is intentional.
 800 entry:
 801   %cmp7 = icmp sgt i32 %LEN, 0
 802   br i1 %cmp7, label %for.body.preheader, label %for.end
 803
 804 for.body.preheader:                               ; preds = %entry
 805   %wide.trip.count = zext i32 %LEN to i64
 806   br label %for.body
 807
 808 for.body:                                         ; preds = %for.body, %for.body.preheader
       ; %indvars.iv is the element index; %sum.08 is the running sum (starts at 0.0).
 809   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
 810   %sum.08 = phi float [ 0.000000e+00, %for.body.preheader ], [ %add, %for.body ]
 811   %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
 812   %0 = load float, ptr %arrayidx, align 4
 813   %add = fadd fast float %0, %sum.08
       ; The partial sum is stored to the same address the element was loaded from.
 814   store float %add, ptr %arrayidx, align 4
 815   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 816   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
 817   br i1 %exitcond, label %for.end, label %for.body
 818
 819 for.end:                                          ; preds = %for.body, %entry
 820   %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
 821   ret float %sum.0.lcssa
 822 }
 823
 824 ; CHECK-LABEL: @fcmp_0_add_select2(
 825 ; CHECK: %[[V1:.*]] = fcmp ogt <4 x float> %[[V0:.*]], zeroinitializer
 826 ; CHECK: %[[V3:.*]] = add <4 x i64> %[[V2:.*]], <i64 2, i64 2, i64 2, i64 2>
 827 ; CHECK: select <4 x i1> %[[V1]], <4 x i64> %[[V3]], <4 x i64> %[[V2]]
     ; i64 add reduction guarded by a float compare: for each element of
     ; %x[0 .. %N-1] that is ogt 0.0 the accumulator grows by 2, otherwise it is
     ; kept. Returns the accumulator, or 0 when %N is not greater than 0.
     ; Unlike the i32-bound tests in this file, %N is already i64, so the
     ; preheader needs no zext.
 828 define i64 @fcmp_0_add_select2(ptr noalias %x, i64 %N) nounwind readonly {
 829 entry:
 830   %cmp.1 = icmp sgt i64 %N, 0
 831   br i1 %cmp.1, label %for.header, label %for.end
 832
 833 for.header:                                       ; preds = %entry
 834   br label %for.body
 835
 836 for.body:                                         ; preds = %for.header, %for.body
       ; %indvars.iv is the element index; %sum.1 is the i64 accumulator (starts at 0).
 837   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 838   %sum.1 = phi i64 [ 0, %for.header ], [ %sum.2, %for.body ]
 839   %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
 840   %0 = load float, ptr %arrayidx, align 4
 841   %cmp.2 = fcmp ogt float %0, 0.000000e+00
       ; The add is computed unconditionally; the select decides whether it is used.
 842   %add = add nsw i64 %sum.1, 2
 843   %sum.2 = select i1 %cmp.2, i64 %add, i64 %sum.1
 844   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 845   %exitcond = icmp eq i64 %indvars.iv.next, %N
 846   br i1 %exitcond, label %for.end, label %for.body
 847
 848 for.end:                                          ; preds = %for.body, %entry
 849   %1 = phi i64 [ 0, %entry ], [ %sum.2, %for.body ]
 850   ret i64 %1
 851 }
 852
 853 ; CHECK-LABEL: @fcmp_0_sub_select1(
 854 ; CHECK: %[[V1:.*]] = fcmp ogt <4 x float> %[[V0:.*]], zeroinitializer
 855 ; CHECK: %[[V3:.*]] = sub <4 x i32> %[[V2:.*]], <i32 2, i32 2, i32 2, i32 2>
 856 ; CHECK: select <4 x i1> %[[V1]], <4 x i32> %[[V3]], <4 x i32> %[[V2]]
     ; i32 sub reduction guarded by a float compare: for each element of
     ; %x[0 .. %N-1] that is ogt 0.0 the accumulator shrinks by 2, otherwise it
     ; is kept. Returns the accumulator, or 0 when %N is not greater than 0.
 857 define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly {
 858 entry:
       ; Skip the loop entirely for a non-positive trip count.
 859   %cmp.1 = icmp sgt i32 %N, 0
 860   br i1 %cmp.1, label %for.header, label %for.end
 861
 862 for.header:                                       ; preds = %entry
       ; Widen the bound once so the i64 induction variable can be compared to it.
 863   %zext = zext i32 %N to i64
 864   br label %for.body
 865
 866 for.body:                                         ; preds = %for.header, %for.body
       ; %indvars.iv is the element index; %sum.1 is the i32 accumulator (starts at 0).
 867   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 868   %sum.1 = phi i32 [ 0, %for.header ], [ %sum.2, %for.body ]
 869   %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
 870   %0 = load float, ptr %arrayidx, align 4
 871   %cmp.2 = fcmp ogt float %0, 0.000000e+00
       ; The sub is computed unconditionally; the select decides whether it is used.
 872   %sub = sub nsw i32 %sum.1, 2
 873   %sum.2 = select i1 %cmp.2, i32 %sub, i32 %sum.1
       ; The index counts upward from 0 to %zext. Only the accumulator uses sub:
       ; decrementing the index from 0 under nuw would yield poison on the first
       ; iteration and could never equal the positive bound.
 874   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 875   %exitcond = icmp eq i64 %indvars.iv.next, %zext
 876   br i1 %exitcond, label %for.end, label %for.body
 877
 878 for.end:                                          ; preds = %for.body, %entry
 879   %1 = phi i32 [ 0, %entry ], [ %sum.2, %for.body ]
 880   ret i32 %1
 881 }
 882
 883 ; CHECK-LABEL: @fcmp_0_mult_select1(
 884 ; CHECK: %[[V1:.*]] = fcmp ogt <4 x float> %[[V0:.*]], zeroinitializer
 885 ; CHECK: %[[V3:.*]] = mul <4 x i32> %[[V2:.*]], <i32 2, i32 2, i32 2, i32 2>
 886 ; CHECK: select <4 x i1> %[[V1]], <4 x i32> %[[V3]], <4 x i32> %[[V2]]
     ; i32 mul reduction guarded by a float compare: for each element of
     ; %x[0 .. %N-1] that is ogt 0.0 the accumulator is doubled, otherwise it is
     ; kept. Returns the accumulator, or 0 when %N is not greater than 0.
     ; NOTE(review): the accumulator starts at 0, so every product is 0 at run
     ; time; the directives above only match the shape of the generated vector
     ; code -- confirm the start value is intentional.
 887 define i32 @fcmp_0_mult_select1(ptr noalias %x, i32 %N) nounwind readonly {
 888 entry:
 889   %cmp.1 = icmp sgt i32 %N, 0
 890   br i1 %cmp.1, label %for.header, label %for.end
 891
 892 for.header:                                       ; preds = %entry
       ; Widen the bound once so the i64 induction variable can be compared to it.
 893   %zext = zext i32 %N to i64
 894   br label %for.body
 895
 896 for.body:                                         ; preds = %for.body, %for.header
       ; %indvars.iv is the element index; %sum.1 is the i32 accumulator.
 897   %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
 898   %sum.1 = phi i32 [ 0, %for.header ], [ %sum.2, %for.body ]
 899   %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
 900   %0 = load float, ptr %arrayidx, align 4
 901   %cmp.2 = fcmp ogt float %0, 0.000000e+00
       ; The mul is computed unconditionally; the select decides whether it is used.
 902   %mult = mul nsw i32 %sum.1, 2
 903   %sum.2 = select i1 %cmp.2, i32 %mult, i32 %sum.1
 904   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 905   %exitcond = icmp eq i64 %indvars.iv.next, %zext
 906   br i1 %exitcond, label %for.end, label %for.body
 907
 908 for.end:                                          ; preds = %for.body, %entry
 909   %1 = phi i32 [ 0, %entry ], [ %sum.2, %for.body ]
 910   ret i32 %1
 911 }
 912
 913 @table = constant [13 x i16] [i16 10, i16 35, i16 69, i16 147, i16 280, i16 472, i16 682, i16 1013, i16 1559, i16 2544, i16 4553, i16 6494, i16 10000], align 1
 914
 915 ; CHECK-LABEL: @non_reduction_index(
 916 ; CHECK-NOT:     <4 x i16>
     ; Backward scan of @table: the index starts at 12 and is decremented until
     ; it reaches 0, so entries 12 down to 1 are read. Whenever the entry is
     ; unsigned-greater than %val, the already-decremented index replaces the
     ; carried value %k.04; otherwise %k.04 is kept. The selected value comes
     ; from the induction variable rather than from an operation on %k.04, and
     ; the directives above expect no <4 x i16> value in the output.
     ; Returns the value selected in the last iteration.
 917 define i16 @non_reduction_index(i16 noundef %val) {
 918 entry:
 919   br label %for.body
 920
 921 for.cond.cleanup:                                 ; preds = %for.body
 922   %spec.select.lcssa = phi i16 [ %spec.select, %for.body ]
 923   ret i16 %spec.select.lcssa
 924
 925 for.body:                                         ; preds = %entry, %for.body
       ; %i.05 is the descending table index; %k.04 is the carried result (starts at 0).
 926   %i.05 = phi i16 [ 12, %entry ], [ %sub, %for.body ]
 927   %k.04 = phi i16 [ 0, %entry ], [ %spec.select, %for.body ]
 928   %arrayidx = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 %i.05
 929   %0 = load i16, ptr %arrayidx, align 1
 930   %cmp1 = icmp ugt i16 %0, %val
 931   %sub = add nsw i16 %i.05, -1
       ; Takes the new index itself, not a value derived from %k.04.
 932   %spec.select = select i1 %cmp1, i16 %sub, i16 %k.04
 933   %cmp.not = icmp eq i16 %sub, 0
 934   br i1 %cmp.not, label %for.cond.cleanup, label %for.body
 935 }
 936
 937 @tablef = constant [13 x half] [half 10.0, half 35.0, half 69.0, half 147.0, half 280.0, half 472.0, half 682.0, half 1013.0, half 1559.0, half 2544.0, half 4556.0, half 6496.0, half 10000.0], align 1
 938
 939 ; CHECK-LABEL: @non_reduction_index_half(
 940 ; CHECK-NOT:     <4 x half>
     ; Half-precision variant of the backward index scan: the index starts at 12
     ; and is decremented until it reaches 0, so entries 12 down to 1 of @tablef
     ; are read. Whenever the entry compares ugt (unordered or greater than)
     ; against %val, the already-decremented index replaces the carried value
     ; %k.04; otherwise %k.04 is kept. The directives above expect no <4 x half>
     ; value in the output. Returns the value selected in the last iteration.
 941 define i16 @non_reduction_index_half(half noundef %val) {
 942 entry:
 943   br label %for.body
 944
 945 for.cond.cleanup:                                 ; preds = %for.body
 946   %spec.select.lcssa = phi i16 [ %spec.select, %for.body ]
 947   ret i16 %spec.select.lcssa
 948
 949 for.body:                                         ; preds = %entry, %for.body
       ; %i.05 is the descending table index; %k.04 is the carried result (starts at 0).
 950   %i.05 = phi i16 [ 12, %entry ], [ %sub, %for.body ]
 951   %k.04 = phi i16 [ 0, %entry ], [ %spec.select, %for.body ]
       ; Index the half table so the global's element type matches the load below.
 952   %arrayidx = getelementptr inbounds [13 x half], ptr @tablef, i16 0, i16 %i.05
 953   %0 = load half, ptr %arrayidx, align 1
 954   %fcmp1 = fcmp ugt half %0, %val
 955   %sub = add nsw i16 %i.05, -1
       ; Takes the new index itself, not a value derived from %k.04.
 956   %spec.select = select i1 %fcmp1, i16 %sub, i16 %k.04
 957   %cmp.not = icmp eq i16 %sub, 0
 958   br i1 %cmp.not, label %for.cond.cleanup, label %for.body
 959 }
 960
 961 ; Make sure any check-not directives are not triggered by function declarations.
 962 ; CHECK: declare