llvm/test/CodeGen/X86/x86-cmov-converter.ll

   1 ; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
   2
   3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
   4 ;; This test checks that x86-cmov-converter optimization transform CMOV
   5 ;; instruction into branches when it is profitable.
   6 ;; There are 5 cases below:
   7 ;;   1. CmovInCriticalPath:
   8 ;;        CMOV depends on the condition and it is in the hot path.
   9 ;;        Thus, it worths transforming.
  10 ;;
  11 ;;   2. CmovNotInCriticalPath:
  12 ;;        Similar test like in (1), just that CMOV is not in the hot path.
  13 ;;        Thus, it does not worth transforming.
  14 ;;
  15 ;;   3. MaxIndex:
  16 ;;        Maximum calculation algorithm that is looking for the max index,
  17 ;;        calculating CMOV value is cheaper than calculating CMOV condition.
  18 ;;        Thus, it worths transforming.
  19 ;;
  20 ;;   4. MaxValue:
  21 ;;        Maximum calculation algorithm that is looking for the max value,
  22 ;;        calculating CMOV value is not cheaper than calculating CMOV condition.
  23 ;;        Thus, it does not worth transforming.
  24 ;;
  25 ;;   5. BinarySearch:
  26 ;;        Usually, binary search CMOV is not predicted.
  27 ;;        Thus, it does not worth transforming.
  28 ;;
  29 ;; Test was created using the following command line:
  30 ;; > clang -S -O2 -m64 -fno-vectorize -fno-unroll-loops -emit-llvm foo.c -o -
  31 ;; Where foo.c is:
  32 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  33 ;;void CmovInHotPath(int n, int a, int b, int *c, int *d) {
  34 ;;  for (int i = 0; i < n; i++) {
  35 ;;    int t = c[i] + 1;
  36 ;;    if (c[i] * a > b)
  37 ;;      t = 10;
  38 ;;    c[i] = (c[i] + 1) * t;
  39 ;;  }
  40 ;;}
  41 ;;
  42 ;;
  43 ;;void CmovNotInHotPath(int n, int a, int b, int *c, int *d) {
  44 ;;  for (int i = 0; i < n; i++) {
  45 ;;    int t = c[i];
  46 ;;    if (c[i] * a > b)
  47 ;;      t = 10;
  48 ;;    c[i] = t;
  49 ;;    d[i] /= b;
  50 ;;  }
  51 ;;}
  52 ;;
  53 ;;
  54 ;;int MaxIndex(int n, int *a) {
  55 ;;  int t = 0;
  56 ;;  for (int i = 1; i < n; i++) {
  57 ;;    if (a[i] > a[t])
  58 ;;      t = i;
  59 ;;  }
  60 ;;  return t;
  61 ;;}
  62 ;;
  63 ;;
  64 ;;int MaxValue(int n, int *a) {
  65 ;;  int t = a[0];
  66 ;;  for (int i = 1; i < n; i++) {
  67 ;;    if (a[i] > t)
  68 ;;      t = a[i];
  69 ;;  }
  70 ;;  return t;
  71 ;;}
  72 ;;
  73 ;;typedef struct Node Node;
  74 ;;struct Node {
  75 ;;  unsigned Val;
  76 ;;  Node *Right;
  77 ;;  Node *Left;
  78 ;;};
  79 ;;
  80 ;;unsigned BinarySearch(unsigned Mask, Node *Curr, Node *Next) {
  81 ;;  while (Curr->Val > Next->Val) {
  82 ;;    Curr = Next;
  83 ;;    if (Mask & (0x1 << Curr->Val))
  84 ;;      Next = Curr->Right;
  85 ;;    else
  86 ;;      Next = Curr->Left;
  87 ;;  }
  88 ;;  return Curr->Val;
  89 ;;}
  90 ;;
  91 ;;
  92 ;;void SmallGainPerLoop(int n, int a, int b, int *c, int *d) {
  93 ;;  for (int i = 0; i < n; i++) {
  94 ;;    int t = c[i];
  95 ;;    if (c[i] * a > b)
  96 ;;      t = 10;
  97 ;;    c[i] = t;
  98 ;;  }
  99 ;;}
 100 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 101
 102 %struct.Node = type { i32, %struct.Node*, %struct.Node* }
 103
 104 ; CHECK-LABEL: CmovInHotPath
 105 ; CHECK-NOT: cmov
 106 ; CHECK: jg
 107
 108 define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture readnone %d) #0 {
 109 entry:
 110   %cmp14 = icmp sgt i32 %n, 0
 111   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
 112
 113 for.body.preheader:                               ; preds = %entry
 114   %wide.trip.count = zext i32 %n to i64
 115   br label %for.body
 116
 117 for.cond.cleanup:                                 ; preds = %for.body, %entry
 118   ret void
 119
 120 for.body:                                         ; preds = %for.body.preheader, %for.body
 121   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
 122   %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
 123   %0 = load i32, i32* %arrayidx, align 4
 124   %add = add nsw i32 %0, 1
 125   %mul = mul nsw i32 %0, %a
 126   %cmp3 = icmp sgt i32 %mul, %b
 127   %. = select i1 %cmp3, i32 10, i32 %add
 128   %mul7 = mul nsw i32 %., %add
 129   store i32 %mul7, i32* %arrayidx, align 4
 130   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 131   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
 132   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 133 }
 134
 135 ; CHECK-LABEL: CmovNotInHotPath
 136 ; CHECK: cmovg
 137
 138 define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture %d) #0 {
 139 entry:
 140   %cmp18 = icmp sgt i32 %n, 0
 141   br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
 142
 143 for.body.preheader:                               ; preds = %entry
 144   %wide.trip.count = zext i32 %n to i64
 145   br label %for.body
 146
 147 for.cond.cleanup:                                 ; preds = %for.body, %entry
 148   ret void
 149
 150 for.body:                                         ; preds = %for.body.preheader, %for.body
 151   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
 152   %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
 153   %0 = load i32, i32* %arrayidx, align 4
 154   %mul = mul nsw i32 %0, %a
 155   %cmp3 = icmp sgt i32 %mul, %b
 156   %. = select i1 %cmp3, i32 10, i32 %0
 157   store i32 %., i32* %arrayidx, align 4
 158   %arrayidx7 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
 159   %1 = load i32, i32* %arrayidx7, align 4
 160   %div = sdiv i32 %1, %b
 161   store i32 %div, i32* %arrayidx7, align 4
 162   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 163   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
 164   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 165 }
 166
 167 ; CHECK-LABEL: MaxIndex
 168 ; CHECK-NOT: cmov
 169 ; CHECK: jg
 170
 171 define i32 @MaxIndex(i32 %n, i32* nocapture readonly %a) #0 {
 172 entry:
 173   %cmp14 = icmp sgt i32 %n, 1
 174   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
 175
 176 for.body.preheader:                               ; preds = %entry
 177   %wide.trip.count = zext i32 %n to i64
 178   br label %for.body
 179
 180 for.cond.cleanup:                                 ; preds = %for.body, %entry
 181   %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ]
 182   ret i32 %t.0.lcssa
 183
 184 for.body:                                         ; preds = %for.body.preheader, %for.body
 185   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
 186   %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
 187   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
 188   %0 = load i32, i32* %arrayidx, align 4
 189   %idxprom1 = sext i32 %t.015 to i64
 190   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
 191   %1 = load i32, i32* %arrayidx2, align 4
 192   %cmp3 = icmp sgt i32 %0, %1
 193   %2 = trunc i64 %indvars.iv to i32
 194   %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015
 195   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 196   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
 197   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 198 }
 199
 200 ; CHECK-LABEL: MaxValue
 201 ; CHECK-NOT: jg
 202 ; CHECK: cmovg
 203
 204 define i32 @MaxValue(i32 %n, i32* nocapture readonly %a) #0 {
 205 entry:
 206   %0 = load i32, i32* %a, align 4
 207   %cmp13 = icmp sgt i32 %n, 1
 208   br i1 %cmp13, label %for.body.preheader, label %for.cond.cleanup
 209
 210 for.body.preheader:                               ; preds = %entry
 211   %wide.trip.count = zext i32 %n to i64
 212   br label %for.body
 213
 214 for.cond.cleanup:                                 ; preds = %for.body, %entry
 215   %t.0.lcssa = phi i32 [ %0, %entry ], [ %.t.0, %for.body ]
 216   ret i32 %t.0.lcssa
 217
 218 for.body:                                         ; preds = %for.body.preheader, %for.body
 219   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
 220   %t.014 = phi i32 [ %.t.0, %for.body ], [ %0, %for.body.preheader ]
 221   %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
 222   %1 = load i32, i32* %arrayidx1, align 4
 223   %cmp2 = icmp sgt i32 %1, %t.014
 224   %.t.0 = select i1 %cmp2, i32 %1, i32 %t.014
 225   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 226   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
 227   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 228 }
 229
 230 ; CHECK-LABEL: BinarySearch
 231 ; CHECK: set
 232
 233 define i32 @BinarySearch(i32 %Mask, %struct.Node* nocapture readonly %Curr, %struct.Node* nocapture readonly %Next) #0 {
 234 entry:
 235   %Val8 = getelementptr inbounds %struct.Node, %struct.Node* %Curr, i64 0, i32 0
 236   %0 = load i32, i32* %Val8, align 8
 237   %Val19 = getelementptr inbounds %struct.Node, %struct.Node* %Next, i64 0, i32 0
 238   %1 = load i32, i32* %Val19, align 8
 239   %cmp10 = icmp ugt i32 %0, %1
 240   br i1 %cmp10, label %while.body, label %while.end
 241
 242 while.body:                                       ; preds = %entry, %while.body
 243   %2 = phi i32 [ %4, %while.body ], [ %1, %entry ]
 244   %Next.addr.011 = phi %struct.Node* [ %3, %while.body ], [ %Next, %entry ]
 245   %shl = shl i32 1, %2
 246   %and = and i32 %shl, %Mask
 247   %tobool = icmp eq i32 %and, 0
 248   %Left = getelementptr inbounds %struct.Node, %struct.Node* %Next.addr.011, i64 0, i32 2
 249   %Right = getelementptr inbounds %struct.Node, %struct.Node* %Next.addr.011, i64 0, i32 1
 250   %Left.sink = select i1 %tobool, %struct.Node** %Left, %struct.Node** %Right
 251   %3 = load %struct.Node*, %struct.Node** %Left.sink, align 8
 252   %Val1 = getelementptr inbounds %struct.Node, %struct.Node* %3, i64 0, i32 0
 253   %4 = load i32, i32* %Val1, align 8
 254   %cmp = icmp ugt i32 %2, %4
 255   br i1 %cmp, label %while.body, label %while.end
 256
 257 while.end:                                        ; preds = %while.body, %entry
 258   %.lcssa = phi i32 [ %0, %entry ], [ %2, %while.body ]
 259   ret i32 %.lcssa
 260 }
 261
 262 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 263 ;; The following test checks that x86-cmov-converter optimization transforms
 264 ;; CMOV instructions into branch correctly.
 265 ;;
 266 ;; MBB:
 267 ;;   cond = cmp ...
 268 ;;   v1 = CMOVgt t1, f1, cond
 269 ;;   v2 = CMOVle s1, f2, cond
 270 ;;
 271 ;; Where: t1 = 11, f1 = 22, f2 = a
 272 ;;
 273 ;; After CMOV transformation
 274 ;; -------------------------
 275 ;; MBB:
 276 ;;   cond = cmp ...
 277 ;;   ja %SinkMBB
 278 ;;
 279 ;; FalseMBB:
 280 ;;   jmp %SinkMBB
 281 ;;
 282 ;; SinkMBB:
 283 ;;   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
 284 ;;   %v2 = phi[%f1, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
 285 ;;                                          ; true-value with false-value
 286 ;;                                          ; Phi instruction cannot use
 287 ;;                                          ; previous Phi instruction result
 288 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 289
 290 ; CHECK-LABEL: Transform
 291 ; CHECK-NOT: cmov
 292 ; CHECK:         divl    [[a:%[0-9a-z]*]]
 293 ; CHECK:         movl    $11, [[s1:%[0-9a-z]*]]
 294 ; CHECK:         movl    [[a]], [[s2:%[0-9a-z]*]]
 295 ; CHECK:         cmpl    [[a]], %edx
 296 ; CHECK:         ja      [[SinkBB:.*]]
 297 ; CHECK: [[FalseBB:.*]]:
 298 ; CHECK:         movl    $22, [[s1]]
 299 ; CHECK:         movl    $22, [[s2]]
 300 ; CHECK: [[SinkBB]]:
 301 ; CHECK:         ja
 302
 303 define void @Transform(i32 *%arr, i32 *%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
 304 entry:
 305   %cmp10 = icmp ugt i32 0, %n
 306   br i1 %cmp10, label %while.body, label %while.end
 307
 308 while.body:                                       ; preds = %entry, %while.body
 309   %i = phi i32 [ %i_inc, %while.body ], [ 0, %entry ]
 310   %arr_i = getelementptr inbounds i32, i32* %arr, i32 %i
 311   %x = load i32, i32* %arr_i, align 4
 312   %div = udiv i32 %x, %a
 313   %cond = icmp ugt i32 %div, %a
 314   %condOpp = icmp ule i32 %div, %a
 315   %s1 = select i1 %cond, i32 11, i32 22
 316   %s2 = select i1 %condOpp, i32 %s1, i32 %a
 317   %sum = urem i32 %s1, %s2
 318   store i32 %sum, i32* %arr_i, align 4
 319   %i_inc = add i32 %i, 1
 320   %cmp = icmp ugt i32 %i_inc, %n
 321   br i1 %cmp, label %while.body, label %while.end
 322
 323 while.end:                                        ; preds = %while.body, %entry
 324   ret void
 325 }
 326
 327 ; Test that we always will convert a cmov with a memory operand into a branch,
 328 ; even outside of a loop.
 329 define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
 330 ; CHECK-LABEL: test_cmov_memoperand:
 331 entry:
 332   %cond = icmp ugt i32 %a, %b
 333 ; CHECK:         movl %edx, %eax
 334 ; CHECK:         cmpl
 335   %load = load i32, i32* %y
 336   %z = select i1 %cond, i32 %x, i32 %load
 337 ; CHECK-NOT:     cmov
 338 ; CHECK:         ja [[FALSE_BB:.*]]
 339 ; CHECK:         movl (%rcx), %eax
 340 ; CHECK:       [[FALSE_BB]]:
 341   ret i32 %z
 342 }
 343
 344 ; Test that we can convert a group of cmovs where only one has a memory
 345 ; operand.
 346 define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
 347 ; CHECK-LABEL: test_cmov_memoperand_in_group:
 348 entry:
 349   %cond = icmp ugt i32 %a, %b
 350 ; CHECK:         movl %edx, %eax
 351 ; CHECK:         cmpl
 352   %y = load i32, i32* %y.ptr
 353   %z1 = select i1 %cond, i32 %x, i32 %a
 354   %z2 = select i1 %cond, i32 %x, i32 %y
 355   %z3 = select i1 %cond, i32 %x, i32 %b
 356 ; CHECK-NOT:     cmov
 357 ; CHECK:         ja [[FALSE_BB:.*]]
 358 ; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
 359 ; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
 360 ; CHECK-DAG:     movl %{{.*}} %eax
 361 ; CHECK:       [[FALSE_BB]]:
 362 ; CHECK:         addl
 363 ; CHECK-DAG:       %[[R1]]
 364 ; CHECK-DAG:       ,
 365 ; CHECK-DAG:       %eax
 366 ; CHECK-DAG:     addl
 367 ; CHECK-DAG:       %[[R2]]
 368 ; CHECK-DAG:       ,
 369 ; CHECK-DAG:       %eax
 370 ; CHECK:         retq
 371   %s1 = add i32 %z1, %z2
 372   %s2 = add i32 %s1, %z3
 373   ret i32 %s2
 374 }
 375
 376 ; Same as before but with operands reversed in the select with a load.
 377 define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
 378 ; CHECK-LABEL: test_cmov_memoperand_in_group2:
 379 entry:
 380   %cond = icmp ugt i32 %a, %b
 381 ; CHECK:         movl %edx, %eax
 382 ; CHECK:         cmpl
 383   %y = load i32, i32* %y.ptr
 384   %z2 = select i1 %cond, i32 %a, i32 %x
 385   %z1 = select i1 %cond, i32 %y, i32 %x
 386   %z3 = select i1 %cond, i32 %b, i32 %x
 387 ; CHECK-NOT:     cmov
 388 ; CHECK:         jbe [[FALSE_BB:.*]]
 389 ; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
 390 ; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
 391 ; CHECK-DAG:     movl %{{.*}} %eax
 392 ; CHECK:       [[FALSE_BB]]:
 393 ; CHECK:         addl
 394 ; CHECK-DAG:       %[[R1]]
 395 ; CHECK-DAG:       ,
 396 ; CHECK-DAG:       %eax
 397 ; CHECK-DAG:     addl
 398 ; CHECK-DAG:       %[[R2]]
 399 ; CHECK-DAG:       ,
 400 ; CHECK-DAG:       %eax
 401 ; CHECK:         retq
 402   %s1 = add i32 %z1, %z2
 403   %s2 = add i32 %s1, %z3
 404   ret i32 %s2
 405 }
 406
 407 ; Test that we don't convert a group of cmovs with conflicting directions of
 408 ; loads.
 409 define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, i32* %y1.ptr, i32* %y2.ptr) #0 {
 410 ; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
 411 entry:
 412   %cond = icmp ugt i32 %a, %b
 413 ; CHECK:         cmpl
 414   %y1 = load i32, i32* %y1.ptr
 415   %y2 = load i32, i32* %y2.ptr
 416   %z1 = select i1 %cond, i32 %x, i32 %y1
 417   %z2 = select i1 %cond, i32 %y2, i32 %x
 418 ; CHECK:         cmoval
 419 ; CHECK:         cmoval
 420   %s1 = add i32 %z1, %z2
 421   ret i32 %s1
 422 }
 423
 424 ; Test that we can convert a group of cmovs where only one has a memory
 425 ; operand and where that memory operand's registers come from a prior cmov in
 426 ; the group.
 427 define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, i32* %x, i32* %y) #0 {
 428 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
 429 entry:
 430   %cond = icmp ugt i32 %a, %b
 431 ; CHECK:         movl %edi, %eax
 432 ; CHECK:         cmpl
 433   %p = select i1 %cond, i32* %x, i32* %y
 434   %load = load i32, i32* %p
 435   %z = select i1 %cond, i32 %a, i32 %load
 436 ; CHECK-NOT:     cmov
 437 ; CHECK:         ja [[FALSE_BB:.*]]
 438 ; CHECK:         movl (%r{{..}}), %eax
 439 ; CHECK:       [[FALSE_BB]]:
 440 ; CHECK:         retq
 441   ret i32 %z
 442 }
 443
 444 ; Test that we can convert a group of two cmovs with memory operands where one
 445 ; uses the result of the other as part of the address.
 446 define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, i32* %x, i32** %y) #0 {
 447 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
 448 entry:
 449   %cond = icmp ugt i32 %a, %b
 450 ; CHECK:         movl %edi, %eax
 451 ; CHECK:         cmpl
 452   %load1 = load i32*, i32** %y
 453   %p = select i1 %cond, i32* %x, i32* %load1
 454   %load2 = load i32, i32* %p
 455   %z = select i1 %cond, i32 %a, i32 %load2
 456 ; CHECK-NOT:     cmov
 457 ; CHECK:         ja [[FALSE_BB:.*]]
 458 ; CHECK:         movq (%r{{..}}), %[[R1:.*]]
 459 ; CHECK:         movl (%[[R1]]), %eax
 460 ; CHECK:       [[FALSE_BB]]:
 461 ; CHECK:         retq
 462   ret i32 %z
 463 }
 464
 465 ; Test that we can convert a group of cmovs where only one has a memory
 466 ; operand and where that memory operand's registers come from a prior cmov and
 467 ; where that cmov gets *its* input from a prior cmov in the group.
 468 define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, i32* %x, i32* %y, i32* %z) #0 {
 469 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
 470 entry:
 471   %cond = icmp ugt i32 %a, %b
 472 ; CHECK:         movl %edi, %eax
 473 ; CHECK:         cmpl
 474   %p = select i1 %cond, i32* %x, i32* %y
 475   %p2 = select i1 %cond, i32* %z, i32* %p
 476   %load = load i32, i32* %p2
 477   %r = select i1 %cond, i32 %a, i32 %load
 478 ; CHECK-NOT:     cmov
 479 ; CHECK:         ja [[FALSE_BB:.*]]
 480 ; CHECK:         movl (%r{{..}}), %eax
 481 ; CHECK:       [[FALSE_BB]]:
 482 ; CHECK:         retq
 483   ret i32 %r
 484 }
 485
 486 @begin = external global i32*
 487 @end = external global i32*
 488
 489 define void @test_memoperand_loop(i32 %data) #0 {
 490 ; CHECK-LABEL: test_memoperand_loop:
 491 ; CHECK:       # %bb.0: # %entry
 492 ; CHECK-NEXT:    movq begin@GOTPCREL(%rip), %r8
 493 ; CHECK-NEXT:    movq (%r8), %rax
 494 ; CHECK-NEXT:    movq end@GOTPCREL(%rip), %rcx
 495 ; CHECK-NEXT:    movq (%rcx), %rdx
 496 ; CHECK-NEXT:    xorl %esi, %esi
 497 ; CHECK-NEXT:    movq %rax, %rcx
 498 entry:
 499   %begin = load i32*, i32** @begin, align 8
 500   %end = load i32*, i32** @end, align 8
 501   br label %loop.body
 502
 503 ; CHECK-NEXT:  .LBB13_1: # %loop.body
 504 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 505 ; CHECK-NEXT:    addq $8, %rcx
 506 ; CHECK-NEXT:    cmpq %rdx, %rcx
 507 ; CHECK-NEXT:    ja .LBB13_3
 508 ; CHECK-NEXT:  # %bb.2: # %loop.body
 509 ; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
 510 ; CHECK-NEXT:    movq (%r8), %rcx
 511 ; CHECK-NEXT:  .LBB13_3: # %loop.body
 512 ; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
 513 ; CHECK-NEXT:    movl %edi, (%rcx)
 514 ; CHECK-NEXT:    addq $8, %rcx
 515 ; CHECK-NEXT:    cmpq %rdx, %rcx
 516 ; CHECK-NEXT:    ja .LBB13_5
 517 ; CHECK-NEXT:  # %bb.4: # %loop.body
 518 ; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
 519 ; CHECK-NEXT:    movq %rax, %rcx
 520 ; CHECK-NEXT:  .LBB13_5: # %loop.body
 521 ; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
 522 ; CHECK-NEXT:    movl %edi, (%rcx)
 523 ; CHECK-NEXT:    addl $1, %esi
 524 ; CHECK-NEXT:    cmpl $1024, %esi # imm = 0x400
 525 ; CHECK-NEXT:    jl .LBB13_1
 526 loop.body:
 527   %phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
 528   %phi.ptr = phi i32* [ %begin, %entry ], [ %dst2, %loop.body ]
 529   %gep1 = getelementptr inbounds i32, i32 *%phi.ptr, i64 2
 530   %cmp1 = icmp ugt i32* %gep1, %end
 531   %begin_dup = load i32*, i32** @begin, align 8
 532   %dst1 = select i1 %cmp1, i32* %gep1, i32* %begin_dup
 533   store i32 %data, i32 *%dst1, align 4
 534   %gep2 = getelementptr inbounds i32, i32 *%dst1, i64 2
 535   %cmp2 = icmp ugt i32* %gep2, %end
 536   %dst2 = select i1 %cmp2, i32* %gep2, i32* %begin
 537   store i32 %data, i32 *%dst2, align 4
 538   %iv.next = add i32 %phi.iv, 1
 539   %cond = icmp slt i32 %iv.next, 1024
 540   br i1 %cond, label %loop.body, label %exit
 541
 542 ; CHECK-NEXT:  # %bb.6: # %exit
 543 ; CHECK-NEXT:    retq
 544 exit:
 545   ret void
 546 }
 547
 548 attributes #0 = {"target-cpu"="x86-64"}