; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s -o - -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,ENABLE,CHECK-64,ENABLE-64
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu %s -o - -enable-shrink-wrap=false -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,DISABLE,CHECK-64,DISABLE-64
; RUN: llc -mtriple=powerpc-ibm-aix-xcoff -mattr=-altivec -mcpu=pwr8 %s -o - -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,ENABLE,CHECK-32,ENABLE-32
; RUN: llc -mtriple=powerpc-ibm-aix-xcoff %s -o - -enable-shrink-wrap=false -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,DISABLE,CHECK-32,DISABLE-32
; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff -mattr=-altivec -mcpu=pwr8 %s -o - -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,ENABLE,CHECK-64,ENABLE-64
; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff %s -o - -enable-shrink-wrap=false -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,DISABLE,CHECK-64,DISABLE-64
; Note: Lots of tests use inline asm instead of regular calls.
; This gives us better control over what the register allocator will do.
; Otherwise, we may have spills right in the entry block, defeating
; shrink-wrapping. Moreover, some of the inline asm statements (nop)
; are here to ensure that the related paths do not end up as critical
; edges.
; Initial motivating example: Simple diamond with a call just on one side.
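;
; For reference, a rough C equivalent of @foo below. This is a sketch inferred
; from the IR in this file, not the original source; names mirror the IR.
;
;   int doSomething(int, int *);
;
;   int foo(int a, int b) {
;     int tmp;
;     if (a < b) {
;       tmp = a;
;       return doSomething(0, &tmp);
;     }
;     return a;
;   }
;
; Only the 'then' side of the diamond needs a call (and thus a frame), which is
; what makes shrink-wrapping profitable here.
;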
; CHECK-LABEL: {{.*}}foo:
; Compare the arguments and return
; ENABLE-NEXT: bgelr 0
; At a minimum, we save/restore the link register; other registers may be
; saved as well.
; Compare the arguments and jump to exit.
; This happens after the prologue is set up.
; DISABLE-NEXT: bge 0, {{.*}}[[EXIT_LABEL:BB[0-9_]+]]
; Store %a on the stack
; CHECK: stw 3, {{[0-9]+\([0-9]+\)}}
; Set the alloca address in the second argument.
; CHECK-NEXT: addi 4, 1, {{[0-9]+}}
; Set the first argument to zero.
; CHECK-NEXT: bl {{.*}}doSomething
; With shrink-wrapping, the epilogue is emitted just after the call.
; Restore the link register and return.
; Note that there could be other epilogue code before the link register is
; restored, but we do not check for it here.
; DISABLE: [[EXIT_LABEL]]:
; Without shrink-wrapping, the epilogue is in the exit block.
; Epilogue code. (What we restore does not matter.)
; DISABLE: mtlr {{[0-9]+}}
define i32 @foo(i32 %a, i32 %b) {
  %tmp = alloca i32, align 4
  %tmp2 = icmp slt i32 %a, %b
  br i1 %tmp2, label %true, label %false
  store i32 %a, i32* %tmp, align 4
  %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
; Function Attrs: optsize
declare i32 @doSomething(i32, i32*)
; Check that we do not perform the restore inside the loop whereas the save
; is outside.
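;
; For reference, a rough C equivalent of @freqSaveAndRestoreOutsideLoop below.
; This is a sketch inferred from the IR; the inline-asm nop in the preheader is
; omitted.
;
;   extern int something();
;
;   int freqSaveAndRestoreOutsideLoop(int cond, int N) {
;     if (cond == 0)
;       return N << 1;
;     int sum = 0;
;     for (int i = 0; i < 10; i++)
;       sum += something();
;     return sum << 3;
;   }
;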
; CHECK-LABEL: {{.*}}freqSaveAndRestoreOutsideLoop:
; Shrink-wrapping allows us to skip the prologue in the else case.
; ENABLE: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; Make sure we save the link register
; CHECK: mflr {{[0-9]+}}
; DISABLE: cmplwi 3, 0
; DISABLE: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; CHECK-DAG: li [[SUM:[0-9]+]], 0
; CHECK-DAG: li [[IV:[0-9]+]], 10
; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
; CHECK: bl {{.*}}something
; CHECK-DAG: addi [[IV]], [[IV]], -1
; CHECK-DAG: add [[SUM]], 3, [[SUM]]
; CHECK-DAG: cmplwi [[IV]], 0
; CHECK-NEXT: bne 0, {{.*}}[[LOOP]]
; CHECK: slwi 3, [[SUM]], 3
; DISABLE: b {{.*}}[[EPILOG_BB:BB[0-9_]+]]
; DISABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
; Shift the second argument left by one and put it in the return register.
; DISABLE: slwi 3, 4, 1
; DISABLE: {{.*}}[[EPILOG_BB]]: # %if.end
; CHECK: mtlr {{[0-9]+}}
; ENABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
; Shift the second argument left by one and put it in the return register.
; ENABLE: slwi 3, 4, 1
define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader
  tail call void asm "nop", ""()
for.body: ; preds = %entry, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
  %shl = shl i32 %add, 3
if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
if.end: ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
declare i32 @something(...)
; Check that we do not perform the shrink-wrapping inside the loop even
; though that would be legal. The cost model must prevent that.
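;
; For reference, a rough C equivalent of @freqSaveAndRestoreOutsideLoop2 below.
; This is a sketch inferred from the IR; the inline-asm nops are omitted and
; the value returned after the loop is not shown in this excerpt, so the sum is
; used as a placeholder.
;
;   extern int something();
;
;   int freqSaveAndRestoreOutsideLoop2(int cond) {
;     int sum = 0;
;     for (int i = 0; i < 10; i++)
;       sum += something();
;     return sum; /* placeholder */
;   }
;
; Because the call is executed unconditionally, saving/restoring inside the
; loop would repeat on every iteration; keeping the save/restore in the
; prologue and epilogue is what the cost model is expected to choose.
;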
; CHECK-LABEL: {{.*}}freqSaveAndRestoreOutsideLoop2:
; Make sure we save the link register before the call
; CHECK: mflr {{[0-9]+}}
; CHECK-DAG: li [[SUM:[0-9]+]], 0
; CHECK-DAG: li [[IV:[0-9]+]], 10
; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
; CHECK: bl {{.*}}something
; CHECK-DAG: addi [[IV]], [[IV]], -1
; CHECK-DAG: add [[SUM]], 3, [[SUM]]
; CHECK-DAG: cmplwi [[IV]], 0
; CHECK-NEXT: bne 0, {{.*}}[[LOOP]]
; CHECK: mtlr {{[0-9]+}}
define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
  br label %for.preheader
  tail call void asm "nop", ""()
for.body: ; preds = %for.body, %entry
  %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
  %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.03
  %inc = add nuw nsw i32 %i.04, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.exit, label %for.body
  tail call void asm "nop", ""()
for.end: ; preds = %for.body
; Check with a more complex case that we do not have save within the loop and
; restore outside.
; CHECK-LABEL: {{.*}}loopInfoSaveOutsideLoop:
; ENABLE: cmplwi 3, 0
; ENABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; Make sure we save the link register
; CHECK: mflr {{[0-9]+}}
; DISABLE-64-DAG: std {{[0-9]+}}
; DISABLE-64-DAG: std {{[0-9]+}}
; DISABLE-64-DAG: std {{[0-9]+}}
; DISABLE-64-DAG: stdu 1,
; DISABLE-64-DAG: cmplwi 3, 0
; DISABLE-32-DAG: stw {{[0-9]+}}
; DISABLE-32-DAG: stw {{[0-9]+}}
; DISABLE-32-DAG: stw {{[0-9]+}}
; DISABLE-32-DAG: stwu 1,
; DISABLE-32-DAG: cmplwi 3, 0
; DISABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; CHECK-DAG: li [[SUM:[0-9]+]], 0
; CHECK-DAG: li [[IV:[0-9]+]], 10
; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
; CHECK: bl {{.*}}something
; CHECK-DAG: addi [[IV]], [[IV]], -1
; CHECK-DAG: add [[SUM]], 3, [[SUM]]
; CHECK-DAG: cmplwi [[IV]], 0
; CHECK-NEXT: bne 0, {{.*}}[[LOOP]]
; CHECK: bl {{.*}}somethingElse
; CHECK: slwi 3, [[SUM]], 3
; DISABLE: b {{.*}}[[EPILOG_BB:BB[0-9_]+]]
; DISABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
; Shift the second argument left by one and put it in the return register.
; DISABLE: slwi 3, 4, 1
; DISABLE: {{.*}}[[EPILOG_BB]]: # %if.end
; CHECK: mtlr {{[0-9]+}}
; ENABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
; Shift the second argument left by one and put it in the return register.
; ENABLE: slwi 3, 4, 1
define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader
  tail call void asm "nop", ""()
for.body: ; preds = %entry, %for.body
  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
  tail call void bitcast (void (...)* @somethingElse to void ()*)()
  %shl = shl i32 %add, 3
if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
if.end: ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
declare void @somethingElse(...)
; Check with a more complex case that we do not have restore within the loop
; and save outside.
; CHECK-LABEL: {{.*}}loopInfoRestoreOutsideLoop:
; ENABLE: cmplwi 3, 0
; ENABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; Make sure we save the link register
; CHECK: mflr {{[0-9]+}}
; DISABLE-64-DAG: std {{[0-9]+}}
; DISABLE-64-DAG: std {{[0-9]+}}
; DISABLE-64-DAG: std {{[0-9]+}}
; DISABLE-64-DAG: stdu 1,
; DISABLE-64-DAG: cmplwi 3, 0
; DISABLE-32-DAG: stw {{[0-9]+}}
; DISABLE-32-DAG: stw {{[0-9]+}}
; DISABLE-32-DAG: stw {{[0-9]+}}
; DISABLE-32-DAG: stwu 1,
; DISABLE-32-DAG: cmplwi 3, 0
; DISABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; CHECK: bl {{.*}}somethingElse
; CHECK-DAG: li [[SUM:[0-9]+]], 0
; CHECK-DAG: li [[IV:[0-9]+]], 10
; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
; CHECK: bl {{.*}}something
; CHECK-DAG: addi [[IV]], [[IV]], -1
; CHECK-DAG: add [[SUM]], 3, [[SUM]]
; CHECK-DAG: cmplwi [[IV]], 0
; CHECK-NEXT: bne 0, {{.*}}[[LOOP]]
; CHECK: slwi 3, [[SUM]], 3
; DISABLE: b {{.*}}[[EPILOG_BB:BB[0-9_]+]]
; DISABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
; Shift the second argument left by one and put it in the return register.
; DISABLE: slwi 3, 4, 1
; DISABLE: {{.*}}[[EPILOG_BB]]: # %if.end
; CHECK: mtlr {{[0-9]+}}
; ENABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
; Shift the second argument left by one and put it in the return register.
; ENABLE: slwi 3, 4, 1
define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind {
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
  tail call void bitcast (void (...)* @somethingElse to void ()*)()
for.body: ; preds = %for.body, %if.then
  %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
  %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.04
  %inc = add nuw nsw i32 %i.05, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
  %shl = shl i32 %add, 3
if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
if.end: ; preds = %if.else, %for.end
  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
; Check that we handle a function with no frame information correctly.
; CHECK-LABEL: {{.*}}emptyFrame:
; CHECK-NEXT: li 3, 0
define i32 @emptyFrame() {
; Check that we handle inline asm correctly.
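;
; For reference, a rough C equivalent of @inlineAsm below. This is a sketch
; inferred from the IR; the exact asm syntax is an approximation of the IR's
; "addi 14, 14, 1" with an r14 clobber, which is what forces the callee-saved
; register to be spilled and reloaded.
;
;   int inlineAsm(int cond, int N) {
;     if (cond == 0)
;       return N << 1;
;     for (int i = 0; i < 10; i++)
;       __asm__ volatile("addi 14, 14, 1" : : : "r14");
;     return 0;
;   }
;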
; CHECK-LABEL: {{.*}}inlineAsm:
; ENABLE: cmplwi 3, 0
; ENABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; Make sure we save the CSR used in the inline asm: r14
; ENABLE-DAG: li [[IV:[0-9]+]], 10
; ENABLE-64-DAG: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
; ENABLE-32-DAG: stw 14, -[[STACK_OFFSET:[0-9]+]](1) # 4-byte Folded Spill
; DISABLE: cmplwi 3, 0
; DISABLE-64-NEXT: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
; DISABLE-32-NEXT: stw 14, -[[STACK_OFFSET:[0-9]+]](1) # 4-byte Folded Spill
; DISABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; DISABLE: li [[IV:[0-9]+]], 10
; CHECK: mtctr [[IV]]
; CHECK: {{.*}}[[LOOP_LABEL:BB[0-9_]+]]: # %for.body
; Inline asm statement.
; CHECK: addi 14, 14, 1
; CHECK: bdnz {{.*}}[[LOOP_LABEL]]
; CHECK-64-DAG: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
; CHECK-32-DAG: lwz 14, -[[STACK_OFFSET]](1) # 4-byte Folded Reload
; CHECK: [[ELSE_LABEL]]
; CHECK-NEXT: slwi 3, 4, 1
; DISABLE-64-NEXT: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
; DISABLE-32-NEXT: lwz 14, -[[STACK_OFFSET]](1) # 4-byte Folded Reload
define i32 @inlineAsm(i32 %cond, i32 %N) {
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %for.preheader
  tail call void asm "nop", ""()
for.body: ; preds = %entry, %for.body
  %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
  tail call void asm "addi 14, 14, 1", "~{r14}"()
  %inc = add nuw nsw i32 %i.03, 1
  %exitcond = icmp eq i32 %inc, 10
  br i1 %exitcond, label %for.exit, label %for.body
  tail call void asm "nop", ""()
if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
if.end: ; preds = %for.body, %if.else
  %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
; Check that we handle calls to variadic functions correctly.
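;
; For reference, a rough C equivalent of @callVariadicFunc below (a sketch
; inferred from the IR):
;
;   extern int someVariadicFunc(int, ...);
;
;   int callVariadicFunc(int cond, int N) {
;     if (cond == 0)
;       return N << 1;
;     return someVariadicFunc(N, N, N, N, N, N, N) << 3;
;   }
;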
; CHECK-LABEL: {{.*}}callVariadicFunc:
; ENABLE: cmplwi 3, 0
; ENABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; CHECK: mflr {{[0-9]+}}
; DISABLE: cmplwi 3, 0
; DISABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
; Set up the varargs.
; CHECK-64-NEXT: mr 5, 3
; CHECK-64-NEXT: mr 6, 3
; CHECK-64-NEXT: mr 7, 3
; CHECK-64-NEXT: mr 8, 3
; CHECK-64-NEXT: mr 9, 3
; CHECK-32-NEXT: mr 5, 4
; CHECK-32-NEXT: mr 6, 4
; CHECK-32-NEXT: mr 7, 4
; CHECK-32-NEXT: mr 8, 4
; CHECK-32-NEXT: mr 9, 4
; CHECK-NEXT: bl {{.*}}someVariadicFunc
; CHECK: slwi 3, 3, 3
; DISABLE: b {{.*}}[[EPILOGUE_BB:BB[0-9_]+]]
; ENABLE: mtlr {{[0-9]+}}
; CHECK: {{.*}}[[ELSE_LABEL]]: # %if.else
; CHECK-NEXT: slwi 3, 4, 1
; DISABLE: {{.*}}[[EPILOGUE_BB]]: # %if.end
define i32 @callVariadicFunc(i32 %cond, i32 %N) {
  %tobool = icmp eq i32 %cond, 0
  br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
  %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
  %shl = shl i32 %call, 3
if.else: ; preds = %entry
  %mul = shl nsw i32 %N, 1
if.end: ; preds = %if.else, %if.then
  %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
declare i32 @someVariadicFunc(i32, ...)
; Make sure we do not insert unreachable code after a noreturn function.
; Although it is not incorrect to insert such code, it is useless
; and it hurts the binary size.
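;
; For reference, a rough C equivalent of @noreturn below. This is a sketch
; inferred from the IR; the value returned on the normal path is not shown in
; this excerpt, so 0 is used as a placeholder.
;
;   extern void abort(void) __attribute__((noreturn));
;
;   int noreturn(signed char bad_thing) {
;     if (bad_thing != 0)
;       abort();
;     return 0; /* placeholder */
;   }
;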
; CHECK-LABEL: {{.*}}noreturn:
; DISABLE: mflr {{[0-9]+}}
; CHECK-NEXT: bne{{[-]?}} 0, {{.*}}[[ABORT:BB[0-9_]+]]
; DISABLE: mtlr {{[0-9]+}}
; CHECK: {{.*}}[[ABORT]]: # %if.abort
; ENABLE: mflr {{[0-9]+}}
; CHECK: bl {{.*}}abort
; ENABLE-NOT: mtlr {{[0-9]+}}
define i32 @noreturn(i8 signext %bad_thing) {
  %tobool = icmp eq i8 %bad_thing, 0
  br i1 %tobool, label %if.end, label %if.abort
  tail call void @abort() #0
declare void @abort() #0
attributes #0 = { noreturn nounwind }
; Make sure that we handle infinite loops properly. When checking that the Save
; and Restore blocks are control flow equivalent, the loop searches for the
; immediate (post) dominator of the (restore) save blocks. When either the Save
; or Restore block is located in an infinite loop, the only immediate (post)
; dominator is itself. In this case, we cannot perform shrink-wrapping, but we
; should return gracefully and continue compilation.
; The only requirement for this test is that the compilation finishes correctly.
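;
; For reference, a rough C shape of @infiniteloop below. This is a sketch; the
; IR branches on undef, which is written here as an opaque condition.
;
;   extern int something();
;   extern int opaque_condition; /* stands in for the undef branch */
;
;   void infiniteloop() {
;     if (opaque_condition) {
;       int ptr[4];
;       int sum = 0;
;       for (;;) { /* no exit, so the body has no post-dominator but itself */
;         sum += something();
;         ptr[0] = sum;
;       }
;     }
;   }
;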
; CHECK-LABEL: {{.*}}infiniteloop
define void @infiniteloop() {
  br i1 undef, label %if.then, label %if.end
  %ptr = alloca i32, i32 4
for.body: ; preds = %for.body, %entry
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
  %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
; Another infinite loop test, this time with a body bigger than just one block.
; CHECK-LABEL: {{.*}}infiniteloop2
define void @infiniteloop2() {
  br i1 undef, label %if.then, label %if.end
  %ptr = alloca i32, i32 4
for.body: ; preds = %for.body, %entry
  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2 ]
  %call = tail call i32 asm "mftb $0, 268", "=r,~{r14}"()
  %add = add nsw i32 %call, %sum.03
  store i32 %add, i32* %ptr
  br i1 undef, label %body1, label %body2
  tail call void asm sideeffect "nop", "~{r14}"()
  tail call void asm sideeffect "nop", "~{r14}"()
; Another infinite loop test, this time with two nested infinite loops.
; CHECK-LABEL: {{.*}}infiniteloop3
define void @infiniteloop3() {
  br i1 undef, label %loop2a, label %body
body: ; preds = %entry
  br i1 undef, label %loop2a, label %end
loop1: ; preds = %loop2a, %loop2b
  %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
  %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
  %0 = icmp eq i32* %var, null
  %next.load = load i32*, i32** undef
  br i1 %0, label %loop2a, label %loop2b
loop2a: ; preds = %loop1, %body, %entry
  %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
  %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
loop2b: ; preds = %loop1
  %gep1 = bitcast i32* %var.phi to i32*
  %next.ptr = bitcast i32* %gep1 to i32**
  store i32* %next.phi, i32** %next.ptr
@columns = external global [0 x i32], align 4
@lock = common global i32 0, align 4
@htindex = common global i32 0, align 4
@stride = common global i32 0, align 4
@ht = common global i32* null, align 8
@he = common global i8* null, align 8
; Test for a bug that was triggered when the save point was equal to the
; restore point.
; Function Attrs: nounwind
; CHECK-LABEL: {{.*}}transpose
; Store of the callee-saved register saved by shrink-wrapping.
; FIXME: Test disabled: improved scheduling no longer needs any spills/reloads!
; CHECKXX: std [[CSR:[0-9]+]], -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
; Reload of the callee-saved register.
; CHECKXX: ld [[CSR]], -[[STACK_OFFSET]](1) # 8-byte Folded Reload
; Ensure there are no subsequent uses of the callee-saved register before the
; end of the function.
; CHECKXX-NOT: {{[a-z]+}} [[CSR]]
define signext i32 @transpose() {
  %0 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 1), align 4
  %shl.i = shl i32 %0, 7
  %1 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 2), align 4
  %or.i = or i32 %shl.i, %1
  %shl1.i = shl i32 %or.i, 7
  %2 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 3), align 4
  %or2.i = or i32 %shl1.i, %2
  %3 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 7), align 4
  %shl3.i = shl i32 %3, 7
  %4 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 6), align 4
  %or4.i = or i32 %shl3.i, %4
  %shl5.i = shl i32 %or4.i, 7
  %5 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 5), align 4
  %or6.i = or i32 %shl5.i, %5
  %cmp.i = icmp ugt i32 %or2.i, %or6.i
  br i1 %cmp.i, label %cond.true.i, label %cond.false.i
  %shl7.i = shl i32 %or2.i, 7
  %6 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 4), align 4
  %or8.i = or i32 %6, %shl7.i
  %conv.i = zext i32 %or8.i to i64
  %shl9.i = shl nuw nsw i64 %conv.i, 21
  %conv10.i = zext i32 %or6.i to i64
  %or11.i = or i64 %shl9.i, %conv10.i
  %shl12.i = shl i32 %or6.i, 7
  %7 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 4), align 4
  %or13.i = or i32 %7, %shl12.i
  %conv14.i = zext i32 %or13.i to i64
  %shl15.i = shl nuw nsw i64 %conv14.i, 21
  %conv16.i = zext i32 %or2.i to i64
  %or17.i = or i64 %shl15.i, %conv16.i
  %cond.i = phi i64 [ %or11.i, %cond.true.i ], [ %or17.i, %cond.false.i ]
  %shr.29.i = lshr i64 %cond.i, 17
  %conv18.i = trunc i64 %shr.29.i to i32
  store i32 %conv18.i, i32* @lock, align 4
  %rem.i = srem i64 %cond.i, 1050011
  %conv19.i = trunc i64 %rem.i to i32
  store i32 %conv19.i, i32* @htindex, align 4
  %rem20.i = urem i32 %conv18.i, 179
  %add.i = or i32 %rem20.i, 131072
  store i32 %add.i, i32* @stride, align 4
  %8 = load i32*, i32** @ht, align 8
  %arrayidx = getelementptr inbounds i32, i32* %8, i64 %rem.i
  %9 = load i32, i32* %arrayidx, align 4
  %cmp1 = icmp eq i32 %9, %conv18.i
  br i1 %cmp1, label %if.then, label %if.end
  %idxprom.lcssa = phi i64 [ %rem.i, %hash.exit ], [ %idxprom.1, %if.end ], [ %idxprom.2, %if.end.1 ], [ %idxprom.3, %if.end.2 ], [ %idxprom.4, %if.end.3 ], [ %idxprom.5, %if.end.4 ], [ %idxprom.6, %if.end.5 ], [ %idxprom.7, %if.end.6 ]
  %10 = load i8*, i8** @he, align 8
  %arrayidx3 = getelementptr inbounds i8, i8* %10, i64 %idxprom.lcssa
  %11 = load i8, i8* %arrayidx3, align 1
  %conv = sext i8 %11 to i32
  %add = add nsw i32 %add.i, %conv19.i
  %cmp4 = icmp sgt i32 %add, 1050010
  %sub = add nsw i32 %add, -1050011
  %sub.add = select i1 %cmp4, i32 %sub, i32 %add
  %idxprom.1 = sext i32 %sub.add to i64
  %arrayidx.1 = getelementptr inbounds i32, i32* %8, i64 %idxprom.1
  %12 = load i32, i32* %arrayidx.1, align 4
  %cmp1.1 = icmp eq i32 %12, %conv18.i
  br i1 %cmp1.1, label %if.then, label %if.end.1
  %retval.0 = phi i32 [ %conv, %if.then ], [ -128, %if.end.6 ]
  %add.1 = add nsw i32 %add.i, %sub.add
  %cmp4.1 = icmp sgt i32 %add.1, 1050010
  %sub.1 = add nsw i32 %add.1, -1050011
  %sub.add.1 = select i1 %cmp4.1, i32 %sub.1, i32 %add.1
  %idxprom.2 = sext i32 %sub.add.1 to i64
  %arrayidx.2 = getelementptr inbounds i32, i32* %8, i64 %idxprom.2
  %13 = load i32, i32* %arrayidx.2, align 4
  %cmp1.2 = icmp eq i32 %13, %conv18.i
  br i1 %cmp1.2, label %if.then, label %if.end.2
  %add.2 = add nsw i32 %add.i, %sub.add.1
  %cmp4.2 = icmp sgt i32 %add.2, 1050010
  %sub.2 = add nsw i32 %add.2, -1050011
  %sub.add.2 = select i1 %cmp4.2, i32 %sub.2, i32 %add.2
  %idxprom.3 = sext i32 %sub.add.2 to i64
  %arrayidx.3 = getelementptr inbounds i32, i32* %8, i64 %idxprom.3
  %14 = load i32, i32* %arrayidx.3, align 4
  %cmp1.3 = icmp eq i32 %14, %conv18.i
  br i1 %cmp1.3, label %if.then, label %if.end.3
  %add.3 = add nsw i32 %add.i, %sub.add.2
  %cmp4.3 = icmp sgt i32 %add.3, 1050010
  %sub.3 = add nsw i32 %add.3, -1050011
  %sub.add.3 = select i1 %cmp4.3, i32 %sub.3, i32 %add.3
  %idxprom.4 = sext i32 %sub.add.3 to i64
  %arrayidx.4 = getelementptr inbounds i32, i32* %8, i64 %idxprom.4
  %15 = load i32, i32* %arrayidx.4, align 4
  %cmp1.4 = icmp eq i32 %15, %conv18.i
  br i1 %cmp1.4, label %if.then, label %if.end.4
  %add.4 = add nsw i32 %add.i, %sub.add.3
  %cmp4.4 = icmp sgt i32 %add.4, 1050010
  %sub.4 = add nsw i32 %add.4, -1050011
  %sub.add.4 = select i1 %cmp4.4, i32 %sub.4, i32 %add.4
  %idxprom.5 = sext i32 %sub.add.4 to i64
  %arrayidx.5 = getelementptr inbounds i32, i32* %8, i64 %idxprom.5
  %16 = load i32, i32* %arrayidx.5, align 4
  %cmp1.5 = icmp eq i32 %16, %conv18.i
  br i1 %cmp1.5, label %if.then, label %if.end.5
  %add.5 = add nsw i32 %add.i, %sub.add.4
  %cmp4.5 = icmp sgt i32 %add.5, 1050010
  %sub.5 = add nsw i32 %add.5, -1050011
  %sub.add.5 = select i1 %cmp4.5, i32 %sub.5, i32 %add.5
  %idxprom.6 = sext i32 %sub.add.5 to i64
  %arrayidx.6 = getelementptr inbounds i32, i32* %8, i64 %idxprom.6
  %17 = load i32, i32* %arrayidx.6, align 4
  %cmp1.6 = icmp eq i32 %17, %conv18.i
  br i1 %cmp1.6, label %if.then, label %if.end.6
  %add.6 = add nsw i32 %add.i, %sub.add.5
  %cmp4.6 = icmp sgt i32 %add.6, 1050010
  %sub.6 = add nsw i32 %add.6, -1050011
  %sub.add.6 = select i1 %cmp4.6, i32 %sub.6, i32 %add.6
  %idxprom.7 = sext i32 %sub.add.6 to i64
  %arrayidx.7 = getelementptr inbounds i32, i32* %8, i64 %idxprom.7
  %18 = load i32, i32* %arrayidx.7, align 4
  %cmp1.7 = icmp eq i32 %18, %conv18.i
  br i1 %cmp1.7, label %if.then, label %cleanup