test/CodeGen/AMDGPU/spill-cfg-position.ll

   1 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -stress-regalloc=6 < %s | FileCheck %s
   2
   3 ; The inline spiller can decide to move a spill as early as possible in the basic block.
   4 ; It will skip phis and labels, but we also need to make sure it skips instructions
   5 ; in the basic block prologue which restore the exec mask.
   6 ; Make sure the instruction that restores the exec mask immediately follows the label.
   7
   8 ; CHECK-LABEL: {{^}}spill_cfg_position:
   9 ; CHECK: s_cbranch_execz [[LABEL1:BB[0-9_]+]]
  10 ; CHECK: {{^}}[[LABEL1]]:
  11 ; CHECK: s_cbranch_execz [[LABEL2:BB[0-9_]+]]
  12 ; CHECK: {{^}}[[LABEL2]]:
  13 ; CHECK-NEXT: s_or_b64 exec
  14 ; CHECK: buffer_
  15
   16 define amdgpu_kernel void @spill_cfg_position(i32 addrspace(1)* nocapture %arg) { ; Kernel under test: diamond CFG bb -> (bb36 | bb44) -> bb52.
   17 bb: ; Entry block: loads %arg[0..8] and the element of %arg indexed by the workitem id.
   18   %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #0 ; x workitem id, used below as an index into %arg
   19   %tmp14 = load i32, i32 addrspace(1)* %arg, align 4 ; NOTE(review): presumably keeping these loads live across the branch, with -stress-regalloc=6, is what forces spills - confirm
   20   %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
   21   %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4
   22   %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
   23   %tmp18 = load i32, i32 addrspace(1)* %tmp17, align 4
   24   %tmp19 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3
   25   %tmp20 = load i32, i32 addrspace(1)* %tmp19, align 4
   26   %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4
   27   %tmp22 = load i32, i32 addrspace(1)* %tmp21, align 4
   28   %tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 5
   29   %tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4
   30   %tmp25 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 6
   31   %tmp26 = load i32, i32 addrspace(1)* %tmp25, align 4
   32   %tmp27 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 7
   33   %tmp28 = load i32, i32 addrspace(1)* %tmp27, align 4
   34   %tmp29 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8
   35   %tmp30 = load i32, i32 addrspace(1)* %tmp29, align 4
   36   %tmp33 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp1 ; address of %arg[workitem id]; also the target of the final store
   37   %tmp34 = load i32, i32 addrspace(1)* %tmp33, align 4
   38   %tmp35 = icmp eq i32 %tmp34, 0
   39   br i1 %tmp35, label %bb44, label %bb36 ; loaded element == 0 -> bb44, otherwise -> bb36
   40 ; bb36 computes %tmp20*%tmp18 + %tmp16 + %tmp24*%tmp22 + %tmp28*%tmp26 + %tmp30.
   41 bb36:                                             ; preds = %bb
   42   %tmp37 = mul nsw i32 %tmp20, %tmp18
   43   %tmp38 = add nsw i32 %tmp37, %tmp16
   44   %tmp39 = mul nsw i32 %tmp24, %tmp22
   45   %tmp40 = add nsw i32 %tmp38, %tmp39
   46   %tmp41 = mul nsw i32 %tmp28, %tmp26
   47   %tmp42 = add nsw i32 %tmp40, %tmp41
   48   %tmp43 = add nsw i32 %tmp42, %tmp30
   49   br label %bb52
   50 ; bb44 computes %tmp18*%tmp16 + %tmp22*%tmp20 + %tmp26*%tmp24 + %tmp30*%tmp28.
   51 bb44:                                             ; preds = %bb
   52   %tmp45 = mul nsw i32 %tmp18, %tmp16
   53   %tmp46 = mul nsw i32 %tmp22, %tmp20
   54   %tmp47 = add nsw i32 %tmp46, %tmp45
   55   %tmp48 = mul nsw i32 %tmp26, %tmp24
   56   %tmp49 = add nsw i32 %tmp47, %tmp48
   57   %tmp50 = mul nsw i32 %tmp30, %tmp28
   58   %tmp51 = add nsw i32 %tmp49, %tmp50
   59   br label %bb52
   60 ; bb52 is the join block; it reuses the values loaded in bb, so they remain live across both arms.
   61 bb52:                                             ; preds = %bb44, %bb36
   62   %tmp53 = phi i32 [ %tmp43, %bb36 ], [ %tmp51, %bb44 ] ; result of whichever arm was taken
   63   %tmp54 = mul nsw i32 %tmp16, %tmp14
   64   %tmp55 = mul nsw i32 %tmp22, %tmp18
   65   %tmp56 = mul nsw i32 %tmp24, %tmp20
   66   %tmp57 = mul nsw i32 %tmp30, %tmp26
   67   %tmp58 = add i32 %tmp55, %tmp54
   68   %tmp59 = add i32 %tmp58, %tmp56
   69   %tmp60 = add i32 %tmp59, %tmp28
   70   %tmp61 = add i32 %tmp60, %tmp57
   71   %tmp62 = add i32 %tmp61, %tmp53 ; fold in the per-arm result
   72   store i32 %tmp62, i32 addrspace(1)* %tmp33, align 4 ; write the sum back to %arg[workitem id]
   73   ret void
   74 }
  75
   76 declare i32 @llvm.amdgcn.workitem.id.x() #0 ; intrinsic called in @spill_cfg_position to obtain %tmp1
   77
   78 attributes #0 = { nounwind readnone } ; attribute group #0, referenced by the declaration and by its call site