llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
   3
   ; Basic f32 case: two llvm.amdgcn.lds.param.load results are fed through
   ; llvm.amdgcn.interp.inreg.p10 and llvm.amdgcn.interp.inreg.p2 together with
   ; the inreg arguments %i and %j, and all four interp results are exported.
   ; The assertions expect both lds_param_load instructions (attr0.y for the
   ; (1, 0) load, attr1.x for the (0, 1) load) to sit between the exec_lo save +
   ; s_wqm_b32 and the exec_lo restore, wait_exp:1 on the first v_interp_p10_f32,
   ; no wait_exp on the second, and wait_exp:7 on both v_interp_p2_f32.
   4 define amdgpu_ps void @v_interp_f32(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
   5 ; GCN-LABEL: v_interp_f32:
   6 ; GCN:       ; %bb.0: ; %main_body
   7 ; GCN-NEXT:    s_mov_b32 s3, exec_lo
   8 ; GCN-NEXT:    s_wqm_b32 exec_lo, exec_lo
   9 ; GCN-NEXT:    s_mov_b32 m0, s2
  10 ; GCN-NEXT:    lds_param_load v0, attr0.y wait_vdst:15
  11 ; GCN-NEXT:    lds_param_load v1, attr1.x wait_vdst:15
  12 ; GCN-NEXT:    s_mov_b32 exec_lo, s3
  13 ; GCN-NEXT:    v_mov_b32_e32 v2, s0
  14 ; GCN-NEXT:    v_mov_b32_e32 v4, s1
  15 ; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
  16 ; GCN-NEXT:    v_interp_p10_f32 v3, v0, v2, v0 wait_exp:1
  17 ; GCN-NEXT:    v_interp_p10_f32 v2, v1, v2, v1
  18 ; GCN-NEXT:    v_interp_p2_f32 v5, v0, v4, v3 wait_exp:7
  19 ; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1)
  20 ; GCN-NEXT:    v_interp_p2_f32 v4, v1, v4, v5 wait_exp:7
  21 ; GCN-NEXT:    exp mrt0 v3, v2, v5, v4 done
  22 ; GCN-NEXT:    s_endpgm
  23 main_body:
  24   %p0 = call float @llvm.amdgcn.lds.param.load(i32 1, i32 0, i32 %m0)
  25   %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0)
  26   %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0)
  27   %p1_0 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0)
  28   %p0_1 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1)
       ; NOTE(review): this p2 takes %p1_0 (the previous p2 result) as its third
       ; operand rather than %p0_1; the assertions above encode that chain (v5
       ; feeds the last v_interp_p2_f32) - confirm it is intentional.
  29   %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0)
       ; Exports both p10 results and both p2 results.
  30   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_1, float %p1_0, float %p1_1, i1 true, i1 true) #0
  31   ret void
  32 }
  33
  ; Four-attribute f32 case: four llvm.amdgcn.lds.param.load calls (second
  ; operand 0..3, checked as attr0.x..attr3.x) each go through a p10 -> p2 pair
  ; with the inreg arguments %i and %j, and only the four p2 results are
  ; exported.
  ; The assertions expect %i and %j to be copied to VGPRs by one
  ; v_dual_mov_b32, wait_exp:3, wait_exp:2, wait_exp:1 and then no wait_exp on
  ; the four v_interp_p10_f32 in order, and wait_exp:7 on every v_interp_p2_f32.
  34 define amdgpu_ps void @v_interp_f32_many(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
  35 ; GCN-LABEL: v_interp_f32_many:
  36 ; GCN:       ; %bb.0: ; %main_body
  37 ; GCN-NEXT:    s_mov_b32 s3, exec_lo
  38 ; GCN-NEXT:    s_wqm_b32 exec_lo, exec_lo
  39 ; GCN-NEXT:    s_mov_b32 m0, s2
  40 ; GCN-NEXT:    lds_param_load v0, attr0.x wait_vdst:15
  41 ; GCN-NEXT:    lds_param_load v1, attr1.x wait_vdst:15
  42 ; GCN-NEXT:    lds_param_load v2, attr2.x wait_vdst:15
  43 ; GCN-NEXT:    lds_param_load v3, attr3.x wait_vdst:15
  44 ; GCN-NEXT:    s_mov_b32 exec_lo, s3
  45 ; GCN-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
  46 ; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
  47 ; GCN-NEXT:    v_interp_p10_f32 v6, v0, v4, v0 wait_exp:3
  48 ; GCN-NEXT:    v_interp_p10_f32 v7, v1, v4, v1 wait_exp:2
  49 ; GCN-NEXT:    v_interp_p10_f32 v8, v2, v4, v2 wait_exp:1
  50 ; GCN-NEXT:    v_interp_p10_f32 v4, v3, v4, v3
  51 ; GCN-NEXT:    v_interp_p2_f32 v6, v0, v5, v6 wait_exp:7
  52 ; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
  53 ; GCN-NEXT:    v_interp_p2_f32 v7, v1, v5, v7 wait_exp:7
  54 ; GCN-NEXT:    v_interp_p2_f32 v8, v2, v5, v8 wait_exp:7
  55 ; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_4)
  56 ; GCN-NEXT:    v_interp_p2_f32 v4, v3, v5, v4 wait_exp:7
  57 ; GCN-NEXT:    exp mrt0 v6, v7, v8, v4 done
  58 ; GCN-NEXT:    s_endpgm
  59 main_body:
  60   %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
  61   %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0)
  62   %p2 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 2, i32 %m0)
  63   %p3 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 3, i32 %m0)
       ; Each %pN_0 is the p10 result for parameter N and is consumed only by the
       ; matching p2 call that produces %pN_1.
  64   %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0)
  65   %p0_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0)
  66   %p1_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1)
  67   %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0)
  68   %p2_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p2, float %i, float %p2)
  69   %p2_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p2, float %j, float %p2_0)
  70   %p3_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p3, float %i, float %p3)
  71   %p3_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p3, float %j, float %p3_0)
  72   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) #0
  73   ret void
  74 }
  75
  ; Same four-attribute p10 -> p2 sequence as @v_interp_f32_many, except that
  ; %i and %j are loaded from %ptr (addrspace(1)) at float indices 1 and 2
  ; instead of arriving as inreg arguments.
  ; The assertions expect a single global_load_b64 at offset:4 issued before the
  ; lds_param_load group, an s_waitcnt vmcnt(0) before the first
  ; v_interp_p10_f32, and the same wait_exp pattern as the inreg version
  ; (3, 2, 1, none on the p10 instructions; 7 on every p2 instruction).
  76 define amdgpu_ps void @v_interp_f32_many_vm(ptr addrspace(1) %ptr, i32 inreg %m0) #0 {
  77 ; GCN-LABEL: v_interp_f32_many_vm:
  78 ; GCN:       ; %bb.0: ; %main_body
  79 ; GCN-NEXT:    global_load_b64 v[0:1], v[0:1], off offset:4
  80 ; GCN-NEXT:    s_mov_b32 m0, s0
  81 ; GCN-NEXT:    s_mov_b32 s0, exec_lo
  82 ; GCN-NEXT:    s_wqm_b32 exec_lo, exec_lo
  83 ; GCN-NEXT:    lds_param_load v2, attr0.x wait_vdst:15
  84 ; GCN-NEXT:    lds_param_load v3, attr1.x wait_vdst:15
  85 ; GCN-NEXT:    lds_param_load v4, attr2.x wait_vdst:15
  86 ; GCN-NEXT:    lds_param_load v5, attr3.x wait_vdst:15
  87 ; GCN-NEXT:    s_mov_b32 exec_lo, s0
  88 ; GCN-NEXT:    s_waitcnt vmcnt(0)
  89 ; GCN-NEXT:    v_interp_p10_f32 v6, v2, v0, v2 wait_exp:3
  90 ; GCN-NEXT:    v_interp_p10_f32 v7, v3, v0, v3 wait_exp:2
  91 ; GCN-NEXT:    v_interp_p10_f32 v8, v4, v0, v4 wait_exp:1
  92 ; GCN-NEXT:    v_interp_p10_f32 v0, v5, v0, v5
  93 ; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
  94 ; GCN-NEXT:    v_interp_p2_f32 v6, v2, v1, v6 wait_exp:7
  95 ; GCN-NEXT:    v_interp_p2_f32 v7, v3, v1, v7 wait_exp:7
  96 ; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
  97 ; GCN-NEXT:    v_interp_p2_f32 v8, v4, v1, v8 wait_exp:7
  98 ; GCN-NEXT:    v_interp_p2_f32 v0, v5, v1, v0 wait_exp:7
  99 ; GCN-NEXT:    exp mrt0 v6, v7, v8, v0 done
 100 ; GCN-NEXT:    s_endpgm
 101 main_body:
       ; %i and %j are adjacent floats (indices 1 and 2); the assertions show them
       ; fetched together by one 64-bit load.
 102   %i.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 1
 103   %i = load float, ptr addrspace(1) %i.ptr, align 4
 104   %j.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 2
 105   %j = load float, ptr addrspace(1) %j.ptr, align 4
 106   %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
 107   %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0)
 108   %p2 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 2, i32 %m0)
 109   %p3 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 3, i32 %m0)
 110   %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0)
 111   %p0_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0)
 112   %p1_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1)
 113   %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0)
 114   %p2_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p2, float %i, float %p2)
 115   %p2_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p2, float %j, float %p2_0)
 116   %p3_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p3, float %i, float %p3)
 117   %p3_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p3, float %j, float %p3_0)
 118   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) #0
 119   ret void
 120 }
 121
 ; f16 case: one llvm.amdgcn.lds.param.load result is interpolated twice with
 ; llvm.amdgcn.interp.inreg.p10.f16 / llvm.amdgcn.interp.inreg.p2.f16, once with
 ; the trailing i1 operand 0 and once with it 1, and the two half results are
 ; added and returned.
 ; The assertions expect op_sel:[1,0,1,0] on the v_interp_p10_f16_f32 and
 ; op_sel:[1,0,0,0] on the v_interp_p2_f16_f32 of the i1 1 pair, no op_sel on
 ; the i1 0 pair, no wait_exp on the first v_interp_p10_f16_f32 and wait_exp:7
 ; on the remaining three interp instructions.
 122 define amdgpu_ps half @v_interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
 123 ; GCN-LABEL: v_interp_f16:
 124 ; GCN:       ; %bb.0: ; %main_body
 125 ; GCN-NEXT:    s_mov_b32 s3, exec_lo
 126 ; GCN-NEXT:    s_wqm_b32 exec_lo, exec_lo
 127 ; GCN-NEXT:    s_mov_b32 m0, s2
 128 ; GCN-NEXT:    lds_param_load v1, attr0.x wait_vdst:15
 129 ; GCN-NEXT:    s_mov_b32 exec_lo, s3
 130 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 131 ; GCN-NEXT:    v_mov_b32_e32 v2, s1
 132 ; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 133 ; GCN-NEXT:    v_interp_p10_f16_f32 v3, v1, v0, v1
 134 ; GCN-NEXT:    v_interp_p10_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
 135 ; GCN-NEXT:    v_interp_p2_f16_f32 v3, v1, v2, v3 wait_exp:7
 136 ; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 137 ; GCN-NEXT:    v_interp_p2_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
 138 ; GCN-NEXT:    v_add_f16_e32 v0, v3, v0
 139 ; GCN-NEXT:    ; return to shader part epilog
 140 main_body:
 141   %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
       ; "l_" pair: trailing i1 operand is 0.
 142   %l_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float %p0, float %i, float %p0, i1 0)
 143   %l_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float %p0, float %j, float %l_p0, i1 0)
       ; "h_" pair: same operands, trailing i1 operand is 1.
 144   %h_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float %p0, float %i, float %p0, i1 1)
 145   %h_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float %p0, float %j, float %h_p0, i1 1)
 146   %res = fadd half %l_p1, %h_p1
 147   ret half %res
 148 }
 149
 ; Immediate-parameter case: the first and third operands of
 ; llvm.amdgcn.interp.inreg.p10.f16 and llvm.amdgcn.interp.inreg.p2.f16 are the
 ; constant 0.0 instead of lds.param.load results, so the expected output has
 ; no lds_param_load and no exec_lo / m0 setup.
 ; The assertions expect the zero to be placed in a VGPR (v_dual_mov_b32 v0, 0)
 ; and used as the register operands of both interp instructions, each with
 ; wait_exp:7; the float p10 result is then converted by v_cvt_f16_f32_e32 and
 ; added to the half p2 result.
 150 define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #0 {
 151 ; GCN-LABEL: v_interp_f16_imm_params:
 152 ; GCN:       ; %bb.0: ; %main_body
 153 ; GCN-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
 154 ; GCN-NEXT:    v_mov_b32_e32 v2, s1
 155 ; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 156 ; GCN-NEXT:    v_interp_p10_f16_f32 v1, v0, v1, v0 wait_exp:7
 157 ; GCN-NEXT:    v_interp_p2_f16_f32 v0, v0, v2, v0 wait_exp:7
 158 ; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 159 ; GCN-NEXT:    v_cvt_f16_f32_e32 v1, v1
 160 ; GCN-NEXT:    v_add_f16_e32 v0, v1, v0
 161 ; GCN-NEXT:    ; return to shader part epilog
 162 main_body:
 163   %l_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float 0.0, float %i, float 0.0, i1 0)
       ; The third operand here is the constant 0.0, not %l_p0, so the two interp
       ; calls are independent of each other.
 164   %l_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float 0.0, float %j, float 0.0, i1 0)
 165   %h = fptrunc float %l_p0 to half
 166   %res = fadd half %h, %l_p1
 167   ret half %res
 168 }
 169
 ; Declarations of the AMDGPU intrinsics called by the test functions above.
 170 declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) #1
 171 declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #0
 172 declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #0
 173 declare float @llvm.amdgcn.interp.inreg.p10.f16(float, float, float, i1) #0
 174 declare half @llvm.amdgcn.interp.inreg.p2.f16(float, float, float, i1) #0
 175 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
 ; NOTE(review): llvm.amdgcn.exp.f16 has no call site in the visible part of
 ; this file and is declared with float operands - confirm it is still needed.
 176 declare void @llvm.amdgcn.exp.f16(i32, i32, float, float, float, float, i1, i1) #0
 177
 ; Attribute group #0 is used by the test functions, the exp.f32 call sites and
 ; every declaration except lds.param.load, which uses #1.
 178 attributes #0 = { nounwind }
 179 attributes #1 = { nounwind readnone }