test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll

   1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=CHECK %s
   2
   3 ; Check that WQM is not triggered by the softwqm intrinsic alone.
   4 ; The shader only adds two buffer loads and passes the sum through softwqm.f32.
   5 ;CHECK-LABEL: {{^}}test1:
   6 ;CHECK-NOT: s_wqm_b64 exec, exec
   7 ;CHECK: buffer_load_dword
   8 ;CHECK: buffer_load_dword
   9 ;CHECK: v_add_f32_e32
  10 define amdgpu_ps float @test1(i32 inreg %idx0, i32 inreg %idx1) {
  11 main_body:
  12   %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
  13   %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
  14   %out = fadd float %src0, %src1
  15   %out.0 = call float @llvm.amdgcn.softwqm.f32(float %out)
  16   ret float %out.0
  17 }
  18
  19 ; Check that the softwqm intrinsic works correctly for integers.
  20 ; The float sum is bitcast to i32, passed through softwqm.i32, then bitcast back.
  21 ;CHECK-LABEL: {{^}}test2:
  22 ;CHECK-NOT: s_wqm_b64 exec, exec
  23 ;CHECK: buffer_load_dword
  24 ;CHECK: buffer_load_dword
  25 ;CHECK: v_add_f32_e32
  26 define amdgpu_ps float @test2(i32 inreg %idx0, i32 inreg %idx1) {
  27 main_body:
  28   %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
  29   %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
  30   %out = fadd float %src0, %src1
  31   %out.0 = bitcast float %out to i32
  32   %out.1 = call i32 @llvm.amdgcn.softwqm.i32(i32 %out.0)
  33   %out.2 = bitcast i32 %out.1 to float
  34   ret float %out.2
  35 }
  36
  37 ; Make sure the transition from WQM to Exact to softwqm does not trigger WQM.
  38 ; s_wqm_b64 must appear neither before the loads nor between the store and the final add.
  39 ;CHECK-LABEL: {{^}}test_softwqm1:
  40 ;CHECK-NOT: s_wqm_b64 exec, exec
  41 ;CHECK: buffer_load_dword
  42 ;CHECK: buffer_load_dword
  43 ;CHECK: buffer_store_dword
  44 ;CHECK-NOT: s_wqm_b64 exec, exec
  45 ;CHECK: v_add_f32_e32
  46 define amdgpu_ps float @test_softwqm1(i32 inreg %idx0, i32 inreg %idx1) {
  47 main_body:
  48   %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
  49   %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
  50   %temp = fadd float %src0, %src1
  51   call void @llvm.amdgcn.buffer.store.f32(float %temp, <4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
  52   %out = fadd float %temp, %temp
  53   %out.0 = call float @llvm.amdgcn.softwqm.f32(float %out)
  54   ret float %out.0
  55 }
  56
  57 ; Make sure the transition from WQM to Exact to softwqm does trigger WQM.
  58 ; Expected shape: enter WQM, do the loads, restore the original exec for the store, re-enter WQM.
  59 ;CHECK-LABEL: {{^}}test_softwqm2:
  60 ;CHECK: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec
  61 ;CHECK: s_wqm_b64 exec, exec
  62 ;CHECK: buffer_load_dword
  63 ;CHECK: buffer_load_dword
  64 ;CHECK: s_and_b64 exec, exec, [[ORIG]]
  65 ;CHECK: buffer_store_dword
  66 ;CHECK: s_wqm_b64 exec, exec
  67 ;CHECK: v_add_f32_e32
  68 define amdgpu_ps float @test_softwqm2(i32 inreg %idx0, i32 inreg %idx1) {
  69 main_body:
  70   %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
  71   %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
  72   %temp = fadd float %src0, %src1
  73   %temp.0 = call float @llvm.amdgcn.wqm.f32(float %temp)
  74   call void @llvm.amdgcn.buffer.store.f32(float %temp.0, <4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
  75   %out = fadd float %temp, %temp
  76   %out.0 = call float @llvm.amdgcn.softwqm.f32(float %out)
  77   ret float %out.0
  78 }
  79
  80 ; Make sure the transition from Exact to WWM then softwqm does not trigger WQM.
  81 ; The wwm.f32 result feeds the final fadd, whose value is passed through softwqm.f32.
  82 ;CHECK-LABEL: {{^}}test_wwm1:
  83 ;CHECK: buffer_load_dword
  84 ;CHECK: buffer_store_dword
  85 ;CHECK: s_or_saveexec_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], -1
  86 ;CHECK: buffer_load_dword
  87 ;CHECK: v_add_f32_e32
  88 ;CHECK: s_mov_b64 exec, [[ORIG]]
  89 ;CHECK-NOT: s_wqm_b64
  90 define amdgpu_ps float @test_wwm1(i32 inreg %idx0, i32 inreg %idx1) {
  91 main_body:
  92   %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
  93   call void @llvm.amdgcn.buffer.store.f32(float %src0, <4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
  94   %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
  95   %temp = fadd float %src0, %src1
  96   %temp.0 = call float @llvm.amdgcn.wwm.f32(float %temp)
  97   %out = fadd float %temp.0, %temp.0
  98   %out.0 = call float @llvm.amdgcn.softwqm.f32(float %out)
  99   ret float %out.0
 100 }
 101
 102 ; Check that softwqm on one case of branch does not trigger WQM for shader.
 103 ; Only the IF block uses softwqm; the ELSE block just stores %data.
 104 ;CHECK-LABEL: {{^}}test_control_flow_0:
 105 ;CHECK-NEXT: ; %main_body
 106 ;CHECK-NOT: s_wqm_b64 exec, exec
 107 ;CHECK: %ELSE
 108 ;CHECK: store
 109 ;CHECK: %IF
 110 ;CHECK: buffer_load
 111 ;CHECK: buffer_load
 112 define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 inreg %idx0, i32 inreg %idx1, i32 %c, i32 %z, float %data) {
 113 main_body:
 114   %cmp = icmp eq i32 %z, 0
 115   br i1 %cmp, label %IF, label %ELSE
 116
 117 IF:
 118   %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
 119   %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
 120   %out = fadd float %src0, %src1
 121   %data.if = call float @llvm.amdgcn.softwqm.f32(float %out)
 122   br label %END
 123
 124 ELSE:
 125   call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 %c, i32 0, i1 0, i1 0)
 126   br label %END
 127
 128 END:
 129   %r = phi float [ %data.if, %IF ], [ %data, %ELSE ]
 130   ret float %r
 131 }
 132
 133 ; Check that softwqm on one case of branch is treated as WQM in WQM shader.
 134 ; Unlike test_control_flow_0, main_body performs two image.sample calls and s_wqm_b64 is expected at entry.
 135 ;CHECK-LABEL: {{^}}test_control_flow_1:
 136 ;CHECK-NEXT: ; %main_body
 137 ;CHECK-NEXT: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec
 138 ;CHECK-NEXT: s_wqm_b64 exec, exec
 139 ;CHECK: %ELSE
 140 ;CHECK: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[ORIG]]
 141 ;CHECK: store
 142 ;CHECK: s_mov_b64 exec, [[SAVED]]
 143 ;CHECK: %IF
 144 ;CHECK-NOT: s_and_saveexec_b64
 145 ;CHECK-NOT: s_and_b64 exec
 146 ;CHECK: buffer_load
 147 ;CHECK: buffer_load
 148 define amdgpu_ps float @test_control_flow_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 inreg %idx0, i32 inreg %idx1, i32 %c, i32 %z, float %data) {
 149 main_body:
 150   %c.bc = bitcast i32 %c to float
 151   %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
 152   %tex0 = extractelement <4 x float> %tex, i32 0
 153   %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 0, i32 0, i32 0) #0
 154   %data.sample = extractelement <4 x float> %dtex, i32 0
 155
 156   %cmp = icmp eq i32 %z, 0
 157   br i1 %cmp, label %IF, label %ELSE
 158
 159 IF:
 160   %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
 161   %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
 162   %out = fadd float %src0, %src1
 163   %data.if = call float @llvm.amdgcn.softwqm.f32(float %out)
 164   br label %END
 165
 166 ELSE:
 167   call void @llvm.amdgcn.buffer.store.f32(float %data.sample, <4 x i32> undef, i32 %c, i32 0, i1 0, i1 0)
 168   br label %END
 169
 170 END:
 171   %r = phi float [ %data.if, %IF ], [ %data, %ELSE ]
 172   ret float %r
 173 }
 174
 175 declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #2
 176 declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #2 ; not called by any test visible in this file
 177 declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #3
 178 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
 179 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3 ; not called by any test visible in this file
 180 declare void @llvm.amdgcn.kill(i1) #1 ; not called by any test visible in this file
 181 declare float @llvm.amdgcn.wqm.f32(float) #3
 182 declare float @llvm.amdgcn.softwqm.f32(float) #3
 183 declare i32 @llvm.amdgcn.softwqm.i32(i32) #3
 184 declare float @llvm.amdgcn.wwm.f32(float) #3
 185
 186 attributes #1 = { nounwind } ; NOTE(review): the image.sample calls in test_control_flow_1 reference #0, but no "attributes #0" group is visible here
 187 attributes #2 = { nounwind readonly }
 188 attributes #3 = { nounwind readnone }