1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global,+WavefrontSize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
; Intrinsic declarations used by the kernels in this file. The workitem id
; intrinsic carries attribute group #1; the fabs intrinsics are declared for
; half, float and double.
5 declare i32 @llvm.amdgcn.workitem.id.x() #1
6 declare half @llvm.fabs.f16(half)
7 declare float @llvm.fabs.f32(float)
8 declare double @llvm.fabs.f64(double)
10 ; GCN-LABEL: {{^}}v_cnd_nan_nosgpr:
11 ; GCN: v_cmp_eq_u32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0
12 ; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, -1, v{{[0-9]+}}, [[COND]]
14 ; All nan values are converted to 0xffffffff
; Loads %f from %fptr indexed by the workitem id and stores
; (%c != 0) ? NaN constant : %f to %out. The non-constant select operand
; comes from a load rather than directly from a kernel argument.
16 define amdgpu_kernel void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
17 %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
18 %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
19 %f = load float, float addrspace(1)* %f.gep
20 %setcc = icmp ne i32 %c, 0
21 %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
22 store float %select, float addrspace(1)* %out
27 ; This requires slightly trickier SGPR operand legalization since the
28 ; single constant bus SGPR usage is the last operand, and it should
29 ; never be moved.
30 ; However on GFX10 constant bus is limited to 2 scalar operands, not one.
32 ; GCN-LABEL: {{^}}v_cnd_nan:
33 ; SIVI: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0
34 ; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, -1, v{{[0-9]+}}, vcc
35 ; GFX10: v_cmp_eq_u32_e64 [[CC:s\[[0-9:]+\]]], s{{[0-9]+}}, 0
36 ; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, -1, s{{[0-9]+}}, [[CC]]
38 ; All nan values are converted to 0xffffffff
; Stores (%c != 0) ? NaN constant : %f to %out. Both the compared value %c
; and the selected value %f are passed directly as kernel arguments.
40 define amdgpu_kernel void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) #0 {
41 %setcc = icmp ne i32 %c, 0
42 %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
43 store float %select, float addrspace(1)* %out
47 ; Test different compare and select operand types for optimal code
48 ; shrinking.
49 ; (select (cmp (sgprX, constant)), constant, sgprZ)
51 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprZ_f32:
53 ; GCN: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Z:[0-9]+]]{{\]}}
54 ; SIVI-DAG: v_cmp_nlg_f32_e64 [[CC:vcc]], s[[X]], 0
55 ; GFX10-DAG: v_cmp_nlg_f32_e64 [[CC:s\[[0-9:]+\]]], s[[X]], 0
56 ; SIVI-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], s[[Z]]
57 ; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], [[CC]]
58 ; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 1.0, s[[Z]], [[CC]]
; Stores (%x one 0.0) ? 1.0 : %z to %out indexed by the workitem id.
; %x and %z are distinct kernel arguments; the unnamed [8 x i32] argument
; sits between %out and %x in the argument list.
59 define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, [8 x i32], float %x, float %z) #0 {
60 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
61 %tid.ext = sext i32 %tid to i64
62 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
63 %setcc = fcmp one float %x, 0.0
64 %select = select i1 %setcc, float 1.0, float %z
65 store float %select, float addrspace(1)* %out.gep
69 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprX_f32:
70 ; GCN: s_load_dword [[X:s[0-9]+]]
71 ; SIVI-DAG: v_cmp_nlg_f32_e64 [[CC:vcc]], [[X]], 0
72 ; GFX10-DAG: v_cmp_nlg_f32_e64 [[CC:s\[[0-9:]+\]]], [[X]], 0
73 ; SIVI-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]]
74 ; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VX]], [[CC]]
75 ; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 1.0, [[X]], [[CC]]
; Stores (%x one 0.0) ? 1.0 : %x to %out indexed by the workitem id.
; The same kernel argument %x is both the compared and the selected value.
76 define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)* %out, float %x) #0 {
77 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
78 %tid.ext = sext i32 %tid to i64
79 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
80 %setcc = fcmp one float %x, 0.0
81 %select = select i1 %setcc, float 1.0, float %x
82 store float %select, float addrspace(1)* %out.gep
86 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprZ_f32:
87 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Z:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
88 ; SIVI-DAG: v_cmp_nlg_f32_e64 [[CC:vcc]], s[[X]], 0
89 ; GFX10-DAG: v_cmp_nlg_f32_e64 [[CC:s\[[0-9:]+\]]], s[[X]], 0
90 ; SIVI-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], s[[Z]]
91 ; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VZ]], [[CC]]
92 ; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 0, s[[Z]], [[CC]]
; Stores (%x one 0.0) ? 0.0 : %z to %out indexed by the workitem id.
; The compare constant and the selected constant are the same value (0.0);
; %x and %z are distinct kernel arguments.
93 define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(float addrspace(1)* %out, [8 x i32], float %x, float %z) #0 {
94 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
95 %tid.ext = sext i32 %tid to i64
96 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
97 %setcc = fcmp one float %x, 0.0
98 %select = select i1 %setcc, float 0.0, float %z
99 store float %select, float addrspace(1)* %out.gep
103 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprX_f32:
104 ; GCN: s_load_dword [[X:s[0-9]+]]
105 ; SIVI-DAG: v_cmp_nlg_f32_e64 [[CC:vcc]], [[X]], 0
106 ; GFX10-DAG: v_cmp_nlg_f32_e64 [[CC:s\[[0-9:]+\]]], [[X]], 0
107 ; SIVI-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[X]]
108 ; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[VX]], [[CC]]
109 ; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 0, [[X]], [[CC]]
; Stores (%x one 0.0) ? 0.0 : %x to %out indexed by the workitem id.
; One kernel argument %x is compared and selected, and the constant 0.0 is
; used by both the compare and the select.
110 define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* %out, float %x) #0 {
111 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
112 %tid.ext = sext i32 %tid to i64
113 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
114 %setcc = fcmp one float %x, 0.0
115 %select = select i1 %setcc, float 0.0, float %x
116 store float %select, float addrspace(1)* %out.gep
120 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_vgprZ_f32:
121 ; GCN-DAG: s_load_dword [[X:s[0-9]+]]
122 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]]
123 ; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
124 ; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 0, [[Z]], [[COND]]
; Stores (%x one 0.0) ? 0.0 : %z to %out indexed by the workitem id.
; %x is a kernel argument, while %z is loaded from %z.ptr at the workitem id.
125 define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
126 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
127 %tid.ext = sext i32 %tid to i64
128 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
129 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
130 %z = load float, float addrspace(1)* %z.gep
131 %setcc = fcmp one float %x, 0.0
132 %select = select i1 %setcc, float 0.0, float %z
133 store float %select, float addrspace(1)* %out.gep
137 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_vgprZ_f32:
138 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]]
139 ; GCN-DAG: s_load_dword [[X:s[0-9]+]]
140 ; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
141 ; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 1.0, [[Z]], [[COND]]
; Stores (%x one 0.0) ? 1.0 : %z to %out indexed by the workitem id.
; %x is a kernel argument, while %z is loaded from %z.ptr at the workitem id.
; The selected constant (1.0) differs from the compare constant (0.0).
142 define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
143 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
144 %tid.ext = sext i32 %tid to i64
145 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
146 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
147 %z = load float, float addrspace(1)* %z.gep
148 %setcc = fcmp one float %x, 0.0
149 %select = select i1 %setcc, float 1.0, float %z
150 store float %select, float addrspace(1)* %out.gep
154 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_sgprZ_f32:
155 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
156 ; GCN-DAG: s_load_dword [[Z:s[0-9]+]]
157 ; GCN-DAG: v_cmp_ngt_f32_e32 vcc, 0, [[X]]
158 ; SIVI-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
159 ; SIVI: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc
160 ; GFX10: v_cndmask_b32_e64 v{{[0-9]+}}, 1.0, [[Z]], vcc
; Stores (%x olt 0.0) ? 1.0 : %z to %out indexed by the workitem id.
; Here the compared value %x is loaded from %x.ptr at the workitem id and the
; selected value %z is a kernel argument.
161 define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float %z) #0 {
162 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
163 %tid.ext = sext i32 %tid to i64
164 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
165 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
166 %x = load float, float addrspace(1)* %x.gep
167 %setcc = fcmp olt float %x, 0.0
168 %select = select i1 %setcc, float 1.0, float %z
169 store float %select, float addrspace(1)* %out.gep
173 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_vgprZ_f32:
174 ; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
175 ; GCN: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]]
176 ; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]]
177 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[Z]], vcc
; Stores (%x ult 0.0) ? 1.0 : %z to %out indexed by the workitem id.
; Both %x and %z are read with volatile loads from their pointers at the
; workitem id.
178 define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
179 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
180 %tid.ext = sext i32 %tid to i64
181 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
182 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
183 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
184 %x = load volatile float, float addrspace(1)* %x.gep
185 %z = load volatile float, float addrspace(1)* %z.gep
186 %setcc = fcmp ult float %x, 0.0
187 %select = select i1 %setcc, float 1.0, float %z
188 store float %select, float addrspace(1)* %out.gep
192 ; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i32:
193 ; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
194 ; GCN: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]]
195 ; GCN: v_cmp_lt_i32_e32 vcc, -1, [[X]]
196 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 2, [[Z]], vcc
; Integer variant. Stores (%x slt 0) ? 2 : %z to %out indexed by the
; workitem id, with %x and %z read by volatile i32 loads.
197 define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i32 addrspace(1)* %z.ptr) #0 {
198 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
199 %tid.ext = sext i32 %tid to i64
200 %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
201 %z.gep = getelementptr inbounds i32, i32 addrspace(1)* %z.ptr, i64 %tid.ext
202 %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
203 %x = load volatile i32, i32 addrspace(1)* %x.gep
204 %z = load volatile i32, i32 addrspace(1)* %z.gep
205 %setcc = icmp slt i32 %x, 0
206 %select = select i1 %setcc, i32 2, i32 %z
207 store i32 %select, i32 addrspace(1)* %out.gep
211 ; FIXME: Why does VI make the wrong regalloc choice?
212 ; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i64:
213 ; GCN: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[X_LO:[0-9]+]]:[[X_HI:[0-9]+]]{{\]}}
214 ; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v{{\[}}[[Z_LO:[0-9]+]]:[[Z_HI:[0-9]+]]{{\]}}
215 ; SI-DAG: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[X_LO]]:[[X_HI]]{{\]}}
216 ; SI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v[[Z_HI]], vcc
217 ; SI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v[[Z_LO]], vcc
219 ; VI-DAG: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[X_LO]]:[[X_HI]]{{\]}}
220 ; VI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v[[Z_HI]], vcc
221 ; VI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v[[Z_LO]], vcc
; 64-bit integer variant. Stores (%x slt 0) ? 2 : %z to %out indexed by the
; workitem id, with %x and %z read by volatile i64 loads.
222 define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 {
223 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
224 %tid.ext = sext i32 %tid to i64
225 %x.gep = getelementptr inbounds i64, i64 addrspace(1)* %x.ptr, i64 %tid.ext
226 %z.gep = getelementptr inbounds i64, i64 addrspace(1)* %z.ptr, i64 %tid.ext
227 %out.gep = getelementptr inbounds i64, i64 addrspace(1)* %out, i64 %tid.ext
228 %x = load volatile i64, i64 addrspace(1)* %x.gep
229 %z = load volatile i64, i64 addrspace(1)* %z.gep
230 %setcc = icmp slt i64 %x, 0
231 %select = select i1 %setcc, i64 2, i64 %z
232 store i64 %select, i64 addrspace(1)* %out.gep
236 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_vgprZ_k1_v4f32:
237 ; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
238 ; GCN: {{buffer|flat|global}}_load_dwordx4
240 ; GCN: v_cmp_nge_f32_e32 vcc, 4.0, [[X]]
241 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
242 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
243 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
244 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
; Scalar compare driving a vector select. Stores
; (%x ugt 4.0) ? %z : <1.0, 2.0, -0.5, 4.0> to %out indexed by the workitem id.
; The loaded vector %z is the true operand and the constant vector is the
; false operand.
245 define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
246 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
247 %tid.ext = sext i32 %tid to i64
248 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
249 %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
250 %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
251 %x = load volatile float, float addrspace(1)* %x.gep
252 %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
253 %setcc = fcmp ugt float %x, 4.0
254 %select = select i1 %setcc, <4 x float> %z, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>
255 store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
259 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_vgprZ_v4f32:
260 ; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
261 ; GCN: {{buffer|flat|global}}_load_dwordx4
263 ; GCN: v_cmp_ge_f32_e32 vcc, 4.0, [[X]]
264 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
265 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
266 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
267 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
; Scalar compare driving a vector select. Stores
; (%x ugt 4.0) ? <1.0, 2.0, -0.5, 4.0> : %z to %out indexed by the workitem id.
; The constant vector is the true operand and the loaded vector %z is the
; false operand.
268 define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
269 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
270 %tid.ext = sext i32 %tid to i64
271 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
272 %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
273 %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
274 %x = load volatile float, float addrspace(1)* %x.gep
275 %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
276 %setcc = fcmp ugt float %x, 4.0
277 %select = select i1 %setcc, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>, <4 x float> %z
278 store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
282 ; This must be swapped as a vector type before the condition has
283 ; multiple uses.
285 ; GCN-LABEL: {{^}}fcmp_k0_vgprX_select_k1_vgprZ_v4f32:
286 ; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
287 ; GCN: {{buffer|flat|global}}_load_dwordx4
289 ; GCN: v_cmp_le_f32_e32 vcc, 4.0, [[X]]
290 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
291 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}, vcc
292 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, -0.5, v{{[0-9]+}}, vcc
293 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}, vcc
; Stores (4.0 ugt %x) ? <1.0, 2.0, -0.5, 4.0> : %z to %out indexed by the
; workitem id. The constant is the left-hand operand of the fcmp here, and the
; constant vector is the true operand of the select.
294 define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(<4 x float> addrspace(1)* %out, float addrspace(1)* %x.ptr, <4 x float> addrspace(1)* %z.ptr) #0 {
295 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
296 %tid.ext = sext i32 %tid to i64
297 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
298 %z.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %z.ptr, i64 %tid.ext
299 %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
300 %x = load volatile float, float addrspace(1)* %x.gep
301 %z = load volatile <4 x float>, <4 x float> addrspace(1)* %z.gep
302 %setcc = fcmp ugt float 4.0, %x
303 %select = select i1 %setcc, <4 x float> <float 1.0, float 2.0, float -0.5, float 4.0>, <4 x float> %z
304 store <4 x float> %select, <4 x float> addrspace(1)* %out.gep
308 ; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i1:
311 ; GCN-DAG: v_cmp_gt_i32_e32 vcc, 0, v
312 ; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 1,
313 ; GCN-DAG: v_cmp_eq_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1, v
314 ; GCN-DAG: s_or_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, s{{\[[0-9]+:[0-9]+\]}}
315 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s
; i1 select variant. Stores (%x slt 0) ? true : %z to %out indexed by the
; workitem id, where %x is a volatile i32 load and %z is a volatile i1 load.
317 define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i1 addrspace(1)* %z.ptr) #0 {
318 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
319 %tid.ext = sext i32 %tid to i64
320 %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
321 %z.gep = getelementptr inbounds i1, i1 addrspace(1)* %z.ptr, i64 %tid.ext
322 %out.gep = getelementptr inbounds i1, i1 addrspace(1)* %out, i64 %tid.ext
323 %x = load volatile i32, i32 addrspace(1)* %x.gep
324 %z = load volatile i1, i1 addrspace(1)* %z.gep
325 %setcc = icmp slt i32 %x, 0
326 %select = select i1 %setcc, i1 true, i1 %z
327 store i1 %select, i1 addrspace(1)* %out.gep
331 ; Different types compared vs. selected
332 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_selectf64_k1_vgprZ_f32:
333 ; SIVI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3ff00000
334 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
335 ; GCN-DAG: {{buffer|flat|global}}_load_dwordx2
337 ; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]]
338 ; SIVI-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, vcc
339 ; GFX10-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0x3ff00000, v{{[0-9]+}}, vcc
340 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
; The compare is on a float but the select is on doubles. Stores
; (%x ult 0.0) ? 1.0 : %z to %out indexed by the workitem id, with %x a
; volatile float load and %z a volatile double load.
341 define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(double addrspace(1)* %out, float addrspace(1)* %x.ptr, double addrspace(1)* %z.ptr) #0 {
342 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
343 %tid.ext = sext i32 %tid to i64
344 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
345 %z.gep = getelementptr inbounds double, double addrspace(1)* %z.ptr, i64 %tid.ext
346 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
347 %x = load volatile float, float addrspace(1)* %x.gep
348 %z = load volatile double, double addrspace(1)* %z.gep
349 %setcc = fcmp ult float %x, 0.0
350 %select = select i1 %setcc, double 1.0, double %z
351 store double %select, double addrspace(1)* %out.gep
355 ; Different types compared vs. selected
356 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_selecti64_k1_vgprZ_f32:
357 ; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
358 ; GCN: {{buffer|flat|global}}_load_dwordx2
360 ; GCN: v_cmp_nlg_f32_e32 vcc, 0, [[X]]
361 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 3, v{{[0-9]+}}, vcc
362 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}, vcc
; The compare is on a float but the select is on i64 values. Stores
; (%x one 0.0) ? 3 : %z to %out indexed by the workitem id, with %x a
; volatile float load and %z a volatile i64 load.
363 define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(i64 addrspace(1)* %out, float addrspace(1)* %x.ptr, i64 addrspace(1)* %z.ptr) #0 {
364 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
365 %tid.ext = sext i32 %tid to i64
366 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
367 %z.gep = getelementptr inbounds i64, i64 addrspace(1)* %z.ptr, i64 %tid.ext
368 %out.gep = getelementptr inbounds i64, i64 addrspace(1)* %out, i64 %tid.ext
369 %x = load volatile float, float addrspace(1)* %x.gep
370 %z = load volatile i64, i64 addrspace(1)* %z.gep
371 %setcc = fcmp one float %x, 0.0
372 %select = select i1 %setcc, i64 3, i64 %z
373 store i64 %select, i64 addrspace(1)* %out.gep
377 ; Different types compared vs. selected
378 ; GCN-LABEL: {{^}}icmp_vgprX_k0_selectf32_k1_vgprZ_i32:
379 ; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
380 ; GCN: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]]
382 ; GCN: v_cmp_gt_u32_e32 vcc, 2, [[X]]
383 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[Z]], vcc
; The compare is on an i32 but the select is on floats. Stores
; (%x ugt 1) ? 4.0 : %z to %out indexed by the workitem id, with %x a
; volatile i32 load and %z a volatile float load.
384 define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(float addrspace(1)* %out, i32 addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
385 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
386 %tid.ext = sext i32 %tid to i64
387 %x.gep = getelementptr inbounds i32, i32 addrspace(1)* %x.ptr, i64 %tid.ext
388 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
389 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
390 %x = load volatile i32, i32 addrspace(1)* %x.gep
391 %z = load volatile float, float addrspace(1)* %z.gep
392 %setcc = icmp ugt i32 %x, 1
393 %select = select i1 %setcc, float 4.0, float %z
394 store float %select, float addrspace(1)* %out.gep
398 ; FIXME: Should be able to handle multiple uses
400 ; GCN-LABEL: {{^}}fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2:
401 ; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
403 ; GCN: v_cmp_nle_f32_e32 vcc, 4.0, [[X]]
404 ; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -1.0, vcc
405 ; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -2.0, vcc
; One compare result feeds two selects. %setcc = (4.0 ugt %x) chooses between
; -1.0 and %z for %select0 and between -2.0 and %z for %select1. Both results
; are written with volatile stores to the same %out element.
406 define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
407 %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
408 %tid.ext = sext i32 %tid to i64
409 %x.gep = getelementptr inbounds float, float addrspace(1)* %x.ptr, i64 %tid.ext
410 %z.gep = getelementptr inbounds float, float addrspace(1)* %z.ptr, i64 %tid.ext
411 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
412 %x = load volatile float, float addrspace(1)* %x.gep
413 %z = load volatile float, float addrspace(1)* %z.gep
414 %setcc = fcmp ugt float 4.0, %x
415 %select0 = select i1 %setcc, float -1.0, float %z
416 %select1 = select i1 %setcc, float -2.0, float %z
417 store volatile float %select0, float addrspace(1)* %out.gep
418 store volatile float %select1, float addrspace(1)* %out.gep
422 ; Source modifiers abs/neg only work for f32
424 ; GCN-LABEL: {{^}}v_cndmask_abs_neg_f16:
425 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}},
; Half-precision variant. Loads %f from %fptr at the workitem id and stores
; (%c != 0) ? fabs(%f) : fneg(%f) to %out.
426 define amdgpu_kernel void @v_cndmask_abs_neg_f16(half addrspace(1)* %out, i32 %c, half addrspace(1)* %fptr) #0 {
427 %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
428 %f.gep = getelementptr half, half addrspace(1)* %fptr, i32 %idx
429 %f = load half, half addrspace(1)* %f.gep
430 %f.abs = call half @llvm.fabs.f16(half %f)
431 %f.neg = fneg half %f
432 %setcc = icmp ne i32 %c, 0
433 %select = select i1 %setcc, half %f.abs, half %f.neg
434 store half %select, half addrspace(1)* %out
438 ; GCN-LABEL: {{^}}v_cndmask_abs_neg_f32:
439 ; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, |v{{[0-9]+}}|,
; Single-precision variant. Loads %f from %fptr at the workitem id and stores
; (%c != 0) ? fabs(%f) : fneg(%f) to %out.
440 define amdgpu_kernel void @v_cndmask_abs_neg_f32(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
441 %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
442 %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
443 %f = load float, float addrspace(1)* %f.gep
444 %f.abs = call float @llvm.fabs.f32(float %f)
445 %f.neg = fneg float %f
446 %setcc = icmp ne i32 %c, 0
447 %select = select i1 %setcc, float %f.abs, float %f.neg
448 store float %select, float addrspace(1)* %out
452 ; GCN-LABEL: {{^}}v_cndmask_abs_neg_f64:
453 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}},
454 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}},
; Double-precision variant. Loads %f from %fptr at the workitem id and stores
; (%c != 0) ? fabs(%f) : fneg(%f) to %out.
455 define amdgpu_kernel void @v_cndmask_abs_neg_f64(double addrspace(1)* %out, i32 %c, double addrspace(1)* %fptr) #0 {
456 %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
457 %f.gep = getelementptr double, double addrspace(1)* %fptr, i32 %idx
458 %f = load double, double addrspace(1)* %f.gep
459 %f.abs = call double @llvm.fabs.f64(double %f)
460 %f.neg = fneg double %f
461 %setcc = icmp ne i32 %c, 0
462 %select = select i1 %setcc, double %f.abs, double %f.neg
463 store double %select, double addrspace(1)* %out
; Attribute groups. #0 is attached to every kernel definition in this file;
; #1 is attached to the workitem id intrinsic declaration and its call sites.
467 attributes #0 = { nounwind }
468 attributes #1 = { nounwind readnone }