1 ; RUN: llc -march=amdgcn -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3 declare i32 @llvm.amdgcn.workitem.id.x() #0
; --------------------------------------------------------------------------------
; i32 compares
; --------------------------------------------------------------------------------
; icmp eq i32 against 64: the constant is expected as the first source operand
; of the _e32 compare.
; GCN-LABEL: {{^}}commute_eq_64_i32:
; GCN: v_cmp_eq_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_eq_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp eq i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ne i32 against 64: the constant is expected as the first source operand
; of the _e32 compare.
; GCN-LABEL: {{^}}commute_ne_64_i32:
; GCN: v_cmp_ne_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ne_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ne i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ne i32 against 12345 (0x3039): the checks expect the constant to be
; moved into an SGPR first and that SGPR used as the first compare operand.
; FIXME: Why isn't this being folded as a constant?
; GCN-LABEL: {{^}}commute_ne_litk_i32:
; GCN: s_movk_i32 [[K:s[0-9]+]], 0x3039
; GCN: v_cmp_ne_u32_e32 vcc, [[K]], v{{[0-9]+}}
define amdgpu_kernel void @commute_ne_litk_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ne i32 %val, 12345
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ugt i32 %val, 64 is checked in commuted form: 64 < %val (unsigned).
; GCN-LABEL: {{^}}commute_ugt_64_i32:
; GCN: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ugt_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ugt i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp uge i32 %val, 64 is checked as the strict form 63 < %val (unsigned).
; GCN-LABEL: {{^}}commute_uge_64_i32:
; GCN: v_cmp_lt_u32_e32 vcc, 63, v{{[0-9]+}}
define amdgpu_kernel void @commute_uge_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp uge i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ult i32 %val, 64 is checked in commuted form: 64 > %val (unsigned).
; GCN-LABEL: {{^}}commute_ult_64_i32:
; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ult_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ult i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ule i32 %val, 63 is checked as the strict form 64 > %val (unsigned).
; GCN-LABEL: {{^}}commute_ule_63_i32:
; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ule_63_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ule i32 %val, 63
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ule i32 %val, 64 is checked as 0x41 (65) > %val, with 0x41 first moved
; into an SGPR by s_movk_i32.
; GCN-LABEL: {{^}}commute_ule_64_i32:
; GCN: s_movk_i32 [[K:s[0-9]+]], 0x41{{$}}
; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}}
define amdgpu_kernel void @commute_ule_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp ule i32 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; sext(icmp sgt i32 %val, -1): the only instruction checked here is an
; arithmetic shift right by 31 on v2; no v_cmp is checked for this case.
; GCN-LABEL: {{^}}commute_sgt_neg1_i32:
; GCN: v_ashrrev_i32_e32 v2, 31, v2
define amdgpu_kernel void @commute_sgt_neg1_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp sgt i32 %val, -1
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp sge i32 %val, -2 is checked as the strict form -3 < %val (signed).
; GCN-LABEL: {{^}}commute_sge_neg2_i32:
; GCN: v_cmp_lt_i32_e32 vcc, -3, v{{[0-9]+}}
define amdgpu_kernel void @commute_sge_neg2_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp sge i32 %val, -2
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp slt i32 %val, -16 is checked in commuted form: -16 > %val (signed).
; GCN-LABEL: {{^}}commute_slt_neg16_i32:
; GCN: v_cmp_gt_i32_e32 vcc, -16, v{{[0-9]+}}
define amdgpu_kernel void @commute_slt_neg16_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp slt i32 %val, -16
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp sle i32 %val, 5 is checked as the strict form 6 > %val (signed).
; GCN-LABEL: {{^}}commute_sle_5_i32:
; GCN: v_cmp_gt_i32_e32 vcc, 6, v{{[0-9]+}}
define amdgpu_kernel void @commute_sle_5_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i32, ptr addrspace(1) %gep.in
  %cmp = icmp sle i32 %val, 5
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; --------------------------------------------------------------------------------
; i64 compares
; --------------------------------------------------------------------------------
; icmp eq i64 against 64: the constant is expected as the first source operand
; and the loaded value as a 64-bit VGPR pair. The result is stored as i32.
; GCN-LABEL: {{^}}commute_eq_64_i64:
; GCN: v_cmp_eq_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_eq_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp eq i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ne i64 against 64: the constant is expected as the first source operand
; of the 64-bit _e32 compare.
; GCN-LABEL: {{^}}commute_ne_64_i64:
; GCN: v_cmp_ne_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ne_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ne i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ugt i64 %val, 64 is checked in commuted form: 64 < %val (unsigned).
; GCN-LABEL: {{^}}commute_ugt_64_i64:
; GCN: v_cmp_lt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ugt_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ugt i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp uge i64 %val, 64 is checked as the strict form 63 < %val (unsigned).
; GCN-LABEL: {{^}}commute_uge_64_i64:
; GCN: v_cmp_lt_u64_e32 vcc, 63, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_uge_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp uge i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ult i64 %val, 64 is checked in commuted form: 64 > %val (unsigned).
; GCN-LABEL: {{^}}commute_ult_64_i64:
; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ult_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ult i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ule i64 %val, 63 is checked as the strict form 64 > %val (unsigned).
; GCN-LABEL: {{^}}commute_ule_63_i64:
; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ule_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ule i64 %val, 63
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp ule i64 %val, 64 is checked as 0x41 (65) > %val, with 0x41 first moved
; into an SGPR pair by s_mov_b64.
; FIXME: Undo canonicalization to gt (x + 1) since it doesn't use the inline imm
; GCN-LABEL: {{^}}commute_ule_64_i64:
; GCN: s_mov_b64 [[K:s\[[0-9:]+\]]], 0x41
; GCN: v_cmp_gt_u64_e32 vcc, [[K]], v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ule_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp ule i64 %val, 64
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp sgt i64 %val, -1 is checked in commuted form: -1 < %val (signed).
; GCN-LABEL: {{^}}commute_sgt_neg1_i64:
; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_sgt_neg1_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp sgt i64 %val, -1
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp sge i64 %val, -2 is checked as the strict form -3 < %val (signed).
; GCN-LABEL: {{^}}commute_sge_neg2_i64:
; GCN: v_cmp_lt_i64_e32 vcc, -3, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_sge_neg2_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp sge i64 %val, -2
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp slt i64 %val, -16 is checked in commuted form: -16 > %val (signed).
; GCN-LABEL: {{^}}commute_slt_neg16_i64:
; GCN: v_cmp_gt_i64_e32 vcc, -16, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_slt_neg16_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp slt i64 %val, -16
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; icmp sle i64 %val, 5 is checked as the strict form 6 > %val (signed).
; GCN-LABEL: {{^}}commute_sle_5_i64:
; GCN: v_cmp_gt_i64_e32 vcc, 6, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_sle_5_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load i64, ptr addrspace(1) %gep.in
  %cmp = icmp sle i64 %val, 5
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; --------------------------------------------------------------------------------
; f32 compares
; --------------------------------------------------------------------------------
; fcmp oeq float against 2.0: the constant is expected as the first source
; operand of the _e32 compare.
; GCN-LABEL: {{^}}commute_oeq_2.0_f32:
; GCN: v_cmp_eq_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_oeq_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp oeq float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ogt float %val, 2.0 is checked with swapped operands: 2.0 lt %val.
; GCN-LABEL: {{^}}commute_ogt_2.0_f32:
; GCN: v_cmp_lt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ogt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ogt float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp oge float %val, 2.0 is checked with swapped operands: 2.0 le %val.
; GCN-LABEL: {{^}}commute_oge_2.0_f32:
; GCN: v_cmp_le_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_oge_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp oge float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp olt float %val, 2.0 is checked with swapped operands: 2.0 gt %val.
; GCN-LABEL: {{^}}commute_olt_2.0_f32:
; GCN: v_cmp_gt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_olt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp olt float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ole float %val, 2.0 is checked with swapped operands: 2.0 ge %val.
; GCN-LABEL: {{^}}commute_ole_2.0_f32:
; GCN: v_cmp_ge_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ole_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ole float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp one float against 2.0 is checked as v_cmp_lg with 2.0 as the first
; source operand.
; GCN-LABEL: {{^}}commute_one_2.0_f32:
; GCN: v_cmp_lg_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_one_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp one float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ord float against 2.0: the check expects v_cmp_o with the same VGPR as
; both source operands; the constant does not appear in the compare.
; GCN-LABEL: {{^}}commute_ord_2.0_f32:
; GCN: v_cmp_o_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
define amdgpu_kernel void @commute_ord_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ord float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ueq float against 2.0 is checked as v_cmp_nlg with 2.0 as the first
; source operand.
; GCN-LABEL: {{^}}commute_ueq_2.0_f32:
; GCN: v_cmp_nlg_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ueq_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ueq float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ugt float %val, 2.0 is checked with swapped operands as the negated
; compare v_cmp_nge (not 2.0 ge %val).
; GCN-LABEL: {{^}}commute_ugt_2.0_f32:
; GCN: v_cmp_nge_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ugt_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ugt float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp uge float %val, 2.0 is checked with swapped operands as the negated
; compare v_cmp_ngt (not 2.0 gt %val).
; GCN-LABEL: {{^}}commute_uge_2.0_f32:
; GCN: v_cmp_ngt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_uge_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp uge float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ult float %val, 2.0 is checked with swapped operands as the negated
; compare v_cmp_nle (not 2.0 le %val).
; GCN-LABEL: {{^}}commute_ult_2.0_f32:
; GCN: v_cmp_nle_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ult_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ult float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ule float %val, 2.0 is checked with swapped operands as the negated
; compare v_cmp_nlt (not 2.0 lt %val).
; GCN-LABEL: {{^}}commute_ule_2.0_f32:
; GCN: v_cmp_nlt_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_ule_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp ule float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp une float against 2.0 is checked as v_cmp_neq with 2.0 as the first
; source operand.
; GCN-LABEL: {{^}}commute_une_2.0_f32:
; GCN: v_cmp_neq_f32_e32 vcc, 2.0, v{{[0-9]+}}
define amdgpu_kernel void @commute_une_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp une float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp uno float against 2.0: the check expects v_cmp_u with the same VGPR as
; both source operands; the constant does not appear in the compare.
; GCN-LABEL: {{^}}commute_uno_2.0_f32:
; GCN: v_cmp_u_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
define amdgpu_kernel void @commute_uno_2.0_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load float, ptr addrspace(1) %gep.in
  %cmp = fcmp uno float %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; --------------------------------------------------------------------------------
; f64 compares
; --------------------------------------------------------------------------------
; fcmp oeq double against 2.0: the constant is expected as the first source
; operand and the loaded value as a 64-bit VGPR pair.
; GCN-LABEL: {{^}}commute_oeq_2.0_f64:
; GCN: v_cmp_eq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_oeq_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp oeq double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ogt double %val, 2.0 is checked with swapped operands: 2.0 lt %val.
; GCN-LABEL: {{^}}commute_ogt_2.0_f64:
; GCN: v_cmp_lt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ogt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp ogt double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp oge double %val, 2.0 is checked with swapped operands: 2.0 le %val.
; GCN-LABEL: {{^}}commute_oge_2.0_f64:
; GCN: v_cmp_le_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_oge_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp oge double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp olt double %val, 2.0 is checked with swapped operands: 2.0 gt %val.
; GCN-LABEL: {{^}}commute_olt_2.0_f64:
; GCN: v_cmp_gt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_olt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp olt double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ole double %val, 2.0 is checked with swapped operands: 2.0 ge %val.
; GCN-LABEL: {{^}}commute_ole_2.0_f64:
; GCN: v_cmp_ge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ole_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp ole double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp one double against 2.0 is checked as v_cmp_lg with 2.0 as the first
; source operand.
; GCN-LABEL: {{^}}commute_one_2.0_f64:
; GCN: v_cmp_lg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_one_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp one double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ord double against 2.0: the check expects v_cmp_o with the same VGPR
; pair as both source operands; the constant does not appear in the compare.
; GCN-LABEL: {{^}}commute_ord_2.0_f64:
; GCN: v_cmp_o_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
define amdgpu_kernel void @commute_ord_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp ord double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ueq double against 2.0 is checked as v_cmp_nlg with 2.0 as the first
; source operand.
; GCN-LABEL: {{^}}commute_ueq_2.0_f64:
; GCN: v_cmp_nlg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ueq_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp ueq double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ugt double %val, 2.0 is checked with swapped operands as the negated
; compare v_cmp_nge (not 2.0 ge %val).
; GCN-LABEL: {{^}}commute_ugt_2.0_f64:
; GCN: v_cmp_nge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ugt_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp ugt double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp uge double %val, 2.0 is checked with swapped operands as the negated
; compare v_cmp_ngt (not 2.0 gt %val).
; GCN-LABEL: {{^}}commute_uge_2.0_f64:
; GCN: v_cmp_ngt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_uge_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp uge double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ult double %val, 2.0 is checked with swapped operands as the negated
; compare v_cmp_nle (not 2.0 le %val).
; GCN-LABEL: {{^}}commute_ult_2.0_f64:
; GCN: v_cmp_nle_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ult_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp ult double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp ule double %val, 2.0 is checked with swapped operands as the negated
; compare v_cmp_nlt (not 2.0 lt %val).
; GCN-LABEL: {{^}}commute_ule_2.0_f64:
; GCN: v_cmp_nlt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ule_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp ule double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp une double against 2.0 is checked as v_cmp_neq with 2.0 as the first
; source operand.
; GCN-LABEL: {{^}}commute_une_2.0_f64:
; GCN: v_cmp_neq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_une_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp une double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; fcmp uno double against 2.0: the check expects v_cmp_u with the same VGPR
; pair as both source operands; the constant does not appear in the compare.
; GCN-LABEL: {{^}}commute_uno_2.0_f64:
; GCN: v_cmp_u_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
define amdgpu_kernel void @commute_uno_2.0_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  %val = load double, ptr addrspace(1) %gep.in
  %cmp = fcmp uno double %val, 2.0
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %gep.out
  ret void
}
; Compares a volatile-loaded private (addrspace 5) pointer against the address
; of a stack object and stores the zero-extended result.
;
; FIXME: Should be able to fold this frameindex
; Without commuting the frame index in the pre-regalloc run of
; SIShrinkInstructions, this was using the VOP3 compare.

; GCN-LABEL: {{^}}commute_frameindex:
; The XGCN line below uses a prefix that is not the one passed to FileCheck by
; this file's run line, so it is not an active check.
; XGCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}

; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
; GCN: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, [[FI]]
define amdgpu_kernel void @commute_frameindex(ptr addrspace(1) nocapture %out) #0 {
  %stack0 = alloca i32, addrspace(5)
  %ptr0 = load volatile ptr addrspace(5), ptr addrspace(1) undef
  %eq = icmp eq ptr addrspace(5) %ptr0, %stack0
  %ext = zext i1 %eq to i32
  store volatile i32 %ext, ptr addrspace(1) %out
  ret void
}
714 attributes #0 = { nounwind readnone }
715 attributes #1 = { nounwind }