1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc < %s --mtriple=nvptx64 -mcpu=sm_20 | FileCheck --check-prefix=SM20 %s
3 ; RUN: llc < %s --mtriple=nvptx64 -mcpu=sm_35 | FileCheck --check-prefix=SM35 %s
4 ; RUN: %if ptxas %{ llc < %s --mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
5 ; RUN: %if ptxas %{ llc < %s --mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
; NVVM rotate intrinsics called by the rotate32, rotate64 and rotateright64
; tests in this file.
8 declare i32 @llvm.nvvm.rotate.b32(i32, i32)
9 declare i64 @llvm.nvvm.rotate.b64(i64, i32)
10 declare i64 @llvm.nvvm.rotate.right.b64(i64, i32)
; Generic funnel-shift intrinsics called by the rotl64, rotl64_imm, rotr64,
; rotr64_imm and funnel_shift_* tests in this file.
12 declare i64 @llvm.fshl.i64(i64, i64, i64)
13 declare i64 @llvm.fshr.i64(i64, i64, i64)
14 declare i32 @llvm.fshl.i32(i32, i32, i32)
15 declare i32 @llvm.fshr.i32(i32, i32, i32)
; rotate32 - calls the NVVM 32-bit rotate intrinsic on %a with a variable
; amount %b.
; Lowering expected by the checks below:
;   - sm_20 expects a shift expansion: shl.b32 by (%b & 31), shr.u32 by
;     (-%b & 31), and the two halves combined with or.b32.
;   - sm_35 expects a single shf.l.wrap.b32 that takes the loaded %a value as
;     both data operands and %b as the amount.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
20 define i32 @rotate32(i32 %a, i32 %b) {
21 ; SM20-LABEL: rotate32(
23 ; SM20-NEXT: .reg .b32 %r<9>;
25 ; SM20-NEXT: // %bb.0:
26 ; SM20-NEXT: ld.param.u32 %r1, [rotate32_param_0];
27 ; SM20-NEXT: ld.param.u32 %r2, [rotate32_param_1];
28 ; SM20-NEXT: and.b32 %r3, %r2, 31;
29 ; SM20-NEXT: shl.b32 %r4, %r1, %r3;
30 ; SM20-NEXT: neg.s32 %r5, %r2;
31 ; SM20-NEXT: and.b32 %r6, %r5, 31;
32 ; SM20-NEXT: shr.u32 %r7, %r1, %r6;
33 ; SM20-NEXT: or.b32 %r8, %r4, %r7;
34 ; SM20-NEXT: st.param.b32 [func_retval0], %r8;
37 ; SM35-LABEL: rotate32(
39 ; SM35-NEXT: .reg .b32 %r<4>;
41 ; SM35-NEXT: // %bb.0:
42 ; SM35-NEXT: ld.param.u32 %r1, [rotate32_param_0];
43 ; SM35-NEXT: ld.param.u32 %r2, [rotate32_param_1];
44 ; SM35-NEXT: shf.l.wrap.b32 %r3, %r1, %r1, %r2;
45 ; SM35-NEXT: st.param.b32 [func_retval0], %r3;
47 %val = tail call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 %b)
; rotate64 - calls the NVVM 64-bit rotate intrinsic on %a with a 32-bit
; variable amount %b.
; The checks below expect the same lowering on sm_20 and sm_35: shl.b64 by
; (%b & 63), shr.u64 by (-%b & 63), and the two halves combined with or.b64.
; No shf instruction is expected for the 64-bit case on either target.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
53 define i64 @rotate64(i64 %a, i32 %b) {
54 ; SM20-LABEL: rotate64(
56 ; SM20-NEXT: .reg .b32 %r<5>;
57 ; SM20-NEXT: .reg .b64 %rd<5>;
59 ; SM20-NEXT: // %bb.0:
60 ; SM20-NEXT: ld.param.u64 %rd1, [rotate64_param_0];
61 ; SM20-NEXT: ld.param.u32 %r1, [rotate64_param_1];
62 ; SM20-NEXT: and.b32 %r2, %r1, 63;
63 ; SM20-NEXT: shl.b64 %rd2, %rd1, %r2;
64 ; SM20-NEXT: neg.s32 %r3, %r1;
65 ; SM20-NEXT: and.b32 %r4, %r3, 63;
66 ; SM20-NEXT: shr.u64 %rd3, %rd1, %r4;
67 ; SM20-NEXT: or.b64 %rd4, %rd2, %rd3;
68 ; SM20-NEXT: st.param.b64 [func_retval0], %rd4;
71 ; SM35-LABEL: rotate64(
73 ; SM35-NEXT: .reg .b32 %r<5>;
74 ; SM35-NEXT: .reg .b64 %rd<5>;
76 ; SM35-NEXT: // %bb.0:
77 ; SM35-NEXT: ld.param.u64 %rd1, [rotate64_param_0];
78 ; SM35-NEXT: ld.param.u32 %r1, [rotate64_param_1];
79 ; SM35-NEXT: and.b32 %r2, %r1, 63;
80 ; SM35-NEXT: shl.b64 %rd2, %rd1, %r2;
81 ; SM35-NEXT: neg.s32 %r3, %r1;
82 ; SM35-NEXT: and.b32 %r4, %r3, 63;
83 ; SM35-NEXT: shr.u64 %rd3, %rd1, %r4;
84 ; SM35-NEXT: or.b64 %rd4, %rd2, %rd3;
85 ; SM35-NEXT: st.param.b64 [func_retval0], %rd4;
87 %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 %b)
; rotateright64 - calls the NVVM 64-bit rotate-right intrinsic on %a with a
; 32-bit variable amount %b.
; The checks below expect the same lowering on sm_20 and sm_35, mirroring the
; left-rotate case with the shift directions swapped: shr.u64 by (%b & 63),
; shl.b64 by (-%b & 63), and the two halves combined with or.b64.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
93 define i64 @rotateright64(i64 %a, i32 %b) {
94 ; SM20-LABEL: rotateright64(
96 ; SM20-NEXT: .reg .b32 %r<5>;
97 ; SM20-NEXT: .reg .b64 %rd<5>;
99 ; SM20-NEXT: // %bb.0:
100 ; SM20-NEXT: ld.param.u64 %rd1, [rotateright64_param_0];
101 ; SM20-NEXT: ld.param.u32 %r1, [rotateright64_param_1];
102 ; SM20-NEXT: and.b32 %r2, %r1, 63;
103 ; SM20-NEXT: shr.u64 %rd2, %rd1, %r2;
104 ; SM20-NEXT: neg.s32 %r3, %r1;
105 ; SM20-NEXT: and.b32 %r4, %r3, 63;
106 ; SM20-NEXT: shl.b64 %rd3, %rd1, %r4;
107 ; SM20-NEXT: or.b64 %rd4, %rd2, %rd3;
108 ; SM20-NEXT: st.param.b64 [func_retval0], %rd4;
111 ; SM35-LABEL: rotateright64(
113 ; SM35-NEXT: .reg .b32 %r<5>;
114 ; SM35-NEXT: .reg .b64 %rd<5>;
116 ; SM35-NEXT: // %bb.0:
117 ; SM35-NEXT: ld.param.u64 %rd1, [rotateright64_param_0];
118 ; SM35-NEXT: ld.param.u32 %r1, [rotateright64_param_1];
119 ; SM35-NEXT: and.b32 %r2, %r1, 63;
120 ; SM35-NEXT: shr.u64 %rd2, %rd1, %r2;
121 ; SM35-NEXT: neg.s32 %r3, %r1;
122 ; SM35-NEXT: and.b32 %r4, %r3, 63;
123 ; SM35-NEXT: shl.b64 %rd3, %rd1, %r4;
124 ; SM35-NEXT: or.b64 %rd4, %rd2, %rd3;
125 ; SM35-NEXT: st.param.b64 [func_retval0], %rd4;
127 %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 %b)
; rotl0 - a 32-bit rotate spelled out with plain shift and or instructions
; rather than an intrinsic. The visible IR computes lshr of %x by 24 and ors it
; with %t0.
; REVIEW: %t0 is defined on a line that is not visible in this excerpt;
; judging by the expected shl.b32 by 8 below it is presumably `shl i32 %x, 8`
; - confirm against the full file.
; Lowering expected by the checks below:
;   - sm_20 expects shr.u32 by 24, shl.b32 by 8, combined with or.b32.
;   - sm_35 expects a single shf.l.wrap.b32 with the loaded %x value as both
;     data operands and an immediate amount of 8.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
133 define i32 @rotl0(i32 %x) {
136 ; SM20-NEXT: .reg .b32 %r<5>;
138 ; SM20-NEXT: // %bb.0:
139 ; SM20-NEXT: ld.param.u32 %r1, [rotl0_param_0];
140 ; SM20-NEXT: shr.u32 %r2, %r1, 24;
141 ; SM20-NEXT: shl.b32 %r3, %r1, 8;
142 ; SM20-NEXT: or.b32 %r4, %r3, %r2;
143 ; SM20-NEXT: st.param.b32 [func_retval0], %r4;
148 ; SM35-NEXT: .reg .b32 %r<3>;
150 ; SM35-NEXT: // %bb.0:
151 ; SM35-NEXT: ld.param.u32 %r1, [rotl0_param_0];
152 ; SM35-NEXT: shf.l.wrap.b32 %r2, %r1, %r1, 8;
153 ; SM35-NEXT: st.param.b32 [func_retval0], %r2;
156 %t1 = lshr i32 %x, 24
157 %t2 = or i32 %t0, %t1
; rotl64 - 64-bit rotate left by a variable amount, written as
; llvm.fshl.i64 with %a passed as both data operands and %n as the amount.
; The checks below expect the same lowering on sm_20 and sm_35:
;   - only the low 32 bits of the i64 amount are loaded (ld.param.u32 of
;     parameter 1);
;   - shl.b64 by (%n & 63), shr.u64 by (-%n & 63), combined with or.b64.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
162 define i64 @rotl64(i64 %a, i64 %n) {
163 ; SM20-LABEL: rotl64(
165 ; SM20-NEXT: .reg .b32 %r<5>;
166 ; SM20-NEXT: .reg .b64 %rd<5>;
168 ; SM20-NEXT: // %bb.0:
169 ; SM20-NEXT: ld.param.u64 %rd1, [rotl64_param_0];
170 ; SM20-NEXT: ld.param.u32 %r1, [rotl64_param_1];
171 ; SM20-NEXT: and.b32 %r2, %r1, 63;
172 ; SM20-NEXT: shl.b64 %rd2, %rd1, %r2;
173 ; SM20-NEXT: neg.s32 %r3, %r1;
174 ; SM20-NEXT: and.b32 %r4, %r3, 63;
175 ; SM20-NEXT: shr.u64 %rd3, %rd1, %r4;
176 ; SM20-NEXT: or.b64 %rd4, %rd2, %rd3;
177 ; SM20-NEXT: st.param.b64 [func_retval0], %rd4;
180 ; SM35-LABEL: rotl64(
182 ; SM35-NEXT: .reg .b32 %r<5>;
183 ; SM35-NEXT: .reg .b64 %rd<5>;
185 ; SM35-NEXT: // %bb.0:
186 ; SM35-NEXT: ld.param.u64 %rd1, [rotl64_param_0];
187 ; SM35-NEXT: ld.param.u32 %r1, [rotl64_param_1];
188 ; SM35-NEXT: and.b32 %r2, %r1, 63;
189 ; SM35-NEXT: shl.b64 %rd2, %rd1, %r2;
190 ; SM35-NEXT: neg.s32 %r3, %r1;
191 ; SM35-NEXT: and.b32 %r4, %r3, 63;
192 ; SM35-NEXT: shr.u64 %rd3, %rd1, %r4;
193 ; SM35-NEXT: or.b64 %rd4, %rd2, %rd3;
194 ; SM35-NEXT: st.param.b64 [func_retval0], %rd4;
196 %val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %n)
; rotl64_imm - 64-bit rotate left by a constant, written as llvm.fshl.i64
; with %a passed as both data operands and an immediate amount of 66.
; The amount is deliberately larger than the bit width: the checks below expect
; it to be reduced modulo 64, i.e. shl.b64 by 2 and shr.u64 by 62, combined
; with or.b64. sm_20 and sm_35 expect the same sequence.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
201 define i64 @rotl64_imm(i64 %a) {
202 ; SM20-LABEL: rotl64_imm(
204 ; SM20-NEXT: .reg .b64 %rd<5>;
206 ; SM20-NEXT: // %bb.0:
207 ; SM20-NEXT: ld.param.u64 %rd1, [rotl64_imm_param_0];
208 ; SM20-NEXT: shr.u64 %rd2, %rd1, 62;
209 ; SM20-NEXT: shl.b64 %rd3, %rd1, 2;
210 ; SM20-NEXT: or.b64 %rd4, %rd3, %rd2;
211 ; SM20-NEXT: st.param.b64 [func_retval0], %rd4;
214 ; SM35-LABEL: rotl64_imm(
216 ; SM35-NEXT: .reg .b64 %rd<5>;
218 ; SM35-NEXT: // %bb.0:
219 ; SM35-NEXT: ld.param.u64 %rd1, [rotl64_imm_param_0];
220 ; SM35-NEXT: shr.u64 %rd2, %rd1, 62;
221 ; SM35-NEXT: shl.b64 %rd3, %rd1, 2;
222 ; SM35-NEXT: or.b64 %rd4, %rd3, %rd2;
223 ; SM35-NEXT: st.param.b64 [func_retval0], %rd4;
225 %val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 66)
; rotr64 - 64-bit rotate right by a variable amount, written as
; llvm.fshr.i64 with %a passed as both data operands and %n as the amount.
; The checks below expect the same lowering on sm_20 and sm_35:
;   - only the low 32 bits of the i64 amount are loaded (ld.param.u32 of
;     parameter 1);
;   - shr.u64 by (%n & 63), shl.b64 by (-%n & 63), combined with or.b64.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
230 define i64 @rotr64(i64 %a, i64 %n) {
231 ; SM20-LABEL: rotr64(
233 ; SM20-NEXT: .reg .b32 %r<5>;
234 ; SM20-NEXT: .reg .b64 %rd<5>;
236 ; SM20-NEXT: // %bb.0:
237 ; SM20-NEXT: ld.param.u64 %rd1, [rotr64_param_0];
238 ; SM20-NEXT: ld.param.u32 %r1, [rotr64_param_1];
239 ; SM20-NEXT: and.b32 %r2, %r1, 63;
240 ; SM20-NEXT: shr.u64 %rd2, %rd1, %r2;
241 ; SM20-NEXT: neg.s32 %r3, %r1;
242 ; SM20-NEXT: and.b32 %r4, %r3, 63;
243 ; SM20-NEXT: shl.b64 %rd3, %rd1, %r4;
244 ; SM20-NEXT: or.b64 %rd4, %rd2, %rd3;
245 ; SM20-NEXT: st.param.b64 [func_retval0], %rd4;
248 ; SM35-LABEL: rotr64(
250 ; SM35-NEXT: .reg .b32 %r<5>;
251 ; SM35-NEXT: .reg .b64 %rd<5>;
253 ; SM35-NEXT: // %bb.0:
254 ; SM35-NEXT: ld.param.u64 %rd1, [rotr64_param_0];
255 ; SM35-NEXT: ld.param.u32 %r1, [rotr64_param_1];
256 ; SM35-NEXT: and.b32 %r2, %r1, 63;
257 ; SM35-NEXT: shr.u64 %rd2, %rd1, %r2;
258 ; SM35-NEXT: neg.s32 %r3, %r1;
259 ; SM35-NEXT: and.b32 %r4, %r3, 63;
260 ; SM35-NEXT: shl.b64 %rd3, %rd1, %r4;
261 ; SM35-NEXT: or.b64 %rd4, %rd2, %rd3;
262 ; SM35-NEXT: st.param.b64 [func_retval0], %rd4;
264 %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %n)
; rotr64_imm - 64-bit rotate right by a constant, written as llvm.fshr.i64
; with %a passed as both data operands and an immediate amount of 66.
; The amount is deliberately larger than the bit width: the checks below expect
; it to be reduced modulo 64, i.e. shr.u64 by 2 and shl.b64 by 62, combined
; with or.b64. sm_20 and sm_35 expect the same sequence.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
269 define i64 @rotr64_imm(i64 %a) {
270 ; SM20-LABEL: rotr64_imm(
272 ; SM20-NEXT: .reg .b64 %rd<5>;
274 ; SM20-NEXT: // %bb.0:
275 ; SM20-NEXT: ld.param.u64 %rd1, [rotr64_imm_param_0];
276 ; SM20-NEXT: shl.b64 %rd2, %rd1, 62;
277 ; SM20-NEXT: shr.u64 %rd3, %rd1, 2;
278 ; SM20-NEXT: or.b64 %rd4, %rd3, %rd2;
279 ; SM20-NEXT: st.param.b64 [func_retval0], %rd4;
282 ; SM35-LABEL: rotr64_imm(
284 ; SM35-NEXT: .reg .b64 %rd<5>;
286 ; SM35-NEXT: // %bb.0:
287 ; SM35-NEXT: ld.param.u64 %rd1, [rotr64_imm_param_0];
288 ; SM35-NEXT: shl.b64 %rd2, %rd1, 62;
289 ; SM35-NEXT: shr.u64 %rd3, %rd1, 2;
290 ; SM35-NEXT: or.b64 %rd4, %rd3, %rd2;
291 ; SM35-NEXT: st.param.b64 [func_retval0], %rd4;
293 %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 66)
; funnel_shift_right_32 - general 32-bit funnel shift right: llvm.fshr.i32
; with distinct data operands %a and %b and a variable amount %c.
; Lowering expected by the checks below:
;   - sm_20 expects a shift expansion: %b is shifted right by (%c & 31), %a is
;     first shifted left by 1 and then left by (~%c & 31), and the two parts
;     are combined with or.b32.
;   - sm_35 expects a single shf.r.wrap.b32 whose operands are, in order, the
;     loaded %b, the loaded %a, and the amount %c.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
297 define i32 @funnel_shift_right_32(i32 %a, i32 %b, i32 %c) {
298 ; SM20-LABEL: funnel_shift_right_32(
300 ; SM20-NEXT: .reg .b32 %r<11>;
302 ; SM20-NEXT: // %bb.0:
303 ; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_right_32_param_0];
304 ; SM20-NEXT: ld.param.u32 %r2, [funnel_shift_right_32_param_2];
305 ; SM20-NEXT: and.b32 %r3, %r2, 31;
306 ; SM20-NEXT: ld.param.u32 %r4, [funnel_shift_right_32_param_1];
307 ; SM20-NEXT: shr.u32 %r5, %r4, %r3;
308 ; SM20-NEXT: shl.b32 %r6, %r1, 1;
309 ; SM20-NEXT: not.b32 %r7, %r2;
310 ; SM20-NEXT: and.b32 %r8, %r7, 31;
311 ; SM20-NEXT: shl.b32 %r9, %r6, %r8;
312 ; SM20-NEXT: or.b32 %r10, %r9, %r5;
313 ; SM20-NEXT: st.param.b32 [func_retval0], %r10;
316 ; SM35-LABEL: funnel_shift_right_32(
318 ; SM35-NEXT: .reg .b32 %r<5>;
320 ; SM35-NEXT: // %bb.0:
321 ; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_right_32_param_0];
322 ; SM35-NEXT: ld.param.u32 %r2, [funnel_shift_right_32_param_1];
323 ; SM35-NEXT: ld.param.u32 %r3, [funnel_shift_right_32_param_2];
324 ; SM35-NEXT: shf.r.wrap.b32 %r4, %r2, %r1, %r3;
325 ; SM35-NEXT: st.param.b32 [func_retval0], %r4;
327 %val = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
; funnel_shift_left_32 - general 32-bit funnel shift left: llvm.fshl.i32
; with distinct data operands %a and %b and a variable amount %c.
; Lowering expected by the checks below:
;   - sm_20 expects a shift expansion: %a is shifted left by (%c & 31), %b is
;     first shifted right by 1 and then right by (~%c & 31), and the two parts
;     are combined with or.b32.
;   - sm_35 expects a single shf.l.wrap.b32 whose operands are, in order, the
;     loaded %b, the loaded %a, and the amount %c.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
331 define i32 @funnel_shift_left_32(i32 %a, i32 %b, i32 %c) {
332 ; SM20-LABEL: funnel_shift_left_32(
334 ; SM20-NEXT: .reg .b32 %r<11>;
336 ; SM20-NEXT: // %bb.0:
337 ; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_left_32_param_0];
338 ; SM20-NEXT: ld.param.u32 %r2, [funnel_shift_left_32_param_2];
339 ; SM20-NEXT: and.b32 %r3, %r2, 31;
340 ; SM20-NEXT: shl.b32 %r4, %r1, %r3;
341 ; SM20-NEXT: ld.param.u32 %r5, [funnel_shift_left_32_param_1];
342 ; SM20-NEXT: shr.u32 %r6, %r5, 1;
343 ; SM20-NEXT: not.b32 %r7, %r2;
344 ; SM20-NEXT: and.b32 %r8, %r7, 31;
345 ; SM20-NEXT: shr.u32 %r9, %r6, %r8;
346 ; SM20-NEXT: or.b32 %r10, %r4, %r9;
347 ; SM20-NEXT: st.param.b32 [func_retval0], %r10;
350 ; SM35-LABEL: funnel_shift_left_32(
352 ; SM35-NEXT: .reg .b32 %r<5>;
354 ; SM35-NEXT: // %bb.0:
355 ; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_left_32_param_0];
356 ; SM35-NEXT: ld.param.u32 %r2, [funnel_shift_left_32_param_1];
357 ; SM35-NEXT: ld.param.u32 %r3, [funnel_shift_left_32_param_2];
358 ; SM35-NEXT: shf.l.wrap.b32 %r4, %r2, %r1, %r3;
359 ; SM35-NEXT: st.param.b32 [func_retval0], %r4;
361 %val = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
; funnel_shift_right_64 - general 64-bit funnel shift right: llvm.fshr.i64
; with distinct data operands %a and %b and a variable amount %c.
; The checks below expect the same shift expansion on sm_20 and sm_35:
;   - only the low 32 bits of the i64 amount are loaded (ld.param.u32 of
;     parameter 2);
;   - %b is shifted right by (%c & 63), %a is first shifted left by 1 and then
;     left by (~%c & 63), and the two parts are combined with or.b64.
; No shf instruction is expected for the 64-bit case on either target.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
365 define i64 @funnel_shift_right_64(i64 %a, i64 %b, i64 %c) {
366 ; SM20-LABEL: funnel_shift_right_64(
368 ; SM20-NEXT: .reg .b32 %r<5>;
369 ; SM20-NEXT: .reg .b64 %rd<7>;
371 ; SM20-NEXT: // %bb.0:
372 ; SM20-NEXT: ld.param.u64 %rd1, [funnel_shift_right_64_param_0];
373 ; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_right_64_param_2];
374 ; SM20-NEXT: and.b32 %r2, %r1, 63;
375 ; SM20-NEXT: ld.param.u64 %rd2, [funnel_shift_right_64_param_1];
376 ; SM20-NEXT: shr.u64 %rd3, %rd2, %r2;
377 ; SM20-NEXT: shl.b64 %rd4, %rd1, 1;
378 ; SM20-NEXT: not.b32 %r3, %r1;
379 ; SM20-NEXT: and.b32 %r4, %r3, 63;
380 ; SM20-NEXT: shl.b64 %rd5, %rd4, %r4;
381 ; SM20-NEXT: or.b64 %rd6, %rd5, %rd3;
382 ; SM20-NEXT: st.param.b64 [func_retval0], %rd6;
385 ; SM35-LABEL: funnel_shift_right_64(
387 ; SM35-NEXT: .reg .b32 %r<5>;
388 ; SM35-NEXT: .reg .b64 %rd<7>;
390 ; SM35-NEXT: // %bb.0:
391 ; SM35-NEXT: ld.param.u64 %rd1, [funnel_shift_right_64_param_0];
392 ; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_right_64_param_2];
393 ; SM35-NEXT: and.b32 %r2, %r1, 63;
394 ; SM35-NEXT: ld.param.u64 %rd2, [funnel_shift_right_64_param_1];
395 ; SM35-NEXT: shr.u64 %rd3, %rd2, %r2;
396 ; SM35-NEXT: shl.b64 %rd4, %rd1, 1;
397 ; SM35-NEXT: not.b32 %r3, %r1;
398 ; SM35-NEXT: and.b32 %r4, %r3, 63;
399 ; SM35-NEXT: shl.b64 %rd5, %rd4, %r4;
400 ; SM35-NEXT: or.b64 %rd6, %rd5, %rd3;
401 ; SM35-NEXT: st.param.b64 [func_retval0], %rd6;
403 %val = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
; funnel_shift_left_64 - general 64-bit funnel shift left: llvm.fshl.i64
; with distinct data operands %a and %b and a variable amount %c.
; The checks below expect the same shift expansion on sm_20 and sm_35:
;   - only the low 32 bits of the i64 amount are loaded (ld.param.u32 of
;     parameter 2);
;   - %a is shifted left by (%c & 63), %b is first shifted right by 1 and then
;     right by (~%c & 63), and the two parts are combined with or.b64.
; No shf instruction is expected for the 64-bit case on either target.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
407 define i64 @funnel_shift_left_64(i64 %a, i64 %b, i64 %c) {
408 ; SM20-LABEL: funnel_shift_left_64(
410 ; SM20-NEXT: .reg .b32 %r<5>;
411 ; SM20-NEXT: .reg .b64 %rd<7>;
413 ; SM20-NEXT: // %bb.0:
414 ; SM20-NEXT: ld.param.u64 %rd1, [funnel_shift_left_64_param_0];
415 ; SM20-NEXT: ld.param.u32 %r1, [funnel_shift_left_64_param_2];
416 ; SM20-NEXT: and.b32 %r2, %r1, 63;
417 ; SM20-NEXT: shl.b64 %rd2, %rd1, %r2;
418 ; SM20-NEXT: ld.param.u64 %rd3, [funnel_shift_left_64_param_1];
419 ; SM20-NEXT: shr.u64 %rd4, %rd3, 1;
420 ; SM20-NEXT: not.b32 %r3, %r1;
421 ; SM20-NEXT: and.b32 %r4, %r3, 63;
422 ; SM20-NEXT: shr.u64 %rd5, %rd4, %r4;
423 ; SM20-NEXT: or.b64 %rd6, %rd2, %rd5;
424 ; SM20-NEXT: st.param.b64 [func_retval0], %rd6;
427 ; SM35-LABEL: funnel_shift_left_64(
429 ; SM35-NEXT: .reg .b32 %r<5>;
430 ; SM35-NEXT: .reg .b64 %rd<7>;
432 ; SM35-NEXT: // %bb.0:
433 ; SM35-NEXT: ld.param.u64 %rd1, [funnel_shift_left_64_param_0];
434 ; SM35-NEXT: ld.param.u32 %r1, [funnel_shift_left_64_param_2];
435 ; SM35-NEXT: and.b32 %r2, %r1, 63;
436 ; SM35-NEXT: shl.b64 %rd2, %rd1, %r2;
437 ; SM35-NEXT: ld.param.u64 %rd3, [funnel_shift_left_64_param_1];
438 ; SM35-NEXT: shr.u64 %rd4, %rd3, 1;
439 ; SM35-NEXT: not.b32 %r3, %r1;
440 ; SM35-NEXT: and.b32 %r4, %r3, 63;
441 ; SM35-NEXT: shr.u64 %rd5, %rd4, %r4;
442 ; SM35-NEXT: or.b64 %rd6, %rd2, %rd5;
443 ; SM35-NEXT: st.param.b64 [func_retval0], %rd6;
445 %val = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)