1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc < %s --mtriple=nvptx64 -mcpu=sm_20 | FileCheck --check-prefix=SM20 %s
3 ; RUN: llc < %s --mtriple=nvptx64 -mcpu=sm_35 | FileCheck --check-prefix=SM35 %s
4 ; RUN: %if ptxas %{ llc < %s --mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
5 ; RUN: %if ptxas %{ llc < %s --mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
; Declarations of the legacy NVVM rotate intrinsics exercised below.
; rotate.b32 / rotate.b64 rotate left; rotate.right.b64 rotates right.
8 declare i32 @llvm.nvvm.rotate.b32(i32, i32)
9 declare i64 @llvm.nvvm.rotate.b64(i64, i32)
10 declare i64 @llvm.nvvm.rotate.right.b64(i64, i32)
; @rotate32: variable-amount 32-bit rotate-left via llvm.nvvm.rotate.b32.
; SM20 (no funnel-shift hardware) expands to shl/sub/shr/add inline PTX;
; SM35 selects a single shf.l.wrap.b32 funnel shift.
14 define i32 @rotate32(i32 %a, i32 %b) {
15 ; SM20-LABEL: rotate32(
17 ; SM20-NEXT: .reg .b32 %r<4>;
19 ; SM20-NEXT: // %bb.0:
20 ; SM20-NEXT: ld.param.u32 %r1, [rotate32_param_0];
21 ; SM20-NEXT: ld.param.u32 %r2, [rotate32_param_1];
23 ; SM20-NEXT: .reg .b32 %lhs;
24 ; SM20-NEXT: .reg .b32 %rhs;
25 ; SM20-NEXT: .reg .b32 %amt2;
26 ; SM20-NEXT: shl.b32 %lhs, %r1, %r2;
27 ; SM20-NEXT: sub.s32 %amt2, 32, %r2;
28 ; SM20-NEXT: shr.b32 %rhs, %r1, %amt2;
29 ; SM20-NEXT: add.u32 %r3, %lhs, %rhs;
31 ; SM20-NEXT: st.param.b32 [func_retval0+0], %r3;
34 ; SM35-LABEL: rotate32(
36 ; SM35-NEXT: .reg .b32 %r<4>;
38 ; SM35-NEXT: // %bb.0:
39 ; SM35-NEXT: ld.param.u32 %r1, [rotate32_param_0];
40 ; SM35-NEXT: ld.param.u32 %r2, [rotate32_param_1];
41 ; SM35-NEXT: shf.l.wrap.b32 %r3, %r1, %r1, %r2;
42 ; SM35-NEXT: st.param.b32 [func_retval0+0], %r3;
44 %val = tail call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 %b)
; @rotate64: variable-amount 64-bit rotate-left via llvm.nvvm.rotate.b64.
; SM20 masks the amount to 6 bits and expands to 64-bit shl/shr/add;
; SM35 splits the i64 into 32-bit halves (mov.b64 unpack) and combines two
; shf.l.wrap.b32 funnel shifts, then repacks with mov.b64.
50 define i64 @rotate64(i64 %a, i32 %b) {
51 ; SM20-LABEL: rotate64(
53 ; SM20-NEXT: .reg .b32 %r<2>;
54 ; SM20-NEXT: .reg .b64 %rd<3>;
56 ; SM20-NEXT: // %bb.0:
57 ; SM20-NEXT: ld.param.u64 %rd1, [rotate64_param_0];
58 ; SM20-NEXT: ld.param.u32 %r1, [rotate64_param_1];
60 ; SM20-NEXT: .reg .b64 %lhs;
61 ; SM20-NEXT: .reg .b64 %rhs;
62 ; SM20-NEXT: .reg .u32 %amt2;
63 ; SM20-NEXT: and.b32 %amt2, %r1, 63;
64 ; SM20-NEXT: shl.b64 %lhs, %rd1, %amt2;
65 ; SM20-NEXT: sub.u32 %amt2, 64, %amt2;
66 ; SM20-NEXT: shr.b64 %rhs, %rd1, %amt2;
67 ; SM20-NEXT: add.u64 %rd2, %lhs, %rhs;
69 ; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2;
72 ; SM35-LABEL: rotate64(
74 ; SM35-NEXT: .reg .b32 %r<6>;
75 ; SM35-NEXT: .reg .b64 %rd<3>;
77 ; SM35-NEXT: // %bb.0:
78 ; SM35-NEXT: ld.param.u64 %rd1, [rotate64_param_0];
80 ; SM35-NEXT: .reg .b32 %dummy;
81 ; SM35-NEXT: mov.b64 {%dummy,%r1}, %rd1;
84 ; SM35-NEXT: .reg .b32 %dummy;
85 ; SM35-NEXT: mov.b64 {%r2,%dummy}, %rd1;
87 ; SM35-NEXT: ld.param.u32 %r3, [rotate64_param_1];
88 ; SM35-NEXT: shf.l.wrap.b32 %r4, %r2, %r1, %r3;
89 ; SM35-NEXT: shf.l.wrap.b32 %r5, %r1, %r2, %r3;
90 ; SM35-NEXT: mov.b64 %rd2, {%r5, %r4};
91 ; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2;
93 %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 %b)
; @rotateright64: variable-amount 64-bit rotate-right via
; llvm.nvvm.rotate.right.b64. Mirror image of @rotate64: SM20 expands to
; shr/shl/add with a 6-bit-masked amount; SM35 uses two shf.r.wrap.b32
; funnel shifts on the unpacked 32-bit halves (note the halves are swapped
; relative to the rotate-left case).
99 define i64 @rotateright64(i64 %a, i32 %b) {
100 ; SM20-LABEL: rotateright64(
102 ; SM20-NEXT: .reg .b32 %r<2>;
103 ; SM20-NEXT: .reg .b64 %rd<3>;
105 ; SM20-NEXT: // %bb.0:
106 ; SM20-NEXT: ld.param.u64 %rd1, [rotateright64_param_0];
107 ; SM20-NEXT: ld.param.u32 %r1, [rotateright64_param_1];
109 ; SM20-NEXT: .reg .b64 %lhs;
110 ; SM20-NEXT: .reg .b64 %rhs;
111 ; SM20-NEXT: .reg .u32 %amt2;
112 ; SM20-NEXT: and.b32 %amt2, %r1, 63;
113 ; SM20-NEXT: shr.b64 %lhs, %rd1, %amt2;
114 ; SM20-NEXT: sub.u32 %amt2, 64, %amt2;
115 ; SM20-NEXT: shl.b64 %rhs, %rd1, %amt2;
116 ; SM20-NEXT: add.u64 %rd2, %lhs, %rhs;
118 ; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2;
121 ; SM35-LABEL: rotateright64(
123 ; SM35-NEXT: .reg .b32 %r<6>;
124 ; SM35-NEXT: .reg .b64 %rd<3>;
126 ; SM35-NEXT: // %bb.0:
127 ; SM35-NEXT: ld.param.u64 %rd1, [rotateright64_param_0];
129 ; SM35-NEXT: .reg .b32 %dummy;
130 ; SM35-NEXT: mov.b64 {%r1,%dummy}, %rd1;
133 ; SM35-NEXT: .reg .b32 %dummy;
134 ; SM35-NEXT: mov.b64 {%dummy,%r2}, %rd1;
136 ; SM35-NEXT: ld.param.u32 %r3, [rotateright64_param_1];
137 ; SM35-NEXT: shf.r.wrap.b32 %r4, %r2, %r1, %r3;
138 ; SM35-NEXT: shf.r.wrap.b32 %r5, %r1, %r2, %r3;
139 ; SM35-NEXT: mov.b64 %rd2, {%r5, %r4};
140 ; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2;
142 %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 %b)
; @rotl0: constant-amount rotate-left-by-8 written as the shl/lshr/or IR
; pattern (not an intrinsic). SM20 keeps the shl-8 / shr-24 / add expansion;
; SM35 matches it to a single shf.l.wrap.b32 with immediate amount 8.
; NOTE(review): the `%t0 = shl i32 %x, 8` line is not visible in this chunk
; (only the lshr and or are) — presumably filtered out; confirm in the
; full file.
148 define i32 @rotl0(i32 %x) {
151 ; SM20-NEXT: .reg .b32 %r<3>;
153 ; SM20-NEXT: // %bb.0:
154 ; SM20-NEXT: ld.param.u32 %r1, [rotl0_param_0];
156 ; SM20-NEXT: .reg .b32 %lhs;
157 ; SM20-NEXT: .reg .b32 %rhs;
158 ; SM20-NEXT: shl.b32 %lhs, %r1, 8;
159 ; SM20-NEXT: shr.b32 %rhs, %r1, 24;
160 ; SM20-NEXT: add.u32 %r2, %lhs, %rhs;
162 ; SM20-NEXT: st.param.b32 [func_retval0+0], %r2;
167 ; SM35-NEXT: .reg .b32 %r<3>;
169 ; SM35-NEXT: // %bb.0:
170 ; SM35-NEXT: ld.param.u32 %r1, [rotl0_param_0];
171 ; SM35-NEXT: shf.l.wrap.b32 %r2, %r1, %r1, 8;
172 ; SM35-NEXT: st.param.b32 [func_retval0+0], %r2;
175 %t1 = lshr i32 %x, 24
176 %t2 = or i32 %t0, %t1
; Generic LLVM funnel-shift intrinsics; with both data operands equal these
; are 64-bit rotate-left (fshl) and rotate-right (fshr).
180 declare i64 @llvm.fshl.i64(i64, i64, i64)
181 declare i64 @llvm.fshr.i64(i64, i64, i64)
; @rotl64: variable-amount 64-bit rotate-left via llvm.fshl.i64(%a, %a, %n).
; Both SM20 and SM35 expand to the masked shl/sub/shr/add sequence here —
; unlike the 32-bit case, no single 64-bit funnel-shift instruction is used
; for SM35 in this lowering (the two check bodies below are identical).
184 define i64 @rotl64(i64 %a, i64 %n) {
185 ; SM20-LABEL: rotl64(
187 ; SM20-NEXT: .reg .b32 %r<2>;
188 ; SM20-NEXT: .reg .b64 %rd<3>;
190 ; SM20-NEXT: // %bb.0:
191 ; SM20-NEXT: ld.param.u64 %rd1, [rotl64_param_0];
192 ; SM20-NEXT: ld.param.u32 %r1, [rotl64_param_1];
194 ; SM20-NEXT: .reg .b64 %lhs;
195 ; SM20-NEXT: .reg .b64 %rhs;
196 ; SM20-NEXT: .reg .u32 %amt2;
197 ; SM20-NEXT: and.b32 %amt2, %r1, 63;
198 ; SM20-NEXT: shl.b64 %lhs, %rd1, %amt2;
199 ; SM20-NEXT: sub.u32 %amt2, 64, %amt2;
200 ; SM20-NEXT: shr.b64 %rhs, %rd1, %amt2;
201 ; SM20-NEXT: add.u64 %rd2, %lhs, %rhs;
203 ; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2;
206 ; SM35-LABEL: rotl64(
208 ; SM35-NEXT: .reg .b32 %r<2>;
209 ; SM35-NEXT: .reg .b64 %rd<3>;
211 ; SM35-NEXT: // %bb.0:
212 ; SM35-NEXT: ld.param.u64 %rd1, [rotl64_param_0];
213 ; SM35-NEXT: ld.param.u32 %r1, [rotl64_param_1];
215 ; SM35-NEXT: .reg .b64 %lhs;
216 ; SM35-NEXT: .reg .b64 %rhs;
217 ; SM35-NEXT: .reg .u32 %amt2;
218 ; SM35-NEXT: and.b32 %amt2, %r1, 63;
219 ; SM35-NEXT: shl.b64 %lhs, %rd1, %amt2;
220 ; SM35-NEXT: sub.u32 %amt2, 64, %amt2;
221 ; SM35-NEXT: shr.b64 %rhs, %rd1, %amt2;
222 ; SM35-NEXT: add.u64 %rd2, %lhs, %rhs;
224 ; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2;
226 %val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %n)
; @rotl64_imm: constant-amount rotate-left via llvm.fshl.i64 with amount 66.
; 66 mod 64 == 2, so both subtargets lower to shl-by-2 / shr-by-62 / add —
; checks that the oversized rotate amount is reduced at compile time.
231 define i64 @rotl64_imm(i64 %a) {
232 ; SM20-LABEL: rotl64_imm(
234 ; SM20-NEXT: .reg .b64 %rd<3>;
236 ; SM20-NEXT: // %bb.0:
237 ; SM20-NEXT: ld.param.u64 %rd1, [rotl64_imm_param_0];
239 ; SM20-NEXT: .reg .b64 %lhs;
240 ; SM20-NEXT: .reg .b64 %rhs;
241 ; SM20-NEXT: shl.b64 %lhs, %rd1, 2;
242 ; SM20-NEXT: shr.b64 %rhs, %rd1, 62;
243 ; SM20-NEXT: add.u64 %rd2, %lhs, %rhs;
245 ; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2;
248 ; SM35-LABEL: rotl64_imm(
250 ; SM35-NEXT: .reg .b64 %rd<3>;
252 ; SM35-NEXT: // %bb.0:
253 ; SM35-NEXT: ld.param.u64 %rd1, [rotl64_imm_param_0];
255 ; SM35-NEXT: .reg .b64 %lhs;
256 ; SM35-NEXT: .reg .b64 %rhs;
257 ; SM35-NEXT: shl.b64 %lhs, %rd1, 2;
258 ; SM35-NEXT: shr.b64 %rhs, %rd1, 62;
259 ; SM35-NEXT: add.u64 %rd2, %lhs, %rhs;
261 ; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2;
263 %val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 66)
; @rotr64: variable-amount 64-bit rotate-right via llvm.fshr.i64(%a, %a, %n).
; Mirror of @rotl64 with shr as the primary shift; as with @rotl64, SM20 and
; SM35 produce identical masked shr/sub/shl/add expansions.
268 define i64 @rotr64(i64 %a, i64 %n) {
269 ; SM20-LABEL: rotr64(
271 ; SM20-NEXT: .reg .b32 %r<2>;
272 ; SM20-NEXT: .reg .b64 %rd<3>;
274 ; SM20-NEXT: // %bb.0:
275 ; SM20-NEXT: ld.param.u64 %rd1, [rotr64_param_0];
276 ; SM20-NEXT: ld.param.u32 %r1, [rotr64_param_1];
278 ; SM20-NEXT: .reg .b64 %lhs;
279 ; SM20-NEXT: .reg .b64 %rhs;
280 ; SM20-NEXT: .reg .u32 %amt2;
281 ; SM20-NEXT: and.b32 %amt2, %r1, 63;
282 ; SM20-NEXT: shr.b64 %lhs, %rd1, %amt2;
283 ; SM20-NEXT: sub.u32 %amt2, 64, %amt2;
284 ; SM20-NEXT: shl.b64 %rhs, %rd1, %amt2;
285 ; SM20-NEXT: add.u64 %rd2, %lhs, %rhs;
287 ; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2;
290 ; SM35-LABEL: rotr64(
292 ; SM35-NEXT: .reg .b32 %r<2>;
293 ; SM35-NEXT: .reg .b64 %rd<3>;
295 ; SM35-NEXT: // %bb.0:
296 ; SM35-NEXT: ld.param.u64 %rd1, [rotr64_param_0];
297 ; SM35-NEXT: ld.param.u32 %r1, [rotr64_param_1];
299 ; SM35-NEXT: .reg .b64 %lhs;
300 ; SM35-NEXT: .reg .b64 %rhs;
301 ; SM35-NEXT: .reg .u32 %amt2;
302 ; SM35-NEXT: and.b32 %amt2, %r1, 63;
303 ; SM35-NEXT: shr.b64 %lhs, %rd1, %amt2;
304 ; SM35-NEXT: sub.u32 %amt2, 64, %amt2;
305 ; SM35-NEXT: shl.b64 %rhs, %rd1, %amt2;
306 ; SM35-NEXT: add.u64 %rd2, %lhs, %rhs;
308 ; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2;
310 %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %n)
; @rotr64_imm: constant-amount rotate-right via llvm.fshr.i64 with amount 66
; (== 2 mod 64), lowered on both subtargets to shl-by-62 / shr-by-2 / add.
315 define i64 @rotr64_imm(i64 %a) {
316 ; SM20-LABEL: rotr64_imm(
318 ; SM20-NEXT: .reg .b64 %rd<3>;
320 ; SM20-NEXT: // %bb.0:
321 ; SM20-NEXT: ld.param.u64 %rd1, [rotr64_imm_param_0];
323 ; SM20-NEXT: .reg .b64 %lhs;
324 ; SM20-NEXT: .reg .b64 %rhs;
325 ; SM20-NEXT: shl.b64 %lhs, %rd1, 62;
326 ; SM20-NEXT: shr.b64 %rhs, %rd1, 2;
327 ; SM20-NEXT: add.u64 %rd2, %lhs, %rhs;
329 ; SM20-NEXT: st.param.b64 [func_retval0+0], %rd2;
332 ; SM35-LABEL: rotr64_imm(
334 ; SM35-NEXT: .reg .b64 %rd<3>;
336 ; SM35-NEXT: // %bb.0:
337 ; SM35-NEXT: ld.param.u64 %rd1, [rotr64_imm_param_0];
339 ; SM35-NEXT: .reg .b64 %lhs;
340 ; SM35-NEXT: .reg .b64 %rhs;
341 ; SM35-NEXT: shl.b64 %lhs, %rd1, 62;
342 ; SM35-NEXT: shr.b64 %rhs, %rd1, 2;
343 ; SM35-NEXT: add.u64 %rd2, %lhs, %rhs;
345 ; SM35-NEXT: st.param.b64 [func_retval0+0], %rd2;
347 %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 66)