1 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s
; Declarations of the llvm.nvvm.shfl.{down,up,bfly,idx} intrinsics in their
; "p" form: each takes an i32 or float value plus two i32 operands and returns
; a {value, i1} struct.
; NOTE(review): only the two shfl.down declarations are called by the
; functions visible in this part of the file; confirm whether the up/bfly/idx
; variants are exercised elsewhere.
3 declare {i32, i1} @llvm.nvvm.shfl.down.i32p(i32, i32, i32)
4 declare {float, i1} @llvm.nvvm.shfl.down.f32p(float, i32, i32)
5 declare {i32, i1} @llvm.nvvm.shfl.up.i32p(i32, i32, i32)
6 declare {float, i1} @llvm.nvvm.shfl.up.f32p(float, i32, i32)
7 declare {i32, i1} @llvm.nvvm.shfl.bfly.i32p(i32, i32, i32)
8 declare {float, i1} @llvm.nvvm.shfl.bfly.f32p(float, i32, i32)
9 declare {i32, i1} @llvm.nvvm.shfl.idx.i32p(i32, i32, i32)
10 declare {float, i1} @llvm.nvvm.shfl.idx.f32p(float, i32, i32)
; i32 shfl.down with all three operands taken from function arguments.
; Expected output: three ld.param.u32 loads, one shfl.down.b32 whose source
; operands are exactly those three registers, and a st.param that stores the
; shuffled value register (OUT).
; The %p destination register is captured as OUTP, but none of the expectation
; lines shown here refers to it afterwards.
12 ; CHECK-LABEL: .func{{.*}}shfl.i32.rrr
13 define {i32, i1} @shfl.i32.rrr(i32 %a, i32 %b, i32 %c) {
14 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
15 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
16 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
17 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], [[C]];
18 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
19 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 %c)
; i32 shfl.down with all three operands taken from function arguments.
; Expected output: three ld.param.u32 loads, one shfl.down.b32 using those
; three registers, and a st.param of the shuffled value register.
; NOTE(review): apart from the function name, the signature, call and
; expectations are identical to shfl.i32.rrr; no operand is a constant here
; despite the "i" in the name. Confirm whether this duplication is intended.
23 ; CHECK-LABEL: .func{{.*}}shfl.i32.irr
24 define {i32, i1} @shfl.i32.irr(i32 %a, i32 %b, i32 %c) {
25 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
26 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
27 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
28 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], [[C]];
29 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
30 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 %c)
; i32 shfl.down where the value and second operand come from function
; arguments and the third operand is the constant 1.
; Expected output: two ld.param.u32 loads, then a shfl.down.b32 that uses both
; registers and the literal 1 as its last operand, then a st.param of the
; shuffled value register.
34 ; CHECK-LABEL: .func{{.*}}shfl.i32.rri
35 define {i32, i1} @shfl.i32.rri(i32 %a, i32 %b) {
36 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
37 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
38 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], 1;
39 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
40 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 1)
; i32 shfl.down where the value and second operand come from function
; arguments and the third operand is the constant 2.
; Expected output: two ld.param.u32 loads, then a shfl.down.b32 that uses both
; registers and the literal 2 as its last operand, then a st.param of the
; shuffled value register.
; NOTE(review): this has the same shape as shfl.i32.rri and differs only in
; the constant (2 instead of 1); the first operand is still a register.
44 ; CHECK-LABEL: .func{{.*}}shfl.i32.iri
45 define {i32, i1} @shfl.i32.iri(i32 %a, i32 %b) {
46 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
47 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
48 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], 2;
49 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
50 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 2)
; i32 shfl.down where the value and third operand come from function
; arguments and the second operand is the constant 1.
; Expected output: two ld.param.u32 loads, then a shfl.down.b32 with the
; literal 1 in the middle source position, then a st.param of the shuffled
; value register.
54 ; CHECK-LABEL: .func{{.*}}shfl.i32.rir
55 define {i32, i1} @shfl.i32.rir(i32 %a, i32 %c) {
56 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
57 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
58 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 1, [[C]];
59 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
60 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 1, i32 %c)
; i32 shfl.down where the value and third operand come from function
; arguments and the second operand is the constant 2.
; Expected output: two ld.param.u32 loads, then a shfl.down.b32 with the
; literal 2 in the middle source position, then a st.param of the shuffled
; value register.
; NOTE(review): this has the same shape as shfl.i32.rir and differs only in
; the constant (2 instead of 1); the first operand is still a register.
64 ; CHECK-LABEL: .func{{.*}}shfl.i32.iir
65 define {i32, i1} @shfl.i32.iir(i32 %a, i32 %c) {
66 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
67 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
68 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, [[C]];
69 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
70 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 2, i32 %c)
; i32 shfl.down where only the value comes from a function argument; the
; second and third operands are the constants 1 and 2.
; Expected output: one ld.param.u32 load, then a shfl.down.b32 whose last two
; source operands are the literals 1 and 2, then a st.param of the shuffled
; value register.
74 ; CHECK-LABEL: .func{{.*}}shfl.i32.rii
75 define {i32, i1} @shfl.i32.rii(i32 %a) {
76 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
77 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 1, 2;
78 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
79 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 1, i32 2)
; i32 shfl.down where only the value comes from a function argument; the
; second and third operands are the constants 2 and 3.
; Expected output: one ld.param.u32 load, then a shfl.down.b32 whose last two
; source operands are the literals 2 and 3, then a st.param of the shuffled
; value register.
; NOTE(review): parameter %b is declared but not used by the call below, and
; the test has the same shape as shfl.i32.rii with different constants.
83 ; CHECK-LABEL: .func{{.*}}shfl.i32.iii
84 define {i32, i1} @shfl.i32.iii(i32 %a, i32 %b) {
85 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
86 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, 3;
87 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
88 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 2, i32 3)
92 ;; Same tests as above, but using the float variant of the intrinsic (llvm.nvvm.shfl.down.f32p)
; float shfl.down with all three operands taken from function arguments.
; Expected output: one ld.param.f32 load into an %f register and two
; ld.param.u32 loads, then a shfl.down.b32 that reads those three registers and
; writes an %f register, then a st.param of that result register (OUT).
; The %p destination register is captured as OUTP, but none of the expectation
; lines shown here refers to it afterwards.
94 ; CHECK-LABEL: .func{{.*}}shfl.f32.rrr
95 define {float, i1} @shfl.f32.rrr(float %a, i32 %b, i32 %c) {
96 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
97 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
98 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
99 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], [[C]];
100 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
101 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 %c)
; float shfl.down with all three operands taken from function arguments.
; Expected output: one ld.param.f32 and two ld.param.u32 loads, a
; shfl.down.b32 using those three registers, and a st.param of the %f result.
; NOTE(review): apart from the function name, the signature, call and
; expectations are identical to shfl.f32.rrr; no operand is a constant here
; despite the "i" in the name. Confirm whether this duplication is intended.
105 ; CHECK-LABEL: .func{{.*}}shfl.f32.irr
106 define {float, i1} @shfl.f32.irr(float %a, i32 %b, i32 %c) {
107 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
108 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
109 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
110 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], [[C]];
111 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
112 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 %c)
; float shfl.down where the value and second operand come from function
; arguments and the third operand is the constant 1.
; Expected output: one ld.param.f32 and one ld.param.u32 load, then a
; shfl.down.b32 with the literal 1 as its last operand, then a st.param of the
; %f result register.
116 ; CHECK-LABEL: .func{{.*}}shfl.f32.rri
117 define {float, i1} @shfl.f32.rri(float %a, i32 %b) {
118 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
119 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
120 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], 1;
121 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
122 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 1)
; float shfl.down where the value and second operand come from function
; arguments and the third operand is the constant 2.
; Expected output: one ld.param.f32 and one ld.param.u32 load, then a
; shfl.down.b32 with the literal 2 as its last operand, then a st.param of the
; %f result register.
; NOTE(review): this has the same shape as shfl.f32.rri and differs only in
; the constant (2 instead of 1); the first operand is still a register.
126 ; CHECK-LABEL: .func{{.*}}shfl.f32.iri
127 define {float, i1} @shfl.f32.iri(float %a, i32 %b) {
128 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
129 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
130 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], 2;
131 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
132 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 2)
; float shfl.down where the value and third operand come from function
; arguments and the second operand is the constant 1.
; Expected output: one ld.param.f32 and one ld.param.u32 load, then a
; shfl.down.b32 with the literal 1 in the middle source position, then a
; st.param of the %f result register.
136 ; CHECK-LABEL: .func{{.*}}shfl.f32.rir
137 define {float, i1} @shfl.f32.rir(float %a, i32 %c) {
138 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
139 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
140 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 1, [[C]];
141 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
142 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 1, i32 %c)
; float shfl.down where the value and third operand come from function
; arguments and the second operand is the constant 2.
; Expected output: one ld.param.f32 and one ld.param.u32 load, then a
; shfl.down.b32 with the literal 2 in the middle source position, then a
; st.param of the %f result register.
; NOTE(review): this has the same shape as shfl.f32.rir and differs only in
; the constant (2 instead of 1); the first operand is still a register.
146 ; CHECK-LABEL: .func{{.*}}shfl.f32.iir
147 define {float, i1} @shfl.f32.iir(float %a, i32 %c) {
148 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
149 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
150 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, [[C]];
151 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
152 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 2, i32 %c)
; float shfl.down where only the value comes from a function argument; the
; second and third operands are the constants 1 and 2.
; Expected output: one ld.param.f32 load, then a shfl.down.b32 whose last two
; source operands are the literals 1 and 2, then a st.param of the %f result
; register.
156 ; CHECK-LABEL: .func{{.*}}shfl.f32.rii
157 define {float, i1} @shfl.f32.rii(float %a) {
158 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
159 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 1, 2;
160 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
161 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 1, i32 2)
; float shfl.down where only the value comes from a function argument; the
; second and third operands are the constants 2 and 3.
; Expected output: one ld.param.f32 load, then a shfl.down.b32 whose last two
; source operands are the literals 2 and 3, then a st.param of the %f result
; register.
; NOTE(review): parameter %b is declared but not used by the call below, and
; the test has the same shape as shfl.f32.rii with different constants.
165 ; CHECK-LABEL: .func{{.*}}shfl.f32.iii
166 define {float, i1} @shfl.f32.iii(float %a, i32 %b) {
167 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
168 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, 3;
169 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
170 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 2, i32 3)