; Test driver lines: the first compiles this file with llc for nvptx64 / sm_30
; and pipes the assembly to FileCheck; the second, only when the ptxas
; feature is available, pipes the same llc output to the ptxas verifier.
1 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s
2 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %}
; Declarations of the predicated shuffle intrinsics. Each takes a value
; (i32 or float) plus two i32 operands and returns a {value, i1} pair.
; Four modes are declared (down, up, bfly, idx); the functions visible in
; this section only call the two .down variants.
4 declare {i32, i1} @llvm.nvvm.shfl.down.i32p(i32, i32, i32)
5 declare {float, i1} @llvm.nvvm.shfl.down.f32p(float, i32, i32)
6 declare {i32, i1} @llvm.nvvm.shfl.up.i32p(i32, i32, i32)
7 declare {float, i1} @llvm.nvvm.shfl.up.f32p(float, i32, i32)
8 declare {i32, i1} @llvm.nvvm.shfl.bfly.i32p(i32, i32, i32)
9 declare {float, i1} @llvm.nvvm.shfl.bfly.f32p(float, i32, i32)
10 declare {i32, i1} @llvm.nvvm.shfl.idx.i32p(i32, i32, i32)
11 declare {float, i1} @llvm.nvvm.shfl.idx.f32p(float, i32, i32)
; i32 shfl.down with all three operands taken from function arguments.
; Expected output: three u32 parameter loads, then a shfl.down.b32 whose three
; source operands are those registers, then a st.param of the shuffled value.
; The predicate destination is captured as OUTP but is not matched again in
; the patterns shown here.
13 ; CHECK-LABEL: .func{{.*}}shfl_i32_rrr
14 define {i32, i1} @shfl_i32_rrr(i32 %a, i32 %b, i32 %c) {
15 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
16 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
17 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
18 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], [[C]];
19 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
20 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 %c)
; i32 shfl.down with all three operands taken from function arguments.
; Expected output: three u32 parameter loads feeding an all-register
; shfl.down.b32, then a st.param of the shuffled value.
; NOTE(review): the call and the expected patterns are the same as in
; shfl_i32_rrr; the first operand is the argument %a, not a constant, even
; though the name presumably denotes an immediate first operand -- confirm
; whether that is intentional.
24 ; CHECK-LABEL: .func{{.*}}shfl_i32_irr
25 define {i32, i1} @shfl_i32_irr(i32 %a, i32 %b, i32 %c) {
26 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
27 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
28 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
29 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], [[C]];
30 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
31 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 %c)
; i32 shfl.down where the first two operands are function arguments and the
; third is the constant 1.
; Expected output: two u32 parameter loads, then a shfl.down.b32 with two
; register operands followed by the literal 1, then a st.param of the result.
35 ; CHECK-LABEL: .func{{.*}}shfl_i32_rri
36 define {i32, i1} @shfl_i32_rri(i32 %a, i32 %b) {
37 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
38 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
39 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], 1;
40 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
41 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 1)
; i32 shfl.down where the first two operands are function arguments and the
; third is the constant 2.
; Expected output: two u32 parameter loads, then a shfl.down.b32 with two
; register operands followed by the literal 2, then a st.param of the result.
; NOTE(review): the first operand is the argument %a (a register), so apart
; from the constant value this matches shfl_i32_rri -- confirm whether an
; immediate first operand was intended.
45 ; CHECK-LABEL: .func{{.*}}shfl_i32_iri
46 define {i32, i1} @shfl_i32_iri(i32 %a, i32 %b) {
47 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
48 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
49 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], 2;
50 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
51 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 %b, i32 2)
; i32 shfl.down where the first and third operands are function arguments and
; the second is the constant 1.
; Expected output: two u32 parameter loads, then a shfl.down.b32 of the form
; (register, 1, register), then a st.param of the shuffled value.
55 ; CHECK-LABEL: .func{{.*}}shfl_i32_rir
56 define {i32, i1} @shfl_i32_rir(i32 %a, i32 %c) {
57 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
58 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
59 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 1, [[C]];
60 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
61 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 1, i32 %c)
; i32 shfl.down where the first and third operands are function arguments and
; the second is the constant 2.
; Expected output: two u32 parameter loads, then a shfl.down.b32 of the form
; (register, 2, register), then a st.param of the shuffled value.
; NOTE(review): the first operand is the argument %a (a register), so apart
; from the constant value this matches shfl_i32_rir -- confirm whether an
; immediate first operand was intended.
65 ; CHECK-LABEL: .func{{.*}}shfl_i32_iir
66 define {i32, i1} @shfl_i32_iir(i32 %a, i32 %c) {
67 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
68 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
69 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, [[C]];
70 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
71 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 2, i32 %c)
; i32 shfl.down where only the first operand is a function argument; the
; second and third are the constants 1 and 2.
; Expected output: one u32 parameter load, then a shfl.down.b32 of the form
; (register, 1, 2), then a st.param of the shuffled value.
75 ; CHECK-LABEL: .func{{.*}}shfl_i32_rii
76 define {i32, i1} @shfl_i32_rii(i32 %a) {
77 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
78 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 1, 2;
79 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
80 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 1, i32 2)
; i32 shfl.down where only the first operand is a function argument; the
; second and third are the constants 2 and 3.
; Expected output: one u32 parameter load, then a shfl.down.b32 of the form
; (register, 2, 3), then a st.param of the shuffled value.
; NOTE(review): the first operand is still the argument %a (a register), and
; the parameter %b is declared but not used by the call -- confirm whether an
; immediate first operand was intended.
84 ; CHECK-LABEL: .func{{.*}}shfl_i32_iii
85 define {i32, i1} @shfl_i32_iii(i32 %a, i32 %b) {
86 ; CHECK: ld.param.u32 [[A:%r[0-9]+]]
87 ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, 3;
88 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
89 %val = call {i32, i1} @llvm.nvvm.shfl.down.i32p(i32 %a, i32 2, i32 3)
93 ;; Same operand-form tests as above, but calling the float variant of the intrinsic (llvm.nvvm.shfl.down.f32p)
; float shfl.down with all three operands taken from function arguments.
; Expected output: one f32 and two u32 parameter loads, then a shfl.down.b32
; whose value operand and result are %f registers and whose other two operands
; are %r registers, then a st.param of the shuffled value.
; The predicate destination is captured as OUTP but is not matched again in
; the patterns shown here.
95 ; CHECK-LABEL: .func{{.*}}shfl_f32_rrr
96 define {float, i1} @shfl_f32_rrr(float %a, i32 %b, i32 %c) {
97 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
98 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
99 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
100 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], [[C]];
101 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
102 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 %c)
; float shfl.down with all three operands taken from function arguments.
; Expected output: one f32 and two u32 parameter loads feeding an all-register
; shfl.down.b32, then a st.param of the shuffled value.
; NOTE(review): the call and the expected patterns are the same as in
; shfl_f32_rrr; the first operand is the argument %a, not a constant, even
; though the name presumably denotes an immediate first operand -- confirm
; whether that is intentional.
106 ; CHECK-LABEL: .func{{.*}}shfl_f32_irr
107 define {float, i1} @shfl_f32_irr(float %a, i32 %b, i32 %c) {
108 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
109 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
110 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
111 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], [[C]];
112 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
113 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 %c)
; float shfl.down where the first two operands are function arguments and the
; third is the constant 1.
; Expected output: an f32 and a u32 parameter load, then a shfl.down.b32 with
; two register operands followed by the literal 1, then a st.param of the
; shuffled value.
117 ; CHECK-LABEL: .func{{.*}}shfl_f32_rri
118 define {float, i1} @shfl_f32_rri(float %a, i32 %b) {
119 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
120 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
121 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], 1;
122 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
123 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 1)
; float shfl.down where the first two operands are function arguments and the
; third is the constant 2.
; Expected output: an f32 and a u32 parameter load, then a shfl.down.b32 with
; two register operands followed by the literal 2, then a st.param of the
; shuffled value.
; NOTE(review): the first operand is the argument %a (a register), so apart
; from the constant value this matches shfl_f32_rri -- confirm whether an
; immediate first operand was intended.
127 ; CHECK-LABEL: .func{{.*}}shfl_f32_iri
128 define {float, i1} @shfl_f32_iri(float %a, i32 %b) {
129 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
130 ; CHECK: ld.param.u32 [[B:%r[0-9]+]]
131 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], 2;
132 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
133 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 %b, i32 2)
; float shfl.down where the first and third operands are function arguments
; and the second is the constant 1.
; Expected output: an f32 and a u32 parameter load, then a shfl.down.b32 of the
; form (register, 1, register), then a st.param of the shuffled value.
137 ; CHECK-LABEL: .func{{.*}}shfl_f32_rir
138 define {float, i1} @shfl_f32_rir(float %a, i32 %c) {
139 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
140 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
141 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 1, [[C]];
142 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
143 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 1, i32 %c)
; float shfl.down where the first and third operands are function arguments
; and the second is the constant 2.
; Expected output: an f32 and a u32 parameter load, then a shfl.down.b32 of the
; form (register, 2, register), then a st.param of the shuffled value.
; NOTE(review): the first operand is the argument %a (a register), so apart
; from the constant value this matches shfl_f32_rir -- confirm whether an
; immediate first operand was intended.
147 ; CHECK-LABEL: .func{{.*}}shfl_f32_iir
148 define {float, i1} @shfl_f32_iir(float %a, i32 %c) {
149 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
150 ; CHECK: ld.param.u32 [[C:%r[0-9]+]]
151 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, [[C]];
152 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
153 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 2, i32 %c)
; float shfl.down where only the first operand is a function argument; the
; second and third are the constants 1 and 2.
; Expected output: one f32 parameter load, then a shfl.down.b32 of the form
; (register, 1, 2), then a st.param of the shuffled value.
157 ; CHECK-LABEL: .func{{.*}}shfl_f32_rii
158 define {float, i1} @shfl_f32_rii(float %a) {
159 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
160 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 1, 2;
161 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
162 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 1, i32 2)
; float shfl.down where only the first operand is a function argument; the
; second and third are the constants 2 and 3.
; Expected output: one f32 parameter load, then a shfl.down.b32 of the form
; (register, 2, 3), then a st.param of the shuffled value.
; NOTE(review): the first operand is still the argument %a (a register), and
; the parameter %b is declared but not used by the call -- confirm whether an
; immediate first operand was intended.
166 ; CHECK-LABEL: .func{{.*}}shfl_f32_iii
167 define {float, i1} @shfl_f32_iii(float %a, i32 %b) {
168 ; CHECK: ld.param.f32 [[A:%f[0-9]+]]
169 ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, 3;
170 ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]]
171 %val = call {float, i1} @llvm.nvvm.shfl.down.f32p(float %a, i32 2, i32 3)