1 ; Verifies correctness of load/store of parameters and return values.
2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
3 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %}
10 %s_f32 = type { float }
12 %s_f64 = type { double }
14 ; More complicated types. An i64 member is included to increase the natural
15 ; alignment requirement of each type.
16 %s_i32x4 = type { i32, i32, i32, i32, i64}
17 %s_i32f32 = type { i32, float, i32, float, i64}
18 %s_i8i32x4 = type { i32, i32, i8, i32, i32, i64}
19 %s_i8i32x4p = type <{ i32, i32, i8, i32, i32, i64}>
20 %s_crossfield = type { i32, [2 x i32], <4 x i32>, [3 x {i32, i32, i32}]}
21 ; All scalar parameters must be at least 32 bits in size.
22 ; i1 is loaded/stored as i8.
24 ; CHECK: .func (.param .b32 func_retval0)
25 ; CHECK-LABEL: test_i1(
26 ; CHECK-NEXT: .param .b32 test_i1_param_0
27 ; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i1_param_0];
28 ; CHECK: and.b16 [[A:%rs[0-9]+]], [[A8]], 1;
29 ; CHECK: setp.eq.b16 %p1, [[A]], 1
30 ; CHECK: cvt.u32.u16 [[B:%r[0-9]+]], [[A8]]
31 ; CHECK: and.b32 [[C:%r[0-9]+]], [[B]], 1;
32 ; CHECK: .param .b32 param0;
33 ; CHECK: st.param.b32 [param0+0], [[C]]
34 ; CHECK: .param .b32 retval0;
36 ; CHECK-NEXT: test_i1,
37 ; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0];
38 ; CHECK: and.b32 [[R:%r[0-9]+]], [[R8]], 1;
39 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
41 define i1 @test_i1(i1 %a) {
42 %r = tail call i1 @test_i1(i1 %a);
46 ; Signed i1 is a somewhat special case. We only care about one bit and
47 ; then use neg.s32 to convert it to 32-bit -1 if it's set.
48 ; CHECK: .func (.param .b32 func_retval0)
49 ; CHECK-LABEL: test_i1s(
50 ; CHECK-NEXT: .param .b32 test_i1s_param_0
51 ; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i1s_param_0];
52 ; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
53 ; CHECK: and.b32 [[A1:%r[0-9]+]], [[A32]], 1;
54 ; CHECK: neg.s32 [[A:%r[0-9]+]], [[A1]];
55 ; CHECK: .param .b32 param0;
56 ; CHECK: st.param.b32 [param0+0], [[A]];
57 ; CHECK: .param .b32 retval0;
59 ; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0];
60 ; CHECK: and.b32 [[R1:%r[0-9]+]], [[R8]], 1;
61 ; CHECK: neg.s32 [[R:%r[0-9]+]], [[R1]];
62 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
64 define signext i1 @test_i1s(i1 signext %a) {
65 %r = tail call signext i1 @test_i1s(i1 signext %a);
69 ; Make sure that i1 loads are vectorized as i8 loads, respecting each element's alignment.
70 ; CHECK: .func (.param .align 1 .b8 func_retval0[1])
71 ; CHECK-LABEL: test_v3i1(
72 ; CHECK-NEXT: .param .align 1 .b8 test_v3i1_param_0[1]
73 ; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i1_param_0+2];
74 ; CHECK-DAG: ld.param.u8 [[E0:%rs[0-9]+]], [test_v3i1_param_0]
75 ; CHECK: .param .align 1 .b8 param0[1];
76 ; CHECK-DAG: st.param.b8 [param0+0], [[E0]];
77 ; CHECK-DAG: st.param.b8 [param0+2], [[E2]];
78 ; CHECK: .param .align 1 .b8 retval0[1];
79 ; CHECK: call.uni (retval0),
80 ; CHECK-NEXT: test_v3i1,
81 ; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0+0];
82 ; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
83 ; CHECK-DAG: st.param.b8 [func_retval0+0], [[RE0]]
84 ; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]];
86 define <3 x i1> @test_v3i1(<3 x i1> %a) {
87 %r = tail call <3 x i1> @test_v3i1(<3 x i1> %a);
91 ; CHECK: .func (.param .align 1 .b8 func_retval0[1])
92 ; CHECK-LABEL: test_v4i1(
93 ; CHECK-NEXT: .param .align 1 .b8 test_v4i1_param_0[1]
94 ; CHECK: ld.param.u8 [[E0:%rs[0-9]+]], [test_v4i1_param_0]
95 ; CHECK: .param .align 1 .b8 param0[1];
96 ; CHECK: st.param.b8 [param0+0], [[E0]];
97 ; CHECK: .param .align 1 .b8 retval0[1];
98 ; CHECK: call.uni (retval0),
100 ; CHECK: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0+0];
101 ; CHECK: ld.param.b8 [[RE1:%rs[0-9]+]], [retval0+1];
102 ; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
103 ; CHECK: ld.param.b8 [[RE3:%rs[0-9]+]], [retval0+3];
104 ; CHECK: st.param.b8 [func_retval0+0], [[RE0]];
105 ; CHECK: st.param.b8 [func_retval0+1], [[RE1]];
106 ; CHECK: st.param.b8 [func_retval0+2], [[RE2]];
107 ; CHECK: st.param.b8 [func_retval0+3], [[RE3]];
109 define <4 x i1> @test_v4i1(<4 x i1> %a) {
110 %r = tail call <4 x i1> @test_v4i1(<4 x i1> %a);
114 ; CHECK: .func (.param .align 1 .b8 func_retval0[1])
115 ; CHECK-LABEL: test_v5i1(
116 ; CHECK-NEXT: .param .align 1 .b8 test_v5i1_param_0[1]
117 ; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i1_param_0+4];
118 ; CHECK-DAG: ld.param.u8 [[E0:%rs[0-9]+]], [test_v5i1_param_0]
119 ; CHECK: .param .align 1 .b8 param0[1];
120 ; CHECK-DAG: st.param.b8 [param0+0], [[E0]];
121 ; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
122 ; CHECK: .param .align 1 .b8 retval0[1];
123 ; CHECK: call.uni (retval0),
124 ; CHECK-NEXT: test_v5i1,
125 ; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0+0];
126 ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
127 ; CHECK-DAG: st.param.b8 [func_retval0+0], [[RE0]]
128 ; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
130 define <5 x i1> @test_v5i1(<5 x i1> %a) {
131 %r = tail call <5 x i1> @test_v5i1(<5 x i1> %a);
135 ; CHECK: .func (.param .b32 func_retval0)
136 ; CHECK-LABEL: test_i2(
137 ; CHECK-NEXT: .param .b32 test_i2_param_0
138 ; CHECK: ld.param.u8 {{%rs[0-9]+}}, [test_i2_param_0];
139 ; CHECK: .param .b32 param0;
140 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
141 ; CHECK: .param .b32 retval0;
142 ; CHECK: call.uni (retval0),
144 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
145 ; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
147 define i2 @test_i2(i2 %a) {
148 %r = tail call i2 @test_i2(i2 %a);
152 ; CHECK: .func (.param .b32 func_retval0)
153 ; CHECK-LABEL: test_i3(
154 ; CHECK-NEXT: .param .b32 test_i3_param_0
155 ; CHECK: ld.param.u8 {{%rs[0-9]+}}, [test_i3_param_0];
156 ; CHECK: .param .b32 param0;
157 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
158 ; CHECK: .param .b32 retval0;
159 ; CHECK: call.uni (retval0),
161 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
162 ; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
164 define i3 @test_i3(i3 %a) {
165 %r = tail call i3 @test_i3(i3 %a);
169 ; Unsigned i8 is loaded into a 16-bit register, then zero-extended to 32 bits and masked to its low 8 bits.
170 ; CHECK: .func (.param .b32 func_retval0)
171 ; CHECK-LABEL: test_i8(
172 ; CHECK-NEXT: .param .b32 test_i8_param_0
173 ; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i8_param_0];
174 ; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
175 ; CHECK: and.b32 [[A:%r[0-9]+]], [[A32]], 255;
176 ; CHECK: .param .b32 param0;
177 ; CHECK: st.param.b32 [param0+0], [[A]];
178 ; CHECK: .param .b32 retval0;
179 ; CHECK: call.uni (retval0),
181 ; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0];
182 ; CHECK: and.b32 [[R:%r[0-9]+]], [[R32]], 255;
183 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
185 define i8 @test_i8(i8 %a) {
186 %r = tail call i8 @test_i8(i8 %a);
190 ; Signed i8 is loaded into a 16-bit register, which is then sign-extended to i32.
191 ; CHECK: .func (.param .b32 func_retval0)
192 ; CHECK-LABEL: test_i8s(
193 ; CHECK-NEXT: .param .b32 test_i8s_param_0
194 ; CHECK: ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0];
195 ; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
196 ; CHECK: .param .b32 param0;
197 ; CHECK: st.param.b32 [param0+0], [[A]];
198 ; CHECK: .param .b32 retval0;
199 ; CHECK: call.uni (retval0),
201 ; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0];
202 ; -- This sequence is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32?
203 ; CHECK: cvt.u16.u32 [[R16:%rs[0-9]+]], [[R32]];
204 ; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[R16]];
205 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
207 define signext i8 @test_i8s(i8 signext %a) {
208 %r = tail call signext i8 @test_i8s(i8 signext %a);
212 ; CHECK: .func (.param .align 4 .b8 func_retval0[4])
213 ; CHECK-LABEL: test_v3i8(
214 ; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4]
215 ; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i8_param_0+2];
216 ; CHECK-DAG: ld.param.v2.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i8_param_0];
217 ; CHECK: .param .align 4 .b8 param0[4];
218 ; CHECK: st.param.v2.b8 [param0+0], {[[E0]], [[E1]]};
219 ; CHECK: st.param.b8 [param0+2], [[E2]];
220 ; CHECK: .param .align 4 .b8 retval0[4];
221 ; CHECK: call.uni (retval0),
222 ; CHECK-NEXT: test_v3i8,
223 ; CHECK-DAG: ld.param.v2.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
224 ; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
225 ; CHECK-DAG: st.param.v2.b8 [func_retval0+0], {[[RE0]], [[RE1]]};
226 ; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]];
228 define <3 x i8> @test_v3i8(<3 x i8> %a) {
229 %r = tail call <3 x i8> @test_v3i8(<3 x i8> %a);
233 ; CHECK: .func (.param .align 4 .b8 func_retval0[4])
234 ; CHECK-LABEL: test_v4i8(
235 ; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4]
236 ; CHECK: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i8_param_0]
237 ; CHECK: .param .align 4 .b8 param0[4];
238 ; CHECK: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
239 ; CHECK: .param .align 4 .b8 retval0[4];
240 ; CHECK: call.uni (retval0),
241 ; CHECK-NEXT: test_v4i8,
242 ; CHECK: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
243 ; CHECK: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
245 define <4 x i8> @test_v4i8(<4 x i8> %a) {
246 %r = tail call <4 x i8> @test_v4i8(<4 x i8> %a);
250 ; CHECK: .func (.param .align 8 .b8 func_retval0[8])
251 ; CHECK-LABEL: test_v5i8(
252 ; CHECK-NEXT: .param .align 8 .b8 test_v5i8_param_0[8]
253 ; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
254 ; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i8_param_0]
255 ; CHECK: .param .align 8 .b8 param0[8];
256 ; CHECK-DAG: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
257 ; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
258 ; CHECK: .param .align 8 .b8 retval0[8];
259 ; CHECK: call.uni (retval0),
260 ; CHECK-NEXT: test_v5i8,
261 ; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
262 ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
263 ; CHECK-DAG: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
264 ; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
266 define <5 x i8> @test_v5i8(<5 x i8> %a) {
267 %r = tail call <5 x i8> @test_v5i8(<5 x i8> %a);
271 ; CHECK: .func (.param .b32 func_retval0)
272 ; CHECK-LABEL: test_i11(
273 ; CHECK-NEXT: .param .b32 test_i11_param_0
274 ; CHECK: ld.param.u16 {{%rs[0-9]+}}, [test_i11_param_0];
275 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
276 ; CHECK: .param .b32 retval0;
277 ; CHECK: call.uni (retval0),
278 ; CHECK-NEXT: test_i11,
279 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
280 ; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
282 define i11 @test_i11(i11 %a) {
283 %r = tail call i11 @test_i11(i11 %a);
287 ; CHECK: .func (.param .b32 func_retval0)
288 ; CHECK-LABEL: test_i16(
289 ; CHECK-NEXT: .param .b32 test_i16_param_0
290 ; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16_param_0];
291 ; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
292 ; CHECK: .param .b32 param0;
293 ; CHECK: st.param.b32 [param0+0], [[E32]];
294 ; CHECK: .param .b32 retval0;
295 ; CHECK: call.uni (retval0),
296 ; CHECK-NEXT: test_i16,
297 ; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0];
298 ; CHECK: and.b32 [[R:%r[0-9]+]], [[RE32]], 65535;
299 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
301 define i16 @test_i16(i16 %a) {
302 %r = tail call i16 @test_i16(i16 %a);
306 ; CHECK: .func (.param .b32 func_retval0)
307 ; CHECK-LABEL: test_i16s(
308 ; CHECK-NEXT: .param .b32 test_i16s_param_0
309 ; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16s_param_0];
310 ; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
311 ; CHECK: .param .b32 param0;
312 ; CHECK: st.param.b32 [param0+0], [[E32]];
313 ; CHECK: .param .b32 retval0;
314 ; CHECK: call.uni (retval0),
315 ; CHECK-NEXT: test_i16s,
316 ; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0];
317 ; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]];
318 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
320 define signext i16 @test_i16s(i16 signext %a) {
321 %r = tail call signext i16 @test_i16s(i16 signext %a);
325 ; CHECK: .func (.param .align 8 .b8 func_retval0[8])
326 ; CHECK-LABEL: test_v3i16(
327 ; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8]
328 ; CHECK-DAG: ld.param.u16 [[E2:%rs[0-9]+]], [test_v3i16_param_0+4];
329 ; CHECK-DAG: ld.param.u32 [[R:%r[0-9]+]], [test_v3i16_param_0];
330 ; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[R]];
331 ; CHECK: .param .align 8 .b8 param0[8];
332 ; CHECK: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
333 ; CHECK: st.param.b16 [param0+4], [[E2]];
334 ; CHECK: .param .align 8 .b8 retval0[8];
335 ; CHECK: call.uni (retval0),
336 ; CHECK-NEXT: test_v3i16,
337 ; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
338 ; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4];
339 ; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[RE0]], [[RE1]]};
340 ; CHECK-DAG: st.param.b16 [func_retval0+4], [[RE2]];
342 define <3 x i16> @test_v3i16(<3 x i16> %a) {
343 %r = tail call <3 x i16> @test_v3i16(<3 x i16> %a);
347 ; CHECK: .func (.param .align 8 .b8 func_retval0[8])
348 ; CHECK-LABEL: test_v4i16(
349 ; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8]
350 ; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v4i16_param_0]
351 ; CHECK: .param .align 8 .b8 param0[8];
352 ; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
353 ; CHECK: .param .align 8 .b8 retval0[8];
354 ; CHECK: call.uni (retval0),
355 ; CHECK-NEXT: test_v4i16,
356 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
357 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]}
359 define <4 x i16> @test_v4i16(<4 x i16> %a) {
360 %r = tail call <4 x i16> @test_v4i16(<4 x i16> %a);
364 ; CHECK: .func (.param .align 16 .b8 func_retval0[16])
365 ; CHECK-LABEL: test_v5i16(
366 ; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
367 ; CHECK-DAG: ld.param.u16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
368 ; CHECK-DAG: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
369 ; CHECK-DAG: mov.b32 [[R0:%r[0-9]+]], {[[E0]], [[E1]]};
370 ; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[R0]];
371 ; CHECK-DAG: mov.b32 [[R1:%r[0-9]+]], {[[E2]], [[E3]]};
372 ; CHECK-DAG: mov.b32 {[[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [[R1]];
373 ; CHECK: .param .align 16 .b8 param0[16];
374 ; CHECK-DAG: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
375 ; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
376 ; CHECK: .param .align 16 .b8 retval0[16];
377 ; CHECK: call.uni (retval0),
378 ; CHECK-NEXT: test_v5i16,
379 ; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
380 ; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8];
381 ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
382 ; CHECK-DAG: st.param.b16 [func_retval0+8], [[RE4]];
384 define <5 x i16> @test_v5i16(<5 x i16> %a) {
385 %r = tail call <5 x i16> @test_v5i16(<5 x i16> %a);
389 ; CHECK: .func (.param .align 2 .b8 func_retval0[2])
390 ; CHECK-LABEL: test_f16(
391 ; CHECK-NEXT: .param .align 2 .b8 test_f16_param_0[2]
392 ; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_f16_param_0];
393 ; CHECK: .param .align 2 .b8 param0[2];
394 ; CHECK: st.param.b16 [param0+0], [[E]];
395 ; CHECK: .param .align 2 .b8 retval0[2];
396 ; CHECK: call.uni (retval0),
397 ; CHECK-NEXT: test_f16,
398 ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
399 ; CHECK: st.param.b16 [func_retval0+0], [[R]]
401 define half @test_f16(half %a) {
402 %r = tail call half @test_f16(half %a);
406 ; CHECK: .func (.param .align 4 .b8 func_retval0[4])
407 ; CHECK-LABEL: test_v2f16(
408 ; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
409 ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2f16_param_0];
410 ; CHECK: .param .align 4 .b8 param0[4];
411 ; CHECK: st.param.b32 [param0+0], [[E]];
412 ; CHECK: .param .align 4 .b8 retval0[4];
413 ; CHECK: call.uni (retval0),
414 ; CHECK-NEXT: test_v2f16,
415 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
416 ; CHECK: st.param.b32 [func_retval0+0], [[R]]
418 define <2 x half> @test_v2f16(<2 x half> %a) {
419 %r = tail call <2 x half> @test_v2f16(<2 x half> %a);
423 ; CHECK: .func (.param .align 2 .b8 func_retval0[2])
424 ; CHECK-LABEL: test_bf16(
425 ; CHECK-NEXT: .param .align 2 .b8 test_bf16_param_0[2]
426 ; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_bf16_param_0];
427 ; CHECK: .param .align 2 .b8 param0[2];
428 ; CHECK: st.param.b16 [param0+0], [[E]];
429 ; CHECK: .param .align 2 .b8 retval0[2];
430 ; CHECK: call.uni (retval0),
431 ; CHECK-NEXT: test_bf16,
432 ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
433 ; CHECK: st.param.b16 [func_retval0+0], [[R]]
435 define bfloat @test_bf16(bfloat %a) {
436 %r = tail call bfloat @test_bf16(bfloat %a);
440 ; CHECK: .func (.param .align 4 .b8 func_retval0[4])
441 ; CHECK-LABEL: test_v2bf16(
442 ; CHECK-NEXT: .param .align 4 .b8 test_v2bf16_param_0[4]
443 ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2bf16_param_0];
444 ; CHECK: .param .align 4 .b8 param0[4];
445 ; CHECK: st.param.b32 [param0+0], [[E]];
446 ; CHECK: .param .align 4 .b8 retval0[4];
447 ; CHECK: call.uni (retval0),
448 ; CHECK-NEXT: test_v2bf16,
449 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
450 ; CHECK: st.param.b32 [func_retval0+0], [[R]]
452 define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) {
453 %r = tail call <2 x bfloat> @test_v2bf16(<2 x bfloat> %a);
458 ; CHECK:.func (.param .align 8 .b8 func_retval0[8])
459 ; CHECK-LABEL: test_v3f16(
460 ; CHECK: .param .align 8 .b8 test_v3f16_param_0[8]
461 ; CHECK-DAG: ld.param.b32 [[HH01:%r[0-9]+]], [test_v3f16_param_0];
462 ; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]];
463 ; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3f16_param_0+4];
464 ; CHECK: .param .align 8 .b8 param0[8];
465 ; CHECK-DAG: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
466 ; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
467 ; CHECK: .param .align 8 .b8 retval0[8];
468 ; CHECK: call.uni (retval0),
470 ; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0+0];
471 ; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4];
472 ; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]};
473 ; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]];
475 define <3 x half> @test_v3f16(<3 x half> %a) {
476 %r = tail call <3 x half> @test_v3f16(<3 x half> %a);
480 ; CHECK:.func (.param .align 8 .b8 func_retval0[8])
481 ; CHECK-LABEL: test_v4f16(
482 ; CHECK: .param .align 8 .b8 test_v4f16_param_0[8]
483 ; CHECK: ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
484 ; CHECK: .param .align 8 .b8 param0[8];
485 ; CHECK: st.param.v2.b32 [param0+0], {[[R01]], [[R23]]};
486 ; CHECK: .param .align 8 .b8 retval0[8];
487 ; CHECK: call.uni (retval0),
489 ; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0+0];
490 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[RH01]], [[RH23]]};
492 define <4 x half> @test_v4f16(<4 x half> %a) {
493 %r = tail call <4 x half> @test_v4f16(<4 x half> %a);
497 ; CHECK:.func (.param .align 16 .b8 func_retval0[16])
498 ; CHECK-LABEL: test_v5f16(
499 ; CHECK: .param .align 16 .b8 test_v5f16_param_0[16]
500 ; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0];
501 ; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]];
502 ; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8];
503 ; CHECK: .param .align 16 .b8 param0[16];
504 ; CHECK-DAG: st.param.v4.b16 [param0+0],
505 ; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
506 ; CHECK: .param .align 16 .b8 retval0[16];
507 ; CHECK: call.uni (retval0),
509 ; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0+0];
510 ; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8];
511 ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
512 ; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]];
514 define <5 x half> @test_v5f16(<5 x half> %a) {
515 %r = tail call <5 x half> @test_v5f16(<5 x half> %a);
519 ; CHECK:.func (.param .align 16 .b8 func_retval0[16])
520 ; CHECK-LABEL: test_v8f16(
521 ; CHECK: .param .align 16 .b8 test_v8f16_param_0[16]
522 ; CHECK: ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
523 ; CHECK: .param .align 16 .b8 param0[16];
524 ; CHECK: st.param.v4.b32 [param0+0], {[[R01]], [[R23]], [[R45]], [[R67]]};
525 ; CHECK: .param .align 16 .b8 retval0[16];
526 ; CHECK: call.uni (retval0),
528 ; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0+0];
529 ; CHECK: st.param.v4.b32 [func_retval0+0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
531 define <8 x half> @test_v8f16(<8 x half> %a) {
532 %r = tail call <8 x half> @test_v8f16(<8 x half> %a);
536 ; CHECK:.func (.param .align 32 .b8 func_retval0[32])
537 ; CHECK-LABEL: test_v9f16(
538 ; CHECK: .param .align 32 .b8 test_v9f16_param_0[32]
539 ; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v9f16_param_0];
540 ; CHECK-DAG: ld.param.v4.b16 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [test_v9f16_param_0+8];
541 ; CHECK-DAG: ld.param.b16 [[E8:%rs[0-9]+]], [test_v9f16_param_0+16];
542 ; CHECK: .param .align 32 .b8 param0[32];
543 ; CHECK-DAG: st.param.v4.b16 [param0+0],
544 ; CHECK-DAG: st.param.v4.b16 [param0+8],
545 ; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
546 ; CHECK: .param .align 32 .b8 retval0[32];
547 ; CHECK: call.uni (retval0),
549 ; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0+0];
550 ; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8];
551 ; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16];
552 ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
553 ; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
554 ; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]];
556 define <9 x half> @test_v9f16(<9 x half> %a) {
557 %r = tail call <9 x half> @test_v9f16(<9 x half> %a);
561 ; CHECK: .func (.param .b32 func_retval0)
562 ; CHECK-LABEL: test_i19(
563 ; CHECK-NEXT: .param .b32 test_i19_param_0
564 ; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i19_param_0];
565 ; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i19_param_0+2];
566 ; CHECK: .param .b32 param0;
567 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
568 ; CHECK: .param .b32 retval0;
569 ; CHECK: call.uni (retval0),
570 ; CHECK-NEXT: test_i19,
571 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
572 ; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
574 define i19 @test_i19(i19 %a) {
575 %r = tail call i19 @test_i19(i19 %a);
579 ; CHECK: .func (.param .b32 func_retval0)
580 ; CHECK-LABEL: test_i23(
581 ; CHECK-NEXT: .param .b32 test_i23_param_0
582 ; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i23_param_0];
583 ; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i23_param_0+2];
584 ; CHECK: .param .b32 param0;
585 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
586 ; CHECK: .param .b32 retval0;
587 ; CHECK: call.uni (retval0),
588 ; CHECK-NEXT: test_i23,
589 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
590 ; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
592 define i23 @test_i23(i23 %a) {
593 %r = tail call i23 @test_i23(i23 %a);
597 ; CHECK: .func (.param .b32 func_retval0)
598 ; CHECK-LABEL: test_i24(
599 ; CHECK-NEXT: .param .b32 test_i24_param_0
600 ; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i24_param_0+2];
601 ; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i24_param_0];
602 ; CHECK: .param .b32 param0;
603 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
604 ; CHECK: .param .b32 retval0;
605 ; CHECK: call.uni (retval0),
606 ; CHECK-NEXT: test_i24,
607 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
608 ; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
610 define i24 @test_i24(i24 %a) {
611 %r = tail call i24 @test_i24(i24 %a);
615 ; CHECK: .func (.param .b32 func_retval0)
616 ; CHECK-LABEL: test_i29(
617 ; CHECK-NEXT: .param .b32 test_i29_param_0
618 ; CHECK: ld.param.u32 {{%r[0-9]+}}, [test_i29_param_0];
619 ; CHECK: .param .b32 param0;
620 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
621 ; CHECK: .param .b32 retval0;
622 ; CHECK: call.uni (retval0),
623 ; CHECK-NEXT: test_i29,
624 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
625 ; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
627 define i29 @test_i29(i29 %a) {
628 %r = tail call i29 @test_i29(i29 %a);
632 ; CHECK: .func (.param .b32 func_retval0)
633 ; CHECK-LABEL: test_i32(
634 ; CHECK-NEXT: .param .b32 test_i32_param_0
635 ; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_i32_param_0];
636 ; CHECK: .param .b32 param0;
637 ; CHECK: st.param.b32 [param0+0], [[E]];
638 ; CHECK: .param .b32 retval0;
639 ; CHECK: call.uni (retval0),
640 ; CHECK-NEXT: test_i32,
641 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
642 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
644 define i32 @test_i32(i32 %a) {
645 %r = tail call i32 @test_i32(i32 %a);
649 ; CHECK: .func (.param .align 16 .b8 func_retval0[16])
650 ; CHECK-LABEL: test_v3i32(
651 ; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16]
652 ; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_v3i32_param_0+8];
653 ; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0];
654 ; CHECK: .param .align 16 .b8 param0[16];
655 ; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
656 ; CHECK: st.param.b32 [param0+8], [[E2]];
657 ; CHECK: .param .align 16 .b8 retval0[16];
658 ; CHECK: call.uni (retval0),
659 ; CHECK-NEXT: test_v3i32,
660 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
661 ; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
662 ; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
663 ; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]];
665 define <3 x i32> @test_v3i32(<3 x i32> %a) {
666 %r = tail call <3 x i32> @test_v3i32(<3 x i32> %a);
670 ; CHECK: .func (.param .align 16 .b8 func_retval0[16])
671 ; CHECK-LABEL: test_v4i32(
672 ; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16]
673 ; CHECK: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0]
674 ; CHECK: .param .align 16 .b8 param0[16];
675 ; CHECK: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
676 ; CHECK: .param .align 16 .b8 retval0[16];
677 ; CHECK: call.uni (retval0),
678 ; CHECK-NEXT: test_v4i32,
679 ; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
680 ; CHECK: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
682 define <4 x i32> @test_v4i32(<4 x i32> %a) {
683 %r = tail call <4 x i32> @test_v4i32(<4 x i32> %a);
687 ; CHECK: .func (.param .align 32 .b8 func_retval0[32])
688 ; CHECK-LABEL: test_v5i32(
689 ; CHECK-NEXT: .param .align 32 .b8 test_v5i32_param_0[32]
690 ; CHECK-DAG: ld.param.u32 [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
691 ; CHECK-DAG: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
692 ; CHECK: .param .align 32 .b8 param0[32];
693 ; CHECK-DAG: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
694 ; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
695 ; CHECK: .param .align 32 .b8 retval0[32];
696 ; CHECK: call.uni (retval0),
697 ; CHECK-NEXT: test_v5i32,
698 ; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
699 ; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
700 ; CHECK-DAG: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
701 ; CHECK-DAG: st.param.b32 [func_retval0+16], [[RE4]];
703 define <5 x i32> @test_v5i32(<5 x i32> %a) {
704 %r = tail call <5 x i32> @test_v5i32(<5 x i32> %a);
708 ; CHECK: .func (.param .b32 func_retval0)
709 ; CHECK-LABEL: test_f32(
710 ; CHECK-NEXT: .param .b32 test_f32_param_0
711 ; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_f32_param_0];
712 ; CHECK: .param .b32 param0;
713 ; CHECK: st.param.f32 [param0+0], [[E]];
714 ; CHECK: .param .b32 retval0;
715 ; CHECK: call.uni (retval0),
716 ; CHECK-NEXT: test_f32,
717 ; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0];
718 ; CHECK: st.param.f32 [func_retval0+0], [[R]];
720 define float @test_f32(float %a) {
721 %r = tail call float @test_f32(float %a);
725 ; CHECK: .func (.param .b64 func_retval0)
726 ; CHECK-LABEL: test_i40(
727 ; CHECK-NEXT: .param .b64 test_i40_param_0
728 ; CHECK-DAG: ld.param.u8 {{%rd[0-9]+}}, [test_i40_param_0+4];
729 ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i40_param_0];
730 ; CHECK: .param .b64 param0;
731 ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
732 ; CHECK: .param .b64 retval0;
733 ; CHECK: call.uni (retval0),
734 ; CHECK-NEXT: test_i40,
735 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
736 ; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
738 define i40 @test_i40(i40 %a) {
739 %r = tail call i40 @test_i40(i40 %a);
743 ; CHECK: .func (.param .b64 func_retval0)
744 ; CHECK-LABEL: test_i47(
745 ; CHECK-NEXT: .param .b64 test_i47_param_0
746 ; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i47_param_0+4];
747 ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i47_param_0];
748 ; CHECK: .param .b64 param0;
749 ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
750 ; CHECK: .param .b64 retval0;
751 ; CHECK: call.uni (retval0),
752 ; CHECK-NEXT: test_i47,
753 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
754 ; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
756 define i47 @test_i47(i47 %a) {
757 %r = tail call i47 @test_i47(i47 %a);
761 ; CHECK: .func (.param .b64 func_retval0)
762 ; CHECK-LABEL: test_i48(
763 ; CHECK-NEXT: .param .b64 test_i48_param_0
764 ; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i48_param_0+4];
765 ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i48_param_0];
766 ; CHECK: .param .b64 param0;
767 ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
768 ; CHECK: .param .b64 retval0;
769 ; CHECK: call.uni (retval0),
770 ; CHECK-NEXT: test_i48,
771 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
772 ; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
774 define i48 @test_i48(i48 %a) {
775 %r = tail call i48 @test_i48(i48 %a);
779 ; CHECK: .func (.param .b64 func_retval0)
780 ; CHECK-LABEL: test_i51(
781 ; CHECK-NEXT: .param .b64 test_i51_param_0
782 ; CHECK-DAG: ld.param.u8 {{%rd[0-9]+}}, [test_i51_param_0+6];
783 ; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i51_param_0+4];
784 ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i51_param_0];
785 ; CHECK: .param .b64 param0;
786 ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
787 ; CHECK: .param .b64 retval0;
788 ; CHECK: call.uni (retval0),
789 ; CHECK-NEXT: test_i51,
790 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
791 ; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
793 define i51 @test_i51(i51 %a) {
794 %r = tail call i51 @test_i51(i51 %a);
; i56 uses the same split as i51: the incoming argument is read as 32 bits at
; offset 0, 16 bits at offset 4 and 8 bits at offset 6 of a 64-bit .param slot.
; The outgoing argument and the return value use single 64-bit accesses.
798 ; CHECK: .func (.param .b64 func_retval0)
799 ; CHECK-LABEL: test_i56(
800 ; CHECK-NEXT: .param .b64 test_i56_param_0
801 ; CHECK-DAG: ld.param.u8 {{%rd[0-9]+}}, [test_i56_param_0+6];
802 ; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i56_param_0+4];
803 ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i56_param_0];
804 ; CHECK: .param .b64 param0;
805 ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
806 ; CHECK: .param .b64 retval0;
807 ; CHECK: call.uni (retval0),
808 ; CHECK-NEXT: test_i56,
809 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
810 ; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
812 define i56 @test_i56(i56 %a) {
813 %r = tail call i56 @test_i56(i56 %a);
; Unlike i48/i51/i56, i57 is expected to be read with one full 64-bit load from
; its 64-bit .param slot; argument forwarding and the return value also use
; single 64-bit accesses.
817 ; CHECK: .func (.param .b64 func_retval0)
818 ; CHECK-LABEL: test_i57(
819 ; CHECK-NEXT: .param .b64 test_i57_param_0
820 ; CHECK: ld.param.u64 {{%rd[0-9]+}}, [test_i57_param_0];
821 ; CHECK: .param .b64 param0;
822 ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
823 ; CHECK: .param .b64 retval0;
824 ; CHECK: call.uni (retval0),
825 ; CHECK-NEXT: test_i57,
826 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
827 ; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
829 define i57 @test_i57(i57 %a) {
830 %r = tail call i57 @test_i57(i57 %a);
; Native i64: one 64-bit load of the argument. The loaded register (E) must be
; the one stored to the outgoing param0, and the register loaded from retval0
; (R) must be the one stored to func_retval0.
834 ; CHECK: .func (.param .b64 func_retval0)
835 ; CHECK-LABEL: test_i64(
836 ; CHECK-NEXT: .param .b64 test_i64_param_0
837 ; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_i64_param_0];
838 ; CHECK: .param .b64 param0;
839 ; CHECK: st.param.b64 [param0+0], [[E]];
840 ; CHECK: .param .b64 retval0;
841 ; CHECK: call.uni (retval0),
842 ; CHECK-NEXT: test_i64,
843 ; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0];
844 ; CHECK: st.param.b64 [func_retval0+0], [[R]];
846 define i64 @test_i64(i64 %a) {
847 %r = tail call i64 @test_i64(i64 %a);
; <3 x i64> is passed in a 32-byte, 32-byte-aligned .param array: elements 0-1
; move as one v2 access at offset 0 and element 2 as a scalar access at
; offset 16. Each return-value store is listed exactly once so the test does
; not rely on overlapping DAG matches.
851 ; CHECK: .func (.param .align 32 .b8 func_retval0[32])
852 ; CHECK-LABEL: test_v3i64(
853 ; CHECK-NEXT: .param .align 32 .b8 test_v3i64_param_0[32]
854 ; CHECK-DAG: ld.param.u64 [[E2:%rd[0-9]+]], [test_v3i64_param_0+16];
855 ; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0];
856 ; CHECK: .param .align 32 .b8 param0[32];
857 ; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]};
858 ; CHECK: st.param.b64 [param0+16], [[E2]];
859 ; CHECK: .param .align 32 .b8 retval0[32];
860 ; CHECK: call.uni (retval0),
861 ; CHECK-NEXT: test_v3i64,
862 ; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
863 ; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16];
864 ; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]};
865 ; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE2]];
869 define <3 x i64> @test_v3i64(<3 x i64> %a) {
870 %r = tail call <3 x i64> @test_v3i64(<3 x i64> %a);
874 ; For i64 vector loads are limited by PTX to 2 elements.
; <4 x i64> is therefore moved as two v2 accesses (offsets 0 and 16) of a
; 32-byte, 32-byte-aligned .param array, for the argument as well as for the
; return value.
875 ; CHECK: .func (.param .align 32 .b8 func_retval0[32])
876 ; CHECK-LABEL: test_v4i64(
877 ; CHECK-NEXT: .param .align 32 .b8 test_v4i64_param_0[32]
878 ; CHECK-DAG: ld.param.v2.u64 {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16];
879 ; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0];
880 ; CHECK: .param .align 32 .b8 param0[32];
881 ; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]};
882 ; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
883 ; CHECK: .param .align 32 .b8 retval0[32];
884 ; CHECK: call.uni (retval0),
885 ; CHECK-NEXT: test_v4i64,
886 ; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
887 ; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
888 ; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[RE2]], [[RE3]]};
889 ; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]};
891 define <4 x i64> @test_v4i64(<4 x i64> %a) {
892 %r = tail call <4 x i64> @test_v4i64(<4 x i64> %a);
896 ; Aggregates, on the other hand, do not get extended.
; %s_i1 is passed as a 1-byte, 1-byte-aligned .param array and moved with
; 8-bit loads/stores. The parameter declaration is matched with its full
; ".param" prefix, in line with the other aggregate tests.
898 ; CHECK: .func (.param .align 1 .b8 func_retval0[1])
899 ; CHECK-LABEL: test_s_i1(
900 ; CHECK-NEXT: .param .align 1 .b8 test_s_i1_param_0[1]
901 ; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i1_param_0];
902 ; CHECK: .param .align 1 .b8 param0[1];
903 ; CHECK: st.param.b8 [param0+0], [[A]]
904 ; CHECK: .param .align 1 .b8 retval0[1];
906 ; CHECK-NEXT: test_s_i1,
907 ; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0+0];
908 ; CHECK: st.param.b8 [func_retval0+0], [[R]];
910 define %s_i1 @test_s_i1(%s_i1 %a) {
911 %r = tail call %s_i1 @test_s_i1(%s_i1 %a);
; %s_i8 is passed as a 1-byte, 1-byte-aligned .param array and moved with
; 8-bit loads/stores through a 16-bit (%rs) register.
915 ; CHECK: .func (.param .align 1 .b8 func_retval0[1])
916 ; CHECK-LABEL: test_s_i8(
917 ; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1]
918 ; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i8_param_0];
919 ; CHECK: .param .align 1 .b8 param0[1];
920 ; CHECK: st.param.b8 [param0+0], [[A]]
921 ; CHECK: .param .align 1 .b8 retval0[1];
923 ; CHECK-NEXT: test_s_i8,
924 ; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0+0];
925 ; CHECK: st.param.b8 [func_retval0+0], [[R]];
927 define %s_i8 @test_s_i8(%s_i8 %a) {
928 %r = tail call %s_i8 @test_s_i8(%s_i8 %a);
; %s_i16 is passed as a 2-byte, 2-byte-aligned .param array and moved with
; 16-bit loads/stores.
932 ; CHECK: .func (.param .align 2 .b8 func_retval0[2])
933 ; CHECK-LABEL: test_s_i16(
934 ; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2]
935 ; CHECK: ld.param.u16 [[A:%rs[0-9]+]], [test_s_i16_param_0];
936 ; CHECK: .param .align 2 .b8 param0[2];
937 ; CHECK: st.param.b16 [param0+0], [[A]]
938 ; CHECK: .param .align 2 .b8 retval0[2];
940 ; CHECK-NEXT: test_s_i16,
941 ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
942 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
944 define %s_i16 @test_s_i16(%s_i16 %a) {
945 %r = tail call %s_i16 @test_s_i16(%s_i16 %a);
; %s_f16 is passed as a 2-byte, 2-byte-aligned .param array. The value is
; moved as untyped 16-bit data (b16) held in a 16-bit (%rs) register.
949 ; CHECK: .func (.param .align 2 .b8 func_retval0[2])
950 ; CHECK-LABEL: test_s_f16(
951 ; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2]
952 ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_f16_param_0];
953 ; CHECK: .param .align 2 .b8 param0[2];
954 ; CHECK: st.param.b16 [param0+0], [[A]]
955 ; CHECK: .param .align 2 .b8 retval0[2];
957 ; CHECK-NEXT: test_s_f16,
958 ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
959 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
961 define %s_f16 @test_s_f16(%s_f16 %a) {
962 %r = tail call %s_f16 @test_s_f16(%s_f16 %a);
; %s_i32 is passed as a 4-byte, 4-byte-aligned .param array and moved with
; 32-bit loads/stores; the loaded register is forwarded unchanged.
966 ; CHECK: .func (.param .align 4 .b8 func_retval0[4])
967 ; CHECK-LABEL: test_s_i32(
968 ; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4]
969 ; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_s_i32_param_0];
970 ; CHECK: .param .align 4 .b8 param0[4]
971 ; CHECK: st.param.b32 [param0+0], [[E]];
972 ; CHECK: .param .align 4 .b8 retval0[4];
973 ; CHECK: call.uni (retval0),
974 ; CHECK-NEXT: test_s_i32,
975 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
976 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
978 define %s_i32 @test_s_i32(%s_i32 %a) {
979 %r = tail call %s_i32 @test_s_i32(%s_i32 %a);
; %s_f32 ({ float }) is passed as a 4-byte, 4-byte-aligned .param array and
; moved with f32-typed loads/stores through a float (%f) register.
983 ; CHECK: .func (.param .align 4 .b8 func_retval0[4])
984 ; CHECK-LABEL: test_s_f32(
985 ; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4]
986 ; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_s_f32_param_0];
987 ; CHECK: .param .align 4 .b8 param0[4]
988 ; CHECK: st.param.f32 [param0+0], [[E]];
989 ; CHECK: .param .align 4 .b8 retval0[4];
990 ; CHECK: call.uni (retval0),
991 ; CHECK-NEXT: test_s_f32,
992 ; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0];
993 ; CHECK: st.param.f32 [func_retval0+0], [[R]];
995 define %s_f32 @test_s_f32(%s_f32 %a) {
996 %r = tail call %s_f32 @test_s_f32(%s_f32 %a);
; %s_i64 is passed as an 8-byte, 8-byte-aligned .param array and moved with
; 64-bit loads/stores; the loaded register is forwarded unchanged.
1000 ; CHECK: .func (.param .align 8 .b8 func_retval0[8])
1001 ; CHECK-LABEL: test_s_i64(
1002 ; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8]
1003 ; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_s_i64_param_0];
1004 ; CHECK: .param .align 8 .b8 param0[8];
1005 ; CHECK: st.param.b64 [param0+0], [[E]];
1006 ; CHECK: .param .align 8 .b8 retval0[8];
1007 ; CHECK: call.uni (retval0),
1008 ; CHECK-NEXT: test_s_i64,
1009 ; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0];
1010 ; CHECK: st.param.b64 [func_retval0+0], [[R]];
1012 define %s_i64 @test_s_i64(%s_i64 %a) {
1013 %r = tail call %s_i64 @test_s_i64(%s_i64 %a);
1017 ; Fields that have different types, but identical sizes are not vectorized.
; %s_i32f32 is { i32, float, i32, float, i64 }: each of the five fields is
; expected to be moved by its own scalar access (offsets 0, 4, 8, 12 and 16 of
; a 24-byte, 8-byte-aligned .param array), and each loaded register must be
; the one stored to the matching offset.
1018 ; CHECK: .func (.param .align 8 .b8 func_retval0[24])
1019 ; CHECK-LABEL: test_s_i32f32(
1020 ; CHECK: .param .align 8 .b8 test_s_i32f32_param_0[24]
1021 ; CHECK-DAG: ld.param.u64 [[E4:%rd[0-9]+]], [test_s_i32f32_param_0+16];
1022 ; CHECK-DAG: ld.param.f32 [[E3:%f[0-9]+]], [test_s_i32f32_param_0+12];
1023 ; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_s_i32f32_param_0+8];
1024 ; CHECK-DAG: ld.param.f32 [[E1:%f[0-9]+]], [test_s_i32f32_param_0+4];
1025 ; CHECK-DAG: ld.param.u32 [[E0:%r[0-9]+]], [test_s_i32f32_param_0];
1026 ; CHECK: .param .align 8 .b8 param0[24];
1027 ; CHECK-DAG: st.param.b32 [param0+0], [[E0]];
1028 ; CHECK-DAG: st.param.f32 [param0+4], [[E1]];
1029 ; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
1030 ; CHECK-DAG: st.param.f32 [param0+12], [[E3]];
1031 ; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
1032 ; CHECK: .param .align 8 .b8 retval0[24];
1033 ; CHECK: call.uni (retval0),
1034 ; CHECK-NEXT: test_s_i32f32,
1035 ; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0+0];
1036 ; CHECK-DAG: ld.param.f32 [[RE1:%f[0-9]+]], [retval0+4];
1037 ; CHECK-DAG: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
1038 ; CHECK-DAG: ld.param.f32 [[RE3:%f[0-9]+]], [retval0+12];
1039 ; CHECK-DAG: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
1040 ; CHECK-DAG: st.param.b32 [func_retval0+0], [[RE0]];
1041 ; CHECK-DAG: st.param.f32 [func_retval0+4], [[RE1]];
1042 ; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]];
1043 ; CHECK-DAG: st.param.f32 [func_retval0+12], [[RE3]];
1044 ; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]];
1046 define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
1047 %r = tail call %s_i32f32 @test_s_i32f32(%s_i32f32 %a);
1051 ; We do vectorize consecutive fields with matching types.
; %s_i32x4 is { i32, i32, i32, i32, i64 }: the four i32 fields move as two v2
; accesses (offsets 0 and 8) and the i64 field as a scalar access at offset 16.
; The i64 argument load is captured as E4 so that the param0+16 store is tied
; to the register loaded in this function rather than to a capture left over
; from an earlier test.
1052 ; CHECK:.visible .func (.param .align 8 .b8 func_retval0[24])
1053 ; CHECK-LABEL: test_s_i32x4(
1054 ; CHECK: .param .align 8 .b8 test_s_i32x4_param_0[24]
1055 ; CHECK-DAG: ld.param.u64 [[E4:%rd[0-9]+]], [test_s_i32x4_param_0+16];
1056 ; CHECK-DAG: ld.param.v2.u32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8];
1057 ; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0];
1058 ; CHECK: .param .align 8 .b8 param0[24];
1059 ; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
1060 ; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
1061 ; CHECK: st.param.b64 [param0+16], [[E4]];
1062 ; CHECK: .param .align 8 .b8 retval0[24];
1063 ; CHECK: call.uni (retval0),
1064 ; CHECK-NEXT: test_s_i32x4,
1065 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
1066 ; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
1067 ; CHECK: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
1068 ; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
1069 ; CHECK-DAG: st.param.v2.b32 [func_retval0+8], {[[RE2]], [[RE3]]};
1070 ; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]];
1073 define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
1074 %r = tail call %s_i32x4 @test_s_i32x4(%s_i32x4 %a);
; Despite the "i1" in its name, this test passes %s_i8i32x4
; ({ i32, i32, i8, i32, i32, i64 }). Only the first two i32 fields are paired
; into a v2 access; the i8 at offset 8 and the i32 fields at offsets 12 and 16
; are moved individually, followed by the i64 at offset 24.
1078 ; CHECK:.visible .func (.param .align 8 .b8 func_retval0[32])
1079 ; CHECK-LABEL: test_s_i1i32x4(
1080 ; CHECK: .param .align 8 .b8 test_s_i1i32x4_param_0[32]
1081 ; CHECK: ld.param.u64 [[E5:%rd[0-9]+]], [test_s_i1i32x4_param_0+24];
1082 ; CHECK: ld.param.u32 [[E4:%r[0-9]+]], [test_s_i1i32x4_param_0+16];
1083 ; CHECK: ld.param.u32 [[E3:%r[0-9]+]], [test_s_i1i32x4_param_0+12];
1084 ; CHECK: ld.param.u8 [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8];
1085 ; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0];
1086 ; CHECK: .param .align 8 .b8 param0[32];
1087 ; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
1088 ; CHECK: st.param.b8 [param0+8], [[E2]];
1089 ; CHECK: st.param.b32 [param0+12], [[E3]];
1090 ; CHECK: st.param.b32 [param0+16], [[E4]];
1091 ; CHECK: st.param.b64 [param0+24], [[E5]];
1092 ; CHECK: .param .align 8 .b8 retval0[32];
1093 ; CHECK: call.uni (retval0),
1094 ; CHECK: test_s_i1i32x4,
1098 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
1099 ; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+8];
1100 ; CHECK: ld.param.b32 [[RE3:%r[0-9]+]], [retval0+12];
1101 ; CHECK: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
1102 ; CHECK: ld.param.b64 [[RE5:%rd[0-9]+]], [retval0+24];
1103 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
1104 ; CHECK: st.param.b8 [func_retval0+8], [[RE2]];
1105 ; CHECK: st.param.b32 [func_retval0+12], [[RE3]];
1106 ; CHECK: st.param.b32 [func_retval0+16], [[RE4]];
1107 ; CHECK: st.param.b64 [func_retval0+24], [[RE5]];
1110 define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
1111 %r = tail call %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a);
1115 ; -- All loads/stores from parameters aligned by one must be done one
1116 ; -- byte at a time.
; %s_i8i32x4p is the packed variant of %s_i8i32x4: 25 bytes with 1-byte
; alignment, so the incoming argument is expected to be read with 25
; single-byte loads. The return-value loads are matched by register class
; only; exact register numbers are an allocation detail and are not pinned.
1117 ; CHECK:.visible .func (.param .align 1 .b8 func_retval0[25])
1118 ; CHECK-LABEL: test_s_i1i32x4p(
1119 ; CHECK-DAG: .param .align 1 .b8 test_s_i1i32x4p_param_0[25]
1120 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+24];
1121 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+23];
1122 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+22];
1123 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+21];
1124 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+20];
1125 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+19];
1126 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+18];
1127 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+17];
1128 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+16];
1129 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+15];
1130 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+14];
1131 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+13];
1132 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+12];
1133 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+11];
1134 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+10];
1135 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+9];
1136 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+8];
1137 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+7];
1138 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+6];
1139 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+5];
1140 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+4];
1141 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+3];
1142 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+2];
1143 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+1];
1144 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0];
1146 ; --- Unaligned parameter store/ return value load is broken in both nvcc
1147 ; --- and llvm and needs to be fixed.
1148 ; CHECK: .param .align 1 .b8 param0[25];
1149 ; CHECK-DAG: st.param.b32 [param0+0],
1150 ; CHECK-DAG: st.param.b32 [param0+4],
1151 ; CHECK-DAG: st.param.b8 [param0+8],
1152 ; CHECK-DAG: st.param.b32 [param0+9],
1153 ; CHECK-DAG: st.param.b32 [param0+13],
1154 ; CHECK-DAG: st.param.b64 [param0+17],
1155 ; CHECK: .param .align 1 .b8 retval0[25];
1156 ; CHECK: call.uni (retval0),
1157 ; CHECK-NEXT: test_s_i1i32x4p,
1158 ; CHECK-DAG: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
1159 ; CHECK-DAG: ld.param.b32 {{%r[0-9]+}}, [retval0+4];
1160 ; CHECK-DAG: ld.param.b8 {{%rs[0-9]+}}, [retval0+8];
1161 ; CHECK-DAG: ld.param.b32 {{%r[0-9]+}}, [retval0+9];
1162 ; CHECK-DAG: ld.param.b32 {{%r[0-9]+}}, [retval0+13];
1163 ; CHECK-DAG: ld.param.b64 {{%rd[0-9]+}}, [retval0+17];
1164 ; CHECK-DAG: st.param.b32 [func_retval0+0],
1165 ; CHECK-DAG: st.param.b32 [func_retval0+4],
1166 ; CHECK-DAG: st.param.b8 [func_retval0+8],
1167 ; CHECK-DAG: st.param.b32 [func_retval0+9],
1168 ; CHECK-DAG: st.param.b32 [func_retval0+13],
1169 ; CHECK-DAG: st.param.b64 [func_retval0+17],
1171 define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
1172 %r = tail call %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a);
1176 ; Check that we can vectorize loads that span multiple aggregate fields.
1177 ; CHECK:.visible .func (.param .align 16 .b8 func_retval0[80])
1178 ; CHECK-LABEL: test_s_crossfield(
1179 ; CHECK: .param .align 16 .b8 test_s_crossfield_param_0[80]
1180 ; CHECK: ld.param.u32 [[E15:%r[0-9]+]], [test_s_crossfield_param_0+64];
1181 ; CHECK: ld.param.v4.u32 {[[E11:%r[0-9]+]], [[E12:%r[0-9]+]], [[E13:%r[0-9]+]], [[E14:%r[0-9]+]]}, [test_s_crossfield_param_0+48];
1182 ; CHECK: ld.param.v4.u32 {[[E7:%r[0-9]+]], [[E8:%r[0-9]+]], [[E9:%r[0-9]+]], [[E10:%r[0-9]+]]}, [test_s_crossfield_param_0+32];
1183 ; CHECK: ld.param.v4.u32 {[[E3:%r[0-9]+]], [[E4:%r[0-9]+]], [[E5:%r[0-9]+]], [[E6:%r[0-9]+]]}, [test_s_crossfield_param_0+16];
1184 ; CHECK: ld.param.u32 [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8];
1185 ; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0];
1186 ; CHECK: .param .align 16 .b8 param0[80];
1187 ; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
1188 ; CHECK: st.param.b32 [param0+8], [[E2]];
1189 ; CHECK: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
1190 ; CHECK: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
1191 ; CHECK: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
1192 ; CHECK: st.param.b32 [param0+64], [[E15]];
1193 ; CHECK: .param .align 16 .b8 retval0[80];
1194 ; CHECK: call.uni (retval0),
1195 ; CHECK: test_s_crossfield,
1196 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
1197 ; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
1198 ; CHECK: ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16];
1199 ; CHECK: ld.param.v4.b32 {[[RE7:%r[0-9]+]], [[RE8:%r[0-9]+]], [[RE9:%r[0-9]+]], [[RE10:%r[0-9]+]]}, [retval0+32];
1200 ; CHECK: ld.param.v4.b32 {[[RE11:%r[0-9]+]], [[RE12:%r[0-9]+]], [[RE13:%r[0-9]+]], [[RE14:%r[0-9]+]]}, [retval0+48];
1201 ; CHECK: ld.param.b32 [[RE15:%r[0-9]+]], [retval0+64];
1202 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
1203 ; CHECK: st.param.b32 [func_retval0+8], [[RE2]];
1204 ; CHECK: st.param.v4.b32 [func_retval0+16], {[[RE3]], [[RE4]], [[RE5]], [[RE6]]};
1205 ; CHECK: st.param.v4.b32 [func_retval0+32], {[[RE7]], [[RE8]], [[RE9]], [[RE10]]};
1206 ; CHECK: st.param.v4.b32 [func_retval0+48], {[[RE11]], [[RE12]], [[RE13]], [[RE14]]};
1207 ; CHECK: st.param.b32 [func_retval0+64], [[RE15]];
1210 define %s_crossfield @test_s_crossfield(%s_crossfield %a) {
1211 %r = tail call %s_crossfield @test_s_crossfield(%s_crossfield %a);
1212 ret %s_crossfield %r;