1 ; Verifies correctness of load/store of parameters and return values.
2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
3 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %}
10 %s_f32 = type { float }
12 %s_f64 = type { double }
14 ; More complicated types. i64 is used to increase the natural alignment
15 ; requirement of the type.
16 %s_i32x4 = type { i32, i32, i32, i32, i64}
17 %s_i32f32 = type { i32, float, i32, float, i64}
18 %s_i8i32x4 = type { i32, i32, i8, i32, i32, i64}
19 %s_i8i32x4p = type <{ i32, i32, i8, i32, i32, i64}>
20 %s_crossfield = type { i32, [2 x i32], <4 x i32>, [3 x {i32, i32, i32}]}
21 ; All scalar parameters must be at least 32 bits in size.
22 ; i1 is loaded/stored as i8.
24 ; CHECK: .func (.param .b32 func_retval0)
25 ; CHECK-LABEL: test_i1(
26 ; CHECK-NEXT: .param .b32 test_i1_param_0
27 ; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i1_param_0];
28 ; CHECK: and.b16 [[A:%rs[0-9]+]], [[A8]], 1;
29 ; CHECK: setp.eq.b16 %p1, [[A]], 1
30 ; CHECK: cvt.u32.u16 [[B:%r[0-9]+]], [[A8]]
31 ; CHECK: and.b32 [[C:%r[0-9]+]], [[B]], 1;
32 ; CHECK: .param .b32 param0;
33 ; CHECK: st.param.b32 [param0+0], [[C]]
34 ; CHECK: .param .b32 retval0;
36 ; CHECK-NEXT: test_i1,
37 ; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0];
38 ; CHECK: and.b32 [[R:%r[0-9]+]], [[R8]], 1;
39 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
41 define i1 @test_i1(i1 %a) {
42 %r = tail call i1 @test_i1(i1 %a);
46 ; Signed i1 is a somewhat special case. We only care about one bit and
47 ; then use neg.s32 to convert it to 32-bit -1 if it's set.
48 ; CHECK: .func (.param .b32 func_retval0)
49 ; CHECK-LABEL: test_i1s(
50 ; CHECK-NEXT: .param .b32 test_i1s_param_0
51 ; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i1s_param_0];
52 ; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
53 ; CHECK: and.b32 [[A1:%r[0-9]+]], [[A32]], 1;
54 ; CHECK: neg.s32 [[A:%r[0-9]+]], [[A1]];
55 ; CHECK: .param .b32 param0;
56 ; CHECK: st.param.b32 [param0+0], [[A]];
57 ; CHECK: .param .b32 retval0;
59 ; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0];
60 ; CHECK: and.b32 [[R1:%r[0-9]+]], [[R8]], 1;
61 ; CHECK: neg.s32 [[R:%r[0-9]+]], [[R1]];
62 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
64 define signext i1 @test_i1s(i1 signext %a) {
65 %r = tail call signext i1 @test_i1s(i1 signext %a);
69 ; Make sure that i1 loads are vectorized as i8 loads, respecting each element's alignment.
70 ; CHECK: .func (.param .align 1 .b8 func_retval0[1])
71 ; CHECK-LABEL: test_v3i1(
72 ; CHECK-NEXT: .param .align 1 .b8 test_v3i1_param_0[1]
73 ; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i1_param_0+2];
74 ; CHECK-DAG: ld.param.u8 [[E0:%rs[0-9]+]], [test_v3i1_param_0]
75 ; CHECK: .param .align 1 .b8 param0[1];
76 ; CHECK-DAG: st.param.b8 [param0+0], [[E0]];
77 ; CHECK-DAG: st.param.b8 [param0+2], [[E2]];
78 ; CHECK: .param .align 1 .b8 retval0[1];
79 ; CHECK: call.uni (retval0),
80 ; CHECK-NEXT: test_v3i1,
81 ; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0+0];
82 ; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
83 ; CHECK-DAG: st.param.b8 [func_retval0+0], [[RE0]]
84 ; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]];
86 define <3 x i1> @test_v3i1(<3 x i1> %a) {
87 %r = tail call <3 x i1> @test_v3i1(<3 x i1> %a);
91 ; CHECK: .func (.param .align 1 .b8 func_retval0[1])
92 ; CHECK-LABEL: test_v4i1(
93 ; CHECK-NEXT: .param .align 1 .b8 test_v4i1_param_0[1]
94 ; CHECK: ld.param.u8 [[E0:%rs[0-9]+]], [test_v4i1_param_0]
95 ; CHECK: .param .align 1 .b8 param0[1];
96 ; CHECK: st.param.b8 [param0+0], [[E0]];
97 ; CHECK: .param .align 1 .b8 retval0[1];
98 ; CHECK: call.uni (retval0),
100 ; CHECK: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0+0];
101 ; CHECK: ld.param.b8 [[RE1:%rs[0-9]+]], [retval0+1];
102 ; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
103 ; CHECK: ld.param.b8 [[RE3:%rs[0-9]+]], [retval0+3];
104 ; CHECK: st.param.b8 [func_retval0+0], [[RE0]];
105 ; CHECK: st.param.b8 [func_retval0+1], [[RE1]];
106 ; CHECK: st.param.b8 [func_retval0+2], [[RE2]];
107 ; CHECK: st.param.b8 [func_retval0+3], [[RE3]];
109 define <4 x i1> @test_v4i1(<4 x i1> %a) {
110 %r = tail call <4 x i1> @test_v4i1(<4 x i1> %a);
114 ; CHECK: .func (.param .align 1 .b8 func_retval0[1])
115 ; CHECK-LABEL: test_v5i1(
116 ; CHECK-NEXT: .param .align 1 .b8 test_v5i1_param_0[1]
117 ; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i1_param_0+4];
118 ; CHECK-DAG: ld.param.u8 [[E0:%rs[0-9]+]], [test_v5i1_param_0]
119 ; CHECK: .param .align 1 .b8 param0[1];
120 ; CHECK-DAG: st.param.b8 [param0+0], [[E0]];
121 ; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
122 ; CHECK: .param .align 1 .b8 retval0[1];
123 ; CHECK: call.uni (retval0),
124 ; CHECK-NEXT: test_v5i1,
125 ; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0+0];
126 ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
127 ; CHECK-DAG: st.param.b8 [func_retval0+0], [[RE0]]
128 ; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
130 define <5 x i1> @test_v5i1(<5 x i1> %a) {
131 %r = tail call <5 x i1> @test_v5i1(<5 x i1> %a);
135 ; CHECK: .func (.param .b32 func_retval0)
136 ; CHECK-LABEL: test_i2(
137 ; CHECK-NEXT: .param .b32 test_i2_param_0
138 ; CHECK: ld.param.u8 {{%rs[0-9]+}}, [test_i2_param_0];
139 ; CHECK: .param .b32 param0;
140 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
141 ; CHECK: .param .b32 retval0;
142 ; CHECK: call.uni (retval0),
144 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
145 ; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
147 define i2 @test_i2(i2 %a) {
148 %r = tail call i2 @test_i2(i2 %a);
152 ; CHECK: .func (.param .b32 func_retval0)
153 ; CHECK-LABEL: test_i3(
154 ; CHECK-NEXT: .param .b32 test_i3_param_0
155 ; CHECK: ld.param.u8 {{%rs[0-9]+}}, [test_i3_param_0];
156 ; CHECK: .param .b32 param0;
157 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
158 ; CHECK: .param .b32 retval0;
159 ; CHECK: call.uni (retval0),
161 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
162 ; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
164 define i3 @test_i3(i3 %a) {
165 %r = tail call i3 @test_i3(i3 %a);
169 ; Unsigned i8 is loaded into a 16-bit register, which is then zero-extended to i32 and masked to 8 bits.
170 ; CHECK: .func (.param .b32 func_retval0)
171 ; CHECK-LABEL: test_i8(
172 ; CHECK-NEXT: .param .b32 test_i8_param_0
173 ; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i8_param_0];
174 ; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
175 ; CHECK: and.b32 [[A:%r[0-9]+]], [[A32]], 255;
176 ; CHECK: .param .b32 param0;
177 ; CHECK: st.param.b32 [param0+0], [[A]];
178 ; CHECK: .param .b32 retval0;
179 ; CHECK: call.uni (retval0),
181 ; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0];
182 ; CHECK: and.b32 [[R:%r[0-9]+]], [[R32]], 255;
183 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
185 define i8 @test_i8(i8 %a) {
186 %r = tail call i8 @test_i8(i8 %a);
190 ; Signed i8 is loaded into a 16-bit register, which is then sign-extended to i32.
191 ; CHECK: .func (.param .b32 func_retval0)
192 ; CHECK-LABEL: test_i8s(
193 ; CHECK-NEXT: .param .b32 test_i8s_param_0
194 ; CHECK: ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0];
195 ; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
196 ; CHECK: .param .b32 param0;
197 ; CHECK: st.param.b32 [param0+0], [[A]];
198 ; CHECK: .param .b32 retval0;
199 ; CHECK: call.uni (retval0),
201 ; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0];
202 ; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
203 ; CHECK: cvt.u16.u32 [[R16:%rs[0-9]+]], [[R32]];
204 ; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[R16]];
205 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
207 define signext i8 @test_i8s(i8 signext %a) {
208 %r = tail call signext i8 @test_i8s(i8 signext %a);
212 ; CHECK: .func (.param .align 4 .b8 func_retval0[4])
213 ; CHECK-LABEL: test_v3i8(
214 ; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4]
215 ; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v3i8_param_0];
216 ; CHECK: .param .align 4 .b8 param0[4];
217 ; CHECK: st.param.b32 [param0+0], [[R]]
218 ; CHECK: .param .align 4 .b8 retval0[4];
219 ; CHECK: call.uni (retval0),
220 ; CHECK-NEXT: test_v3i8,
221 ; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0+0];
222 ; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very
223 ; interesting here, so it's skipped.
224 ; CHECK: st.param.b32 [func_retval0+0],
226 define <3 x i8> @test_v3i8(<3 x i8> %a) {
227 %r = tail call <3 x i8> @test_v3i8(<3 x i8> %a);
231 ; CHECK: .func (.param .align 4 .b8 func_retval0[4])
232 ; CHECK-LABEL: test_v4i8(
233 ; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4]
234 ; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v4i8_param_0]
235 ; CHECK: .param .align 4 .b8 param0[4];
236 ; CHECK: st.param.b32 [param0+0], [[R]];
237 ; CHECK: .param .align 4 .b8 retval0[4];
238 ; CHECK: call.uni (retval0),
239 ; CHECK-NEXT: test_v4i8,
240 ; CHECK: ld.param.b32 [[RET:%r[0-9]+]], [retval0+0];
241 ; CHECK: st.param.b32 [func_retval0+0], [[RET]];
243 define <4 x i8> @test_v4i8(<4 x i8> %a) {
244 %r = tail call <4 x i8> @test_v4i8(<4 x i8> %a);
248 ; CHECK: .func (.param .align 8 .b8 func_retval0[8])
249 ; CHECK-LABEL: test_v5i8(
250 ; CHECK-NEXT: .param .align 8 .b8 test_v5i8_param_0[8]
251 ; CHECK-DAG: ld.param.u32 [[E0:%r[0-9]+]], [test_v5i8_param_0]
252 ; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
253 ; CHECK: .param .align 8 .b8 param0[8];
254 ; CHECK-DAG: st.param.v4.b8 [param0+0],
255 ; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
256 ; CHECK: .param .align 8 .b8 retval0[8];
257 ; CHECK: call.uni (retval0),
258 ; CHECK-NEXT: test_v5i8,
259 ; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
260 ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
261 ; CHECK-DAG: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
262 ; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
264 define <5 x i8> @test_v5i8(<5 x i8> %a) {
265 %r = tail call <5 x i8> @test_v5i8(<5 x i8> %a);
; i11 is passed and returned in a 32-bit .param slot; the incoming argument is
; read with a 16-bit load. The outgoing param0 declaration is verified before
; the store into it, matching the other sub-32-bit integer tests in this file.
269 ; CHECK: .func (.param .b32 func_retval0)
270 ; CHECK-LABEL: test_i11(
271 ; CHECK-NEXT: .param .b32 test_i11_param_0
272 ; CHECK: ld.param.u16 {{%rs[0-9]+}}, [test_i11_param_0];
; CHECK: .param .b32 param0;
273 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
274 ; CHECK: .param .b32 retval0;
275 ; CHECK: call.uni (retval0),
276 ; CHECK-NEXT: test_i11,
277 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
278 ; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
280 define i11 @test_i11(i11 %a) {
281 %r = tail call i11 @test_i11(i11 %a);
285 ; CHECK: .func (.param .b32 func_retval0)
286 ; CHECK-LABEL: test_i16(
287 ; CHECK-NEXT: .param .b32 test_i16_param_0
288 ; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16_param_0];
289 ; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
290 ; CHECK: .param .b32 param0;
291 ; CHECK: st.param.b32 [param0+0], [[E32]];
292 ; CHECK: .param .b32 retval0;
293 ; CHECK: call.uni (retval0),
294 ; CHECK-NEXT: test_i16,
295 ; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0];
296 ; CHECK: and.b32 [[R:%r[0-9]+]], [[RE32]], 65535;
297 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
299 define i16 @test_i16(i16 %a) {
300 %r = tail call i16 @test_i16(i16 %a);
304 ; CHECK: .func (.param .b32 func_retval0)
305 ; CHECK-LABEL: test_i16s(
306 ; CHECK-NEXT: .param .b32 test_i16s_param_0
307 ; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16s_param_0];
308 ; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
309 ; CHECK: .param .b32 param0;
310 ; CHECK: st.param.b32 [param0+0], [[E32]];
311 ; CHECK: .param .b32 retval0;
312 ; CHECK: call.uni (retval0),
313 ; CHECK-NEXT: test_i16s,
314 ; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0];
315 ; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]];
316 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
318 define signext i16 @test_i16s(i16 signext %a) {
319 %r = tail call signext i16 @test_i16s(i16 signext %a);
323 ; CHECK: .func (.param .align 8 .b8 func_retval0[8])
324 ; CHECK-LABEL: test_v3i16(
325 ; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8]
326 ; CHECK-DAG: ld.param.u16 [[E2:%rs[0-9]+]], [test_v3i16_param_0+4];
327 ; CHECK-DAG: ld.param.u32 [[R:%r[0-9]+]], [test_v3i16_param_0];
328 ; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[R]];
329 ; CHECK: .param .align 8 .b8 param0[8];
330 ; CHECK: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
331 ; CHECK: st.param.b16 [param0+4], [[E2]];
332 ; CHECK: .param .align 8 .b8 retval0[8];
333 ; CHECK: call.uni (retval0),
334 ; CHECK-NEXT: test_v3i16,
335 ; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
336 ; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4];
337 ; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[RE0]], [[RE1]]};
338 ; CHECK-DAG: st.param.b16 [func_retval0+4], [[RE2]];
340 define <3 x i16> @test_v3i16(<3 x i16> %a) {
341 %r = tail call <3 x i16> @test_v3i16(<3 x i16> %a);
345 ; CHECK: .func (.param .align 8 .b8 func_retval0[8])
346 ; CHECK-LABEL: test_v4i16(
347 ; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8]
348 ; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v4i16_param_0]
349 ; CHECK: .param .align 8 .b8 param0[8];
350 ; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
351 ; CHECK: .param .align 8 .b8 retval0[8];
352 ; CHECK: call.uni (retval0),
353 ; CHECK-NEXT: test_v4i16,
354 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
355 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]}
357 define <4 x i16> @test_v4i16(<4 x i16> %a) {
358 %r = tail call <4 x i16> @test_v4i16(<4 x i16> %a);
362 ; CHECK: .func (.param .align 16 .b8 func_retval0[16])
363 ; CHECK-LABEL: test_v5i16(
364 ; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
365 ; CHECK-DAG: ld.param.u16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
366 ; CHECK-DAG: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
367 ; CHECK: .param .align 16 .b8 param0[16];
368 ; CHECK-DAG: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
369 ; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
370 ; CHECK: .param .align 16 .b8 retval0[16];
371 ; CHECK: call.uni (retval0),
372 ; CHECK-NEXT: test_v5i16,
373 ; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
374 ; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8];
375 ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
376 ; CHECK-DAG: st.param.b16 [func_retval0+8], [[RE4]];
378 define <5 x i16> @test_v5i16(<5 x i16> %a) {
379 %r = tail call <5 x i16> @test_v5i16(<5 x i16> %a);
383 ; CHECK: .func (.param .align 2 .b8 func_retval0[2])
384 ; CHECK-LABEL: test_f16(
385 ; CHECK-NEXT: .param .align 2 .b8 test_f16_param_0[2]
386 ; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_f16_param_0];
387 ; CHECK: .param .align 2 .b8 param0[2];
388 ; CHECK: st.param.b16 [param0+0], [[E]];
389 ; CHECK: .param .align 2 .b8 retval0[2];
390 ; CHECK: call.uni (retval0),
391 ; CHECK-NEXT: test_f16,
392 ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
393 ; CHECK: st.param.b16 [func_retval0+0], [[R]]
395 define half @test_f16(half %a) {
396 %r = tail call half @test_f16(half %a);
400 ; CHECK: .func (.param .align 4 .b8 func_retval0[4])
401 ; CHECK-LABEL: test_v2f16(
402 ; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
403 ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2f16_param_0];
404 ; CHECK: .param .align 4 .b8 param0[4];
405 ; CHECK: st.param.b32 [param0+0], [[E]];
406 ; CHECK: .param .align 4 .b8 retval0[4];
407 ; CHECK: call.uni (retval0),
408 ; CHECK-NEXT: test_v2f16,
409 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
410 ; CHECK: st.param.b32 [func_retval0+0], [[R]]
412 define <2 x half> @test_v2f16(<2 x half> %a) {
413 %r = tail call <2 x half> @test_v2f16(<2 x half> %a);
417 ; CHECK: .func (.param .align 2 .b8 func_retval0[2])
418 ; CHECK-LABEL: test_bf16(
419 ; CHECK-NEXT: .param .align 2 .b8 test_bf16_param_0[2]
420 ; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_bf16_param_0];
421 ; CHECK: .param .align 2 .b8 param0[2];
422 ; CHECK: st.param.b16 [param0+0], [[E]];
423 ; CHECK: .param .align 2 .b8 retval0[2];
424 ; CHECK: call.uni (retval0),
425 ; CHECK-NEXT: test_bf16,
426 ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
427 ; CHECK: st.param.b16 [func_retval0+0], [[R]]
429 define bfloat @test_bf16(bfloat %a) {
430 %r = tail call bfloat @test_bf16(bfloat %a);
434 ; CHECK: .func (.param .align 4 .b8 func_retval0[4])
435 ; CHECK-LABEL: test_v2bf16(
436 ; CHECK-NEXT: .param .align 4 .b8 test_v2bf16_param_0[4]
437 ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2bf16_param_0];
438 ; CHECK: .param .align 4 .b8 param0[4];
439 ; CHECK: st.param.b32 [param0+0], [[E]];
440 ; CHECK: .param .align 4 .b8 retval0[4];
441 ; CHECK: call.uni (retval0),
442 ; CHECK-NEXT: test_v2bf16,
443 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
444 ; CHECK: st.param.b32 [func_retval0+0], [[R]]
446 define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) {
447 %r = tail call <2 x bfloat> @test_v2bf16(<2 x bfloat> %a);
452 ; CHECK:.func (.param .align 8 .b8 func_retval0[8])
453 ; CHECK-LABEL: test_v3f16(
454 ; CHECK: .param .align 8 .b8 test_v3f16_param_0[8]
455 ; CHECK-DAG: ld.param.b32 [[HH01:%r[0-9]+]], [test_v3f16_param_0];
456 ; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]];
457 ; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3f16_param_0+4];
458 ; CHECK: .param .align 8 .b8 param0[8];
459 ; CHECK-DAG: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
460 ; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
461 ; CHECK: .param .align 8 .b8 retval0[8];
462 ; CHECK: call.uni (retval0),
464 ; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0+0];
465 ; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4];
466 ; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]};
467 ; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]];
469 define <3 x half> @test_v3f16(<3 x half> %a) {
470 %r = tail call <3 x half> @test_v3f16(<3 x half> %a);
474 ; CHECK:.func (.param .align 8 .b8 func_retval0[8])
475 ; CHECK-LABEL: test_v4f16(
476 ; CHECK: .param .align 8 .b8 test_v4f16_param_0[8]
477 ; CHECK: ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
478 ; CHECK: .param .align 8 .b8 param0[8];
479 ; CHECK: st.param.v2.b32 [param0+0], {[[R01]], [[R23]]};
480 ; CHECK: .param .align 8 .b8 retval0[8];
481 ; CHECK: call.uni (retval0),
483 ; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0+0];
484 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[RH01]], [[RH23]]};
486 define <4 x half> @test_v4f16(<4 x half> %a) {
487 %r = tail call <4 x half> @test_v4f16(<4 x half> %a);
491 ; CHECK:.func (.param .align 16 .b8 func_retval0[16])
492 ; CHECK-LABEL: test_v5f16(
493 ; CHECK: .param .align 16 .b8 test_v5f16_param_0[16]
494 ; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0];
495 ; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8];
496 ; CHECK: .param .align 16 .b8 param0[16];
497 ; CHECK-DAG: st.param.v4.b16 [param0+0],
498 ; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
499 ; CHECK: .param .align 16 .b8 retval0[16];
500 ; CHECK: call.uni (retval0),
502 ; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0+0];
503 ; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8];
504 ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
505 ; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]];
507 define <5 x half> @test_v5f16(<5 x half> %a) {
508 %r = tail call <5 x half> @test_v5f16(<5 x half> %a);
512 ; CHECK:.func (.param .align 16 .b8 func_retval0[16])
513 ; CHECK-LABEL: test_v8f16(
514 ; CHECK: .param .align 16 .b8 test_v8f16_param_0[16]
515 ; CHECK: ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
516 ; CHECK: .param .align 16 .b8 param0[16];
517 ; CHECK: st.param.v4.b32 [param0+0], {[[R01]], [[R23]], [[R45]], [[R67]]};
518 ; CHECK: .param .align 16 .b8 retval0[16];
519 ; CHECK: call.uni (retval0),
521 ; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0+0];
522 ; CHECK: st.param.v4.b32 [func_retval0+0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
524 define <8 x half> @test_v8f16(<8 x half> %a) {
525 %r = tail call <8 x half> @test_v8f16(<8 x half> %a);
529 ; CHECK:.func (.param .align 32 .b8 func_retval0[32])
530 ; CHECK-LABEL: test_v9f16(
531 ; CHECK: .param .align 32 .b8 test_v9f16_param_0[32]
532 ; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v9f16_param_0];
533 ; CHECK-DAG: ld.param.v4.b16 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [test_v9f16_param_0+8];
534 ; CHECK-DAG: ld.param.b16 [[E8:%rs[0-9]+]], [test_v9f16_param_0+16];
535 ; CHECK: .param .align 32 .b8 param0[32];
536 ; CHECK-DAG: st.param.v4.b16 [param0+0],
537 ; CHECK-DAG: st.param.v4.b16 [param0+8],
538 ; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
539 ; CHECK: .param .align 32 .b8 retval0[32];
540 ; CHECK: call.uni (retval0),
542 ; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0+0];
543 ; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8];
544 ; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16];
545 ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
546 ; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
547 ; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]];
549 define <9 x half> @test_v9f16(<9 x half> %a) {
550 %r = tail call <9 x half> @test_v9f16(<9 x half> %a);
554 ; CHECK: .func (.param .b32 func_retval0)
555 ; CHECK-LABEL: test_i19(
556 ; CHECK-NEXT: .param .b32 test_i19_param_0
557 ; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i19_param_0];
558 ; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i19_param_0+2];
559 ; CHECK: .param .b32 param0;
560 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
561 ; CHECK: .param .b32 retval0;
562 ; CHECK: call.uni (retval0),
563 ; CHECK-NEXT: test_i19,
564 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
565 ; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
567 define i19 @test_i19(i19 %a) {
568 %r = tail call i19 @test_i19(i19 %a);
572 ; CHECK: .func (.param .b32 func_retval0)
573 ; CHECK-LABEL: test_i23(
574 ; CHECK-NEXT: .param .b32 test_i23_param_0
575 ; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i23_param_0];
576 ; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i23_param_0+2];
577 ; CHECK: .param .b32 param0;
578 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
579 ; CHECK: .param .b32 retval0;
580 ; CHECK: call.uni (retval0),
581 ; CHECK-NEXT: test_i23,
582 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
583 ; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
585 define i23 @test_i23(i23 %a) {
586 %r = tail call i23 @test_i23(i23 %a);
590 ; CHECK: .func (.param .b32 func_retval0)
591 ; CHECK-LABEL: test_i24(
592 ; CHECK-NEXT: .param .b32 test_i24_param_0
593 ; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i24_param_0+2];
594 ; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i24_param_0];
595 ; CHECK: .param .b32 param0;
596 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
597 ; CHECK: .param .b32 retval0;
598 ; CHECK: call.uni (retval0),
599 ; CHECK-NEXT: test_i24,
600 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
601 ; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
603 define i24 @test_i24(i24 %a) {
604 %r = tail call i24 @test_i24(i24 %a);
608 ; CHECK: .func (.param .b32 func_retval0)
609 ; CHECK-LABEL: test_i29(
610 ; CHECK-NEXT: .param .b32 test_i29_param_0
611 ; CHECK: ld.param.u32 {{%r[0-9]+}}, [test_i29_param_0];
612 ; CHECK: .param .b32 param0;
613 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
614 ; CHECK: .param .b32 retval0;
615 ; CHECK: call.uni (retval0),
616 ; CHECK-NEXT: test_i29,
617 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
618 ; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
620 define i29 @test_i29(i29 %a) {
621 %r = tail call i29 @test_i29(i29 %a);
625 ; CHECK: .func (.param .b32 func_retval0)
626 ; CHECK-LABEL: test_i32(
627 ; CHECK-NEXT: .param .b32 test_i32_param_0
628 ; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_i32_param_0];
629 ; CHECK: .param .b32 param0;
630 ; CHECK: st.param.b32 [param0+0], [[E]];
631 ; CHECK: .param .b32 retval0;
632 ; CHECK: call.uni (retval0),
633 ; CHECK-NEXT: test_i32,
634 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
635 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
637 define i32 @test_i32(i32 %a) {
638 %r = tail call i32 @test_i32(i32 %a);
642 ; CHECK: .func (.param .align 16 .b8 func_retval0[16])
643 ; CHECK-LABEL: test_v3i32(
644 ; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16]
645 ; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_v3i32_param_0+8];
646 ; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0];
647 ; CHECK: .param .align 16 .b8 param0[16];
648 ; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
649 ; CHECK: st.param.b32 [param0+8], [[E2]];
650 ; CHECK: .param .align 16 .b8 retval0[16];
651 ; CHECK: call.uni (retval0),
652 ; CHECK-NEXT: test_v3i32,
653 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
654 ; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
655 ; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
656 ; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]];
658 define <3 x i32> @test_v3i32(<3 x i32> %a) {
659 %r = tail call <3 x i32> @test_v3i32(<3 x i32> %a);
663 ; CHECK: .func (.param .align 16 .b8 func_retval0[16])
664 ; CHECK-LABEL: test_v4i32(
665 ; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16]
666 ; CHECK: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0]
667 ; CHECK: .param .align 16 .b8 param0[16];
668 ; CHECK: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
669 ; CHECK: .param .align 16 .b8 retval0[16];
670 ; CHECK: call.uni (retval0),
671 ; CHECK-NEXT: test_v4i32,
672 ; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
673 ; CHECK: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
675 define <4 x i32> @test_v4i32(<4 x i32> %a) {
676 %r = tail call <4 x i32> @test_v4i32(<4 x i32> %a);
680 ; CHECK: .func (.param .align 32 .b8 func_retval0[32])
681 ; CHECK-LABEL: test_v5i32(
682 ; CHECK-NEXT: .param .align 32 .b8 test_v5i32_param_0[32]
683 ; CHECK-DAG: ld.param.u32 [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
684 ; CHECK-DAG: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
685 ; CHECK: .param .align 32 .b8 param0[32];
686 ; CHECK-DAG: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
687 ; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
688 ; CHECK: .param .align 32 .b8 retval0[32];
689 ; CHECK: call.uni (retval0),
690 ; CHECK-NEXT: test_v5i32,
691 ; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
692 ; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
693 ; CHECK-DAG: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
694 ; CHECK-DAG: st.param.b32 [func_retval0+16], [[RE4]];
696 define <5 x i32> @test_v5i32(<5 x i32> %a) {
697 %r = tail call <5 x i32> @test_v5i32(<5 x i32> %a);
701 ; CHECK: .func (.param .b32 func_retval0)
702 ; CHECK-LABEL: test_f32(
703 ; CHECK-NEXT: .param .b32 test_f32_param_0
704 ; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_f32_param_0];
705 ; CHECK: .param .b32 param0;
706 ; CHECK: st.param.f32 [param0+0], [[E]];
707 ; CHECK: .param .b32 retval0;
708 ; CHECK: call.uni (retval0),
709 ; CHECK-NEXT: test_f32,
710 ; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0];
711 ; CHECK: st.param.f32 [func_retval0+0], [[R]];
713 define float @test_f32(float %a) {
714 %r = tail call float @test_f32(float %a);
718 ; CHECK: .func (.param .b64 func_retval0)
719 ; CHECK-LABEL: test_i40(
720 ; CHECK-NEXT: .param .b64 test_i40_param_0
721 ; CHECK-DAG: ld.param.u8 {{%rd[0-9]+}}, [test_i40_param_0+4];
722 ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i40_param_0];
723 ; CHECK: .param .b64 param0;
724 ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
725 ; CHECK: .param .b64 retval0;
726 ; CHECK: call.uni (retval0),
727 ; CHECK-NEXT: test_i40,
728 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
729 ; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
731 define i40 @test_i40(i40 %a) {
732 %r = tail call i40 @test_i40(i40 %a);
736 ; CHECK: .func (.param .b64 func_retval0)
737 ; CHECK-LABEL: test_i47(
738 ; CHECK-NEXT: .param .b64 test_i47_param_0
739 ; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i47_param_0+4];
740 ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i47_param_0];
741 ; CHECK: .param .b64 param0;
742 ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
743 ; CHECK: .param .b64 retval0;
744 ; CHECK: call.uni (retval0),
745 ; CHECK-NEXT: test_i47,
746 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
747 ; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
749 define i47 @test_i47(i47 %a) {
750 %r = tail call i47 @test_i47(i47 %a);
754 ; CHECK: .func (.param .b64 func_retval0)
755 ; CHECK-LABEL: test_i48(
756 ; CHECK-NEXT: .param .b64 test_i48_param_0
757 ; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i48_param_0+4];
758 ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i48_param_0];
759 ; CHECK: .param .b64 param0;
760 ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
761 ; CHECK: .param .b64 retval0;
762 ; CHECK: call.uni (retval0),
763 ; CHECK-NEXT: test_i48,
764 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
765 ; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
767 define i48 @test_i48(i48 %a) {
768 %r = tail call i48 @test_i48(i48 %a);
772 ; CHECK: .func (.param .b64 func_retval0)
773 ; CHECK-LABEL: test_i51(
774 ; CHECK-NEXT: .param .b64 test_i51_param_0
775 ; CHECK-DAG: ld.param.u8 {{%rd[0-9]+}}, [test_i51_param_0+6];
776 ; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i51_param_0+4];
777 ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i51_param_0];
778 ; CHECK: .param .b64 param0;
779 ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
780 ; CHECK: .param .b64 retval0;
781 ; CHECK: call.uni (retval0),
782 ; CHECK-NEXT: test_i51,
783 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
784 ; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
786 define i51 @test_i51(i51 %a) {
787 %r = tail call i51 @test_i51(i51 %a);
791 ; CHECK: .func (.param .b64 func_retval0)
792 ; CHECK-LABEL: test_i56(
793 ; CHECK-NEXT: .param .b64 test_i56_param_0
794 ; CHECK-DAG: ld.param.u8 {{%rd[0-9]+}}, [test_i56_param_0+6];
795 ; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i56_param_0+4];
796 ; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i56_param_0];
797 ; CHECK: .param .b64 param0;
798 ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
799 ; CHECK: .param .b64 retval0;
800 ; CHECK: call.uni (retval0),
801 ; CHECK-NEXT: test_i56,
802 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
803 ; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
805 define i56 @test_i56(i56 %a) {
806 %r = tail call i56 @test_i56(i56 %a);
810 ; CHECK: .func (.param .b64 func_retval0)
811 ; CHECK-LABEL: test_i57(
812 ; CHECK-NEXT: .param .b64 test_i57_param_0
813 ; CHECK: ld.param.u64 {{%rd[0-9]+}}, [test_i57_param_0];
814 ; CHECK: .param .b64 param0;
815 ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
816 ; CHECK: .param .b64 retval0;
817 ; CHECK: call.uni (retval0),
818 ; CHECK-NEXT: test_i57,
819 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
820 ; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
822 define i57 @test_i57(i57 %a) {
823 %r = tail call i57 @test_i57(i57 %a);
; Plain i64: the register loaded from the incoming param (E) must be the one
; stored to the outgoing param, and the register loaded from retval0 (R) must
; be the one stored to func_retval0.
827 ; CHECK: .func (.param .b64 func_retval0)
828 ; CHECK-LABEL: test_i64(
829 ; CHECK-NEXT: .param .b64 test_i64_param_0
830 ; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_i64_param_0];
831 ; CHECK: .param .b64 param0;
832 ; CHECK: st.param.b64 [param0+0], [[E]];
833 ; CHECK: .param .b64 retval0;
834 ; CHECK: call.uni (retval0),
835 ; CHECK-NEXT: test_i64,
836 ; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0];
837 ; CHECK: st.param.b64 [func_retval0+0], [[R]];
839 define i64 @test_i64(i64 %a) {
840 %r = tail call i64 @test_i64(i64 %a);
; <3 x i64> occupies a 32-byte, 32-byte-aligned param: elements 0-1 travel as
; one v2 access at offset 0 and element 2 as a scalar access at offset 16.
844 ; CHECK: .func (.param .align 32 .b8 func_retval0[32])
845 ; CHECK-LABEL: test_v3i64(
846 ; CHECK-NEXT: .param .align 32 .b8 test_v3i64_param_0[32]
847 ; CHECK-DAG: ld.param.u64 [[E2:%rd[0-9]+]], [test_v3i64_param_0+16];
848 ; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0];
849 ; CHECK: .param .align 32 .b8 param0[32];
850 ; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]};
851 ; CHECK: st.param.b64 [param0+16], [[E2]];
852 ; CHECK: .param .align 32 .b8 retval0[32];
853 ; CHECK: call.uni (retval0),
854 ; CHECK-NEXT: test_v3i64,
855 ; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
856 ; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16];
; Each return-value store is emitted once, so it is matched once; a repeated
; pair of directives could only succeed by overlapping the same output lines.
857 ; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]};
858 ; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE2]];
862 define <3 x i64> @test_v3i64(<3 x i64> %a) {
863 %r = tail call <3 x i64> @test_v3i64(<3 x i64> %a);
867 ; PTX limits i64 vector loads/stores to 2 elements, so <4 x i64> is moved as
; two v2 accesses (offsets 0 and 16) rather than a single v4 access.
868 ; CHECK: .func (.param .align 32 .b8 func_retval0[32])
869 ; CHECK-LABEL: test_v4i64(
870 ; CHECK-NEXT: .param .align 32 .b8 test_v4i64_param_0[32]
871 ; CHECK-DAG: ld.param.v2.u64 {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16];
872 ; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0];
873 ; CHECK: .param .align 32 .b8 param0[32];
874 ; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]};
875 ; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
876 ; CHECK: .param .align 32 .b8 retval0[32];
877 ; CHECK: call.uni (retval0),
878 ; CHECK-NEXT: test_v4i64,
879 ; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
880 ; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
881 ; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[RE2]], [[RE3]]};
882 ; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]};
884 define <4 x i64> @test_v4i64(<4 x i64> %a) {
885 %r = tail call <4 x i64> @test_v4i64(<4 x i64> %a);
889 ; Aggregates, on the other hand, do not get extended.
; %s_i1 is passed as a 1-byte, align-1 byte array and accessed with 8-bit
; loads/stores. The parameter declaration is matched with its full
; ".param .align" spelling, like the sibling aggregate tests.
891 ; CHECK: .func (.param .align 1 .b8 func_retval0[1])
892 ; CHECK-LABEL: test_s_i1(
893 ; CHECK-NEXT: .param .align 1 .b8 test_s_i1_param_0[1]
894 ; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i1_param_0];
895 ; CHECK: .param .align 1 .b8 param0[1];
896 ; CHECK: st.param.b8 [param0+0], [[A]]
897 ; CHECK: .param .align 1 .b8 retval0[1];
899 ; CHECK-NEXT: test_s_i1,
900 ; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0+0];
901 ; CHECK: st.param.b8 [func_retval0+0], [[R]];
903 define %s_i1 @test_s_i1(%s_i1 %a) {
904 %r = tail call %s_i1 @test_s_i1(%s_i1 %a);
; %s_i8 aggregate: the param is a 1-byte, align-1 byte array that is read and
; written with 8-bit accesses; the loaded register is forwarded unchanged.
908 ; CHECK: .func (.param .align 1 .b8 func_retval0[1])
909 ; CHECK-LABEL: test_s_i8(
910 ; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1]
911 ; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i8_param_0];
912 ; CHECK: .param .align 1 .b8 param0[1];
913 ; CHECK: st.param.b8 [param0+0], [[A]]
914 ; CHECK: .param .align 1 .b8 retval0[1];
916 ; CHECK-NEXT: test_s_i8,
917 ; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0+0];
918 ; CHECK: st.param.b8 [func_retval0+0], [[R]];
920 define %s_i8 @test_s_i8(%s_i8 %a) {
921 %r = tail call %s_i8 @test_s_i8(%s_i8 %a);
; %s_i16 aggregate: the param is a 2-byte, align-2 byte array that is read
; with a u16 load and written with b16 stores.
925 ; CHECK: .func (.param .align 2 .b8 func_retval0[2])
926 ; CHECK-LABEL: test_s_i16(
927 ; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2]
928 ; CHECK: ld.param.u16 [[A:%rs[0-9]+]], [test_s_i16_param_0];
929 ; CHECK: .param .align 2 .b8 param0[2];
930 ; CHECK: st.param.b16 [param0+0], [[A]]
931 ; CHECK: .param .align 2 .b8 retval0[2];
933 ; CHECK-NEXT: test_s_i16,
934 ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
935 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
937 define %s_i16 @test_s_i16(%s_i16 %a) {
938 %r = tail call %s_i16 @test_s_i16(%s_i16 %a);
; %s_f16 aggregate (presumably a single half field -- the type definition is
; not visible here): same 2-byte, align-2 layout as %s_i16, but the incoming
; value is loaded with an untyped b16 load into a 16-bit register.
942 ; CHECK: .func (.param .align 2 .b8 func_retval0[2])
943 ; CHECK-LABEL: test_s_f16(
944 ; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2]
945 ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_f16_param_0];
946 ; CHECK: .param .align 2 .b8 param0[2];
947 ; CHECK: st.param.b16 [param0+0], [[A]]
948 ; CHECK: .param .align 2 .b8 retval0[2];
950 ; CHECK-NEXT: test_s_f16,
951 ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
952 ; CHECK: st.param.b16 [func_retval0+0], [[R]];
954 define %s_f16 @test_s_f16(%s_f16 %a) {
955 %r = tail call %s_f16 @test_s_f16(%s_f16 %a);
; %s_i32 aggregate: the param is a 4-byte, align-4 byte array (not a scalar
; .b32 param) that is read with a u32 load and written with b32 stores.
959 ; CHECK: .func (.param .align 4 .b8 func_retval0[4])
960 ; CHECK-LABEL: test_s_i32(
961 ; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4]
962 ; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_s_i32_param_0];
963 ; CHECK: .param .align 4 .b8 param0[4]
964 ; CHECK: st.param.b32 [param0+0], [[E]];
965 ; CHECK: .param .align 4 .b8 retval0[4];
966 ; CHECK: call.uni (retval0),
967 ; CHECK-NEXT: test_s_i32,
968 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
969 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
971 define %s_i32 @test_s_i32(%s_i32 %a) {
972 %r = tail call %s_i32 @test_s_i32(%s_i32 %a);
; %s_f32 ({ float }): same 4-byte, align-4 layout as %s_i32, but every access
; is typed f32 and uses a float (%f) register.
976 ; CHECK: .func (.param .align 4 .b8 func_retval0[4])
977 ; CHECK-LABEL: test_s_f32(
978 ; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4]
979 ; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_s_f32_param_0];
980 ; CHECK: .param .align 4 .b8 param0[4]
981 ; CHECK: st.param.f32 [param0+0], [[E]];
982 ; CHECK: .param .align 4 .b8 retval0[4];
983 ; CHECK: call.uni (retval0),
984 ; CHECK-NEXT: test_s_f32,
985 ; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0];
986 ; CHECK: st.param.f32 [func_retval0+0], [[R]];
988 define %s_f32 @test_s_f32(%s_f32 %a) {
989 %r = tail call %s_f32 @test_s_f32(%s_f32 %a);
; %s_i64 aggregate: the param is an 8-byte, align-8 byte array that is read
; with a u64 load and written with b64 stores.
993 ; CHECK: .func (.param .align 8 .b8 func_retval0[8])
994 ; CHECK-LABEL: test_s_i64(
995 ; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8]
996 ; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_s_i64_param_0];
997 ; CHECK: .param .align 8 .b8 param0[8];
998 ; CHECK: st.param.b64 [param0+0], [[E]];
999 ; CHECK: .param .align 8 .b8 retval0[8];
1000 ; CHECK: call.uni (retval0),
1001 ; CHECK-NEXT: test_s_i64,
1002 ; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0];
1003 ; CHECK: st.param.b64 [func_retval0+0], [[R]];
1005 define %s_i64 @test_s_i64(%s_i64 %a) {
1006 %r = tail call %s_i64 @test_s_i64(%s_i64 %a);
1010 ; Fields with different types but identical sizes are not vectorized:
; each i32/float field of %s_i32f32 gets its own scalar load and store, and
; the trailing i64 sits at offset 16 of the 24-byte, align-8 param.
1011 ; CHECK: .func (.param .align 8 .b8 func_retval0[24])
1012 ; CHECK-LABEL: test_s_i32f32(
1013 ; CHECK: .param .align 8 .b8 test_s_i32f32_param_0[24]
1014 ; CHECK-DAG: ld.param.u64 [[E4:%rd[0-9]+]], [test_s_i32f32_param_0+16];
1015 ; CHECK-DAG: ld.param.f32 [[E3:%f[0-9]+]], [test_s_i32f32_param_0+12];
1016 ; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_s_i32f32_param_0+8];
1017 ; CHECK-DAG: ld.param.f32 [[E1:%f[0-9]+]], [test_s_i32f32_param_0+4];
1018 ; CHECK-DAG: ld.param.u32 [[E0:%r[0-9]+]], [test_s_i32f32_param_0];
1019 ; CHECK: .param .align 8 .b8 param0[24];
1020 ; CHECK-DAG: st.param.b32 [param0+0], [[E0]];
1021 ; CHECK-DAG: st.param.f32 [param0+4], [[E1]];
1022 ; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
1023 ; CHECK-DAG: st.param.f32 [param0+12], [[E3]];
1024 ; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
1025 ; CHECK: .param .align 8 .b8 retval0[24];
1026 ; CHECK: call.uni (retval0),
1027 ; CHECK-NEXT: test_s_i32f32,
1028 ; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0+0];
1029 ; CHECK-DAG: ld.param.f32 [[RE1:%f[0-9]+]], [retval0+4];
1030 ; CHECK-DAG: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
1031 ; CHECK-DAG: ld.param.f32 [[RE3:%f[0-9]+]], [retval0+12];
1032 ; CHECK-DAG: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
1033 ; CHECK-DAG: st.param.b32 [func_retval0+0], [[RE0]];
1034 ; CHECK-DAG: st.param.f32 [func_retval0+4], [[RE1]];
1035 ; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]];
1036 ; CHECK-DAG: st.param.f32 [func_retval0+12], [[RE3]];
1037 ; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]];
1039 define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
1040 %r = tail call %s_i32f32 @test_s_i32f32(%s_i32f32 %a);
1044 ; We do vectorize consecutive fields with matching types.
; The four i32 fields of %s_i32x4 are moved as two v2 accesses (offsets 0 and
; 8); the trailing i64 at offset 16 is moved on its own. The i64 is captured
; as E4 by this test's own load so the store below is tied to that load rather
; than to a capture left over from an earlier test.
1045 ; CHECK:.visible .func (.param .align 8 .b8 func_retval0[24])
1046 ; CHECK-LABEL: test_s_i32x4(
1047 ; CHECK: .param .align 8 .b8 test_s_i32x4_param_0[24]
1048 ; CHECK-DAG: ld.param.u64 [[E4:%rd[0-9]+]], [test_s_i32x4_param_0+16];
1049 ; CHECK-DAG: ld.param.v2.u32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8];
1050 ; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0];
1051 ; CHECK: .param .align 8 .b8 param0[24];
1052 ; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
1053 ; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
1054 ; CHECK: st.param.b64 [param0+16], [[E4]];
1055 ; CHECK: .param .align 8 .b8 retval0[24];
1056 ; CHECK: call.uni (retval0),
1057 ; CHECK-NEXT: test_s_i32x4,
1058 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
1059 ; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
1060 ; CHECK: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
1061 ; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
1062 ; CHECK-DAG: st.param.v2.b32 [func_retval0+8], {[[RE2]], [[RE3]]};
1063 ; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]];
1066 define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
1067 %r = tail call %s_i32x4 @test_s_i32x4(%s_i32x4 %a);
; Naturally aligned %s_i8i32x4 ({ i32, i32, i8, i32, i32, i64 }), 32 bytes at
; align 8. Only the two leading i32 fields are combined into a v2 access; the
; i8 at offset 8 and the i32 fields at offsets 12 and 16 are accessed
; individually, and the i64 sits at offset 24.
1071 ; CHECK:.visible .func (.param .align 8 .b8 func_retval0[32])
1072 ; CHECK-LABEL: test_s_i1i32x4(
1073 ; CHECK: .param .align 8 .b8 test_s_i1i32x4_param_0[32]
1074 ; CHECK: ld.param.u64 [[E5:%rd[0-9]+]], [test_s_i1i32x4_param_0+24];
1075 ; CHECK: ld.param.u32 [[E4:%r[0-9]+]], [test_s_i1i32x4_param_0+16];
1076 ; CHECK: ld.param.u32 [[E3:%r[0-9]+]], [test_s_i1i32x4_param_0+12];
1077 ; CHECK: ld.param.u8 [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8];
1078 ; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0];
1079 ; CHECK: .param .align 8 .b8 param0[32];
1080 ; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
1081 ; CHECK: st.param.b8 [param0+8], [[E2]];
1082 ; CHECK: st.param.b32 [param0+12], [[E3]];
1083 ; CHECK: st.param.b32 [param0+16], [[E4]];
1084 ; CHECK: st.param.b64 [param0+24], [[E5]];
1085 ; CHECK: .param .align 8 .b8 retval0[32];
1086 ; CHECK: call.uni (retval0),
1087 ; CHECK: test_s_i1i32x4,
1091 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
1092 ; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+8];
1093 ; CHECK: ld.param.b32 [[RE3:%r[0-9]+]], [retval0+12];
1094 ; CHECK: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
1095 ; CHECK: ld.param.b64 [[RE5:%rd[0-9]+]], [retval0+24];
1096 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
1097 ; CHECK: st.param.b8 [func_retval0+8], [[RE2]];
1098 ; CHECK: st.param.b32 [func_retval0+12], [[RE3]];
1099 ; CHECK: st.param.b32 [func_retval0+16], [[RE4]];
1100 ; CHECK: st.param.b64 [func_retval0+24], [[RE5]];
1103 define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
1104 %r = tail call %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a);
1108 ; -- Packed variant (%s_i8i32x4p): the param is only 1-byte aligned, so
1109 ; -- all loads from it must be done one byte at a time (25 byte loads).
1110 ; CHECK:.visible .func (.param .align 1 .b8 func_retval0[25])
1111 ; CHECK-LABEL: test_s_i1i32x4p(
1112 ; CHECK-DAG: .param .align 1 .b8 test_s_i1i32x4p_param_0[25]
1113 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+24];
1114 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+23];
1115 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+22];
1116 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+21];
1117 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+20];
1118 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+19];
1119 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+18];
1120 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+17];
1121 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+16];
1122 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+15];
1123 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+14];
1124 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+13];
1125 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+12];
1126 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+11];
1127 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+10];
1128 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+9];
1129 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+8];
1130 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+7];
1131 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+6];
1132 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+5];
1133 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+4];
1134 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+3];
1135 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+2];
1136 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+1];
1137 ; CHECK-DAG: ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0];
1139 ; --- Unaligned parameter store / return value load is broken in both nvcc
1140 ; --- and llvm and needs to be fixed. The directives below pin the current
; --- output, which still uses full-width accesses at unaligned offsets.
1141 ; CHECK: .param .align 1 .b8 param0[25];
1142 ; CHECK-DAG: st.param.b32 [param0+0],
1143 ; CHECK-DAG: st.param.b32 [param0+4],
1144 ; CHECK-DAG: st.param.b8 [param0+8],
1145 ; CHECK-DAG: st.param.b32 [param0+9],
1146 ; CHECK-DAG: st.param.b32 [param0+13],
1147 ; CHECK-DAG: st.param.b64 [param0+17],
1148 ; CHECK: .param .align 1 .b8 retval0[25];
1149 ; CHECK: call.uni (retval0),
1150 ; CHECK-NEXT: test_s_i1i32x4p,
; Result registers are matched by register class, not by exact number, so the
; test does not depend on how virtual registers happen to be numbered.
1151 ; CHECK-DAG: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
1152 ; CHECK-DAG: ld.param.b32 {{%r[0-9]+}}, [retval0+4];
1153 ; CHECK-DAG: ld.param.b8 {{%rs[0-9]+}}, [retval0+8];
1154 ; CHECK-DAG: ld.param.b32 {{%r[0-9]+}}, [retval0+9];
1155 ; CHECK-DAG: ld.param.b32 {{%r[0-9]+}}, [retval0+13];
1156 ; CHECK-DAG: ld.param.b64 {{%rd[0-9]+}}, [retval0+17];
1157 ; CHECK-DAG: st.param.b32 [func_retval0+0],
1158 ; CHECK-DAG: st.param.b32 [func_retval0+4],
1159 ; CHECK-DAG: st.param.b8 [func_retval0+8],
1160 ; CHECK-DAG: st.param.b32 [func_retval0+9],
1161 ; CHECK-DAG: st.param.b32 [func_retval0+13],
1162 ; CHECK-DAG: st.param.b64 [func_retval0+17],
1164 define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
1165 %r = tail call %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a);
1169 ; Check that we can vectorize loads that span multiple aggregate fields.
1170 ; CHECK:.visible .func (.param .align 16 .b8 func_retval0[80])
1171 ; CHECK-LABEL: test_s_crossfield(
1172 ; CHECK: .param .align 16 .b8 test_s_crossfield_param_0[80]
1173 ; CHECK: ld.param.u32 [[E15:%r[0-9]+]], [test_s_crossfield_param_0+64];
1174 ; CHECK: ld.param.v4.u32 {[[E11:%r[0-9]+]], [[E12:%r[0-9]+]], [[E13:%r[0-9]+]], [[E14:%r[0-9]+]]}, [test_s_crossfield_param_0+48];
1175 ; CHECK: ld.param.v4.u32 {[[E7:%r[0-9]+]], [[E8:%r[0-9]+]], [[E9:%r[0-9]+]], [[E10:%r[0-9]+]]}, [test_s_crossfield_param_0+32];
1176 ; CHECK: ld.param.v4.u32 {[[E3:%r[0-9]+]], [[E4:%r[0-9]+]], [[E5:%r[0-9]+]], [[E6:%r[0-9]+]]}, [test_s_crossfield_param_0+16];
1177 ; CHECK: ld.param.u32 [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8];
1178 ; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0];
1179 ; CHECK: .param .align 16 .b8 param0[80];
1180 ; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
1181 ; CHECK: st.param.b32 [param0+8], [[E2]];
1182 ; CHECK: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
1183 ; CHECK: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
1184 ; CHECK: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
1185 ; CHECK: st.param.b32 [param0+64], [[E15]];
1186 ; CHECK: .param .align 16 .b8 retval0[80];
1187 ; CHECK: call.uni (retval0),
1188 ; CHECK: test_s_crossfield,
1189 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
1190 ; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
1191 ; CHECK: ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16];
1192 ; CHECK: ld.param.v4.b32 {[[RE7:%r[0-9]+]], [[RE8:%r[0-9]+]], [[RE9:%r[0-9]+]], [[RE10:%r[0-9]+]]}, [retval0+32];
1193 ; CHECK: ld.param.v4.b32 {[[RE11:%r[0-9]+]], [[RE12:%r[0-9]+]], [[RE13:%r[0-9]+]], [[RE14:%r[0-9]+]]}, [retval0+48];
1194 ; CHECK: ld.param.b32 [[RE15:%r[0-9]+]], [retval0+64];
1195 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
1196 ; CHECK: st.param.b32 [func_retval0+8], [[RE2]];
1197 ; CHECK: st.param.v4.b32 [func_retval0+16], {[[RE3]], [[RE4]], [[RE5]], [[RE6]]};
1198 ; CHECK: st.param.v4.b32 [func_retval0+32], {[[RE7]], [[RE8]], [[RE9]], [[RE10]]};
1199 ; CHECK: st.param.v4.b32 [func_retval0+48], {[[RE11]], [[RE12]], [[RE13]], [[RE14]]};
1200 ; CHECK: st.param.b32 [func_retval0+64], [[RE15]];
1203 define %s_crossfield @test_s_crossfield(%s_crossfield %a) {
1204 %r = tail call %s_crossfield @test_s_crossfield(%s_crossfield %a);
1205 ret %s_crossfield %r;