1 ; RUN: llc < %s -mtriple=nvptx64-unknown-unknown | FileCheck %s
2 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-unknown-unknown | %ptxas-verify %}
4 ; Check that parameters of a __device__ function with private or internal
5 ; linkage called from a __global__ (kernel) function get increased alignment,
6 ; and additional vectorization is performed on loads/stores with that
9 ; Test IR is a minimized version of IR generated with the following command
10 ; from the source code below:
11 ; $ clang++ -O3 --cuda-gpu-arch=sm_35 -S -emit-llvm src.cu
13 ; ----------------------------------------------------------------------------
16 ; struct St4x1 { uint32_t field[1]; };
17 ; struct St4x2 { uint32_t field[2]; };
18 ; struct St4x3 { uint32_t field[3]; };
19 ; struct St4x4 { uint32_t field[4]; };
20 ; struct St4x5 { uint32_t field[5]; };
21 ; struct St4x6 { uint32_t field[6]; };
22 ; struct St4x7 { uint32_t field[7]; };
23 ; struct St4x8 { uint32_t field[8]; };
24 ; struct St8x1 { uint64_t field[1]; };
25 ; struct St8x2 { uint64_t field[2]; };
26 ; struct St8x3 { uint64_t field[3]; };
27 ; struct St8x4 { uint64_t field[4]; };
29 ; #define DECLARE_CALLEE(StName) \
30 ; static __device__ __attribute__((noinline)) \
31 ; struct StName callee_##StName(struct StName in) { \
32 ; struct StName ret; \
33 ; const unsigned size = sizeof(ret.field) / sizeof(*ret.field); \
34 ; for (unsigned i = 0; i != size; ++i) \
35 ; ret.field[i] = in.field[i]; \
39 ; #define DECLARE_CALLER(StName) \
41 ; void caller_##StName(struct StName in, struct StName* ret) \
43 ; *ret = callee_##StName(in); \
46 ; #define DECLARE_CALL(StName) \
47 ; DECLARE_CALLEE(StName) \
48 ; DECLARE_CALLER(StName) \
62 ; ----------------------------------------------------------------------------
64 %struct.St4x1 = type { [1 x i32] }
65 %struct.St4x2 = type { [2 x i32] }
66 %struct.St4x3 = type { [3 x i32] }
67 %struct.St4x4 = type { [4 x i32] }
68 %struct.St4x5 = type { [5 x i32] }
69 %struct.St4x6 = type { [6 x i32] }
70 %struct.St4x7 = type { [7 x i32] }
71 %struct.St4x8 = type { [8 x i32] }
72 %struct.St8x1 = type { [1 x i64] }
73 %struct.St8x2 = type { [2 x i64] }
74 %struct.St8x3 = type { [3 x i64] }
75 %struct.St8x4 = type { [4 x i64] }
77 ; Section 1 - checking that:
78 ; - function argument (including retval) vectorization is done with internal linkage;
79 ; - caller and callee specify correct alignment for callee's params.
81 define dso_local void @caller_St4x1(ptr nocapture noundef readonly byval(%struct.St4x1) align 4 %in, ptr nocapture noundef writeonly %ret) {
82 ; CHECK-LABEL: .visible .func caller_St4x1(
83 ; CHECK: .param .align 4 .b8 caller_St4x1_param_0[4],
84 ; CHECK: .param .b64 caller_St4x1_param_1
86 ; CHECK: .param .b32 param0;
87 ; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
88 ; CHECK: .param .align 16 .b8 retval0[4];
89 ; CHECK: call.uni (retval0),
90 ; CHECK-NEXT: callee_St4x1,
94 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
95 %1 = load i32, ptr %in, align 4
96 %call = tail call fastcc [1 x i32] @callee_St4x1(i32 %1) #2
97 %.fca.0.extract = extractvalue [1 x i32] %call, 0
98 store i32 %.fca.0.extract, ptr %ret, align 4
102 define internal fastcc [1 x i32] @callee_St4x1(i32 %in.0.val) {
103 ; CHECK: .func (.param .align 16 .b8 func_retval0[4])
104 ; CHECK-LABEL: callee_St4x1(
105 ; CHECK-NEXT: .param .b32 callee_St4x1_param_0
106 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [callee_St4x1_param_0];
107 ; CHECK: st.param.b32 [func_retval0+0], [[R1]];
109 %oldret = insertvalue [1 x i32] poison, i32 %in.0.val, 0
110 ret [1 x i32] %oldret
113 define dso_local void @caller_St4x2(ptr nocapture noundef readonly byval(%struct.St4x2) align 4 %in, ptr nocapture noundef writeonly %ret) {
114 ; CHECK-LABEL: .visible .func caller_St4x2(
115 ; CHECK: .param .align 4 .b8 caller_St4x2_param_0[8],
116 ; CHECK: .param .b64 caller_St4x2_param_1
118 ; CHECK: .param .align 16 .b8 param0[8];
119 ; CHECK: st.param.v2.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
120 ; CHECK: .param .align 16 .b8 retval0[8];
121 ; CHECK: call.uni (retval0),
122 ; CHECK-NEXT: callee_St4x2,
126 ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+0];
127 %agg.tmp = alloca %struct.St4x2, align 8
128 %1 = load i64, ptr %in, align 4
129 store i64 %1, ptr %agg.tmp, align 8
130 %call = tail call fastcc [2 x i32] @callee_St4x2(ptr noundef nonnull byval(%struct.St4x2) align 4 %agg.tmp) #2
131 %.fca.0.extract = extractvalue [2 x i32] %call, 0
132 %.fca.1.extract = extractvalue [2 x i32] %call, 1
133 store i32 %.fca.0.extract, ptr %ret, align 4
134 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 4
135 store i32 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 4
139 define internal fastcc [2 x i32] @callee_St4x2(ptr nocapture noundef readonly byval(%struct.St4x2) align 4 %in) {
140 ; CHECK: .func (.param .align 16 .b8 func_retval0[8])
141 ; CHECK-LABEL: callee_St4x2(
142 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x2_param_0[8]
143 ; CHECK: ld.param.v2.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]]}, [callee_St4x2_param_0];
144 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[R1]], [[R2]]};
146 %1 = load i32, ptr %in, align 4
147 %arrayidx.1 = getelementptr inbounds [2 x i32], ptr %in, i64 0, i64 1
148 %2 = load i32, ptr %arrayidx.1, align 4
149 %3 = insertvalue [2 x i32] poison, i32 %1, 0
150 %oldret = insertvalue [2 x i32] %3, i32 %2, 1
151 ret [2 x i32] %oldret
154 define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct.St4x3) align 4 %in, ptr nocapture noundef writeonly %ret) {
155 ; CHECK-LABEL: .visible .func caller_St4x3(
156 ; CHECK: .param .align 4 .b8 caller_St4x3_param_0[12],
157 ; CHECK: .param .b64 caller_St4x3_param_1
159 ; CHECK: .param .align 16 .b8 param0[12];
160 ; CHECK: st.param.v2.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
161 ; CHECK: st.param.b32 [param0+8], {{%r[0-9]+}};
162 ; CHECK: .param .align 16 .b8 retval0[12];
163 ; CHECK: call.uni (retval0),
164 ; CHECK-NEXT: callee_St4x3,
168 ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+0];
169 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+8];
170 %call = tail call fastcc [3 x i32] @callee_St4x3(ptr noundef nonnull byval(%struct.St4x3) align 4 %in) #2
171 %.fca.0.extract = extractvalue [3 x i32] %call, 0
172 %.fca.1.extract = extractvalue [3 x i32] %call, 1
173 %.fca.2.extract = extractvalue [3 x i32] %call, 2
174 store i32 %.fca.0.extract, ptr %ret, align 4
175 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 4
176 store i32 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 4
177 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8
178 store i32 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 4
182 define internal fastcc [3 x i32] @callee_St4x3(ptr nocapture noundef readonly byval(%struct.St4x3) align 4 %in) {
183 ; CHECK: .func (.param .align 16 .b8 func_retval0[12])
184 ; CHECK-LABEL: callee_St4x3(
185 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x3_param_0[12]
186 ; CHECK: ld.param.v2.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]]}, [callee_St4x3_param_0];
187 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [callee_St4x3_param_0+8];
188 ; CHECK: st.param.v2.b32 [func_retval0+0], {[[R1]], [[R2]]};
189 ; CHECK: st.param.b32 [func_retval0+8], [[R3]];
191 %1 = load i32, ptr %in, align 4
192 %arrayidx.1 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 1
193 %2 = load i32, ptr %arrayidx.1, align 4
194 %arrayidx.2 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 2
195 %3 = load i32, ptr %arrayidx.2, align 4
196 %4 = insertvalue [3 x i32] poison, i32 %1, 0
197 %5 = insertvalue [3 x i32] %4, i32 %2, 1
198 %oldret = insertvalue [3 x i32] %5, i32 %3, 2
199 ret [3 x i32] %oldret
202 define dso_local void @caller_St4x4(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in, ptr nocapture noundef writeonly %ret) {
203 ; CHECK-LABEL: .visible .func caller_St4x4(
204 ; CHECK: .param .align 4 .b8 caller_St4x4_param_0[16],
205 ; CHECK: .param .b64 caller_St4x4_param_1
207 ; CHECK: .param .align 16 .b8 param0[16];
208 ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
209 ; CHECK: .param .align 16 .b8 retval0[16];
210 ; CHECK: call.uni (retval0),
211 ; CHECK-NEXT: callee_St4x4,
215 ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+0];
216 %call = tail call fastcc [4 x i32] @callee_St4x4(ptr noundef nonnull byval(%struct.St4x4) align 4 %in) #2
217 %.fca.0.extract = extractvalue [4 x i32] %call, 0
218 %.fca.1.extract = extractvalue [4 x i32] %call, 1
219 %.fca.2.extract = extractvalue [4 x i32] %call, 2
220 %.fca.3.extract = extractvalue [4 x i32] %call, 3
221 store i32 %.fca.0.extract, ptr %ret, align 4
222 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 4
223 store i32 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 4
224 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8
225 store i32 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 4
226 %ref.tmp.sroa.6.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 12
227 store i32 %.fca.3.extract, ptr %ref.tmp.sroa.6.0..sroa_idx, align 4
231 define internal fastcc [4 x i32] @callee_St4x4(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in) {
232 ; CHECK: .func (.param .align 16 .b8 func_retval0[16])
233 ; CHECK-LABEL: callee_St4x4(
234 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x4_param_0[16]
235 ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x4_param_0];
236 ; CHECK: st.param.v4.b32 [func_retval0+0], {[[R1]], [[R2]], [[R3]], [[R4]]};
238 %1 = load i32, ptr %in, align 4
239 %arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1
240 %2 = load i32, ptr %arrayidx.1, align 4
241 %arrayidx.2 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 2
242 %3 = load i32, ptr %arrayidx.2, align 4
243 %arrayidx.3 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 3
244 %4 = load i32, ptr %arrayidx.3, align 4
245 %5 = insertvalue [4 x i32] poison, i32 %1, 0
246 %6 = insertvalue [4 x i32] %5, i32 %2, 1
247 %7 = insertvalue [4 x i32] %6, i32 %3, 2
248 %oldret = insertvalue [4 x i32] %7, i32 %4, 3
249 ret [4 x i32] %oldret
252 define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct.St4x5) align 4 %in, ptr nocapture noundef writeonly %ret) {
253 ; CHECK-LABEL: .visible .func caller_St4x5(
254 ; CHECK: .param .align 4 .b8 caller_St4x5_param_0[20],
255 ; CHECK: .param .b64 caller_St4x5_param_1
257 ; CHECK: .param .align 16 .b8 param0[20];
258 ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
259 ; CHECK: st.param.b32 [param0+16], {{%r[0-9]+}};
260 ; CHECK: .param .align 16 .b8 retval0[20];
261 ; CHECK: call.uni (retval0),
262 ; CHECK-NEXT: callee_St4x5,
266 ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+0];
267 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+16];
268 %call = tail call fastcc [5 x i32] @callee_St4x5(ptr noundef nonnull byval(%struct.St4x5) align 4 %in) #2
269 %.fca.0.extract = extractvalue [5 x i32] %call, 0
270 %.fca.1.extract = extractvalue [5 x i32] %call, 1
271 %.fca.2.extract = extractvalue [5 x i32] %call, 2
272 %.fca.3.extract = extractvalue [5 x i32] %call, 3
273 %.fca.4.extract = extractvalue [5 x i32] %call, 4
274 store i32 %.fca.0.extract, ptr %ret, align 4
275 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 4
276 store i32 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 4
277 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8
278 store i32 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 4
279 %ref.tmp.sroa.6.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 12
280 store i32 %.fca.3.extract, ptr %ref.tmp.sroa.6.0..sroa_idx, align 4
281 %ref.tmp.sroa.7.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 16
282 store i32 %.fca.4.extract, ptr %ref.tmp.sroa.7.0..sroa_idx, align 4
286 define internal fastcc [5 x i32] @callee_St4x5(ptr nocapture noundef readonly byval(%struct.St4x5) align 4 %in) {
287 ; CHECK: .func (.param .align 16 .b8 func_retval0[20])
288 ; CHECK-LABEL: callee_St4x5(
289 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x5_param_0[20]
290 ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x5_param_0];
291 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [callee_St4x5_param_0+16];
292 ; CHECK: st.param.v4.b32 [func_retval0+0], {[[R1]], [[R2]], [[R3]], [[R4]]};
293 ; CHECK: st.param.b32 [func_retval0+16], [[R5]];
295 %1 = load i32, ptr %in, align 4
296 %arrayidx.1 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 1
297 %2 = load i32, ptr %arrayidx.1, align 4
298 %arrayidx.2 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 2
299 %3 = load i32, ptr %arrayidx.2, align 4
300 %arrayidx.3 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 3
301 %4 = load i32, ptr %arrayidx.3, align 4
302 %arrayidx.4 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 4
303 %5 = load i32, ptr %arrayidx.4, align 4
304 %6 = insertvalue [5 x i32] poison, i32 %1, 0
305 %7 = insertvalue [5 x i32] %6, i32 %2, 1
306 %8 = insertvalue [5 x i32] %7, i32 %3, 2
307 %9 = insertvalue [5 x i32] %8, i32 %4, 3
308 %oldret = insertvalue [5 x i32] %9, i32 %5, 4
309 ret [5 x i32] %oldret
312 define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct.St4x6) align 4 %in, ptr nocapture noundef writeonly %ret) {
313 ; CHECK-LABEL: .visible .func caller_St4x6(
314 ; CHECK: .param .align 4 .b8 caller_St4x6_param_0[24],
315 ; CHECK: .param .b64 caller_St4x6_param_1
317 ; CHECK: .param .align 16 .b8 param0[24];
318 ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
319 ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
320 ; CHECK: .param .align 16 .b8 retval0[24];
321 ; CHECK: call.uni (retval0),
322 ; CHECK-NEXT: callee_St4x6,
326 ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+0];
327 ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
328 %call = tail call fastcc [6 x i32] @callee_St4x6(ptr noundef nonnull byval(%struct.St4x6) align 4 %in) #2
329 %.fca.0.extract = extractvalue [6 x i32] %call, 0
330 %.fca.1.extract = extractvalue [6 x i32] %call, 1
331 %.fca.2.extract = extractvalue [6 x i32] %call, 2
332 %.fca.3.extract = extractvalue [6 x i32] %call, 3
333 %.fca.4.extract = extractvalue [6 x i32] %call, 4
334 %.fca.5.extract = extractvalue [6 x i32] %call, 5
335 store i32 %.fca.0.extract, ptr %ret, align 4
336 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 4
337 store i32 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 4
338 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8
339 store i32 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 4
340 %ref.tmp.sroa.6.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 12
341 store i32 %.fca.3.extract, ptr %ref.tmp.sroa.6.0..sroa_idx, align 4
342 %ref.tmp.sroa.7.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 16
343 store i32 %.fca.4.extract, ptr %ref.tmp.sroa.7.0..sroa_idx, align 4
344 %ref.tmp.sroa.8.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 20
345 store i32 %.fca.5.extract, ptr %ref.tmp.sroa.8.0..sroa_idx, align 4
349 define internal fastcc [6 x i32] @callee_St4x6(ptr nocapture noundef readonly byval(%struct.St4x6) align 4 %in) {
350 ; CHECK: .func (.param .align 16 .b8 func_retval0[24])
351 ; CHECK-LABEL: callee_St4x6(
352 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x6_param_0[24]
353 ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x6_param_0];
354 ; CHECK: ld.param.v2.u32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]]}, [callee_St4x6_param_0+16];
355 ; CHECK: st.param.v4.b32 [func_retval0+0], {[[R1]], [[R2]], [[R3]], [[R4]]};
356 ; CHECK: st.param.v2.b32 [func_retval0+16], {[[R5]], [[R6]]};
358 %1 = load i32, ptr %in, align 4
359 %arrayidx.1 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 1
360 %2 = load i32, ptr %arrayidx.1, align 4
361 %arrayidx.2 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 2
362 %3 = load i32, ptr %arrayidx.2, align 4
363 %arrayidx.3 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 3
364 %4 = load i32, ptr %arrayidx.3, align 4
365 %arrayidx.4 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 4
366 %5 = load i32, ptr %arrayidx.4, align 4
367 %arrayidx.5 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 5
368 %6 = load i32, ptr %arrayidx.5, align 4
369 %7 = insertvalue [6 x i32] poison, i32 %1, 0
370 %8 = insertvalue [6 x i32] %7, i32 %2, 1
371 %9 = insertvalue [6 x i32] %8, i32 %3, 2
372 %10 = insertvalue [6 x i32] %9, i32 %4, 3
373 %11 = insertvalue [6 x i32] %10, i32 %5, 4
374 %oldret = insertvalue [6 x i32] %11, i32 %6, 5
375 ret [6 x i32] %oldret
378 define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct.St4x7) align 4 %in, ptr nocapture noundef writeonly %ret) {
379 ; CHECK-LABEL: .visible .func caller_St4x7(
380 ; CHECK: .param .align 4 .b8 caller_St4x7_param_0[28],
381 ; CHECK: .param .b64 caller_St4x7_param_1
383 ; CHECK: .param .align 16 .b8 param0[28];
384 ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
385 ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
386 ; CHECK: st.param.b32 [param0+24], {{%r[0-9]+}};
387 ; CHECK: .param .align 16 .b8 retval0[28];
388 ; CHECK: call.uni (retval0),
389 ; CHECK-NEXT: callee_St4x7,
393 ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+0];
394 ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
395 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+24];
396 %call = tail call fastcc [7 x i32] @callee_St4x7(ptr noundef nonnull byval(%struct.St4x7) align 4 %in) #2
397 %.fca.0.extract = extractvalue [7 x i32] %call, 0
398 %.fca.1.extract = extractvalue [7 x i32] %call, 1
399 %.fca.2.extract = extractvalue [7 x i32] %call, 2
400 %.fca.3.extract = extractvalue [7 x i32] %call, 3
401 %.fca.4.extract = extractvalue [7 x i32] %call, 4
402 %.fca.5.extract = extractvalue [7 x i32] %call, 5
403 %.fca.6.extract = extractvalue [7 x i32] %call, 6
404 store i32 %.fca.0.extract, ptr %ret, align 4
405 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 4
406 store i32 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 4
407 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8
408 store i32 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 4
409 %ref.tmp.sroa.6.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 12
410 store i32 %.fca.3.extract, ptr %ref.tmp.sroa.6.0..sroa_idx, align 4
411 %ref.tmp.sroa.7.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 16
412 store i32 %.fca.4.extract, ptr %ref.tmp.sroa.7.0..sroa_idx, align 4
413 %ref.tmp.sroa.8.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 20
414 store i32 %.fca.5.extract, ptr %ref.tmp.sroa.8.0..sroa_idx, align 4
415 %ref.tmp.sroa.9.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 24
416 store i32 %.fca.6.extract, ptr %ref.tmp.sroa.9.0..sroa_idx, align 4
420 define internal fastcc [7 x i32] @callee_St4x7(ptr nocapture noundef readonly byval(%struct.St4x7) align 4 %in) {
421 ; CHECK: .func (.param .align 16 .b8 func_retval0[28])
422 ; CHECK-LABEL: callee_St4x7(
423 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x7_param_0[28]
424 ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x7_param_0];
425 ; CHECK: ld.param.v2.u32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]]}, [callee_St4x7_param_0+16];
426 ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [callee_St4x7_param_0+24];
427 ; CHECK: st.param.v4.b32 [func_retval0+0], {[[R1]], [[R2]], [[R3]], [[R4]]};
428 ; CHECK: st.param.v2.b32 [func_retval0+16], {[[R5]], [[R6]]};
429 ; CHECK: st.param.b32 [func_retval0+24], [[R7]];
431 %1 = load i32, ptr %in, align 4
432 %arrayidx.1 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 1
433 %2 = load i32, ptr %arrayidx.1, align 4
434 %arrayidx.2 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 2
435 %3 = load i32, ptr %arrayidx.2, align 4
436 %arrayidx.3 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 3
437 %4 = load i32, ptr %arrayidx.3, align 4
438 %arrayidx.4 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 4
439 %5 = load i32, ptr %arrayidx.4, align 4
440 %arrayidx.5 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 5
441 %6 = load i32, ptr %arrayidx.5, align 4
442 %arrayidx.6 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 6
443 %7 = load i32, ptr %arrayidx.6, align 4
444 %8 = insertvalue [7 x i32] poison, i32 %1, 0
445 %9 = insertvalue [7 x i32] %8, i32 %2, 1
446 %10 = insertvalue [7 x i32] %9, i32 %3, 2
447 %11 = insertvalue [7 x i32] %10, i32 %4, 3
448 %12 = insertvalue [7 x i32] %11, i32 %5, 4
449 %13 = insertvalue [7 x i32] %12, i32 %6, 5
450 %oldret = insertvalue [7 x i32] %13, i32 %7, 6
451 ret [7 x i32] %oldret
454 define dso_local void @caller_St4x8(ptr nocapture noundef readonly byval(%struct.St4x8) align 4 %in, ptr nocapture noundef writeonly %ret) {
455 ; CHECK-LABEL: .visible .func caller_St4x8(
456 ; CHECK: .param .align 4 .b8 caller_St4x8_param_0[32],
457 ; CHECK: .param .b64 caller_St4x8_param_1
459 ; CHECK: .param .align 16 .b8 param0[32];
460 ; CHECK: st.param.v4.b32 [param0+0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
461 ; CHECK: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
462 ; CHECK: .param .align 16 .b8 retval0[32];
463 ; CHECK: call.uni (retval0),
464 ; CHECK-NEXT: callee_St4x8,
468 ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+0];
469 ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
470 %call = tail call fastcc [8 x i32] @callee_St4x8(ptr noundef nonnull byval(%struct.St4x8) align 4 %in) #2
471 %.fca.0.extract = extractvalue [8 x i32] %call, 0
472 %.fca.1.extract = extractvalue [8 x i32] %call, 1
473 %.fca.2.extract = extractvalue [8 x i32] %call, 2
474 %.fca.3.extract = extractvalue [8 x i32] %call, 3
475 %.fca.4.extract = extractvalue [8 x i32] %call, 4
476 %.fca.5.extract = extractvalue [8 x i32] %call, 5
477 %.fca.6.extract = extractvalue [8 x i32] %call, 6
478 %.fca.7.extract = extractvalue [8 x i32] %call, 7
479 store i32 %.fca.0.extract, ptr %ret, align 4
480 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 4
481 store i32 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 4
482 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8
483 store i32 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 4
484 %ref.tmp.sroa.6.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 12
485 store i32 %.fca.3.extract, ptr %ref.tmp.sroa.6.0..sroa_idx, align 4
486 %ref.tmp.sroa.7.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 16
487 store i32 %.fca.4.extract, ptr %ref.tmp.sroa.7.0..sroa_idx, align 4
488 %ref.tmp.sroa.8.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 20
489 store i32 %.fca.5.extract, ptr %ref.tmp.sroa.8.0..sroa_idx, align 4
490 %ref.tmp.sroa.9.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 24
491 store i32 %.fca.6.extract, ptr %ref.tmp.sroa.9.0..sroa_idx, align 4
492 %ref.tmp.sroa.10.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 28
493 store i32 %.fca.7.extract, ptr %ref.tmp.sroa.10.0..sroa_idx, align 4
497 define internal fastcc [8 x i32] @callee_St4x8(ptr nocapture noundef readonly byval(%struct.St4x8) align 4 %in) {
498 ; CHECK: .func (.param .align 16 .b8 func_retval0[32])
499 ; CHECK-LABEL: callee_St4x8(
500 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x8_param_0[32]
501 ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x8_param_0];
502 ; CHECK: ld.param.v4.u32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]], [[R7:%r[0-9]+]], [[R8:%r[0-9]+]]}, [callee_St4x8_param_0+16];
503 ; CHECK: st.param.v4.b32 [func_retval0+0], {[[R1]], [[R2]], [[R3]], [[R4]]};
504 ; CHECK: st.param.v4.b32 [func_retval0+16], {[[R5]], [[R6]], [[R7]], [[R8]]};
506 %1 = load i32, ptr %in, align 4
507 %arrayidx.1 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 1
508 %2 = load i32, ptr %arrayidx.1, align 4
509 %arrayidx.2 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 2
510 %3 = load i32, ptr %arrayidx.2, align 4
511 %arrayidx.3 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 3
512 %4 = load i32, ptr %arrayidx.3, align 4
513 %arrayidx.4 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 4
514 %5 = load i32, ptr %arrayidx.4, align 4
515 %arrayidx.5 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 5
516 %6 = load i32, ptr %arrayidx.5, align 4
517 %arrayidx.6 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 6
518 %7 = load i32, ptr %arrayidx.6, align 4
519 %arrayidx.7 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 7
520 %8 = load i32, ptr %arrayidx.7, align 4
521 %9 = insertvalue [8 x i32] poison, i32 %1, 0
522 %10 = insertvalue [8 x i32] %9, i32 %2, 1
523 %11 = insertvalue [8 x i32] %10, i32 %3, 2
524 %12 = insertvalue [8 x i32] %11, i32 %4, 3
525 %13 = insertvalue [8 x i32] %12, i32 %5, 4
526 %14 = insertvalue [8 x i32] %13, i32 %6, 5
527 %15 = insertvalue [8 x i32] %14, i32 %7, 6
528 %oldret = insertvalue [8 x i32] %15, i32 %8, 7
529 ret [8 x i32] %oldret
532 define dso_local void @caller_St8x1(ptr nocapture noundef readonly byval(%struct.St8x1) align 8 %in, ptr nocapture noundef writeonly %ret) {
533 ; CHECK-LABEL: .visible .func caller_St8x1(
534 ; CHECK: .param .align 8 .b8 caller_St8x1_param_0[8],
535 ; CHECK: .param .b64 caller_St8x1_param_1
537 ; CHECK: .param .b64 param0;
538 ; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
539 ; CHECK: .param .align 16 .b8 retval0[8];
540 ; CHECK: call.uni (retval0),
541 ; CHECK-NEXT: callee_St8x1,
545 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
546 %1 = load i64, ptr %in, align 8
547 %call = tail call fastcc [1 x i64] @callee_St8x1(i64 %1) #2
548 %.fca.0.extract = extractvalue [1 x i64] %call, 0
549 store i64 %.fca.0.extract, ptr %ret, align 8
553 define internal fastcc [1 x i64] @callee_St8x1(i64 %in.0.val) {
554 ; CHECK: .func (.param .align 16 .b8 func_retval0[8])
555 ; CHECK-LABEL: callee_St8x1(
556 ; CHECK-NEXT: .param .b64 callee_St8x1_param_0
557 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [callee_St8x1_param_0];
558 ; CHECK: st.param.b64 [func_retval0+0], [[RD1]];
560 %oldret = insertvalue [1 x i64] poison, i64 %in.0.val, 0
561 ret [1 x i64] %oldret
564 define dso_local void @caller_St8x2(ptr nocapture noundef readonly byval(%struct.St8x2) align 8 %in, ptr nocapture noundef writeonly %ret) {
565 ; CHECK-LABEL: .visible .func caller_St8x2(
566 ; CHECK: .param .align 8 .b8 caller_St8x2_param_0[16],
567 ; CHECK: .param .b64 caller_St8x2_param_1
569 ; CHECK: .param .align 16 .b8 param0[16];
570 ; CHECK: st.param.v2.b64 [param0+0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
571 ; CHECK: .param .align 16 .b8 retval0[16];
572 ; CHECK: call.uni (retval0),
573 ; CHECK-NEXT: callee_St8x2,
577 ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+0];
578 %call = tail call fastcc [2 x i64] @callee_St8x2(ptr noundef nonnull byval(%struct.St8x2) align 8 %in) #2
579 %.fca.0.extract = extractvalue [2 x i64] %call, 0
580 %.fca.1.extract = extractvalue [2 x i64] %call, 1
581 store i64 %.fca.0.extract, ptr %ret, align 8
582 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8
583 store i64 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 8
587 define internal fastcc [2 x i64] @callee_St8x2(ptr nocapture noundef readonly byval(%struct.St8x2) align 8 %in) {
588 ; CHECK: .func (.param .align 16 .b8 func_retval0[16])
589 ; CHECK-LABEL: callee_St8x2(
590 ; CHECK-NEXT: .param .align 16 .b8 callee_St8x2_param_0[16]
591 ; CHECK: ld.param.v2.u64 {[[RD1:%rd[0-9]+]], [[RD2:%rd[0-9]+]]}, [callee_St8x2_param_0];
592 ; CHECK: st.param.v2.b64 [func_retval0+0], {[[RD1]], [[RD2]]};
594 %1 = load i64, ptr %in, align 8
595 %arrayidx.1 = getelementptr inbounds [2 x i64], ptr %in, i64 0, i64 1
596 %2 = load i64, ptr %arrayidx.1, align 8
597 %3 = insertvalue [2 x i64] poison, i64 %1, 0
598 %oldret = insertvalue [2 x i64] %3, i64 %2, 1
599 ret [2 x i64] %oldret
602 define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct.St8x3) align 8 %in, ptr nocapture noundef writeonly %ret) {
603 ; CHECK-LABEL: .visible .func caller_St8x3(
604 ; CHECK: .param .align 8 .b8 caller_St8x3_param_0[24],
605 ; CHECK: .param .b64 caller_St8x3_param_1
607 ; CHECK: .param .align 16 .b8 param0[24];
608 ; CHECK: st.param.v2.b64 [param0+0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
609 ; CHECK: st.param.b64 [param0+16], {{%rd[0-9]+}};
610 ; CHECK: .param .align 16 .b8 retval0[24];
611 ; CHECK: call.uni (retval0),
612 ; CHECK-NEXT: callee_St8x3,
616 ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+0];
617 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+16];
618 %call = tail call fastcc [3 x i64] @callee_St8x3(ptr noundef nonnull byval(%struct.St8x3) align 8 %in) #2
619 %.fca.0.extract = extractvalue [3 x i64] %call, 0
620 %.fca.1.extract = extractvalue [3 x i64] %call, 1
621 %.fca.2.extract = extractvalue [3 x i64] %call, 2
622 store i64 %.fca.0.extract, ptr %ret, align 8
623 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8
624 store i64 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 8
625 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 16
626 store i64 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 8
630 define internal fastcc [3 x i64] @callee_St8x3(ptr nocapture noundef readonly byval(%struct.St8x3) align 8 %in) {
631 ; CHECK: .func (.param .align 16 .b8 func_retval0[24])
632 ; CHECK-LABEL: callee_St8x3(
633 ; CHECK-NEXT: .param .align 16 .b8 callee_St8x3_param_0[24]
634 ; CHECK: ld.param.v2.u64 {[[RD1:%rd[0-9]+]], [[RD2:%rd[0-9]+]]}, [callee_St8x3_param_0];
635 ; CHECK: ld.param.u64 [[RD3:%rd[0-9]+]], [callee_St8x3_param_0+16];
636 ; CHECK: st.param.v2.b64 [func_retval0+0], {[[RD1]], [[RD2]]};
637 ; CHECK: st.param.b64 [func_retval0+16], [[RD3]];
639 %1 = load i64, ptr %in, align 8
640 %arrayidx.1 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 1
641 %2 = load i64, ptr %arrayidx.1, align 8
642 %arrayidx.2 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 2
643 %3 = load i64, ptr %arrayidx.2, align 8
644 %4 = insertvalue [3 x i64] poison, i64 %1, 0
645 %5 = insertvalue [3 x i64] %4, i64 %2, 1
646 %oldret = insertvalue [3 x i64] %5, i64 %3, 2
647 ret [3 x i64] %oldret
650 define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct.St8x4) align 8 %in, ptr nocapture noundef writeonly %ret) {
651 ; CHECK-LABEL: .visible .func caller_St8x4(
652 ; CHECK: .param .align 8 .b8 caller_St8x4_param_0[32],
653 ; CHECK: .param .b64 caller_St8x4_param_1
655 ; CHECK: .param .align 16 .b8 param0[32];
656 ; CHECK: st.param.v2.b64 [param0+0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
657 ; CHECK: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
658 ; CHECK: .param .align 16 .b8 retval0[32];
659 ; CHECK: call.uni (retval0),
660 ; CHECK-NEXT: callee_St8x4,
664 ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+0];
665 ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+16];
666 %call = tail call fastcc [4 x i64] @callee_St8x4(ptr noundef nonnull byval(%struct.St8x4) align 8 %in) #2
667 %.fca.0.extract = extractvalue [4 x i64] %call, 0
668 %.fca.1.extract = extractvalue [4 x i64] %call, 1
669 %.fca.2.extract = extractvalue [4 x i64] %call, 2
670 %.fca.3.extract = extractvalue [4 x i64] %call, 3
671 store i64 %.fca.0.extract, ptr %ret, align 8
672 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8
673 store i64 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 8
674 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 16
675 store i64 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 8
676 %ref.tmp.sroa.6.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 24
677 store i64 %.fca.3.extract, ptr %ref.tmp.sroa.6.0..sroa_idx, align 8
681 define internal fastcc [4 x i64] @callee_St8x4(ptr nocapture noundef readonly byval(%struct.St8x4) align 8 %in) {
682 ; CHECK: .func (.param .align 16 .b8 func_retval0[32])
683 ; CHECK-LABEL: callee_St8x4(
684 ; CHECK-NEXT: .param .align 16 .b8 callee_St8x4_param_0[32]
685 ; CHECK: ld.param.v2.u64 {[[RD1:%rd[0-9]+]], [[RD2:%rd[0-9]+]]}, [callee_St8x4_param_0];
686 ; CHECK: ld.param.v2.u64 {[[RD3:%rd[0-9]+]], [[RD4:%rd[0-9]+]]}, [callee_St8x4_param_0+16];
687 ; CHECK: st.param.v2.b64 [func_retval0+0], {[[RD1]], [[RD2]]};
688 ; CHECK: st.param.v2.b64 [func_retval0+16], {[[RD3]], [[RD4]]};
690 %1 = load i64, ptr %in, align 8
691 %arrayidx.1 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 1
692 %2 = load i64, ptr %arrayidx.1, align 8
693 %arrayidx.2 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 2
694 %3 = load i64, ptr %arrayidx.2, align 8
695 %arrayidx.3 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 3
696 %4 = load i64, ptr %arrayidx.3, align 8
697 %5 = insertvalue [4 x i64] poison, i64 %1, 0
698 %6 = insertvalue [4 x i64] %5, i64 %2, 1
699 %7 = insertvalue [4 x i64] %6, i64 %3, 2
700 %oldret = insertvalue [4 x i64] %7, i64 %4, 3
701 ret [4 x i64] %oldret
704 ; Section 2 - checking that function argument (including retval) vectorization is done with private linkage.
706 define private fastcc [4 x i32] @callee_St4x4_private(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in) {
707 ; CHECK: .func (.param .align 16 .b8 func_retval0[16])
708 ; CHECK-LABEL: callee_St4x4_private(
709 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x4_private_param_0[16]
710 ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x4_private_param_0];
711 ; CHECK: st.param.v4.b32 [func_retval0+0], {[[R1]], [[R2]], [[R3]], [[R4]]};
713 %1 = load i32, ptr %in, align 4
714 %arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1
715 %2 = load i32, ptr %arrayidx.1, align 4
716 %arrayidx.2 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 2
717 %3 = load i32, ptr %arrayidx.2, align 4
718 %arrayidx.3 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 3
719 %4 = load i32, ptr %arrayidx.3, align 4
720 %5 = insertvalue [4 x i32] poison, i32 %1, 0
721 %6 = insertvalue [4 x i32] %5, i32 %2, 1
722 %7 = insertvalue [4 x i32] %6, i32 %3, 2
723 %oldret = insertvalue [4 x i32] %7, i32 %4, 3
724 ret [4 x i32] %oldret
727 ; Section 3 - checking that function argument (including retval) vectorization
728 ; is NOT done with linkage types other than internal and private.
730 define external fastcc [4 x i32] @callee_St4x4_external(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in) {
731 ; CHECK: .func (.param .align 4 .b8 func_retval0[16])
732 ; CHECK-LABEL: callee_St4x4_external(
733 ; CHECK-NEXT: .param .align 4 .b8 callee_St4x4_external_param_0[16]
734 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [callee_St4x4_external_param_0];
735 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [callee_St4x4_external_param_0+4];
736 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [callee_St4x4_external_param_0+8];
737 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [callee_St4x4_external_param_0+12];
738 ; CHECK: st.param.b32 [func_retval0+0], [[R1]];
739 ; CHECK: st.param.b32 [func_retval0+4], [[R2]];
740 ; CHECK: st.param.b32 [func_retval0+8], [[R3]];
741 ; CHECK: st.param.b32 [func_retval0+12], [[R4]];
743 %1 = load i32, ptr %in, align 4
744 %arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1
745 %2 = load i32, ptr %arrayidx.1, align 4
746 %arrayidx.2 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 2
747 %3 = load i32, ptr %arrayidx.2, align 4
748 %arrayidx.3 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 3
749 %4 = load i32, ptr %arrayidx.3, align 4
750 %5 = insertvalue [4 x i32] poison, i32 %1, 0
751 %6 = insertvalue [4 x i32] %5, i32 %2, 1
752 %7 = insertvalue [4 x i32] %6, i32 %3, 2
753 %oldret = insertvalue [4 x i32] %7, i32 %4, 3
754 ret [4 x i32] %oldret