1 ; RUN: llc < %s -mtriple=nvptx64-unknown-unknown | FileCheck %s
2 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-unknown-unknown | %ptxas-verify %}
4 ; Check that parameters of a __global__ (kernel) function do not get increased
5 ; alignment, and no additional vectorization is performed on loads/stores with
8 ; Test IR is a minimized version of IR generated with the following command
9 ; from the source code below:
10 ; $ clang++ -O3 --cuda-gpu-arch=sm_35 -S -emit-llvm src.cu
12 ; ----------------------------------------------------------------------------
15 ; struct St4x1 { uint32_t field[1]; };
16 ; struct St4x2 { uint32_t field[2]; };
17 ; struct St4x3 { uint32_t field[3]; };
18 ; struct St4x4 { uint32_t field[4]; };
19 ; struct St4x5 { uint32_t field[5]; };
20 ; struct St4x6 { uint32_t field[6]; };
21 ; struct St4x7 { uint32_t field[7]; };
22 ; struct St4x8 { uint32_t field[8]; };
23 ; struct St8x1 { uint64_t field[1]; };
24 ; struct St8x2 { uint64_t field[2]; };
25 ; struct St8x3 { uint64_t field[3]; };
26 ; struct St8x4 { uint64_t field[4]; };
28 ; #define DECLARE_FUNCTION(StName) \
29 ; static __global__ __attribute__((noinline)) \
30 ; void foo_##StName(struct StName in, struct StName* ret) { \
31 ; const unsigned size = sizeof(ret->field) / sizeof(*ret->field); \
32 ; for (unsigned i = 0; i != size; ++i) \
33 ; ret->field[i] = in.field[i]; \
36 ; DECLARE_FUNCTION(St4x1)
37 ; DECLARE_FUNCTION(St4x2)
38 ; DECLARE_FUNCTION(St4x3)
39 ; DECLARE_FUNCTION(St4x4)
40 ; DECLARE_FUNCTION(St4x5)
41 ; DECLARE_FUNCTION(St4x6)
42 ; DECLARE_FUNCTION(St4x7)
43 ; DECLARE_FUNCTION(St4x8)
44 ; DECLARE_FUNCTION(St8x1)
45 ; DECLARE_FUNCTION(St8x2)
46 ; DECLARE_FUNCTION(St8x3)
47 ; DECLARE_FUNCTION(St8x4)
48 ; ----------------------------------------------------------------------------
50 %struct.St4x1 = type { [1 x i32] }
51 %struct.St4x2 = type { [2 x i32] }
52 %struct.St4x3 = type { [3 x i32] }
53 %struct.St4x4 = type { [4 x i32] }
54 %struct.St4x5 = type { [5 x i32] }
55 %struct.St4x6 = type { [6 x i32] }
56 %struct.St4x7 = type { [7 x i32] }
57 %struct.St4x8 = type { [8 x i32] }
58 %struct.St8x1 = type { [1 x i64] }
59 %struct.St8x2 = type { [2 x i64] }
60 %struct.St8x3 = type { [3 x i64] }
61 %struct.St8x4 = type { [4 x i64] }
63 define dso_local void @foo_St4x1(ptr nocapture noundef readonly byval(%struct.St4x1) align 4 %in, ptr nocapture noundef writeonly %ret) {
64 ; CHECK-LABEL: .visible .func foo_St4x1(
65 ; CHECK: .param .align 4 .b8 foo_St4x1_param_0[4],
66 ; CHECK: .param .b64 foo_St4x1_param_1
68 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x1_param_1];
69 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x1_param_0];
70 ; CHECK: st.u32 [[[R1]]], [[R2]];
72 %1 = load i32, ptr %in, align 4
73 store i32 %1, ptr %ret, align 4
77 define dso_local void @foo_St4x2(ptr nocapture noundef readonly byval(%struct.St4x2) align 4 %in, ptr nocapture noundef writeonly %ret) {
78 ; CHECK-LABEL: .visible .func foo_St4x2(
79 ; CHECK: .param .align 4 .b8 foo_St4x2_param_0[8],
80 ; CHECK: .param .b64 foo_St4x2_param_1
82 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x2_param_1];
83 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x2_param_0];
84 ; CHECK: st.u32 [[[R1]]], [[R2]];
85 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x2_param_0+4];
86 ; CHECK: st.u32 [[[R1]]+4], [[R3]];
88 %1 = load i32, ptr %in, align 4
89 store i32 %1, ptr %ret, align 4
90 %arrayidx.1 = getelementptr inbounds [2 x i32], ptr %in, i64 0, i64 1
91 %2 = load i32, ptr %arrayidx.1, align 4
92 %arrayidx3.1 = getelementptr inbounds [2 x i32], ptr %ret, i64 0, i64 1
93 store i32 %2, ptr %arrayidx3.1, align 4
97 define dso_local void @foo_St4x3(ptr nocapture noundef readonly byval(%struct.St4x3) align 4 %in, ptr nocapture noundef writeonly %ret) {
98 ; CHECK-LABEL: .visible .func foo_St4x3(
99 ; CHECK: .param .align 4 .b8 foo_St4x3_param_0[12],
100 ; CHECK: .param .b64 foo_St4x3_param_1
102 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x3_param_1];
103 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x3_param_0];
104 ; CHECK: st.u32 [[[R1]]], [[R2]];
105 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x3_param_0+4];
106 ; CHECK: st.u32 [[[R1]]+4], [[R3]];
107 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x3_param_0+8];
108 ; CHECK: st.u32 [[[R1]]+8], [[R4]];
110 %1 = load i32, ptr %in, align 4
111 store i32 %1, ptr %ret, align 4
112 %arrayidx.1 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 1
113 %2 = load i32, ptr %arrayidx.1, align 4
114 %arrayidx3.1 = getelementptr inbounds [3 x i32], ptr %ret, i64 0, i64 1
115 store i32 %2, ptr %arrayidx3.1, align 4
116 %arrayidx.2 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 2
117 %3 = load i32, ptr %arrayidx.2, align 4
118 %arrayidx3.2 = getelementptr inbounds [3 x i32], ptr %ret, i64 0, i64 2
119 store i32 %3, ptr %arrayidx3.2, align 4
123 define dso_local void @foo_St4x4(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in, ptr nocapture noundef writeonly %ret) {
124 ; CHECK-LABEL: .visible .func foo_St4x4(
125 ; CHECK: .param .align 4 .b8 foo_St4x4_param_0[16],
126 ; CHECK: .param .b64 foo_St4x4_param_1
128 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x4_param_1];
129 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x4_param_0];
130 ; CHECK: st.u32 [[[R1]]], [[R2]];
131 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x4_param_0+4];
132 ; CHECK: st.u32 [[[R1]]+4], [[R3]];
133 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x4_param_0+8];
134 ; CHECK: st.u32 [[[R1]]+8], [[R4]];
135 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x4_param_0+12];
136 ; CHECK: st.u32 [[[R1]]+12], [[R5]];
138 %1 = load i32, ptr %in, align 4
139 store i32 %1, ptr %ret, align 4
140 %arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1
141 %2 = load i32, ptr %arrayidx.1, align 4
142 %arrayidx3.1 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 1
143 store i32 %2, ptr %arrayidx3.1, align 4
144 %arrayidx.2 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 2
145 %3 = load i32, ptr %arrayidx.2, align 4
146 %arrayidx3.2 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 2
147 store i32 %3, ptr %arrayidx3.2, align 4
148 %arrayidx.3 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 3
149 %4 = load i32, ptr %arrayidx.3, align 4
150 %arrayidx3.3 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 3
151 store i32 %4, ptr %arrayidx3.3, align 4
155 define dso_local void @foo_St4x5(ptr nocapture noundef readonly byval(%struct.St4x5) align 4 %in, ptr nocapture noundef writeonly %ret) {
156 ; CHECK-LABEL: .visible .func foo_St4x5(
157 ; CHECK: .param .align 4 .b8 foo_St4x5_param_0[20],
158 ; CHECK: .param .b64 foo_St4x5_param_1
160 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x5_param_1];
161 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x5_param_0];
162 ; CHECK: st.u32 [[[R1]]], [[R2]];
163 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x5_param_0+4];
164 ; CHECK: st.u32 [[[R1]]+4], [[R3]];
165 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x5_param_0+8];
166 ; CHECK: st.u32 [[[R1]]+8], [[R4]];
167 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x5_param_0+12];
168 ; CHECK: st.u32 [[[R1]]+12], [[R5]];
169 ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x5_param_0+16];
170 ; CHECK: st.u32 [[[R1]]+16], [[R6]];
172 %1 = load i32, ptr %in, align 4
173 store i32 %1, ptr %ret, align 4
174 %arrayidx.1 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 1
175 %2 = load i32, ptr %arrayidx.1, align 4
176 %arrayidx3.1 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 1
177 store i32 %2, ptr %arrayidx3.1, align 4
178 %arrayidx.2 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 2
179 %3 = load i32, ptr %arrayidx.2, align 4
180 %arrayidx3.2 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 2
181 store i32 %3, ptr %arrayidx3.2, align 4
182 %arrayidx.3 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 3
183 %4 = load i32, ptr %arrayidx.3, align 4
184 %arrayidx3.3 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 3
185 store i32 %4, ptr %arrayidx3.3, align 4
186 %arrayidx.4 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 4
187 %5 = load i32, ptr %arrayidx.4, align 4
188 %arrayidx3.4 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 4
189 store i32 %5, ptr %arrayidx3.4, align 4
193 define dso_local void @foo_St4x6(ptr nocapture noundef readonly byval(%struct.St4x6) align 4 %in, ptr nocapture noundef writeonly %ret) {
194 ; CHECK-LABEL: .visible .func foo_St4x6(
195 ; CHECK: .param .align 4 .b8 foo_St4x6_param_0[24],
196 ; CHECK: .param .b64 foo_St4x6_param_1
198 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x6_param_1];
199 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x6_param_0];
200 ; CHECK: st.u32 [[[R1]]], [[R2]];
201 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x6_param_0+4];
202 ; CHECK: st.u32 [[[R1]]+4], [[R3]];
203 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x6_param_0+8];
204 ; CHECK: st.u32 [[[R1]]+8], [[R4]];
205 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x6_param_0+12];
206 ; CHECK: st.u32 [[[R1]]+12], [[R5]];
207 ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x6_param_0+16];
208 ; CHECK: st.u32 [[[R1]]+16], [[R6]];
209 ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x6_param_0+20];
210 ; CHECK: st.u32 [[[R1]]+20], [[R7]];
212 %1 = load i32, ptr %in, align 4
213 store i32 %1, ptr %ret, align 4
214 %arrayidx.1 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 1
215 %2 = load i32, ptr %arrayidx.1, align 4
216 %arrayidx3.1 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 1
217 store i32 %2, ptr %arrayidx3.1, align 4
218 %arrayidx.2 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 2
219 %3 = load i32, ptr %arrayidx.2, align 4
220 %arrayidx3.2 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 2
221 store i32 %3, ptr %arrayidx3.2, align 4
222 %arrayidx.3 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 3
223 %4 = load i32, ptr %arrayidx.3, align 4
224 %arrayidx3.3 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 3
225 store i32 %4, ptr %arrayidx3.3, align 4
226 %arrayidx.4 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 4
227 %5 = load i32, ptr %arrayidx.4, align 4
228 %arrayidx3.4 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 4
229 store i32 %5, ptr %arrayidx3.4, align 4
230 %arrayidx.5 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 5
231 %6 = load i32, ptr %arrayidx.5, align 4
232 %arrayidx3.5 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 5
233 store i32 %6, ptr %arrayidx3.5, align 4
237 define dso_local void @foo_St4x7(ptr nocapture noundef readonly byval(%struct.St4x7) align 4 %in, ptr nocapture noundef writeonly %ret) {
238 ; CHECK-LABEL: .visible .func foo_St4x7(
239 ; CHECK: .param .align 4 .b8 foo_St4x7_param_0[28],
240 ; CHECK: .param .b64 foo_St4x7_param_1
242 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x7_param_1];
243 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x7_param_0];
244 ; CHECK: st.u32 [[[R1]]], [[R2]];
245 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x7_param_0+4];
246 ; CHECK: st.u32 [[[R1]]+4], [[R3]];
247 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x7_param_0+8];
248 ; CHECK: st.u32 [[[R1]]+8], [[R4]];
249 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x7_param_0+12];
250 ; CHECK: st.u32 [[[R1]]+12], [[R5]];
251 ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x7_param_0+16];
252 ; CHECK: st.u32 [[[R1]]+16], [[R6]];
253 ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x7_param_0+20];
254 ; CHECK: st.u32 [[[R1]]+20], [[R7]];
255 ; CHECK: ld.param.u32 [[R8:%r[0-9]+]], [foo_St4x7_param_0+24];
256 ; CHECK: st.u32 [[[R1]]+24], [[R8]];
258 %1 = load i32, ptr %in, align 4
259 store i32 %1, ptr %ret, align 4
260 %arrayidx.1 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 1
261 %2 = load i32, ptr %arrayidx.1, align 4
262 %arrayidx3.1 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 1
263 store i32 %2, ptr %arrayidx3.1, align 4
264 %arrayidx.2 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 2
265 %3 = load i32, ptr %arrayidx.2, align 4
266 %arrayidx3.2 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 2
267 store i32 %3, ptr %arrayidx3.2, align 4
268 %arrayidx.3 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 3
269 %4 = load i32, ptr %arrayidx.3, align 4
270 %arrayidx3.3 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 3
271 store i32 %4, ptr %arrayidx3.3, align 4
272 %arrayidx.4 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 4
273 %5 = load i32, ptr %arrayidx.4, align 4
274 %arrayidx3.4 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 4
275 store i32 %5, ptr %arrayidx3.4, align 4
276 %arrayidx.5 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 5
277 %6 = load i32, ptr %arrayidx.5, align 4
278 %arrayidx3.5 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 5
279 store i32 %6, ptr %arrayidx3.5, align 4
280 %arrayidx.6 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 6
281 %7 = load i32, ptr %arrayidx.6, align 4
282 %arrayidx3.6 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 6
283 store i32 %7, ptr %arrayidx3.6, align 4
287 define dso_local void @foo_St4x8(ptr nocapture noundef readonly byval(%struct.St4x8) align 4 %in, ptr nocapture noundef writeonly %ret) {
288 ; CHECK-LABEL: .visible .func foo_St4x8(
289 ; CHECK: .param .align 4 .b8 foo_St4x8_param_0[32],
290 ; CHECK: .param .b64 foo_St4x8_param_1
292 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x8_param_1];
293 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x8_param_0];
294 ; CHECK: st.u32 [[[R1]]], [[R2]];
295 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x8_param_0+4];
296 ; CHECK: st.u32 [[[R1]]+4], [[R3]];
297 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x8_param_0+8];
298 ; CHECK: st.u32 [[[R1]]+8], [[R4]];
299 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x8_param_0+12];
300 ; CHECK: st.u32 [[[R1]]+12], [[R5]];
301 ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x8_param_0+16];
302 ; CHECK: st.u32 [[[R1]]+16], [[R6]];
303 ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x8_param_0+20];
304 ; CHECK: st.u32 [[[R1]]+20], [[R7]];
305 ; CHECK: ld.param.u32 [[R8:%r[0-9]+]], [foo_St4x8_param_0+24];
306 ; CHECK: st.u32 [[[R1]]+24], [[R8]];
307 ; CHECK: ld.param.u32 [[R9:%r[0-9]+]], [foo_St4x8_param_0+28];
308 ; CHECK: st.u32 [[[R1]]+28], [[R9]];
310 %1 = load i32, ptr %in, align 4
311 store i32 %1, ptr %ret, align 4
312 %arrayidx.1 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 1
313 %2 = load i32, ptr %arrayidx.1, align 4
314 %arrayidx3.1 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 1
315 store i32 %2, ptr %arrayidx3.1, align 4
316 %arrayidx.2 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 2
317 %3 = load i32, ptr %arrayidx.2, align 4
318 %arrayidx3.2 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 2
319 store i32 %3, ptr %arrayidx3.2, align 4
320 %arrayidx.3 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 3
321 %4 = load i32, ptr %arrayidx.3, align 4
322 %arrayidx3.3 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 3
323 store i32 %4, ptr %arrayidx3.3, align 4
324 %arrayidx.4 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 4
325 %5 = load i32, ptr %arrayidx.4, align 4
326 %arrayidx3.4 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 4
327 store i32 %5, ptr %arrayidx3.4, align 4
328 %arrayidx.5 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 5
329 %6 = load i32, ptr %arrayidx.5, align 4
330 %arrayidx3.5 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 5
331 store i32 %6, ptr %arrayidx3.5, align 4
332 %arrayidx.6 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 6
333 %7 = load i32, ptr %arrayidx.6, align 4
334 %arrayidx3.6 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 6
335 store i32 %7, ptr %arrayidx3.6, align 4
336 %arrayidx.7 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 7
337 %8 = load i32, ptr %arrayidx.7, align 4
338 %arrayidx3.7 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 7
339 store i32 %8, ptr %arrayidx3.7, align 4
343 define dso_local void @foo_St8x1(ptr nocapture noundef readonly byval(%struct.St8x1) align 8 %in, ptr nocapture noundef writeonly %ret) {
344 ; CHECK-LABEL: .visible .func foo_St8x1(
345 ; CHECK: .param .align 8 .b8 foo_St8x1_param_0[8],
346 ; CHECK: .param .b64 foo_St8x1_param_1
348 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x1_param_1];
349 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x1_param_0];
350 ; CHECK: st.u64 [[[R1]]], [[RD1]];
352 %1 = load i64, ptr %in, align 8
353 store i64 %1, ptr %ret, align 8
357 define dso_local void @foo_St8x2(ptr nocapture noundef readonly byval(%struct.St8x2) align 8 %in, ptr nocapture noundef writeonly %ret) {
358 ; CHECK-LABEL: .visible .func foo_St8x2(
359 ; CHECK: .param .align 8 .b8 foo_St8x2_param_0[16],
360 ; CHECK: .param .b64 foo_St8x2_param_1
362 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x2_param_1];
363 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x2_param_0];
364 ; CHECK: st.u64 [[[R1]]], [[RD1]];
365 ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x2_param_0+8];
366 ; CHECK: st.u64 [[[R1]]+8], [[RD2]];
368 %1 = load i64, ptr %in, align 8
369 store i64 %1, ptr %ret, align 8
370 %arrayidx.1 = getelementptr inbounds [2 x i64], ptr %in, i64 0, i64 1
371 %2 = load i64, ptr %arrayidx.1, align 8
372 %arrayidx3.1 = getelementptr inbounds [2 x i64], ptr %ret, i64 0, i64 1
373 store i64 %2, ptr %arrayidx3.1, align 8
377 define dso_local void @foo_St8x3(ptr nocapture noundef readonly byval(%struct.St8x3) align 8 %in, ptr nocapture noundef writeonly %ret) {
378 ; CHECK-LABEL: .visible .func foo_St8x3(
379 ; CHECK: .param .align 8 .b8 foo_St8x3_param_0[24],
380 ; CHECK: .param .b64 foo_St8x3_param_1
382 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x3_param_1];
383 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x3_param_0];
384 ; CHECK: st.u64 [[[R1]]], [[RD1]];
385 ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x3_param_0+8];
386 ; CHECK: st.u64 [[[R1]]+8], [[RD2]];
387 ; CHECK: ld.param.u64 [[RD3:%rd[0-9]+]], [foo_St8x3_param_0+16];
388 ; CHECK: st.u64 [[[R1]]+16], [[RD3]];
390 %1 = load i64, ptr %in, align 8
391 store i64 %1, ptr %ret, align 8
392 %arrayidx.1 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 1
393 %2 = load i64, ptr %arrayidx.1, align 8
394 %arrayidx3.1 = getelementptr inbounds [3 x i64], ptr %ret, i64 0, i64 1
395 store i64 %2, ptr %arrayidx3.1, align 8
396 %arrayidx.2 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 2
397 %3 = load i64, ptr %arrayidx.2, align 8
398 %arrayidx3.2 = getelementptr inbounds [3 x i64], ptr %ret, i64 0, i64 2
399 store i64 %3, ptr %arrayidx3.2, align 8
403 define dso_local void @foo_St8x4(ptr nocapture noundef readonly byval(%struct.St8x4) align 8 %in, ptr nocapture noundef writeonly %ret) {
404 ; CHECK-LABEL: .visible .func foo_St8x4(
405 ; CHECK: .param .align 8 .b8 foo_St8x4_param_0[32],
406 ; CHECK: .param .b64 foo_St8x4_param_1
408 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x4_param_1];
409 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x4_param_0];
410 ; CHECK: st.u64 [[[R1]]], [[RD1]];
411 ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x4_param_0+8];
412 ; CHECK: st.u64 [[[R1]]+8], [[RD2]];
413 ; CHECK: ld.param.u64 [[RD3:%rd[0-9]+]], [foo_St8x4_param_0+16];
414 ; CHECK: st.u64 [[[R1]]+16], [[RD3]];
415 ; CHECK: ld.param.u64 [[RD4:%rd[0-9]+]], [foo_St8x4_param_0+24];
416 ; CHECK: st.u64 [[[R1]]+24], [[RD4]];
418 %1 = load i64, ptr %in, align 8
419 store i64 %1, ptr %ret, align 8
420 %arrayidx.1 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 1
421 %2 = load i64, ptr %arrayidx.1, align 8
422 %arrayidx3.1 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 1
423 store i64 %2, ptr %arrayidx3.1, align 8
424 %arrayidx.2 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 2
425 %3 = load i64, ptr %arrayidx.2, align 8
426 %arrayidx3.2 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 2
427 store i64 %3, ptr %arrayidx3.2, align 8
428 %arrayidx.3 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 3
429 %4 = load i64, ptr %arrayidx.3, align 8
430 %arrayidx3.3 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 3
431 store i64 %4, ptr %arrayidx3.3, align 8