1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc < %s -global-isel=1 -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; Aggregate types holding two, three and four <8 x i8> vectors; used as the
; return types of the <8 x i8> structured-load tests.
5 %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
6 %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
7 %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
; Calls the ld2 intrinsic for <8 x i8> on %A and returns the two-vector struct.
; The expected output is a single ld2.8b writing v0, v1 and addressing via x0.
9 define %struct.__neon_int8x8x2_t @ld2_8b(ptr %A) nounwind {
10 ; CHECK-LABEL: ld2_8b:
12 ; CHECK-NEXT: ld2.8b { v0, v1 }, [x0]
14 ; Make sure we are loading into the results defined by the ABI (i.e., v0, v1)
15 ; and from the argument of the function also defined by ABI (i.e., x0)
16 %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
17 ret %struct.__neon_int8x8x2_t %tmp2
; Calls the ld3 intrinsic for <8 x i8> on %A and returns the three-vector struct.
; The expected output is a single ld3.8b writing v0-v2 and addressing via x0.
20 define %struct.__neon_int8x8x3_t @ld3_8b(ptr %A) nounwind {
21 ; CHECK-LABEL: ld3_8b:
23 ; CHECK-NEXT: ld3.8b { v0, v1, v2 }, [x0]
25 ; Make sure we are using the operands defined by the ABI
26 %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0(ptr %A)
27 ret %struct.__neon_int8x8x3_t %tmp2
; Calls the ld4 intrinsic for <8 x i8> on %A and returns the four-vector struct.
; The expected output is a single ld4.8b writing v0-v3 and addressing via x0.
30 define %struct.__neon_int8x8x4_t @ld4_8b(ptr %A) nounwind {
31 ; CHECK-LABEL: ld4_8b:
33 ; CHECK-NEXT: ld4.8b { v0, v1, v2, v3 }, [x0]
35 ; Make sure we are using the operands defined by the ABI
36 %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0(ptr %A)
37 ret %struct.__neon_int8x8x4_t %tmp2
; Declarations of the ld2/ld3/ld4 intrinsics for <8 x i8>; each takes one
; pointer and is marked nounwind readonly.
40 declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0(ptr) nounwind readonly
41 declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0(ptr) nounwind readonly
42 declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0(ptr) nounwind readonly
; Aggregate types holding two, three and four <16 x i8> vectors.
44 %struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> }
45 %struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
46 %struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
; Calls the ld2 intrinsic for <16 x i8> on %A and returns the two-vector struct.
; The expected output is a single ld2.16b writing v0, v1 and addressing via x0.
48 define %struct.__neon_int8x16x2_t @ld2_16b(ptr %A) nounwind {
49 ; CHECK-LABEL: ld2_16b:
51 ; CHECK-NEXT: ld2.16b { v0, v1 }, [x0]
53 ; Make sure we are using the operands defined by the ABI
54 %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0(ptr %A)
55 ret %struct.__neon_int8x16x2_t %tmp2
; Calls the ld3 intrinsic for <16 x i8> on %A and returns the three-vector struct.
; The expected output is a single ld3.16b writing v0-v2 and addressing via x0.
58 define %struct.__neon_int8x16x3_t @ld3_16b(ptr %A) nounwind {
59 ; CHECK-LABEL: ld3_16b:
61 ; CHECK-NEXT: ld3.16b { v0, v1, v2 }, [x0]
63 ; Make sure we are using the operands defined by the ABI
64 %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0(ptr %A)
65 ret %struct.__neon_int8x16x3_t %tmp2
; Calls the ld4 intrinsic for <16 x i8> on %A and returns the four-vector struct.
; The expected output is a single ld4.16b writing v0-v3 and addressing via x0.
68 define %struct.__neon_int8x16x4_t @ld4_16b(ptr %A) nounwind {
69 ; CHECK-LABEL: ld4_16b:
71 ; CHECK-NEXT: ld4.16b { v0, v1, v2, v3 }, [x0]
73 ; Make sure we are using the operands defined by the ABI
74 %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0(ptr %A)
75 ret %struct.__neon_int8x16x4_t %tmp2
; Declarations of the ld2/ld3/ld4 intrinsics for <16 x i8>; each takes one
; pointer and is marked nounwind readonly.
78 declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0(ptr) nounwind readonly
79 declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0(ptr) nounwind readonly
80 declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0(ptr) nounwind readonly
; Aggregate types holding two, three and four <4 x i16> vectors.
82 %struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
83 %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
84 %struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
; Calls the ld2 intrinsic for <4 x i16> on %A and returns the two-vector struct.
; The expected output is a single ld2.4h writing v0, v1 and addressing via x0.
86 define %struct.__neon_int16x4x2_t @ld2_4h(ptr %A) nounwind {
87 ; CHECK-LABEL: ld2_4h:
89 ; CHECK-NEXT: ld2.4h { v0, v1 }, [x0]
91 ; Make sure we are using the operands defined by the ABI
92 %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0(ptr %A)
93 ret %struct.__neon_int16x4x2_t %tmp2
; Calls the ld3 intrinsic for <4 x i16> on %A and returns the three-vector struct.
; The expected output is a single ld3.4h writing v0-v2 and addressing via x0.
96 define %struct.__neon_int16x4x3_t @ld3_4h(ptr %A) nounwind {
97 ; CHECK-LABEL: ld3_4h:
99 ; CHECK-NEXT: ld3.4h { v0, v1, v2 }, [x0]
101 ; Make sure we are using the operands defined by the ABI
102 %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0(ptr %A)
103 ret %struct.__neon_int16x4x3_t %tmp2
; Calls the ld4 intrinsic for <4 x i16> on %A and returns the four-vector struct.
; The expected output is a single ld4.4h writing v0-v3 and addressing via x0.
106 define %struct.__neon_int16x4x4_t @ld4_4h(ptr %A) nounwind {
107 ; CHECK-LABEL: ld4_4h:
109 ; CHECK-NEXT: ld4.4h { v0, v1, v2, v3 }, [x0]
111 ; Make sure we are using the operands defined by the ABI
112 %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0(ptr %A)
113 ret %struct.__neon_int16x4x4_t %tmp2
; Declarations of the ld2/ld3/ld4 intrinsics for <4 x i16>; each takes one
; pointer and is marked nounwind readonly.
116 declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0(ptr) nounwind readonly
117 declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0(ptr) nounwind readonly
118 declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0(ptr) nounwind readonly
; Aggregate types holding two, three and four <8 x i16> vectors.
120 %struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
121 %struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
122 %struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
; Calls the ld2 intrinsic for <8 x i16> on %A and returns the two-vector struct.
; The expected output is a single ld2.8h writing v0, v1 and addressing via x0.
124 define %struct.__neon_int16x8x2_t @ld2_8h(ptr %A) nounwind {
125 ; CHECK-LABEL: ld2_8h:
127 ; CHECK-NEXT: ld2.8h { v0, v1 }, [x0]
129 ; Make sure we are using the operands defined by the ABI
130 %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0(ptr %A)
131 ret %struct.__neon_int16x8x2_t %tmp2
; Calls the ld3 intrinsic for <8 x i16> on %A and returns the three-vector struct.
; The expected output is a single ld3.8h writing v0-v2 and addressing via x0.
134 define %struct.__neon_int16x8x3_t @ld3_8h(ptr %A) nounwind {
135 ; CHECK-LABEL: ld3_8h:
137 ; CHECK-NEXT: ld3.8h { v0, v1, v2 }, [x0]
139 ; Make sure we are using the operands defined by the ABI
140 %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0(ptr %A)
141 ret %struct.__neon_int16x8x3_t %tmp2
; Calls the ld4 intrinsic for <8 x i16> on %A and returns the four-vector struct.
; The expected output is a single ld4.8h writing v0-v3 and addressing via x0.
144 define %struct.__neon_int16x8x4_t @ld4_8h(ptr %A) nounwind {
145 ; CHECK-LABEL: ld4_8h:
147 ; CHECK-NEXT: ld4.8h { v0, v1, v2, v3 }, [x0]
149 ; Make sure we are using the operands defined by the ABI
150 %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0(ptr %A)
151 ret %struct.__neon_int16x8x4_t %tmp2
; Declarations of the ld2/ld3/ld4 intrinsics for <8 x i16>; each takes one
; pointer and is marked nounwind readonly.
154 declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0(ptr) nounwind readonly
155 declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0(ptr) nounwind readonly
156 declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0(ptr) nounwind readonly
; Aggregate types holding two, three and four <2 x i32> vectors.
158 %struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
159 %struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
160 %struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
; Calls the ld2 intrinsic for <2 x i32> on %A and returns the two-vector struct.
; The expected output is a single ld2.2s writing v0, v1 and addressing via x0.
162 define %struct.__neon_int32x2x2_t @ld2_2s(ptr %A) nounwind {
163 ; CHECK-LABEL: ld2_2s:
165 ; CHECK-NEXT: ld2.2s { v0, v1 }, [x0]
167 ; Make sure we are using the operands defined by the ABI
168 %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0(ptr %A)
169 ret %struct.__neon_int32x2x2_t %tmp2
; Calls the ld3 intrinsic for <2 x i32> on %A and returns the three-vector struct.
; The expected output is a single ld3.2s writing v0-v2 and addressing via x0.
172 define %struct.__neon_int32x2x3_t @ld3_2s(ptr %A) nounwind {
173 ; CHECK-LABEL: ld3_2s:
175 ; CHECK-NEXT: ld3.2s { v0, v1, v2 }, [x0]
177 ; Make sure we are using the operands defined by the ABI
178 %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0(ptr %A)
179 ret %struct.__neon_int32x2x3_t %tmp2
; Calls the ld4 intrinsic for <2 x i32> on %A and returns the four-vector struct.
; The expected output is a single ld4.2s writing v0-v3 and addressing via x0.
182 define %struct.__neon_int32x2x4_t @ld4_2s(ptr %A) nounwind {
183 ; CHECK-LABEL: ld4_2s:
185 ; CHECK-NEXT: ld4.2s { v0, v1, v2, v3 }, [x0]
187 ; Make sure we are using the operands defined by the ABI
188 %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0(ptr %A)
189 ret %struct.__neon_int32x2x4_t %tmp2
; Declarations of the ld2/ld3/ld4 intrinsics for <2 x i32>; each takes one
; pointer and is marked nounwind readonly.
192 declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0(ptr) nounwind readonly
193 declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0(ptr) nounwind readonly
194 declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0(ptr) nounwind readonly
; Aggregate types holding two, three and four <4 x i32> vectors.
196 %struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
197 %struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
198 %struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
; Calls the ld2 intrinsic for <4 x i32> on %A and returns the two-vector struct.
; The expected output is a single ld2.4s writing v0, v1 and addressing via x0.
200 define %struct.__neon_int32x4x2_t @ld2_4s(ptr %A) nounwind {
201 ; CHECK-LABEL: ld2_4s:
203 ; CHECK-NEXT: ld2.4s { v0, v1 }, [x0]
205 ; Make sure we are using the operands defined by the ABI
206 %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0(ptr %A)
207 ret %struct.__neon_int32x4x2_t %tmp2
; Calls the ld3 intrinsic for <4 x i32> on %A and returns the three-vector struct.
; The expected output is a single ld3.4s writing v0-v2 and addressing via x0.
210 define %struct.__neon_int32x4x3_t @ld3_4s(ptr %A) nounwind {
211 ; CHECK-LABEL: ld3_4s:
213 ; CHECK-NEXT: ld3.4s { v0, v1, v2 }, [x0]
215 ; Make sure we are using the operands defined by the ABI
216 %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0(ptr %A)
217 ret %struct.__neon_int32x4x3_t %tmp2
; Calls the ld4 intrinsic for <4 x i32> on %A and returns the four-vector struct.
; The expected output is a single ld4.4s writing v0-v3 and addressing via x0.
220 define %struct.__neon_int32x4x4_t @ld4_4s(ptr %A) nounwind {
221 ; CHECK-LABEL: ld4_4s:
223 ; CHECK-NEXT: ld4.4s { v0, v1, v2, v3 }, [x0]
225 ; Make sure we are using the operands defined by the ABI
226 %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0(ptr %A)
227 ret %struct.__neon_int32x4x4_t %tmp2
; Declarations of the ld2/ld3/ld4 intrinsics for <4 x i32>; each takes one
; pointer and is marked nounwind readonly.
230 declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0(ptr) nounwind readonly
231 declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0(ptr) nounwind readonly
232 declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0(ptr) nounwind readonly
; Aggregate types holding two, three and four <2 x i64> vectors.
234 %struct.__neon_int64x2x2_t = type { <2 x i64>, <2 x i64> }
235 %struct.__neon_int64x2x3_t = type { <2 x i64>, <2 x i64>, <2 x i64> }
236 %struct.__neon_int64x2x4_t = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }
; Calls the ld2 intrinsic for <2 x i64> on %A and returns the two-vector struct.
; The expected output is a single ld2.2d writing v0, v1 and addressing via x0.
238 define %struct.__neon_int64x2x2_t @ld2_2d(ptr %A) nounwind {
239 ; CHECK-LABEL: ld2_2d:
241 ; CHECK-NEXT: ld2.2d { v0, v1 }, [x0]
243 ; Make sure we are using the operands defined by the ABI
244 %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0(ptr %A)
245 ret %struct.__neon_int64x2x2_t %tmp2
; Calls the ld3 intrinsic for <2 x i64> on %A and returns the three-vector struct.
; The expected output is a single ld3.2d writing v0-v2 and addressing via x0.
248 define %struct.__neon_int64x2x3_t @ld3_2d(ptr %A) nounwind {
249 ; CHECK-LABEL: ld3_2d:
251 ; CHECK-NEXT: ld3.2d { v0, v1, v2 }, [x0]
253 ; Make sure we are using the operands defined by the ABI
254 %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0(ptr %A)
255 ret %struct.__neon_int64x2x3_t %tmp2
; Calls the ld4 intrinsic for <2 x i64> on %A and returns the four-vector struct.
; The expected output is a single ld4.2d writing v0-v3 and addressing via x0.
258 define %struct.__neon_int64x2x4_t @ld4_2d(ptr %A) nounwind {
259 ; CHECK-LABEL: ld4_2d:
261 ; CHECK-NEXT: ld4.2d { v0, v1, v2, v3 }, [x0]
263 ; Make sure we are using the operands defined by the ABI
264 %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0(ptr %A)
265 ret %struct.__neon_int64x2x4_t %tmp2
; Declarations of the ld2/ld3/ld4 intrinsics for <2 x i64>; each takes one
; pointer and is marked nounwind readonly.
268 declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0(ptr) nounwind readonly
269 declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0(ptr) nounwind readonly
270 declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0(ptr) nounwind readonly
; Aggregate types holding two, three and four <1 x i64> vectors.
272 %struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }
273 %struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
274 %struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
; Calls the ld2 intrinsic for <1 x i64> on %A and returns the two-vector struct.
; Note that the expected instruction is ld1.1d with two registers (v0, v1),
; not an ld2 form.
277 define %struct.__neon_int64x1x2_t @ld2_1di64(ptr %A) nounwind {
278 ; CHECK-LABEL: ld2_1di64:
280 ; CHECK-NEXT: ld1.1d { v0, v1 }, [x0]
282 ; Make sure we are using the operands defined by the ABI
283 %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0(ptr %A)
284 ret %struct.__neon_int64x1x2_t %tmp2
; Calls the ld3 intrinsic for <1 x i64> on %A and returns the three-vector struct.
; Note that the expected instruction is ld1.1d with three registers (v0-v2),
; not an ld3 form.
287 define %struct.__neon_int64x1x3_t @ld3_1di64(ptr %A) nounwind {
288 ; CHECK-LABEL: ld3_1di64:
290 ; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0]
292 ; Make sure we are using the operands defined by the ABI
293 %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0(ptr %A)
294 ret %struct.__neon_int64x1x3_t %tmp2
; Calls the ld4 intrinsic for <1 x i64> on %A and returns the four-vector struct.
; Note that the expected instruction is ld1.1d with four registers (v0-v3),
; not an ld4 form.
297 define %struct.__neon_int64x1x4_t @ld4_1di64(ptr %A) nounwind {
298 ; CHECK-LABEL: ld4_1di64:
300 ; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0]
302 ; Make sure we are using the operands defined by the ABI
303 %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0(ptr %A)
304 ret %struct.__neon_int64x1x4_t %tmp2
; Declarations of the ld2/ld3/ld4 intrinsics for <1 x i64>; each takes one
; pointer and is marked nounwind readonly.
308 declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0(ptr) nounwind readonly
309 declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0(ptr) nounwind readonly
310 declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0(ptr) nounwind readonly
; Aggregate types holding two, three and four <1 x double> vectors.
312 %struct.__neon_float64x1x2_t = type { <1 x double>, <1 x double> }
313 %struct.__neon_float64x1x3_t = type { <1 x double>, <1 x double>, <1 x double> }
314 %struct.__neon_float64x1x4_t = type { <1 x double>, <1 x double>, <1 x double>, <1 x double> }
; Calls the ld2 intrinsic for <1 x double> on %A and returns the two-vector
; struct. Note that the expected instruction is ld1.1d with two registers
; (v0, v1), not an ld2 form.
317 define %struct.__neon_float64x1x2_t @ld2_1df64(ptr %A) nounwind {
318 ; CHECK-LABEL: ld2_1df64:
320 ; CHECK-NEXT: ld1.1d { v0, v1 }, [x0]
322 ; Make sure we are using the operands defined by the ABI
323 %tmp2 = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0(ptr %A)
324 ret %struct.__neon_float64x1x2_t %tmp2
; Calls the ld3 intrinsic for <1 x double> on %A and returns the three-vector
; struct. Note that the expected instruction is ld1.1d with three registers
; (v0-v2), not an ld3 form.
327 define %struct.__neon_float64x1x3_t @ld3_1df64(ptr %A) nounwind {
328 ; CHECK-LABEL: ld3_1df64:
330 ; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0]
332 ; Make sure we are using the operands defined by the ABI
333 %tmp2 = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0(ptr %A)
334 ret %struct.__neon_float64x1x3_t %tmp2
; Calls the ld4 intrinsic for <1 x double> on %A and returns the four-vector
; struct. Note that the expected instruction is ld1.1d with four registers
; (v0-v3), not an ld4 form.
337 define %struct.__neon_float64x1x4_t @ld4_1df64(ptr %A) nounwind {
338 ; CHECK-LABEL: ld4_1df64:
340 ; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0]
342 ; Make sure we are using the operands defined by the ABI
343 %tmp2 = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr %A)
344 ret %struct.__neon_float64x1x4_t %tmp2
; Declarations of the ld2/ld3/ld4 intrinsics for <1 x double>; each takes one
; pointer and is marked nounwind readonly.
347 declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0(ptr) nounwind readonly
348 declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0(ptr) nounwind readonly
349 declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr) nounwind readonly
; Calls the ld2lane intrinsic for <16 x i8> with %L1, %L2, lane index 1 and %A.
; Both selectors (SelectionDAG and GlobalISel) are expected to emit
; ld2.b { v0, v1 }[1], [x0]; their expectations differ only in the order of
; the register kill annotations.
352 define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, ptr %A) nounwind {
353 ; Make sure we are using the operands defined by the ABI
354 ; CHECK-SD-LABEL: ld2lane_16b:
355 ; CHECK-SD: // %bb.0:
356 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
357 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
358 ; CHECK-SD-NEXT: ld2.b { v0, v1 }[1], [x0]
361 ; CHECK-GI-LABEL: ld2lane_16b:
362 ; CHECK-GI: // %bb.0:
363 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
364 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
365 ; CHECK-GI-NEXT: ld2.b { v0, v1 }[1], [x0]
367 %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, i64 1, ptr %A)
368 ret %struct.__neon_int8x16x2_t %tmp2
; Calls the ld3lane intrinsic for <16 x i8> with %L1-%L3, lane index 1 and %A.
; Both selectors (SelectionDAG and GlobalISel) are expected to emit
; ld3.b { v0, v1, v2 }[1], [x0]; their expectations differ only in the order
; of the register kill annotations.
371 define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, ptr %A) nounwind {
372 ; Make sure we are using the operands defined by the ABI
373 ; CHECK-SD-LABEL: ld3lane_16b:
374 ; CHECK-SD: // %bb.0:
375 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
376 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
377 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
378 ; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[1], [x0]
381 ; CHECK-GI-LABEL: ld3lane_16b:
382 ; CHECK-GI: // %bb.0:
383 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
384 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
385 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
386 ; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[1], [x0]
388 %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, ptr %A)
389 ret %struct.__neon_int8x16x3_t %tmp2
; Calls the ld4lane intrinsic for <16 x i8> with %L1-%L4, lane index 1 and %A.
; Both selectors (SelectionDAG and GlobalISel) are expected to emit
; ld4.b { v0, v1, v2, v3 }[1], [x0]; their expectations differ only in the
; order of the register kill annotations.
392 define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, ptr %A) nounwind {
393 ; Make sure we are using the operands defined by the ABI
394 ; CHECK-SD-LABEL: ld4lane_16b:
395 ; CHECK-SD: // %bb.0:
396 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
397 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
398 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
399 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
400 ; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[1], [x0]
403 ; CHECK-GI-LABEL: ld4lane_16b:
404 ; CHECK-GI: // %bb.0:
405 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
406 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
407 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
408 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
409 ; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[1], [x0]
411 %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, ptr %A)
412 ret %struct.__neon_int8x16x4_t %tmp2
; Declarations of the ld2lane/ld3lane/ld4lane intrinsics for <16 x i8>: the
; input vectors come first, followed by an i64 lane index and the pointer.
415 declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr) nounwind readonly
416 declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind readonly
417 declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind readonly
; Calls the ld2lane intrinsic for <8 x i16> with %L1, %L2, lane index 1 and %A.
; Both selectors (SelectionDAG and GlobalISel) are expected to emit
; ld2.h { v0, v1 }[1], [x0]; their expectations differ only in the order of
; the register kill annotations.
419 define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, ptr %A) nounwind {
420 ; Make sure we are using the operands defined by the ABI
421 ; CHECK-SD-LABEL: ld2lane_8h:
422 ; CHECK-SD: // %bb.0:
423 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
424 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
425 ; CHECK-SD-NEXT: ld2.h { v0, v1 }[1], [x0]
428 ; CHECK-GI-LABEL: ld2lane_8h:
429 ; CHECK-GI: // %bb.0:
430 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
431 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
432 ; CHECK-GI-NEXT: ld2.h { v0, v1 }[1], [x0]
434 %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, i64 1, ptr %A)
435 ret %struct.__neon_int16x8x2_t %tmp2
; Calls the ld3lane intrinsic for <8 x i16> with %L1-%L3, lane index 1 and %A.
; Both selectors (SelectionDAG and GlobalISel) are expected to emit
; ld3.h { v0, v1, v2 }[1], [x0]; their expectations differ only in the order
; of the register kill annotations.
438 define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, ptr %A) nounwind {
439 ; Make sure we are using the operands defined by the ABI
440 ; CHECK-SD-LABEL: ld3lane_8h:
441 ; CHECK-SD: // %bb.0:
442 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
443 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
444 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
445 ; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[1], [x0]
448 ; CHECK-GI-LABEL: ld3lane_8h:
449 ; CHECK-GI: // %bb.0:
450 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
451 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
452 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
453 ; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[1], [x0]
455 %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, ptr %A)
456 ret %struct.__neon_int16x8x3_t %tmp2
; Calls the ld4lane intrinsic for <8 x i16> with %L1-%L4, lane index 1 and %A.
; Both selectors (SelectionDAG and GlobalISel) are expected to emit
; ld4.h { v0, v1, v2, v3 }[1], [x0]; their expectations differ only in the
; order of the register kill annotations.
459 define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, ptr %A) nounwind {
460 ; Make sure we are using the operands defined by the ABI
461 ; CHECK-SD-LABEL: ld4lane_8h:
462 ; CHECK-SD: // %bb.0:
463 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
464 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
465 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
466 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
467 ; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[1], [x0]
470 ; CHECK-GI-LABEL: ld4lane_8h:
471 ; CHECK-GI: // %bb.0:
472 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
473 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
474 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
475 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
476 ; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[1], [x0]
478 %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, ptr %A)
479 ret %struct.__neon_int16x8x4_t %tmp2
; Declarations of the ld2lane/ld3lane/ld4lane intrinsics for <8 x i16>: the
; input vectors come first, followed by an i64 lane index and the pointer.
482 declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr) nounwind readonly
483 declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind readonly
484 declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind readonly
; Calls the ld2lane intrinsic for <4 x i32> with %L1, %L2, lane index 1 and %A.
; Both selectors (SelectionDAG and GlobalISel) are expected to emit
; ld2.s { v0, v1 }[1], [x0]; their expectations differ only in the order of
; the register kill annotations.
486 define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, ptr %A) nounwind {
487 ; Make sure we are using the operands defined by the ABI
488 ; CHECK-SD-LABEL: ld2lane_4s:
489 ; CHECK-SD: // %bb.0:
490 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
491 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
492 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[1], [x0]
495 ; CHECK-GI-LABEL: ld2lane_4s:
496 ; CHECK-GI: // %bb.0:
497 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
498 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
499 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[1], [x0]
501 %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, i64 1, ptr %A)
502 ret %struct.__neon_int32x4x2_t %tmp2
; Calls the ld3lane intrinsic for <4 x i32> with %L1-%L3, lane index 1 and %A.
; Both selectors (SelectionDAG and GlobalISel) are expected to emit
; ld3.s { v0, v1, v2 }[1], [x0]; their expectations differ only in the order
; of the register kill annotations.
505 define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, ptr %A) nounwind {
506 ; Make sure we are using the operands defined by the ABI
507 ; CHECK-SD-LABEL: ld3lane_4s:
508 ; CHECK-SD: // %bb.0:
509 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
510 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
511 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
512 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[1], [x0]
515 ; CHECK-GI-LABEL: ld3lane_4s:
516 ; CHECK-GI: // %bb.0:
517 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
518 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
519 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
520 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[1], [x0]
522 %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, ptr %A)
523 ret %struct.__neon_int32x4x3_t %tmp2
; Calls the ld4lane intrinsic for <4 x i32> with %L1-%L4, lane index 1 and %A.
; Both selectors (SelectionDAG and GlobalISel) are expected to emit
; ld4.s { v0, v1, v2, v3 }[1], [x0]; their expectations differ only in the
; order of the register kill annotations.
526 define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, ptr %A) nounwind {
527 ; Make sure we are using the operands defined by the ABI
528 ; CHECK-SD-LABEL: ld4lane_4s:
529 ; CHECK-SD: // %bb.0:
530 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
531 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
532 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
533 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
534 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[1], [x0]
537 ; CHECK-GI-LABEL: ld4lane_4s:
538 ; CHECK-GI: // %bb.0:
539 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
540 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
541 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
542 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
543 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[1], [x0]
545 %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, ptr %A)
546 ret %struct.__neon_int32x4x4_t %tmp2
; Declarations of the ld2lane/ld3lane/ld4lane intrinsics for <4 x i32>: the
; input vectors come first, followed by an i64 lane index and the pointer.
549 declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr) nounwind readonly
550 declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind readonly
551 declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind readonly
; Calls the ld2lane intrinsic for <2 x i64> with %L1, %L2, lane index 1 and %A.
; Both selectors (SelectionDAG and GlobalISel) are expected to emit
; ld2.d { v0, v1 }[1], [x0]; their expectations differ only in the order of
; the register kill annotations.
553 define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, ptr %A) nounwind {
554 ; Make sure we are using the operands defined by the ABI
555 ; CHECK-SD-LABEL: ld2lane_2d:
556 ; CHECK-SD: // %bb.0:
557 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
558 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
559 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[1], [x0]
562 ; CHECK-GI-LABEL: ld2lane_2d:
563 ; CHECK-GI: // %bb.0:
564 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
565 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
566 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[1], [x0]
568 %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, i64 1, ptr %A)
569 ret %struct.__neon_int64x2x2_t %tmp2
; Calls the ld3lane intrinsic for <2 x i64> with %L1-%L3, lane index 1 and %A.
; Both selectors (SelectionDAG and GlobalISel) are expected to emit
; ld3.d { v0, v1, v2 }[1], [x0]; their expectations differ only in the order
; of the register kill annotations.
572 define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, ptr %A) nounwind {
573 ; Make sure we are using the operands defined by the ABI
574 ; CHECK-SD-LABEL: ld3lane_2d:
575 ; CHECK-SD: // %bb.0:
576 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
577 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
578 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
579 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[1], [x0]
582 ; CHECK-GI-LABEL: ld3lane_2d:
583 ; CHECK-GI: // %bb.0:
584 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
585 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
586 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
587 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[1], [x0]
589 %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, ptr %A)
590 ret %struct.__neon_int64x2x3_t %tmp2
; Calls the ld4lane intrinsic for <2 x i64> with %L1-%L4, lane index 1 and %A.
; Both selectors (SelectionDAG and GlobalISel) are expected to emit
; ld4.d { v0, v1, v2, v3 }[1], [x0]; their expectations differ only in the
; order of the register kill annotations.
593 define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, ptr %A) nounwind {
594 ; Make sure we are using the operands defined by the ABI
595 ; CHECK-SD-LABEL: ld4lane_2d:
596 ; CHECK-SD: // %bb.0:
597 ; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
598 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
599 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
600 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
601 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[1], [x0]
604 ; CHECK-GI-LABEL: ld4lane_2d:
605 ; CHECK-GI: // %bb.0:
606 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
607 ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
608 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
609 ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
610 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[1], [x0]
612 %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, ptr %A)
613 ret %struct.__neon_int64x2x4_t %tmp2
; Declarations of the ld2lane/ld3lane/ld4lane intrinsics for <2 x i64>: the
; input vectors come first, followed by an i64 lane index and the pointer.
616 declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr) nounwind readonly
617 declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind readonly
618 declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind readonly
; Loads one i8 from %bar and inserts it into lanes 0-7 of an <8 x i8> via an
; insertelement chain. The load plus the inserts are expected to be emitted
; as a single ld1r.8b { v0 }, [x0].
620 define <8 x i8> @ld1r_8b(ptr %bar) {
621 ; CHECK-LABEL: ld1r_8b:
623 ; CHECK-NEXT: ld1r.8b { v0 }, [x0]
625 ; Make sure we are using the operands defined by the ABI
626 %tmp1 = load i8, ptr %bar
627 %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
628 %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
629 %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
630 %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
631 %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
632 %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
633 %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
634 %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
; Loads one i8 from %bar and inserts it into lanes 0-15 of a <16 x i8> via an
; insertelement chain. The load plus the inserts are expected to be emitted
; as a single ld1r.16b { v0 }, [x0].
638 define <16 x i8> @ld1r_16b(ptr %bar) {
639 ; CHECK-LABEL: ld1r_16b:
641 ; CHECK-NEXT: ld1r.16b { v0 }, [x0]
643 ; Make sure we are using the operands defined by the ABI
644 %tmp1 = load i8, ptr %bar
645 %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
646 %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
647 %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
648 %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
649 %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
650 %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
651 %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
652 %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
653 %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
654 %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
655 %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
656 %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
657 %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
658 %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
659 %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
660 %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
; Loads one i16 from %bar and inserts it into lanes 0-3 of a <4 x i16> via an
; insertelement chain. The load plus the inserts are expected to be emitted
; as a single ld1r.4h { v0 }, [x0].
664 define <4 x i16> @ld1r_4h(ptr %bar) {
665 ; CHECK-LABEL: ld1r_4h:
667 ; CHECK-NEXT: ld1r.4h { v0 }, [x0]
669 ; Make sure we are using the operands defined by the ABI
670 %tmp1 = load i16, ptr %bar
671 %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
672 %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
673 %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
674 %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
; Loads one i16 from %bar and inserts it into lanes 0-7 of an <8 x i16> via an
; insertelement chain. The load plus the inserts are expected to be emitted
; as a single ld1r.8h { v0 }, [x0].
678 define <8 x i16> @ld1r_8h(ptr %bar) {
679 ; CHECK-LABEL: ld1r_8h:
681 ; CHECK-NEXT: ld1r.8h { v0 }, [x0]
683 ; Make sure we are using the operands defined by the ABI
684 %tmp1 = load i16, ptr %bar
685 %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
686 %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
687 %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
688 %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
689 %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
690 %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
691 %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
692 %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
; Loads one i32 from %bar and inserts it into lanes 0-1 of a <2 x i32> via an
; insertelement chain. The load plus the inserts are expected to be emitted
; as a single ld1r.2s { v0 }, [x0].
696 define <2 x i32> @ld1r_2s(ptr %bar) {
697 ; CHECK-LABEL: ld1r_2s:
699 ; CHECK-NEXT: ld1r.2s { v0 }, [x0]
701 ; Make sure we are using the operands defined by the ABI
702 %tmp1 = load i32, ptr %bar
703 %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
704 %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
; Loads one i32 from %bar and inserts it into lanes 0-3 of a <4 x i32> via an
; insertelement chain. The load plus the inserts are expected to be emitted
; as a single ld1r.4s { v0 }, [x0].
708 define <4 x i32> @ld1r_4s(ptr %bar) {
709 ; CHECK-LABEL: ld1r_4s:
711 ; CHECK-NEXT: ld1r.4s { v0 }, [x0]
713 ; Make sure we are using the operands defined by the ABI
714 %tmp1 = load i32, ptr %bar
715 %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
716 %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
717 %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
718 %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
; ld1r_2d: loads one i64 from %bar and inserts it into both lanes of a <2 x i64>.
; Expected output (shared by both llc runs): one ld1r.2d into v0 from [x0].
722 define <2 x i64> @ld1r_2d(ptr %bar) {
723 ; CHECK-LABEL: ld1r_2d:
725 ; CHECK-NEXT: ld1r.2d { v0 }, [x0]
727 ; Make sure we are using the operands defined by the ABI
728 %tmp1 = load i64, ptr %bar
729 %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
730 %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
; ld2r_8b: returns the result of the ld2r intrinsic for <8 x i8> applied to %A.
; Expected output (shared by both llc runs): one ld2r.8b into v0, v1 from [x0].
734 define %struct.__neon_int8x8x2_t @ld2r_8b(ptr %A) nounwind {
735 ; CHECK-LABEL: ld2r_8b:
737 ; CHECK-NEXT: ld2r.8b { v0, v1 }, [x0]
739 ; Make sure we are using the operands defined by the ABI
740 %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0(ptr %A)
741 ret %struct.__neon_int8x8x2_t %tmp2
; ld3r_8b: returns the result of the ld3r intrinsic for <8 x i8> applied to %A.
; Expected output (shared by both llc runs): one ld3r.8b into v0-v2 from [x0].
744 define %struct.__neon_int8x8x3_t @ld3r_8b(ptr %A) nounwind {
745 ; CHECK-LABEL: ld3r_8b:
747 ; CHECK-NEXT: ld3r.8b { v0, v1, v2 }, [x0]
749 ; Make sure we are using the operands defined by the ABI
750 %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0(ptr %A)
751 ret %struct.__neon_int8x8x3_t %tmp2
; ld4r_8b: returns the result of the ld4r intrinsic for <8 x i8> applied to %A.
; Expected output (shared by both llc runs): one ld4r.8b into v0-v3 from [x0].
754 define %struct.__neon_int8x8x4_t @ld4r_8b(ptr %A) nounwind {
755 ; CHECK-LABEL: ld4r_8b:
757 ; CHECK-NEXT: ld4r.8b { v0, v1, v2, v3 }, [x0]
759 ; Make sure we are using the operands defined by the ABI
760 %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0(ptr %A)
761 ret %struct.__neon_int8x8x4_t %tmp2
764 declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0(ptr) nounwind readonly
765 declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0(ptr) nounwind readonly
766 declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0(ptr) nounwind readonly
; ld2r_16b: returns the result of the ld2r intrinsic for <16 x i8> applied to %A.
; Expected output (shared by both llc runs): one ld2r.16b into v0, v1 from [x0].
768 define %struct.__neon_int8x16x2_t @ld2r_16b(ptr %A) nounwind {
769 ; CHECK-LABEL: ld2r_16b:
771 ; CHECK-NEXT: ld2r.16b { v0, v1 }, [x0]
773 ; Make sure we are using the operands defined by the ABI
774 %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0(ptr %A)
775 ret %struct.__neon_int8x16x2_t %tmp2
; ld3r_16b: returns the result of the ld3r intrinsic for <16 x i8> applied to %A.
; Expected output (shared by both llc runs): one ld3r.16b into v0-v2 from [x0].
778 define %struct.__neon_int8x16x3_t @ld3r_16b(ptr %A) nounwind {
779 ; CHECK-LABEL: ld3r_16b:
781 ; CHECK-NEXT: ld3r.16b { v0, v1, v2 }, [x0]
783 ; Make sure we are using the operands defined by the ABI
784 %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0(ptr %A)
785 ret %struct.__neon_int8x16x3_t %tmp2
; ld4r_16b: returns the result of the ld4r intrinsic for <16 x i8> applied to %A.
; Expected output (shared by both llc runs): one ld4r.16b into v0-v3 from [x0].
788 define %struct.__neon_int8x16x4_t @ld4r_16b(ptr %A) nounwind {
789 ; CHECK-LABEL: ld4r_16b:
791 ; CHECK-NEXT: ld4r.16b { v0, v1, v2, v3 }, [x0]
793 ; Make sure we are using the operands defined by the ABI
794 %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0(ptr %A)
795 ret %struct.__neon_int8x16x4_t %tmp2
798 declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0(ptr) nounwind readonly
799 declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0(ptr) nounwind readonly
800 declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0(ptr) nounwind readonly
; ld2r_4h: returns the result of the ld2r intrinsic for <4 x i16> applied to %A.
; Expected output (shared by both llc runs): one ld2r.4h into v0, v1 from [x0].
802 define %struct.__neon_int16x4x2_t @ld2r_4h(ptr %A) nounwind {
803 ; CHECK-LABEL: ld2r_4h:
805 ; CHECK-NEXT: ld2r.4h { v0, v1 }, [x0]
807 ; Make sure we are using the operands defined by the ABI
808 %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0(ptr %A)
809 ret %struct.__neon_int16x4x2_t %tmp2
; ld3r_4h: returns the result of the ld3r intrinsic for <4 x i16> applied to %A.
; Expected output (shared by both llc runs): one ld3r.4h into v0-v2 from [x0].
812 define %struct.__neon_int16x4x3_t @ld3r_4h(ptr %A) nounwind {
813 ; CHECK-LABEL: ld3r_4h:
815 ; CHECK-NEXT: ld3r.4h { v0, v1, v2 }, [x0]
817 ; Make sure we are using the operands defined by the ABI
818 %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0(ptr %A)
819 ret %struct.__neon_int16x4x3_t %tmp2
; ld4r_4h: returns the result of the ld4r intrinsic for <4 x i16> applied to %A.
; Expected output (shared by both llc runs): one ld4r.4h into v0-v3 from [x0].
822 define %struct.__neon_int16x4x4_t @ld4r_4h(ptr %A) nounwind {
823 ; CHECK-LABEL: ld4r_4h:
825 ; CHECK-NEXT: ld4r.4h { v0, v1, v2, v3 }, [x0]
827 ; Make sure we are using the operands defined by the ABI
828 %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0(ptr %A)
829 ret %struct.__neon_int16x4x4_t %tmp2
832 declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0(ptr) nounwind readonly
833 declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0(ptr) nounwind readonly
834 declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0(ptr) nounwind readonly
; ld2r_8h: returns the result of the ld2r intrinsic for <8 x i16> applied to %A.
; Expected output (shared by both llc runs): one ld2r.8h into v0, v1 from [x0].
836 define %struct.__neon_int16x8x2_t @ld2r_8h(ptr %A) nounwind {
837 ; CHECK-LABEL: ld2r_8h:
839 ; CHECK-NEXT: ld2r.8h { v0, v1 }, [x0]
841 ; Make sure we are using the operands defined by the ABI
842 %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0(ptr %A)
843 ret %struct.__neon_int16x8x2_t %tmp2
; ld3r_8h: returns the result of the ld3r intrinsic for <8 x i16> applied to %A.
; Expected output (shared by both llc runs): one ld3r.8h into v0-v2 from [x0].
846 define %struct.__neon_int16x8x3_t @ld3r_8h(ptr %A) nounwind {
847 ; CHECK-LABEL: ld3r_8h:
849 ; CHECK-NEXT: ld3r.8h { v0, v1, v2 }, [x0]
851 ; Make sure we are using the operands defined by the ABI
852 %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0(ptr %A)
853 ret %struct.__neon_int16x8x3_t %tmp2
; ld4r_8h: returns the result of the ld4r intrinsic for <8 x i16> applied to %A.
; Expected output (shared by both llc runs): one ld4r.8h into v0-v3 from [x0].
856 define %struct.__neon_int16x8x4_t @ld4r_8h(ptr %A) nounwind {
857 ; CHECK-LABEL: ld4r_8h:
859 ; CHECK-NEXT: ld4r.8h { v0, v1, v2, v3 }, [x0]
861 ; Make sure we are using the operands defined by the ABI
862 %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0(ptr %A)
863 ret %struct.__neon_int16x8x4_t %tmp2
866 declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0(ptr) nounwind readonly
867 declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0(ptr) nounwind readonly
868 declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0(ptr) nounwind readonly
; ld2r_2s: returns the result of the ld2r intrinsic for <2 x i32> applied to %A.
; Expected output (shared by both llc runs): one ld2r.2s into v0, v1 from [x0].
870 define %struct.__neon_int32x2x2_t @ld2r_2s(ptr %A) nounwind {
871 ; CHECK-LABEL: ld2r_2s:
873 ; CHECK-NEXT: ld2r.2s { v0, v1 }, [x0]
875 ; Make sure we are using the operands defined by the ABI
876 %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0(ptr %A)
877 ret %struct.__neon_int32x2x2_t %tmp2
; ld3r_2s: returns the result of the ld3r intrinsic for <2 x i32> applied to %A.
; Expected output (shared by both llc runs): one ld3r.2s into v0-v2 from [x0].
880 define %struct.__neon_int32x2x3_t @ld3r_2s(ptr %A) nounwind {
881 ; CHECK-LABEL: ld3r_2s:
883 ; CHECK-NEXT: ld3r.2s { v0, v1, v2 }, [x0]
885 ; Make sure we are using the operands defined by the ABI
886 %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0(ptr %A)
887 ret %struct.__neon_int32x2x3_t %tmp2
; ld4r_2s: returns the result of the ld4r intrinsic for <2 x i32> applied to %A.
; Expected output (shared by both llc runs): one ld4r.2s into v0-v3 from [x0].
890 define %struct.__neon_int32x2x4_t @ld4r_2s(ptr %A) nounwind {
891 ; CHECK-LABEL: ld4r_2s:
893 ; CHECK-NEXT: ld4r.2s { v0, v1, v2, v3 }, [x0]
895 ; Make sure we are using the operands defined by the ABI
896 %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0(ptr %A)
897 ret %struct.__neon_int32x2x4_t %tmp2
900 declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0(ptr) nounwind readonly
901 declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0(ptr) nounwind readonly
902 declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0(ptr) nounwind readonly
; ld2r_4s: returns the result of the ld2r intrinsic for <4 x i32> applied to %A.
; Expected output (shared by both llc runs): one ld2r.4s into v0, v1 from [x0].
904 define %struct.__neon_int32x4x2_t @ld2r_4s(ptr %A) nounwind {
905 ; CHECK-LABEL: ld2r_4s:
907 ; CHECK-NEXT: ld2r.4s { v0, v1 }, [x0]
909 ; Make sure we are using the operands defined by the ABI
910 %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0(ptr %A)
911 ret %struct.__neon_int32x4x2_t %tmp2
; ld3r_4s: returns the result of the ld3r intrinsic for <4 x i32> applied to %A.
; Expected output (shared by both llc runs): one ld3r.4s into v0-v2 from [x0].
914 define %struct.__neon_int32x4x3_t @ld3r_4s(ptr %A) nounwind {
915 ; CHECK-LABEL: ld3r_4s:
917 ; CHECK-NEXT: ld3r.4s { v0, v1, v2 }, [x0]
919 ; Make sure we are using the operands defined by the ABI
920 %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0(ptr %A)
921 ret %struct.__neon_int32x4x3_t %tmp2
; ld4r_4s: returns the result of the ld4r intrinsic for <4 x i32> applied to %A.
; Expected output (shared by both llc runs): one ld4r.4s into v0-v3 from [x0].
924 define %struct.__neon_int32x4x4_t @ld4r_4s(ptr %A) nounwind {
925 ; CHECK-LABEL: ld4r_4s:
927 ; CHECK-NEXT: ld4r.4s { v0, v1, v2, v3 }, [x0]
929 ; Make sure we are using the operands defined by the ABI
930 %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0(ptr %A)
931 ret %struct.__neon_int32x4x4_t %tmp2
934 declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0(ptr) nounwind readonly
935 declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0(ptr) nounwind readonly
936 declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0(ptr) nounwind readonly
; ld2r_1d: returns the result of the ld2r intrinsic for <1 x i64> applied to %A.
; Expected output (shared by both llc runs): one ld2r.1d into v0, v1 from [x0].
938 define %struct.__neon_int64x1x2_t @ld2r_1d(ptr %A) nounwind {
939 ; CHECK-LABEL: ld2r_1d:
941 ; CHECK-NEXT: ld2r.1d { v0, v1 }, [x0]
943 ; Make sure we are using the operands defined by the ABI
944 %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0(ptr %A)
945 ret %struct.__neon_int64x1x2_t %tmp2
; ld3r_1d: returns the result of the ld3r intrinsic for <1 x i64> applied to %A.
; Expected output (shared by both llc runs): one ld3r.1d into v0-v2 from [x0].
948 define %struct.__neon_int64x1x3_t @ld3r_1d(ptr %A) nounwind {
949 ; CHECK-LABEL: ld3r_1d:
951 ; CHECK-NEXT: ld3r.1d { v0, v1, v2 }, [x0]
953 ; Make sure we are using the operands defined by the ABI
954 %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0(ptr %A)
955 ret %struct.__neon_int64x1x3_t %tmp2
; ld4r_1d: returns the result of the ld4r intrinsic for <1 x i64> applied to %A.
; Expected output (shared by both llc runs): one ld4r.1d into v0-v3 from [x0].
958 define %struct.__neon_int64x1x4_t @ld4r_1d(ptr %A) nounwind {
959 ; CHECK-LABEL: ld4r_1d:
961 ; CHECK-NEXT: ld4r.1d { v0, v1, v2, v3 }, [x0]
963 ; Make sure we are using the operands defined by the ABI
964 %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0(ptr %A)
965 ret %struct.__neon_int64x1x4_t %tmp2
968 declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0(ptr) nounwind readonly
969 declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0(ptr) nounwind readonly
970 declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0(ptr) nounwind readonly
; ld2r_2d: returns the result of the ld2r intrinsic for <2 x i64> applied to %A.
; Expected output (shared by both llc runs): one ld2r.2d into v0, v1 from [x0].
972 define %struct.__neon_int64x2x2_t @ld2r_2d(ptr %A) nounwind {
973 ; CHECK-LABEL: ld2r_2d:
975 ; CHECK-NEXT: ld2r.2d { v0, v1 }, [x0]
977 ; Make sure we are using the operands defined by the ABI
978 %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %A)
979 ret %struct.__neon_int64x2x2_t %tmp2
; ld3r_2d: returns the result of the ld3r intrinsic for <2 x i64> applied to %A.
; Expected output (shared by both llc runs): one ld3r.2d into v0-v2 from [x0].
982 define %struct.__neon_int64x2x3_t @ld3r_2d(ptr %A) nounwind {
983 ; CHECK-LABEL: ld3r_2d:
985 ; CHECK-NEXT: ld3r.2d { v0, v1, v2 }, [x0]
987 ; Make sure we are using the operands defined by the ABI
988 %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %A)
989 ret %struct.__neon_int64x2x3_t %tmp2
; ld4r_2d: returns the result of the ld4r intrinsic for <2 x i64> applied to %A.
; Expected output (shared by both llc runs): one ld4r.2d into v0-v3 from [x0].
992 define %struct.__neon_int64x2x4_t @ld4r_2d(ptr %A) nounwind {
993 ; CHECK-LABEL: ld4r_2d:
995 ; CHECK-NEXT: ld4r.2d { v0, v1, v2, v3 }, [x0]
997 ; Make sure we are using the operands defined by the ABI
998 %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %A)
999 ret %struct.__neon_int64x2x4_t %tmp2
1002 declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0(ptr) nounwind readonly
1003 declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr) nounwind readonly
1004 declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly
; ld1_16b: loads one i8 from %bar and inserts it into lane 0 of %V.
; The SelectionDAG run expects a single lane load (ld1.b into v0[0]); the
; GlobalISel run expects a scalar ldr into b1 followed by a lane mov into v0.
1006 define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) {
1007 ; CHECK-SD-LABEL: ld1_16b:
1008 ; CHECK-SD: // %bb.0:
1009 ; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0]
1010 ; CHECK-SD-NEXT: ret
1012 ; CHECK-GI-LABEL: ld1_16b:
1013 ; CHECK-GI: // %bb.0:
1014 ; CHECK-GI-NEXT: ldr b1, [x0]
1015 ; CHECK-GI-NEXT: mov.b v0[0], v1[0]
1016 ; CHECK-GI-NEXT: ret
1017 ; Make sure we are using the operands defined by the ABI
1018 %tmp1 = load i8, ptr %bar
1019 %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
; ld1_8h: loads one i16 from %bar and inserts it into lane 0 of %V.
; The SelectionDAG run expects a single lane load (ld1.h into v0[0]); the
; GlobalISel run expects a scalar ldr into h1 followed by a lane mov into v0.
1023 define <8 x i16> @ld1_8h(<8 x i16> %V, ptr %bar) {
1024 ; CHECK-SD-LABEL: ld1_8h:
1025 ; CHECK-SD: // %bb.0:
1026 ; CHECK-SD-NEXT: ld1.h { v0 }[0], [x0]
1027 ; CHECK-SD-NEXT: ret
1029 ; CHECK-GI-LABEL: ld1_8h:
1030 ; CHECK-GI: // %bb.0:
1031 ; CHECK-GI-NEXT: ldr h1, [x0]
1032 ; CHECK-GI-NEXT: mov.h v0[0], v1[0]
1033 ; CHECK-GI-NEXT: ret
1034 ; Make sure we are using the operands defined by the ABI
1035 %tmp1 = load i16, ptr %bar
1036 %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
; ld1_4s: loads one i32 from %bar and inserts it into lane 0 of %V.
; The SelectionDAG run expects a single lane load (ld1.s into v0[0]); the
; GlobalISel run expects a scalar ldr into s1 followed by a lane mov into v0.
1040 define <4 x i32> @ld1_4s(<4 x i32> %V, ptr %bar) {
1041 ; CHECK-SD-LABEL: ld1_4s:
1042 ; CHECK-SD: // %bb.0:
1043 ; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0]
1044 ; CHECK-SD-NEXT: ret
1046 ; CHECK-GI-LABEL: ld1_4s:
1047 ; CHECK-GI: // %bb.0:
1048 ; CHECK-GI-NEXT: ldr s1, [x0]
1049 ; CHECK-GI-NEXT: mov.s v0[0], v1[0]
1050 ; CHECK-GI-NEXT: ret
1051 ; Make sure we are using the operands defined by the ABI
1052 %tmp1 = load i32, ptr %bar
1053 %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
; ld1_4s_float: loads one float from %bar, inserts it into lane 0 of %V and
; returns the updated vector. The SelectionDAG run expects a single lane load
; (ld1.s into v0[0]); the GlobalISel run expects ldr s1 followed by a lane mov.
1057 define <4 x float> @ld1_4s_float(<4 x float> %V, ptr %bar) {
1058 ; CHECK-SD-LABEL: ld1_4s_float:
1059 ; CHECK-SD: // %bb.0:
1060 ; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0]
1061 ; CHECK-SD-NEXT: ret
1063 ; CHECK-GI-LABEL: ld1_4s_float:
1064 ; CHECK-GI: // %bb.0:
1065 ; CHECK-GI-NEXT: ldr s1, [x0]
1066 ; CHECK-GI-NEXT: mov.s v0[0], v1[0]
1067 ; CHECK-GI-NEXT: ret
1068 ; Make sure we are using the operands defined by the ABI
1069 %tmp1 = load float, ptr %bar
1070 %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
1071 ret <4 x float> %tmp2
; ld1_2d: loads one i64 from %bar and inserts it into lane 0 of %V.
; The SelectionDAG run expects a single lane load (ld1.d into v0[0]); the
; GlobalISel run expects a scalar ldr into d1 followed by a lane mov into v0.
1074 define <2 x i64> @ld1_2d(<2 x i64> %V, ptr %bar) {
1075 ; CHECK-SD-LABEL: ld1_2d:
1076 ; CHECK-SD: // %bb.0:
1077 ; CHECK-SD-NEXT: ld1.d { v0 }[0], [x0]
1078 ; CHECK-SD-NEXT: ret
1080 ; CHECK-GI-LABEL: ld1_2d:
1081 ; CHECK-GI: // %bb.0:
1082 ; CHECK-GI-NEXT: ldr d1, [x0]
1083 ; CHECK-GI-NEXT: mov.d v0[0], v1[0]
1084 ; CHECK-GI-NEXT: ret
1085 ; Make sure we are using the operands defined by the ABI
1086 %tmp1 = load i64, ptr %bar
1087 %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
; ld1_2d_double: loads one double from %bar, inserts it into lane 0 of %V and
; returns the updated vector. The SelectionDAG run expects a single lane load
; (ld1.d into v0[0]); the GlobalISel run expects ldr d1 followed by a lane mov.
1091 define <2 x double> @ld1_2d_double(<2 x double> %V, ptr %bar) {
1092 ; CHECK-SD-LABEL: ld1_2d_double:
1093 ; CHECK-SD: // %bb.0:
1094 ; CHECK-SD-NEXT: ld1.d { v0 }[0], [x0]
1095 ; CHECK-SD-NEXT: ret
1097 ; CHECK-GI-LABEL: ld1_2d_double:
1098 ; CHECK-GI: // %bb.0:
1099 ; CHECK-GI-NEXT: ldr d1, [x0]
1100 ; CHECK-GI-NEXT: mov.d v0[0], v1[0]
1101 ; CHECK-GI-NEXT: ret
1102 ; Make sure we are using the operands defined by the ABI
1103 %tmp1 = load double, ptr %bar
1104 %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
1105 ret <2 x double> %tmp2
; ld1_1d: loads a whole <1 x i64> from %p with 8-byte alignment.
; Expected output (shared by both llc runs): a plain ldr into d0 from [x0].
1108 define <1 x i64> @ld1_1d(ptr %p) {
1109 ; CHECK-LABEL: ld1_1d:
1111 ; CHECK-NEXT: ldr d0, [x0]
1113 ; Make sure we are using the operands defined by the ABI
1114 %tmp = load <1 x i64>, ptr %p, align 8
; ld1_8b: loads one i8 from %bar and inserts it into lane 0 of the 64-bit
; vector %V. The SelectionDAG run expects a single lane load (ld1.b into
; v0[0]); the GlobalISel run expects ldr b1 followed by a lane mov. Both
; expected outputs also contain llc's `// kill` annotations for $d0/$q0.
1118 define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
1119 ; CHECK-SD-LABEL: ld1_8b:
1120 ; CHECK-SD: // %bb.0:
1121 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1122 ; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0]
1123 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
1124 ; CHECK-SD-NEXT: ret
1126 ; CHECK-GI-LABEL: ld1_8b:
1127 ; CHECK-GI: // %bb.0:
1128 ; CHECK-GI-NEXT: ldr b1, [x0]
1129 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1130 ; CHECK-GI-NEXT: mov.b v0[0], v1[0]
1131 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
1132 ; CHECK-GI-NEXT: ret
1133 ; Make sure we are using the operands defined by the ABI
1134 %tmp1 = load i8, ptr %bar
1135 %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
; ld1_4h: loads one i16 from %bar and inserts it into lane 0 of the 64-bit
; vector %V. The SelectionDAG run expects a single lane load (ld1.h into
; v0[0]); the GlobalISel run expects ldr h1 followed by a lane mov. Both
; expected outputs also contain llc's `// kill` annotations for $d0/$q0.
1139 define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
1140 ; CHECK-SD-LABEL: ld1_4h:
1141 ; CHECK-SD: // %bb.0:
1142 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1143 ; CHECK-SD-NEXT: ld1.h { v0 }[0], [x0]
1144 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
1145 ; CHECK-SD-NEXT: ret
1147 ; CHECK-GI-LABEL: ld1_4h:
1148 ; CHECK-GI: // %bb.0:
1149 ; CHECK-GI-NEXT: ldr h1, [x0]
1150 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1151 ; CHECK-GI-NEXT: mov.h v0[0], v1[0]
1152 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
1153 ; CHECK-GI-NEXT: ret
1154 ; Make sure we are using the operands defined by the ABI
1155 %tmp1 = load i16, ptr %bar
1156 %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
; ld1_2s: loads one i32 from %bar and inserts it into lane 0 of the 64-bit
; vector %V. The SelectionDAG run expects a single lane load (ld1.s into
; v0[0]); the GlobalISel run expects ldr s1 followed by a lane mov. Both
; expected outputs also contain llc's `// kill` annotations for $d0/$q0.
1160 define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
1161 ; CHECK-SD-LABEL: ld1_2s:
1162 ; CHECK-SD: // %bb.0:
1163 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1164 ; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0]
1165 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
1166 ; CHECK-SD-NEXT: ret
1168 ; CHECK-GI-LABEL: ld1_2s:
1169 ; CHECK-GI: // %bb.0:
1170 ; CHECK-GI-NEXT: ldr s1, [x0]
1171 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1172 ; CHECK-GI-NEXT: mov.s v0[0], v1[0]
1173 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
1174 ; CHECK-GI-NEXT: ret
1175 ; Make sure we are using the operands defined by the ABI
1176 %tmp1 = load i32, ptr %bar
1177 %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
; ld1_2s_float: loads one float from %bar, inserts it into lane 0 of the
; 64-bit vector %V and returns the updated vector. The SelectionDAG run
; expects a single lane load (ld1.s into v0[0]); the GlobalISel run expects
; ldr s1 followed by a lane mov. Both expected outputs also contain llc's
; `// kill` annotations for $d0/$q0.
1181 define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) {
1182 ; CHECK-SD-LABEL: ld1_2s_float:
1183 ; CHECK-SD: // %bb.0:
1184 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1185 ; CHECK-SD-NEXT: ld1.s { v0 }[0], [x0]
1186 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
1187 ; CHECK-SD-NEXT: ret
1189 ; CHECK-GI-LABEL: ld1_2s_float:
1190 ; CHECK-GI: // %bb.0:
1191 ; CHECK-GI-NEXT: ldr s1, [x0]
1192 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1193 ; CHECK-GI-NEXT: mov.s v0[0], v1[0]
1194 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
1195 ; CHECK-GI-NEXT: ret
1196 ; Make sure we are using the operands defined by the ABI
1197 %tmp1 = load float, ptr %bar
1198 %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0
1199 ret <2 x float> %tmp2
1203 ; Add rdar://13098923 test case: vld1_dup_u32 doesn't generate ld1r.2s
; ld1r_2s_from_dup: loads an i32 from %a and from %b, broadcasts each across a
; <2 x i32> (insertelement into lane 0 + shufflevector with an all-zero mask),
; reinterprets both as <8 x i8>, zero-extends them to <8 x i16>, subtracts,
; and stores element 0 of the <2 x i64> view of the difference to %diff as a
; <4 x i16>.
; Expected output (shared by both llc runs): two ld1r.2s loads, one usubl.8h
; and a str of d0 to [x2].
1204 define void @ld1r_2s_from_dup(ptr nocapture %a, ptr nocapture %b, ptr nocapture %diff) nounwind ssp {
1205 ; CHECK-LABEL: ld1r_2s_from_dup:
1206 ; CHECK: // %bb.0: // %entry
1207 ; CHECK-NEXT: ld1r.2s { v0 }, [x0]
1208 ; CHECK-NEXT: ld1r.2s { v1 }, [x1]
1209 ; CHECK-NEXT: usubl.8h v0, v0, v1
1210 ; CHECK-NEXT: str d0, [x2]
1213 %tmp1 = load i32, ptr %a, align 4
1214 %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
1215 %lane = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
1216 %tmp3 = bitcast <2 x i32> %lane to <8 x i8>
1217 %tmp5 = load i32, ptr %b, align 4
1218 %tmp6 = insertelement <2 x i32> undef, i32 %tmp5, i32 0
1219 %lane1 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> zeroinitializer
1220 %tmp7 = bitcast <2 x i32> %lane1 to <8 x i8>
1221 %vmovl.i.i = zext <8 x i8> %tmp3 to <8 x i16>
1222 %vmovl.i4.i = zext <8 x i8> %tmp7 to <8 x i16>
1223 %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i4.i
1224 %tmp8 = bitcast <8 x i16> %sub.i to <2 x i64>
1225 %shuffle.i = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
1226 %tmp9 = bitcast <1 x i64> %shuffle.i to <4 x i16>
1227 store <4 x i16> %tmp9, ptr %diff, align 8
1231 ; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal
; ld1r_4s_float: loads one float from %x, inserts it into lanes 0-3 of a
; <4 x float> and returns the vector.
; Expected output (shared by both llc runs): one ld1r.4s into v0 from [x0].
1232 define <4 x float> @ld1r_4s_float(ptr nocapture %x) {
1233 ; CHECK-LABEL: ld1r_4s_float:
1234 ; CHECK: // %bb.0: // %entry
1235 ; CHECK-NEXT: ld1r.4s { v0 }, [x0]
1238 ; Make sure we are using the operands defined by the ABI
1239 %tmp = load float, ptr %x, align 4
1240 %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
1241 %tmp2 = insertelement <4 x float> %tmp1, float %tmp, i32 1
1242 %tmp3 = insertelement <4 x float> %tmp2, float %tmp, i32 2
1243 %tmp4 = insertelement <4 x float> %tmp3, float %tmp, i32 3
1244 ret <4 x float> %tmp4
; ld1r_2s_float: loads one float from %x, inserts it into both lanes of a
; <2 x float> and returns the vector.
; Expected output (shared by both llc runs): one ld1r.2s into v0 from [x0].
1247 define <2 x float> @ld1r_2s_float(ptr nocapture %x) {
1248 ; CHECK-LABEL: ld1r_2s_float:
1249 ; CHECK: // %bb.0: // %entry
1250 ; CHECK-NEXT: ld1r.2s { v0 }, [x0]
1253 ; Make sure we are using the operands defined by the ABI
1254 %tmp = load float, ptr %x, align 4
1255 %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
1256 %tmp2 = insertelement <2 x float> %tmp1, float %tmp, i32 1
1257 ret <2 x float> %tmp2
; ld1r_2d_double: loads one double from %x (the load is marked align 4),
; inserts it into both lanes of a <2 x double> and returns the vector.
; Expected output (shared by both llc runs): one ld1r.2d into v0 from [x0].
1260 define <2 x double> @ld1r_2d_double(ptr nocapture %x) {
1261 ; CHECK-LABEL: ld1r_2d_double:
1262 ; CHECK: // %bb.0: // %entry
1263 ; CHECK-NEXT: ld1r.2d { v0 }, [x0]
1266 ; Make sure we are using the operands defined by the ABI
1267 %tmp = load double, ptr %x, align 4
1268 %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
1269 %tmp2 = insertelement <2 x double> %tmp1, double %tmp, i32 1
1270 ret <2 x double> %tmp2
; ld1r_1d_double: loads one double from %x, inserts it into the only lane of a
; <1 x double> and returns the vector.
; Expected output (shared by both llc runs): a plain ldr into d0 from [x0];
; no ld1r appears in the assertions for this single-lane case.
1273 define <1 x double> @ld1r_1d_double(ptr nocapture %x) {
1274 ; CHECK-LABEL: ld1r_1d_double:
1275 ; CHECK: // %bb.0: // %entry
1276 ; CHECK-NEXT: ldr d0, [x0]
1279 ; Make sure we are using the operands defined by the ABI
1280 %tmp = load double, ptr %x, align 4
1281 %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
1282 ret <1 x double> %tmp1
; ld1r_4s_float_shuff: loads one float from %x, inserts it into lane 0 and
; broadcasts that lane with a shufflevector whose mask is all zeros.
; Expected output (shared by both llc runs): one ld1r.4s into v0 from [x0].
1285 define <4 x float> @ld1r_4s_float_shuff(ptr nocapture %x) {
1286 ; CHECK-LABEL: ld1r_4s_float_shuff:
1287 ; CHECK: // %bb.0: // %entry
1288 ; CHECK-NEXT: ld1r.4s { v0 }, [x0]
1291 ; Make sure we are using the operands defined by the ABI
1292 %tmp = load float, ptr %x, align 4
1293 %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
1294 %lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
1295 ret <4 x float> %lane
; ld1r_2s_float_shuff: loads one float from %x, inserts it into lane 0 and
; broadcasts that lane with a shufflevector whose mask is all zeros.
; Expected output (shared by both llc runs): one ld1r.2s into v0 from [x0].
1298 define <2 x float> @ld1r_2s_float_shuff(ptr nocapture %x) {
1299 ; CHECK-LABEL: ld1r_2s_float_shuff:
1300 ; CHECK: // %bb.0: // %entry
1301 ; CHECK-NEXT: ld1r.2s { v0 }, [x0]
1304 ; Make sure we are using the operands defined by the ABI
1305 %tmp = load float, ptr %x, align 4
1306 %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
1307 %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
1308 ret <2 x float> %lane
; ld1r_2d_double_shuff: loads one double from %x, inserts it into lane 0 and
; broadcasts that lane with a shufflevector whose mask is all zeros.
; Expected output (shared by both llc runs): one ld1r.2d into v0 from [x0].
1311 define <2 x double> @ld1r_2d_double_shuff(ptr nocapture %x) {
1312 ; CHECK-LABEL: ld1r_2d_double_shuff:
1313 ; CHECK: // %bb.0: // %entry
1314 ; CHECK-NEXT: ld1r.2d { v0 }, [x0]
1317 ; Make sure we are using the operands defined by the ABI
1318 %tmp = load double, ptr %x, align 4
1319 %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
1320 %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer
1321 ret <2 x double> %lane
; ld1r_1d_double_shuff: loads one double from %x, inserts it into the only
; lane of a <1 x double> and passes it through a one-element shufflevector
; with a zero mask.
; Expected output (shared by both llc runs): a plain ldr into d0 from [x0];
; no ld1r appears in the assertions for this single-lane case.
1324 define <1 x double> @ld1r_1d_double_shuff(ptr nocapture %x) {
1325 ; CHECK-LABEL: ld1r_1d_double_shuff:
1326 ; CHECK: // %bb.0: // %entry
1327 ; CHECK-NEXT: ldr d0, [x0]
1330 ; Make sure we are using the operands defined by the ABI
1331 %tmp = load double, ptr %x, align 4
1332 %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
1333 %lane = shufflevector <1 x double> %tmp1, <1 x double> undef, <1 x i32> zeroinitializer
1334 ret <1 x double> %lane
1337 %struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
1338 %struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
1339 %struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
1341 declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr) nounwind readonly
1342 declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr) nounwind readonly
1343 declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr) nounwind readonly
1344 declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr) nounwind readonly
1345 declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr) nounwind readonly
1346 declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr) nounwind readonly
; ld1_x2_v8i8: returns the result of the ld1x2 intrinsic for <8 x i8> applied
; to %addr. Expected output (both llc runs): one ld1.8b into v0, v1 from [x0].
1348 define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(ptr %addr) {
1349 ; CHECK-LABEL: ld1_x2_v8i8:
1351 ; CHECK-NEXT: ld1.8b { v0, v1 }, [x0]
1353 %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr %addr)
1354 ret %struct.__neon_int8x8x2_t %val
; ld1_x2_v4i16: returns the result of the ld1x2 intrinsic for <4 x i16> applied
; to %addr. Expected output (both llc runs): one ld1.4h into v0, v1 from [x0].
1357 define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(ptr %addr) {
1358 ; CHECK-LABEL: ld1_x2_v4i16:
1360 ; CHECK-NEXT: ld1.4h { v0, v1 }, [x0]
1362 %val = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr %addr)
1363 ret %struct.__neon_int16x4x2_t %val
; ld1_x2_v2i32: returns the result of the ld1x2 intrinsic for <2 x i32> applied
; to %addr. Expected output (both llc runs): one ld1.2s into v0, v1 from [x0].
1366 define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(ptr %addr) {
1367 ; CHECK-LABEL: ld1_x2_v2i32:
1369 ; CHECK-NEXT: ld1.2s { v0, v1 }, [x0]
1371 %val = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr %addr)
1372 ret %struct.__neon_int32x2x2_t %val
; ld1_x2_v2f32: returns the result of the ld1x2 intrinsic for <2 x float>
; applied to %addr. Expected output (both llc runs): one ld1.2s into v0, v1
; from [x0].
1375 define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(ptr %addr) {
1376 ; CHECK-LABEL: ld1_x2_v2f32:
1378 ; CHECK-NEXT: ld1.2s { v0, v1 }, [x0]
1380 %val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr %addr)
1381 ret %struct.__neon_float32x2x2_t %val
; ld1_x2_v1i64: returns the result of the ld1x2 intrinsic for <1 x i64> applied
; to %addr. Expected output (both llc runs): one ld1.1d into v0, v1 from [x0].
1384 define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(ptr %addr) {
1385 ; CHECK-LABEL: ld1_x2_v1i64:
1387 ; CHECK-NEXT: ld1.1d { v0, v1 }, [x0]
1389 %val = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %addr)
1390 ret %struct.__neon_int64x1x2_t %val
; ld1_x2_v1f64: returns the result of the ld1x2 intrinsic for <1 x double>
; (v1f64 in the intrinsic name) applied to %addr. Expected output (both llc
; runs): one ld1.1d into v0, v1 from [x0].
1393 define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(ptr %addr) {
1394 ; CHECK-LABEL: ld1_x2_v1f64:
1396 ; CHECK-NEXT: ld1.1d { v0, v1 }, [x0]
1398 %val = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %addr)
1399 ret %struct.__neon_float64x1x2_t %val
1403 %struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
1404 %struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
1405 %struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
1407 %struct.__neon_float64x2x2_t = type { <2 x double>, <2 x double> }
1408 %struct.__neon_float64x2x3_t = type { <2 x double>, <2 x double>, <2 x double> }
1409 %struct.__neon_float64x2x4_t = type { <2 x double>, <2 x double>, <2 x double>, <2 x double> }
1411 declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr) nounwind readonly
1412 declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr) nounwind readonly
1413 declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr) nounwind readonly
1414 declare %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr) nounwind readonly
1415 declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr) nounwind readonly
1416 declare %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr) nounwind readonly
; ld1_x2_v16i8: returns the result of the ld1x2 intrinsic for <16 x i8> applied
; to %addr. Expected output (both llc runs): one ld1.16b into v0, v1 from [x0].
1418 define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(ptr %addr) {
1419 ; CHECK-LABEL: ld1_x2_v16i8:
1421 ; CHECK-NEXT: ld1.16b { v0, v1 }, [x0]
1423 %val = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr %addr)
1424 ret %struct.__neon_int8x16x2_t %val
; ld1_x2_v8i16: returns the result of the ld1x2 intrinsic for <8 x i16> applied
; to %addr. Expected output (both llc runs): one ld1.8h into v0, v1 from [x0].
1427 define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(ptr %addr) {
1428 ; CHECK-LABEL: ld1_x2_v8i16:
1430 ; CHECK-NEXT: ld1.8h { v0, v1 }, [x0]
1432 %val = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr %addr)
1433 ret %struct.__neon_int16x8x2_t %val
; ld1_x2_v4i32: returns the result of the ld1x2 intrinsic for <4 x i32> applied
; to %addr. Expected output (both llc runs): one ld1.4s into v0, v1 from [x0].
1436 define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(ptr %addr) {
1437 ; CHECK-LABEL: ld1_x2_v4i32:
1439 ; CHECK-NEXT: ld1.4s { v0, v1 }, [x0]
1441 %val = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr %addr)
1442 ret %struct.__neon_int32x4x2_t %val
; ld1_x2_v4f32: returns the result of the ld1x2 intrinsic for <4 x float>
; applied to %addr. Expected output (both llc runs): one ld1.4s into v0, v1
; from [x0].
1445 define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(ptr %addr) {
1446 ; CHECK-LABEL: ld1_x2_v4f32:
1448 ; CHECK-NEXT: ld1.4s { v0, v1 }, [x0]
1450 %val = call %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr %addr)
1451 ret %struct.__neon_float32x4x2_t %val
; ld1_x2_v2i64: returns the result of the ld1x2 intrinsic for <2 x i64> applied
; to %addr. Expected output (both llc runs): one ld1.2d into v0, v1 from [x0].
1454 define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(ptr %addr) {
1455 ; CHECK-LABEL: ld1_x2_v2i64:
1457 ; CHECK-NEXT: ld1.2d { v0, v1 }, [x0]
1459 %val = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %addr)
1460 ret %struct.__neon_int64x2x2_t %val
; ld1_x2_v2f64: returns the result of the ld1x2 intrinsic for <2 x double>
; applied to %addr. Expected output (both llc runs): one ld1.2d into v0, v1
; from [x0].
1463 define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(ptr %addr) {
1464 ; CHECK-LABEL: ld1_x2_v2f64:
1466 ; CHECK-NEXT: ld1.2d { v0, v1 }, [x0]
1468 %val = call %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %addr)
1469 ret %struct.__neon_float64x2x2_t %val
1472 declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr) nounwind readonly
1473 declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr) nounwind readonly
1474 declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr) nounwind readonly
1475 declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr) nounwind readonly
1476 declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr) nounwind readonly
1477 declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr) nounwind readonly
; ld1_x3_v8i8: returns the result of the ld1x3 intrinsic for <8 x i8> applied
; to %addr. Expected output (both llc runs): one ld1.8b into v0-v2 from [x0].
1479 define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(ptr %addr) {
1480 ; CHECK-LABEL: ld1_x3_v8i8:
1482 ; CHECK-NEXT: ld1.8b { v0, v1, v2 }, [x0]
1484 %val = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr %addr)
1485 ret %struct.__neon_int8x8x3_t %val
; ld1_x3_v4i16: returns the result of the ld1x3 intrinsic for <4 x i16> applied
; to %addr. Expected output (both llc runs): one ld1.4h into v0-v2 from [x0].
1488 define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(ptr %addr) {
1489 ; CHECK-LABEL: ld1_x3_v4i16:
1491 ; CHECK-NEXT: ld1.4h { v0, v1, v2 }, [x0]
1493 %val = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr %addr)
1494 ret %struct.__neon_int16x4x3_t %val
; ld1_x3_v2i32: returns the result of the ld1x3 intrinsic for <2 x i32> applied
; to %addr. Expected output (both llc runs): one ld1.2s into v0-v2 from [x0].
1497 define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(ptr %addr) {
1498 ; CHECK-LABEL: ld1_x3_v2i32:
1500 ; CHECK-NEXT: ld1.2s { v0, v1, v2 }, [x0]
1502 %val = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr %addr)
1503 ret %struct.__neon_int32x2x3_t %val
; ld1_x3_v2f32: returns the result of the ld1x3 intrinsic for <2 x float>
; applied to %addr. Expected output (both llc runs): one ld1.2s into v0-v2
; from [x0].
1506 define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(ptr %addr) {
1507 ; CHECK-LABEL: ld1_x3_v2f32:
1509 ; CHECK-NEXT: ld1.2s { v0, v1, v2 }, [x0]
1511 %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr %addr)
1512 ret %struct.__neon_float32x2x3_t %val
; ld1_x3_v1i64: returns the result of the ld1x3 intrinsic for <1 x i64> applied
; to %addr. Expected output (both llc runs): one ld1.1d into v0-v2 from [x0].
1515 define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(ptr %addr) {
1516 ; CHECK-LABEL: ld1_x3_v1i64:
1518 ; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0]
1520 %val = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %addr)
1521 ret %struct.__neon_int64x1x3_t %val
; ld1_x3_v1f64: returns the result of the ld1x3 intrinsic for <1 x double>
; (v1f64 in the intrinsic name) applied to %addr. Expected output (both llc
; runs): one ld1.1d into v0-v2 from [x0].
1524 define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(ptr %addr) {
1525 ; CHECK-LABEL: ld1_x3_v1f64:
1527 ; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0]
1529 %val = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %addr)
1530 ret %struct.__neon_float64x1x3_t %val
1533 declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr) nounwind readonly
1534 declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr) nounwind readonly
1535 declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr) nounwind readonly
1536 declare %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr) nounwind readonly
1537 declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr) nounwind readonly
1538 declare %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr) nounwind readonly
; ld1_x3_v16i8: returns the result of the ld1x3 intrinsic for <16 x i8> applied
; to %addr. Expected output (both llc runs): one ld1.16b into v0-v2 from [x0].
1540 define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(ptr %addr) {
1541 ; CHECK-LABEL: ld1_x3_v16i8:
1543 ; CHECK-NEXT: ld1.16b { v0, v1, v2 }, [x0]
1545 %val = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr %addr)
1546 ret %struct.__neon_int8x16x3_t %val
; ld1_x3_v8i16: returns the result of the ld1x3 intrinsic for <8 x i16> applied
; to %addr. Expected output (both llc runs): one ld1.8h into v0-v2 from [x0].
1549 define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(ptr %addr) {
1550 ; CHECK-LABEL: ld1_x3_v8i16:
1552 ; CHECK-NEXT: ld1.8h { v0, v1, v2 }, [x0]
1554 %val = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr %addr)
1555 ret %struct.__neon_int16x8x3_t %val
; ld1_x3_v4i32: returns the result of the ld1x3 intrinsic for <4 x i32> applied
; to %addr. Expected output (both llc runs): one ld1.4s into v0-v2 from [x0].
1558 define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(ptr %addr) {
1559 ; CHECK-LABEL: ld1_x3_v4i32:
1561 ; CHECK-NEXT: ld1.4s { v0, v1, v2 }, [x0]
1563 %val = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr %addr)
1564 ret %struct.__neon_int32x4x3_t %val
; ld1_x3_v4f32: returns the result of the ld1x3 intrinsic for <4 x float>
; applied to %addr. Expected output (both llc runs): one ld1.4s into v0-v2
; from [x0].
1567 define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(ptr %addr) {
1568 ; CHECK-LABEL: ld1_x3_v4f32:
1570 ; CHECK-NEXT: ld1.4s { v0, v1, v2 }, [x0]
1572 %val = call %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr %addr)
1573 ret %struct.__neon_float32x4x3_t %val
; ld1_x3_v2i64: returns the result of the ld1x3 intrinsic for <2 x i64> applied
; to %addr. Expected output (both llc runs): one ld1.2d into v0-v2 from [x0].
1576 define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(ptr %addr) {
1577 ; CHECK-LABEL: ld1_x3_v2i64:
1579 ; CHECK-NEXT: ld1.2d { v0, v1, v2 }, [x0]
1581 %val = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %addr)
1582 ret %struct.__neon_int64x2x3_t %val
; ld1_x3_v2f64: returns the result of the ld1x3 intrinsic for <2 x double>
; applied to %addr. Expected output (both llc runs): one ld1.2d into v0-v2
; from [x0].
1585 define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(ptr %addr) {
1586 ; CHECK-LABEL: ld1_x3_v2f64:
1588 ; CHECK-NEXT: ld1.2d { v0, v1, v2 }, [x0]
1590 %val = call %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %addr)
1591 ret %struct.__neon_float64x2x3_t %val
; Declarations of the ld1x4 intrinsic for the 64-bit-wide vector types
; (v8i8, v4i16, v2i32, v2f32, v1i64, v1f64). Each takes a single ptr operand
; and is marked nounwind readonly; they are called by the ld1_x4_* tests below.
1594 declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr) nounwind readonly
1595 declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr) nounwind readonly
1596 declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr) nounwind readonly
1597 declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr) nounwind readonly
1598 declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr) nounwind readonly
1599 declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr) nounwind readonly
; ld1x4, v8i8 variant: passes %addr straight to the intrinsic and returns the
; resulting struct unchanged. The assertion expects one ld1.8b whose register
; list is v0-v3 and whose base address is x0.
1601 define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(ptr %addr) {
1602 ; CHECK-LABEL: ld1_x4_v8i8:
1604 ; CHECK-NEXT: ld1.8b { v0, v1, v2, v3 }, [x0]
1606 %val = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr %addr)
1607 ret %struct.__neon_int8x8x4_t %val
; ld1x4, v4i16 variant: passes %addr straight to the intrinsic and returns the
; resulting struct unchanged. The assertion expects one ld1.4h whose register
; list is v0-v3 and whose base address is x0.
1610 define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(ptr %addr) {
1611 ; CHECK-LABEL: ld1_x4_v4i16:
1613 ; CHECK-NEXT: ld1.4h { v0, v1, v2, v3 }, [x0]
1615 %val = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr %addr)
1616 ret %struct.__neon_int16x4x4_t %val
; ld1x4, v2i32 variant: passes %addr straight to the intrinsic and returns the
; resulting struct unchanged. The assertion expects one ld1.2s whose register
; list is v0-v3 and whose base address is x0.
1619 define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(ptr %addr) {
1620 ; CHECK-LABEL: ld1_x4_v2i32:
1622 ; CHECK-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0]
1624 %val = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr %addr)
1625 ret %struct.__neon_int32x2x4_t %val
; ld1x4, v2f32 variant: passes %addr straight to the intrinsic and returns the
; resulting struct unchanged. The expected instruction is the same ld1.2s form
; (register list v0-v3, base x0) that ld1_x4_v2i32 asserts for the integer type.
1628 define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(ptr %addr) {
1629 ; CHECK-LABEL: ld1_x4_v2f32:
1631 ; CHECK-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0]
1633 %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr %addr)
1634 ret %struct.__neon_float32x2x4_t %val
; ld1x4, v1i64 variant: passes %addr straight to the intrinsic and returns the
; resulting struct unchanged. The assertion expects one ld1.1d whose register
; list is v0-v3 and whose base address is x0.
1637 define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(ptr %addr) {
1638 ; CHECK-LABEL: ld1_x4_v1i64:
1640 ; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0]
1642 %val = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %addr)
1643 ret %struct.__neon_int64x1x4_t %val
; ld1x4, v1f64 variant: passes %addr straight to the intrinsic and returns the
; resulting struct unchanged. The expected instruction is the same ld1.1d form
; (register list v0-v3, base x0) that ld1_x4_v1i64 asserts for the integer type.
1646 define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(ptr %addr) {
1647 ; CHECK-LABEL: ld1_x4_v1f64:
1649 ; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0]
1651 %val = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %addr)
1652 ret %struct.__neon_float64x1x4_t %val
; Declarations of the ld1x4 intrinsic for the 128-bit-wide vector types
; (v16i8, v8i16, v4i32, v4f32, v2i64, v2f64). Each takes a single ptr operand
; and is marked nounwind readonly; they are called by the ld1_x4_* tests below.
1655 declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr) nounwind readonly
1656 declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr) nounwind readonly
1657 declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr) nounwind readonly
1658 declare %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr) nounwind readonly
1659 declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr) nounwind readonly
1660 declare %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr) nounwind readonly
; ld1x4, v16i8 variant: passes %addr straight to the intrinsic and returns the
; resulting struct unchanged. The assertion expects one ld1.16b whose register
; list is v0-v3 and whose base address is x0.
1662 define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(ptr %addr) {
1663 ; CHECK-LABEL: ld1_x4_v16i8:
1665 ; CHECK-NEXT: ld1.16b { v0, v1, v2, v3 }, [x0]
1667 %val = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr %addr)
1668 ret %struct.__neon_int8x16x4_t %val
; ld1x4, v8i16 variant: passes %addr straight to the intrinsic and returns the
; resulting struct unchanged. The assertion expects one ld1.8h whose register
; list is v0-v3 and whose base address is x0.
1671 define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(ptr %addr) {
1672 ; CHECK-LABEL: ld1_x4_v8i16:
1674 ; CHECK-NEXT: ld1.8h { v0, v1, v2, v3 }, [x0]
1676 %val = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr %addr)
1677 ret %struct.__neon_int16x8x4_t %val
; ld1x4, v4i32 variant: passes %addr straight to the intrinsic and returns the
; resulting struct unchanged. The assertion expects one ld1.4s whose register
; list is v0-v3 and whose base address is x0.
1680 define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(ptr %addr) {
1681 ; CHECK-LABEL: ld1_x4_v4i32:
1683 ; CHECK-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0]
1685 %val = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr %addr)
1686 ret %struct.__neon_int32x4x4_t %val
; ld1x4, v4f32 variant: passes %addr straight to the intrinsic and returns the
; resulting struct unchanged. The expected instruction is the same ld1.4s form
; (register list v0-v3, base x0) that ld1_x4_v4i32 asserts for the integer type.
1689 define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(ptr %addr) {
1690 ; CHECK-LABEL: ld1_x4_v4f32:
1692 ; CHECK-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0]
1694 %val = call %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr %addr)
1695 ret %struct.__neon_float32x4x4_t %val
; ld1x4, v2i64 variant: passes %addr straight to the intrinsic and returns the
; resulting struct unchanged. The assertion expects one ld1.2d whose register
; list is v0-v3 and whose base address is x0.
1698 define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(ptr %addr) {
1699 ; CHECK-LABEL: ld1_x4_v2i64:
1701 ; CHECK-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0]
1703 %val = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %addr)
1704 ret %struct.__neon_int64x2x4_t %val
; ld1x4, v2f64 variant: passes %addr straight to the intrinsic and returns the
; resulting struct unchanged. The expected instruction is the same ld1.2d form
; (register list v0-v3, base x0) that ld1_x4_v2i64 asserts for the integer type.
1707 define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(ptr %addr) {
1708 ; CHECK-LABEL: ld1_x4_v2f64:
1710 ; CHECK-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0]
1712 %val = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %addr)
1713 ret %struct.__neon_float64x2x4_t %val
1716 define <8 x i8> @dup_ld1_from_stack(ptr %__ret) {
1717 ; CHECK-SD-LABEL: dup_ld1_from_stack:
1718 ; CHECK-SD: // %bb.0: // %entry
1719 ; CHECK-SD-NEXT: sub sp, sp, #16
1720 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
1721 ; CHECK-SD-NEXT: add x8, sp, #15
1722 ; CHECK-SD-NEXT: ld1r.8b { v0 }, [x8]
1723 ; CHECK-SD-NEXT: add sp, sp, #16
1724 ; CHECK-SD-NEXT: ret
1726 ; CHECK-GI-LABEL: dup_ld1_from_stack:
1727 ; CHECK-GI: // %bb.0: // %entry
1728 ; CHECK-GI-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
1729 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
1730 ; CHECK-GI-NEXT: .cfi_offset w29, -16
1731 ; CHECK-GI-NEXT: add x8, sp, #15
1732 ; CHECK-GI-NEXT: ld1r.8b { v0 }, [x8]
1733 ; CHECK-GI-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
1734 ; CHECK-GI-NEXT: ret
1736 %item = alloca i8, align 1
1737 %0 = load i8, ptr %item, align 1
1738 %1 = insertelement <8 x i8> poison, i8 %0, i32 0
1739 %lane = shufflevector <8 x i8> %1, <8 x i8> %1, <8 x i32> zeroinitializer