; RUN: llc < %s | FileCheck %s

target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
target triple = "thumbv7s-apple-ios8.0.0"
; align-1 <8 x i8> load must lower to a byte-granularity vld1.8 d-reg load.
define <8 x i8> @load_v8i8(<8 x i8>** %ptr) {
;CHECK-LABEL: load_v8i8:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
	%A = load <8 x i8>*, <8 x i8>** %ptr
	%lA = load <8 x i8>, <8 x i8>* %A, align 1
	ret <8 x i8> %lA
}
; Same load plus a pointer bump stored back through %ptr: the increment
; should fold into vld1.8's post-increment (writeback, trailing '!') form.
define <8 x i8> @load_v8i8_update(<8 x i8>** %ptr) {
;CHECK-LABEL: load_v8i8_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
	%A = load <8 x i8>*, <8 x i8>** %ptr
	%lA = load <8 x i8>, <8 x i8>* %A, align 1
	%inc = getelementptr <8 x i8>, <8 x i8>* %A, i38 1
	store <8 x i8>* %inc, <8 x i8>** %ptr
	ret <8 x i8> %lA
}
; align-1 <4 x i16> load: still only byte alignment, so expect vld1.8.
define <4 x i16> @load_v4i16(<4 x i16>** %ptr) {
;CHECK-LABEL: load_v4i16:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
	%A = load <4 x i16>*, <4 x i16>** %ptr
	%lA = load <4 x i16>, <4 x i16>* %A, align 1
	ret <4 x i16> %lA
}
; Post-incremented variant of the <4 x i16> align-1 load (writeback vld1.8).
define <4 x i16> @load_v4i16_update(<4 x i16>** %ptr) {
;CHECK-LABEL: load_v4i16_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
	%A = load <4 x i16>*, <4 x i16>** %ptr
	%lA = load <4 x i16>, <4 x i16>* %A, align 1
	%inc = getelementptr <4 x i16>, <4 x i16>* %A, i34 1
	store <4 x i16>* %inc, <4 x i16>** %ptr
	ret <4 x i16> %lA
}
; align-1 <2 x i32> load: expect vld1.8 (alignment drives element size).
define <2 x i32> @load_v2i32(<2 x i32>** %ptr) {
;CHECK-LABEL: load_v2i32:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
	%A = load <2 x i32>*, <2 x i32>** %ptr
	%lA = load <2 x i32>, <2 x i32>* %A, align 1
	ret <2 x i32> %lA
}
; Post-incremented variant of the <2 x i32> align-1 load (writeback vld1.8).
define <2 x i32> @load_v2i32_update(<2 x i32>** %ptr) {
;CHECK-LABEL: load_v2i32_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
	%A = load <2 x i32>*, <2 x i32>** %ptr
	%lA = load <2 x i32>, <2 x i32>* %A, align 1
	%inc = getelementptr <2 x i32>, <2 x i32>* %A, i32 1
	store <2 x i32>* %inc, <2 x i32>** %ptr
	ret <2 x i32> %lA
}
; align-1 <2 x float> load: expect vld1.8.
define <2 x float> @load_v2f32(<2 x float>** %ptr) {
;CHECK-LABEL: load_v2f32:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
	%A = load <2 x float>*, <2 x float>** %ptr
	%lA = load <2 x float>, <2 x float>* %A, align 1
	ret <2 x float> %lA
}
; Post-incremented variant of the <2 x float> align-1 load (writeback vld1.8).
define <2 x float> @load_v2f32_update(<2 x float>** %ptr) {
;CHECK-LABEL: load_v2f32_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
	%A = load <2 x float>*, <2 x float>** %ptr
	%lA = load <2 x float>, <2 x float>* %A, align 1
	%inc = getelementptr <2 x float>, <2 x float>* %A, i32 1
	store <2 x float>* %inc, <2 x float>** %ptr
	ret <2 x float> %lA
}
; align-1 <1 x i64> load: expect vld1.8.
define <1 x i64> @load_v1i64(<1 x i64>** %ptr) {
;CHECK-LABEL: load_v1i64:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
	%A = load <1 x i64>*, <1 x i64>** %ptr
	%lA = load <1 x i64>, <1 x i64>* %A, align 1
	ret <1 x i64> %lA
}
; Post-incremented variant of the <1 x i64> align-1 load (writeback vld1.8).
define <1 x i64> @load_v1i64_update(<1 x i64>** %ptr) {
;CHECK-LABEL: load_v1i64_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
	%A = load <1 x i64>*, <1 x i64>** %ptr
	%lA = load <1 x i64>, <1 x i64>* %A, align 1
	%inc = getelementptr <1 x i64>, <1 x i64>* %A, i31 1
	store <1 x i64>* %inc, <1 x i64>** %ptr
	ret <1 x i64> %lA
}
; align-1 q-register (<16 x i8>) load: expect a two-d-register vld1.8.
define <16 x i8> @load_v16i8(<16 x i8>** %ptr) {
;CHECK-LABEL: load_v16i8:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
	%A = load <16 x i8>*, <16 x i8>** %ptr
	%lA = load <16 x i8>, <16 x i8>* %A, align 1
	ret <16 x i8> %lA
}
; Post-incremented variant of the <16 x i8> align-1 load (writeback vld1.8).
define <16 x i8> @load_v16i8_update(<16 x i8>** %ptr) {
;CHECK-LABEL: load_v16i8_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
	%A = load <16 x i8>*, <16 x i8>** %ptr
	%lA = load <16 x i8>, <16 x i8>* %A, align 1
	%inc = getelementptr <16 x i8>, <16 x i8>* %A, i316 1
	store <16 x i8>* %inc, <16 x i8>** %ptr
	ret <16 x i8> %lA
}
; align-1 <8 x i16> load: expect a two-d-register vld1.8.
define <8 x i16> @load_v8i16(<8 x i16>** %ptr) {
;CHECK-LABEL: load_v8i16:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
	%A = load <8 x i16>*, <8 x i16>** %ptr
	%lA = load <8 x i16>, <8 x i16>* %A, align 1
	ret <8 x i16> %lA
}
; Post-incremented variant of the <8 x i16> align-1 load (writeback vld1.8).
define <8 x i16> @load_v8i16_update(<8 x i16>** %ptr) {
;CHECK-LABEL: load_v8i16_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
	%A = load <8 x i16>*, <8 x i16>** %ptr
	%lA = load <8 x i16>, <8 x i16>* %A, align 1
	%inc = getelementptr <8 x i16>, <8 x i16>* %A, i38 1
	store <8 x i16>* %inc, <8 x i16>** %ptr
	ret <8 x i16> %lA
}
; align-1 <4 x i32> load: expect a two-d-register vld1.8.
define <4 x i32> @load_v4i32(<4 x i32>** %ptr) {
;CHECK-LABEL: load_v4i32:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
	%A = load <4 x i32>*, <4 x i32>** %ptr
	%lA = load <4 x i32>, <4 x i32>* %A, align 1
	ret <4 x i32> %lA
}
; Post-incremented variant of the <4 x i32> align-1 load (writeback vld1.8).
define <4 x i32> @load_v4i32_update(<4 x i32>** %ptr) {
;CHECK-LABEL: load_v4i32_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
	%A = load <4 x i32>*, <4 x i32>** %ptr
	%lA = load <4 x i32>, <4 x i32>* %A, align 1
	%inc = getelementptr <4 x i32>, <4 x i32>* %A, i34 1
	store <4 x i32>* %inc, <4 x i32>** %ptr
	ret <4 x i32> %lA
}
; align-1 <4 x float> load: expect a two-d-register vld1.8.
define <4 x float> @load_v4f32(<4 x float>** %ptr) {
;CHECK-LABEL: load_v4f32:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
	%A = load <4 x float>*, <4 x float>** %ptr
	%lA = load <4 x float>, <4 x float>* %A, align 1
	ret <4 x float> %lA
}
; Post-incremented variant of the <4 x float> align-1 load (writeback vld1.8).
define <4 x float> @load_v4f32_update(<4 x float>** %ptr) {
;CHECK-LABEL: load_v4f32_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
	%A = load <4 x float>*, <4 x float>** %ptr
	%lA = load <4 x float>, <4 x float>* %A, align 1
	%inc = getelementptr <4 x float>, <4 x float>* %A, i34 1
	store <4 x float>* %inc, <4 x float>** %ptr
	ret <4 x float> %lA
}
; align-1 <2 x i64> load: expect a two-d-register vld1.8.
define <2 x i64> @load_v2i64(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
	%A = load <2 x i64>*, <2 x i64>** %ptr
	%lA = load <2 x i64>, <2 x i64>* %A, align 1
	ret <2 x i64> %lA
}
; Post-incremented variant of the <2 x i64> align-1 load (writeback vld1.8).
define <2 x i64> @load_v2i64_update(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
	%A = load <2 x i64>*, <2 x i64>** %ptr
	%lA = load <2 x i64>, <2 x i64>* %A, align 1
	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
	store <2 x i64>* %inc, <2 x i64>** %ptr
	ret <2 x i64> %lA
}
; Make sure we change the type to match alignment if necessary.
; align 2 -> halfword-granularity vld1.16 with writeback.
define <2 x i64> @load_v2i64_update_aligned2(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned2:
;CHECK: vld1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
	%A = load <2 x i64>*, <2 x i64>** %ptr
	%lA = load <2 x i64>, <2 x i64>* %A, align 2
	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
	store <2 x i64>* %inc, <2 x i64>** %ptr
	ret <2 x i64> %lA
}
; align 4 -> word-granularity vld1.32 with writeback.
define <2 x i64> @load_v2i64_update_aligned4(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned4:
;CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
	%A = load <2 x i64>*, <2 x i64>** %ptr
	%lA = load <2 x i64>, <2 x i64>* %A, align 4
	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
	store <2 x i64>* %inc, <2 x i64>** %ptr
	ret <2 x i64> %lA
}
; align 8 -> doubleword-granularity vld1.64 with writeback.
define <2 x i64> @load_v2i64_update_aligned8(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned8:
;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
	%A = load <2 x i64>*, <2 x i64>** %ptr
	%lA = load <2 x i64>, <2 x i64>* %A, align 8
	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
	store <2 x i64>* %inc, <2 x i64>** %ptr
	ret <2 x i64> %lA
}
; align 16 -> vld1.64 with the :128 address-alignment qualifier and writeback.
define <2 x i64> @load_v2i64_update_aligned16(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned16:
;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
	%A = load <2 x i64>*, <2 x i64>** %ptr
	%lA = load <2 x i64>, <2 x i64>* %A, align 16
	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
	store <2 x i64>* %inc, <2 x i64>** %ptr
	ret <2 x i64> %lA
}
; Make sure we don't break smaller-than-dreg extloads.
define <4 x i32> @zextload_v8i8tov8i32(<4 x i8>** %ptr) {
;CHECK-LABEL: zextload_v8i8tov8i32:
;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [{{r[0-9]+}}:32]
;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
	%A = load <4 x i8>*, <4 x i8>** %ptr
	%lA = load <4 x i8>, <4 x i8>* %A, align 4
	%zlA = zext <4 x i8> %lA to <4 x i32>
	ret <4 x i32> %zlA
}
; The lane load cannot absorb this pointer update (the GEP steps by 4
; elements, not the loaded size), so expect a separate add.w/str pair.
define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) {
;CHECK-LABEL: zextload_v8i8tov8i32_fake_update:
;CHECK: ldr r[[PTRREG:[0-9]+]], [r0]
;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r[[PTRREG]]:32]
;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
;CHECK: add.w r[[INCREG:[0-9]+]], r[[PTRREG]], #16
;CHECK: str r[[INCREG]], [r0]
	%A = load <4 x i8>*, <4 x i8>** %ptr
	%lA = load <4 x i8>, <4 x i8>* %A, align 4
	%inc = getelementptr <4 x i8>, <4 x i8>* %A, i38 4
	store <4 x i8>* %inc, <4 x i8>** %ptr
	%zlA = zext <4 x i8> %lA to <4 x i32>
	ret <4 x i32> %zlA
}
; An awkward 28-byte volatile load gets split into vldr + vld1.8 + ldr pieces.
; CHECK-LABEL: test_silly_load:
; CHECK: vldr d{{[0-9]+}}, [r0, #16]
; CHECK: movs r1, #24
; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128], r1
; CHECK: ldr {{r[0-9]+}}, [r0]

define void @test_silly_load(<28 x i8>* %addr) {
	load volatile <28 x i8>, <28 x i8>* %addr
	ret void
}
266 define <4 x i32>* @test_vld1_immoffset(<4 x i32>* %ptr.in, <4 x i32>* %ptr.out) {
267 ; CHECK-LABEL: test_vld1_immoffset:
268 ; CHECK: movs [[INC:r[0-9]+]], #32
269 ; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0], [[INC]]
270 %val = load <4 x i32>, <4 x i32>* %ptr.in
271 store <4 x i32> %val, <4 x i32>* %ptr.out
272 %next = getelementptr <4 x i32>, <4 x i32>* %ptr.in, i32 2