1 ; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-iphones -force-vector-width=4 -force-vector-interleave=1 %s -S | FileCheck %s
3 ; Vectors with i4 elements may not be legal with nontemporal stores.
4 define void @test_i4_store(ptr %ddst) {
5 ; CHECK-LABEL: define void @test_i4_store(
6 ; CHECK-NOT: vector.body:
12 for.body: ; preds = %entry, %for.body
13 %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
14 %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
15 %incdec.ptr = getelementptr inbounds i4, ptr %ddst.addr, i64 1
16 store i4 10, ptr %ddst.addr, align 4, !nontemporal !8
17 %add = add nuw nsw i32 %i, 4
18 %cmp = icmp ult i32 %i, 4092
19 br i1 %cmp, label %for.body, label %for.cond.cleanup
21 for.cond.cleanup: ; preds = %for.body
25 define void @test_i8_store(ptr %ddst) {
26 ; CHECK-LABEL: define void @test_i8_store(
27 ; CHECK: vector.body:
28 ; CHECK: store <4 x i8> {{.*}} !nontemporal !0
34 for.body: ; preds = %entry, %for.body
35 %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
36 %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
37 %incdec.ptr = getelementptr inbounds i8, ptr %ddst.addr, i64 1
38 store i8 10, ptr %ddst.addr, align 4, !nontemporal !8
39 %add = add nuw nsw i32 %i, 4
40 %cmp = icmp ult i32 %i, 4092
41 br i1 %cmp, label %for.body, label %for.cond.cleanup
43 for.cond.cleanup: ; preds = %for.body
47 define void @test_half_store(ptr %ddst) {
48 ; CHECK-LABEL: define void @test_half_store(
49 ; CHECK: vector.body:
50 ; CHECK: store <4 x half> {{.*}} !nontemporal !0
56 for.body: ; preds = %entry, %for.body
57 %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
58 %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
59 %incdec.ptr = getelementptr inbounds half, ptr %ddst.addr, i64 1
60 store half 10.0, ptr %ddst.addr, align 4, !nontemporal !8
61 %add = add nuw nsw i32 %i, 4
62 %cmp = icmp ult i32 %i, 4092
63 br i1 %cmp, label %for.body, label %for.cond.cleanup
65 for.cond.cleanup: ; preds = %for.body
69 define void @test_i16_store(ptr %ddst) {
70 ; CHECK-LABEL: define void @test_i16_store(
71 ; CHECK: vector.body:
72 ; CHECK: store <4 x i16> {{.*}} !nontemporal !0
78 for.body: ; preds = %entry, %for.body
79 %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
80 %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
81 %incdec.ptr = getelementptr inbounds i16, ptr %ddst.addr, i64 1
82 store i16 10, ptr %ddst.addr, align 4, !nontemporal !8
83 %add = add nuw nsw i32 %i, 4
84 %cmp = icmp ult i32 %i, 4092
85 br i1 %cmp, label %for.body, label %for.cond.cleanup
87 for.cond.cleanup: ; preds = %for.body
91 define void @test_i32_store(ptr nocapture %ddst) {
92 ; CHECK-LABEL: define void @test_i32_store(
93 ; CHECK: vector.body:
94 ; CHECK: store <16 x i32> {{.*}} !nontemporal !0
100 for.body: ; preds = %entry, %for.body
101 %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
102 %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr3, %for.body ]
103 %incdec.ptr = getelementptr inbounds i32, ptr %ddst.addr, i64 1
104 store i32 10, ptr %ddst.addr, align 4, !nontemporal !8
105 %incdec.ptr1 = getelementptr inbounds i32, ptr %ddst.addr, i64 2
106 store i32 20, ptr %incdec.ptr, align 4, !nontemporal !8
107 %incdec.ptr2 = getelementptr inbounds i32, ptr %ddst.addr, i64 3
108 store i32 30, ptr %incdec.ptr1, align 4, !nontemporal !8
109 %incdec.ptr3 = getelementptr inbounds i32, ptr %ddst.addr, i64 4
110 store i32 40, ptr %incdec.ptr2, align 4, !nontemporal !8
111 %add = add nuw nsw i32 %i, 4
112 %cmp = icmp ult i32 %i, 4092
113 br i1 %cmp, label %for.body, label %for.cond.cleanup
115 for.cond.cleanup: ; preds = %for.body
119 define void @test_i33_store(ptr nocapture %ddst) {
120 ; CHECK-LABEL: define void @test_i33_store(
121 ; CHECK-NOT: vector.body:
127 for.body: ; preds = %entry, %for.body
128 %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
129 %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr3, %for.body ]
130 %incdec.ptr = getelementptr inbounds i33, ptr %ddst.addr, i64 1
131 store i33 10, ptr %ddst.addr, align 4, !nontemporal !8
132 %incdec.ptr1 = getelementptr inbounds i33, ptr %ddst.addr, i64 2
133 store i33 20, ptr %incdec.ptr, align 4, !nontemporal !8
134 %incdec.ptr2 = getelementptr inbounds i33, ptr %ddst.addr, i64 3
135 store i33 30, ptr %incdec.ptr1, align 4, !nontemporal !8
136 %incdec.ptr3 = getelementptr inbounds i33, ptr %ddst.addr, i64 4
137 store i33 40, ptr %incdec.ptr2, align 4, !nontemporal !8
138 %add = add nuw nsw i32 %i, 3
139 %cmp = icmp ult i32 %i, 4092
140 br i1 %cmp, label %for.body, label %for.cond.cleanup
142 for.cond.cleanup: ; preds = %for.body
146 define void @test_i40_store(ptr nocapture %ddst) {
147 ; CHECK-LABEL: define void @test_i40_store(
148 ; CHECK-NOT: vector.body:
154 for.body: ; preds = %entry, %for.body
155 %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
156 %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr3, %for.body ]
157 %incdec.ptr = getelementptr inbounds i40, ptr %ddst.addr, i64 1
158 store i40 10, ptr %ddst.addr, align 4, !nontemporal !8
159 %incdec.ptr1 = getelementptr inbounds i40, ptr %ddst.addr, i64 2
160 store i40 20, ptr %incdec.ptr, align 4, !nontemporal !8
161 %incdec.ptr2 = getelementptr inbounds i40, ptr %ddst.addr, i64 3
162 store i40 30, ptr %incdec.ptr1, align 4, !nontemporal !8
163 %incdec.ptr3 = getelementptr inbounds i40, ptr %ddst.addr, i64 4
164 store i40 40, ptr %incdec.ptr2, align 4, !nontemporal !8
165 %add = add nuw nsw i32 %i, 3
166 %cmp = icmp ult i32 %i, 4092
167 br i1 %cmp, label %for.body, label %for.cond.cleanup
169 for.cond.cleanup: ; preds = %for.body
172 define void @test_i64_store(ptr nocapture %ddst) local_unnamed_addr #0 {
173 ; CHECK-LABEL: define void @test_i64_store(
174 ; CHECK: vector.body:
175 ; CHECK: store <4 x i64> {{.*}} !nontemporal !0
181 for.body: ; preds = %entry, %for.body
182 %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
183 %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
184 %incdec.ptr = getelementptr inbounds i64, ptr %ddst.addr, i64 1
185 store i64 10, ptr %ddst.addr, align 4, !nontemporal !8
186 %add = add nuw nsw i32 %i, 4
187 %cmp = icmp ult i32 %i, 4092
188 br i1 %cmp, label %for.body, label %for.cond.cleanup
190 for.cond.cleanup: ; preds = %for.body
194 define void @test_double_store(ptr %ddst) {
195 ; CHECK-LABEL: define void @test_double_store(
196 ; CHECK: vector.body:
197 ; CHECK: store <4 x double> {{.*}} !nontemporal !0
203 for.body: ; preds = %entry, %for.body
204 %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
205 %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
206 %incdec.ptr = getelementptr inbounds double, ptr %ddst.addr, i64 1
207 store double 10.0, ptr %ddst.addr, align 4, !nontemporal !8
208 %add = add nuw nsw i32 %i, 4
209 %cmp = icmp ult i32 %i, 4092
210 br i1 %cmp, label %for.body, label %for.cond.cleanup
212 for.cond.cleanup: ; preds = %for.body
216 define void @test_i128_store(ptr %ddst) {
217 ; CHECK-LABEL: define void @test_i128_store(
218 ; CHECK: vector.body:
219 ; CHECK: store <4 x i128> {{.*}} !nontemporal !0
225 for.body: ; preds = %entry, %for.body
226 %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
227 %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
228 %incdec.ptr = getelementptr inbounds i128, ptr %ddst.addr, i64 1
229 store i128 10, ptr %ddst.addr, align 4, !nontemporal !8
230 %add = add nuw nsw i32 %i, 4
231 %cmp = icmp ult i32 %i, 4092
232 br i1 %cmp, label %for.body, label %for.cond.cleanup
234 for.cond.cleanup: ; preds = %for.body
238 define void @test_i256_store(ptr %ddst) {
239 ; CHECK-LABEL: define void @test_i256_store(
240 ; CHECK-NOT: vector.body:
246 for.body: ; preds = %entry, %for.body
247 %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
248 %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
249 %incdec.ptr = getelementptr inbounds i256, ptr %ddst.addr, i64 1
250 store i256 10, ptr %ddst.addr, align 4, !nontemporal !8
251 %add = add nuw nsw i32 %i, 4
252 %cmp = icmp ult i32 %i, 4092
253 br i1 %cmp, label %for.body, label %for.cond.cleanup
255 for.cond.cleanup: ; preds = %for.body
259 define i4 @test_i4_load(ptr %ddst) {
260 ; CHECK-LABEL: define i4 @test_i4_load
261 ; CHECK-NOT: vector.body:
262 ; CHECK: ret i4 %{{.*}}
267 for.body: ; preds = %entry, %for.body
268 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
269 %acc.08 = phi i4 [ 0, %entry ], [ %add, %for.body ]
270 %arrayidx = getelementptr inbounds i4, ptr %ddst, i64 %indvars.iv
271 %l = load i4, ptr %arrayidx, align 1, !nontemporal !8
272 %add = add i4 %l, %acc.08
273 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
274 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
275 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
277 for.cond.cleanup: ; preds = %for.body
281 define i8 @test_load_i8(ptr %ddst) {
282 ; CHECK-LABEL: @test_load_i8(
283 ; CHECK: vector.body:
284 ; CHECK: load <4 x i8>, ptr {{.*}}, align 1, !nontemporal !0
285 ; CHECK: ret i8 %{{.*}}
290 for.body: ; preds = %entry, %for.body
291 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
292 %acc.08 = phi i8 [ 0, %entry ], [ %add, %for.body ]
293 %arrayidx = getelementptr inbounds i8, ptr %ddst, i64 %indvars.iv
294 %l = load i8, ptr %arrayidx, align 1, !nontemporal !8
295 %add = add i8 %l, %acc.08
296 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
297 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
298 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
300 for.cond.cleanup: ; preds = %for.body
304 define half @test_half_load(ptr %ddst) {
305 ; CHECK-LABEL: @test_half_load
306 ; CHECK: vector.body:
307 ; CHECK: load <4 x half>, ptr {{.*}}, align 2, !nontemporal !0
308 ; CHECK: ret half %{{.*}}
313 for.body: ; preds = %entry, %for.body
314 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
315 %acc.08 = phi half [ 0.0, %entry ], [ %add, %for.body ]
316 %arrayidx = getelementptr inbounds half, ptr %ddst, i64 %indvars.iv
317 %l = load half, ptr %arrayidx, align 2, !nontemporal !8
318 %add = fadd half %l, %acc.08
319 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
320 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
321 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
323 for.cond.cleanup: ; preds = %for.body
327 define i16 @test_i16_load(ptr %ddst) {
328 ; CHECK-LABEL: @test_i16_load
329 ; CHECK: vector.body:
330 ; CHECK: load <4 x i16>, ptr {{.*}}, align 2, !nontemporal !0
331 ; CHECK: ret i16 %{{.*}}
336 for.body: ; preds = %entry, %for.body
337 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
338 %acc.08 = phi i16 [ 0, %entry ], [ %add, %for.body ]
339 %arrayidx = getelementptr inbounds i16, ptr %ddst, i64 %indvars.iv
340 %l = load i16, ptr %arrayidx, align 2, !nontemporal !8
341 %add = add i16 %l, %acc.08
342 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
343 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
344 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
346 for.cond.cleanup: ; preds = %for.body
350 define i32 @test_i32_load(ptr %ddst) {
351 ; CHECK-LABEL: @test_i32_load
352 ; CHECK: vector.body:
353 ; CHECK: load <4 x i32>, ptr {{.*}}, align 4, !nontemporal !0
354 ; CHECK: ret i32 %{{.*}}
359 for.body: ; preds = %entry, %for.body
360 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
361 %acc.08 = phi i32 [ 0, %entry ], [ %add, %for.body ]
362 %arrayidx = getelementptr inbounds i32, ptr %ddst, i64 %indvars.iv
363 %l = load i32, ptr %arrayidx, align 4, !nontemporal !8
364 %add = add i32 %l, %acc.08
365 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
366 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
367 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
369 for.cond.cleanup: ; preds = %for.body
373 define i33 @test_i33_load(ptr %ddst) {
374 ; CHECK-LABEL: @test_i33_load
375 ; CHECK-NOT: vector.body:
376 ; CHECK: ret i33 %{{.*}}
381 for.body: ; preds = %entry, %for.body
382 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
383 %acc.08 = phi i33 [ 0, %entry ], [ %add, %for.body ]
384 %arrayidx = getelementptr inbounds i33, ptr %ddst, i64 %indvars.iv
385 %l = load i33, ptr %arrayidx, align 4, !nontemporal !8
386 %add = add i33 %l, %acc.08
387 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
388 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
389 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
391 for.cond.cleanup: ; preds = %for.body
395 define i40 @test_i40_load(ptr %ddst) {
396 ; CHECK-LABEL: @test_i40_load
397 ; CHECK-NOT: vector.body:
398 ; CHECK: ret i40 %{{.*}}
403 for.body: ; preds = %entry, %for.body
404 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
405 %acc.08 = phi i40 [ 0, %entry ], [ %add, %for.body ]
406 %arrayidx = getelementptr inbounds i40, ptr %ddst, i64 %indvars.iv
407 %l = load i40, ptr %arrayidx, align 4, !nontemporal !8
408 %add = add i40 %l, %acc.08
409 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
410 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
411 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
413 for.cond.cleanup: ; preds = %for.body
417 define i64 @test_i64_load(ptr %ddst) {
418 ; CHECK-LABEL: @test_i64_load
419 ; CHECK: vector.body:
420 ; CHECK: load <4 x i64>, ptr {{.*}}, align 4, !nontemporal !0
421 ; CHECK: ret i64 %{{.*}}
426 for.body: ; preds = %entry, %for.body
427 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
428 %acc.08 = phi i64 [ 0, %entry ], [ %add, %for.body ]
429 %arrayidx = getelementptr inbounds i64, ptr %ddst, i64 %indvars.iv
430 %l = load i64, ptr %arrayidx, align 4, !nontemporal !8
431 %add = add i64 %l, %acc.08
432 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
433 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
434 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
436 for.cond.cleanup: ; preds = %for.body
440 define double @test_double_load(ptr %ddst) {
441 ; CHECK-LABEL: @test_double_load
442 ; CHECK: vector.body:
443 ; CHECK: load <4 x double>, ptr {{.*}}, align 4, !nontemporal !0
444 ; CHECK: ret double %{{.*}}
449 for.body: ; preds = %entry, %for.body
450 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
451 %acc.08 = phi double [ 0.0, %entry ], [ %add, %for.body ]
452 %arrayidx = getelementptr inbounds double, ptr %ddst, i64 %indvars.iv
453 %l = load double, ptr %arrayidx, align 4, !nontemporal !8
454 %add = fadd double %l, %acc.08
455 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
456 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
457 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
459 for.cond.cleanup: ; preds = %for.body
463 define i128 @test_i128_load(ptr %ddst) {
464 ; CHECK-LABEL: @test_i128_load
465 ; CHECK: vector.body:
466 ; CHECK: load <4 x i128>, ptr {{.*}}, align 4, !nontemporal !0
467 ; CHECK: ret i128 %{{.*}}
472 for.body: ; preds = %entry, %for.body
473 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
474 %acc.08 = phi i128 [ 0, %entry ], [ %add, %for.body ]
475 %arrayidx = getelementptr inbounds i128, ptr %ddst, i64 %indvars.iv
476 %l = load i128, ptr %arrayidx, align 4, !nontemporal !8
477 %add = add i128 %l, %acc.08
478 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
479 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
480 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
482 for.cond.cleanup: ; preds = %for.body
486 define i256 @test_256_load(ptr %ddst) {
487 ; CHECK-LABEL: @test_256_load
488 ; CHECK-NOT: vector.body:
489 ; CHECK: ret i256 %{{.*}}
494 for.body: ; preds = %entry, %for.body
495 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
496 %acc.08 = phi i256 [ 0, %entry ], [ %add, %for.body ]
497 %arrayidx = getelementptr inbounds i256, ptr %ddst, i64 %indvars.iv
498 %l = load i256, ptr %arrayidx, align 4, !nontemporal !8
499 %add = add i256 %l, %acc.08
500 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
501 %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
502 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
504 for.cond.cleanup: ; preds = %for.body