1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2 ; RUN: opt -passes=loop-vectorize \
3 ; RUN: -force-tail-folding-style=data-with-evl \
4 ; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
5 ; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefixes=IF-EVL-OUTLOOP
7 ; RUN: opt -passes=loop-vectorize \
8 ; RUN: -prefer-inloop-reductions \
9 ; RUN: -force-tail-folding-style=data-with-evl \
10 ; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
11 ; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefixes=IF-EVL-INLOOP
13 ; RUN: opt -passes=loop-vectorize \
14 ; RUN: -force-tail-folding-style=none \
15 ; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
16 ; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefixes=NO-VP-OUTLOOP
18 ; RUN: opt -passes=loop-vectorize \
19 ; RUN: -prefer-inloop-reductions \
20 ; RUN: -force-tail-folding-style=none \
21 ; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
22 ; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefixes=NO-VP-INLOOP
24 define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) {
25 ; IF-EVL-OUTLOOP-LABEL: define void @reduction_intermediate_store(
26 ; IF-EVL-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
27 ; IF-EVL-OUTLOOP-NEXT: entry:
28 ; IF-EVL-OUTLOOP-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
29 ; IF-EVL-OUTLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
30 ; IF-EVL-OUTLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
31 ; IF-EVL-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
32 ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
33 ; IF-EVL-OUTLOOP: vector.memcheck:
34 ; IF-EVL-OUTLOOP-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ADDR]], i64 4
35 ; IF-EVL-OUTLOOP-NEXT: [[TMP4:%.*]] = shl i64 [[N]], 2
36 ; IF-EVL-OUTLOOP-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
37 ; IF-EVL-OUTLOOP-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ADDR]], [[SCEVGEP1]]
38 ; IF-EVL-OUTLOOP-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
39 ; IF-EVL-OUTLOOP-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
40 ; IF-EVL-OUTLOOP-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[ENTRY:%.*]]
41 ; IF-EVL-OUTLOOP: vector.ph:
42 ; IF-EVL-OUTLOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
43 ; IF-EVL-OUTLOOP-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
44 ; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
45 ; IF-EVL-OUTLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP7]]
46 ; IF-EVL-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
47 ; IF-EVL-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
48 ; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
49 ; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
50 ; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
51 ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
52 ; IF-EVL-OUTLOOP: vector.body:
53 ; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
54 ; IF-EVL-OUTLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ]
55 ; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP10]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ]
56 ; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
57 ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
58 ; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
59 ; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
60 ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
61 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META0:![0-9]+]]
62 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
63 ; IF-EVL-OUTLOOP-NEXT: [[TMP19]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP12]])
64 ; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64
65 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
66 ; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[TMP9]]
67 ; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
68 ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
69 ; IF-EVL-OUTLOOP: middle.block:
70 ; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]])
71 ; IF-EVL-OUTLOOP-NEXT: store i32 [[TMP23]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]]
72 ; IF-EVL-OUTLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
73 ; IF-EVL-OUTLOOP: scalar.ph:
74 ; IF-EVL-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
75 ; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_MEMCHECK]] ], [ [[START]], [[ENTRY1]] ]
76 ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY1:%.*]]
77 ; IF-EVL-OUTLOOP: for.body:
78 ; IF-EVL-OUTLOOP-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ]
79 ; IF-EVL-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY1]] ]
80 ; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV1]]
81 ; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
82 ; IF-EVL-OUTLOOP-NEXT: [[ADD]] = add nsw i32 [[TMP27]], [[RDX]]
83 ; IF-EVL-OUTLOOP-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4
84 ; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
85 ; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]]
86 ; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP8:![0-9]+]]
87 ; IF-EVL-OUTLOOP: for.end:
88 ; IF-EVL-OUTLOOP-NEXT: ret void
90 ; IF-EVL-INLOOP-LABEL: define void @reduction_intermediate_store(
91 ; IF-EVL-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
92 ; IF-EVL-INLOOP-NEXT: entry:
93 ; IF-EVL-INLOOP-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
94 ; IF-EVL-INLOOP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
95 ; IF-EVL-INLOOP-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
96 ; IF-EVL-INLOOP-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
97 ; IF-EVL-INLOOP-NEXT: br i1 [[TMP4]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
98 ; IF-EVL-INLOOP: vector.memcheck:
99 ; IF-EVL-INLOOP-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ADDR]], i64 4
100 ; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = shl i64 [[N]], 2
101 ; IF-EVL-INLOOP-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
102 ; IF-EVL-INLOOP-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ADDR]], [[SCEVGEP1]]
103 ; IF-EVL-INLOOP-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
104 ; IF-EVL-INLOOP-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
105 ; IF-EVL-INLOOP-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
106 ; IF-EVL-INLOOP: vector.ph:
107 ; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
108 ; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
109 ; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = sub i64 [[TMP9]], 1
110 ; IF-EVL-INLOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP10]]
111 ; IF-EVL-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP9]]
112 ; IF-EVL-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
113 ; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
114 ; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4
115 ; IF-EVL-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
116 ; IF-EVL-INLOOP: vector.body:
117 ; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
118 ; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
119 ; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
120 ; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
121 ; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP13]], i32 4, i1 true)
122 ; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = add i64 [[EVL_BASED_IV]], 0
123 ; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]]
124 ; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
125 ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]]), !alias.scope [[META0:![0-9]+]]
126 ; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
127 ; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
128 ; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP14]] to i64
129 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
130 ; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]]
131 ; IF-EVL-INLOOP-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
132 ; IF-EVL-INLOOP-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
133 ; IF-EVL-INLOOP: middle.block:
134 ; IF-EVL-INLOOP-NEXT: store i32 [[TMP22]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]]
135 ; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
136 ; IF-EVL-INLOOP: scalar.ph:
137 ; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
138 ; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_MEMCHECK]] ], [ [[START]], [[ENTRY]] ]
139 ; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
140 ; IF-EVL-INLOOP: for.body:
141 ; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
142 ; IF-EVL-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
143 ; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
144 ; IF-EVL-INLOOP-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
145 ; IF-EVL-INLOOP-NEXT: [[ADD]] = add nsw i32 [[TMP25]], [[RDX]]
146 ; IF-EVL-INLOOP-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4
147 ; IF-EVL-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
148 ; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
149 ; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
150 ; IF-EVL-INLOOP: for.end:
151 ; IF-EVL-INLOOP-NEXT: ret void
153 ; NO-VP-OUTLOOP-LABEL: define void @reduction_intermediate_store(
154 ; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
155 ; NO-VP-OUTLOOP-NEXT: entry:
156 ; NO-VP-OUTLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
157 ; NO-VP-OUTLOOP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
158 ; NO-VP-OUTLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
159 ; NO-VP-OUTLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
160 ; NO-VP-OUTLOOP: vector.memcheck:
161 ; NO-VP-OUTLOOP-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ADDR]], i64 4
162 ; NO-VP-OUTLOOP-NEXT: [[TMP3:%.*]] = shl i64 [[N]], 2
163 ; NO-VP-OUTLOOP-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
164 ; NO-VP-OUTLOOP-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ADDR]], [[SCEVGEP1]]
165 ; NO-VP-OUTLOOP-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
166 ; NO-VP-OUTLOOP-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
167 ; NO-VP-OUTLOOP-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
168 ; NO-VP-OUTLOOP: vector.ph:
169 ; NO-VP-OUTLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
170 ; NO-VP-OUTLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
171 ; NO-VP-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
172 ; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
173 ; NO-VP-OUTLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
174 ; NO-VP-OUTLOOP-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
175 ; NO-VP-OUTLOOP-NEXT: [[TMP8:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
176 ; NO-VP-OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
177 ; NO-VP-OUTLOOP: vector.body:
178 ; NO-VP-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
179 ; NO-VP-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP8]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
180 ; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
181 ; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
182 ; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
183 ; NO-VP-OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP11]], align 4, !alias.scope [[META0:![0-9]+]]
184 ; NO-VP-OUTLOOP-NEXT: [[TMP12]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
185 ; NO-VP-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
186 ; NO-VP-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
187 ; NO-VP-OUTLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
188 ; NO-VP-OUTLOOP: middle.block:
189 ; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP12]])
190 ; NO-VP-OUTLOOP-NEXT: store i32 [[TMP14]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]]
191 ; NO-VP-OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
192 ; NO-VP-OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
193 ; NO-VP-OUTLOOP: scalar.ph:
194 ; NO-VP-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
195 ; NO-VP-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_MEMCHECK]] ], [ [[START]], [[ENTRY]] ]
196 ; NO-VP-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]]
197 ; NO-VP-OUTLOOP: for.body:
198 ; NO-VP-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
199 ; NO-VP-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
200 ; NO-VP-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
201 ; NO-VP-OUTLOOP-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
202 ; NO-VP-OUTLOOP-NEXT: [[ADD]] = add nsw i32 [[TMP15]], [[RDX]]
203 ; NO-VP-OUTLOOP-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4
204 ; NO-VP-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
205 ; NO-VP-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
206 ; NO-VP-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
207 ; NO-VP-OUTLOOP: for.end:
208 ; NO-VP-OUTLOOP-NEXT: ret void
210 ; NO-VP-INLOOP-LABEL: define void @reduction_intermediate_store(
211 ; NO-VP-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
212 ; NO-VP-INLOOP-NEXT: entry:
213 ; NO-VP-INLOOP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
214 ; NO-VP-INLOOP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
215 ; NO-VP-INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
216 ; NO-VP-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
217 ; NO-VP-INLOOP: vector.memcheck:
218 ; NO-VP-INLOOP-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ADDR]], i64 4
219 ; NO-VP-INLOOP-NEXT: [[TMP3:%.*]] = shl i64 [[N]], 2
220 ; NO-VP-INLOOP-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
221 ; NO-VP-INLOOP-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ADDR]], [[SCEVGEP1]]
222 ; NO-VP-INLOOP-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
223 ; NO-VP-INLOOP-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
224 ; NO-VP-INLOOP-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
225 ; NO-VP-INLOOP: vector.ph:
226 ; NO-VP-INLOOP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
227 ; NO-VP-INLOOP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
228 ; NO-VP-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
229 ; NO-VP-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
230 ; NO-VP-INLOOP-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
231 ; NO-VP-INLOOP-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
232 ; NO-VP-INLOOP-NEXT: br label [[VECTOR_BODY:%.*]]
233 ; NO-VP-INLOOP: vector.body:
234 ; NO-VP-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
235 ; NO-VP-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
236 ; NO-VP-INLOOP-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
237 ; NO-VP-INLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]]
238 ; NO-VP-INLOOP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
239 ; NO-VP-INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP10]], align 4, !alias.scope [[META0:![0-9]+]]
240 ; NO-VP-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
241 ; NO-VP-INLOOP-NEXT: [[TMP12]] = add i32 [[TMP11]], [[VEC_PHI]]
242 ; NO-VP-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
243 ; NO-VP-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
244 ; NO-VP-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
245 ; NO-VP-INLOOP: middle.block:
246 ; NO-VP-INLOOP-NEXT: store i32 [[TMP12]], ptr [[ADDR]], align 4, !alias.scope [[META6:![0-9]+]], !noalias [[META0]]
247 ; NO-VP-INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
248 ; NO-VP-INLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
249 ; NO-VP-INLOOP: scalar.ph:
250 ; NO-VP-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
251 ; NO-VP-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[START]], [[VECTOR_MEMCHECK]] ], [ [[START]], [[ENTRY]] ]
252 ; NO-VP-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
253 ; NO-VP-INLOOP: for.body:
254 ; NO-VP-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
255 ; NO-VP-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
256 ; NO-VP-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
257 ; NO-VP-INLOOP-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
258 ; NO-VP-INLOOP-NEXT: [[ADD]] = add nsw i32 [[TMP14]], [[RDX]]
259 ; NO-VP-INLOOP-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4
260 ; NO-VP-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
261 ; NO-VP-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
262 ; NO-VP-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
263 ; NO-VP-INLOOP: for.end:
264 ; NO-VP-INLOOP-NEXT: ret void
; Scalar input loop. %iv is the induction variable, starting at 0 on entry
; and advanced by 1 each iteration.
270 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
; %rdx is the add-reduction accumulator, seeded with the %start argument and
; fed by %add around the back-edge.
271 %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
; Load the i32 element of %a at index %iv and fold it into the accumulator.
272 %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
273 %0 = load i32, ptr %arrayidx, align 4
274 %add = add nsw i32 %0, %rdx
; The running sum is stored through the %addr argument on every iteration;
; %addr does not depend on %iv, so the same location is overwritten each time.
275 store i32 %add, ptr %addr, align 4
276 %iv.next = add nuw nsw i64 %iv, 1
; Leave the loop once %iv.next equals %n; the back-edge branch carries the
; loop metadata node !0.
277 %exitcond.not = icmp eq i64 %iv.next, %n
278 br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
; Loop metadata attached via !llvm.loop to the branch that closes for.body.
; !0 is the self-referential loop identifier; !1 sets the
; llvm.loop.vectorize.enable property to true.
284 !0 = distinct !{!0, !1}
285 !1 = !{!"llvm.loop.vectorize.enable", i1 true}
287 ; IF-EVL-OUTLOOP: [[META0]] = !{[[META1:![0-9]+]]}
288 ; IF-EVL-OUTLOOP: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
289 ; IF-EVL-OUTLOOP: [[META2]] = distinct !{[[META2]], !"LVerDomain"}
290 ; IF-EVL-OUTLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]], [[META5:![0-9]+]]}
291 ; IF-EVL-OUTLOOP: [[META4]] = !{!"llvm.loop.isvectorized", i32 1}
292 ; IF-EVL-OUTLOOP: [[META5]] = !{!"llvm.loop.unroll.runtime.disable"}
293 ; IF-EVL-OUTLOOP: [[META6]] = !{[[META7:![0-9]+]]}
294 ; IF-EVL-OUTLOOP: [[META7]] = distinct !{[[META7]], [[META2]]}
295 ; IF-EVL-OUTLOOP: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
297 ; IF-EVL-INLOOP: [[META0]] = !{[[META1:![0-9]+]]}
298 ; IF-EVL-INLOOP: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
299 ; IF-EVL-INLOOP: [[META2]] = distinct !{[[META2]], !"LVerDomain"}
300 ; IF-EVL-INLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]], [[META5:![0-9]+]]}
301 ; IF-EVL-INLOOP: [[META4]] = !{!"llvm.loop.isvectorized", i32 1}
302 ; IF-EVL-INLOOP: [[META5]] = !{!"llvm.loop.unroll.runtime.disable"}
303 ; IF-EVL-INLOOP: [[META6]] = !{[[META7:![0-9]+]]}
304 ; IF-EVL-INLOOP: [[META7]] = distinct !{[[META7]], [[META2]]}
305 ; IF-EVL-INLOOP: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
307 ; NO-VP-OUTLOOP: [[META0]] = !{[[META1:![0-9]+]]}
308 ; NO-VP-OUTLOOP: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
309 ; NO-VP-OUTLOOP: [[META2]] = distinct !{[[META2]], !"LVerDomain"}
310 ; NO-VP-OUTLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]], [[META5:![0-9]+]]}
311 ; NO-VP-OUTLOOP: [[META4]] = !{!"llvm.loop.isvectorized", i32 1}
312 ; NO-VP-OUTLOOP: [[META5]] = !{!"llvm.loop.unroll.runtime.disable"}
313 ; NO-VP-OUTLOOP: [[META6]] = !{[[META7:![0-9]+]]}
314 ; NO-VP-OUTLOOP: [[META7]] = distinct !{[[META7]], [[META2]]}
315 ; NO-VP-OUTLOOP: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}
317 ; NO-VP-INLOOP: [[META0]] = !{[[META1:![0-9]+]]}
318 ; NO-VP-INLOOP: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
319 ; NO-VP-INLOOP: [[META2]] = distinct !{[[META2]], !"LVerDomain"}
320 ; NO-VP-INLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]], [[META5:![0-9]+]]}
321 ; NO-VP-INLOOP: [[META4]] = !{!"llvm.loop.isvectorized", i32 1}
322 ; NO-VP-INLOOP: [[META5]] = !{!"llvm.loop.unroll.runtime.disable"}
323 ; NO-VP-INLOOP: [[META6]] = !{[[META7:![0-9]+]]}
324 ; NO-VP-INLOOP: [[META7]] = distinct !{[[META7]], [[META2]]}
325 ; NO-VP-INLOOP: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]}