1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 -S | FileCheck %s --check-prefix VF-TWO-CHECK
3 ; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 -S | FileCheck %s --check-prefix VF-FOUR-CHECK
5 target datalayout = "e-m:e-i64:64-n32:64"
6 target triple = "powerpc64le-unknown-linux-gnu"
8 ; Function Attrs: nounwind
9 define dso_local void @f1(float* noalias %aa, float* noalias %bb, float* noalias %cc, i32 signext %N) #0 {
10 ; VF-TWO-CHECK-LABEL: @f1(
11 ; VF-TWO-CHECK-NEXT: entry:
12 ; VF-TWO-CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
13 ; VF-TWO-CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]]
14 ; VF-TWO-CHECK: iter.check:
15 ; VF-TWO-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
16 ; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2
17 ; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
18 ; VF-TWO-CHECK: vector.main.loop.iter.check:
19 ; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 48
20 ; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
21 ; VF-TWO-CHECK: vector.ph:
22 ; VF-TWO-CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 48
23 ; VF-TWO-CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
24 ; VF-TWO-CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
25 ; VF-TWO-CHECK: vector.body:
26 ; VF-TWO-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
27 ; VF-TWO-CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
28 ; VF-TWO-CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
29 ; VF-TWO-CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
30 ; VF-TWO-CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12
31 ; VF-TWO-CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 16
32 ; VF-TWO-CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 20
33 ; VF-TWO-CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 24
34 ; VF-TWO-CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 28
35 ; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 32
36 ; VF-TWO-CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 36
37 ; VF-TWO-CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 40
38 ; VF-TWO-CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 44
39 ; VF-TWO-CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[BB:%.*]], i64 [[TMP0]]
40 ; VF-TWO-CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP1]]
41 ; VF-TWO-CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP2]]
42 ; VF-TWO-CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP3]]
43 ; VF-TWO-CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP4]]
44 ; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP5]]
45 ; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP6]]
46 ; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP7]]
47 ; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP8]]
48 ; VF-TWO-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP9]]
49 ; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP10]]
50 ; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP11]]
51 ; VF-TWO-CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0
52 ; VF-TWO-CHECK-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to <4 x float>*
53 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP25]], align 4
54 ; VF-TWO-CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4
55 ; VF-TWO-CHECK-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <4 x float>*
56 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, <4 x float>* [[TMP27]], align 4
57 ; VF-TWO-CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 8
58 ; VF-TWO-CHECK-NEXT: [[TMP29:%.*]] = bitcast float* [[TMP28]] to <4 x float>*
59 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, <4 x float>* [[TMP29]], align 4
60 ; VF-TWO-CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 12
61 ; VF-TWO-CHECK-NEXT: [[TMP31:%.*]] = bitcast float* [[TMP30]] to <4 x float>*
62 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, <4 x float>* [[TMP31]], align 4
63 ; VF-TWO-CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 16
64 ; VF-TWO-CHECK-NEXT: [[TMP33:%.*]] = bitcast float* [[TMP32]] to <4 x float>*
65 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, <4 x float>* [[TMP33]], align 4
66 ; VF-TWO-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 20
67 ; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = bitcast float* [[TMP34]] to <4 x float>*
68 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, <4 x float>* [[TMP35]], align 4
69 ; VF-TWO-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 24
70 ; VF-TWO-CHECK-NEXT: [[TMP37:%.*]] = bitcast float* [[TMP36]] to <4 x float>*
71 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, <4 x float>* [[TMP37]], align 4
72 ; VF-TWO-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 28
73 ; VF-TWO-CHECK-NEXT: [[TMP39:%.*]] = bitcast float* [[TMP38]] to <4 x float>*
74 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP39]], align 4
75 ; VF-TWO-CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 32
76 ; VF-TWO-CHECK-NEXT: [[TMP41:%.*]] = bitcast float* [[TMP40]] to <4 x float>*
77 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, <4 x float>* [[TMP41]], align 4
78 ; VF-TWO-CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 36
79 ; VF-TWO-CHECK-NEXT: [[TMP43:%.*]] = bitcast float* [[TMP42]] to <4 x float>*
80 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP43]], align 4
81 ; VF-TWO-CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 40
82 ; VF-TWO-CHECK-NEXT: [[TMP45:%.*]] = bitcast float* [[TMP44]] to <4 x float>*
83 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, <4 x float>* [[TMP45]], align 4
84 ; VF-TWO-CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 44
85 ; VF-TWO-CHECK-NEXT: [[TMP47:%.*]] = bitcast float* [[TMP46]] to <4 x float>*
86 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP47]], align 4
87 ; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, float* [[CC:%.*]], i64 [[TMP0]]
88 ; VF-TWO-CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP1]]
89 ; VF-TWO-CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP2]]
90 ; VF-TWO-CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP3]]
91 ; VF-TWO-CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP4]]
92 ; VF-TWO-CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP5]]
93 ; VF-TWO-CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP6]]
94 ; VF-TWO-CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP7]]
95 ; VF-TWO-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP8]]
96 ; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP9]]
97 ; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP10]]
98 ; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP11]]
99 ; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 0
100 ; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP60]] to <4 x float>*
101 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, <4 x float>* [[TMP61]], align 4
102 ; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 4
103 ; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = bitcast float* [[TMP62]] to <4 x float>*
104 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP63]], align 4
105 ; VF-TWO-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 8
106 ; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = bitcast float* [[TMP64]] to <4 x float>*
107 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP65]], align 4
108 ; VF-TWO-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 12
109 ; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = bitcast float* [[TMP66]] to <4 x float>*
110 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP67]], align 4
111 ; VF-TWO-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 16
112 ; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = bitcast float* [[TMP68]] to <4 x float>*
113 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <4 x float>, <4 x float>* [[TMP69]], align 4
114 ; VF-TWO-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 20
115 ; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = bitcast float* [[TMP70]] to <4 x float>*
116 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <4 x float>, <4 x float>* [[TMP71]], align 4
117 ; VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 24
118 ; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = bitcast float* [[TMP72]] to <4 x float>*
119 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <4 x float>, <4 x float>* [[TMP73]], align 4
120 ; VF-TWO-CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 28
121 ; VF-TWO-CHECK-NEXT: [[TMP75:%.*]] = bitcast float* [[TMP74]] to <4 x float>*
122 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x float>, <4 x float>* [[TMP75]], align 4
123 ; VF-TWO-CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 32
124 ; VF-TWO-CHECK-NEXT: [[TMP77:%.*]] = bitcast float* [[TMP76]] to <4 x float>*
125 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD21:%.*]] = load <4 x float>, <4 x float>* [[TMP77]], align 4
126 ; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 36
127 ; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = bitcast float* [[TMP78]] to <4 x float>*
128 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD22:%.*]] = load <4 x float>, <4 x float>* [[TMP79]], align 4
129 ; VF-TWO-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 40
130 ; VF-TWO-CHECK-NEXT: [[TMP81:%.*]] = bitcast float* [[TMP80]] to <4 x float>*
131 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, <4 x float>* [[TMP81]], align 4
132 ; VF-TWO-CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 44
133 ; VF-TWO-CHECK-NEXT: [[TMP83:%.*]] = bitcast float* [[TMP82]] to <4 x float>*
134 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <4 x float>, <4 x float>* [[TMP83]], align 4
135 ; VF-TWO-CHECK-NEXT: [[TMP84:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD13]]
136 ; VF-TWO-CHECK-NEXT: [[TMP85:%.*]] = fadd fast <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD14]]
137 ; VF-TWO-CHECK-NEXT: [[TMP86:%.*]] = fadd fast <4 x float> [[WIDE_LOAD3]], [[WIDE_LOAD15]]
138 ; VF-TWO-CHECK-NEXT: [[TMP87:%.*]] = fadd fast <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD16]]
139 ; VF-TWO-CHECK-NEXT: [[TMP88:%.*]] = fadd fast <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD17]]
140 ; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = fadd fast <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD18]]
141 ; VF-TWO-CHECK-NEXT: [[TMP90:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD19]]
142 ; VF-TWO-CHECK-NEXT: [[TMP91:%.*]] = fadd fast <4 x float> [[WIDE_LOAD8]], [[WIDE_LOAD20]]
143 ; VF-TWO-CHECK-NEXT: [[TMP92:%.*]] = fadd fast <4 x float> [[WIDE_LOAD9]], [[WIDE_LOAD21]]
144 ; VF-TWO-CHECK-NEXT: [[TMP93:%.*]] = fadd fast <4 x float> [[WIDE_LOAD10]], [[WIDE_LOAD22]]
145 ; VF-TWO-CHECK-NEXT: [[TMP94:%.*]] = fadd fast <4 x float> [[WIDE_LOAD11]], [[WIDE_LOAD23]]
146 ; VF-TWO-CHECK-NEXT: [[TMP95:%.*]] = fadd fast <4 x float> [[WIDE_LOAD12]], [[WIDE_LOAD24]]
147 ; VF-TWO-CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds float, float* [[AA:%.*]], i64 [[TMP0]]
148 ; VF-TWO-CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP1]]
149 ; VF-TWO-CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP2]]
150 ; VF-TWO-CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP3]]
151 ; VF-TWO-CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP4]]
152 ; VF-TWO-CHECK-NEXT: [[TMP101:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP5]]
153 ; VF-TWO-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP6]]
154 ; VF-TWO-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP7]]
155 ; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP8]]
156 ; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP9]]
157 ; VF-TWO-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP10]]
158 ; VF-TWO-CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP11]]
159 ; VF-TWO-CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 0
160 ; VF-TWO-CHECK-NEXT: [[TMP109:%.*]] = bitcast float* [[TMP108]] to <4 x float>*
161 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP84]], <4 x float>* [[TMP109]], align 4
162 ; VF-TWO-CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 4
163 ; VF-TWO-CHECK-NEXT: [[TMP111:%.*]] = bitcast float* [[TMP110]] to <4 x float>*
164 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP85]], <4 x float>* [[TMP111]], align 4
165 ; VF-TWO-CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 8
166 ; VF-TWO-CHECK-NEXT: [[TMP113:%.*]] = bitcast float* [[TMP112]] to <4 x float>*
167 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP86]], <4 x float>* [[TMP113]], align 4
168 ; VF-TWO-CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 12
169 ; VF-TWO-CHECK-NEXT: [[TMP115:%.*]] = bitcast float* [[TMP114]] to <4 x float>*
170 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP87]], <4 x float>* [[TMP115]], align 4
171 ; VF-TWO-CHECK-NEXT: [[TMP116:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 16
172 ; VF-TWO-CHECK-NEXT: [[TMP117:%.*]] = bitcast float* [[TMP116]] to <4 x float>*
173 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP117]], align 4
174 ; VF-TWO-CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 20
175 ; VF-TWO-CHECK-NEXT: [[TMP119:%.*]] = bitcast float* [[TMP118]] to <4 x float>*
176 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP119]], align 4
177 ; VF-TWO-CHECK-NEXT: [[TMP120:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 24
178 ; VF-TWO-CHECK-NEXT: [[TMP121:%.*]] = bitcast float* [[TMP120]] to <4 x float>*
179 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP121]], align 4
180 ; VF-TWO-CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 28
181 ; VF-TWO-CHECK-NEXT: [[TMP123:%.*]] = bitcast float* [[TMP122]] to <4 x float>*
182 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP123]], align 4
183 ; VF-TWO-CHECK-NEXT: [[TMP124:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 32
184 ; VF-TWO-CHECK-NEXT: [[TMP125:%.*]] = bitcast float* [[TMP124]] to <4 x float>*
185 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP92]], <4 x float>* [[TMP125]], align 4
186 ; VF-TWO-CHECK-NEXT: [[TMP126:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 36
187 ; VF-TWO-CHECK-NEXT: [[TMP127:%.*]] = bitcast float* [[TMP126]] to <4 x float>*
188 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP93]], <4 x float>* [[TMP127]], align 4
189 ; VF-TWO-CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 40
190 ; VF-TWO-CHECK-NEXT: [[TMP129:%.*]] = bitcast float* [[TMP128]] to <4 x float>*
191 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP94]], <4 x float>* [[TMP129]], align 4
192 ; VF-TWO-CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 44
193 ; VF-TWO-CHECK-NEXT: [[TMP131:%.*]] = bitcast float* [[TMP130]] to <4 x float>*
194 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP131]], align 4
195 ; VF-TWO-CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 48
196 ; VF-TWO-CHECK-NEXT: [[TMP132:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
197 ; VF-TWO-CHECK-NEXT: br i1 [[TMP132]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
198 ; VF-TWO-CHECK: middle.block:
199 ; VF-TWO-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
200 ; VF-TWO-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
201 ; VF-TWO-CHECK: vec.epilog.iter.check:
202 ; VF-TWO-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
203 ; VF-TWO-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
204 ; VF-TWO-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
205 ; VF-TWO-CHECK: vec.epilog.ph:
206 ; VF-TWO-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
207 ; VF-TWO-CHECK-NEXT: [[N_MOD_VF25:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2
208 ; VF-TWO-CHECK-NEXT: [[N_VEC26:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF25]]
209 ; VF-TWO-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
210 ; VF-TWO-CHECK: vec.epilog.vector.body:
211 ; VF-TWO-CHECK-NEXT: [[INDEX27:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT28:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
212 ; VF-TWO-CHECK-NEXT: [[TMP133:%.*]] = add i64 [[INDEX27]], 0
213 ; VF-TWO-CHECK-NEXT: [[TMP134:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP133]]
214 ; VF-TWO-CHECK-NEXT: [[TMP135:%.*]] = getelementptr inbounds float, float* [[TMP134]], i32 0
215 ; VF-TWO-CHECK-NEXT: [[TMP136:%.*]] = bitcast float* [[TMP135]] to <2 x float>*
216 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <2 x float>, <2 x float>* [[TMP136]], align 4
217 ; VF-TWO-CHECK-NEXT: [[TMP137:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP133]]
218 ; VF-TWO-CHECK-NEXT: [[TMP138:%.*]] = getelementptr inbounds float, float* [[TMP137]], i32 0
219 ; VF-TWO-CHECK-NEXT: [[TMP139:%.*]] = bitcast float* [[TMP138]] to <2 x float>*
220 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD31:%.*]] = load <2 x float>, <2 x float>* [[TMP139]], align 4
221 ; VF-TWO-CHECK-NEXT: [[TMP140:%.*]] = fadd fast <2 x float> [[WIDE_LOAD30]], [[WIDE_LOAD31]]
222 ; VF-TWO-CHECK-NEXT: [[TMP141:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP133]]
223 ; VF-TWO-CHECK-NEXT: [[TMP142:%.*]] = getelementptr inbounds float, float* [[TMP141]], i32 0
224 ; VF-TWO-CHECK-NEXT: [[TMP143:%.*]] = bitcast float* [[TMP142]] to <2 x float>*
225 ; VF-TWO-CHECK-NEXT: store <2 x float> [[TMP140]], <2 x float>* [[TMP143]], align 4
226 ; VF-TWO-CHECK-NEXT: [[INDEX_NEXT28]] = add nuw i64 [[INDEX27]], 2
227 ; VF-TWO-CHECK-NEXT: [[TMP144:%.*]] = icmp eq i64 [[INDEX_NEXT28]], [[N_VEC26]]
228 ; VF-TWO-CHECK-NEXT: br i1 [[TMP144]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
229 ; VF-TWO-CHECK: vec.epilog.middle.block:
230 ; VF-TWO-CHECK-NEXT: [[CMP_N29:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC26]]
231 ; VF-TWO-CHECK-NEXT: br i1 [[CMP_N29]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
232 ; VF-TWO-CHECK: vec.epilog.scalar.ph:
233 ; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
234 ; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]]
235 ; VF-TWO-CHECK: for.body:
236 ; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
237 ; VF-TWO-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[INDVARS_IV]]
238 ; VF-TWO-CHECK-NEXT: [[TMP145:%.*]] = load float, float* [[ARRAYIDX]], align 4
239 ; VF-TWO-CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[INDVARS_IV]]
240 ; VF-TWO-CHECK-NEXT: [[TMP146:%.*]] = load float, float* [[ARRAYIDX2]], align 4
241 ; VF-TWO-CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP145]], [[TMP146]]
242 ; VF-TWO-CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[INDVARS_IV]]
243 ; VF-TWO-CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4
244 ; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
245 ; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
246 ; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]]
247 ; VF-TWO-CHECK: for.end.loopexit.loopexit:
248 ; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]]
249 ; VF-TWO-CHECK: for.end.loopexit:
250 ; VF-TWO-CHECK-NEXT: br label [[FOR_END]]
251 ; VF-TWO-CHECK: for.end:
252 ; VF-TWO-CHECK-NEXT: ret void
254 ; VF-FOUR-CHECK-LABEL: @f1(
255 ; VF-FOUR-CHECK-NEXT: entry:
256 ; VF-FOUR-CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
257 ; VF-FOUR-CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]]
258 ; VF-FOUR-CHECK: iter.check:
259 ; VF-FOUR-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
260 ; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
261 ; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
262 ; VF-FOUR-CHECK: vector.main.loop.iter.check:
263 ; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 48
264 ; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
265 ; VF-FOUR-CHECK: vector.ph:
266 ; VF-FOUR-CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 48
267 ; VF-FOUR-CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
268 ; VF-FOUR-CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
269 ; VF-FOUR-CHECK: vector.body:
270 ; VF-FOUR-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
271 ; VF-FOUR-CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
272 ; VF-FOUR-CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
273 ; VF-FOUR-CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8
274 ; VF-FOUR-CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12
275 ; VF-FOUR-CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 16
276 ; VF-FOUR-CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 20
277 ; VF-FOUR-CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 24
278 ; VF-FOUR-CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 28
279 ; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 32
280 ; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 36
281 ; VF-FOUR-CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 40
282 ; VF-FOUR-CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 44
283 ; VF-FOUR-CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[BB:%.*]], i64 [[TMP0]]
284 ; VF-FOUR-CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP1]]
285 ; VF-FOUR-CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP2]]
286 ; VF-FOUR-CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP3]]
287 ; VF-FOUR-CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP4]]
288 ; VF-FOUR-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP5]]
289 ; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP6]]
290 ; VF-FOUR-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP7]]
291 ; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP8]]
292 ; VF-FOUR-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP9]]
293 ; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP10]]
294 ; VF-FOUR-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP11]]
295 ; VF-FOUR-CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0
296 ; VF-FOUR-CHECK-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to <4 x float>*
297 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP25]], align 4
298 ; VF-FOUR-CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 4
299 ; VF-FOUR-CHECK-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <4 x float>*
300 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, <4 x float>* [[TMP27]], align 4
301 ; VF-FOUR-CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 8
302 ; VF-FOUR-CHECK-NEXT: [[TMP29:%.*]] = bitcast float* [[TMP28]] to <4 x float>*
303 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, <4 x float>* [[TMP29]], align 4
304 ; VF-FOUR-CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 12
305 ; VF-FOUR-CHECK-NEXT: [[TMP31:%.*]] = bitcast float* [[TMP30]] to <4 x float>*
306 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, <4 x float>* [[TMP31]], align 4
307 ; VF-FOUR-CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 16
308 ; VF-FOUR-CHECK-NEXT: [[TMP33:%.*]] = bitcast float* [[TMP32]] to <4 x float>*
309 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, <4 x float>* [[TMP33]], align 4
310 ; VF-FOUR-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 20
311 ; VF-FOUR-CHECK-NEXT: [[TMP35:%.*]] = bitcast float* [[TMP34]] to <4 x float>*
312 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, <4 x float>* [[TMP35]], align 4
313 ; VF-FOUR-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 24
314 ; VF-FOUR-CHECK-NEXT: [[TMP37:%.*]] = bitcast float* [[TMP36]] to <4 x float>*
315 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, <4 x float>* [[TMP37]], align 4
316 ; VF-FOUR-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 28
317 ; VF-FOUR-CHECK-NEXT: [[TMP39:%.*]] = bitcast float* [[TMP38]] to <4 x float>*
318 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP39]], align 4
319 ; VF-FOUR-CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 32
320 ; VF-FOUR-CHECK-NEXT: [[TMP41:%.*]] = bitcast float* [[TMP40]] to <4 x float>*
321 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, <4 x float>* [[TMP41]], align 4
322 ; VF-FOUR-CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 36
323 ; VF-FOUR-CHECK-NEXT: [[TMP43:%.*]] = bitcast float* [[TMP42]] to <4 x float>*
324 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP43]], align 4
325 ; VF-FOUR-CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 40
326 ; VF-FOUR-CHECK-NEXT: [[TMP45:%.*]] = bitcast float* [[TMP44]] to <4 x float>*
327 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, <4 x float>* [[TMP45]], align 4
328 ; VF-FOUR-CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 44
329 ; VF-FOUR-CHECK-NEXT: [[TMP47:%.*]] = bitcast float* [[TMP46]] to <4 x float>*
330 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP47]], align 4
331 ; VF-FOUR-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, float* [[CC:%.*]], i64 [[TMP0]]
332 ; VF-FOUR-CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP1]]
333 ; VF-FOUR-CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP2]]
334 ; VF-FOUR-CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP3]]
335 ; VF-FOUR-CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP4]]
336 ; VF-FOUR-CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP5]]
337 ; VF-FOUR-CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP6]]
338 ; VF-FOUR-CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP7]]
339 ; VF-FOUR-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP8]]
340 ; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP9]]
341 ; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP10]]
342 ; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP11]]
343 ; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 0
344 ; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = bitcast float* [[TMP60]] to <4 x float>*
345 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, <4 x float>* [[TMP61]], align 4
346 ; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 4
347 ; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = bitcast float* [[TMP62]] to <4 x float>*
348 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP63]], align 4
349 ; VF-FOUR-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 8
350 ; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = bitcast float* [[TMP64]] to <4 x float>*
351 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP65]], align 4
352 ; VF-FOUR-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 12
353 ; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = bitcast float* [[TMP66]] to <4 x float>*
354 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, <4 x float>* [[TMP67]], align 4
355 ; VF-FOUR-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 16
356 ; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = bitcast float* [[TMP68]] to <4 x float>*
357 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <4 x float>, <4 x float>* [[TMP69]], align 4
358 ; VF-FOUR-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 20
359 ; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = bitcast float* [[TMP70]] to <4 x float>*
360 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <4 x float>, <4 x float>* [[TMP71]], align 4
361 ; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 24
362 ; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = bitcast float* [[TMP72]] to <4 x float>*
363 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <4 x float>, <4 x float>* [[TMP73]], align 4
364 ; VF-FOUR-CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 28
365 ; VF-FOUR-CHECK-NEXT: [[TMP75:%.*]] = bitcast float* [[TMP74]] to <4 x float>*
366 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <4 x float>, <4 x float>* [[TMP75]], align 4
367 ; VF-FOUR-CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 32
368 ; VF-FOUR-CHECK-NEXT: [[TMP77:%.*]] = bitcast float* [[TMP76]] to <4 x float>*
369 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD21:%.*]] = load <4 x float>, <4 x float>* [[TMP77]], align 4
370 ; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 36
371 ; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = bitcast float* [[TMP78]] to <4 x float>*
372 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD22:%.*]] = load <4 x float>, <4 x float>* [[TMP79]], align 4
373 ; VF-FOUR-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 40
374 ; VF-FOUR-CHECK-NEXT: [[TMP81:%.*]] = bitcast float* [[TMP80]] to <4 x float>*
375 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, <4 x float>* [[TMP81]], align 4
376 ; VF-FOUR-CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, float* [[TMP48]], i32 44
377 ; VF-FOUR-CHECK-NEXT: [[TMP83:%.*]] = bitcast float* [[TMP82]] to <4 x float>*
378 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <4 x float>, <4 x float>* [[TMP83]], align 4
379 ; VF-FOUR-CHECK-NEXT: [[TMP84:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD13]]
380 ; VF-FOUR-CHECK-NEXT: [[TMP85:%.*]] = fadd fast <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD14]]
381 ; VF-FOUR-CHECK-NEXT: [[TMP86:%.*]] = fadd fast <4 x float> [[WIDE_LOAD3]], [[WIDE_LOAD15]]
382 ; VF-FOUR-CHECK-NEXT: [[TMP87:%.*]] = fadd fast <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD16]]
383 ; VF-FOUR-CHECK-NEXT: [[TMP88:%.*]] = fadd fast <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD17]]
384 ; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = fadd fast <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD18]]
385 ; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD19]]
386 ; VF-FOUR-CHECK-NEXT: [[TMP91:%.*]] = fadd fast <4 x float> [[WIDE_LOAD8]], [[WIDE_LOAD20]]
387 ; VF-FOUR-CHECK-NEXT: [[TMP92:%.*]] = fadd fast <4 x float> [[WIDE_LOAD9]], [[WIDE_LOAD21]]
388 ; VF-FOUR-CHECK-NEXT: [[TMP93:%.*]] = fadd fast <4 x float> [[WIDE_LOAD10]], [[WIDE_LOAD22]]
389 ; VF-FOUR-CHECK-NEXT: [[TMP94:%.*]] = fadd fast <4 x float> [[WIDE_LOAD11]], [[WIDE_LOAD23]]
390 ; VF-FOUR-CHECK-NEXT: [[TMP95:%.*]] = fadd fast <4 x float> [[WIDE_LOAD12]], [[WIDE_LOAD24]]
391 ; VF-FOUR-CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds float, float* [[AA:%.*]], i64 [[TMP0]]
392 ; VF-FOUR-CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP1]]
393 ; VF-FOUR-CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP2]]
394 ; VF-FOUR-CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP3]]
395 ; VF-FOUR-CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP4]]
396 ; VF-FOUR-CHECK-NEXT: [[TMP101:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP5]]
397 ; VF-FOUR-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP6]]
398 ; VF-FOUR-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP7]]
399 ; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP8]]
400 ; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP9]]
401 ; VF-FOUR-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP10]]
402 ; VF-FOUR-CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP11]]
403 ; VF-FOUR-CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 0
404 ; VF-FOUR-CHECK-NEXT: [[TMP109:%.*]] = bitcast float* [[TMP108]] to <4 x float>*
405 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP84]], <4 x float>* [[TMP109]], align 4
406 ; VF-FOUR-CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 4
407 ; VF-FOUR-CHECK-NEXT: [[TMP111:%.*]] = bitcast float* [[TMP110]] to <4 x float>*
408 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP85]], <4 x float>* [[TMP111]], align 4
409 ; VF-FOUR-CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 8
410 ; VF-FOUR-CHECK-NEXT: [[TMP113:%.*]] = bitcast float* [[TMP112]] to <4 x float>*
411 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP86]], <4 x float>* [[TMP113]], align 4
412 ; VF-FOUR-CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 12
413 ; VF-FOUR-CHECK-NEXT: [[TMP115:%.*]] = bitcast float* [[TMP114]] to <4 x float>*
414 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP87]], <4 x float>* [[TMP115]], align 4
415 ; VF-FOUR-CHECK-NEXT: [[TMP116:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 16
416 ; VF-FOUR-CHECK-NEXT: [[TMP117:%.*]] = bitcast float* [[TMP116]] to <4 x float>*
417 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP117]], align 4
418 ; VF-FOUR-CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 20
419 ; VF-FOUR-CHECK-NEXT: [[TMP119:%.*]] = bitcast float* [[TMP118]] to <4 x float>*
420 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP119]], align 4
421 ; VF-FOUR-CHECK-NEXT: [[TMP120:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 24
422 ; VF-FOUR-CHECK-NEXT: [[TMP121:%.*]] = bitcast float* [[TMP120]] to <4 x float>*
423 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP121]], align 4
424 ; VF-FOUR-CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 28
425 ; VF-FOUR-CHECK-NEXT: [[TMP123:%.*]] = bitcast float* [[TMP122]] to <4 x float>*
426 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP123]], align 4
427 ; VF-FOUR-CHECK-NEXT: [[TMP124:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 32
428 ; VF-FOUR-CHECK-NEXT: [[TMP125:%.*]] = bitcast float* [[TMP124]] to <4 x float>*
429 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP92]], <4 x float>* [[TMP125]], align 4
430 ; VF-FOUR-CHECK-NEXT: [[TMP126:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 36
431 ; VF-FOUR-CHECK-NEXT: [[TMP127:%.*]] = bitcast float* [[TMP126]] to <4 x float>*
432 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP93]], <4 x float>* [[TMP127]], align 4
433 ; VF-FOUR-CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 40
434 ; VF-FOUR-CHECK-NEXT: [[TMP129:%.*]] = bitcast float* [[TMP128]] to <4 x float>*
435 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP94]], <4 x float>* [[TMP129]], align 4
436 ; VF-FOUR-CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds float, float* [[TMP96]], i32 44
437 ; VF-FOUR-CHECK-NEXT: [[TMP131:%.*]] = bitcast float* [[TMP130]] to <4 x float>*
438 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP95]], <4 x float>* [[TMP131]], align 4
439 ; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 48
440 ; VF-FOUR-CHECK-NEXT: [[TMP132:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
441 ; VF-FOUR-CHECK-NEXT: br i1 [[TMP132]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
442 ; VF-FOUR-CHECK: middle.block:
443 ; VF-FOUR-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
444 ; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
445 ; VF-FOUR-CHECK: vec.epilog.iter.check:
446 ; VF-FOUR-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
447 ; VF-FOUR-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
448 ; VF-FOUR-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
449 ; VF-FOUR-CHECK: vec.epilog.ph:
450 ; VF-FOUR-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
451 ; VF-FOUR-CHECK-NEXT: [[N_MOD_VF25:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
452 ; VF-FOUR-CHECK-NEXT: [[N_VEC26:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF25]]
453 ; VF-FOUR-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
454 ; VF-FOUR-CHECK: vec.epilog.vector.body:
455 ; VF-FOUR-CHECK-NEXT: [[INDEX27:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT28:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
456 ; VF-FOUR-CHECK-NEXT: [[TMP133:%.*]] = add i64 [[INDEX27]], 0
457 ; VF-FOUR-CHECK-NEXT: [[TMP134:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[TMP133]]
458 ; VF-FOUR-CHECK-NEXT: [[TMP135:%.*]] = getelementptr inbounds float, float* [[TMP134]], i32 0
459 ; VF-FOUR-CHECK-NEXT: [[TMP136:%.*]] = bitcast float* [[TMP135]] to <4 x float>*
460 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <4 x float>, <4 x float>* [[TMP136]], align 4
461 ; VF-FOUR-CHECK-NEXT: [[TMP137:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[TMP133]]
462 ; VF-FOUR-CHECK-NEXT: [[TMP138:%.*]] = getelementptr inbounds float, float* [[TMP137]], i32 0
463 ; VF-FOUR-CHECK-NEXT: [[TMP139:%.*]] = bitcast float* [[TMP138]] to <4 x float>*
464 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD31:%.*]] = load <4 x float>, <4 x float>* [[TMP139]], align 4
465 ; VF-FOUR-CHECK-NEXT: [[TMP140:%.*]] = fadd fast <4 x float> [[WIDE_LOAD30]], [[WIDE_LOAD31]]
466 ; VF-FOUR-CHECK-NEXT: [[TMP141:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[TMP133]]
467 ; VF-FOUR-CHECK-NEXT: [[TMP142:%.*]] = getelementptr inbounds float, float* [[TMP141]], i32 0
468 ; VF-FOUR-CHECK-NEXT: [[TMP143:%.*]] = bitcast float* [[TMP142]] to <4 x float>*
469 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP140]], <4 x float>* [[TMP143]], align 4
470 ; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT28]] = add nuw i64 [[INDEX27]], 4
471 ; VF-FOUR-CHECK-NEXT: [[TMP144:%.*]] = icmp eq i64 [[INDEX_NEXT28]], [[N_VEC26]]
472 ; VF-FOUR-CHECK-NEXT: br i1 [[TMP144]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
473 ; VF-FOUR-CHECK: vec.epilog.middle.block:
474 ; VF-FOUR-CHECK-NEXT: [[CMP_N29:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC26]]
475 ; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N29]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
476 ; VF-FOUR-CHECK: vec.epilog.scalar.ph:
477 ; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
478 ; VF-FOUR-CHECK-NEXT: br label [[FOR_BODY:%.*]]
479 ; VF-FOUR-CHECK: for.body:
480 ; VF-FOUR-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
481 ; VF-FOUR-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[BB]], i64 [[INDVARS_IV]]
482 ; VF-FOUR-CHECK-NEXT: [[TMP145:%.*]] = load float, float* [[ARRAYIDX]], align 4
483 ; VF-FOUR-CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[CC]], i64 [[INDVARS_IV]]
484 ; VF-FOUR-CHECK-NEXT: [[TMP146:%.*]] = load float, float* [[ARRAYIDX2]], align 4
485 ; VF-FOUR-CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP145]], [[TMP146]]
486 ; VF-FOUR-CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[AA]], i64 [[INDVARS_IV]]
487 ; VF-FOUR-CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX4]], align 4
488 ; VF-FOUR-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
489 ; VF-FOUR-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
490 ; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]]
491 ; VF-FOUR-CHECK: for.end.loopexit.loopexit:
492 ; VF-FOUR-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]]
493 ; VF-FOUR-CHECK: for.end.loopexit:
494 ; VF-FOUR-CHECK-NEXT: br label [[FOR_END]]
495 ; VF-FOUR-CHECK: for.end:
496 ; VF-FOUR-CHECK-NEXT: ret void
501 %cmp1 = icmp sgt i32 %N, 0
502 br i1 %cmp1, label %for.body.preheader, label %for.end
504 for.body.preheader: ; preds = %entry
505 %wide.trip.count = zext i32 %N to i64
508 for.body: ; preds = %for.body.preheader, %for.body
509 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
510 %arrayidx = getelementptr inbounds float, float* %bb, i64 %indvars.iv
511 %0 = load float, float* %arrayidx, align 4
512 %arrayidx2 = getelementptr inbounds float, float* %cc, i64 %indvars.iv
513 %1 = load float, float* %arrayidx2, align 4
514 %add = fadd fast float %0, %1
515 %arrayidx4 = getelementptr inbounds float, float* %aa, i64 %indvars.iv
516 store float %add, float* %arrayidx4, align 4
517 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
518 %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
519 br i1 %exitcond, label %for.body, label %for.end.loopexit
521 for.end.loopexit: ; preds = %for.body
524 for.end: ; preds = %for.end.loopexit, %entry
528 define dso_local signext i32 @f2(float* noalias %A, float* noalias %B, i32 signext %n) #0 {
529 ; VF-TWO-CHECK-LABEL: @f2(
530 ; VF-TWO-CHECK-NEXT: entry:
531 ; VF-TWO-CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 1
532 ; VF-TWO-CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]]
533 ; VF-TWO-CHECK: iter.check:
534 ; VF-TWO-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
535 ; VF-TWO-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64
536 ; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2
537 ; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
538 ; VF-TWO-CHECK: vector.scevcheck:
539 ; VF-TWO-CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
540 ; VF-TWO-CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
541 ; VF-TWO-CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
542 ; VF-TWO-CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
543 ; VF-TWO-CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
544 ; VF-TWO-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]]
545 ; VF-TWO-CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
546 ; VF-TWO-CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
547 ; VF-TWO-CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]]
548 ; VF-TWO-CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
549 ; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
550 ; VF-TWO-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
551 ; VF-TWO-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
552 ; VF-TWO-CHECK-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]]
553 ; VF-TWO-CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
554 ; VF-TWO-CHECK: vector.main.loop.iter.check:
555 ; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32
556 ; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
557 ; VF-TWO-CHECK: vector.ph:
558 ; VF-TWO-CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32
559 ; VF-TWO-CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
560 ; VF-TWO-CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
561 ; VF-TWO-CHECK: vector.body:
562 ; VF-TWO-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
563 ; VF-TWO-CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0
564 ; VF-TWO-CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 4
565 ; VF-TWO-CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 8
566 ; VF-TWO-CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 12
567 ; VF-TWO-CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 16
568 ; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 20
569 ; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 24
570 ; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 28
571 ; VF-TWO-CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
572 ; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], 0
573 ; VF-TWO-CHECK-NEXT: [[TMP21:%.*]] = add i32 [[OFFSET_IDX]], 4
574 ; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = add i32 [[OFFSET_IDX]], 8
575 ; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = add i32 [[OFFSET_IDX]], 12
576 ; VF-TWO-CHECK-NEXT: [[TMP24:%.*]] = add i32 [[OFFSET_IDX]], 16
577 ; VF-TWO-CHECK-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 20
578 ; VF-TWO-CHECK-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 24
579 ; VF-TWO-CHECK-NEXT: [[TMP27:%.*]] = add i32 [[OFFSET_IDX]], 28
580 ; VF-TWO-CHECK-NEXT: [[TMP28:%.*]] = xor i32 [[TMP20]], -1
581 ; VF-TWO-CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP21]], -1
582 ; VF-TWO-CHECK-NEXT: [[TMP30:%.*]] = xor i32 [[TMP22]], -1
583 ; VF-TWO-CHECK-NEXT: [[TMP31:%.*]] = xor i32 [[TMP23]], -1
584 ; VF-TWO-CHECK-NEXT: [[TMP32:%.*]] = xor i32 [[TMP24]], -1
585 ; VF-TWO-CHECK-NEXT: [[TMP33:%.*]] = xor i32 [[TMP25]], -1
586 ; VF-TWO-CHECK-NEXT: [[TMP34:%.*]] = xor i32 [[TMP26]], -1
587 ; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = xor i32 [[TMP27]], -1
588 ; VF-TWO-CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP28]], [[N]]
589 ; VF-TWO-CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP29]], [[N]]
590 ; VF-TWO-CHECK-NEXT: [[TMP38:%.*]] = add i32 [[TMP30]], [[N]]
591 ; VF-TWO-CHECK-NEXT: [[TMP39:%.*]] = add i32 [[TMP31]], [[N]]
592 ; VF-TWO-CHECK-NEXT: [[TMP40:%.*]] = add i32 [[TMP32]], [[N]]
593 ; VF-TWO-CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP33]], [[N]]
594 ; VF-TWO-CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP34]], [[N]]
595 ; VF-TWO-CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP35]], [[N]]
596 ; VF-TWO-CHECK-NEXT: [[TMP44:%.*]] = sext i32 [[TMP36]] to i64
597 ; VF-TWO-CHECK-NEXT: [[TMP45:%.*]] = sext i32 [[TMP37]] to i64
598 ; VF-TWO-CHECK-NEXT: [[TMP46:%.*]] = sext i32 [[TMP38]] to i64
599 ; VF-TWO-CHECK-NEXT: [[TMP47:%.*]] = sext i32 [[TMP39]] to i64
600 ; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = sext i32 [[TMP40]] to i64
601 ; VF-TWO-CHECK-NEXT: [[TMP49:%.*]] = sext i32 [[TMP41]] to i64
602 ; VF-TWO-CHECK-NEXT: [[TMP50:%.*]] = sext i32 [[TMP42]] to i64
603 ; VF-TWO-CHECK-NEXT: [[TMP51:%.*]] = sext i32 [[TMP43]] to i64
604 ; VF-TWO-CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP44]]
605 ; VF-TWO-CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP45]]
606 ; VF-TWO-CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP46]]
607 ; VF-TWO-CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP47]]
608 ; VF-TWO-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP48]]
609 ; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP49]]
610 ; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP50]]
611 ; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP51]]
612 ; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 0
613 ; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, float* [[TMP60]], i32 -3
614 ; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = bitcast float* [[TMP61]] to <4 x float>*
615 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP62]], align 4
616 ; VF-TWO-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
617 ; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -4
618 ; VF-TWO-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, float* [[TMP63]], i32 -3
619 ; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = bitcast float* [[TMP64]] to <4 x float>*
620 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, <4 x float>* [[TMP65]], align 4
621 ; VF-TWO-CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x float> [[WIDE_LOAD2]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
622 ; VF-TWO-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -8
623 ; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, float* [[TMP66]], i32 -3
624 ; VF-TWO-CHECK-NEXT: [[TMP68:%.*]] = bitcast float* [[TMP67]] to <4 x float>*
625 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, <4 x float>* [[TMP68]], align 4
626 ; VF-TWO-CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x float> [[WIDE_LOAD4]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
627 ; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -12
628 ; VF-TWO-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, float* [[TMP69]], i32 -3
629 ; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = bitcast float* [[TMP70]] to <4 x float>*
630 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, <4 x float>* [[TMP71]], align 4
631 ; VF-TWO-CHECK-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x float> [[WIDE_LOAD6]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
632 ; VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -16
633 ; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds float, float* [[TMP72]], i32 -3
634 ; VF-TWO-CHECK-NEXT: [[TMP74:%.*]] = bitcast float* [[TMP73]] to <4 x float>*
635 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP74]], align 4
636 ; VF-TWO-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
637 ; VF-TWO-CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -20
638 ; VF-TWO-CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, float* [[TMP75]], i32 -3
639 ; VF-TWO-CHECK-NEXT: [[TMP77:%.*]] = bitcast float* [[TMP76]] to <4 x float>*
640 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP77]], align 4
641 ; VF-TWO-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
642 ; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -24
643 ; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds float, float* [[TMP78]], i32 -3
644 ; VF-TWO-CHECK-NEXT: [[TMP80:%.*]] = bitcast float* [[TMP79]] to <4 x float>*
645 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP80]], align 4
646 ; VF-TWO-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
647 ; VF-TWO-CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -28
648 ; VF-TWO-CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, float* [[TMP81]], i32 -3
649 ; VF-TWO-CHECK-NEXT: [[TMP83:%.*]] = bitcast float* [[TMP82]] to <4 x float>*
650 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP83]], align 4
651 ; VF-TWO-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
652 ; VF-TWO-CHECK-NEXT: [[TMP84:%.*]] = fadd fast <4 x float> [[REVERSE]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
653 ; VF-TWO-CHECK-NEXT: [[TMP85:%.*]] = fadd fast <4 x float> [[REVERSE3]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
654 ; VF-TWO-CHECK-NEXT: [[TMP86:%.*]] = fadd fast <4 x float> [[REVERSE5]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
655 ; VF-TWO-CHECK-NEXT: [[TMP87:%.*]] = fadd fast <4 x float> [[REVERSE7]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
656 ; VF-TWO-CHECK-NEXT: [[TMP88:%.*]] = fadd fast <4 x float> [[REVERSE9]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
657 ; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = fadd fast <4 x float> [[REVERSE11]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
658 ; VF-TWO-CHECK-NEXT: [[TMP90:%.*]] = fadd fast <4 x float> [[REVERSE13]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
659 ; VF-TWO-CHECK-NEXT: [[TMP91:%.*]] = fadd fast <4 x float> [[REVERSE15]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
660 ; VF-TWO-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP12]]
661 ; VF-TWO-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP13]]
662 ; VF-TWO-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP14]]
663 ; VF-TWO-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP15]]
664 ; VF-TWO-CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
665 ; VF-TWO-CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP17]]
666 ; VF-TWO-CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP18]]
667 ; VF-TWO-CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP19]]
668 ; VF-TWO-CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 0
669 ; VF-TWO-CHECK-NEXT: [[TMP101:%.*]] = bitcast float* [[TMP100]] to <4 x float>*
670 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP84]], <4 x float>* [[TMP101]], align 4
671 ; VF-TWO-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 4
672 ; VF-TWO-CHECK-NEXT: [[TMP103:%.*]] = bitcast float* [[TMP102]] to <4 x float>*
673 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP85]], <4 x float>* [[TMP103]], align 4
674 ; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 8
675 ; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = bitcast float* [[TMP104]] to <4 x float>*
676 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP86]], <4 x float>* [[TMP105]], align 4
677 ; VF-TWO-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 12
678 ; VF-TWO-CHECK-NEXT: [[TMP107:%.*]] = bitcast float* [[TMP106]] to <4 x float>*
679 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP87]], <4 x float>* [[TMP107]], align 4
680 ; VF-TWO-CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 16
681 ; VF-TWO-CHECK-NEXT: [[TMP109:%.*]] = bitcast float* [[TMP108]] to <4 x float>*
682 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP109]], align 4
683 ; VF-TWO-CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 20
684 ; VF-TWO-CHECK-NEXT: [[TMP111:%.*]] = bitcast float* [[TMP110]] to <4 x float>*
685 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP111]], align 4
686 ; VF-TWO-CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 24
687 ; VF-TWO-CHECK-NEXT: [[TMP113:%.*]] = bitcast float* [[TMP112]] to <4 x float>*
688 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP113]], align 4
689 ; VF-TWO-CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 28
690 ; VF-TWO-CHECK-NEXT: [[TMP115:%.*]] = bitcast float* [[TMP114]] to <4 x float>*
691 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP115]], align 4
692 ; VF-TWO-CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
693 ; VF-TWO-CHECK-NEXT: [[TMP116:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
694 ; VF-TWO-CHECK-NEXT: br i1 [[TMP116]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
695 ; VF-TWO-CHECK: middle.block:
696 ; VF-TWO-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
697 ; VF-TWO-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
698 ; VF-TWO-CHECK: vec.epilog.iter.check:
699 ; VF-TWO-CHECK-NEXT: [[IND_END21:%.*]] = trunc i64 [[N_VEC]] to i32
700 ; VF-TWO-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
701 ; VF-TWO-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
702 ; VF-TWO-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
703 ; VF-TWO-CHECK: vec.epilog.ph:
704 ; VF-TWO-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
705 ; VF-TWO-CHECK-NEXT: [[N_MOD_VF16:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2
706 ; VF-TWO-CHECK-NEXT: [[N_VEC17:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF16]]
707 ; VF-TWO-CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC17]] to i32
708 ; VF-TWO-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
709 ; VF-TWO-CHECK: vec.epilog.vector.body:
710 ; VF-TWO-CHECK-NEXT: [[INDEX18:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT19:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
711 ; VF-TWO-CHECK-NEXT: [[TMP117:%.*]] = add i64 [[INDEX18]], 0
712 ; VF-TWO-CHECK-NEXT: [[OFFSET_IDX23:%.*]] = trunc i64 [[INDEX18]] to i32
713 ; VF-TWO-CHECK-NEXT: [[TMP118:%.*]] = add i32 [[OFFSET_IDX23]], 0
714 ; VF-TWO-CHECK-NEXT: [[TMP119:%.*]] = xor i32 [[TMP118]], -1
715 ; VF-TWO-CHECK-NEXT: [[TMP120:%.*]] = add i32 [[TMP119]], [[N]]
716 ; VF-TWO-CHECK-NEXT: [[TMP121:%.*]] = sext i32 [[TMP120]] to i64
717 ; VF-TWO-CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP121]]
718 ; VF-TWO-CHECK-NEXT: [[TMP123:%.*]] = getelementptr inbounds float, float* [[TMP122]], i32 0
719 ; VF-TWO-CHECK-NEXT: [[TMP124:%.*]] = getelementptr inbounds float, float* [[TMP123]], i32 -1
720 ; VF-TWO-CHECK-NEXT: [[TMP125:%.*]] = bitcast float* [[TMP124]] to <2 x float>*
721 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <2 x float>, <2 x float>* [[TMP125]], align 4
722 ; VF-TWO-CHECK-NEXT: [[REVERSE25:%.*]] = shufflevector <2 x float> [[WIDE_LOAD24]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
723 ; VF-TWO-CHECK-NEXT: [[TMP126:%.*]] = fadd fast <2 x float> [[REVERSE25]], <float 1.000000e+00, float 1.000000e+00>
724 ; VF-TWO-CHECK-NEXT: [[TMP127:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP117]]
725 ; VF-TWO-CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds float, float* [[TMP127]], i32 0
726 ; VF-TWO-CHECK-NEXT: [[TMP129:%.*]] = bitcast float* [[TMP128]] to <2 x float>*
727 ; VF-TWO-CHECK-NEXT: store <2 x float> [[TMP126]], <2 x float>* [[TMP129]], align 4
728 ; VF-TWO-CHECK-NEXT: [[INDEX_NEXT19]] = add nuw i64 [[INDEX18]], 2
729 ; VF-TWO-CHECK-NEXT: [[TMP130:%.*]] = icmp eq i64 [[INDEX_NEXT19]], [[N_VEC17]]
730 ; VF-TWO-CHECK-NEXT: br i1 [[TMP130]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
731 ; VF-TWO-CHECK: vec.epilog.middle.block:
732 ; VF-TWO-CHECK-NEXT: [[CMP_N22:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC17]]
733 ; VF-TWO-CHECK-NEXT: br i1 [[CMP_N22]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
734 ; VF-TWO-CHECK: vec.epilog.scalar.ph:
735 ; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ]
736 ; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL20:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END21]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ]
737 ; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]]
738 ; VF-TWO-CHECK: for.body:
739 ; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
740 ; VF-TWO-CHECK-NEXT: [[I_014:%.*]] = phi i32 [ [[BC_RESUME_VAL20]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
741 ; VF-TWO-CHECK-NEXT: [[TMP131:%.*]] = xor i32 [[I_014]], -1
742 ; VF-TWO-CHECK-NEXT: [[SUB2:%.*]] = add i32 [[TMP131]], [[N]]
743 ; VF-TWO-CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB2]] to i64
744 ; VF-TWO-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IDXPROM]]
745 ; VF-TWO-CHECK-NEXT: [[TMP132:%.*]] = load float, float* [[ARRAYIDX]], align 4
746 ; VF-TWO-CHECK-NEXT: [[CONV3:%.*]] = fadd fast float [[TMP132]], 1.000000e+00
747 ; VF-TWO-CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
748 ; VF-TWO-CHECK-NEXT: store float [[CONV3]], float* [[ARRAYIDX5]], align 4
749 ; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
750 ; VF-TWO-CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1
751 ; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
752 ; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
753 ; VF-TWO-CHECK: for.end.loopexit.loopexit:
754 ; VF-TWO-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]]
755 ; VF-TWO-CHECK: for.end.loopexit:
756 ; VF-TWO-CHECK-NEXT: br label [[FOR_END]]
757 ; VF-TWO-CHECK: for.end:
758 ; VF-TWO-CHECK-NEXT: ret i32 0
760 ; VF-FOUR-CHECK-LABEL: @f2(
761 ; VF-FOUR-CHECK-NEXT: entry:
762 ; VF-FOUR-CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 1
763 ; VF-FOUR-CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]]
764 ; VF-FOUR-CHECK: iter.check:
765 ; VF-FOUR-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
766 ; VF-FOUR-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64
767 ; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
768 ; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
769 ; VF-FOUR-CHECK: vector.scevcheck:
770 ; VF-FOUR-CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
771 ; VF-FOUR-CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
772 ; VF-FOUR-CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
773 ; VF-FOUR-CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
774 ; VF-FOUR-CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
775 ; VF-FOUR-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[MUL_RESULT]]
776 ; VF-FOUR-CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
777 ; VF-FOUR-CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], [[TMP0]]
778 ; VF-FOUR-CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]]
779 ; VF-FOUR-CHECK-NEXT: [[TMP7:%.*]] = select i1 true, i1 [[TMP5]], i1 [[TMP6]]
780 ; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
781 ; VF-FOUR-CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
782 ; VF-FOUR-CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
783 ; VF-FOUR-CHECK-NEXT: [[TMP11:%.*]] = or i1 false, [[TMP10]]
784 ; VF-FOUR-CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
785 ; VF-FOUR-CHECK: vector.main.loop.iter.check:
786 ; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32
787 ; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
788 ; VF-FOUR-CHECK: vector.ph:
789 ; VF-FOUR-CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32
790 ; VF-FOUR-CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
791 ; VF-FOUR-CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
792 ; VF-FOUR-CHECK: vector.body:
793 ; VF-FOUR-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
794 ; VF-FOUR-CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0
795 ; VF-FOUR-CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 4
796 ; VF-FOUR-CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 8
797 ; VF-FOUR-CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 12
798 ; VF-FOUR-CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 16
799 ; VF-FOUR-CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 20
800 ; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 24
801 ; VF-FOUR-CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 28
802 ; VF-FOUR-CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
803 ; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], 0
804 ; VF-FOUR-CHECK-NEXT: [[TMP21:%.*]] = add i32 [[OFFSET_IDX]], 4
805 ; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = add i32 [[OFFSET_IDX]], 8
806 ; VF-FOUR-CHECK-NEXT: [[TMP23:%.*]] = add i32 [[OFFSET_IDX]], 12
807 ; VF-FOUR-CHECK-NEXT: [[TMP24:%.*]] = add i32 [[OFFSET_IDX]], 16
808 ; VF-FOUR-CHECK-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 20
809 ; VF-FOUR-CHECK-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 24
810 ; VF-FOUR-CHECK-NEXT: [[TMP27:%.*]] = add i32 [[OFFSET_IDX]], 28
811 ; VF-FOUR-CHECK-NEXT: [[TMP28:%.*]] = xor i32 [[TMP20]], -1
812 ; VF-FOUR-CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP21]], -1
813 ; VF-FOUR-CHECK-NEXT: [[TMP30:%.*]] = xor i32 [[TMP22]], -1
814 ; VF-FOUR-CHECK-NEXT: [[TMP31:%.*]] = xor i32 [[TMP23]], -1
815 ; VF-FOUR-CHECK-NEXT: [[TMP32:%.*]] = xor i32 [[TMP24]], -1
816 ; VF-FOUR-CHECK-NEXT: [[TMP33:%.*]] = xor i32 [[TMP25]], -1
817 ; VF-FOUR-CHECK-NEXT: [[TMP34:%.*]] = xor i32 [[TMP26]], -1
818 ; VF-FOUR-CHECK-NEXT: [[TMP35:%.*]] = xor i32 [[TMP27]], -1
819 ; VF-FOUR-CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP28]], [[N]]
820 ; VF-FOUR-CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP29]], [[N]]
821 ; VF-FOUR-CHECK-NEXT: [[TMP38:%.*]] = add i32 [[TMP30]], [[N]]
822 ; VF-FOUR-CHECK-NEXT: [[TMP39:%.*]] = add i32 [[TMP31]], [[N]]
823 ; VF-FOUR-CHECK-NEXT: [[TMP40:%.*]] = add i32 [[TMP32]], [[N]]
824 ; VF-FOUR-CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP33]], [[N]]
825 ; VF-FOUR-CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP34]], [[N]]
826 ; VF-FOUR-CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP35]], [[N]]
827 ; VF-FOUR-CHECK-NEXT: [[TMP44:%.*]] = sext i32 [[TMP36]] to i64
828 ; VF-FOUR-CHECK-NEXT: [[TMP45:%.*]] = sext i32 [[TMP37]] to i64
829 ; VF-FOUR-CHECK-NEXT: [[TMP46:%.*]] = sext i32 [[TMP38]] to i64
830 ; VF-FOUR-CHECK-NEXT: [[TMP47:%.*]] = sext i32 [[TMP39]] to i64
831 ; VF-FOUR-CHECK-NEXT: [[TMP48:%.*]] = sext i32 [[TMP40]] to i64
832 ; VF-FOUR-CHECK-NEXT: [[TMP49:%.*]] = sext i32 [[TMP41]] to i64
833 ; VF-FOUR-CHECK-NEXT: [[TMP50:%.*]] = sext i32 [[TMP42]] to i64
834 ; VF-FOUR-CHECK-NEXT: [[TMP51:%.*]] = sext i32 [[TMP43]] to i64
835 ; VF-FOUR-CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP44]]
836 ; VF-FOUR-CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP45]]
837 ; VF-FOUR-CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP46]]
838 ; VF-FOUR-CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP47]]
839 ; VF-FOUR-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP48]]
840 ; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP49]]
841 ; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP50]]
842 ; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP51]]
843 ; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 0
844 ; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, float* [[TMP60]], i32 -3
845 ; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = bitcast float* [[TMP61]] to <4 x float>*
846 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP62]], align 4
847 ; VF-FOUR-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
848 ; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -4
849 ; VF-FOUR-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, float* [[TMP63]], i32 -3
850 ; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = bitcast float* [[TMP64]] to <4 x float>*
851 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, <4 x float>* [[TMP65]], align 4
852 ; VF-FOUR-CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x float> [[WIDE_LOAD2]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
853 ; VF-FOUR-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -8
854 ; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, float* [[TMP66]], i32 -3
855 ; VF-FOUR-CHECK-NEXT: [[TMP68:%.*]] = bitcast float* [[TMP67]] to <4 x float>*
856 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, <4 x float>* [[TMP68]], align 4
857 ; VF-FOUR-CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x float> [[WIDE_LOAD4]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
858 ; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -12
859 ; VF-FOUR-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, float* [[TMP69]], i32 -3
860 ; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = bitcast float* [[TMP70]] to <4 x float>*
861 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, <4 x float>* [[TMP71]], align 4
862 ; VF-FOUR-CHECK-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x float> [[WIDE_LOAD6]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
863 ; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -16
864 ; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds float, float* [[TMP72]], i32 -3
865 ; VF-FOUR-CHECK-NEXT: [[TMP74:%.*]] = bitcast float* [[TMP73]] to <4 x float>*
866 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP74]], align 4
867 ; VF-FOUR-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
868 ; VF-FOUR-CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -20
869 ; VF-FOUR-CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, float* [[TMP75]], i32 -3
870 ; VF-FOUR-CHECK-NEXT: [[TMP77:%.*]] = bitcast float* [[TMP76]] to <4 x float>*
871 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP77]], align 4
872 ; VF-FOUR-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
873 ; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -24
874 ; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds float, float* [[TMP78]], i32 -3
875 ; VF-FOUR-CHECK-NEXT: [[TMP80:%.*]] = bitcast float* [[TMP79]] to <4 x float>*
876 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP80]], align 4
877 ; VF-FOUR-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
878 ; VF-FOUR-CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds float, float* [[TMP52]], i32 -28
879 ; VF-FOUR-CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds float, float* [[TMP81]], i32 -3
880 ; VF-FOUR-CHECK-NEXT: [[TMP83:%.*]] = bitcast float* [[TMP82]] to <4 x float>*
881 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP83]], align 4
882 ; VF-FOUR-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
883 ; VF-FOUR-CHECK-NEXT: [[TMP84:%.*]] = fadd fast <4 x float> [[REVERSE]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
884 ; VF-FOUR-CHECK-NEXT: [[TMP85:%.*]] = fadd fast <4 x float> [[REVERSE3]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
885 ; VF-FOUR-CHECK-NEXT: [[TMP86:%.*]] = fadd fast <4 x float> [[REVERSE5]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
886 ; VF-FOUR-CHECK-NEXT: [[TMP87:%.*]] = fadd fast <4 x float> [[REVERSE7]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
887 ; VF-FOUR-CHECK-NEXT: [[TMP88:%.*]] = fadd fast <4 x float> [[REVERSE9]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
888 ; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = fadd fast <4 x float> [[REVERSE11]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
889 ; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = fadd fast <4 x float> [[REVERSE13]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
890 ; VF-FOUR-CHECK-NEXT: [[TMP91:%.*]] = fadd fast <4 x float> [[REVERSE15]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
891 ; VF-FOUR-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP12]]
892 ; VF-FOUR-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP13]]
893 ; VF-FOUR-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP14]]
894 ; VF-FOUR-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP15]]
895 ; VF-FOUR-CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
896 ; VF-FOUR-CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP17]]
897 ; VF-FOUR-CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP18]]
898 ; VF-FOUR-CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP19]]
899 ; VF-FOUR-CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 0
900 ; VF-FOUR-CHECK-NEXT: [[TMP101:%.*]] = bitcast float* [[TMP100]] to <4 x float>*
901 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP84]], <4 x float>* [[TMP101]], align 4
902 ; VF-FOUR-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 4
903 ; VF-FOUR-CHECK-NEXT: [[TMP103:%.*]] = bitcast float* [[TMP102]] to <4 x float>*
904 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP85]], <4 x float>* [[TMP103]], align 4
905 ; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 8
906 ; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = bitcast float* [[TMP104]] to <4 x float>*
907 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP86]], <4 x float>* [[TMP105]], align 4
908 ; VF-FOUR-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 12
909 ; VF-FOUR-CHECK-NEXT: [[TMP107:%.*]] = bitcast float* [[TMP106]] to <4 x float>*
910 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP87]], <4 x float>* [[TMP107]], align 4
911 ; VF-FOUR-CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 16
912 ; VF-FOUR-CHECK-NEXT: [[TMP109:%.*]] = bitcast float* [[TMP108]] to <4 x float>*
913 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP88]], <4 x float>* [[TMP109]], align 4
914 ; VF-FOUR-CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 20
915 ; VF-FOUR-CHECK-NEXT: [[TMP111:%.*]] = bitcast float* [[TMP110]] to <4 x float>*
916 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP89]], <4 x float>* [[TMP111]], align 4
917 ; VF-FOUR-CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 24
918 ; VF-FOUR-CHECK-NEXT: [[TMP113:%.*]] = bitcast float* [[TMP112]] to <4 x float>*
919 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP90]], <4 x float>* [[TMP113]], align 4
920 ; VF-FOUR-CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds float, float* [[TMP92]], i32 28
921 ; VF-FOUR-CHECK-NEXT: [[TMP115:%.*]] = bitcast float* [[TMP114]] to <4 x float>*
922 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP91]], <4 x float>* [[TMP115]], align 4
923 ; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
924 ; VF-FOUR-CHECK-NEXT: [[TMP116:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
925 ; VF-FOUR-CHECK-NEXT: br i1 [[TMP116]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
926 ; VF-FOUR-CHECK: middle.block:
927 ; VF-FOUR-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
928 ; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
929 ; VF-FOUR-CHECK: vec.epilog.iter.check:
930 ; VF-FOUR-CHECK-NEXT: [[IND_END21:%.*]] = trunc i64 [[N_VEC]] to i32
931 ; VF-FOUR-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
932 ; VF-FOUR-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
933 ; VF-FOUR-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
934 ; VF-FOUR-CHECK: vec.epilog.ph:
935 ; VF-FOUR-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
936 ; VF-FOUR-CHECK-NEXT: [[N_MOD_VF16:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
937 ; VF-FOUR-CHECK-NEXT: [[N_VEC17:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF16]]
938 ; VF-FOUR-CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC17]] to i32
939 ; VF-FOUR-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
940 ; VF-FOUR-CHECK: vec.epilog.vector.body:
941 ; VF-FOUR-CHECK-NEXT: [[INDEX18:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT19:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
942 ; VF-FOUR-CHECK-NEXT: [[TMP117:%.*]] = add i64 [[INDEX18]], 0
943 ; VF-FOUR-CHECK-NEXT: [[OFFSET_IDX23:%.*]] = trunc i64 [[INDEX18]] to i32
944 ; VF-FOUR-CHECK-NEXT: [[TMP118:%.*]] = add i32 [[OFFSET_IDX23]], 0
945 ; VF-FOUR-CHECK-NEXT: [[TMP119:%.*]] = xor i32 [[TMP118]], -1
946 ; VF-FOUR-CHECK-NEXT: [[TMP120:%.*]] = add i32 [[TMP119]], [[N]]
947 ; VF-FOUR-CHECK-NEXT: [[TMP121:%.*]] = sext i32 [[TMP120]] to i64
948 ; VF-FOUR-CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP121]]
949 ; VF-FOUR-CHECK-NEXT: [[TMP123:%.*]] = getelementptr inbounds float, float* [[TMP122]], i32 0
950 ; VF-FOUR-CHECK-NEXT: [[TMP124:%.*]] = getelementptr inbounds float, float* [[TMP123]], i32 -3
951 ; VF-FOUR-CHECK-NEXT: [[TMP125:%.*]] = bitcast float* [[TMP124]] to <4 x float>*
952 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <4 x float>, <4 x float>* [[TMP125]], align 4
953 ; VF-FOUR-CHECK-NEXT: [[REVERSE25:%.*]] = shufflevector <4 x float> [[WIDE_LOAD24]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
954 ; VF-FOUR-CHECK-NEXT: [[TMP126:%.*]] = fadd fast <4 x float> [[REVERSE25]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
955 ; VF-FOUR-CHECK-NEXT: [[TMP127:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP117]]
956 ; VF-FOUR-CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds float, float* [[TMP127]], i32 0
957 ; VF-FOUR-CHECK-NEXT: [[TMP129:%.*]] = bitcast float* [[TMP128]] to <4 x float>*
958 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP126]], <4 x float>* [[TMP129]], align 4
959 ; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT19]] = add nuw i64 [[INDEX18]], 4
960 ; VF-FOUR-CHECK-NEXT: [[TMP130:%.*]] = icmp eq i64 [[INDEX_NEXT19]], [[N_VEC17]]
961 ; VF-FOUR-CHECK-NEXT: br i1 [[TMP130]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
962 ; VF-FOUR-CHECK: vec.epilog.middle.block:
963 ; VF-FOUR-CHECK-NEXT: [[CMP_N22:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC17]]
964 ; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N22]], label [[FOR_END_LOOPEXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
965 ; VF-FOUR-CHECK: vec.epilog.scalar.ph:
966 ; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ]
967 ; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL20:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END21]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ]
968 ; VF-FOUR-CHECK-NEXT: br label [[FOR_BODY:%.*]]
969 ; VF-FOUR-CHECK: for.body:
970 ; VF-FOUR-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
971 ; VF-FOUR-CHECK-NEXT: [[I_014:%.*]] = phi i32 [ [[BC_RESUME_VAL20]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
972 ; VF-FOUR-CHECK-NEXT: [[TMP131:%.*]] = xor i32 [[I_014]], -1
973 ; VF-FOUR-CHECK-NEXT: [[SUB2:%.*]] = add i32 [[TMP131]], [[N]]
974 ; VF-FOUR-CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB2]] to i64
975 ; VF-FOUR-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[IDXPROM]]
976 ; VF-FOUR-CHECK-NEXT: [[TMP132:%.*]] = load float, float* [[ARRAYIDX]], align 4
977 ; VF-FOUR-CHECK-NEXT: [[CONV3:%.*]] = fadd fast float [[TMP132]], 1.000000e+00
978 ; VF-FOUR-CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
979 ; VF-FOUR-CHECK-NEXT: store float [[CONV3]], float* [[ARRAYIDX5]], align 4
980 ; VF-FOUR-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
981 ; VF-FOUR-CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1
982 ; VF-FOUR-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
983 ; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
984 ; VF-FOUR-CHECK: for.end.loopexit.loopexit:
985 ; VF-FOUR-CHECK-NEXT: br label [[FOR_END_LOOPEXIT]]
986 ; VF-FOUR-CHECK: for.end.loopexit:
987 ; VF-FOUR-CHECK-NEXT: br label [[FOR_END]]
988 ; VF-FOUR-CHECK: for.end:
989 ; VF-FOUR-CHECK-NEXT: ret i32 0
994 %cmp1 = icmp sgt i32 %n, 1
995 br i1 %cmp1, label %for.body.preheader, label %for.end
997 for.body.preheader: ; preds = %entry
999 %wide.trip.count = zext i32 %0 to i64
1002 for.body: ; preds = %for.body.preheader, %for.body
1003 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
1004 %i.014 = phi i32 [ 0, %for.body.preheader ], [ %inc, %for.body ]
1005 %1 = xor i32 %i.014, -1
1006 %sub2 = add i32 %1, %n
1007 %idxprom = sext i32 %sub2 to i64
1008 %arrayidx = getelementptr inbounds float, float* %B, i64 %idxprom
1009 %2 = load float, float* %arrayidx, align 4
1010 %conv3 = fadd fast float %2, 1.000000e+00
1011 %arrayidx5 = getelementptr inbounds float, float* %A, i64 %indvars.iv
1012 store float %conv3, float* %arrayidx5, align 4
1013 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1014 %inc = add nuw nsw i32 %i.014, 1
1015 %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
1016 br i1 %exitcond, label %for.body, label %for.end.loopexit
1018 for.end.loopexit: ; preds = %for.body
1021 for.end: ; preds = %for.end.loopexit, %entry
1025 attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-spe" "unsafe-fp-math"="true" "use-soft-float"="false" }