; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z15 -passes=slp-vectorizer %s -S -o - \
; RUN:   | FileCheck %s

; Test vectorization and reassociation of fmin/fmax operations. Vectorization
; is more profitable if the loads are also vectorizable.
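;
; For example (an illustrative sketch only, not checked by FileCheck; %v and
; %r are placeholder names): with fast-math, a scalar chain such as
;   %m1 = call fast double @llvm.minnum.f64(double %t1, double %t0)
;   %m2 = call fast double @llvm.minnum.f64(double %t2, double %m1)
; can be reassociated into a single wide load feeding a reduction intrinsic:
;   %v = load <4 x double>, ptr %x, align 4
;   %r = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
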
define double @fmin_double_4_nums_seq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define double @fmin_double_4_nums_seq(
; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr [[X]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[TMP1]])
; CHECK-NEXT:    ret double [[TMP2]]
;
  %g1 = getelementptr inbounds double, ptr %x, i64 1
  %g2 = getelementptr inbounds double, ptr %x, i64 2
  %g3 = getelementptr inbounds double, ptr %x, i64 3
  %t0 = load double, ptr %x, align 4
  %t1 = load double, ptr %g1, align 4
  %t2 = load double, ptr %g2, align 4
  %t3 = load double, ptr %g3, align 4
  %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0)
  %m2 = tail call fast double @llvm.minnum.f64(double %t2, double %m1)
  %m3 = tail call fast double @llvm.minnum.f64(double %t3, double %m2)
  ret double %m3
}

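; In the non-sequential tests below the elements are two apart (stride 2), so
; the loads cannot be combined into a single vector load; the SLP vectorizer
; instead gathers the scalars with insertelement before forming the reduction.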
define double @fmin_double_16_nums_nonseq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define double @fmin_double_16_nums_nonseq(
; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 4
; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 6
; CHECK-NEXT:    [[G4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8
; CHECK-NEXT:    [[G5:%.*]] = getelementptr inbounds double, ptr [[X]], i64 10
; CHECK-NEXT:    [[G6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 12
; CHECK-NEXT:    [[G7:%.*]] = getelementptr inbounds double, ptr [[X]], i64 14
; CHECK-NEXT:    [[G8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 16
; CHECK-NEXT:    [[G9:%.*]] = getelementptr inbounds double, ptr [[X]], i64 18
; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds double, ptr [[X]], i64 20
; CHECK-NEXT:    [[G11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 22
; CHECK-NEXT:    [[G12:%.*]] = getelementptr inbounds double, ptr [[X]], i64 24
; CHECK-NEXT:    [[G13:%.*]] = getelementptr inbounds double, ptr [[X]], i64 26
; CHECK-NEXT:    [[G14:%.*]] = getelementptr inbounds double, ptr [[X]], i64 28
; CHECK-NEXT:    [[G15:%.*]] = getelementptr inbounds double, ptr [[X]], i64 30
; CHECK-NEXT:    [[T0:%.*]] = load double, ptr [[X]], align 4
; CHECK-NEXT:    [[T1:%.*]] = load double, ptr [[G1]], align 4
; CHECK-NEXT:    [[T2:%.*]] = load double, ptr [[G2]], align 4
; CHECK-NEXT:    [[T3:%.*]] = load double, ptr [[G3]], align 4
; CHECK-NEXT:    [[T4:%.*]] = load double, ptr [[G4]], align 4
; CHECK-NEXT:    [[T5:%.*]] = load double, ptr [[G5]], align 4
; CHECK-NEXT:    [[T6:%.*]] = load double, ptr [[G6]], align 4
; CHECK-NEXT:    [[T7:%.*]] = load double, ptr [[G7]], align 4
; CHECK-NEXT:    [[T8:%.*]] = load double, ptr [[G8]], align 4
; CHECK-NEXT:    [[T9:%.*]] = load double, ptr [[G9]], align 4
; CHECK-NEXT:    [[T10:%.*]] = load double, ptr [[G10]], align 4
; CHECK-NEXT:    [[T11:%.*]] = load double, ptr [[G11]], align 4
; CHECK-NEXT:    [[T12:%.*]] = load double, ptr [[G12]], align 4
; CHECK-NEXT:    [[T13:%.*]] = load double, ptr [[G13]], align 4
; CHECK-NEXT:    [[T14:%.*]] = load double, ptr [[G14]], align 4
; CHECK-NEXT:    [[T15:%.*]] = load double, ptr [[G15]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x double> poison, double [[T1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x double> [[TMP1]], double [[T0]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x double> [[TMP2]], double [[T2]], i32 2
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x double> [[TMP3]], double [[T3]], i32 3
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x double> [[TMP4]], double [[T4]], i32 4
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x double> [[TMP5]], double [[T5]], i32 5
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x double> [[TMP6]], double [[T6]], i32 6
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x double> [[TMP7]], double [[T7]], i32 7
; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x double> [[TMP8]], double [[T8]], i32 8
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x double> [[TMP9]], double [[T9]], i32 9
; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x double> [[TMP10]], double [[T10]], i32 10
; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x double> [[TMP11]], double [[T11]], i32 11
; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x double> [[TMP12]], double [[T12]], i32 12
; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <16 x double> [[TMP13]], double [[T13]], i32 13
; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x double> [[TMP14]], double [[T14]], i32 14
; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x double> [[TMP15]], double [[T15]], i32 15
; CHECK-NEXT:    [[TMP17:%.*]] = call fast double @llvm.vector.reduce.fmin.v16f64(<16 x double> [[TMP16]])
; CHECK-NEXT:    ret double [[TMP17]]
;
  %g1 = getelementptr inbounds double, ptr %x, i64 2
  %g2 = getelementptr inbounds double, ptr %x, i64 4
  %g3 = getelementptr inbounds double, ptr %x, i64 6
  %g4 = getelementptr inbounds double, ptr %x, i64 8
  %g5 = getelementptr inbounds double, ptr %x, i64 10
  %g6 = getelementptr inbounds double, ptr %x, i64 12
  %g7 = getelementptr inbounds double, ptr %x, i64 14
  %g8 = getelementptr inbounds double, ptr %x, i64 16
  %g9 = getelementptr inbounds double, ptr %x, i64 18
  %g10 = getelementptr inbounds double, ptr %x, i64 20
  %g11 = getelementptr inbounds double, ptr %x, i64 22
  %g12 = getelementptr inbounds double, ptr %x, i64 24
  %g13 = getelementptr inbounds double, ptr %x, i64 26
  %g14 = getelementptr inbounds double, ptr %x, i64 28
  %g15 = getelementptr inbounds double, ptr %x, i64 30
  %t0 = load double, ptr %x, align 4
  %t1 = load double, ptr %g1, align 4
  %t2 = load double, ptr %g2, align 4
  %t3 = load double, ptr %g3, align 4
  %t4 = load double, ptr %g4, align 4
  %t5 = load double, ptr %g5, align 4
  %t6 = load double, ptr %g6, align 4
  %t7 = load double, ptr %g7, align 4
  %t8 = load double, ptr %g8, align 4
  %t9 = load double, ptr %g9, align 4
  %t10 = load double, ptr %g10, align 4
  %t11 = load double, ptr %g11, align 4
  %t12 = load double, ptr %g12, align 4
  %t13 = load double, ptr %g13, align 4
  %t14 = load double, ptr %g14, align 4
  %t15 = load double, ptr %g15, align 4
  %m1 = tail call fast double @llvm.minnum.f64(double %t1, double %t0)
  %m2 = tail call fast double @llvm.minnum.f64(double %t2, double %m1)
  %m3 = tail call fast double @llvm.minnum.f64(double %t3, double %m2)
  %m4 = tail call fast double @llvm.minnum.f64(double %t4, double %m3)
  %m5 = tail call fast double @llvm.minnum.f64(double %t5, double %m4)
  %m6 = tail call fast double @llvm.minnum.f64(double %t6, double %m5)
  %m7 = tail call fast double @llvm.minnum.f64(double %t7, double %m6)
  %m8 = tail call fast double @llvm.minnum.f64(double %t8, double %m7)
  %m9 = tail call fast double @llvm.minnum.f64(double %t9, double %m8)
  %m10 = tail call fast double @llvm.minnum.f64(double %t10, double %m9)
  %m11 = tail call fast double @llvm.minnum.f64(double %t11, double %m10)
  %m12 = tail call fast double @llvm.minnum.f64(double %t12, double %m11)
  %m13 = tail call fast double @llvm.minnum.f64(double %t13, double %m12)
  %m14 = tail call fast double @llvm.minnum.f64(double %t14, double %m13)
  %m15 = tail call fast double @llvm.minnum.f64(double %t15, double %m14)
  ret double %m15
}

define float @fmin_float_12_nums_nonseq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define float @fmin_float_12_nums_nonseq(
; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 4
; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 6
; CHECK-NEXT:    [[G4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 8
; CHECK-NEXT:    [[G5:%.*]] = getelementptr inbounds float, ptr [[X]], i64 10
; CHECK-NEXT:    [[G6:%.*]] = getelementptr inbounds float, ptr [[X]], i64 12
; CHECK-NEXT:    [[G7:%.*]] = getelementptr inbounds float, ptr [[X]], i64 14
; CHECK-NEXT:    [[G8:%.*]] = getelementptr inbounds float, ptr [[X]], i64 16
; CHECK-NEXT:    [[G9:%.*]] = getelementptr inbounds float, ptr [[X]], i64 18
; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds float, ptr [[X]], i64 20
; CHECK-NEXT:    [[G11:%.*]] = getelementptr inbounds float, ptr [[X]], i64 22
; CHECK-NEXT:    [[T0:%.*]] = load float, ptr [[X]], align 4
; CHECK-NEXT:    [[T1:%.*]] = load float, ptr [[G1]], align 4
; CHECK-NEXT:    [[T2:%.*]] = load float, ptr [[G2]], align 4
; CHECK-NEXT:    [[T3:%.*]] = load float, ptr [[G3]], align 4
; CHECK-NEXT:    [[T4:%.*]] = load float, ptr [[G4]], align 4
; CHECK-NEXT:    [[T5:%.*]] = load float, ptr [[G5]], align 4
; CHECK-NEXT:    [[T6:%.*]] = load float, ptr [[G6]], align 4
; CHECK-NEXT:    [[T7:%.*]] = load float, ptr [[G7]], align 4
; CHECK-NEXT:    [[T8:%.*]] = load float, ptr [[G8]], align 4
; CHECK-NEXT:    [[T9:%.*]] = load float, ptr [[G9]], align 4
; CHECK-NEXT:    [[T10:%.*]] = load float, ptr [[G10]], align 4
; CHECK-NEXT:    [[T11:%.*]] = load float, ptr [[G11]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <12 x float> poison, float [[T1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <12 x float> [[TMP1]], float [[T0]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <12 x float> [[TMP2]], float [[T2]], i32 2
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <12 x float> [[TMP3]], float [[T3]], i32 3
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <12 x float> [[TMP4]], float [[T4]], i32 4
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <12 x float> [[TMP5]], float [[T5]], i32 5
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <12 x float> [[TMP6]], float [[T6]], i32 6
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <12 x float> [[TMP7]], float [[T7]], i32 7
; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <12 x float> [[TMP8]], float [[T8]], i32 8
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <12 x float> [[TMP9]], float [[T9]], i32 9
; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <12 x float> [[TMP10]], float [[T10]], i32 10
; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <12 x float> [[TMP11]], float [[T11]], i32 11
; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.v12f32(<12 x float> [[TMP12]])
; CHECK-NEXT:    ret float [[TMP13]]
;
  %g1 = getelementptr inbounds float, ptr %x, i64 2
  %g2 = getelementptr inbounds float, ptr %x, i64 4
  %g3 = getelementptr inbounds float, ptr %x, i64 6
  %g4 = getelementptr inbounds float, ptr %x, i64 8
  %g5 = getelementptr inbounds float, ptr %x, i64 10
  %g6 = getelementptr inbounds float, ptr %x, i64 12
  %g7 = getelementptr inbounds float, ptr %x, i64 14
  %g8 = getelementptr inbounds float, ptr %x, i64 16
  %g9 = getelementptr inbounds float, ptr %x, i64 18
  %g10 = getelementptr inbounds float, ptr %x, i64 20
  %g11 = getelementptr inbounds float, ptr %x, i64 22
  %t0 = load float, ptr %x, align 4
  %t1 = load float, ptr %g1, align 4
  %t2 = load float, ptr %g2, align 4
  %t3 = load float, ptr %g3, align 4
  %t4 = load float, ptr %g4, align 4
  %t5 = load float, ptr %g5, align 4
  %t6 = load float, ptr %g6, align 4
  %t7 = load float, ptr %g7, align 4
  %t8 = load float, ptr %g8, align 4
  %t9 = load float, ptr %g9, align 4
  %t10 = load float, ptr %g10, align 4
  %t11 = load float, ptr %g11, align 4
  %m1 = tail call fast float @llvm.minnum.f32(float %t1, float %t0)
  %m2 = tail call fast float @llvm.minnum.f32(float %t2, float %m1)
  %m3 = tail call fast float @llvm.minnum.f32(float %t3, float %m2)
  %m4 = tail call fast float @llvm.minnum.f32(float %t4, float %m3)
  %m5 = tail call fast float @llvm.minnum.f32(float %t5, float %m4)
  %m6 = tail call fast float @llvm.minnum.f32(float %t6, float %m5)
  %m7 = tail call fast float @llvm.minnum.f32(float %t7, float %m6)
  %m8 = tail call fast float @llvm.minnum.f32(float %t8, float %m7)
  %m9 = tail call fast float @llvm.minnum.f32(float %t9, float %m8)
  %m10 = tail call fast float @llvm.minnum.f32(float %t10, float %m9)
  %m11 = tail call fast float @llvm.minnum.f32(float %t11, float %m10)
  ret float %m11
}

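; The fmax tests below mirror the fmin tests above, using @llvm.maxnum.* and
; @llvm.vector.reduce.fmax.* instead.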
define double @fmax_double_4_nums_seq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define double @fmax_double_4_nums_seq(
; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, ptr [[X]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[TMP1]])
; CHECK-NEXT:    ret double [[TMP2]]
;
  %g1 = getelementptr inbounds double, ptr %x, i64 1
  %g2 = getelementptr inbounds double, ptr %x, i64 2
  %g3 = getelementptr inbounds double, ptr %x, i64 3
  %t0 = load double, ptr %x, align 4
  %t1 = load double, ptr %g1, align 4
  %t2 = load double, ptr %g2, align 4
  %t3 = load double, ptr %g3, align 4
  %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0)
  %m2 = tail call fast double @llvm.maxnum.f64(double %t2, double %m1)
  %m3 = tail call fast double @llvm.maxnum.f64(double %t3, double %m2)
  ret double %m3
}

define double @fmax_double_16_nums_nonseq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define double @fmax_double_16_nums_nonseq(
; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 4
; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 6
; CHECK-NEXT:    [[G4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8
; CHECK-NEXT:    [[G5:%.*]] = getelementptr inbounds double, ptr [[X]], i64 10
; CHECK-NEXT:    [[G6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 12
; CHECK-NEXT:    [[G7:%.*]] = getelementptr inbounds double, ptr [[X]], i64 14
; CHECK-NEXT:    [[G8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 16
; CHECK-NEXT:    [[G9:%.*]] = getelementptr inbounds double, ptr [[X]], i64 18
; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds double, ptr [[X]], i64 20
; CHECK-NEXT:    [[G11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 22
; CHECK-NEXT:    [[G12:%.*]] = getelementptr inbounds double, ptr [[X]], i64 24
; CHECK-NEXT:    [[G13:%.*]] = getelementptr inbounds double, ptr [[X]], i64 26
; CHECK-NEXT:    [[G14:%.*]] = getelementptr inbounds double, ptr [[X]], i64 28
; CHECK-NEXT:    [[G15:%.*]] = getelementptr inbounds double, ptr [[X]], i64 30
; CHECK-NEXT:    [[T0:%.*]] = load double, ptr [[X]], align 4
; CHECK-NEXT:    [[T1:%.*]] = load double, ptr [[G1]], align 4
; CHECK-NEXT:    [[T2:%.*]] = load double, ptr [[G2]], align 4
; CHECK-NEXT:    [[T3:%.*]] = load double, ptr [[G3]], align 4
; CHECK-NEXT:    [[T4:%.*]] = load double, ptr [[G4]], align 4
; CHECK-NEXT:    [[T5:%.*]] = load double, ptr [[G5]], align 4
; CHECK-NEXT:    [[T6:%.*]] = load double, ptr [[G6]], align 4
; CHECK-NEXT:    [[T7:%.*]] = load double, ptr [[G7]], align 4
; CHECK-NEXT:    [[T8:%.*]] = load double, ptr [[G8]], align 4
; CHECK-NEXT:    [[T9:%.*]] = load double, ptr [[G9]], align 4
; CHECK-NEXT:    [[T10:%.*]] = load double, ptr [[G10]], align 4
; CHECK-NEXT:    [[T11:%.*]] = load double, ptr [[G11]], align 4
; CHECK-NEXT:    [[T12:%.*]] = load double, ptr [[G12]], align 4
; CHECK-NEXT:    [[T13:%.*]] = load double, ptr [[G13]], align 4
; CHECK-NEXT:    [[T14:%.*]] = load double, ptr [[G14]], align 4
; CHECK-NEXT:    [[T15:%.*]] = load double, ptr [[G15]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x double> poison, double [[T1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x double> [[TMP1]], double [[T0]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x double> [[TMP2]], double [[T2]], i32 2
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x double> [[TMP3]], double [[T3]], i32 3
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x double> [[TMP4]], double [[T4]], i32 4
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x double> [[TMP5]], double [[T5]], i32 5
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x double> [[TMP6]], double [[T6]], i32 6
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x double> [[TMP7]], double [[T7]], i32 7
; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x double> [[TMP8]], double [[T8]], i32 8
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x double> [[TMP9]], double [[T9]], i32 9
; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x double> [[TMP10]], double [[T10]], i32 10
; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x double> [[TMP11]], double [[T11]], i32 11
; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x double> [[TMP12]], double [[T12]], i32 12
; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <16 x double> [[TMP13]], double [[T13]], i32 13
; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x double> [[TMP14]], double [[T14]], i32 14
; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x double> [[TMP15]], double [[T15]], i32 15
; CHECK-NEXT:    [[TMP17:%.*]] = call fast double @llvm.vector.reduce.fmax.v16f64(<16 x double> [[TMP16]])
; CHECK-NEXT:    ret double [[TMP17]]
;
  %g1 = getelementptr inbounds double, ptr %x, i64 2
  %g2 = getelementptr inbounds double, ptr %x, i64 4
  %g3 = getelementptr inbounds double, ptr %x, i64 6
  %g4 = getelementptr inbounds double, ptr %x, i64 8
  %g5 = getelementptr inbounds double, ptr %x, i64 10
  %g6 = getelementptr inbounds double, ptr %x, i64 12
  %g7 = getelementptr inbounds double, ptr %x, i64 14
  %g8 = getelementptr inbounds double, ptr %x, i64 16
  %g9 = getelementptr inbounds double, ptr %x, i64 18
  %g10 = getelementptr inbounds double, ptr %x, i64 20
  %g11 = getelementptr inbounds double, ptr %x, i64 22
  %g12 = getelementptr inbounds double, ptr %x, i64 24
  %g13 = getelementptr inbounds double, ptr %x, i64 26
  %g14 = getelementptr inbounds double, ptr %x, i64 28
  %g15 = getelementptr inbounds double, ptr %x, i64 30
  %t0 = load double, ptr %x, align 4
  %t1 = load double, ptr %g1, align 4
  %t2 = load double, ptr %g2, align 4
  %t3 = load double, ptr %g3, align 4
  %t4 = load double, ptr %g4, align 4
  %t5 = load double, ptr %g5, align 4
  %t6 = load double, ptr %g6, align 4
  %t7 = load double, ptr %g7, align 4
  %t8 = load double, ptr %g8, align 4
  %t9 = load double, ptr %g9, align 4
  %t10 = load double, ptr %g10, align 4
  %t11 = load double, ptr %g11, align 4
  %t12 = load double, ptr %g12, align 4
  %t13 = load double, ptr %g13, align 4
  %t14 = load double, ptr %g14, align 4
  %t15 = load double, ptr %g15, align 4
  %m1 = tail call fast double @llvm.maxnum.f64(double %t1, double %t0)
  %m2 = tail call fast double @llvm.maxnum.f64(double %t2, double %m1)
  %m3 = tail call fast double @llvm.maxnum.f64(double %t3, double %m2)
  %m4 = tail call fast double @llvm.maxnum.f64(double %t4, double %m3)
  %m5 = tail call fast double @llvm.maxnum.f64(double %t5, double %m4)
  %m6 = tail call fast double @llvm.maxnum.f64(double %t6, double %m5)
  %m7 = tail call fast double @llvm.maxnum.f64(double %t7, double %m6)
  %m8 = tail call fast double @llvm.maxnum.f64(double %t8, double %m7)
  %m9 = tail call fast double @llvm.maxnum.f64(double %t9, double %m8)
  %m10 = tail call fast double @llvm.maxnum.f64(double %t10, double %m9)
  %m11 = tail call fast double @llvm.maxnum.f64(double %t11, double %m10)
  %m12 = tail call fast double @llvm.maxnum.f64(double %t12, double %m11)
  %m13 = tail call fast double @llvm.maxnum.f64(double %t13, double %m12)
  %m14 = tail call fast double @llvm.maxnum.f64(double %t14, double %m13)
  %m15 = tail call fast double @llvm.maxnum.f64(double %t15, double %m14)
  ret double %m15
}

define float @fmax_float_12_nums_nonseq(ptr nocapture noundef readonly %x) {
; CHECK-LABEL: define float @fmax_float_12_nums_nonseq(
; CHECK-SAME: ptr nocapture noundef readonly [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 4
; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 6
; CHECK-NEXT:    [[G4:%.*]] = getelementptr inbounds float, ptr [[X]], i64 8
; CHECK-NEXT:    [[G5:%.*]] = getelementptr inbounds float, ptr [[X]], i64 10
; CHECK-NEXT:    [[G6:%.*]] = getelementptr inbounds float, ptr [[X]], i64 12
; CHECK-NEXT:    [[G7:%.*]] = getelementptr inbounds float, ptr [[X]], i64 14
; CHECK-NEXT:    [[G8:%.*]] = getelementptr inbounds float, ptr [[X]], i64 16
; CHECK-NEXT:    [[G9:%.*]] = getelementptr inbounds float, ptr [[X]], i64 18
; CHECK-NEXT:    [[G10:%.*]] = getelementptr inbounds float, ptr [[X]], i64 20
; CHECK-NEXT:    [[G11:%.*]] = getelementptr inbounds float, ptr [[X]], i64 22
; CHECK-NEXT:    [[T0:%.*]] = load float, ptr [[X]], align 4
; CHECK-NEXT:    [[T1:%.*]] = load float, ptr [[G1]], align 4
; CHECK-NEXT:    [[T2:%.*]] = load float, ptr [[G2]], align 4
; CHECK-NEXT:    [[T3:%.*]] = load float, ptr [[G3]], align 4
; CHECK-NEXT:    [[T4:%.*]] = load float, ptr [[G4]], align 4
; CHECK-NEXT:    [[T5:%.*]] = load float, ptr [[G5]], align 4
; CHECK-NEXT:    [[T6:%.*]] = load float, ptr [[G6]], align 4
; CHECK-NEXT:    [[T7:%.*]] = load float, ptr [[G7]], align 4
; CHECK-NEXT:    [[T8:%.*]] = load float, ptr [[G8]], align 4
; CHECK-NEXT:    [[T9:%.*]] = load float, ptr [[G9]], align 4
; CHECK-NEXT:    [[T10:%.*]] = load float, ptr [[G10]], align 4
; CHECK-NEXT:    [[T11:%.*]] = load float, ptr [[G11]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <12 x float> poison, float [[T1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <12 x float> [[TMP1]], float [[T0]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <12 x float> [[TMP2]], float [[T2]], i32 2
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <12 x float> [[TMP3]], float [[T3]], i32 3
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <12 x float> [[TMP4]], float [[T4]], i32 4
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <12 x float> [[TMP5]], float [[T5]], i32 5
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <12 x float> [[TMP6]], float [[T6]], i32 6
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <12 x float> [[TMP7]], float [[T7]], i32 7
; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <12 x float> [[TMP8]], float [[T8]], i32 8
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <12 x float> [[TMP9]], float [[T9]], i32 9
; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <12 x float> [[TMP10]], float [[T10]], i32 10
; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <12 x float> [[TMP11]], float [[T11]], i32 11
; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmax.v12f32(<12 x float> [[TMP12]])
; CHECK-NEXT:    ret float [[TMP13]]
;
  %g1 = getelementptr inbounds float, ptr %x, i64 2
  %g2 = getelementptr inbounds float, ptr %x, i64 4
  %g3 = getelementptr inbounds float, ptr %x, i64 6
  %g4 = getelementptr inbounds float, ptr %x, i64 8
  %g5 = getelementptr inbounds float, ptr %x, i64 10
  %g6 = getelementptr inbounds float, ptr %x, i64 12
  %g7 = getelementptr inbounds float, ptr %x, i64 14
  %g8 = getelementptr inbounds float, ptr %x, i64 16
  %g9 = getelementptr inbounds float, ptr %x, i64 18
  %g10 = getelementptr inbounds float, ptr %x, i64 20
  %g11 = getelementptr inbounds float, ptr %x, i64 22
  %t0 = load float, ptr %x, align 4
  %t1 = load float, ptr %g1, align 4
  %t2 = load float, ptr %g2, align 4
  %t3 = load float, ptr %g3, align 4
  %t4 = load float, ptr %g4, align 4
  %t5 = load float, ptr %g5, align 4
  %t6 = load float, ptr %g6, align 4
  %t7 = load float, ptr %g7, align 4
  %t8 = load float, ptr %g8, align 4
  %t9 = load float, ptr %g9, align 4
  %t10 = load float, ptr %g10, align 4
  %t11 = load float, ptr %g11, align 4
  %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0)
  %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1)
  %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2)
  %m4 = tail call fast float @llvm.maxnum.f32(float %t4, float %m3)
  %m5 = tail call fast float @llvm.maxnum.f32(float %t5, float %m4)
  %m6 = tail call fast float @llvm.maxnum.f32(float %t6, float %m5)
  %m7 = tail call fast float @llvm.maxnum.f32(float %t7, float %m6)
  %m8 = tail call fast float @llvm.maxnum.f32(float %t8, float %m7)
  %m9 = tail call fast float @llvm.maxnum.f32(float %t9, float %m8)
  %m10 = tail call fast float @llvm.maxnum.f32(float %t10, float %m9)
  %m11 = tail call fast float @llvm.maxnum.f32(float %t11, float %m10)
  ret float %m11
}

declare float @llvm.minnum.f32(float, float)
declare double @llvm.minnum.f64(double, double)
declare float @llvm.maxnum.f32(float, float)
declare double @llvm.maxnum.f64(double, double)