1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -O2 -expand-reductions -mattr=avx -S < %s | FileCheck %s
4 ; Test that SLP vector reduction patterns are recognized,
5 ; optionally converted to reduction intrinsics, and then expanded (by -expand-reductions)
; Module-level target configuration: an x86-64 triple with unspecified vendor/OS
; and an explicit data layout string.
8 target triple = "x86_64--"
9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; add_v4i32: integer add reduction over p[0..3], written as a scalar counted loop.
; The autogenerated assertions expect a single <4 x i32> load, a two-step
; shufflevector/add tree, and an extractelement of lane 0 as the result.
11 define i32 @add_v4i32(i32* %p) #0 {
12 ; CHECK-LABEL: @add_v4i32(
14 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
15 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0:![0-9]+]]
16 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
17 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP1]], [[RDX_SHUF]]
18 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
19 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF3]]
20 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[BIN_RDX4]], i32 0
21 ; CHECK-NEXT: ret i32 [[TMP2]]
; %r.0 is the running sum (0 on entry) and %i.0 the loop index; the body runs while %i.0 < 4.
27 %r.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
28 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
29 %cmp = icmp slt i32 %i.0, 4
30 br i1 %cmp, label %for.body, label %for.cond.cleanup
; Load p[%i.0] and add it to the running sum.
36 %idxprom = sext i32 %i.0 to i64
37 %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
38 %0 = load i32, i32* %arrayidx, align 4, !tbaa !3
39 %add = add nsw i32 %r.0, %0
; Advance the index.
43 %inc = add nsw i32 %i.0, 1
; mul_v8i16: multiply reduction over p[0..7] of i16 elements. The scalar loop
; widens both operands to i32, multiplies, and truncates back to i16.
; The autogenerated assertions expect the whole reduction to be done directly
; on <8 x i16> with a three-step shufflevector/mul tree.
50 define signext i16 @mul_v8i16(i16* %p) #0 {
51 ; CHECK-LABEL: @mul_v8i16(
53 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[P:%.*]] to <8 x i16>*
54 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2, !tbaa [[TBAA4:![0-9]+]]
55 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
56 ; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <8 x i16> [[TMP1]], [[RDX_SHUF]]
57 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
58 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = mul <8 x i16> [[BIN_RDX]], [[RDX_SHUF3]]
59 ; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <8 x i16> [[BIN_RDX4]], <8 x i16> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
60 ; CHECK-NEXT: [[BIN_RDX6:%.*]] = mul <8 x i16> [[BIN_RDX4]], [[RDX_SHUF5]]
61 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i16> [[BIN_RDX6]], i32 0
62 ; CHECK-NEXT: ret i16 [[TMP2]]
; %r.0 is the running product (1 on entry); the body runs while %i.0 < 8.
68 %r.0 = phi i16 [ 1, %entry ], [ %conv2, %for.inc ]
69 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
70 %cmp = icmp slt i32 %i.0, 8
71 br i1 %cmp, label %for.body, label %for.cond.cleanup
; Load p[%i.0], sign-extend it and the accumulator to i32, multiply, truncate to i16.
77 %idxprom = sext i32 %i.0 to i64
78 %arrayidx = getelementptr inbounds i16, i16* %p, i64 %idxprom
79 %0 = load i16, i16* %arrayidx, align 2, !tbaa !7
80 %conv = sext i16 %0 to i32
81 %conv1 = sext i16 %r.0 to i32
82 %mul = mul nsw i32 %conv1, %conv
83 %conv2 = trunc i32 %mul to i16
; Advance the index.
87 %inc = add nsw i32 %i.0, 1
; or_v16i8: bitwise-or reduction over p[0..15] of i8 elements. The scalar loop
; widens both operands to i32, ors them, and truncates back to i8.
; The autogenerated assertions expect the reduction to be done directly on
; <16 x i8> with a four-step shufflevector/or tree.
94 define signext i8 @or_v16i8(i8* %p) #0 {
95 ; CHECK-LABEL: @or_v16i8(
97 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[P:%.*]] to <16 x i8>*
98 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1, !tbaa [[TBAA6:![0-9]+]]
99 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
100 ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <16 x i8> [[TMP1]], [[RDX_SHUF]]
101 ; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x i8> [[BIN_RDX]], <16 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
102 ; CHECK-NEXT: [[BIN_RDX5:%.*]] = or <16 x i8> [[BIN_RDX]], [[RDX_SHUF4]]
103 ; CHECK-NEXT: [[RDX_SHUF6:%.*]] = shufflevector <16 x i8> [[BIN_RDX5]], <16 x i8> poison, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
104 ; CHECK-NEXT: [[BIN_RDX7:%.*]] = or <16 x i8> [[BIN_RDX5]], [[RDX_SHUF6]]
105 ; CHECK-NEXT: [[RDX_SHUF8:%.*]] = shufflevector <16 x i8> [[BIN_RDX7]], <16 x i8> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
106 ; CHECK-NEXT: [[BIN_RDX9:%.*]] = or <16 x i8> [[BIN_RDX7]], [[RDX_SHUF8]]
107 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i8> [[BIN_RDX9]], i32 0
108 ; CHECK-NEXT: ret i8 [[TMP2]]
; %r.0 is the running or-accumulator (0 on entry); the body runs while %i.0 < 16.
114 %r.0 = phi i8 [ 0, %entry ], [ %conv2, %for.inc ]
115 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
116 %cmp = icmp slt i32 %i.0, 16
117 br i1 %cmp, label %for.body, label %for.cond.cleanup
; Load p[%i.0], sign-extend it and the accumulator to i32, or them, truncate to i8.
123 %idxprom = sext i32 %i.0 to i64
124 %arrayidx = getelementptr inbounds i8, i8* %p, i64 %idxprom
125 %0 = load i8, i8* %arrayidx, align 1, !tbaa !9
126 %conv = sext i8 %0 to i32
127 %conv1 = sext i8 %r.0 to i32
128 %or = or i32 %conv1, %conv
129 %conv2 = trunc i32 %or to i8
; Advance the index.
133 %inc = add nsw i32 %i.0, 1
; smin_v4i32: signed-minimum reduction over p[0..3], written as a loop with a
; compare-and-branch (ternary-style) update of the running minimum.
; The autogenerated assertions expect a single <4 x i32> load and a two-step
; shufflevector + icmp slt/select tree, with lane 0 extracted as the result.
140 define i32 @smin_v4i32(i32* %p) #0 {
141 ; CHECK-LABEL: @smin_v4i32(
143 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
144 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0]]
145 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
146 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP1]], [[RDX_SHUF]]
147 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]
148 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
149 ; CHECK-NEXT: [[RDX_MINMAX_CMP4:%.*]] = icmp slt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
150 ; CHECK-NEXT: [[RDX_MINMAX_SELECT5:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP4]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF3]]
151 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT5]], i32 0
152 ; CHECK-NEXT: ret i32 [[TMP2]]
; %r.0 is the running minimum, seeded with 2147483647 (the largest signed i32 value);
; the body runs while %i.0 < 4.
158 %r.0 = phi i32 [ 2147483647, %entry ], [ %cond, %for.inc ]
159 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
160 %cmp = icmp slt i32 %i.0, 4
161 br i1 %cmp, label %for.body, label %for.cond.cleanup
; Compare p[%i.0] against the running minimum (signed less-than) and branch on the result.
167 %idxprom = sext i32 %i.0 to i64
168 %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
169 %0 = load i32, i32* %arrayidx, align 4, !tbaa !3
170 %cmp1 = icmp slt i32 %0, %r.0
171 br i1 %cmp1, label %cond.true, label %cond.false
; Second load of the same element p[%i.0]; its value feeds the %cond.true edge of the phi below.
174 %idxprom2 = sext i32 %i.0 to i64
175 %arrayidx3 = getelementptr inbounds i32, i32* %p, i64 %idxprom2
176 %1 = load i32, i32* %arrayidx3, align 4, !tbaa !3
; New running minimum: the reloaded element if it was smaller, otherwise the old value.
183 %cond = phi i32 [ %1, %cond.true ], [ %r.0, %cond.false ]
; Advance the index.
187 %inc = add nsw i32 %i.0, 1
; umax_v4i32: unsigned-maximum reduction over p[0..3], written as a loop with a
; compare-and-branch (ternary-style) update of the running maximum.
; The autogenerated assertions expect a single <4 x i32> load and a two-step
; shufflevector + icmp ugt/select tree, with lane 0 extracted as the result.
194 define i32 @umax_v4i32(i32* %p) #0 {
195 ; CHECK-LABEL: @umax_v4i32(
197 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
198 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0]]
199 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
200 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt <4 x i32> [[TMP1]], [[RDX_SHUF]]
201 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]
202 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
203 ; CHECK-NEXT: [[RDX_MINMAX_CMP4:%.*]] = icmp ugt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
204 ; CHECK-NEXT: [[RDX_MINMAX_SELECT5:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP4]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF3]]
205 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT5]], i32 0
206 ; CHECK-NEXT: ret i32 [[TMP2]]
; %r.0 is the running maximum, seeded with 0 (the smallest unsigned value);
; the body runs while %i.0 < 4.
212 %r.0 = phi i32 [ 0, %entry ], [ %cond, %for.inc ]
213 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
214 %cmp = icmp slt i32 %i.0, 4
215 br i1 %cmp, label %for.body, label %for.cond.cleanup
; Compare p[%i.0] against the running maximum (unsigned greater-than) and branch on the result.
221 %idxprom = sext i32 %i.0 to i64
222 %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
223 %0 = load i32, i32* %arrayidx, align 4, !tbaa !3
224 %cmp1 = icmp ugt i32 %0, %r.0
225 br i1 %cmp1, label %cond.true, label %cond.false
; Second load of the same element p[%i.0]; its value feeds the %cond.true edge of the phi below.
228 %idxprom2 = sext i32 %i.0 to i64
229 %arrayidx3 = getelementptr inbounds i32, i32* %p, i64 %idxprom2
230 %1 = load i32, i32* %arrayidx3, align 4, !tbaa !3
; New running maximum: the reloaded element if it was larger, otherwise the old value.
237 %cond = phi i32 [ %1, %cond.true ], [ %r.0, %cond.false ]
; Advance the index.
241 %inc = add nsw i32 %i.0, 1
; fadd_v4i32: fast-math floating-point add reduction over p[0..3], seeded with 42.0.
; NOTE(review): despite the "v4i32" suffix, this function operates on float elements.
; The autogenerated assertions expect a <4 x float> load, a two-step
; shufflevector/fadd fast tree, an extract of lane 0, and a final scalar
; fadd fast that folds in the 42.0 start value.
248 define float @fadd_v4i32(float* %p) #0 {
249 ; CHECK-LABEL: @fadd_v4i32(
251 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
252 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7:![0-9]+]]
253 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
254 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP1]], [[RDX_SHUF]]
255 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
256 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF3]]
257 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX4]], i32 0
258 ; CHECK-NEXT: [[BIN_RDX5:%.*]] = fadd fast float 4.200000e+01, [[TMP2]]
259 ; CHECK-NEXT: ret float [[BIN_RDX5]]
; %r.0 is the running sum (42.0 on entry); the body runs while %i.0 < 4.
265 %r.0 = phi float [ 4.200000e+01, %entry ], [ %add, %for.inc ]
266 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
267 %cmp = icmp slt i32 %i.0, 4
268 br i1 %cmp, label %for.body, label %for.cond.cleanup
; Load p[%i.0] and add it to the running sum with fast-math flags.
274 %idxprom = sext i32 %i.0 to i64
275 %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
276 %0 = load float, float* %arrayidx, align 4, !tbaa !10
277 %add = fadd fast float %r.0, %0
; Advance the index.
281 %inc = add nsw i32 %i.0, 1
; fmul_v4i32: fast-math floating-point multiply reduction over p[0..3], seeded with 42.0.
; NOTE(review): despite the "v4i32" suffix, this function operates on float elements.
; The autogenerated assertions expect a <4 x float> load, a two-step
; shufflevector/fmul fast tree, an extract of lane 0, a scalar multiply by 1.0,
; and a final multiply by the 42.0 start value.
288 define float @fmul_v4i32(float* %p) #0 {
289 ; CHECK-LABEL: @fmul_v4i32(
291 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
292 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7]]
293 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
294 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul fast <4 x float> [[TMP1]], [[RDX_SHUF]]
295 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
296 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF3]]
297 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX4]], i32 0
298 ; CHECK-NEXT: [[BIN_RDX5:%.*]] = fmul fast float 1.000000e+00, [[TMP2]]
299 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fmul fast float [[BIN_RDX5]], 4.200000e+01
300 ; CHECK-NEXT: ret float [[OP_EXTRA]]
; %r.0 is the running product (42.0 on entry); the body runs while %i.0 < 4.
306 %r.0 = phi float [ 4.200000e+01, %entry ], [ %mul, %for.inc ]
307 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
308 %cmp = icmp slt i32 %i.0, 4
309 br i1 %cmp, label %for.body, label %for.cond.cleanup
; Load p[%i.0] and multiply it into the running product with fast-math flags.
315 %idxprom = sext i32 %i.0 to i64
316 %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
317 %0 = load float, float* %arrayidx, align 4, !tbaa !10
318 %mul = fmul fast float %r.0, %0
; Advance the index.
322 %inc = add nsw i32 %i.0, 1
; fmin_v4f32: fast-math floating-point minimum reduction over p[0..3], written as a
; loop with a compare-and-branch (ternary-style) update of the running minimum.
; The autogenerated assertions expect a <4 x float> load and a two-step
; shufflevector + fcmp fast olt/select fast tree, with lane 0 extracted as the result.
329 define float @fmin_v4f32(float* %p) #0 {
330 ; CHECK-LABEL: @fmin_v4f32(
332 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
333 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7]]
334 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
335 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <4 x float> [[TMP1]], [[RDX_SHUF]]
336 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP1]], <4 x float> [[RDX_SHUF]]
337 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
338 ; CHECK-NEXT: [[RDX_MINMAX_CMP4:%.*]] = fcmp fast olt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
339 ; CHECK-NEXT: [[RDX_MINMAX_SELECT5:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP4]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF3]]
340 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT5]], i32 0
341 ; CHECK-NEXT: ret float [[TMP2]]
; %r.0 is the running minimum, seeded with 0x47EFFFFFE0000000 (presumably the
; largest finite float, FLT_MAX -- confirm); the body runs while %i.0 < 4.
347 %r.0 = phi float [ 0x47EFFFFFE0000000, %entry ], [ %cond, %for.inc ]
348 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
349 %cmp = icmp slt i32 %i.0, 4
350 br i1 %cmp, label %for.body, label %for.cond.cleanup
; Compare p[%i.0] against the running minimum (ordered less-than, fast-math) and branch.
356 %idxprom = sext i32 %i.0 to i64
357 %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
358 %0 = load float, float* %arrayidx, align 4, !tbaa !10
359 %cmp1 = fcmp fast olt float %0, %r.0
360 br i1 %cmp1, label %cond.true, label %cond.false
; Second load of the same element p[%i.0]; its value feeds the %cond.true edge of the phi below.
363 %idxprom2 = sext i32 %i.0 to i64
364 %arrayidx3 = getelementptr inbounds float, float* %p, i64 %idxprom2
365 %1 = load float, float* %arrayidx3, align 4, !tbaa !10
; New running minimum: the reloaded element if it was smaller, otherwise the old value.
372 %cond = phi fast float [ %1, %cond.true ], [ %r.0, %cond.false ]
; Advance the index.
376 %inc = add nsw i32 %i.0, 1
; max: scalar helper with available_externally linkage that picks the larger of
; %a and %b using an fcmp ogt carrying nnan/ninf/nsz flags. It is written in
; unoptimized form (arguments spilled to allocas and reloaded). No assertions
; are attached to this function; it is called by @findMax.
383 define available_externally float @max(float %a, float %b) {
; Spill both arguments to stack slots, then reload them for the comparison.
385 %a.addr = alloca float, align 4
386 %b.addr = alloca float, align 4
387 store float %a, float* %a.addr, align 4
388 store float %b, float* %b.addr, align 4
389 %0 = load float, float* %a.addr, align 4
390 %1 = load float, float* %b.addr, align 4
391 %cmp = fcmp nnan ninf nsz ogt float %0, %1
392 br i1 %cmp, label %cond.true, label %cond.false
; a > b: reload a.
394 cond.true: ; preds = %entry
395 %2 = load float, float* %a.addr, align 4
; otherwise: reload b.
398 cond.false: ; preds = %entry
399 %3 = load float, float* %b.addr, align 4
; Merge the two candidates into the selected maximum.
402 cond.end: ; preds = %cond.false, %cond.true
403 %cond = phi nnan ninf nsz float [ %2, %cond.true ], [ %3, %cond.false ]
; findMax: maximum of the 8 lanes of a byval <8 x float> argument, expressed as a
; chain of seven calls to @max over elements 0..7 extracted one at a time.
; The autogenerated assertions expect the call chain to be replaced by a single
; vector load and a three-step shufflevector + fcmp ogt/select tree (nnan ninf nsz),
; with lane 0 extracted as the result.
; NOTE(review): the vector loads/stores below carry !tbaa !3, which this file
; defines via !4 as the "int" type even though the data is <8 x float> -- confirm
; this is intentional.
409 define float @findMax(<8 x float>* byval(<8 x float>) align 16 %0) {
410 ; CHECK-LABEL: @findMax(
412 ; CHECK-NEXT: [[V:%.*]] = load <8 x float>, <8 x float>* [[TMP0:%.*]], align 16, !tbaa [[TBAA0]]
413 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[V]], <8 x float> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
414 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[V]], [[RDX_SHUF]]
415 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[V]], <8 x float> [[RDX_SHUF]]
416 ; CHECK-NEXT: [[RDX_SHUF8:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
417 ; CHECK-NEXT: [[RDX_MINMAX_CMP9:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF8]]
418 ; CHECK-NEXT: [[RDX_MINMAX_SELECT10:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP9]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF8]]
419 ; CHECK-NEXT: [[RDX_SHUF11:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT10]], <8 x float> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
420 ; CHECK-NEXT: [[RDX_MINMAX_CMP12:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[RDX_MINMAX_SELECT10]], [[RDX_SHUF11]]
421 ; CHECK-NEXT: [[RDX_MINMAX_SELECT13:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP12]], <8 x float> [[RDX_MINMAX_SELECT10]], <8 x float> [[RDX_SHUF11]]
422 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT13]], i32 0
423 ; CHECK-NEXT: ret float [[TMP1]]
; Copy the byval vector into a local 32-byte-aligned stack slot.
426 %v.addr = alloca <8 x float>, align 32
427 %v = load <8 x float>, <8 x float>* %0, align 16, !tbaa !3
428 store <8 x float> %v, <8 x float>* %v.addr, align 32, !tbaa !3
; Seed the chain with max(v[0], v[1]); the whole vector is reloaded before every extract.
429 %1 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
430 %vecext = extractelement <8 x float> %1, i32 0
431 %2 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
432 %vecext1 = extractelement <8 x float> %2, i32 1
433 %call = call nnan ninf nsz float @max(float %vecext, float %vecext1)
; Fold in elements 2..7 one at a time: each step is max(previous result, v[k]).
434 %3 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
435 %vecext2 = extractelement <8 x float> %3, i32 2
436 %call3 = call nnan ninf nsz float @max(float %call, float %vecext2)
437 %4 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
438 %vecext4 = extractelement <8 x float> %4, i32 3
439 %call5 = call nnan ninf nsz float @max(float %call3, float %vecext4)
440 %5 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
441 %vecext6 = extractelement <8 x float> %5, i32 4
442 %call7 = call nnan ninf nsz float @max(float %call5, float %vecext6)
443 %6 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
444 %vecext8 = extractelement <8 x float> %6, i32 5
445 %call9 = call nnan ninf nsz float @max(float %call7, float %vecext8)
446 %7 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
447 %vecext10 = extractelement <8 x float> %7, i32 6
448 %call11 = call nnan ninf nsz float @max(float %call9, float %vecext10)
449 %8 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
450 %vecext12 = extractelement <8 x float> %8, i32 7
451 %call13 = call nnan ninf nsz float @max(float %call11, float %vecext12)
; Attribute group #0, referenced by the loop-based reduction functions in this file
; (not by @max or @findMax). It sets the no-infs, no-nans, no-signed-zeros and
; unsafe FP-math string attributes to "true", names penryn as the target CPU,
; and lists +avx among the target features.
455 attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+avx,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "unsafe-fp-math"="true" "use-soft-float"="false" }
; !0-!2: wchar_size / PIC Level entries and the producing clang version string.
; None of the lines visible in this file reference them.
457 !0 = !{i32 1, !"wchar_size", i32 4}
458 !1 = !{i32 7, !"PIC Level", i32 2}
459 !2 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git a9fe69c359de653015c39e413e48630d069abe27)"}
; TBAA metadata. !3 is the access tag for "int" (!4); both hang off the
; "omnipotent char" node (!5), whose parent is the "Simple C/C++ TBAA" root (!6).
460 !3 = !{!4, !4, i64 0}
461 !4 = !{!"int", !5, i64 0}
462 !5 = !{!"omnipotent char", !6, i64 0}
463 !6 = !{!"Simple C/C++ TBAA"}
; !7 is the access tag for "short" (!8), used by the i16 load in @mul_v8i16.
464 !7 = !{!8, !8, i64 0}
465 !8 = !{!"short", !5, i64 0}
; !9 is the access tag for "omnipotent char" itself, used by the i8 load in @or_v16i8.
466 !9 = !{!5, !5, i64 0}
; !10 is the access tag for "float" (!11), used by the scalar float loads.
467 !10 = !{!11, !11, i64 0}
468 !11 = !{!"float", !5, i64 0}