1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
4 ; The function finds the smallest value in an array of floats.
5 ; Check whether the `nnan` flag on `fcmp` alone enables vectorization (the assertions below expect a scalar loop).
; @minloop: running-minimum loop over the floats addressed by %arg. The only
; fast-math hint is the `nnan` flag on the compare instruction. The expected
; output below is a single scalar loop with no vector preheader or vector body,
; i.e. the loop is expected to be left unvectorized.
7 define float @minloop(float* nocapture readonly %arg) {
8 ; CHECK-LABEL: @minloop(
10 ; CHECK-NEXT: [[T:%.*]] = load float, float* [[ARG:%.*]], align 4
11 ; CHECK-NEXT: br label [[LOOP:%.*]]
13 ; CHECK-NEXT: [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ 1, [[TOP:%.*]] ]
14 ; CHECK-NEXT: [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[T]], [[TOP]] ]
15 ; CHECK-NEXT: [[T3:%.*]] = getelementptr float, float* [[ARG]], i64 [[T1]]
16 ; CHECK-NEXT: [[T4:%.*]] = load float, float* [[T3]], align 4
17 ; CHECK-NEXT: [[T5:%.*]] = fcmp nnan olt float [[T2]], [[T4]]
18 ; CHECK-NEXT: [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
19 ; CHECK-NEXT: [[T7]] = add i64 [[T1]], 1
20 ; CHECK-NEXT: [[T8:%.*]] = icmp eq i64 [[T7]], 65537
21 ; CHECK-NEXT: br i1 [[T8]], label [[OUT:%.*]], label [[LOOP]]
23 ; CHECK-NEXT: [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ]
24 ; CHECK-NEXT: ret float [[T6_LCSSA]]
; Input IR. The float stored directly at %arg seeds the running minimum.
27 %t = load float, float* %arg
30 loop: ; preds = %loop, %top
; %t1 is the element index (starts at 1); %t2 is the minimum found so far.
31 %t1 = phi i64 [ %t7, %loop ], [ 1, %top ]
32 %t2 = phi float [ %t6, %loop ], [ %t, %top ]
33 %t3 = getelementptr float, float* %arg, i64 %t1
34 %t4 = load float, float* %t3, align 4
; Min pattern: keep %t2 when it is strictly less than the new element %t4,
; otherwise take %t4. `nnan` is attached to the compare only.
35 %t5 = fcmp nnan olt float %t2, %t4
36 %t6 = select i1 %t5, float %t2, float %t4
; Leave the loop once %t7 equals 65537.
; NOTE(review): the definition of %t7 is not visible in this excerpt; the
; expected output above shows it as %t1 + 1 - confirm against the full file.
38 %t8 = icmp eq i64 %t7, 65537
39 br i1 %t8, label %out, label %loop
45 ; Check that vectorization is still enabled by the function attributes "no-nans-fp-math" and "no-signed-zeros-fp-math".
; @minloopattr: same running-minimum loop, but the compare carries no
; fast-math flags; instead the function uses attribute group #0. The expected
; output below is the vectorized form: a vector body that loads <4 x float>,
; applies a vector fcmp olt / select, steps the index by 4 until it reaches
; 65536, and a middle block that folds the lanes with
; llvm.vector.reduce.fmin.v4f32. The original scalar loop is kept as the
; remainder loop and resumes from the reduced value.
47 define float @minloopattr(float* nocapture readonly %arg) #0 {
48 ; CHECK-LABEL: @minloopattr(
50 ; CHECK-NEXT: [[T:%.*]] = load float, float* [[ARG:%.*]], align 4
51 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
53 ; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[T]], i32 0
54 ; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> [[MINMAX_IDENT_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
55 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
57 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
58 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
59 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
60 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
61 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, float* [[ARG]], i64 [[TMP0]]
62 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
63 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
64 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
65 ; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
66 ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]
67 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
68 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
69 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
70 ; CHECK: middle.block:
71 ; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP5]])
72 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 65536, 65536
73 ; CHECK-NEXT: br i1 [[CMP_N]], label [[OUT:%.*]], label [[SCALAR_PH]]
75 ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 65537, [[MIDDLE_BLOCK]] ], [ 1, [[TOP:%.*]] ]
76 ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[T]], [[TOP]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
77 ; CHECK-NEXT: br label [[LOOP:%.*]]
79 ; CHECK-NEXT: [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
80 ; CHECK-NEXT: [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
81 ; CHECK-NEXT: [[T3:%.*]] = getelementptr float, float* [[ARG]], i64 [[T1]]
82 ; CHECK-NEXT: [[T4:%.*]] = load float, float* [[T3]], align 4
83 ; CHECK-NEXT: [[T5:%.*]] = fcmp olt float [[T2]], [[T4]]
84 ; CHECK-NEXT: [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
85 ; CHECK-NEXT: [[T7]] = add i64 [[T1]], 1
86 ; CHECK-NEXT: [[T8:%.*]] = icmp eq i64 [[T7]], 65537
87 ; CHECK-NEXT: br i1 [[T8]], label [[OUT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]]
89 ; CHECK-NEXT: [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
90 ; CHECK-NEXT: ret float [[T6_LCSSA]]
; Input IR. The float stored directly at %arg seeds the running minimum.
93 %t = load float, float* %arg
96 loop: ; preds = %loop, %top
; %t1 is the element index (starts at 1); %t2 is the minimum found so far.
97 %t1 = phi i64 [ %t7, %loop ], [ 1, %top ]
98 %t2 = phi float [ %t6, %loop ], [ %t, %top ]
99 %t3 = getelementptr float, float* %arg, i64 %t1
100 %t4 = load float, float* %t3, align 4
; Min pattern with a plain compare: no per-instruction fast-math flags here,
; the FP assumptions come only from the function's attribute group.
101 %t5 = fcmp olt float %t2, %t4
102 %t6 = select i1 %t5, float %t2, float %t4
; Leave the loop once %t7 equals 65537.
; NOTE(review): the definition of %t7 is not visible in this excerpt; the
; expected output above shows it as %t1 + 1 - confirm against the full file.
104 %t8 = icmp eq i64 %t7, 65537
105 br i1 %t8, label %out, label %loop
111 ; Check if vectorization is prevented without the flag or attribute.
; @minloopnovec: same running-minimum loop with neither a fast-math flag on
; the compare nor an attribute group on the function. The expected output
; below is the unchanged scalar loop, i.e. no vectorization.
113 define float @minloopnovec(float* nocapture readonly %arg) {
114 ; CHECK-LABEL: @minloopnovec(
116 ; CHECK-NEXT: [[T:%.*]] = load float, float* [[ARG:%.*]], align 4
117 ; CHECK-NEXT: br label [[LOOP:%.*]]
119 ; CHECK-NEXT: [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ 1, [[TOP:%.*]] ]
120 ; CHECK-NEXT: [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[T]], [[TOP]] ]
121 ; CHECK-NEXT: [[T3:%.*]] = getelementptr float, float* [[ARG]], i64 [[T1]]
122 ; CHECK-NEXT: [[T4:%.*]] = load float, float* [[T3]], align 4
123 ; CHECK-NEXT: [[T5:%.*]] = fcmp olt float [[T2]], [[T4]]
124 ; CHECK-NEXT: [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
125 ; CHECK-NEXT: [[T7]] = add i64 [[T1]], 1
126 ; CHECK-NEXT: [[T8:%.*]] = icmp eq i64 [[T7]], 65537
127 ; CHECK-NEXT: br i1 [[T8]], label [[OUT:%.*]], label [[LOOP]]
129 ; CHECK-NEXT: [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ]
130 ; CHECK-NEXT: ret float [[T6_LCSSA]]
; Input IR. The float stored directly at %arg seeds the running minimum.
133 %t = load float, float* %arg
136 loop: ; preds = %loop, %top
; %t1 is the element index (starts at 1); %t2 is the minimum found so far.
137 %t1 = phi i64 [ %t7, %loop ], [ 1, %top ]
138 %t2 = phi float [ %t6, %loop ], [ %t, %top ]
139 %t3 = getelementptr float, float* %arg, i64 %t1
140 %t4 = load float, float* %t3, align 4
; Min pattern with a plain compare and no fast-math information at all.
141 %t5 = fcmp olt float %t2, %t4
142 %t6 = select i1 %t5, float %t2, float %t4
; Leave the loop once %t7 equals 65537.
; NOTE(review): the definition of %t7 is not visible in this excerpt; the
; expected output above shows it as %t1 + 1 - confirm against the full file.
144 %t8 = icmp eq i64 %t7, 65537
145 br i1 %t8, label %out, label %loop
151 ; This test is checking that we don't vectorize when only one of the required attributes is set.
152 ; Note that this test should not vectorize even after switching to IR-level FMF.
; @minloopmissingnsz: same running-minimum loop, using attribute group #1,
; which sets "no-nans-fp-math" but not "no-signed-zeros-fp-math". The expected
; output below is the unchanged scalar loop, i.e. no vectorization.
153 define float @minloopmissingnsz(float* nocapture readonly %arg) #1 {
154 ; CHECK-LABEL: @minloopmissingnsz(
156 ; CHECK-NEXT: [[T:%.*]] = load float, float* [[ARG:%.*]], align 4
157 ; CHECK-NEXT: br label [[LOOP:%.*]]
159 ; CHECK-NEXT: [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ 1, [[TOP:%.*]] ]
160 ; CHECK-NEXT: [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[T]], [[TOP]] ]
161 ; CHECK-NEXT: [[T3:%.*]] = getelementptr float, float* [[ARG]], i64 [[T1]]
162 ; CHECK-NEXT: [[T4:%.*]] = load float, float* [[T3]], align 4
163 ; CHECK-NEXT: [[T5:%.*]] = fcmp olt float [[T2]], [[T4]]
164 ; CHECK-NEXT: [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
165 ; CHECK-NEXT: [[T7]] = add i64 [[T1]], 1
166 ; CHECK-NEXT: [[T8:%.*]] = icmp eq i64 [[T7]], 65537
167 ; CHECK-NEXT: br i1 [[T8]], label [[OUT:%.*]], label [[LOOP]]
169 ; CHECK-NEXT: [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ]
170 ; CHECK-NEXT: ret float [[T6_LCSSA]]
; Input IR. The float stored directly at %arg seeds the running minimum.
173 %t = load float, float* %arg
176 loop: ; preds = %loop, %top
; %t1 is the element index (starts at 1); %t2 is the minimum found so far.
177 %t1 = phi i64 [ %t7, %loop ], [ 1, %top ]
178 %t2 = phi float [ %t6, %loop ], [ %t, %top ]
179 %t3 = getelementptr float, float* %arg, i64 %t1
180 %t4 = load float, float* %t3, align 4
; Min pattern with a plain compare; the only FP assumption available is the
; function-level no-NaNs attribute.
181 %t5 = fcmp olt float %t2, %t4
182 %t6 = select i1 %t5, float %t2, float %t4
; Leave the loop once %t7 equals 65537.
; NOTE(review): the definition of %t7 is not visible in this excerpt; the
; expected output above shows it as %t1 + 1 - confirm against the full file.
184 %t8 = icmp eq i64 %t7, 65537
185 br i1 %t8, label %out, label %loop
191 ; This would trigger an assertion failure during fast-math-flag (FMF) propagation.
; @not_a_min_max: loop containing a select that carries fast-math flags but
; whose condition is an integer compare and whose operands are constants, so
; it is not a floating-point min/max pattern. The expected output below is the
; same scalar instructions, unchanged.
193 define void @not_a_min_max() {
194 ; CHECK-LABEL: @not_a_min_max(
196 ; CHECK-NEXT: br label [[LOOP:%.*]]
198 ; CHECK-NEXT: [[F9_S0_V0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ]
199 ; CHECK-NEXT: [[T14:%.*]] = icmp eq i32 [[F9_S0_V0]], 5
200 ; CHECK-NEXT: [[T15:%.*]] = select reassoc nnan ninf nsz contract afn i1 [[T14]], float 0x36A0000000000000, float 0.000000e+00
201 ; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[F9_S0_V0]], 1
202 ; CHECK-NEXT: br i1 false, label [[END:%.*]], label [[LOOP]]
204 ; CHECK-NEXT: ret void
; Input IR. %f9.s0.v0 is an i32 counter that starts at 0 and is incremented
; by 1 on every iteration.
210 %f9.s0.v0 = phi i32 [ 0, %entry ], [ %add, %loop ]
211 %t14 = icmp eq i32 %f9.s0.v0, 5
; Select with fast-math flags driven by an integer compare; %t15 has no use
; in the lines visible here.
212 %t15 = select reassoc nnan ninf nsz contract afn i1 %t14, float 0x36A0000000000000, float 0.0
213 %add = add nuw nsw i32 %f9.s0.v0, 1
; Constant-false condition: control always takes the backedge to %loop.
214 br i1 false, label %end, label %loop
; #0 (used by @minloopattr): declares both no-NaNs and no-signed-zeros FP math.
220 attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
; #1 (used by @minloopmissingnsz): declares no-NaNs FP math only.
221 attributes #1 = { "no-nans-fp-math"="true" }