1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
3 ; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
; External functions taking a vector argument. Tests below call them to give
; an insertelement result a second use in addition to the compare.
5 declare void @use(<4 x i32>)
6 declare void @usef(<4 x float>)
8 ; Eliminating an insert is profitable.
; Both compare operands insert a scalar into lane 0 of an undef vector. The
; expected output compares the two scalars directly and inserts the i1 result
; into lane 0 of an undef <16 x i1>.
10 define <16 x i1> @ins0_ins0_i8(i8 %x, i8 %y) {
11 ; CHECK-LABEL: @ins0_ins0_i8(
12 ; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
13 ; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i1> undef, i1 [[R_SCALAR]], i64 0
14 ; CHECK-NEXT: ret <16 x i1> [[R]]
16 %i0 = insertelement <16 x i8> undef, i8 %x, i32 0
17 %i1 = insertelement <16 x i8> undef, i8 %y, i32 0
18 %r = icmp eq <16 x i8> %i0, %i1
22 ; Eliminating an insert is still profitable. Mismatched index types are ok.
; %i0 uses an i8 index and %i1 an i32 index, both with the value 5. The
; expected insert of the scalar compare result uses an i64 index of 5.
24 define <8 x i1> @ins5_ins5_i16(i16 %x, i16 %y) {
25 ; CHECK-LABEL: @ins5_ins5_i16(
26 ; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
27 ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i1> undef, i1 [[R_SCALAR]], i64 5
28 ; CHECK-NEXT: ret <8 x i1> [[R]]
30 %i0 = insertelement <8 x i16> undef, i16 %x, i8 5
31 %i1 = insertelement <8 x i16> undef, i16 %y, i32 5
32 %r = icmp sgt <8 x i16> %i0, %i1
36 ; The new vector constant is calculated by constant folding.
; The base vectors are the constants zeroinitializer and <1, -1>, and both
; inserts write lane 1. The expected base of the result is <true, false>,
; which matches (0 sle 1, 0 sle -1) evaluated per lane; lane 1 is then
; overwritten with the scalar compare result.
38 define <2 x i1> @ins1_ins1_i64(i64 %x, i64 %y) {
39 ; CHECK-LABEL: @ins1_ins1_i64(
40 ; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp sle i64 [[X:%.*]], [[Y:%.*]]
41 ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> <i1 true, i1 false>, i1 [[R_SCALAR]], i64 1
42 ; CHECK-NEXT: ret <2 x i1> [[R]]
44 %i0 = insertelement <2 x i64> zeroinitializer, i64 %x, i64 1
45 %i1 = insertelement <2 x i64> <i64 1, i64 -1>, i64 %y, i32 1
46 %r = icmp sle <2 x i64> %i0, %i1
50 ; The inserts are free, but it's still better to scalarize.
; Floating-point variant: the vector fcmp carries the nnan and ninf flags and
; the expected scalar fcmp carries the same flags. The expected base vector
; for the undef-vs-undef lanes under the uge predicate is <true, true>.
52 define <2 x i1> @ins0_ins0_f64(double %x, double %y) {
53 ; CHECK-LABEL: @ins0_ins0_f64(
54 ; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp nnan ninf uge double [[X:%.*]], [[Y:%.*]]
55 ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> <i1 true, i1 true>, i1 [[R_SCALAR]], i64 0
56 ; CHECK-NEXT: ret <2 x i1> [[R]]
58 %i0 = insertelement <2 x double> undef, double %x, i32 0
59 %i1 = insertelement <2 x double> undef, double %y, i32 0
60 %r = fcmp nnan ninf uge <2 x double> %i0, %i1
64 ; Negative test - the two inserts use different lane indexes (this could still be folded).
; %i0 writes lane 1 while %i1 writes lane 0. The expected output is the same
; three instructions as the input: both inserts and the vector icmp remain.
66 define <16 x i1> @ins1_ins0_i8(i8 %x, i8 %y) {
67 ; CHECK-LABEL: @ins1_ins0_i8(
68 ; CHECK-NEXT: [[I0:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 1
69 ; CHECK-NEXT: [[I1:%.*]] = insertelement <16 x i8> undef, i8 [[Y:%.*]], i32 0
70 ; CHECK-NEXT: [[R:%.*]] = icmp sle <16 x i8> [[I0]], [[I1]]
71 ; CHECK-NEXT: ret <16 x i1> [[R]]
73 %i0 = insertelement <16 x i8> undef, i8 %x, i32 1
74 %i1 = insertelement <16 x i8> undef, i8 %y, i32 0
75 %r = icmp sle <16 x i8> %i0, %i1
79 ; Base vector does not have to be undef.
; %i0 is built on zeroinitializer and %i1 on undef. The expected output still
; scalarizes the compare and inserts the result into an undef <4 x i1>.
81 define <4 x i1> @ins0_ins0_i32(i32 %x, i32 %y) {
82 ; CHECK-LABEL: @ins0_ins0_i32(
83 ; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
84 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> undef, i1 [[R_SCALAR]], i64 0
85 ; CHECK-NEXT: ret <4 x i1> [[R]]
87 %i0 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
88 %i1 = insertelement <4 x i32> undef, i32 %y, i32 0
89 %r = icmp ne <4 x i32> %i0, %i1
93 ; Extra use is accounted for in cost calculation.
; %i0 is also passed to @use, so its insertelement remains in the expected
; output. The compare is still expected to be scalarized, and the insert of
; %y no longer appears.
95 define <4 x i1> @ins0_ins0_i32_use(i32 %x, i32 %y) {
96 ; CHECK-LABEL: @ins0_ins0_i32_use(
97 ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
98 ; CHECK-NEXT: call void @use(<4 x i32> [[I0]])
99 ; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ugt i32 [[X]], [[Y:%.*]]
100 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> undef, i1 [[R_SCALAR]], i64 0
101 ; CHECK-NEXT: ret <4 x i1> [[R]]
103 %i0 = insertelement <4 x i32> undef, i32 %x, i32 0
104 call void @use(<4 x i32> %i0)
105 %i1 = insertelement <4 x i32> undef, i32 %y, i32 0
106 %r = icmp ugt <4 x i32> %i0, %i1
110 ; Extra use (here of the second compare operand) is accounted for in cost calculation.
; %i1 is also passed to @usef, so its insertelement remains in the expected
; output while the insert of %x no longer appears. The scalar fcmp result is
; expected to be inserted into lane 1 of a zeroinitializer <4 x i1>.
112 define <4 x i1> @ins1_ins1_f32_use(float %x, float %y) {
113 ; CHECK-LABEL: @ins1_ins1_f32_use(
114 ; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 1
115 ; CHECK-NEXT: call void @usef(<4 x float> [[I1]])
116 ; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp ogt float [[X:%.*]], [[Y]]
117 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 1
118 ; CHECK-NEXT: ret <4 x i1> [[R]]
120 %i0 = insertelement <4 x float> undef, float %x, i32 1
121 %i1 = insertelement <4 x float> undef, float %y, i32 1
122 call void @usef(<4 x float> %i1)
123 %r = fcmp ogt <4 x float> %i0, %i1
127 ; If the scalar cmp is not cheaper than the vector cmp, extra uses can prevent the transform.
; Both inserted vectors are passed to @usef in addition to feeding the fcmp.
; The expected output keeps both inserts, both calls and the vector fcmp,
; i.e. the same instructions as the input.
129 define <4 x i1> @ins2_ins2_f32_uses(float %x, float %y) {
130 ; CHECK-LABEL: @ins2_ins2_f32_uses(
131 ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 2
132 ; CHECK-NEXT: call void @usef(<4 x float> [[I0]])
133 ; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 2
134 ; CHECK-NEXT: call void @usef(<4 x float> [[I1]])
135 ; CHECK-NEXT: [[R:%.*]] = fcmp oeq <4 x float> [[I0]], [[I1]]
136 ; CHECK-NEXT: ret <4 x i1> [[R]]
138 %i0 = insertelement <4 x float> undef, float %x, i32 2
139 call void @usef(<4 x float> %i0)
140 %i1 = insertelement <4 x float> undef, float %y, i32 2
141 call void @usef(<4 x float> %i1)
142 %r = fcmp oeq <4 x float> %i0, %i1
; Compare of an inserted scalar (lane 0) against a constant vector as operand 1.
; The constant's other lane is undef. The expected output compares %x with the
; lane-0 constant 42 and inserts the result into lane 0 of an undef <2 x i1>.
146 define <2 x i1> @constant_op1_i64(i64 %x) {
147 ; CHECK-LABEL: @constant_op1_i64(
148 ; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ne i64 [[X:%.*]], 42
149 ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> undef, i1 [[R_SCALAR]], i64 0
150 ; CHECK-NEXT: ret <2 x i1> [[R]]
152 %ins = insertelement <2 x i64> undef, i64 %x, i32 0
153 %r = icmp ne <2 x i64> %ins, <i64 42, i64 undef>
; Same shape as a compare against a constant operand 1, but every lane of the
; constant <42, -42> is defined. The expected output compares %x with 42 and
; inserts the result into lane 0 of the constant <true, true>.
157 define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {
158 ; CHECK-LABEL: @constant_op1_i64_not_undef_lane(
159 ; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp sge i64 [[X:%.*]], 42
160 ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> <i1 true, i1 true>, i1 [[R_SCALAR]], i64 0
161 ; CHECK-NEXT: ret <2 x i1> [[R]]
163 %ins = insertelement <2 x i64> undef, i64 %x, i32 0
164 %r = icmp sge <2 x i64> %ins, <i64 42, i64 -42>
168 ; negative test - load prevents the transform
; The inserted scalar is the result of a load from %p rather than a function
; argument. The expected output keeps the load, the insertelement and the
; vector icmp against <42, -42>.
170 define <2 x i1> @constant_op1_i64_load(i64* %p) {
171 ; CHECK-LABEL: @constant_op1_i64_load(
172 ; CHECK-NEXT: [[LD:%.*]] = load i64, i64* [[P:%.*]], align 4
173 ; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0
174 ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
175 ; CHECK-NEXT: ret <2 x i1> [[R]]
177 %ld = load i64, i64* %p
178 %ins = insertelement <2 x i64> undef, i64 %ld, i32 0
179 %r = icmp eq <2 x i64> %ins, <i64 42, i64 -42>
; The constant vector is operand 0 and the inserted scalar (lane 1) is operand 1.
; The expected scalar icmp keeps the lane-1 constant -42 as its first operand;
; the result is inserted into lane 1 of a zeroinitializer <4 x i1>.
183 define <4 x i1> @constant_op0_i32(i32 %x) {
184 ; CHECK-LABEL: @constant_op0_i32(
185 ; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ult i32 -42, [[X:%.*]]
186 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 1
187 ; CHECK-NEXT: ret <4 x i1> [[R]]
189 %ins = insertelement <4 x i32> undef, i32 %x, i32 1
190 %r = icmp ult <4 x i32> <i32 undef, i32 -42, i32 undef, i32 undef>, %ins
; Constant operand 0 with every lane defined (<1, 42, 42, -42>); the scalar is
; inserted at lane 1. The expected scalar icmp uses the lane-1 constant 42 as
; its first operand, and the result is inserted into an all-true <4 x i1>.
194 define <4 x i1> @constant_op0_i32_not_undef_lane(i32 %x) {
195 ; CHECK-LABEL: @constant_op0_i32_not_undef_lane(
196 ; CHECK-NEXT: [[R_SCALAR:%.*]] = icmp ule i32 42, [[X:%.*]]
197 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i1 [[R_SCALAR]], i64 1
198 ; CHECK-NEXT: ret <4 x i1> [[R]]
200 %ins = insertelement <4 x i32> undef, i32 %x, i32 1
201 %r = icmp ule <4 x i32> <i32 1, i32 42, i32 42, i32 -42>, %ins
; Floating-point compare with the constant vector <42.0, undef> as operand 0
; and the 'fast' flag set. The expected scalar fcmp keeps the 'fast' flag and
; the constant 42.0 as its first operand; the result is inserted into lane 0
; of a zeroinitializer <2 x i1>.
205 define <2 x i1> @constant_op0_f64(double %x) {
206 ; CHECK-LABEL: @constant_op0_f64(
207 ; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp fast olt double 4.200000e+01, [[X:%.*]]
208 ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 0
209 ; CHECK-NEXT: ret <2 x i1> [[R]]
211 %ins = insertelement <2 x double> undef, double %x, i32 0
212 %r = fcmp fast olt <2 x double> <double 42.0, double undef>, %ins
; Constant operand 0 with both lanes defined (<42.0, -42.0>); the scalar is
; inserted at lane 1, so the expected scalar fcmp uses the lane-1 constant
; -42.0 and keeps the nnan flag. The result is inserted into lane 1 of
; <true, true>.
216 define <2 x i1> @constant_op0_f64_not_undef_lane(double %x) {
217 ; CHECK-LABEL: @constant_op0_f64_not_undef_lane(
218 ; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp nnan ueq double -4.200000e+01, [[X:%.*]]
219 ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> <i1 true, i1 true>, i1 [[R_SCALAR]], i64 1
220 ; CHECK-NEXT: ret <2 x i1> [[R]]
222 %ins = insertelement <2 x double> undef, double %x, i32 1
223 %r = fcmp nnan ueq <2 x double> <double 42.0, double -42.0>, %ins
; Floating-point compare with the constant vector <undef, 42.0> as operand 1;
; the scalar is inserted at lane 1. The expected output compares %x with 42.0
; and inserts the result into lane 1 of a zeroinitializer <2 x i1>.
227 define <2 x i1> @constant_op1_f64(double %x) {
228 ; CHECK-LABEL: @constant_op1_f64(
229 ; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp one double [[X:%.*]], 4.200000e+01
230 ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 1
231 ; CHECK-NEXT: ret <2 x i1> [[R]]
233 %ins = insertelement <2 x double> undef, double %x, i32 1
234 %r = fcmp one <2 x double> %ins, <double undef, double 42.0>
; Constant operand 1 with every lane defined (<42.0, -42.0, 0.0, 1.0>); the
; scalar is inserted at lane 0. The expected output compares %x with the
; lane-0 constant 42.0 and inserts the result into an all-true <4 x i1>.
238 define <4 x i1> @constant_op1_f32_not_undef_lane(float %x) {
239 ; CHECK-LABEL: @constant_op1_f32_not_undef_lane(
240 ; CHECK-NEXT: [[R_SCALAR:%.*]] = fcmp uge float [[X:%.*]], 4.200000e+01
241 ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i1 [[R_SCALAR]], i64 0
242 ; CHECK-NEXT: ret <4 x i1> [[R]]
244 %ins = insertelement <4 x float> undef, float %x, i32 0
245 %r = fcmp uge <4 x float> %ins, <float 42.0, float -42.0, float 0.0, float 1.0>
249 ; negative test - select prevents the transform
; The vector icmp of two inserted scalars is used as the condition of a vector
; select. The expected output keeps both inserts, the vector icmp and the
; select, i.e. the same instructions as the input.
251 define <4 x float> @vec_select_use1(<4 x float> %x, <4 x float> %y, i32 %a, i32 %b) {
252 ; CHECK-LABEL: @vec_select_use1(
253 ; CHECK-NEXT: [[VECA:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i8 0
254 ; CHECK-NEXT: [[VECB:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i8 0
255 ; CHECK-NEXT: [[COND:%.*]] = icmp eq <4 x i32> [[VECA]], [[VECB]]
256 ; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]
257 ; CHECK-NEXT: ret <4 x float> [[R]]
259 %veca = insertelement <4 x i32> undef, i32 %a, i8 0
260 %vecb = insertelement <4 x i32> undef, i32 %b, i8 0
261 %cond = icmp eq <4 x i32> %veca, %vecb
262 %r = select <4 x i1> %cond, <4 x float> %x, <4 x float> %y
266 ; negative test - select prevents the transform
; The vector fcmp of one inserted scalar against zeroinitializer is used as
; the condition of a vector select. The expected output keeps the insert, the
; vector fcmp and the select, i.e. the same instructions as the input.
268 define <4 x float> @vec_select_use2(<4 x float> %x, <4 x float> %y, float %a) {
269 ; CHECK-LABEL: @vec_select_use2(
270 ; CHECK-NEXT: [[VECA:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i8 0
271 ; CHECK-NEXT: [[COND:%.*]] = fcmp oeq <4 x float> [[VECA]], zeroinitializer
272 ; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]
273 ; CHECK-NEXT: ret <4 x float> [[R]]
275 %veca = insertelement <4 x float> undef, float %a, i8 0
276 %cond = fcmp oeq <4 x float> %veca, zeroinitializer
277 %r = select <4 x i1> %cond, <4 x float> %x, <4 x float> %y
; The vector elements are pointers (i32*). A pointer inserted at lane 0 is
; compared against zeroinitializer; the expected output compares %t1 with
; null and inserts the result into lane 0 of an undef <4 x i1>.
281 define <4 x i1> @vector_of_pointers(i32* %t1) {
282 ; CHECK-LABEL: @vector_of_pointers(
283 ; CHECK-NEXT: [[T6_SCALAR:%.*]] = icmp ne i32* [[T1:%.*]], null
284 ; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x i1> undef, i1 [[T6_SCALAR]], i64 0
285 ; CHECK-NEXT: ret <4 x i1> [[T6]]
287 %t5 = insertelement <4 x i32*> undef, i32* %t1, i32 0
288 %t6 = icmp ne <4 x i32*> %t5, zeroinitializer