1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -instcombine -S | FileCheck %s
4 ; Can't get smaller than this.
; Baseline case: a lone vector trunc from <2 x i64> to <2 x i1>.
; The expected output is that same single trunc feeding the return.
6 define <2 x i1> @trunc(<2 x i64> %a) {
8 ; CHECK-NEXT: [[T:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
9 ; CHECK-NEXT: ret <2 x i1> [[T]]
11 %t = trunc <2 x i64> %a to <2 x i1>
; Masks each lane with 1 and compares the result against zero (icmp ne).
; The expected output replaces the and + icmp pair with one trunc to <2 x i1>.
17 define <2 x i1> @and_cmp_is_trunc(<2 x i64> %a) {
18 ; CHECK-LABEL: @and_cmp_is_trunc(
19 ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
20 ; CHECK-NEXT: ret <2 x i1> [[R]]
22 %t = and <2 x i64> %a, <i64 1, i64 1>
23 %r = icmp ne <2 x i64> %t, zeroinitializer
; Mask-and-compare where lane 0 of the mask constant is undef and the
; compare constant is all zeros. The expected output is still a single trunc.
29 define <2 x i1> @and_cmp_is_trunc_even_with_undef_elt(<2 x i64> %a) {
30 ; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elt(
31 ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1>
32 ; CHECK-NEXT: ret <2 x i1> [[R]]
34 %t = and <2 x i64> %a, <i64 undef, i64 1>
35 %r = icmp ne <2 x i64> %t, zeroinitializer
39 ; TODO: This could be just 1 instruction (trunc), but our undef matching is incomplete.
; Both the mask constant and the compare constant have an undef lane 0.
; The expected output currently keeps the and + icmp pair unchanged.
41 define <2 x i1> @and_cmp_is_trunc_even_with_undef_elts(<2 x i64> %a) {
42 ; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elts(
43 ; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[A:%.*]], <i64 undef, i64 1>
44 ; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i64> [[T]], <i64 undef, i64 0>
45 ; CHECK-NEXT: ret <2 x i1> [[R]]
47 %t = and <2 x i64> %a, <i64 undef, i64 1>
48 %r = icmp ne <2 x i64> %t, <i64 undef, i64 0>
52 ; The ashr turns into an lshr.
; The input masks each lane to its low 16 bits (65535) and then shifts right
; arithmetically by 1. The expected output shifts first, as an lshr, and
; applies the correspondingly narrowed mask (32767) afterwards.
53 define <2 x i64> @test2(<2 x i64> %a) {
54 ; CHECK-LABEL: @test2(
55 ; CHECK-NEXT: [[B:%.*]] = lshr <2 x i64> [[A:%.*]], <i64 1, i64 1>
56 ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[B]], <i64 32767, i64 32767>
57 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
59 %b = and <2 x i64> %a, <i64 65535, i64 65535>
60 %t = ashr <2 x i64> %b, <i64 1, i64 1>
; Two 'fcmp ord' compares against zero are sign-extended and and'ed together.
; The expected output merges them into one 'fcmp ord %a, %b' followed by a
; single sext and the bitcast.
64 define <2 x i64> @test3(<4 x float> %a, <4 x float> %b) {
65 ; CHECK-LABEL: @test3(
66 ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <4 x float> [[A:%.*]], [[B:%.*]]
67 ; CHECK-NEXT: [[AND:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
68 ; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
69 ; CHECK-NEXT: ret <2 x i64> [[CONV]]
71 %cmp = fcmp ord <4 x float> %a, zeroinitializer
72 %sext = sext <4 x i1> %cmp to <4 x i32>
73 %cmp4 = fcmp ord <4 x float> %b, zeroinitializer
74 %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
75 %and = and <4 x i32> %sext, %sext5
76 %conv = bitcast <4 x i32> %and to <2 x i64>
; Two 'fcmp uno' compares against zero are sign-extended and or'ed together.
; The expected output merges them into one 'fcmp uno %a, %b' followed by a
; single sext and the bitcast.
80 define <2 x i64> @test4(<4 x float> %a, <4 x float> %b) {
81 ; CHECK-LABEL: @test4(
82 ; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <4 x float> [[A:%.*]], [[B:%.*]]
83 ; CHECK-NEXT: [[OR:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
84 ; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[OR]] to <2 x i64>
85 ; CHECK-NEXT: ret <2 x i64> [[CONV]]
87 %cmp = fcmp uno <4 x float> %a, zeroinitializer
88 %sext = sext <4 x i1> %cmp to <4 x i32>
89 %cmp4 = fcmp uno <4 x float> %b, zeroinitializer
90 %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
91 %or = or <4 x i32> %sext, %sext5
92 %conv = bitcast <4 x i32> %or to <2 x i64>
; Two 'fcmp ult' compares are sign-extended to <4 x i32> and and'ed.
; The expected output keeps both compares, performs the 'and' on the
; <4 x i1> compare results, and sign-extends only once.
97 define <2 x i64> @test5(<4 x float> %a, <4 x float> %b) {
98 ; CHECK-LABEL: @test5(
99 ; CHECK-NEXT: [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
100 ; CHECK-NEXT: [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
101 ; CHECK-NEXT: [[AND1:%.*]] = and <4 x i1> [[CMP]], [[CMP4]]
102 ; CHECK-NEXT: [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
103 ; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
104 ; CHECK-NEXT: ret <2 x i64> [[CONV]]
106 %cmp = fcmp ult <4 x float> %a, zeroinitializer
107 %sext = sext <4 x i1> %cmp to <4 x i32>
108 %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
109 %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
110 %and = and <4 x i32> %sext, %sext5
111 %conv = bitcast <4 x i32> %and to <2 x i64>
; Two 'fcmp ult' compares are sign-extended to <4 x i32> and or'ed.
; The expected output keeps both compares, performs the 'or' on the
; <4 x i1> compare results, and sign-extends only once.
; Note: the value named %and in the input actually holds the 'or' result.
115 define <2 x i64> @test6(<4 x float> %a, <4 x float> %b) {
116 ; CHECK-LABEL: @test6(
117 ; CHECK-NEXT: [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
118 ; CHECK-NEXT: [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
119 ; CHECK-NEXT: [[AND1:%.*]] = or <4 x i1> [[CMP]], [[CMP4]]
120 ; CHECK-NEXT: [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
121 ; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
122 ; CHECK-NEXT: ret <2 x i64> [[CONV]]
124 %cmp = fcmp ult <4 x float> %a, zeroinitializer
125 %sext = sext <4 x i1> %cmp to <4 x i32>
126 %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
127 %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
128 %and = or <4 x i32> %sext, %sext5
129 %conv = bitcast <4 x i32> %and to <2 x i64>
; Two 'fcmp ult' compares are sign-extended to <4 x i32> and xor'ed.
; The expected output keeps both compares, performs the 'xor' on the
; <4 x i1> compare results, and sign-extends only once.
; Note: the value named %and in the input actually holds the 'xor' result.
133 define <2 x i64> @test7(<4 x float> %a, <4 x float> %b) {
134 ; CHECK-LABEL: @test7(
135 ; CHECK-NEXT: [[CMP:%.*]] = fcmp ult <4 x float> [[A:%.*]], zeroinitializer
136 ; CHECK-NEXT: [[CMP4:%.*]] = fcmp ult <4 x float> [[B:%.*]], zeroinitializer
137 ; CHECK-NEXT: [[AND1:%.*]] = xor <4 x i1> [[CMP]], [[CMP4]]
138 ; CHECK-NEXT: [[AND:%.*]] = sext <4 x i1> [[AND1]] to <4 x i32>
139 ; CHECK-NEXT: [[CONV:%.*]] = bitcast <4 x i32> [[AND]] to <2 x i64>
140 ; CHECK-NEXT: ret <2 x i64> [[CONV]]
142 %cmp = fcmp ult <4 x float> %a, zeroinitializer
143 %sext = sext <4 x i1> %cmp to <4 x i32>
144 %cmp4 = fcmp ult <4 x float> %b, zeroinitializer
145 %sext5 = sext <4 x i1> %cmp4 to <4 x i32>
146 %and = xor <4 x i32> %sext, %sext5
147 %conv = bitcast <4 x i32> %and to <2 x i64>
; Truncates <2 x i64> to <2 x i32>, adds 1 to each lane and stores the result.
; The expected output keeps the trunc and add as written; the only visible
; difference is that the store carries an explicit 'align 8'.
151 define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) {
152 ; CHECK-LABEL: @convert(
153 ; CHECK-NEXT: [[VAL:%.*]] = trunc <2 x i64> [[SRC:%.*]] to <2 x i32>
154 ; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[VAL]], <i32 1, i32 1>
155 ; CHECK-NEXT: store <2 x i32> [[ADD]], <2 x i32>* [[DST_ADDR:%.*]], align 8
156 ; CHECK-NEXT: ret void
158 %val = trunc <2 x i64> %src to <2 x i32>
159 %add = add <2 x i32> %val, <i32 1, i32 1>
160 store <2 x i32> %add, <2 x i32>* %dst.addr
; trunc i64 -> i32 followed by zext i32 -> i65 (destination wider than source).
; The expected output masks the i64 input with 4294967295 (the low 32 bits)
; and then zero-extends directly from i64 to i65.
164 define <2 x i65> @foo(<2 x i64> %t) {
166 ; CHECK-NEXT: [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], <i64 4294967295, i64 4294967295>
167 ; CHECK-NEXT: [[B:%.*]] = zext <2 x i64> [[A_MASK]] to <2 x i65>
168 ; CHECK-NEXT: ret <2 x i65> [[B]]
170 %a = trunc <2 x i64> %t to <2 x i32>
171 %b = zext <2 x i32> %a to <2 x i65>
; trunc i65 -> i32 followed by zext i32 -> i64 (destination narrower than source).
; The expected output truncates directly from i65 to i64 and then masks with
; 4294967295 (the low 32 bits).
175 define <2 x i64> @bar(<2 x i65> %t) {
177 ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64>
178 ; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[TMP1]], <i64 4294967295, i64 4294967295>
179 ; CHECK-NEXT: ret <2 x i64> [[B]]
181 %a = trunc <2 x i65> %t to <2 x i32>
182 %b = zext <2 x i32> %a to <2 x i64>
; trunc i65 -> i32 followed by sext i32 -> i64.
; The expected output leaves the trunc + sext pair unchanged.
186 define <2 x i64> @bars(<2 x i65> %t) {
187 ; CHECK-LABEL: @bars(
188 ; CHECK-NEXT: [[A:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i32>
189 ; CHECK-NEXT: [[B:%.*]] = sext <2 x i32> [[A]] to <2 x i64>
190 ; CHECK-NEXT: ret <2 x i64> [[B]]
192 %a = trunc <2 x i65> %t to <2 x i32>
193 %b = sext <2 x i32> %a to <2 x i64>
; trunc i64 -> i32 followed by sext i32 -> i64 (same source and destination width).
; The expected output replaces the cast pair with 'shl 32' + 'ashr exact 32'
; on the original <2 x i64> value.
197 define <2 x i64> @quxs(<2 x i64> %t) {
198 ; CHECK-LABEL: @quxs(
199 ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], <i64 32, i64 32>
200 ; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], <i64 32, i64 32>
201 ; CHECK-NEXT: ret <2 x i64> [[B]]
203 %a = trunc <2 x i64> %t to <2 x i32>
204 %b = sext <2 x i32> %a to <2 x i64>
; An explicit 'shl 32' + 'ashr 32' pair on <2 x i64>.
; The expected output keeps both shifts; the only visible difference is the
; 'exact' flag added to the ashr.
208 define <2 x i64> @quxt(<2 x i64> %t) {
209 ; CHECK-LABEL: @quxt(
210 ; CHECK-NEXT: [[A:%.*]] = shl <2 x i64> [[T:%.*]], <i64 32, i64 32>
211 ; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[A]], <i64 32, i64 32>
212 ; CHECK-NEXT: ret <2 x i64> [[B]]
214 %a = shl <2 x i64> %t, <i64 32, i64 32>
215 %b = ashr <2 x i64> %a, <i64 32, i64 32>
; Round trip double -> float -> double via fptrunc + fpext.
; The expected output leaves both casts in place.
219 define <2 x double> @fa(<2 x double> %t) {
221 ; CHECK-NEXT: [[A:%.*]] = fptrunc <2 x double> [[T:%.*]] to <2 x float>
222 ; CHECK-NEXT: [[B:%.*]] = fpext <2 x float> [[A]] to <2 x double>
223 ; CHECK-NEXT: ret <2 x double> [[B]]
225 %a = fptrunc <2 x double> %t to <2 x float>
226 %b = fpext <2 x float> %a to <2 x double>
; Round trip double -> i64 -> double via fptoui + uitofp.
; The expected output leaves both casts in place.
230 define <2 x double> @fb(<2 x double> %t) {
232 ; CHECK-NEXT: [[A:%.*]] = fptoui <2 x double> [[T:%.*]] to <2 x i64>
233 ; CHECK-NEXT: [[B:%.*]] = uitofp <2 x i64> [[A]] to <2 x double>
234 ; CHECK-NEXT: ret <2 x double> [[B]]
236 %a = fptoui <2 x double> %t to <2 x i64>
237 %b = uitofp <2 x i64> %a to <2 x double>
; Round trip double -> i64 -> double via fptosi + sitofp.
; The expected output leaves both casts in place.
241 define <2 x double> @fc(<2 x double> %t) {
243 ; CHECK-NEXT: [[A:%.*]] = fptosi <2 x double> [[T:%.*]] to <2 x i64>
244 ; CHECK-NEXT: [[B:%.*]] = sitofp <2 x i64> [[A]] to <2 x double>
245 ; CHECK-NEXT: ret <2 x double> [[B]]
247 %a = fptosi <2 x double> %t to <2 x i64>
248 %b = sitofp <2 x i64> %a to <2 x double>
; Builds two splat vectors with insertelement chains (one from %a, one from a
; sizeof-style value computed with a null-based getelementptr + ptrtoint +
; trunc), then multiplies and adds them. None of the visible instructions
; feeds the returned value, which is a plain undef; the expected output is
; just that return.
253 define <4 x float> @f(i32 %a) {
255 ; CHECK-NEXT: ret <4 x float> undef
; Splat of %a across all four lanes.
257 %dim = insertelement <4 x i32> undef, i32 %a, i32 0
258 %dim30 = insertelement <4 x i32> %dim, i32 %a, i32 1
259 %dim31 = insertelement <4 x i32> %dim30, i32 %a, i32 2
260 %dim32 = insertelement <4 x i32> %dim31, i32 %a, i32 3
; Address of element 1 relative to a null base, converted to an integer.
262 %offset_ptr = getelementptr <4 x float>, <4 x float>* null, i32 1
263 %offset_int = ptrtoint <4 x float>* %offset_ptr to i64
264 %sizeof32 = trunc i64 %offset_int to i32
; Splat of %sizeof32 across all four lanes.
266 %smearinsert33 = insertelement <4 x i32> undef, i32 %sizeof32, i32 0
267 %smearinsert34 = insertelement <4 x i32> %smearinsert33, i32 %sizeof32, i32 1
268 %smearinsert35 = insertelement <4 x i32> %smearinsert34, i32 %sizeof32, i32 2
269 %smearinsert36 = insertelement <4 x i32> %smearinsert35, i32 %sizeof32, i32 3
271 %delta_scale = mul <4 x i32> %dim32, %smearinsert36
272 %offset_delta = add <4 x i32> zeroinitializer, %delta_scale
274 %offset_varying_delta = add <4 x i32> %offset_delta, undef
276 ret <4 x float> undef
; Compares %n against zero with both 'une' and 'ueq', sign-extends each result
; and or's them together. The expected output is a return of the constant
; all-ones (-1 in every lane) vector.
279 define <8 x i32> @pr24458(<8 x float> %n) {
280 ; CHECK-LABEL: @pr24458(
281 ; CHECK-NEXT: ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
283 %notequal_b_load_.i = fcmp une <8 x float> %n, zeroinitializer
284 %equal_a_load72_.i = fcmp ueq <8 x float> %n, zeroinitializer
285 %notequal_b_load__to_boolvec.i = sext <8 x i1> %notequal_b_load_.i to <8 x i32>
286 %equal_a_load72__to_boolvec.i = sext <8 x i1> %equal_a_load72_.i to <8 x i32>
287 %wrong = or <8 x i32> %notequal_b_load__to_boolvec.i, %equal_a_load72__to_boolvec.i
291 ; Hoist a trunc to a scalar if we're inserting into an undef vector.
292 ; trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
; Inserts scalar %x at constant index 1 of an undef <3 x i32> and truncates
; the vector. The expected output truncates the scalar to i16 first and then
; inserts it into an undef <3 x i16> at the same index.
294 define <3 x i16> @trunc_inselt_undef(i32 %x) {
295 ; CHECK-LABEL: @trunc_inselt_undef(
296 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
297 ; CHECK-NEXT: [[TRUNC:%.*]] = insertelement <3 x i16> undef, i16 [[TMP1]], i32 1
298 ; CHECK-NEXT: ret <3 x i16> [[TRUNC]]
300 %vec = insertelement <3 x i32> undef, i32 %x, i32 1
301 %trunc = trunc <3 x i32> %vec to <3 x i16>
305 ; Hoist an fptrunc to a scalar if we're inserting into an undef vector.
306 ; fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
; Inserts scalar %x at a variable index into an all-undef <2 x double> and
; FP-truncates the vector. The expected output FP-truncates the scalar first
; and then inserts it into an undef <2 x float> at the same variable index.
308 define <2 x float> @fptrunc_inselt_undef(double %x, i32 %index) {
309 ; CHECK-LABEL: @fptrunc_inselt_undef(
310 ; CHECK-NEXT: [[TMP1:%.*]] = fptrunc double [[X:%.*]] to float
311 ; CHECK-NEXT: [[TRUNC:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 [[INDEX:%.*]]
312 ; CHECK-NEXT: ret <2 x float> [[TRUNC]]
314 %vec = insertelement <2 x double> <double undef, double undef>, double %x, i32 %index
315 %trunc = fptrunc <2 x double> %vec to <2 x float>
316 ret <2 x float> %trunc
319 ; TODO: Strengthen the backend, so we can have this canonicalization.
320 ; Insert a scalar int into a constant vector and truncate:
321 ; trunc (inselt C, X, Index) --> inselt C, (trunc X), Index
; Inserts scalar %x at constant index 1 of a constant vector, then truncates.
; The expected output keeps the insertelement-then-trunc order. The constant
; in the overwritten lane (-2 in the input) appears as undef in the output.
323 define <3 x i16> @trunc_inselt1(i32 %x) {
324 ; CHECK-LABEL: @trunc_inselt1(
325 ; CHECK-NEXT: [[VEC:%.*]] = insertelement <3 x i32> <i32 3, i32 undef, i32 65536>, i32 [[X:%.*]], i32 1
326 ; CHECK-NEXT: [[TRUNC:%.*]] = trunc <3 x i32> [[VEC]] to <3 x i16>
327 ; CHECK-NEXT: ret <3 x i16> [[TRUNC]]
329 %vec = insertelement <3 x i32> <i32 3, i32 -2, i32 65536>, i32 %x, i32 1
330 %trunc = trunc <3 x i32> %vec to <3 x i16>
334 ; TODO: Strengthen the backend, so we can have this canonicalization.
335 ; Insert a scalar FP into a constant vector and FP truncate:
336 ; fptrunc (inselt C, X, Index) --> inselt C, (fptrunc X), Index
; Inserts scalar %x at a variable index into the constant vector
; <undef, 3.0>, then FP-truncates. The expected output is unchanged from the
; input: insertelement on <2 x double> followed by the vector fptrunc.
338 define <2 x float> @fptrunc_inselt1(double %x, i32 %index) {
339 ; CHECK-LABEL: @fptrunc_inselt1(
340 ; CHECK-NEXT: [[VEC:%.*]] = insertelement <2 x double> <double undef, double 3.000000e+00>, double [[X:%.*]], i32 [[INDEX:%.*]]
341 ; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc <2 x double> [[VEC]] to <2 x float>
342 ; CHECK-NEXT: ret <2 x float> [[TRUNC]]
344 %vec = insertelement <2 x double> <double undef, double 3.0>, double %x, i32 %index
345 %trunc = fptrunc <2 x double> %vec to <2 x float>
346 ret <2 x float> %trunc
349 ; TODO: Strengthen the backend, so we can have this canonicalization.
350 ; Insert a scalar int constant into a vector and truncate:
351 ; trunc (inselt X, C, Index) --> inselt (trunc X), C', Index
; Inserts the constant 1048576 at a variable index into vector %x, then
; truncates to <8 x i16>. The expected output is unchanged from the input:
; insertelement on <8 x i32> followed by the vector trunc.
353 define <8 x i16> @trunc_inselt2(<8 x i32> %x, i32 %index) {
354 ; CHECK-LABEL: @trunc_inselt2(
355 ; CHECK-NEXT: [[VEC:%.*]] = insertelement <8 x i32> [[X:%.*]], i32 1048576, i32 [[INDEX:%.*]]
356 ; CHECK-NEXT: [[TRUNC:%.*]] = trunc <8 x i32> [[VEC]] to <8 x i16>
357 ; CHECK-NEXT: ret <8 x i16> [[TRUNC]]
359 %vec = insertelement <8 x i32> %x, i32 1048576, i32 %index
360 %trunc = trunc <8 x i32> %vec to <8 x i16>
364 ; TODO: Strengthen the backend, so we can have this canonicalization.
365 ; Insert a scalar FP constant into a vector and FP truncate:
366 ; fptrunc (inselt X, C, Index) --> inselt (fptrunc X), C', Index
; Inserts the constant 4.0 at constant index 2 into vector %x, then
; FP-truncates to <3 x float>. The expected output is unchanged from the
; input: insertelement on <3 x double> followed by the vector fptrunc.
368 define <3 x float> @fptrunc_inselt2(<3 x double> %x) {
369 ; CHECK-LABEL: @fptrunc_inselt2(
370 ; CHECK-NEXT: [[VEC:%.*]] = insertelement <3 x double> [[X:%.*]], double 4.000000e+00, i32 2
371 ; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc <3 x double> [[VEC]] to <3 x float>
372 ; CHECK-NEXT: ret <3 x float> [[TRUNC]]
374 %vec = insertelement <3 x double> %x, double 4.0, i32 2
375 %trunc = fptrunc <3 x double> %vec to <3 x float>
376 ret <3 x float> %trunc
379 ; Converting to a wide type might reduce instruction count,
380 ; but we cannot do that unless the backend can recover from
381 ; the creation of a potentially illegal op (like a 64-bit vmul).
382 ; PR40032 - https://bugs.llvm.org/show_bug.cgi?id=40032
; Truncates both <2 x i64> operands to <2 x i32>, multiplies in the narrow
; type and sign-extends the product back to <2 x i64>. The expected output
; keeps the narrow multiply: two truncs, a <2 x i32> mul, then one sext.
384 define <2 x i64> @sext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) {
385 ; CHECK-LABEL: @sext_less_casting_with_wideop(
386 ; CHECK-NEXT: [[XNARROW:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32>
387 ; CHECK-NEXT: [[YNARROW:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32>
388 ; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[XNARROW]], [[YNARROW]]
389 ; CHECK-NEXT: [[R:%.*]] = sext <2 x i32> [[MUL]] to <2 x i64>
390 ; CHECK-NEXT: ret <2 x i64> [[R]]
392 %xnarrow = trunc <2 x i64> %x to <2 x i32>
393 %ynarrow = trunc <2 x i64> %y to <2 x i32>
394 %mul = mul <2 x i32> %xnarrow, %ynarrow
395 %r = sext <2 x i32> %mul to <2 x i64>
; Truncates both <2 x i64> operands to <2 x i32>, multiplies in the narrow
; type and zero-extends the product back to <2 x i64>. The expected output
; keeps the narrow multiply: two truncs, a <2 x i32> mul, then one zext.
399 define <2 x i64> @zext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) {
400 ; CHECK-LABEL: @zext_less_casting_with_wideop(
401 ; CHECK-NEXT: [[XNARROW:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32>
402 ; CHECK-NEXT: [[YNARROW:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32>
403 ; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[XNARROW]], [[YNARROW]]
404 ; CHECK-NEXT: [[R:%.*]] = zext <2 x i32> [[MUL]] to <2 x i64>
405 ; CHECK-NEXT: ret <2 x i64> [[R]]
407 %xnarrow = trunc <2 x i64> %x to <2 x i32>
408 %ynarrow = trunc <2 x i64> %y to <2 x i32>
409 %mul = mul <2 x i32> %xnarrow, %ynarrow
410 %r = zext <2 x i32> %mul to <2 x i64>