; Check AVX2 instructions that are disabled when AVX512VL/AVX512BW are present.
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=corei7-avx -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx -o /dev/null
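
; The RUN matrix covers plain AVX (corei7-avx), AVX2 (core-avx2), AVX512F
; (knl), knl with avx512vl and/or avx512bw forced on, and skx (which implies
; both avx512vl and avx512bw). Each configuration only has to compile and
; encode cleanly, hence -o /dev/null with no FileCheck lines.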
define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = and <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %x = and <4 x i64> %a, %y
  ret <4 x i64> %x
}

define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %x = and <2 x i64> %a, %y
  ret <2 x i64> %x
}

define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = or <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = xor <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = or <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = xor <2 x i64> %a2, %b
  ret <2 x i64> %x
}
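
; Integer add/sub/mul: vpaddq/vpaddd and vpsubq/vpsubd get EVEX forms with
; AVX512VL alone, while the byte/word variants (vpaddb/w, vpsubb/w, vpmullw)
; also require AVX512BW for their EVEX encodings.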
define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}
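
; For the compare tests, icmp+sext normally selects vpcmpgt*/vpcmpeq*.
; With AVX512VL (plus AVX512BW for the byte/word widths) compares produce a
; mask register result instead, so the VEX forms should no longer appear.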
define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}

define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}

define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}

define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}

define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}

define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}
define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = add <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = add <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = sub <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = sub <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = mul <8 x i16> %i, %j
  ret <8 x i16> %x
}
define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}

define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}
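
; The *_vpalignr shuffles below should match (v)palignr. AVX provides the
; 128-bit form and AVX2 the 256-bit VEX form; the EVEX encoding of vpalignr
; additionally requires AVX512BW (and AVX512VL at these widths).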
define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i16> %shuffle
}

define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i8> %shuffle
}

define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i8> %shuffle
}
define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}

define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
  ret <4 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
  ret <2 x double> %bitcast64
}

define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
  ret <16 x i16> %shuffle
}
define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
  %r1 = extractelement <2 x i64> %x, i32 0
  %r2 = extractelement <2 x i64> %x, i32 1
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
  %r1 = extractelement <4 x i32> %x, i32 1
  %r2 = extractelement <4 x i32> %x, i32 3
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
  %r1 = extractelement <8 x i16> %x, i32 1
  %r2 = extractelement <8 x i16> %x, i32 3
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
  %r1 = extractelement <16 x i8> %x, i32 1
  %r2 = extractelement <16 x i8> %x, i32 3
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}
define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 3
  ret <2 x i64> %r2
}

define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
  %val = load i32, i32* %ptr
  %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
  ret <4 x i32> %r2
}

define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
  %val = load i16, i16* %ptr
  %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
  ret <8 x i16> %r2
}

define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
  %val = load i8, i8* %ptr
  %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
  ret <16 x i8> %r2
}
define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i32> %shuffle
}

define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i32> %shuffle
}

define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}
define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %shuffle
}
define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}
define void @store_floats(<4 x float> %x, i64* %p) {
  %a = fadd <4 x float> %x, %x
  %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %c = bitcast <2 x float> %b to i64
  store i64 %c, i64* %p
  ret void
}

define void @store_double(<2 x double> %x, i64* %p) {
  %a = fadd <2 x double> %x, %x
  %b = extractelement <2 x double> %a, i32 0
  %c = bitcast double %b to i64
  store i64 %c, i64* %p
  ret void
}

define void @store_h_double(<2 x double> %x, i64* %p) {
  %a = fadd <2 x double> %x, %x
  %b = extractelement <2 x double> %a, i32 1
  %c = bitcast double %b to i64
  store i64 %c, i64* %p
  ret void
}
define <2 x double> @test39(double* %ptr) nounwind {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @test40(<2 x double>* %ptr) nounwind {
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}
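
; Variable-count shifts: AVX2 provides vpsllvd/q, vpsrlvd/q, and vpsravd,
; but no vpsravq and no 16-bit variable shifts. vpsravq appears with
; AVX512VL, and vpsllvw/vpsrlvw/vpsravw with AVX512BW+VL, so the v4i64 ashr
; and the v16i16/v8i16 cases lower differently depending on the feature set.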
define <8 x i32> @ashr_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %shift = ashr <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i32> @lshr_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %shift = lshr <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i32> @shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
  %shift = shl <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i32> @ashr_const_v8i32(<8 x i32> %a) {
  %shift = ashr <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %shift
}

define <8 x i32> @lshr_const_v8i32(<8 x i32> %a) {
  %shift = lshr <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %shift
}

define <8 x i32> @shl_const_v8i32(<8 x i32> %a) {
  %shift = shl <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %shift
}

define <4 x i64> @ashr_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %shift = ashr <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i64> @lshr_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %shift = lshr <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i64> @shl_v4i64(<4 x i64> %a, <4 x i64> %b) {
  %shift = shl <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i64> @ashr_const_v4i64(<4 x i64> %a) {
  %shift = ashr <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %shift
}

define <4 x i64> @lshr_const_v4i64(<4 x i64> %a) {
  %shift = lshr <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %shift
}

define <4 x i64> @shl_const_v4i64(<4 x i64> %a) {
  %shift = shl <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
  ret <4 x i64> %shift
}
define <16 x i16> @ashr_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %shift = ashr <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i16> @lshr_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %shift = lshr <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i16> @shl_v16i16(<16 x i16> %a, <16 x i16> %b) {
  %shift = shl <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i16> @ashr_const_v16i16(<16 x i16> %a) {
  %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <16 x i16> @lshr_const_v16i16(<16 x i16> %a) {
  %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <16 x i16> @shl_const_v16i16(<16 x i16> %a) {
  %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}
define <4 x i32> @ashr_v4i32(<4 x i32> %a, <4 x i32> %b) {
  %shift = ashr <4 x i32> %a, %b
  ret <4 x i32> %shift
}

define <4 x i32> @shl_const_v4i32(<4 x i32> %a) {
  %shift = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %shift
}

define <2 x i64> @ashr_v2i64(<2 x i64> %a, <2 x i64> %b) {
  %shift = ashr <2 x i64> %a, %b
  ret <2 x i64> %shift
}

define <2 x i64> @shl_const_v2i64(<2 x i64> %a) {
  %shift = shl <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %shift
}

define <8 x i16> @ashr_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %shift = ashr <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <8 x i16> @lshr_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %shift = lshr <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <8 x i16> @shl_v8i16(<8 x i16> %a, <8 x i16> %b) {
  %shift = shl <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <8 x i16> @ashr_const_v8i16(<8 x i16> %a) {
  %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

define <8 x i16> @lshr_const_v8i16(<8 x i16> %a) {
  %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

define <8 x i16> @shl_const_v8i16(<8 x i16> %a) {
  %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}
define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = zext <8 x i8> %B to <8 x i16>
  ret <8 x i16> %C
}
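
; Splat broadcasts: the VEX (AVX2) vpbroadcast* forms only accept an xmm or
; memory source, so splatting a GPR value goes through vmovd/vmovq first.
; The EVEX forms can broadcast directly from a GPR, with AVX512BW required
; for the byte and word variants.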
define <32 x i8> @_broadcast32xi8(i8 %a) {
  %b = insertelement <32 x i8> undef, i8 %a, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %c
}

define <16 x i8> @_broadcast16xi8(i8 %a) {
  %b = insertelement <16 x i8> undef, i8 %a, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %c
}

define <16 x i16> @_broadcast16xi16(i16 %a) {
  %b = insertelement <16 x i16> undef, i16 %a, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  ret <16 x i16> %c
}

define <8 x i16> @_broadcast8xi16(i16 %a) {
  %b = insertelement <8 x i16> undef, i16 %a, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %c
}

define <8 x i32> @_broadcast8xi32(i32 %a) {
  %b = insertelement <8 x i32> undef, i32 %a, i32 0
  %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %c
}

define <4 x i32> @_broadcast4xi32(i32 %a) {
  %b = insertelement <4 x i32> undef, i32 %a, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %c
}

define <4 x i64> @_broadcast4xi64(i64 %a) {
  %b = insertelement <4 x i64> undef, i64 %a, i64 0
  %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %c
}

define <2 x i64> @_broadcast2xi64(i64 %a) {
  %b = insertelement <2 x i64> undef, i64 %a, i64 0
  %c = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %c
}

define <8 x float> @_broadcast8xfloat(float %a) {
  %b = insertelement <8 x float> undef, float %a, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %c
}

define <4 x float> @_broadcast4xfloat(float %a) {
  %b = insertelement <4 x float> undef, float %a, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %c
}

define <4 x double> @_broadcast4xdouble(double %a) {
  %b = insertelement <4 x double> undef, double %a, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %c
}

define <2 x double> @_broadcast2xdouble(double %a) {
  %b = insertelement <2 x double> undef, double %a, i32 0
  %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %c
}
define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  %x = fmul <4 x float> %a0, %a1
  %res = fsub <4 x float> %x, %a2
  ret <4 x float> %res
}
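
; The fmul+fsub pair above can only fold to a vfmsub* instruction when FMA
; is available (core-avx2, knl, skx); on corei7-avx it stays vmulps+vsubps.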
define <32 x i8> @test_cmpgtb(<32 x i8> %A) {
  ; This should generate the following code:
  ;   vpxor %ymm1, %ymm1, %ymm1
  ;   vpcmpgtb %ymm0, %ymm1, %ymm0
  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %B
}
define <4 x float> @_inreg4xfloat(float %a) {
  %b = insertelement <4 x float> undef, float %a, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %c
}

define <8 x float> @_inreg8xfloat(float %a) {
  %b = insertelement <8 x float> undef, float %a, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %c
}

define <4 x double> @_inreg4xdouble(double %a) {
  %b = insertelement <4 x double> undef, double %a, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %c
}
define <8 x i32> @test_mul_v8i32(<8 x i32> %arg1, <8 x i32> %arg2) #0 {
  %ret = mul <8 x i32> %arg1, %arg2
  ret <8 x i32> %ret
}
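
; The attribute group referenced by #0 above was not preserved in this
; excerpt; a minimal placeholder (assumed contents) so the file still parses:
attributes #0 = { nounwind readnone }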