1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64
; 256-bit add of <4 x i64>: the expected lowering is one vpaddq on ymm registers.
5 define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
6 ; CHECK-LABEL: test_vpaddq:
8 ; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0
9 ; CHECK-NEXT: ret{{[l|q]}}
10 %x = add <4 x i64> %i, %j
; 256-bit add of <8 x i32>: the expected lowering is one vpaddd on ymm registers.
14 define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
15 ; CHECK-LABEL: test_vpaddd:
17 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
18 ; CHECK-NEXT: ret{{[l|q]}}
19 %x = add <8 x i32> %i, %j
; 256-bit add of <16 x i16>: the expected lowering is one vpaddw on ymm registers.
23 define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
24 ; CHECK-LABEL: test_vpaddw:
26 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
27 ; CHECK-NEXT: ret{{[l|q]}}
28 %x = add <16 x i16> %i, %j
; 256-bit add of <32 x i8>: the expected lowering is one vpaddb on ymm registers.
32 define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
33 ; CHECK-LABEL: test_vpaddb:
35 ; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0
36 ; CHECK-NEXT: ret{{[l|q]}}
37 %x = add <32 x i8> %i, %j
; 256-bit subtract of <4 x i64> (%i - %j): the expected lowering is one vpsubq
; on ymm registers.
41 define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
42 ; CHECK-LABEL: test_vpsubq:
44 ; CHECK-NEXT: vpsubq %ymm1, %ymm0, %ymm0
45 ; CHECK-NEXT: ret{{[l|q]}}
46 %x = sub <4 x i64> %i, %j
; 256-bit subtract of <8 x i32> (%i - %j): the expected lowering is one vpsubd
; on ymm registers.
50 define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
51 ; CHECK-LABEL: test_vpsubd:
53 ; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0
54 ; CHECK-NEXT: ret{{[l|q]}}
55 %x = sub <8 x i32> %i, %j
; 256-bit subtract of <16 x i16> (%i - %j): the expected lowering is one vpsubw
; on ymm registers.
59 define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
60 ; CHECK-LABEL: test_vpsubw:
62 ; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0
63 ; CHECK-NEXT: ret{{[l|q]}}
64 %x = sub <16 x i16> %i, %j
; 256-bit subtract of <32 x i8> (%i - %j): the expected lowering is one vpsubb
; on ymm registers.
68 define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
69 ; CHECK-LABEL: test_vpsubb:
71 ; CHECK-NEXT: vpsubb %ymm1, %ymm0, %ymm0
72 ; CHECK-NEXT: ret{{[l|q]}}
73 %x = sub <32 x i8> %i, %j
; 256-bit multiply of <8 x i32> by a variable operand: the expected lowering is
; one vpmulld on ymm registers.
77 define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
78 ; CHECK-LABEL: test_vpmulld:
80 ; CHECK-NEXT: vpmulld %ymm1, %ymm0, %ymm0
81 ; CHECK-NEXT: ret{{[l|q]}}
82 %x = mul <8 x i32> %i, %j
; 256-bit multiply of <16 x i16> by a variable operand: the expected lowering is
; one vpmullw on ymm registers.
86 define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
87 ; CHECK-LABEL: test_vpmullw:
89 ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
90 ; CHECK-NEXT: ret{{[l|q]}}
91 %x = mul <16 x i16> %i, %j
; 128-bit multiply of <16 x i8>. The expected output contains no byte multiply;
; instead both operands are zero-extended to 16-bit lanes in a ymm register
; (vpmovzxbw), multiplied with vpmullw, masked with a constant-pool value
; (vpand), and the two 128-bit halves are packed back to bytes
; (vextracti128 + vpackuswb). vzeroupper follows because ymm registers were used.
; The i686 and x86-64 expectations are kept separate: they differ only in how
; the mask constant is addressed (absolute symbol vs. %rip-relative).
95 define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
96 ; X86-LABEL: mul_v16i8:
98 ; X86-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
99 ; X86-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
100 ; X86-NEXT: vpmullw %ymm1, %ymm0, %ymm0
101 ; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
102 ; X86-NEXT: vextracti128 $1, %ymm0, %xmm1
103 ; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
104 ; X86-NEXT: vzeroupper
107 ; X64-LABEL: mul_v16i8:
109 ; X64-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
110 ; X64-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
111 ; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0
112 ; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
113 ; X64-NEXT: vextracti128 $1, %ymm0, %xmm1
114 ; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
115 ; X64-NEXT: vzeroupper
117 %x = mul <16 x i8> %i, %j
; 256-bit multiply of <32 x i8>. The expected output splits each 128-bit lane
; into its high and low eight bytes (vpunpckhbw / vpunpcklbw, each byte
; duplicated into a 16-bit lane), multiplies both halves with vpmullw, masks
; every 16-bit product with a broadcast 255, and recombines the two halves
; with vpackuswb. The same sequence is expected on both targets.
121 define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
122 ; CHECK-LABEL: mul_v32i8:
124 ; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
125 ; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
126 ; CHECK-NEXT: vpmullw %ymm2, %ymm3, %ymm2
127 ; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
128 ; CHECK-NEXT: vpand %ymm3, %ymm2, %ymm2
129 ; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
130 ; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
131 ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
132 ; CHECK-NEXT: vpand %ymm3, %ymm0, %ymm0
133 ; CHECK-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
134 ; CHECK-NEXT: ret{{[l|q]}}
135 %x = mul <32 x i8> %i, %j
; 256-bit multiply of <4 x i64>. The expected output builds each 64-bit product
; from three vpmuludq partial products:
;   hi(%i) * lo(%j)  and  lo(%i) * hi(%j)  are summed and shifted left by 32,
;   then added to  lo(%i) * lo(%j).
; The high halves are obtained with vpsrlq $32.
139 define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
140 ; CHECK-LABEL: mul_v4i64:
142 ; CHECK-NEXT: vpsrlq $32, %ymm0, %ymm2
143 ; CHECK-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
144 ; CHECK-NEXT: vpsrlq $32, %ymm1, %ymm3
145 ; CHECK-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
146 ; CHECK-NEXT: vpaddq %ymm2, %ymm3, %ymm2
147 ; CHECK-NEXT: vpsllq $32, %ymm2, %ymm2
148 ; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
149 ; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0
150 ; CHECK-NEXT: ret{{[l|q]}}
151 %x = mul <4 x i64> %i, %j
; <8 x i32> multiplied by a splat of 2: expected to become an add of the
; operand to itself (vpaddd) rather than a multiply.
155 define <8 x i32> @mul_const1(<8 x i32> %x) {
156 ; CHECK-LABEL: mul_const1:
158 ; CHECK-NEXT: vpaddd %ymm0, %ymm0, %ymm0
159 ; CHECK-NEXT: ret{{[l|q]}}
160 %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; <4 x i64> multiplied by a splat of 4: expected to become a left shift by 2
; (vpsllq $2) rather than a multiply.
164 define <4 x i64> @mul_const2(<4 x i64> %x) {
165 ; CHECK-LABEL: mul_const2:
167 ; CHECK-NEXT: vpsllq $2, %ymm0, %ymm0
168 ; CHECK-NEXT: ret{{[l|q]}}
169 %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
; <16 x i16> multiplied by a splat of 8: expected to become a left shift by 3
; (vpsllw $3) rather than a multiply.
173 define <16 x i16> @mul_const3(<16 x i16> %x) {
174 ; CHECK-LABEL: mul_const3:
176 ; CHECK-NEXT: vpsllw $3, %ymm0, %ymm0
177 ; CHECK-NEXT: ret{{[l|q]}}
178 %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; <4 x i64> multiplied by a splat of -1: expected to become a negation, emitted
; as a zeroed register (vpxor) followed by 0 - %x (vpsubq).
182 define <4 x i64> @mul_const4(<4 x i64> %x) {
183 ; CHECK-LABEL: mul_const4:
185 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
186 ; CHECK-NEXT: vpsubq %ymm0, %ymm1, %ymm0
187 ; CHECK-NEXT: ret{{[l|q]}}
188 %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
; <8 x i32> multiplied by an all-zero vector: expected to fold to a zero
; result, emitted as a single register-clearing vxorps with no multiply.
192 define <8 x i32> @mul_const5(<8 x i32> %x) {
193 ; CHECK-LABEL: mul_const5:
195 ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
196 ; CHECK-NEXT: ret{{[l|q]}}
197 %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; <8 x i32> multiplied by a non-uniform constant (a mix of 0 and 2 lanes): the
; expected output is a real vpmulld whose second operand is read from the
; constant pool. The i686 and x86-64 expectations are separate because the
; constant is addressed differently (absolute symbol vs. %rip-relative).
201 define <8 x i32> @mul_const6(<8 x i32> %x) {
202 ; X86-LABEL: mul_const6:
204 ; X86-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
207 ; X64-LABEL: mul_const6:
209 ; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
211 %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
; <8 x i64> (512 bits, wider than one ymm register) multiplied by a splat of 2:
; the expected output handles the value as two ymm halves, each doubled with
; vpaddq of the register to itself.
215 define <8 x i64> @mul_const7(<8 x i64> %x) {
216 ; CHECK-LABEL: mul_const7:
218 ; CHECK-NEXT: vpaddq %ymm0, %ymm0, %ymm0
219 ; CHECK-NEXT: vpaddq %ymm1, %ymm1, %ymm1
220 ; CHECK-NEXT: ret{{[l|q]}}
221 %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; 128-bit <8 x i16> multiplied by a splat of 8: expected to become a left shift
; by 3 (vpsllw $3) on an xmm register.
225 define <8 x i16> @mul_const8(<8 x i16> %x) {
226 ; CHECK-LABEL: mul_const8:
228 ; CHECK-NEXT: vpsllw $3, %xmm0, %xmm0
229 ; CHECK-NEXT: ret{{[l|q]}}
230 %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; <8 x i32> multiplied by a constant that is 2 in lane 0 and 0 elsewhere: the
; expected output materializes the constant with a 128-bit vmovdqa
; ([2,0,0,0] into xmm1) and then performs a full-width vpmulld on ymm registers.
234 define <8 x i32> @mul_const9(<8 x i32> %x) {
235 ; CHECK-LABEL: mul_const9:
237 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [2,0,0,0]
238 ; CHECK-NEXT: vpmulld %ymm1, %ymm0, %ymm0
239 ; CHECK-NEXT: ret{{[l|q]}}
240 %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; <4 x i32> multiplied by a splat of 16843009 (0x01010101): the expected output
; broadcasts the constant with vpbroadcastd and multiplies with vpmulld.
245 define <4 x i32> @mul_const10(<4 x i32> %x) {
246 ; CHECK-LABEL: mul_const10:
248 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16843009,16843009,16843009,16843009]
249 ; CHECK-NEXT: vpmulld %xmm1, %xmm0, %xmm0
250 ; CHECK-NEXT: ret{{[l|q]}}
251 %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
; <4 x i32> multiplied by a splat of 2155905152 (0x80808080): the expected
; output broadcasts the constant with vpbroadcastd and multiplies with vpmulld.
256 define <4 x i32> @mul_const11(<4 x i32> %x) {
257 ; CHECK-LABEL: mul_const11:
259 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2155905152,2155905152,2155905152,2155905152]
260 ; CHECK-NEXT: vpmulld %xmm1, %xmm0, %xmm0
261 ; CHECK-NEXT: ret{{[l|q]}}
262 %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>