1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64
; A 256-bit add of <4 x i64> is expected to select a single vpaddq on ymm registers.
5 define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
6 ; CHECK-LABEL: test_vpaddq:
8 ; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0
9 ; CHECK-NEXT: ret{{[l|q]}}
10 %x = add <4 x i64> %i, %j
; A 256-bit add of <8 x i32> is expected to select a single vpaddd on ymm registers.
14 define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
15 ; CHECK-LABEL: test_vpaddd:
17 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
18 ; CHECK-NEXT: ret{{[l|q]}}
19 %x = add <8 x i32> %i, %j
; A 256-bit add of <16 x i16> is expected to select a single vpaddw on ymm registers.
23 define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
24 ; CHECK-LABEL: test_vpaddw:
26 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
27 ; CHECK-NEXT: ret{{[l|q]}}
28 %x = add <16 x i16> %i, %j
; A 256-bit add of <32 x i8> is expected to select a single vpaddb on ymm registers.
32 define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
33 ; CHECK-LABEL: test_vpaddb:
35 ; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0
36 ; CHECK-NEXT: ret{{[l|q]}}
37 %x = add <32 x i8> %i, %j
; A 256-bit subtract of <4 x i64> is expected to select a single vpsubq on ymm registers.
41 define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
42 ; CHECK-LABEL: test_vpsubq:
44 ; CHECK-NEXT: vpsubq %ymm1, %ymm0, %ymm0
45 ; CHECK-NEXT: ret{{[l|q]}}
46 %x = sub <4 x i64> %i, %j
; A 256-bit subtract of <8 x i32> is expected to select a single vpsubd on ymm registers.
50 define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
51 ; CHECK-LABEL: test_vpsubd:
53 ; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0
54 ; CHECK-NEXT: ret{{[l|q]}}
55 %x = sub <8 x i32> %i, %j
; A 256-bit subtract of <16 x i16> is expected to select a single vpsubw on ymm registers.
59 define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
60 ; CHECK-LABEL: test_vpsubw:
62 ; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0
63 ; CHECK-NEXT: ret{{[l|q]}}
64 %x = sub <16 x i16> %i, %j
; A 256-bit subtract of <32 x i8> is expected to select a single vpsubb on ymm registers.
68 define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
69 ; CHECK-LABEL: test_vpsubb:
71 ; CHECK-NEXT: vpsubb %ymm1, %ymm0, %ymm0
72 ; CHECK-NEXT: ret{{[l|q]}}
73 %x = sub <32 x i8> %i, %j
; A 256-bit multiply of <8 x i32> is expected to select a single vpmulld on ymm registers.
77 define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
78 ; CHECK-LABEL: test_vpmulld:
80 ; CHECK-NEXT: vpmulld %ymm1, %ymm0, %ymm0
81 ; CHECK-NEXT: ret{{[l|q]}}
82 %x = mul <8 x i32> %i, %j
; A 256-bit multiply of <16 x i16> is expected to select a single vpmullw on ymm registers.
86 define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
87 ; CHECK-LABEL: test_vpmullw:
89 ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
90 ; CHECK-NEXT: ret{{[l|q]}}
91 %x = mul <16 x i16> %i, %j
; Multiply of <16 x i8>: the expected code widens both operands to 16-bit lanes
; with vpmovzxbw, multiplies with vpmullw, masks the products with a
; constant-pool vpand, and narrows back to bytes with vextracti128 + vpackuswb.
; The i686 and x86_64 expectations differ only in how the constant-pool operand
; is addressed (the x86_64 form is %rip-relative).
95 define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
96 ; X86-LABEL: mul_v16i8:
98 ; X86-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
99 ; X86-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
100 ; X86-NEXT: vpmullw %ymm1, %ymm0, %ymm0
101 ; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
102 ; X86-NEXT: vextracti128 $1, %ymm0, %xmm1
103 ; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
104 ; X86-NEXT: vzeroupper
107 ; X64-LABEL: mul_v16i8:
109 ; X64-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
110 ; X64-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
111 ; X64-NEXT: vpmullw %ymm1, %ymm0, %ymm0
112 ; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
113 ; X64-NEXT: vextracti128 $1, %ymm0, %xmm1
114 ; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
115 ; X64-NEXT: vzeroupper
117 %x = mul <16 x i8> %i, %j
; Multiply of <32 x i8>: the expected code broadcasts a per-word mask of 255,
; uses it (vpand / vpandn) to split %j into the low and high byte of each
; 16-bit word, multiplies each half against %i with vpmaddubsw, and recombines
; the two partial results with vpsllw $8 + vpor.
121 define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
122 ; CHECK-LABEL: mul_v32i8:
124 ; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
125 ; CHECK-NEXT: vpand %ymm2, %ymm1, %ymm3
126 ; CHECK-NEXT: vpmaddubsw %ymm3, %ymm0, %ymm3
127 ; CHECK-NEXT: vpand %ymm2, %ymm3, %ymm3
128 ; CHECK-NEXT: vpandn %ymm1, %ymm2, %ymm1
129 ; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0
130 ; CHECK-NEXT: vpsllw $8, %ymm0, %ymm0
131 ; CHECK-NEXT: vpor %ymm0, %ymm3, %ymm0
132 ; CHECK-NEXT: ret{{[l|q]}}
133 %x = mul <32 x i8> %i, %j
; Multiply of <4 x i64>: the expected code expands the operation into three
; vpmuludq partial products, combined with 32-bit shifts (vpsrlq / vpsllq $32)
; and vpaddq.
137 define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
138 ; CHECK-LABEL: mul_v4i64:
140 ; CHECK-NEXT: vpsrlq $32, %ymm0, %ymm2
141 ; CHECK-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
142 ; CHECK-NEXT: vpsrlq $32, %ymm1, %ymm3
143 ; CHECK-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
144 ; CHECK-NEXT: vpaddq %ymm2, %ymm3, %ymm2
145 ; CHECK-NEXT: vpsllq $32, %ymm2, %ymm2
146 ; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
147 ; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0
148 ; CHECK-NEXT: ret{{[l|q]}}
149 %x = mul <4 x i64> %i, %j
; Multiply of <8 x i32> by a splat of 2 is expected to become a self-add (vpaddd x, x).
153 define <8 x i32> @mul_const1(<8 x i32> %x) {
154 ; CHECK-LABEL: mul_const1:
156 ; CHECK-NEXT: vpaddd %ymm0, %ymm0, %ymm0
157 ; CHECK-NEXT: ret{{[l|q]}}
158 %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; Multiply of <4 x i64> by a splat of 4 is expected to become a left shift by 2 (vpsllq).
162 define <4 x i64> @mul_const2(<4 x i64> %x) {
163 ; CHECK-LABEL: mul_const2:
165 ; CHECK-NEXT: vpsllq $2, %ymm0, %ymm0
166 ; CHECK-NEXT: ret{{[l|q]}}
167 %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
; Multiply of <16 x i16> by a splat of 8 is expected to become a left shift by 3 (vpsllw).
171 define <16 x i16> @mul_const3(<16 x i16> %x) {
172 ; CHECK-LABEL: mul_const3:
174 ; CHECK-NEXT: vpsllw $3, %ymm0, %ymm0
175 ; CHECK-NEXT: ret{{[l|q]}}
176 %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; Multiply of <4 x i64> by a splat of -1 is expected to become a negation:
; zero a register with vpxor, then subtract the input from it with vpsubq.
180 define <4 x i64> @mul_const4(<4 x i64> %x) {
181 ; CHECK-LABEL: mul_const4:
183 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
184 ; CHECK-NEXT: vpsubq %ymm0, %ymm1, %ymm0
185 ; CHECK-NEXT: ret{{[l|q]}}
186 %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
; Multiply of <8 x i32> by an all-zero vector is expected to fold to a zeroed
; register (vxorps), with no multiply emitted.
190 define <8 x i32> @mul_const5(<8 x i32> %x) {
191 ; CHECK-LABEL: mul_const5:
193 ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
194 ; CHECK-NEXT: ret{{[l|q]}}
195 %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Multiply of <8 x i32> by a non-uniform constant (a mix of 0 and 2) is expected
; to stay a vpmulld with the constant read from the constant pool. The i686 and
; x86_64 expectations differ only in the addressing of that operand
; (%rip-relative on x86_64).
199 define <8 x i32> @mul_const6(<8 x i32> %x) {
200 ; X86-LABEL: mul_const6:
202 ; X86-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
205 ; X64-LABEL: mul_const6:
207 ; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
209 %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
; Multiply of <8 x i64> (wider than one ymm register) by a splat of 2 is
; expected to be handled as two ymm halves, each doubled with a self-add (vpaddq).
213 define <8 x i64> @mul_const7(<8 x i64> %x) {
214 ; CHECK-LABEL: mul_const7:
216 ; CHECK-NEXT: vpaddq %ymm0, %ymm0, %ymm0
217 ; CHECK-NEXT: vpaddq %ymm1, %ymm1, %ymm1
218 ; CHECK-NEXT: ret{{[l|q]}}
219 %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
; 128-bit case: multiply of <8 x i16> by a splat of 8 is expected to become a
; left shift by 3 (vpsllw) on an xmm register.
223 define <8 x i16> @mul_const8(<8 x i16> %x) {
224 ; CHECK-LABEL: mul_const8:
226 ; CHECK-NEXT: vpsllw $3, %xmm0, %xmm0
227 ; CHECK-NEXT: ret{{[l|q]}}
228 %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; Multiply of <8 x i32> by a constant whose only non-zero element is lane 0 (2):
; the expected code materializes the constant with vpmovsxbq into xmm1 ([2,0])
; and then performs a full-width vpmulld on ymm registers.
232 define <8 x i32> @mul_const9(<8 x i32> %x) {
233 ; CHECK-LABEL: mul_const9:
235 ; CHECK-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,0]
236 ; CHECK-NEXT: vpmulld %ymm1, %ymm0, %ymm0
237 ; CHECK-NEXT: ret{{[l|q]}}
238 %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Multiply of <4 x i32> by a splat of 16843009 (0x01010101) is expected to stay
; a real multiply: the constant is broadcast with vpbroadcastd and fed to vpmulld.
243 define <4 x i32> @mul_const10(<4 x i32> %x) {
244 ; CHECK-LABEL: mul_const10:
246 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16843009,16843009,16843009,16843009]
247 ; CHECK-NEXT: vpmulld %xmm1, %xmm0, %xmm0
248 ; CHECK-NEXT: ret{{[l|q]}}
249 %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
; Multiply of <4 x i32> by a splat of 2155905152 (0x80808080) is expected to stay
; a real multiply: the constant is broadcast with vpbroadcastd and fed to vpmulld.
254 define <4 x i32> @mul_const11(<4 x i32> %x) {
255 ; CHECK-LABEL: mul_const11:
257 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2155905152,2155905152,2155905152,2155905152]
258 ; CHECK-NEXT: vpmulld %xmm1, %xmm0, %xmm0
259 ; CHECK-NEXT: ret{{[l|q]}}
260 %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>