1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,EVEX512
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,-evex512 | FileCheck %s --check-prefixes=CHECK,EVEX256
; 256-bit byte add of two register operands. The expected code is a single
; register-register vpaddb on ymm registers.
7 define <32 x i8> @vpaddb256_test(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
8 ; CHECK-LABEL: vpaddb256_test:
10 ; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0
12 %x = add <32 x i8> %i, %j
; 256-bit byte add where the second operand is loaded from %j (the load is
; only 4-byte aligned). The load is expected to be folded into vpaddb as a
; (%rdi) memory operand rather than emitted as a separate instruction.
16 define <32 x i8> @vpaddb256_fold_test(<32 x i8> %i, ptr %j) nounwind {
17 ; CHECK-LABEL: vpaddb256_fold_test:
19 ; CHECK-NEXT: vpaddb (%rdi), %ymm0, %ymm0
21 %tmp = load <32 x i8>, ptr %j, align 4
22 %x = add <32 x i8> %i, %tmp
; 256-bit word add of two register operands. The expected code is a single
; register-register vpaddw on ymm registers.
26 define <16 x i16> @vpaddw256_test(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
27 ; CHECK-LABEL: vpaddw256_test:
29 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
31 %x = add <16 x i16> %i, %j
; 256-bit word add where the second operand is loaded from %j (the load is
; only 4-byte aligned). The load is expected to be folded into vpaddw as a
; (%rdi) memory operand.
35 define <16 x i16> @vpaddw256_fold_test(<16 x i16> %i, ptr %j) nounwind {
36 ; CHECK-LABEL: vpaddw256_fold_test:
38 ; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm0
40 %tmp = load <16 x i16>, ptr %j, align 4
41 %x = add <16 x i16> %i, %tmp
; Merge-masked 256-bit word add. The per-lane mask is (%mask1 != 0); lanes
; whose mask bit is clear keep the value of %i. Expected code: vptestmw
; materializes the mask in %k1, then vpaddw writes under {%k1} into %ymm0.
45 define <16 x i16> @vpaddw256_mask_test(<16 x i16> %i, <16 x i16> %j, <16 x i16> %mask1) nounwind readnone {
46 ; CHECK-LABEL: vpaddw256_mask_test:
48 ; CHECK-NEXT: vptestmw %ymm2, %ymm2, %k1
49 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 {%k1}
51 %mask = icmp ne <16 x i16> %mask1, zeroinitializer
52 %x = add <16 x i16> %i, %j
53 %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %i
; Zero-masked 256-bit word add. The per-lane mask is (%mask1 != 0); lanes
; whose mask bit is clear are selected from zeroinitializer. Expected code:
; vptestmw into %k1, then vpaddw with {%k1} {z}.
57 define <16 x i16> @vpaddw256_maskz_test(<16 x i16> %i, <16 x i16> %j, <16 x i16> %mask1) nounwind readnone {
58 ; CHECK-LABEL: vpaddw256_maskz_test:
60 ; CHECK-NEXT: vptestmw %ymm2, %ymm2, %k1
61 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z}
63 %mask = icmp ne <16 x i16> %mask1, zeroinitializer
64 %x = add <16 x i16> %i, %j
65 %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; Merge-masked 256-bit word add with the second operand loaded from %j.ptr.
; The per-lane mask is (%mask1 != 0); lanes whose mask bit is clear keep %i.
; Expected code: vptestmw into %k1, then a masked vpaddw with the load folded
; in as a (%rdi) memory operand.
; The function reads memory through %j.ptr, so it carries only nounwind (not
; readnone), like the unmasked *_fold_test functions in this file.
69 define <16 x i16> @vpaddw256_mask_fold_test(<16 x i16> %i, ptr %j.ptr, <16 x i16> %mask1) nounwind {
70 ; CHECK-LABEL: vpaddw256_mask_fold_test:
72 ; CHECK-NEXT: vptestmw %ymm1, %ymm1, %k1
73 ; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm0 {%k1}
75 %mask = icmp ne <16 x i16> %mask1, zeroinitializer
76 %j = load <16 x i16>, ptr %j.ptr
77 %x = add <16 x i16> %i, %j
78 %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %i
; Zero-masked 256-bit word add with the second operand loaded from %j.ptr.
; The per-lane mask is (%mask1 != 0); lanes whose mask bit is clear are
; selected from zeroinitializer. Expected code: vptestmw into %k1, then vpaddw
; with {%k1} {z} and the load folded in as a (%rdi) memory operand.
; The function reads memory through %j.ptr, so it carries only nounwind (not
; readnone), like the unmasked *_fold_test functions in this file.
82 define <16 x i16> @vpaddw256_maskz_fold_test(<16 x i16> %i, ptr %j.ptr, <16 x i16> %mask1) nounwind {
83 ; CHECK-LABEL: vpaddw256_maskz_fold_test:
85 ; CHECK-NEXT: vptestmw %ymm1, %ymm1, %k1
86 ; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z}
88 %mask = icmp ne <16 x i16> %mask1, zeroinitializer
89 %j = load <16 x i16>, ptr %j.ptr
90 %x = add <16 x i16> %i, %j
91 %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
; 256-bit byte subtract (%i - %j) of two register operands. The expected code
; is a single register-register vpsubb on ymm registers.
95 define <32 x i8> @vpsubb256_test(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
96 ; CHECK-LABEL: vpsubb256_test:
98 ; CHECK-NEXT: vpsubb %ymm1, %ymm0, %ymm0
100 %x = sub <32 x i8> %i, %j
; 256-bit word subtract (%i - %j) of two register operands. The expected code
; is a single register-register vpsubw on ymm registers.
104 define <16 x i16> @vpsubw256_test(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
105 ; CHECK-LABEL: vpsubw256_test:
107 ; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0
109 %x = sub <16 x i16> %i, %j
; 256-bit word multiply of two register operands. The expected code is a
; single register-register vpmullw on ymm registers.
113 define <16 x i16> @vpmullw256_test(<16 x i16> %i, <16 x i16> %j) {
114 ; CHECK-LABEL: vpmullw256_test:
116 ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
118 %x = mul <16 x i16> %i, %j
; 128-bit byte add of two register operands. The expected code is a single
; register-register vpaddb on xmm registers.
124 define <16 x i8> @vpaddb128_test(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
125 ; CHECK-LABEL: vpaddb128_test:
127 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
129 %x = add <16 x i8> %i, %j
; 128-bit byte add where the second operand is loaded from %j (the load is
; only 4-byte aligned). The load is expected to be folded into vpaddb as a
; (%rdi) memory operand.
133 define <16 x i8> @vpaddb128_fold_test(<16 x i8> %i, ptr %j) nounwind {
134 ; CHECK-LABEL: vpaddb128_fold_test:
136 ; CHECK-NEXT: vpaddb (%rdi), %xmm0, %xmm0
138 %tmp = load <16 x i8>, ptr %j, align 4
139 %x = add <16 x i8> %i, %tmp
; 128-bit word add of two register operands. The expected code is a single
; register-register vpaddw on xmm registers.
143 define <8 x i16> @vpaddw128_test(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
144 ; CHECK-LABEL: vpaddw128_test:
146 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
148 %x = add <8 x i16> %i, %j
; 128-bit word add where the second operand is loaded from %j (the load is
; only 4-byte aligned). The load is expected to be folded into vpaddw as a
; (%rdi) memory operand.
152 define <8 x i16> @vpaddw128_fold_test(<8 x i16> %i, ptr %j) nounwind {
153 ; CHECK-LABEL: vpaddw128_fold_test:
155 ; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm0
157 %tmp = load <8 x i16>, ptr %j, align 4
158 %x = add <8 x i16> %i, %tmp
; Merge-masked 128-bit word add. The per-lane mask is (%mask1 != 0); lanes
; whose mask bit is clear keep the value of %i. Expected code: vptestmw
; materializes the mask in %k1, then vpaddw writes under {%k1} into %xmm0.
162 define <8 x i16> @vpaddw128_mask_test(<8 x i16> %i, <8 x i16> %j, <8 x i16> %mask1) nounwind readnone {
163 ; CHECK-LABEL: vpaddw128_mask_test:
165 ; CHECK-NEXT: vptestmw %xmm2, %xmm2, %k1
166 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 {%k1}
168 %mask = icmp ne <8 x i16> %mask1, zeroinitializer
169 %x = add <8 x i16> %i, %j
170 %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %i
; Zero-masked 128-bit word add. The per-lane mask is (%mask1 != 0); lanes
; whose mask bit is clear are selected from zeroinitializer. Expected code:
; vptestmw into %k1, then vpaddw with {%k1} {z}.
174 define <8 x i16> @vpaddw128_maskz_test(<8 x i16> %i, <8 x i16> %j, <8 x i16> %mask1) nounwind readnone {
175 ; CHECK-LABEL: vpaddw128_maskz_test:
177 ; CHECK-NEXT: vptestmw %xmm2, %xmm2, %k1
178 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z}
180 %mask = icmp ne <8 x i16> %mask1, zeroinitializer
181 %x = add <8 x i16> %i, %j
182 %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
; Merge-masked 128-bit word add with the second operand loaded from %j.ptr.
; The per-lane mask is (%mask1 != 0); lanes whose mask bit is clear keep %i.
; Expected code: vptestmw into %k1, then a masked vpaddw with the load folded
; in as a (%rdi) memory operand.
; The function reads memory through %j.ptr, so it carries only nounwind (not
; readnone), like the unmasked *_fold_test functions in this file.
186 define <8 x i16> @vpaddw128_mask_fold_test(<8 x i16> %i, ptr %j.ptr, <8 x i16> %mask1) nounwind {
187 ; CHECK-LABEL: vpaddw128_mask_fold_test:
189 ; CHECK-NEXT: vptestmw %xmm1, %xmm1, %k1
190 ; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm0 {%k1}
192 %mask = icmp ne <8 x i16> %mask1, zeroinitializer
193 %j = load <8 x i16>, ptr %j.ptr
194 %x = add <8 x i16> %i, %j
195 %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %i
; Zero-masked 128-bit word add with the second operand loaded from %j.ptr.
; The per-lane mask is (%mask1 != 0); lanes whose mask bit is clear are
; selected from zeroinitializer. Expected code: vptestmw into %k1, then vpaddw
; with {%k1} {z} and the load folded in as a (%rdi) memory operand.
; The function reads memory through %j.ptr, so it carries only nounwind (not
; readnone), like the unmasked *_fold_test functions in this file.
199 define <8 x i16> @vpaddw128_maskz_fold_test(<8 x i16> %i, ptr %j.ptr, <8 x i16> %mask1) nounwind {
200 ; CHECK-LABEL: vpaddw128_maskz_fold_test:
202 ; CHECK-NEXT: vptestmw %xmm1, %xmm1, %k1
203 ; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z}
205 %mask = icmp ne <8 x i16> %mask1, zeroinitializer
206 %j = load <8 x i16>, ptr %j.ptr
207 %x = add <8 x i16> %i, %j
208 %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
; 128-bit byte subtract (%i - %j) of two register operands. The expected code
; is a single register-register vpsubb on xmm registers.
212 define <16 x i8> @vpsubb128_test(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
213 ; CHECK-LABEL: vpsubb128_test:
215 ; CHECK-NEXT: vpsubb %xmm1, %xmm0, %xmm0
217 %x = sub <16 x i8> %i, %j
; 128-bit word subtract (%i - %j) of two register operands. The expected code
; is a single register-register vpsubw on xmm registers.
221 define <8 x i16> @vpsubw128_test(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
222 ; CHECK-LABEL: vpsubw128_test:
224 ; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm0
226 %x = sub <8 x i16> %i, %j
; 128-bit word multiply of two register operands. The expected code is a
; single register-register vpmullw on xmm registers.
230 define <8 x i16> @vpmullw128_test(<8 x i16> %i, <8 x i16> %j) {
231 ; CHECK-LABEL: vpmullw128_test:
233 ; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm0
235 %x = mul <8 x i16> %i, %j
; Shuffle of a <16 x i1> mask vector followed by a bitcast to i16. The shuffle
; keeps lanes 0-11 of %a and takes lanes 12-15 from the all-zero second
; operand (shuffle indices 28-31).
; The two RUN configurations expect different code. The default run expects a
; zmm-based sequence (vpternlogd / vpexpandq / vptestmd), while the run with
; -evex512 expects a ymm-based sequence (vpmovm2w / vpblendd / vpmovw2m).
; NOTE(review): the name suggests this is a regression test for issue 90356;
; confirm against the tracker.
239 define i16 @PR90356(<16 x i1> %a) {
240 ; EVEX512-LABEL: PR90356:
242 ; EVEX512-NEXT: vpsllw $7, %xmm0, %xmm0
243 ; EVEX512-NEXT: vpmovb2m %xmm0, %k1
244 ; EVEX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
245 ; EVEX512-NEXT: movb $63, %al
246 ; EVEX512-NEXT: kmovd %eax, %k1
247 ; EVEX512-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z}
248 ; EVEX512-NEXT: vptestmd %zmm0, %zmm0, %k0
249 ; EVEX512-NEXT: kmovd %k0, %eax
250 ; EVEX512-NEXT: # kill: def $ax killed $ax killed $eax
251 ; EVEX512-NEXT: vzeroupper
254 ; EVEX256-LABEL: PR90356:
256 ; EVEX256-NEXT: vpsllw $7, %xmm0, %xmm0
257 ; EVEX256-NEXT: vpmovb2m %xmm0, %k0
258 ; EVEX256-NEXT: vpmovm2w %k0, %ymm0
259 ; EVEX256-NEXT: vpxor %xmm1, %xmm1, %xmm1
260 ; EVEX256-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
261 ; EVEX256-NEXT: vpmovw2m %ymm0, %k0
262 ; EVEX256-NEXT: kmovd %k0, %eax
263 ; EVEX256-NEXT: # kill: def $ax killed $ax killed $eax
264 ; EVEX256-NEXT: vzeroupper
266 %1 = shufflevector <16 x i1> %a, <16 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
267 %2 = bitcast <16 x i1> %1 to i16