1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW,AVX512BWNOVL
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512VL,AVX512BWVL
; truncstore_v8i64_v8i32
; Truncates <8 x i64> %x to <8 x i32> and writes it to %p with a masked store.
; A lane is stored only where the matching element of %mask is non-zero.
; All check groups visible here expect the same shape: vptestmd builds the
; mask in %k1 and a single masked vpmovqd from %zmm0 does truncate + store.
; Without avx512vl the %ymm1 mask operand is first widened to %zmm1.
7 define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %p, <8 x i32> %mask) {
8 ; AVX512F-LABEL: truncstore_v8i64_v8i32:
10 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
11 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
12 ; AVX512F-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
13 ; AVX512F-NEXT: vzeroupper
16 ; AVX512VL-LABEL: truncstore_v8i64_v8i32:
18 ; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
19 ; AVX512VL-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
20 ; AVX512VL-NEXT: vzeroupper
23 ; AVX512BW-LABEL: truncstore_v8i64_v8i32:
25 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
26 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
27 ; AVX512BW-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
28 ; AVX512BW-NEXT: vzeroupper
; %a is the per-lane store predicate (mask element != 0); %b is the narrowed
; value. The masked store is called with an alignment operand of 1.
30 %a = icmp ne <8 x i32> %mask, zeroinitializer
31 %b = trunc <8 x i64> %x to <8 x i32>
32 call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %b, <8 x i32>* %p, i32 1, <8 x i1> %a)
; truncstore_v8i64_v8i16
; Truncates <8 x i64> %x to <8 x i16> and writes it to %p with a masked store.
; A lane is stored only where the matching element of %mask is non-zero.
; Expected code, per the checks below:
;   - avx512f group: the store is scalarized. vpmovqw narrows into %xmm0, then
;     each of the 8 lanes re-runs vptestmd, shifts its mask bit down with
;     kshiftrw, tests bit 0 and conditionally stores one word with vpextrw at
;     byte offsets 0, 2, ..., 14 from %rdi.
;   - avx512bw groups: one masked vpmovqw from %zmm0 does truncate + store.
; NOTE(review): none of the check lines visible here carry a prefix that the
; -mattr=avx512vl (without avx512bw) run line passes to FileCheck, so that
; configuration looks unchecked for this function - confirm against the run
; lines at the top of the file.
36 define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %p, <8 x i32> %mask) {
37 ; AVX512F-LABEL: truncstore_v8i64_v8i16:
39 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
40 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
41 ; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
42 ; AVX512F-NEXT: kmovw %k0, %eax
43 ; AVX512F-NEXT: testb $1, %al
44 ; AVX512F-NEXT: je .LBB1_2
45 ; AVX512F-NEXT: # %bb.1: # %cond.store
46 ; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
47 ; AVX512F-NEXT: .LBB1_2: # %else
48 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
49 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0
50 ; AVX512F-NEXT: kmovw %k0, %eax
51 ; AVX512F-NEXT: testb $1, %al
52 ; AVX512F-NEXT: je .LBB1_4
53 ; AVX512F-NEXT: # %bb.3: # %cond.store1
54 ; AVX512F-NEXT: vpextrw $1, %xmm0, 2(%rdi)
55 ; AVX512F-NEXT: .LBB1_4: # %else2
56 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
57 ; AVX512F-NEXT: kshiftrw $2, %k0, %k0
58 ; AVX512F-NEXT: kmovw %k0, %eax
59 ; AVX512F-NEXT: testb $1, %al
60 ; AVX512F-NEXT: je .LBB1_6
61 ; AVX512F-NEXT: # %bb.5: # %cond.store3
62 ; AVX512F-NEXT: vpextrw $2, %xmm0, 4(%rdi)
63 ; AVX512F-NEXT: .LBB1_6: # %else4
64 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
65 ; AVX512F-NEXT: kshiftrw $3, %k0, %k0
66 ; AVX512F-NEXT: kmovw %k0, %eax
67 ; AVX512F-NEXT: testb $1, %al
68 ; AVX512F-NEXT: je .LBB1_8
69 ; AVX512F-NEXT: # %bb.7: # %cond.store5
70 ; AVX512F-NEXT: vpextrw $3, %xmm0, 6(%rdi)
71 ; AVX512F-NEXT: .LBB1_8: # %else6
72 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
73 ; AVX512F-NEXT: kshiftrw $4, %k0, %k0
74 ; AVX512F-NEXT: kmovw %k0, %eax
75 ; AVX512F-NEXT: testb $1, %al
76 ; AVX512F-NEXT: je .LBB1_10
77 ; AVX512F-NEXT: # %bb.9: # %cond.store7
78 ; AVX512F-NEXT: vpextrw $4, %xmm0, 8(%rdi)
79 ; AVX512F-NEXT: .LBB1_10: # %else8
80 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
81 ; AVX512F-NEXT: kshiftrw $5, %k0, %k0
82 ; AVX512F-NEXT: kmovw %k0, %eax
83 ; AVX512F-NEXT: testb $1, %al
84 ; AVX512F-NEXT: je .LBB1_12
85 ; AVX512F-NEXT: # %bb.11: # %cond.store9
86 ; AVX512F-NEXT: vpextrw $5, %xmm0, 10(%rdi)
87 ; AVX512F-NEXT: .LBB1_12: # %else10
88 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
89 ; AVX512F-NEXT: kshiftrw $6, %k0, %k0
90 ; AVX512F-NEXT: kmovw %k0, %eax
91 ; AVX512F-NEXT: testb $1, %al
92 ; AVX512F-NEXT: je .LBB1_14
93 ; AVX512F-NEXT: # %bb.13: # %cond.store11
94 ; AVX512F-NEXT: vpextrw $6, %xmm0, 12(%rdi)
95 ; AVX512F-NEXT: .LBB1_14: # %else12
96 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
97 ; AVX512F-NEXT: kshiftrw $7, %k0, %k0
98 ; AVX512F-NEXT: kmovw %k0, %eax
99 ; AVX512F-NEXT: testb $1, %al
100 ; AVX512F-NEXT: je .LBB1_16
101 ; AVX512F-NEXT: # %bb.15: # %cond.store13
102 ; AVX512F-NEXT: vpextrw $7, %xmm0, 14(%rdi)
103 ; AVX512F-NEXT: .LBB1_16: # %else14
104 ; AVX512F-NEXT: vzeroupper
107 ; AVX512BW-LABEL: truncstore_v8i64_v8i16:
109 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
110 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
111 ; AVX512BW-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
112 ; AVX512BW-NEXT: vzeroupper
113 ; AVX512BW-NEXT: retq
115 ; AVX512BWVL-LABEL: truncstore_v8i64_v8i16:
116 ; AVX512BWVL: # %bb.0:
117 ; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
118 ; AVX512BWVL-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
119 ; AVX512BWVL-NEXT: vzeroupper
120 ; AVX512BWVL-NEXT: retq
; %a is the per-lane store predicate (mask element != 0); %b is the narrowed
; value. The masked store is called with an alignment operand of 1.
121 %a = icmp ne <8 x i32> %mask, zeroinitializer
122 %b = trunc <8 x i64> %x to <8 x i16>
123 call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %b, <8 x i16>* %p, i32 1, <8 x i1> %a)
; truncstore_v8i64_v8i8
; Truncates <8 x i64> %x to <8 x i8> and writes it to %p with a masked store.
; A lane is stored only where the matching element of %mask is non-zero.
; Expected code, per the checks below:
;   - avx512f group: the store is scalarized. vpmovqw narrows to words in
;     %xmm0, so the byte of lane i sits at byte index 2*i; each lane re-runs
;     vptestmd, isolates its mask bit and conditionally stores one byte with
;     vpextrb (indices 0, 2, ..., 14) at byte offsets 0..7 from %rdi.
;   - avx512bw groups: one masked vpmovqb from %zmm0 does truncate + store.
; NOTE(review): none of the check lines visible here carry a prefix that the
; -mattr=avx512vl (without avx512bw) run line passes to FileCheck, so that
; configuration looks unchecked for this function - confirm against the run
; lines at the top of the file.
127 define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask) {
128 ; AVX512F-LABEL: truncstore_v8i64_v8i8:
130 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
131 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
132 ; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
133 ; AVX512F-NEXT: kmovw %k0, %eax
134 ; AVX512F-NEXT: testb $1, %al
135 ; AVX512F-NEXT: je .LBB2_2
136 ; AVX512F-NEXT: # %bb.1: # %cond.store
137 ; AVX512F-NEXT: vpextrb $0, %xmm0, (%rdi)
138 ; AVX512F-NEXT: .LBB2_2: # %else
139 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
140 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0
141 ; AVX512F-NEXT: kmovw %k0, %eax
142 ; AVX512F-NEXT: testb $1, %al
143 ; AVX512F-NEXT: je .LBB2_4
144 ; AVX512F-NEXT: # %bb.3: # %cond.store1
145 ; AVX512F-NEXT: vpextrb $2, %xmm0, 1(%rdi)
146 ; AVX512F-NEXT: .LBB2_4: # %else2
147 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
148 ; AVX512F-NEXT: kshiftrw $2, %k0, %k0
149 ; AVX512F-NEXT: kmovw %k0, %eax
150 ; AVX512F-NEXT: testb $1, %al
151 ; AVX512F-NEXT: je .LBB2_6
152 ; AVX512F-NEXT: # %bb.5: # %cond.store3
153 ; AVX512F-NEXT: vpextrb $4, %xmm0, 2(%rdi)
154 ; AVX512F-NEXT: .LBB2_6: # %else4
155 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
156 ; AVX512F-NEXT: kshiftrw $3, %k0, %k0
157 ; AVX512F-NEXT: kmovw %k0, %eax
158 ; AVX512F-NEXT: testb $1, %al
159 ; AVX512F-NEXT: je .LBB2_8
160 ; AVX512F-NEXT: # %bb.7: # %cond.store5
161 ; AVX512F-NEXT: vpextrb $6, %xmm0, 3(%rdi)
162 ; AVX512F-NEXT: .LBB2_8: # %else6
163 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
164 ; AVX512F-NEXT: kshiftrw $4, %k0, %k0
165 ; AVX512F-NEXT: kmovw %k0, %eax
166 ; AVX512F-NEXT: testb $1, %al
167 ; AVX512F-NEXT: je .LBB2_10
168 ; AVX512F-NEXT: # %bb.9: # %cond.store7
169 ; AVX512F-NEXT: vpextrb $8, %xmm0, 4(%rdi)
170 ; AVX512F-NEXT: .LBB2_10: # %else8
171 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
172 ; AVX512F-NEXT: kshiftrw $5, %k0, %k0
173 ; AVX512F-NEXT: kmovw %k0, %eax
174 ; AVX512F-NEXT: testb $1, %al
175 ; AVX512F-NEXT: je .LBB2_12
176 ; AVX512F-NEXT: # %bb.11: # %cond.store9
177 ; AVX512F-NEXT: vpextrb $10, %xmm0, 5(%rdi)
178 ; AVX512F-NEXT: .LBB2_12: # %else10
179 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
180 ; AVX512F-NEXT: kshiftrw $6, %k0, %k0
181 ; AVX512F-NEXT: kmovw %k0, %eax
182 ; AVX512F-NEXT: testb $1, %al
183 ; AVX512F-NEXT: je .LBB2_14
184 ; AVX512F-NEXT: # %bb.13: # %cond.store11
185 ; AVX512F-NEXT: vpextrb $12, %xmm0, 6(%rdi)
186 ; AVX512F-NEXT: .LBB2_14: # %else12
187 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
188 ; AVX512F-NEXT: kshiftrw $7, %k0, %k0
189 ; AVX512F-NEXT: kmovw %k0, %eax
190 ; AVX512F-NEXT: testb $1, %al
191 ; AVX512F-NEXT: je .LBB2_16
192 ; AVX512F-NEXT: # %bb.15: # %cond.store13
193 ; AVX512F-NEXT: vpextrb $14, %xmm0, 7(%rdi)
194 ; AVX512F-NEXT: .LBB2_16: # %else14
195 ; AVX512F-NEXT: vzeroupper
198 ; AVX512BW-LABEL: truncstore_v8i64_v8i8:
200 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
201 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
202 ; AVX512BW-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
203 ; AVX512BW-NEXT: vzeroupper
204 ; AVX512BW-NEXT: retq
206 ; AVX512BWVL-LABEL: truncstore_v8i64_v8i8:
207 ; AVX512BWVL: # %bb.0:
208 ; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
209 ; AVX512BWVL-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
210 ; AVX512BWVL-NEXT: vzeroupper
211 ; AVX512BWVL-NEXT: retq
; %a is the per-lane store predicate (mask element != 0); %b is the narrowed
; value. The masked store is called with an alignment operand of 1.
212 %a = icmp ne <8 x i32> %mask, zeroinitializer
213 %b = trunc <8 x i64> %x to <8 x i8>
214 call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %b, <8 x i8>* %p, i32 1, <8 x i1> %a)
; truncstore_v4i64_v4i32
; Truncates <4 x i64> %x to <4 x i32> and writes it to %p with a masked store.
; A lane is stored only where the matching element of %mask is non-zero.
; Expected code, per the checks below:
;   - avx512f and avx512bw groups: both operands are widened to zmm, the mask
;     from vptestmd is trimmed to its low 4 bits (kshiftlw/kshiftrw by 12),
;     vpmovqd narrows into %ymm0 and a masked vmovdqu32 performs the store.
;   - avx512vl group: one masked vpmovqd from %ymm0 does truncate + store.
218 define void @truncstore_v4i64_v4i32(<4 x i64> %x, <4 x i32>* %p, <4 x i32> %mask) {
219 ; AVX512F-LABEL: truncstore_v4i64_v4i32:
221 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
222 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
223 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
224 ; AVX512F-NEXT: kshiftlw $12, %k0, %k0
225 ; AVX512F-NEXT: kshiftrw $12, %k0, %k1
226 ; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
227 ; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
228 ; AVX512F-NEXT: vzeroupper
231 ; AVX512VL-LABEL: truncstore_v4i64_v4i32:
233 ; AVX512VL-NEXT: vptestmd %xmm1, %xmm1, %k1
234 ; AVX512VL-NEXT: vpmovqd %ymm0, (%rdi) {%k1}
235 ; AVX512VL-NEXT: vzeroupper
236 ; AVX512VL-NEXT: retq
238 ; AVX512BW-LABEL: truncstore_v4i64_v4i32:
240 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
241 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
242 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
243 ; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
244 ; AVX512BW-NEXT: kshiftrw $12, %k0, %k1
245 ; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
246 ; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
247 ; AVX512BW-NEXT: vzeroupper
248 ; AVX512BW-NEXT: retq
; %a is the per-lane store predicate (mask element != 0); %b is the narrowed
; value. The masked store is called with an alignment operand of 1.
249 %a = icmp ne <4 x i32> %mask, zeroinitializer
250 %b = trunc <4 x i64> %x to <4 x i32>
251 call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %b, <4 x i32>* %p, i32 1, <4 x i1> %a)
; truncstore_v4i64_v4i16
; Truncates <4 x i64> %x to <4 x i16> and writes it to %p with a masked store.
; A lane is stored only where the matching element of %mask is non-zero.
; Expected code, per the checks below:
;   - avx512f group: the store is scalarized. vpmovqd narrows to dwords, so
;     the word of lane i sits at word index 2*i; each lane re-runs vptestmd,
;     isolates its mask bit and conditionally stores with vpextrw (indices
;     0, 2, 4, 6) at byte offsets 0, 2, 4, 6 from %rdi.
;   - avx512bw group (no vl): vpmovqd + vpshufb pack the words, the mask is
;     trimmed to 4 bits (kshiftld/kshiftrd by 28) and a masked vmovdqu16 stores.
;   - avx512bw+vl group: one masked vpmovqw from %ymm0 does truncate + store.
; NOTE(review): none of the check lines visible here carry a prefix that the
; -mattr=avx512vl (without avx512bw) run line passes to FileCheck, so that
; configuration looks unchecked for this function - confirm against the run
; lines at the top of the file.
255 define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %p, <4 x i32> %mask) {
256 ; AVX512F-LABEL: truncstore_v4i64_v4i16:
258 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
259 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
260 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
261 ; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
262 ; AVX512F-NEXT: kmovw %k0, %eax
263 ; AVX512F-NEXT: testb $1, %al
264 ; AVX512F-NEXT: je .LBB4_2
265 ; AVX512F-NEXT: # %bb.1: # %cond.store
266 ; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
267 ; AVX512F-NEXT: .LBB4_2: # %else
268 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
269 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0
270 ; AVX512F-NEXT: kmovw %k0, %eax
271 ; AVX512F-NEXT: testb $1, %al
272 ; AVX512F-NEXT: je .LBB4_4
273 ; AVX512F-NEXT: # %bb.3: # %cond.store1
274 ; AVX512F-NEXT: vpextrw $2, %xmm0, 2(%rdi)
275 ; AVX512F-NEXT: .LBB4_4: # %else2
276 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
277 ; AVX512F-NEXT: kshiftrw $2, %k0, %k0
278 ; AVX512F-NEXT: kmovw %k0, %eax
279 ; AVX512F-NEXT: testb $1, %al
280 ; AVX512F-NEXT: je .LBB4_6
281 ; AVX512F-NEXT: # %bb.5: # %cond.store3
282 ; AVX512F-NEXT: vpextrw $4, %xmm0, 4(%rdi)
283 ; AVX512F-NEXT: .LBB4_6: # %else4
284 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
285 ; AVX512F-NEXT: kshiftrw $3, %k0, %k0
286 ; AVX512F-NEXT: kmovw %k0, %eax
287 ; AVX512F-NEXT: testb $1, %al
288 ; AVX512F-NEXT: je .LBB4_8
289 ; AVX512F-NEXT: # %bb.7: # %cond.store5
290 ; AVX512F-NEXT: vpextrw $6, %xmm0, 6(%rdi)
291 ; AVX512F-NEXT: .LBB4_8: # %else6
292 ; AVX512F-NEXT: vzeroupper
295 ; AVX512BW-LABEL: truncstore_v4i64_v4i16:
297 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
298 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
299 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
300 ; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
301 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
302 ; AVX512BW-NEXT: kshiftld $28, %k0, %k0
303 ; AVX512BW-NEXT: kshiftrd $28, %k0, %k1
304 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
305 ; AVX512BW-NEXT: vzeroupper
306 ; AVX512BW-NEXT: retq
308 ; AVX512BWVL-LABEL: truncstore_v4i64_v4i16:
309 ; AVX512BWVL: # %bb.0:
310 ; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
311 ; AVX512BWVL-NEXT: vpmovqw %ymm0, (%rdi) {%k1}
312 ; AVX512BWVL-NEXT: vzeroupper
313 ; AVX512BWVL-NEXT: retq
; %a is the per-lane store predicate (mask element != 0); %b is the narrowed
; value. The masked store is called with an alignment operand of 1.
314 %a = icmp ne <4 x i32> %mask, zeroinitializer
315 %b = trunc <4 x i64> %x to <4 x i16>
316 call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %b, <4 x i16>* %p, i32 1, <4 x i1> %a)
; truncstore_v4i64_v4i8
; Truncates <4 x i64> %x to <4 x i8> and writes it to %p with a masked store.
; A lane is stored only where the matching element of %mask is non-zero.
; Expected code, per the checks below:
;   - avx512f group: the store is scalarized. vpmovqd narrows to dwords, so
;     the byte of lane i sits at byte index 4*i; each lane re-runs vptestmd,
;     isolates its mask bit and conditionally stores with vpextrb (indices
;     0, 4, 8, 12) at byte offsets 0..3 from %rdi.
;   - avx512bw group (no vl): vpmovqd + vpshufb pack the bytes, the mask is
;     trimmed to 4 bits (kshiftlq/kshiftrq by 60) and a masked vmovdqu8 stores.
;   - avx512bw+vl group: one masked vpmovqb from %ymm0 does truncate + store.
; NOTE(review): none of the check lines visible here carry a prefix that the
; -mattr=avx512vl (without avx512bw) run line passes to FileCheck, so that
; configuration looks unchecked for this function - confirm against the run
; lines at the top of the file.
320 define void @truncstore_v4i64_v4i8(<4 x i64> %x, <4 x i8>* %p, <4 x i32> %mask) {
321 ; AVX512F-LABEL: truncstore_v4i64_v4i8:
323 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
324 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
325 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
326 ; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
327 ; AVX512F-NEXT: kmovw %k0, %eax
328 ; AVX512F-NEXT: testb $1, %al
329 ; AVX512F-NEXT: je .LBB5_2
330 ; AVX512F-NEXT: # %bb.1: # %cond.store
331 ; AVX512F-NEXT: vpextrb $0, %xmm0, (%rdi)
332 ; AVX512F-NEXT: .LBB5_2: # %else
333 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
334 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0
335 ; AVX512F-NEXT: kmovw %k0, %eax
336 ; AVX512F-NEXT: testb $1, %al
337 ; AVX512F-NEXT: je .LBB5_4
338 ; AVX512F-NEXT: # %bb.3: # %cond.store1
339 ; AVX512F-NEXT: vpextrb $4, %xmm0, 1(%rdi)
340 ; AVX512F-NEXT: .LBB5_4: # %else2
341 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
342 ; AVX512F-NEXT: kshiftrw $2, %k0, %k0
343 ; AVX512F-NEXT: kmovw %k0, %eax
344 ; AVX512F-NEXT: testb $1, %al
345 ; AVX512F-NEXT: je .LBB5_6
346 ; AVX512F-NEXT: # %bb.5: # %cond.store3
347 ; AVX512F-NEXT: vpextrb $8, %xmm0, 2(%rdi)
348 ; AVX512F-NEXT: .LBB5_6: # %else4
349 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
350 ; AVX512F-NEXT: kshiftrw $3, %k0, %k0
351 ; AVX512F-NEXT: kmovw %k0, %eax
352 ; AVX512F-NEXT: testb $1, %al
353 ; AVX512F-NEXT: je .LBB5_8
354 ; AVX512F-NEXT: # %bb.7: # %cond.store5
355 ; AVX512F-NEXT: vpextrb $12, %xmm0, 3(%rdi)
356 ; AVX512F-NEXT: .LBB5_8: # %else6
357 ; AVX512F-NEXT: vzeroupper
360 ; AVX512BW-LABEL: truncstore_v4i64_v4i8:
362 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
363 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
364 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
365 ; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
366 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
367 ; AVX512BW-NEXT: kshiftlq $60, %k0, %k0
368 ; AVX512BW-NEXT: kshiftrq $60, %k0, %k1
369 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
370 ; AVX512BW-NEXT: vzeroupper
371 ; AVX512BW-NEXT: retq
373 ; AVX512BWVL-LABEL: truncstore_v4i64_v4i8:
374 ; AVX512BWVL: # %bb.0:
375 ; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
376 ; AVX512BWVL-NEXT: vpmovqb %ymm0, (%rdi) {%k1}
377 ; AVX512BWVL-NEXT: vzeroupper
378 ; AVX512BWVL-NEXT: retq
; %a is the per-lane store predicate (mask element != 0); %b is the narrowed
; value. The masked store is called with an alignment operand of 1.
379 %a = icmp ne <4 x i32> %mask, zeroinitializer
380 %b = trunc <4 x i64> %x to <4 x i8>
381 call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %b, <4 x i8>* %p, i32 1, <4 x i1> %a)
; truncstore_v2i64_v2i32
; Truncates <2 x i64> %x to <2 x i32> and writes it to %p with a masked store.
; Here %mask is <2 x i64>; a lane is stored only where its element is non-zero.
; Expected code, per the checks below:
;   - avx512f and avx512bw groups: vptestmq on the widened mask, vpshufd
;     packs the two low dwords, the mask is trimmed to 2 bits
;     (kshiftlw/kshiftrw by 14) and a masked vmovdqu32 performs the store.
;   - avx512vl group: one masked vpmovqd from %xmm0 does truncate + store.
385 define void @truncstore_v2i64_v2i32(<2 x i64> %x, <2 x i32>* %p, <2 x i64> %mask) {
386 ; AVX512F-LABEL: truncstore_v2i64_v2i32:
388 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
389 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0
390 ; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
391 ; AVX512F-NEXT: kshiftlw $14, %k0, %k0
392 ; AVX512F-NEXT: kshiftrw $14, %k0, %k1
393 ; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
394 ; AVX512F-NEXT: vzeroupper
397 ; AVX512VL-LABEL: truncstore_v2i64_v2i32:
399 ; AVX512VL-NEXT: vptestmq %xmm1, %xmm1, %k1
400 ; AVX512VL-NEXT: vpmovqd %xmm0, (%rdi) {%k1}
401 ; AVX512VL-NEXT: retq
403 ; AVX512BW-LABEL: truncstore_v2i64_v2i32:
405 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
406 ; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0
407 ; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
408 ; AVX512BW-NEXT: kshiftlw $14, %k0, %k0
409 ; AVX512BW-NEXT: kshiftrw $14, %k0, %k1
410 ; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
411 ; AVX512BW-NEXT: vzeroupper
412 ; AVX512BW-NEXT: retq
; %a is the per-lane store predicate (mask element != 0); %b is the narrowed
; value. The masked store is called with an alignment operand of 1.
413 %a = icmp ne <2 x i64> %mask, zeroinitializer
414 %b = trunc <2 x i64> %x to <2 x i32>
415 call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %b, <2 x i32>* %p, i32 1, <2 x i1> %a)
; truncstore_v2i64_v2i16
; Truncates <2 x i64> %x to <2 x i16> and writes it to %p with a masked store.
; Here %mask is <2 x i64>; a lane is stored only where its element is non-zero.
; Expected code, per the checks below:
;   - avx512f group: the store is scalarized with no separate narrowing step;
;     each lane runs vptestmq, isolates its mask bit and conditionally stores
;     with vpextrw (word indices 0 and 4) at byte offsets 0 and 2 from %rdi.
;   - avx512bw group (no vl): vpshufd + vpshuflw pack the words, the mask is
;     trimmed to 2 bits (kshiftld/kshiftrd by 30) and a masked vmovdqu16 stores.
;   - avx512bw+vl group: one masked vpmovqw from %xmm0 does truncate + store.
; NOTE(review): none of the check lines visible here carry a prefix that the
; -mattr=avx512vl (without avx512bw) run line passes to FileCheck, so that
; configuration looks unchecked for this function - confirm against the run
; lines at the top of the file.
419 define void @truncstore_v2i64_v2i16(<2 x i64> %x, <2 x i16>* %p, <2 x i64> %mask) {
420 ; AVX512F-LABEL: truncstore_v2i64_v2i16:
422 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
423 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0
424 ; AVX512F-NEXT: kmovw %k0, %eax
425 ; AVX512F-NEXT: testb $1, %al
426 ; AVX512F-NEXT: je .LBB7_2
427 ; AVX512F-NEXT: # %bb.1: # %cond.store
428 ; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
429 ; AVX512F-NEXT: .LBB7_2: # %else
430 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0
431 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0
432 ; AVX512F-NEXT: kmovw %k0, %eax
433 ; AVX512F-NEXT: testb $1, %al
434 ; AVX512F-NEXT: je .LBB7_4
435 ; AVX512F-NEXT: # %bb.3: # %cond.store1
436 ; AVX512F-NEXT: vpextrw $4, %xmm0, 2(%rdi)
437 ; AVX512F-NEXT: .LBB7_4: # %else2
438 ; AVX512F-NEXT: vzeroupper
441 ; AVX512BW-LABEL: truncstore_v2i64_v2i16:
443 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
444 ; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0
445 ; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
446 ; AVX512BW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
447 ; AVX512BW-NEXT: kshiftld $30, %k0, %k0
448 ; AVX512BW-NEXT: kshiftrd $30, %k0, %k1
449 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
450 ; AVX512BW-NEXT: vzeroupper
451 ; AVX512BW-NEXT: retq
453 ; AVX512BWVL-LABEL: truncstore_v2i64_v2i16:
454 ; AVX512BWVL: # %bb.0:
455 ; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1
456 ; AVX512BWVL-NEXT: vpmovqw %xmm0, (%rdi) {%k1}
457 ; AVX512BWVL-NEXT: retq
; %a is the per-lane store predicate (mask element != 0); %b is the narrowed
; value. The masked store is called with an alignment operand of 1.
458 %a = icmp ne <2 x i64> %mask, zeroinitializer
459 %b = trunc <2 x i64> %x to <2 x i16>
460 call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %b, <2 x i16>* %p, i32 1, <2 x i1> %a)
; truncstore_v2i64_v2i8
; Truncates <2 x i64> %x to <2 x i8> and writes it to %p with a masked store.
; Here %mask is <2 x i64>; a lane is stored only where its element is non-zero.
; Expected code, per the checks below:
;   - avx512f group: the store is scalarized with no separate narrowing step;
;     each lane runs vptestmq, isolates its mask bit and conditionally stores
;     with vpextrb (byte indices 0 and 8) at byte offsets 0 and 1 from %rdi.
;   - avx512bw group (no vl): vpshufb packs the bytes, the mask is trimmed to
;     2 bits (kshiftlq/kshiftrq by 62) and a masked vmovdqu8 stores.
;   - avx512bw+vl group: one masked vpmovqb from %xmm0 does truncate + store.
; NOTE(review): none of the check lines visible here carry a prefix that the
; -mattr=avx512vl (without avx512bw) run line passes to FileCheck, so that
; configuration looks unchecked for this function - confirm against the run
; lines at the top of the file.
464 define void @truncstore_v2i64_v2i8(<2 x i64> %x, <2 x i8>* %p, <2 x i64> %mask) {
465 ; AVX512F-LABEL: truncstore_v2i64_v2i8:
467 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
468 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0
469 ; AVX512F-NEXT: kmovw %k0, %eax
470 ; AVX512F-NEXT: testb $1, %al
471 ; AVX512F-NEXT: je .LBB8_2
472 ; AVX512F-NEXT: # %bb.1: # %cond.store
473 ; AVX512F-NEXT: vpextrb $0, %xmm0, (%rdi)
474 ; AVX512F-NEXT: .LBB8_2: # %else
475 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0
476 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0
477 ; AVX512F-NEXT: kmovw %k0, %eax
478 ; AVX512F-NEXT: testb $1, %al
479 ; AVX512F-NEXT: je .LBB8_4
480 ; AVX512F-NEXT: # %bb.3: # %cond.store1
481 ; AVX512F-NEXT: vpextrb $8, %xmm0, 1(%rdi)
482 ; AVX512F-NEXT: .LBB8_4: # %else2
483 ; AVX512F-NEXT: vzeroupper
486 ; AVX512BW-LABEL: truncstore_v2i64_v2i8:
488 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
489 ; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0
490 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
491 ; AVX512BW-NEXT: kshiftlq $62, %k0, %k0
492 ; AVX512BW-NEXT: kshiftrq $62, %k0, %k1
493 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
494 ; AVX512BW-NEXT: vzeroupper
495 ; AVX512BW-NEXT: retq
497 ; AVX512BWVL-LABEL: truncstore_v2i64_v2i8:
498 ; AVX512BWVL: # %bb.0:
499 ; AVX512BWVL-NEXT: vptestmq %xmm1, %xmm1, %k1
500 ; AVX512BWVL-NEXT: vpmovqb %xmm0, (%rdi) {%k1}
501 ; AVX512BWVL-NEXT: retq
; %a is the per-lane store predicate (mask element != 0); %b is the narrowed
; value. The masked store is called with an alignment operand of 1.
502 %a = icmp ne <2 x i64> %mask, zeroinitializer
503 %b = trunc <2 x i64> %x to <2 x i8>
504 call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %b, <2 x i8>* %p, i32 1, <2 x i1> %a)
; truncstore_v16i32_v16i16
; Truncates <16 x i32> %x to <16 x i16> and writes it to %p with a masked
; store. A lane is stored only where the matching element of %mask is non-zero.
; Expected code, per the checks below:
;   - avx512f group: the store is scalarized. vptestmd runs once into %k0 and
;     vpmovdw narrows into %ymm0; each of the 16 lanes shifts its mask bit
;     down with kshiftrw, tests bit 0 and conditionally stores one word with
;     vpextrw at byte offsets 0, 2, ..., 30 from %rdi. Lanes 8..15 first pull
;     the upper 128 bits out of %ymm0 with vextracti128.
;   - avx512bw groups: one masked vpmovdw from %zmm0 does truncate + store.
; NOTE(review): none of the check lines visible here carry a prefix that the
; -mattr=avx512vl (without avx512bw) run line passes to FileCheck, so that
; configuration looks unchecked for this function - confirm against the run
; lines at the top of the file.
508 define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32> %mask) {
509 ; AVX512F-LABEL: truncstore_v16i32_v16i16:
511 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
512 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
513 ; AVX512F-NEXT: kmovw %k0, %eax
514 ; AVX512F-NEXT: testb $1, %al
515 ; AVX512F-NEXT: je .LBB9_2
516 ; AVX512F-NEXT: # %bb.1: # %cond.store
517 ; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
518 ; AVX512F-NEXT: .LBB9_2: # %else
519 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
520 ; AVX512F-NEXT: kmovw %k1, %eax
521 ; AVX512F-NEXT: testb $1, %al
522 ; AVX512F-NEXT: je .LBB9_4
523 ; AVX512F-NEXT: # %bb.3: # %cond.store1
524 ; AVX512F-NEXT: vpextrw $1, %xmm0, 2(%rdi)
525 ; AVX512F-NEXT: .LBB9_4: # %else2
526 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
527 ; AVX512F-NEXT: kmovw %k1, %eax
528 ; AVX512F-NEXT: testb $1, %al
529 ; AVX512F-NEXT: je .LBB9_6
530 ; AVX512F-NEXT: # %bb.5: # %cond.store3
531 ; AVX512F-NEXT: vpextrw $2, %xmm0, 4(%rdi)
532 ; AVX512F-NEXT: .LBB9_6: # %else4
533 ; AVX512F-NEXT: kshiftrw $3, %k0, %k1
534 ; AVX512F-NEXT: kmovw %k1, %eax
535 ; AVX512F-NEXT: testb $1, %al
536 ; AVX512F-NEXT: je .LBB9_8
537 ; AVX512F-NEXT: # %bb.7: # %cond.store5
538 ; AVX512F-NEXT: vpextrw $3, %xmm0, 6(%rdi)
539 ; AVX512F-NEXT: .LBB9_8: # %else6
540 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
541 ; AVX512F-NEXT: kmovw %k1, %eax
542 ; AVX512F-NEXT: testb $1, %al
543 ; AVX512F-NEXT: je .LBB9_10
544 ; AVX512F-NEXT: # %bb.9: # %cond.store7
545 ; AVX512F-NEXT: vpextrw $4, %xmm0, 8(%rdi)
546 ; AVX512F-NEXT: .LBB9_10: # %else8
547 ; AVX512F-NEXT: kshiftrw $5, %k0, %k1
548 ; AVX512F-NEXT: kmovw %k1, %eax
549 ; AVX512F-NEXT: testb $1, %al
550 ; AVX512F-NEXT: je .LBB9_12
551 ; AVX512F-NEXT: # %bb.11: # %cond.store9
552 ; AVX512F-NEXT: vpextrw $5, %xmm0, 10(%rdi)
553 ; AVX512F-NEXT: .LBB9_12: # %else10
554 ; AVX512F-NEXT: kshiftrw $6, %k0, %k1
555 ; AVX512F-NEXT: kmovw %k1, %eax
556 ; AVX512F-NEXT: testb $1, %al
557 ; AVX512F-NEXT: je .LBB9_14
558 ; AVX512F-NEXT: # %bb.13: # %cond.store11
559 ; AVX512F-NEXT: vpextrw $6, %xmm0, 12(%rdi)
560 ; AVX512F-NEXT: .LBB9_14: # %else12
561 ; AVX512F-NEXT: kshiftrw $7, %k0, %k1
562 ; AVX512F-NEXT: kmovw %k1, %eax
563 ; AVX512F-NEXT: testb $1, %al
564 ; AVX512F-NEXT: je .LBB9_16
565 ; AVX512F-NEXT: # %bb.15: # %cond.store13
566 ; AVX512F-NEXT: vpextrw $7, %xmm0, 14(%rdi)
567 ; AVX512F-NEXT: .LBB9_16: # %else14
568 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
569 ; AVX512F-NEXT: kmovw %k1, %eax
570 ; AVX512F-NEXT: testb $1, %al
571 ; AVX512F-NEXT: je .LBB9_18
572 ; AVX512F-NEXT: # %bb.17: # %cond.store15
573 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
574 ; AVX512F-NEXT: vpextrw $0, %xmm1, 16(%rdi)
575 ; AVX512F-NEXT: .LBB9_18: # %else16
576 ; AVX512F-NEXT: kshiftrw $9, %k0, %k1
577 ; AVX512F-NEXT: kmovw %k1, %eax
578 ; AVX512F-NEXT: testb $1, %al
579 ; AVX512F-NEXT: je .LBB9_20
580 ; AVX512F-NEXT: # %bb.19: # %cond.store17
581 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
582 ; AVX512F-NEXT: vpextrw $1, %xmm1, 18(%rdi)
583 ; AVX512F-NEXT: .LBB9_20: # %else18
584 ; AVX512F-NEXT: kshiftrw $10, %k0, %k1
585 ; AVX512F-NEXT: kmovw %k1, %eax
586 ; AVX512F-NEXT: testb $1, %al
587 ; AVX512F-NEXT: je .LBB9_22
588 ; AVX512F-NEXT: # %bb.21: # %cond.store19
589 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
590 ; AVX512F-NEXT: vpextrw $2, %xmm1, 20(%rdi)
591 ; AVX512F-NEXT: .LBB9_22: # %else20
592 ; AVX512F-NEXT: kshiftrw $11, %k0, %k1
593 ; AVX512F-NEXT: kmovw %k1, %eax
594 ; AVX512F-NEXT: testb $1, %al
595 ; AVX512F-NEXT: je .LBB9_24
596 ; AVX512F-NEXT: # %bb.23: # %cond.store21
597 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
598 ; AVX512F-NEXT: vpextrw $3, %xmm1, 22(%rdi)
599 ; AVX512F-NEXT: .LBB9_24: # %else22
600 ; AVX512F-NEXT: kshiftrw $12, %k0, %k1
601 ; AVX512F-NEXT: kmovw %k1, %eax
602 ; AVX512F-NEXT: testb $1, %al
603 ; AVX512F-NEXT: je .LBB9_26
604 ; AVX512F-NEXT: # %bb.25: # %cond.store23
605 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
606 ; AVX512F-NEXT: vpextrw $4, %xmm1, 24(%rdi)
607 ; AVX512F-NEXT: .LBB9_26: # %else24
608 ; AVX512F-NEXT: kshiftrw $13, %k0, %k1
609 ; AVX512F-NEXT: kmovw %k1, %eax
610 ; AVX512F-NEXT: testb $1, %al
611 ; AVX512F-NEXT: je .LBB9_28
612 ; AVX512F-NEXT: # %bb.27: # %cond.store25
613 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
614 ; AVX512F-NEXT: vpextrw $5, %xmm1, 26(%rdi)
615 ; AVX512F-NEXT: .LBB9_28: # %else26
616 ; AVX512F-NEXT: kshiftrw $14, %k0, %k1
617 ; AVX512F-NEXT: kmovw %k1, %eax
618 ; AVX512F-NEXT: testb $1, %al
619 ; AVX512F-NEXT: je .LBB9_30
620 ; AVX512F-NEXT: # %bb.29: # %cond.store27
621 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
622 ; AVX512F-NEXT: vpextrw $6, %xmm1, 28(%rdi)
623 ; AVX512F-NEXT: .LBB9_30: # %else28
624 ; AVX512F-NEXT: kshiftrw $15, %k0, %k0
625 ; AVX512F-NEXT: kmovw %k0, %eax
626 ; AVX512F-NEXT: testb $1, %al
627 ; AVX512F-NEXT: je .LBB9_32
628 ; AVX512F-NEXT: # %bb.31: # %cond.store29
629 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
630 ; AVX512F-NEXT: vpextrw $7, %xmm0, 30(%rdi)
631 ; AVX512F-NEXT: .LBB9_32: # %else30
632 ; AVX512F-NEXT: vzeroupper
635 ; AVX512BW-LABEL: truncstore_v16i32_v16i16:
637 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
638 ; AVX512BW-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
639 ; AVX512BW-NEXT: vzeroupper
640 ; AVX512BW-NEXT: retq
642 ; AVX512BWVL-LABEL: truncstore_v16i32_v16i16:
643 ; AVX512BWVL: # %bb.0:
644 ; AVX512BWVL-NEXT: vptestmd %zmm1, %zmm1, %k1
645 ; AVX512BWVL-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
646 ; AVX512BWVL-NEXT: vzeroupper
647 ; AVX512BWVL-NEXT: retq
; %a is the per-lane store predicate (mask element != 0); %b is the narrowed
; value. The masked store is called with an alignment operand of 1.
648 %a = icmp ne <16 x i32> %mask, zeroinitializer
649 %b = trunc <16 x i32> %x to <16 x i16>
650 call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> %b, <16 x i16>* %p, i32 1, <16 x i1> %a)
; truncstore_v16i32_v16i8
; Truncates <16 x i32> %x to <16 x i8> and writes it to %p with a masked
; store. A lane is stored only where the matching element of %mask is non-zero.
; Expected code, per the checks below:
;   - avx512f group: the store is scalarized. vptestmd runs once into %k0 and
;     vpmovdb narrows into %xmm0; each of the 16 lanes shifts its mask bit
;     down with kshiftrw, tests bit 0 and conditionally stores one byte with
;     vpextrb at byte offsets 0..15 from %rdi.
;   - avx512bw groups: one masked vpmovdb from %zmm0 does truncate + store.
; NOTE(review): none of the check lines visible here carry a prefix that the
; -mattr=avx512vl (without avx512bw) run line passes to FileCheck, so that
; configuration looks unchecked for this function - confirm against the run
; lines at the top of the file.
654 define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %mask) {
655 ; AVX512F-LABEL: truncstore_v16i32_v16i8:
657 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
658 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
659 ; AVX512F-NEXT: kmovw %k0, %eax
660 ; AVX512F-NEXT: testb $1, %al
661 ; AVX512F-NEXT: je .LBB10_2
662 ; AVX512F-NEXT: # %bb.1: # %cond.store
663 ; AVX512F-NEXT: vpextrb $0, %xmm0, (%rdi)
664 ; AVX512F-NEXT: .LBB10_2: # %else
665 ; AVX512F-NEXT: kshiftrw $1, %k0, %k1
666 ; AVX512F-NEXT: kmovw %k1, %eax
667 ; AVX512F-NEXT: testb $1, %al
668 ; AVX512F-NEXT: je .LBB10_4
669 ; AVX512F-NEXT: # %bb.3: # %cond.store1
670 ; AVX512F-NEXT: vpextrb $1, %xmm0, 1(%rdi)
671 ; AVX512F-NEXT: .LBB10_4: # %else2
672 ; AVX512F-NEXT: kshiftrw $2, %k0, %k1
673 ; AVX512F-NEXT: kmovw %k1, %eax
674 ; AVX512F-NEXT: testb $1, %al
675 ; AVX512F-NEXT: je .LBB10_6
676 ; AVX512F-NEXT: # %bb.5: # %cond.store3
677 ; AVX512F-NEXT: vpextrb $2, %xmm0, 2(%rdi)
678 ; AVX512F-NEXT: .LBB10_6: # %else4
679 ; AVX512F-NEXT: kshiftrw $3, %k0, %k1
680 ; AVX512F-NEXT: kmovw %k1, %eax
681 ; AVX512F-NEXT: testb $1, %al
682 ; AVX512F-NEXT: je .LBB10_8
683 ; AVX512F-NEXT: # %bb.7: # %cond.store5
684 ; AVX512F-NEXT: vpextrb $3, %xmm0, 3(%rdi)
685 ; AVX512F-NEXT: .LBB10_8: # %else6
686 ; AVX512F-NEXT: kshiftrw $4, %k0, %k1
687 ; AVX512F-NEXT: kmovw %k1, %eax
688 ; AVX512F-NEXT: testb $1, %al
689 ; AVX512F-NEXT: je .LBB10_10
690 ; AVX512F-NEXT: # %bb.9: # %cond.store7
691 ; AVX512F-NEXT: vpextrb $4, %xmm0, 4(%rdi)
692 ; AVX512F-NEXT: .LBB10_10: # %else8
693 ; AVX512F-NEXT: kshiftrw $5, %k0, %k1
694 ; AVX512F-NEXT: kmovw %k1, %eax
695 ; AVX512F-NEXT: testb $1, %al
696 ; AVX512F-NEXT: je .LBB10_12
697 ; AVX512F-NEXT: # %bb.11: # %cond.store9
698 ; AVX512F-NEXT: vpextrb $5, %xmm0, 5(%rdi)
699 ; AVX512F-NEXT: .LBB10_12: # %else10
700 ; AVX512F-NEXT: kshiftrw $6, %k0, %k1
701 ; AVX512F-NEXT: kmovw %k1, %eax
702 ; AVX512F-NEXT: testb $1, %al
703 ; AVX512F-NEXT: je .LBB10_14
704 ; AVX512F-NEXT: # %bb.13: # %cond.store11
705 ; AVX512F-NEXT: vpextrb $6, %xmm0, 6(%rdi)
706 ; AVX512F-NEXT: .LBB10_14: # %else12
707 ; AVX512F-NEXT: kshiftrw $7, %k0, %k1
708 ; AVX512F-NEXT: kmovw %k1, %eax
709 ; AVX512F-NEXT: testb $1, %al
710 ; AVX512F-NEXT: je .LBB10_16
711 ; AVX512F-NEXT: # %bb.15: # %cond.store13
712 ; AVX512F-NEXT: vpextrb $7, %xmm0, 7(%rdi)
713 ; AVX512F-NEXT: .LBB10_16: # %else14
714 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
715 ; AVX512F-NEXT: kmovw %k1, %eax
716 ; AVX512F-NEXT: testb $1, %al
717 ; AVX512F-NEXT: je .LBB10_18
718 ; AVX512F-NEXT: # %bb.17: # %cond.store15
719 ; AVX512F-NEXT: vpextrb $8, %xmm0, 8(%rdi)
720 ; AVX512F-NEXT: .LBB10_18: # %else16
721 ; AVX512F-NEXT: kshiftrw $9, %k0, %k1
722 ; AVX512F-NEXT: kmovw %k1, %eax
723 ; AVX512F-NEXT: testb $1, %al
724 ; AVX512F-NEXT: je .LBB10_20
725 ; AVX512F-NEXT: # %bb.19: # %cond.store17
726 ; AVX512F-NEXT: vpextrb $9, %xmm0, 9(%rdi)
727 ; AVX512F-NEXT: .LBB10_20: # %else18
728 ; AVX512F-NEXT: kshiftrw $10, %k0, %k1
729 ; AVX512F-NEXT: kmovw %k1, %eax
730 ; AVX512F-NEXT: testb $1, %al
731 ; AVX512F-NEXT: je .LBB10_22
732 ; AVX512F-NEXT: # %bb.21: # %cond.store19
733 ; AVX512F-NEXT: vpextrb $10, %xmm0, 10(%rdi)
734 ; AVX512F-NEXT: .LBB10_22: # %else20
735 ; AVX512F-NEXT: kshiftrw $11, %k0, %k1
736 ; AVX512F-NEXT: kmovw %k1, %eax
737 ; AVX512F-NEXT: testb $1, %al
738 ; AVX512F-NEXT: je .LBB10_24
739 ; AVX512F-NEXT: # %bb.23: # %cond.store21
740 ; AVX512F-NEXT: vpextrb $11, %xmm0, 11(%rdi)
741 ; AVX512F-NEXT: .LBB10_24: # %else22
742 ; AVX512F-NEXT: kshiftrw $12, %k0, %k1
743 ; AVX512F-NEXT: kmovw %k1, %eax
744 ; AVX512F-NEXT: testb $1, %al
745 ; AVX512F-NEXT: je .LBB10_26
746 ; AVX512F-NEXT: # %bb.25: # %cond.store23
747 ; AVX512F-NEXT: vpextrb $12, %xmm0, 12(%rdi)
748 ; AVX512F-NEXT: .LBB10_26: # %else24
749 ; AVX512F-NEXT: kshiftrw $13, %k0, %k1
750 ; AVX512F-NEXT: kmovw %k1, %eax
751 ; AVX512F-NEXT: testb $1, %al
752 ; AVX512F-NEXT: je .LBB10_28
753 ; AVX512F-NEXT: # %bb.27: # %cond.store25
754 ; AVX512F-NEXT: vpextrb $13, %xmm0, 13(%rdi)
755 ; AVX512F-NEXT: .LBB10_28: # %else26
756 ; AVX512F-NEXT: kshiftrw $14, %k0, %k1
757 ; AVX512F-NEXT: kmovw %k1, %eax
758 ; AVX512F-NEXT: testb $1, %al
759 ; AVX512F-NEXT: je .LBB10_30
760 ; AVX512F-NEXT: # %bb.29: # %cond.store27
761 ; AVX512F-NEXT: vpextrb $14, %xmm0, 14(%rdi)
762 ; AVX512F-NEXT: .LBB10_30: # %else28
763 ; AVX512F-NEXT: kshiftrw $15, %k0, %k0
764 ; AVX512F-NEXT: kmovw %k0, %eax
765 ; AVX512F-NEXT: testb $1, %al
766 ; AVX512F-NEXT: je .LBB10_32
767 ; AVX512F-NEXT: # %bb.31: # %cond.store29
768 ; AVX512F-NEXT: vpextrb $15, %xmm0, 15(%rdi)
769 ; AVX512F-NEXT: .LBB10_32: # %else30
770 ; AVX512F-NEXT: vzeroupper
773 ; AVX512BW-LABEL: truncstore_v16i32_v16i8:
775 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k1
776 ; AVX512BW-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
777 ; AVX512BW-NEXT: vzeroupper
778 ; AVX512BW-NEXT: retq
780 ; AVX512BWVL-LABEL: truncstore_v16i32_v16i8:
781 ; AVX512BWVL: # %bb.0:
782 ; AVX512BWVL-NEXT: vptestmd %zmm1, %zmm1, %k1
783 ; AVX512BWVL-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
784 ; AVX512BWVL-NEXT: vzeroupper
785 ; AVX512BWVL-NEXT: retq
; %a is the per-lane store predicate (mask element != 0); %b is the narrowed
; value. The masked store is called with an alignment operand of 1.
786 %a = icmp ne <16 x i32> %mask, zeroinitializer
787 %b = trunc <16 x i32> %x to <16 x i8>
788 call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %b, <16 x i8>* %p, i32 1, <16 x i1> %a)
; Masked truncating store test: <8 x i32> %x is truncated to <8 x i16> and
; written to %p only in lanes whose %mask element is non-zero.
; Expected codegen recorded by the check lines below, grouped by check prefix
; (the prefixes are bound to feature sets by the RUN lines at the top of the file):
;  - AVX512F checks: the value is narrowed in a register with vpmovdw, then each
;    of the 8 mask bits is tested in turn (kshiftrw by the lane index, testb)
;    and the selected word is written with vpextrw at byte offset 2*lane.
;  - AVX512BW checks: the mask is trimmed to its low 8 bits with a
;    kshiftld/kshiftrd $24 pair and one masked vmovdqu16 performs the store.
;  - AVX512BWVL checks: a single masked vpmovdw straight to memory.
; NOTE(review): no check lines using the AVX512 or AVX512VL prefixes are visible
; for this function, so the avx512vl-only configuration appears to be unchecked
; here - confirm against the full file.
792 define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask) {
793 ; AVX512F-LABEL: truncstore_v8i32_v8i16:
795 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
796 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
797 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
798 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
799 ; AVX512F-NEXT: kmovw %k0, %eax
800 ; AVX512F-NEXT: testb $1, %al
801 ; AVX512F-NEXT: je .LBB11_2
802 ; AVX512F-NEXT: # %bb.1: # %cond.store
803 ; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
804 ; AVX512F-NEXT: .LBB11_2: # %else
805 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
806 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0
807 ; AVX512F-NEXT: kmovw %k0, %eax
808 ; AVX512F-NEXT: testb $1, %al
809 ; AVX512F-NEXT: je .LBB11_4
810 ; AVX512F-NEXT: # %bb.3: # %cond.store1
811 ; AVX512F-NEXT: vpextrw $1, %xmm0, 2(%rdi)
812 ; AVX512F-NEXT: .LBB11_4: # %else2
813 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
814 ; AVX512F-NEXT: kshiftrw $2, %k0, %k0
815 ; AVX512F-NEXT: kmovw %k0, %eax
816 ; AVX512F-NEXT: testb $1, %al
817 ; AVX512F-NEXT: je .LBB11_6
818 ; AVX512F-NEXT: # %bb.5: # %cond.store3
819 ; AVX512F-NEXT: vpextrw $2, %xmm0, 4(%rdi)
820 ; AVX512F-NEXT: .LBB11_6: # %else4
821 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
822 ; AVX512F-NEXT: kshiftrw $3, %k0, %k0
823 ; AVX512F-NEXT: kmovw %k0, %eax
824 ; AVX512F-NEXT: testb $1, %al
825 ; AVX512F-NEXT: je .LBB11_8
826 ; AVX512F-NEXT: # %bb.7: # %cond.store5
827 ; AVX512F-NEXT: vpextrw $3, %xmm0, 6(%rdi)
828 ; AVX512F-NEXT: .LBB11_8: # %else6
829 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
830 ; AVX512F-NEXT: kshiftrw $4, %k0, %k0
831 ; AVX512F-NEXT: kmovw %k0, %eax
832 ; AVX512F-NEXT: testb $1, %al
833 ; AVX512F-NEXT: je .LBB11_10
834 ; AVX512F-NEXT: # %bb.9: # %cond.store7
835 ; AVX512F-NEXT: vpextrw $4, %xmm0, 8(%rdi)
836 ; AVX512F-NEXT: .LBB11_10: # %else8
837 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
838 ; AVX512F-NEXT: kshiftrw $5, %k0, %k0
839 ; AVX512F-NEXT: kmovw %k0, %eax
840 ; AVX512F-NEXT: testb $1, %al
841 ; AVX512F-NEXT: je .LBB11_12
842 ; AVX512F-NEXT: # %bb.11: # %cond.store9
843 ; AVX512F-NEXT: vpextrw $5, %xmm0, 10(%rdi)
844 ; AVX512F-NEXT: .LBB11_12: # %else10
845 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
846 ; AVX512F-NEXT: kshiftrw $6, %k0, %k0
847 ; AVX512F-NEXT: kmovw %k0, %eax
848 ; AVX512F-NEXT: testb $1, %al
849 ; AVX512F-NEXT: je .LBB11_14
850 ; AVX512F-NEXT: # %bb.13: # %cond.store11
851 ; AVX512F-NEXT: vpextrw $6, %xmm0, 12(%rdi)
852 ; AVX512F-NEXT: .LBB11_14: # %else12
853 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
854 ; AVX512F-NEXT: kshiftrw $7, %k0, %k0
855 ; AVX512F-NEXT: kmovw %k0, %eax
856 ; AVX512F-NEXT: testb $1, %al
857 ; AVX512F-NEXT: je .LBB11_16
858 ; AVX512F-NEXT: # %bb.15: # %cond.store13
859 ; AVX512F-NEXT: vpextrw $7, %xmm0, 14(%rdi)
860 ; AVX512F-NEXT: .LBB11_16: # %else14
861 ; AVX512F-NEXT: vzeroupper
864 ; AVX512BW-LABEL: truncstore_v8i32_v8i16:
866 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
867 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
868 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
869 ; AVX512BW-NEXT: kshiftld $24, %k0, %k0
870 ; AVX512BW-NEXT: kshiftrd $24, %k0, %k1
871 ; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
872 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
873 ; AVX512BW-NEXT: vzeroupper
874 ; AVX512BW-NEXT: retq
876 ; AVX512BWVL-LABEL: truncstore_v8i32_v8i16:
877 ; AVX512BWVL: # %bb.0:
878 ; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
879 ; AVX512BWVL-NEXT: vpmovdw %ymm0, (%rdi) {%k1}
880 ; AVX512BWVL-NEXT: vzeroupper
881 ; AVX512BWVL-NEXT: retq
; IR under test: %a is the per-lane store predicate (mask element != 0) and %b
; is the narrowed value. The i32 1 operand of llvm.masked.store is its
; alignment argument.
882 %a = icmp ne <8 x i32> %mask, zeroinitializer
883 %b = trunc <8 x i32> %x to <8 x i16>
884 call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %b, <8 x i16>* %p, i32 1, <8 x i1> %a)
; Masked truncating store test: <8 x i32> %x is truncated to <8 x i8> and
; written to %p only in lanes whose %mask element is non-zero.
; Expected codegen recorded by the check lines below, grouped by check prefix
; (the prefixes are bound to feature sets by the RUN lines at the top of the file):
;  - AVX512F checks: the value is first narrowed to 16-bit lanes with vpmovdw;
;    each of the 8 mask bits is then tested in turn and the low byte of the
;    selected word (byte index 2*lane) is written with vpextrb at offset lane.
;  - AVX512BW checks: vpmovdw followed by a vpshufb that gathers the even
;    bytes, the mask is trimmed to its low 8 bits with a kshiftlq/kshiftrq $56
;    pair, and one masked vmovdqu8 performs the store.
;  - AVX512BWVL checks: a single masked vpmovdb straight to memory.
; NOTE(review): no check lines using the AVX512 or AVX512VL prefixes are visible
; for this function, so the avx512vl-only configuration appears to be unchecked
; here - confirm against the full file.
888 define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask) {
889 ; AVX512F-LABEL: truncstore_v8i32_v8i8:
891 ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
892 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
893 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
894 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
895 ; AVX512F-NEXT: kmovw %k0, %eax
896 ; AVX512F-NEXT: testb $1, %al
897 ; AVX512F-NEXT: je .LBB12_2
898 ; AVX512F-NEXT: # %bb.1: # %cond.store
899 ; AVX512F-NEXT: vpextrb $0, %xmm0, (%rdi)
900 ; AVX512F-NEXT: .LBB12_2: # %else
901 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
902 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0
903 ; AVX512F-NEXT: kmovw %k0, %eax
904 ; AVX512F-NEXT: testb $1, %al
905 ; AVX512F-NEXT: je .LBB12_4
906 ; AVX512F-NEXT: # %bb.3: # %cond.store1
907 ; AVX512F-NEXT: vpextrb $2, %xmm0, 1(%rdi)
908 ; AVX512F-NEXT: .LBB12_4: # %else2
909 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
910 ; AVX512F-NEXT: kshiftrw $2, %k0, %k0
911 ; AVX512F-NEXT: kmovw %k0, %eax
912 ; AVX512F-NEXT: testb $1, %al
913 ; AVX512F-NEXT: je .LBB12_6
914 ; AVX512F-NEXT: # %bb.5: # %cond.store3
915 ; AVX512F-NEXT: vpextrb $4, %xmm0, 2(%rdi)
916 ; AVX512F-NEXT: .LBB12_6: # %else4
917 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
918 ; AVX512F-NEXT: kshiftrw $3, %k0, %k0
919 ; AVX512F-NEXT: kmovw %k0, %eax
920 ; AVX512F-NEXT: testb $1, %al
921 ; AVX512F-NEXT: je .LBB12_8
922 ; AVX512F-NEXT: # %bb.7: # %cond.store5
923 ; AVX512F-NEXT: vpextrb $6, %xmm0, 3(%rdi)
924 ; AVX512F-NEXT: .LBB12_8: # %else6
925 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
926 ; AVX512F-NEXT: kshiftrw $4, %k0, %k0
927 ; AVX512F-NEXT: kmovw %k0, %eax
928 ; AVX512F-NEXT: testb $1, %al
929 ; AVX512F-NEXT: je .LBB12_10
930 ; AVX512F-NEXT: # %bb.9: # %cond.store7
931 ; AVX512F-NEXT: vpextrb $8, %xmm0, 4(%rdi)
932 ; AVX512F-NEXT: .LBB12_10: # %else8
933 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
934 ; AVX512F-NEXT: kshiftrw $5, %k0, %k0
935 ; AVX512F-NEXT: kmovw %k0, %eax
936 ; AVX512F-NEXT: testb $1, %al
937 ; AVX512F-NEXT: je .LBB12_12
938 ; AVX512F-NEXT: # %bb.11: # %cond.store9
939 ; AVX512F-NEXT: vpextrb $10, %xmm0, 5(%rdi)
940 ; AVX512F-NEXT: .LBB12_12: # %else10
941 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
942 ; AVX512F-NEXT: kshiftrw $6, %k0, %k0
943 ; AVX512F-NEXT: kmovw %k0, %eax
944 ; AVX512F-NEXT: testb $1, %al
945 ; AVX512F-NEXT: je .LBB12_14
946 ; AVX512F-NEXT: # %bb.13: # %cond.store11
947 ; AVX512F-NEXT: vpextrb $12, %xmm0, 6(%rdi)
948 ; AVX512F-NEXT: .LBB12_14: # %else12
949 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
950 ; AVX512F-NEXT: kshiftrw $7, %k0, %k0
951 ; AVX512F-NEXT: kmovw %k0, %eax
952 ; AVX512F-NEXT: testb $1, %al
953 ; AVX512F-NEXT: je .LBB12_16
954 ; AVX512F-NEXT: # %bb.15: # %cond.store13
955 ; AVX512F-NEXT: vpextrb $14, %xmm0, 7(%rdi)
956 ; AVX512F-NEXT: .LBB12_16: # %else14
957 ; AVX512F-NEXT: vzeroupper
960 ; AVX512BW-LABEL: truncstore_v8i32_v8i8:
962 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
963 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
964 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
965 ; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
966 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
967 ; AVX512BW-NEXT: kshiftlq $56, %k0, %k0
968 ; AVX512BW-NEXT: kshiftrq $56, %k0, %k1
969 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
970 ; AVX512BW-NEXT: vzeroupper
971 ; AVX512BW-NEXT: retq
973 ; AVX512BWVL-LABEL: truncstore_v8i32_v8i8:
974 ; AVX512BWVL: # %bb.0:
975 ; AVX512BWVL-NEXT: vptestmd %ymm1, %ymm1, %k1
976 ; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rdi) {%k1}
977 ; AVX512BWVL-NEXT: vzeroupper
978 ; AVX512BWVL-NEXT: retq
; IR under test: %a is the per-lane store predicate (mask element != 0) and %b
; is the narrowed value. The i32 1 operand of llvm.masked.store is its
; alignment argument.
979 %a = icmp ne <8 x i32> %mask, zeroinitializer
980 %b = trunc <8 x i32> %x to <8 x i8>
981 call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %b, <8 x i8>* %p, i32 1, <8 x i1> %a)
; Masked truncating store test: <4 x i32> %x is truncated to <4 x i16> and
; written to %p only in lanes whose %mask element is non-zero.
; Expected codegen recorded by the check lines below, grouped by check prefix
; (the prefixes are bound to feature sets by the RUN lines at the top of the file):
;  - AVX512F checks: no separate narrowing instruction is expected; each of the
;    4 mask bits is tested in turn and the low word of the selected dword
;    (word index 2*lane) is written with vpextrw at byte offset 2*lane.
;  - AVX512BW checks: a vpshufb packs the low words together, the mask is
;    trimmed to its low 4 bits with a kshiftld/kshiftrd $28 pair, and one
;    masked vmovdqu16 performs the store.
;  - AVX512BWVL checks: a single masked vpmovdw on xmm registers; no
;    vzeroupper is expected in this variant.
; NOTE(review): no check lines using the AVX512 or AVX512VL prefixes are visible
; for this function, so the avx512vl-only configuration appears to be unchecked
; here - confirm against the full file.
985 define void @truncstore_v4i32_v4i16(<4 x i32> %x, <4 x i16>* %p, <4 x i32> %mask) {
986 ; AVX512F-LABEL: truncstore_v4i32_v4i16:
988 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
989 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
990 ; AVX512F-NEXT: kmovw %k0, %eax
991 ; AVX512F-NEXT: testb $1, %al
992 ; AVX512F-NEXT: je .LBB13_2
993 ; AVX512F-NEXT: # %bb.1: # %cond.store
994 ; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
995 ; AVX512F-NEXT: .LBB13_2: # %else
996 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
997 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0
998 ; AVX512F-NEXT: kmovw %k0, %eax
999 ; AVX512F-NEXT: testb $1, %al
1000 ; AVX512F-NEXT: je .LBB13_4
1001 ; AVX512F-NEXT: # %bb.3: # %cond.store1
1002 ; AVX512F-NEXT: vpextrw $2, %xmm0, 2(%rdi)
1003 ; AVX512F-NEXT: .LBB13_4: # %else2
1004 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1005 ; AVX512F-NEXT: kshiftrw $2, %k0, %k0
1006 ; AVX512F-NEXT: kmovw %k0, %eax
1007 ; AVX512F-NEXT: testb $1, %al
1008 ; AVX512F-NEXT: je .LBB13_6
1009 ; AVX512F-NEXT: # %bb.5: # %cond.store3
1010 ; AVX512F-NEXT: vpextrw $4, %xmm0, 4(%rdi)
1011 ; AVX512F-NEXT: .LBB13_6: # %else4
1012 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1013 ; AVX512F-NEXT: kshiftrw $3, %k0, %k0
1014 ; AVX512F-NEXT: kmovw %k0, %eax
1015 ; AVX512F-NEXT: testb $1, %al
1016 ; AVX512F-NEXT: je .LBB13_8
1017 ; AVX512F-NEXT: # %bb.7: # %cond.store5
1018 ; AVX512F-NEXT: vpextrw $6, %xmm0, 6(%rdi)
1019 ; AVX512F-NEXT: .LBB13_8: # %else6
1020 ; AVX512F-NEXT: vzeroupper
1021 ; AVX512F-NEXT: retq
1023 ; AVX512BW-LABEL: truncstore_v4i32_v4i16:
1024 ; AVX512BW: # %bb.0:
1025 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1026 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
1027 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
1028 ; AVX512BW-NEXT: kshiftld $28, %k0, %k0
1029 ; AVX512BW-NEXT: kshiftrd $28, %k0, %k1
1030 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
1031 ; AVX512BW-NEXT: vzeroupper
1032 ; AVX512BW-NEXT: retq
1034 ; AVX512BWVL-LABEL: truncstore_v4i32_v4i16:
1035 ; AVX512BWVL: # %bb.0:
1036 ; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
1037 ; AVX512BWVL-NEXT: vpmovdw %xmm0, (%rdi) {%k1}
1038 ; AVX512BWVL-NEXT: retq
; IR under test: %a is the per-lane store predicate (mask element != 0) and %b
; is the narrowed value. The i32 1 operand of llvm.masked.store is its
; alignment argument.
1039 %a = icmp ne <4 x i32> %mask, zeroinitializer
1040 %b = trunc <4 x i32> %x to <4 x i16>
1041 call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %b, <4 x i16>* %p, i32 1, <4 x i1> %a)
; Masked truncating store test: <4 x i32> %x is truncated to <4 x i8> and
; written to %p only in lanes whose %mask element is non-zero.
; Expected codegen recorded by the check lines below, grouped by check prefix
; (the prefixes are bound to feature sets by the RUN lines at the top of the file):
;  - AVX512F checks: no separate narrowing instruction is expected; each of the
;    4 mask bits is tested in turn and the low byte of the selected dword
;    (byte index 4*lane) is written with vpextrb at byte offset lane.
;  - AVX512BW checks: a vpshufb gathers bytes 0,4,8,12, the mask is trimmed to
;    its low 4 bits with a kshiftlq/kshiftrq $60 pair, and one masked vmovdqu8
;    performs the store.
;  - AVX512BWVL checks: a single masked vpmovdb on xmm registers; no
;    vzeroupper is expected in this variant.
; NOTE(review): no check lines using the AVX512 or AVX512VL prefixes are visible
; for this function, so the avx512vl-only configuration appears to be unchecked
; here - confirm against the full file.
1045 define void @truncstore_v4i32_v4i8(<4 x i32> %x, <4 x i8>* %p, <4 x i32> %mask) {
1046 ; AVX512F-LABEL: truncstore_v4i32_v4i8:
1048 ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1049 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1050 ; AVX512F-NEXT: kmovw %k0, %eax
1051 ; AVX512F-NEXT: testb $1, %al
1052 ; AVX512F-NEXT: je .LBB14_2
1053 ; AVX512F-NEXT: # %bb.1: # %cond.store
1054 ; AVX512F-NEXT: vpextrb $0, %xmm0, (%rdi)
1055 ; AVX512F-NEXT: .LBB14_2: # %else
1056 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1057 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0
1058 ; AVX512F-NEXT: kmovw %k0, %eax
1059 ; AVX512F-NEXT: testb $1, %al
1060 ; AVX512F-NEXT: je .LBB14_4
1061 ; AVX512F-NEXT: # %bb.3: # %cond.store1
1062 ; AVX512F-NEXT: vpextrb $4, %xmm0, 1(%rdi)
1063 ; AVX512F-NEXT: .LBB14_4: # %else2
1064 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1065 ; AVX512F-NEXT: kshiftrw $2, %k0, %k0
1066 ; AVX512F-NEXT: kmovw %k0, %eax
1067 ; AVX512F-NEXT: testb $1, %al
1068 ; AVX512F-NEXT: je .LBB14_6
1069 ; AVX512F-NEXT: # %bb.5: # %cond.store3
1070 ; AVX512F-NEXT: vpextrb $8, %xmm0, 2(%rdi)
1071 ; AVX512F-NEXT: .LBB14_6: # %else4
1072 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1073 ; AVX512F-NEXT: kshiftrw $3, %k0, %k0
1074 ; AVX512F-NEXT: kmovw %k0, %eax
1075 ; AVX512F-NEXT: testb $1, %al
1076 ; AVX512F-NEXT: je .LBB14_8
1077 ; AVX512F-NEXT: # %bb.7: # %cond.store5
1078 ; AVX512F-NEXT: vpextrb $12, %xmm0, 3(%rdi)
1079 ; AVX512F-NEXT: .LBB14_8: # %else6
1080 ; AVX512F-NEXT: vzeroupper
1081 ; AVX512F-NEXT: retq
1083 ; AVX512BW-LABEL: truncstore_v4i32_v4i8:
1084 ; AVX512BW: # %bb.0:
1085 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1086 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
1087 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
1088 ; AVX512BW-NEXT: kshiftlq $60, %k0, %k0
1089 ; AVX512BW-NEXT: kshiftrq $60, %k0, %k1
1090 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
1091 ; AVX512BW-NEXT: vzeroupper
1092 ; AVX512BW-NEXT: retq
1094 ; AVX512BWVL-LABEL: truncstore_v4i32_v4i8:
1095 ; AVX512BWVL: # %bb.0:
1096 ; AVX512BWVL-NEXT: vptestmd %xmm1, %xmm1, %k1
1097 ; AVX512BWVL-NEXT: vpmovdb %xmm0, (%rdi) {%k1}
1098 ; AVX512BWVL-NEXT: retq
; IR under test: %a is the per-lane store predicate (mask element != 0) and %b
; is the narrowed value. The i32 1 operand of llvm.masked.store is its
; alignment argument.
1099 %a = icmp ne <4 x i32> %mask, zeroinitializer
1100 %b = trunc <4 x i32> %x to <4 x i8>
1101 call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %b, <4 x i8>* %p, i32 1, <4 x i1> %a)
; Masked truncating store test: <32 x i16> %x is truncated to <32 x i8> and
; written to %p only in lanes whose <32 x i8> %mask element is non-zero.
; Expected codegen recorded by the check lines below, grouped by check prefix
; (the prefixes are bound to feature sets by the RUN lines at the top of the file):
;  - AVX512F checks: %x arrives in ymm0/ymm1 and the mask in ymm2. Each half
;    of %x is narrowed with vpmovzxwd + vpmovdb and the halves are rejoined
;    with vinserti128. The mask predicate is rebuilt repeatedly from ymm2
;    (vpcmpeqb against zero, a vpternlogq $15 that appears to invert the
;    compare result, vpmovsxbd, vptestmd); lanes 16-31 use the upper 128 bits
;    obtained with vextracti128. Each of the 32 bits is tested in turn and the
;    selected byte is written with vpextrb at byte offset lane.
;  - AVX512BW checks: vptestmb on the widened mask register and one masked
;    vpmovwb straight to memory.
;  - AVX512BWVL checks: vptestmb on ymm1 and one masked vpmovwb.
; NOTE(review): no check lines using the AVX512 or AVX512VL prefixes are visible
; for this function, so the avx512vl-only configuration appears to be unchecked
; here - confirm against the full file.
1105 define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %mask) {
1106 ; AVX512F-LABEL: truncstore_v32i16_v32i8:
1108 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
1109 ; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm3
1110 ; AVX512F-NEXT: vmovdqa64 %zmm3, %zmm4
1111 ; AVX512F-NEXT: vpternlogq $15, %zmm3, %zmm3, %zmm4
1112 ; AVX512F-NEXT: vpmovsxbd %xmm4, %zmm4
1113 ; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k0
1114 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1115 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
1116 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
1117 ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
1118 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1119 ; AVX512F-NEXT: kmovw %k0, %eax
1120 ; AVX512F-NEXT: testb $1, %al
1121 ; AVX512F-NEXT: je .LBB15_2
1122 ; AVX512F-NEXT: # %bb.1: # %cond.store
1123 ; AVX512F-NEXT: vpextrb $0, %xmm0, (%rdi)
1124 ; AVX512F-NEXT: .LBB15_2: # %else
1125 ; AVX512F-NEXT: vpternlogq $15, %zmm3, %zmm3, %zmm3
1126 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm1
1127 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1128 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0
1129 ; AVX512F-NEXT: kmovw %k0, %eax
1130 ; AVX512F-NEXT: testb $1, %al
1131 ; AVX512F-NEXT: je .LBB15_4
1132 ; AVX512F-NEXT: # %bb.3: # %cond.store1
1133 ; AVX512F-NEXT: vpextrb $1, %xmm0, 1(%rdi)
1134 ; AVX512F-NEXT: .LBB15_4: # %else2
1135 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1136 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1137 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm3
1138 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm3
1139 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1140 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1141 ; AVX512F-NEXT: kshiftrw $2, %k0, %k0
1142 ; AVX512F-NEXT: kmovw %k0, %eax
1143 ; AVX512F-NEXT: testb $1, %al
1144 ; AVX512F-NEXT: je .LBB15_6
1145 ; AVX512F-NEXT: # %bb.5: # %cond.store3
1146 ; AVX512F-NEXT: vpextrb $2, %xmm0, 2(%rdi)
1147 ; AVX512F-NEXT: .LBB15_6: # %else4
1148 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1149 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1150 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1151 ; AVX512F-NEXT: kshiftrw $3, %k0, %k0
1152 ; AVX512F-NEXT: kmovw %k0, %eax
1153 ; AVX512F-NEXT: testb $1, %al
1154 ; AVX512F-NEXT: je .LBB15_8
1155 ; AVX512F-NEXT: # %bb.7: # %cond.store5
1156 ; AVX512F-NEXT: vpextrb $3, %xmm0, 3(%rdi)
1157 ; AVX512F-NEXT: .LBB15_8: # %else6
1158 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1159 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1160 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm3
1161 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm3
1162 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1163 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1164 ; AVX512F-NEXT: kshiftrw $4, %k0, %k0
1165 ; AVX512F-NEXT: kmovw %k0, %eax
1166 ; AVX512F-NEXT: testb $1, %al
1167 ; AVX512F-NEXT: je .LBB15_10
1168 ; AVX512F-NEXT: # %bb.9: # %cond.store7
1169 ; AVX512F-NEXT: vpextrb $4, %xmm0, 4(%rdi)
1170 ; AVX512F-NEXT: .LBB15_10: # %else8
1171 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1172 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1173 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1174 ; AVX512F-NEXT: kshiftrw $5, %k0, %k0
1175 ; AVX512F-NEXT: kmovw %k0, %eax
1176 ; AVX512F-NEXT: testb $1, %al
1177 ; AVX512F-NEXT: je .LBB15_12
1178 ; AVX512F-NEXT: # %bb.11: # %cond.store9
1179 ; AVX512F-NEXT: vpextrb $5, %xmm0, 5(%rdi)
1180 ; AVX512F-NEXT: .LBB15_12: # %else10
1181 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1182 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1183 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm3
1184 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm3
1185 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1186 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1187 ; AVX512F-NEXT: kshiftrw $6, %k0, %k0
1188 ; AVX512F-NEXT: kmovw %k0, %eax
1189 ; AVX512F-NEXT: testb $1, %al
1190 ; AVX512F-NEXT: je .LBB15_14
1191 ; AVX512F-NEXT: # %bb.13: # %cond.store11
1192 ; AVX512F-NEXT: vpextrb $6, %xmm0, 6(%rdi)
1193 ; AVX512F-NEXT: .LBB15_14: # %else12
1194 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1195 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1196 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1197 ; AVX512F-NEXT: kshiftrw $7, %k0, %k0
1198 ; AVX512F-NEXT: kmovw %k0, %eax
1199 ; AVX512F-NEXT: testb $1, %al
1200 ; AVX512F-NEXT: je .LBB15_16
1201 ; AVX512F-NEXT: # %bb.15: # %cond.store13
1202 ; AVX512F-NEXT: vpextrb $7, %xmm0, 7(%rdi)
1203 ; AVX512F-NEXT: .LBB15_16: # %else14
1204 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1205 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1206 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm3
1207 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm3
1208 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1209 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1210 ; AVX512F-NEXT: kshiftrw $8, %k0, %k0
1211 ; AVX512F-NEXT: kmovw %k0, %eax
1212 ; AVX512F-NEXT: testb $1, %al
1213 ; AVX512F-NEXT: je .LBB15_18
1214 ; AVX512F-NEXT: # %bb.17: # %cond.store15
1215 ; AVX512F-NEXT: vpextrb $8, %xmm0, 8(%rdi)
1216 ; AVX512F-NEXT: .LBB15_18: # %else16
1217 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1218 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1219 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1220 ; AVX512F-NEXT: kshiftrw $9, %k0, %k0
1221 ; AVX512F-NEXT: kmovw %k0, %eax
1222 ; AVX512F-NEXT: testb $1, %al
1223 ; AVX512F-NEXT: je .LBB15_20
1224 ; AVX512F-NEXT: # %bb.19: # %cond.store17
1225 ; AVX512F-NEXT: vpextrb $9, %xmm0, 9(%rdi)
1226 ; AVX512F-NEXT: .LBB15_20: # %else18
1227 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1228 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1229 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm3
1230 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm3
1231 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1232 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1233 ; AVX512F-NEXT: kshiftrw $10, %k0, %k0
1234 ; AVX512F-NEXT: kmovw %k0, %eax
1235 ; AVX512F-NEXT: testb $1, %al
1236 ; AVX512F-NEXT: je .LBB15_22
1237 ; AVX512F-NEXT: # %bb.21: # %cond.store19
1238 ; AVX512F-NEXT: vpextrb $10, %xmm0, 10(%rdi)
1239 ; AVX512F-NEXT: .LBB15_22: # %else20
1240 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1241 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1242 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1243 ; AVX512F-NEXT: kshiftrw $11, %k0, %k0
1244 ; AVX512F-NEXT: kmovw %k0, %eax
1245 ; AVX512F-NEXT: testb $1, %al
1246 ; AVX512F-NEXT: je .LBB15_24
1247 ; AVX512F-NEXT: # %bb.23: # %cond.store21
1248 ; AVX512F-NEXT: vpextrb $11, %xmm0, 11(%rdi)
1249 ; AVX512F-NEXT: .LBB15_24: # %else22
1250 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1251 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1252 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm3
1253 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm3
1254 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1255 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1256 ; AVX512F-NEXT: kshiftrw $12, %k0, %k0
1257 ; AVX512F-NEXT: kmovw %k0, %eax
1258 ; AVX512F-NEXT: testb $1, %al
1259 ; AVX512F-NEXT: je .LBB15_26
1260 ; AVX512F-NEXT: # %bb.25: # %cond.store23
1261 ; AVX512F-NEXT: vpextrb $12, %xmm0, 12(%rdi)
1262 ; AVX512F-NEXT: .LBB15_26: # %else24
1263 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1264 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1265 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1266 ; AVX512F-NEXT: kshiftrw $13, %k0, %k0
1267 ; AVX512F-NEXT: kmovw %k0, %eax
1268 ; AVX512F-NEXT: testb $1, %al
1269 ; AVX512F-NEXT: je .LBB15_28
1270 ; AVX512F-NEXT: # %bb.27: # %cond.store25
1271 ; AVX512F-NEXT: vpextrb $13, %xmm0, 13(%rdi)
1272 ; AVX512F-NEXT: .LBB15_28: # %else26
1273 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1274 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1275 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm3
1276 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm3
1277 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1278 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1279 ; AVX512F-NEXT: kshiftrw $14, %k0, %k0
1280 ; AVX512F-NEXT: kmovw %k0, %eax
1281 ; AVX512F-NEXT: testb $1, %al
1282 ; AVX512F-NEXT: je .LBB15_30
1283 ; AVX512F-NEXT: # %bb.29: # %cond.store27
1284 ; AVX512F-NEXT: vpextrb $14, %xmm0, 14(%rdi)
1285 ; AVX512F-NEXT: .LBB15_30: # %else28
1286 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1287 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1288 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1289 ; AVX512F-NEXT: kshiftrw $15, %k0, %k0
1290 ; AVX512F-NEXT: kmovw %k0, %eax
1291 ; AVX512F-NEXT: testb $1, %al
1292 ; AVX512F-NEXT: je .LBB15_32
1293 ; AVX512F-NEXT: # %bb.31: # %cond.store29
1294 ; AVX512F-NEXT: vpextrb $15, %xmm0, 15(%rdi)
1295 ; AVX512F-NEXT: .LBB15_32: # %else30
1296 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1297 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1298 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
1299 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm3
1300 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm3
1301 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1302 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1303 ; AVX512F-NEXT: kmovw %k0, %eax
1304 ; AVX512F-NEXT: testb $1, %al
1305 ; AVX512F-NEXT: je .LBB15_34
1306 ; AVX512F-NEXT: # %bb.33: # %cond.store31
1307 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
1308 ; AVX512F-NEXT: vpextrb $0, %xmm3, 16(%rdi)
1309 ; AVX512F-NEXT: .LBB15_34: # %else32
1310 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1311 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1312 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1313 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0
1314 ; AVX512F-NEXT: kmovw %k0, %eax
1315 ; AVX512F-NEXT: testb $1, %al
1316 ; AVX512F-NEXT: je .LBB15_36
1317 ; AVX512F-NEXT: # %bb.35: # %cond.store33
1318 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
1319 ; AVX512F-NEXT: vpextrb $1, %xmm1, 17(%rdi)
1320 ; AVX512F-NEXT: .LBB15_36: # %else34
1321 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1322 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1323 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
1324 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm3
1325 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm3
1326 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1327 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1328 ; AVX512F-NEXT: kshiftrw $2, %k0, %k0
1329 ; AVX512F-NEXT: kmovw %k0, %eax
1330 ; AVX512F-NEXT: testb $1, %al
1331 ; AVX512F-NEXT: je .LBB15_38
1332 ; AVX512F-NEXT: # %bb.37: # %cond.store35
1333 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
1334 ; AVX512F-NEXT: vpextrb $2, %xmm3, 18(%rdi)
1335 ; AVX512F-NEXT: .LBB15_38: # %else36
1336 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1337 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1338 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1339 ; AVX512F-NEXT: kshiftrw $3, %k0, %k0
1340 ; AVX512F-NEXT: kmovw %k0, %eax
1341 ; AVX512F-NEXT: testb $1, %al
1342 ; AVX512F-NEXT: je .LBB15_40
1343 ; AVX512F-NEXT: # %bb.39: # %cond.store37
1344 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
1345 ; AVX512F-NEXT: vpextrb $3, %xmm1, 19(%rdi)
1346 ; AVX512F-NEXT: .LBB15_40: # %else38
1347 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1348 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1349 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
1350 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm3
1351 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm3
1352 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1353 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1354 ; AVX512F-NEXT: kshiftrw $4, %k0, %k0
1355 ; AVX512F-NEXT: kmovw %k0, %eax
1356 ; AVX512F-NEXT: testb $1, %al
1357 ; AVX512F-NEXT: je .LBB15_42
1358 ; AVX512F-NEXT: # %bb.41: # %cond.store39
1359 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
1360 ; AVX512F-NEXT: vpextrb $4, %xmm3, 20(%rdi)
1361 ; AVX512F-NEXT: .LBB15_42: # %else40
1362 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1363 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1364 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1365 ; AVX512F-NEXT: kshiftrw $5, %k0, %k0
1366 ; AVX512F-NEXT: kmovw %k0, %eax
1367 ; AVX512F-NEXT: testb $1, %al
1368 ; AVX512F-NEXT: je .LBB15_44
1369 ; AVX512F-NEXT: # %bb.43: # %cond.store41
1370 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
1371 ; AVX512F-NEXT: vpextrb $5, %xmm1, 21(%rdi)
1372 ; AVX512F-NEXT: .LBB15_44: # %else42
1373 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1374 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1375 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
1376 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm3
1377 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm3
1378 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1379 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1380 ; AVX512F-NEXT: kshiftrw $6, %k0, %k0
1381 ; AVX512F-NEXT: kmovw %k0, %eax
1382 ; AVX512F-NEXT: testb $1, %al
1383 ; AVX512F-NEXT: je .LBB15_46
1384 ; AVX512F-NEXT: # %bb.45: # %cond.store43
1385 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
1386 ; AVX512F-NEXT: vpextrb $6, %xmm3, 22(%rdi)
1387 ; AVX512F-NEXT: .LBB15_46: # %else44
1388 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1389 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1390 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1391 ; AVX512F-NEXT: kshiftrw $7, %k0, %k0
1392 ; AVX512F-NEXT: kmovw %k0, %eax
1393 ; AVX512F-NEXT: testb $1, %al
1394 ; AVX512F-NEXT: je .LBB15_48
1395 ; AVX512F-NEXT: # %bb.47: # %cond.store45
1396 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
1397 ; AVX512F-NEXT: vpextrb $7, %xmm1, 23(%rdi)
1398 ; AVX512F-NEXT: .LBB15_48: # %else46
1399 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1400 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1401 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
1402 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm3
1403 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm3
1404 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1405 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1406 ; AVX512F-NEXT: kshiftrw $8, %k0, %k0
1407 ; AVX512F-NEXT: kmovw %k0, %eax
1408 ; AVX512F-NEXT: testb $1, %al
1409 ; AVX512F-NEXT: je .LBB15_50
1410 ; AVX512F-NEXT: # %bb.49: # %cond.store47
1411 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
1412 ; AVX512F-NEXT: vpextrb $8, %xmm3, 24(%rdi)
1413 ; AVX512F-NEXT: .LBB15_50: # %else48
1414 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1415 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1416 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1417 ; AVX512F-NEXT: kshiftrw $9, %k0, %k0
1418 ; AVX512F-NEXT: kmovw %k0, %eax
1419 ; AVX512F-NEXT: testb $1, %al
1420 ; AVX512F-NEXT: je .LBB15_52
1421 ; AVX512F-NEXT: # %bb.51: # %cond.store49
1422 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
1423 ; AVX512F-NEXT: vpextrb $9, %xmm1, 25(%rdi)
1424 ; AVX512F-NEXT: .LBB15_52: # %else50
1425 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1426 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1427 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
1428 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm3
1429 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm3
1430 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1431 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1432 ; AVX512F-NEXT: kshiftrw $10, %k0, %k0
1433 ; AVX512F-NEXT: kmovw %k0, %eax
1434 ; AVX512F-NEXT: testb $1, %al
1435 ; AVX512F-NEXT: je .LBB15_54
1436 ; AVX512F-NEXT: # %bb.53: # %cond.store51
1437 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
1438 ; AVX512F-NEXT: vpextrb $10, %xmm3, 26(%rdi)
1439 ; AVX512F-NEXT: .LBB15_54: # %else52
1440 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1441 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1442 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1443 ; AVX512F-NEXT: kshiftrw $11, %k0, %k0
1444 ; AVX512F-NEXT: kmovw %k0, %eax
1445 ; AVX512F-NEXT: testb $1, %al
1446 ; AVX512F-NEXT: je .LBB15_56
1447 ; AVX512F-NEXT: # %bb.55: # %cond.store53
1448 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
1449 ; AVX512F-NEXT: vpextrb $11, %xmm1, 27(%rdi)
1450 ; AVX512F-NEXT: .LBB15_56: # %else54
1451 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1452 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1453 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
1454 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm3
1455 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm3
1456 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1457 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1458 ; AVX512F-NEXT: kshiftrw $12, %k0, %k0
1459 ; AVX512F-NEXT: kmovw %k0, %eax
1460 ; AVX512F-NEXT: testb $1, %al
1461 ; AVX512F-NEXT: je .LBB15_58
1462 ; AVX512F-NEXT: # %bb.57: # %cond.store55
1463 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm3
1464 ; AVX512F-NEXT: vpextrb $12, %xmm3, 28(%rdi)
1465 ; AVX512F-NEXT: .LBB15_58: # %else56
1466 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1467 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1468 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1469 ; AVX512F-NEXT: kshiftrw $13, %k0, %k0
1470 ; AVX512F-NEXT: kmovw %k0, %eax
1471 ; AVX512F-NEXT: testb $1, %al
1472 ; AVX512F-NEXT: je .LBB15_60
1473 ; AVX512F-NEXT: # %bb.59: # %cond.store57
1474 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
1475 ; AVX512F-NEXT: vpextrb $13, %xmm1, 29(%rdi)
1476 ; AVX512F-NEXT: .LBB15_60: # %else58
1477 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
1478 ; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
1479 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
1480 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm2
1481 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm2
1482 ; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1483 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1484 ; AVX512F-NEXT: kshiftrw $14, %k0, %k0
1485 ; AVX512F-NEXT: kmovw %k0, %eax
1486 ; AVX512F-NEXT: testb $1, %al
1487 ; AVX512F-NEXT: je .LBB15_62
1488 ; AVX512F-NEXT: # %bb.61: # %cond.store59
1489 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
1490 ; AVX512F-NEXT: vpextrb $14, %xmm2, 30(%rdi)
1491 ; AVX512F-NEXT: .LBB15_62: # %else60
1492 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1493 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1494 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1495 ; AVX512F-NEXT: kshiftrw $15, %k0, %k0
1496 ; AVX512F-NEXT: kmovw %k0, %eax
1497 ; AVX512F-NEXT: testb $1, %al
1498 ; AVX512F-NEXT: je .LBB15_64
1499 ; AVX512F-NEXT: # %bb.63: # %cond.store61
1500 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
1501 ; AVX512F-NEXT: vpextrb $15, %xmm0, 31(%rdi)
1502 ; AVX512F-NEXT: .LBB15_64: # %else62
1503 ; AVX512F-NEXT: vzeroupper
1504 ; AVX512F-NEXT: retq
1506 ; AVX512BW-LABEL: truncstore_v32i16_v32i8:
1507 ; AVX512BW: # %bb.0:
1508 ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1509 ; AVX512BW-NEXT: vptestmb %zmm1, %zmm1, %k1
1510 ; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
1511 ; AVX512BW-NEXT: vzeroupper
1512 ; AVX512BW-NEXT: retq
1514 ; AVX512BWVL-LABEL: truncstore_v32i16_v32i8:
1515 ; AVX512BWVL: # %bb.0:
1516 ; AVX512BWVL-NEXT: vptestmb %ymm1, %ymm1, %k1
1517 ; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
1518 ; AVX512BWVL-NEXT: vzeroupper
1519 ; AVX512BWVL-NEXT: retq
; IR under test: %a is the per-lane store predicate (mask element != 0) and %b
; is the narrowed value. The i32 1 operand of llvm.masked.store is its
; alignment argument.
1520 %a = icmp ne <32 x i8> %mask, zeroinitializer
1521 %b = trunc <32 x i16> %x to <32 x i8>
1522 call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> %b, <32 x i8>* %p, i32 1, <32 x i1> %a)
1526 define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %mask) {
1527 ; AVX512F-LABEL: truncstore_v16i16_v16i8:
1529 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1530 ; AVX512F-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
1531 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3
1532 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm3
1533 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1534 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1535 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1536 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
1537 ; AVX512F-NEXT: kmovw %k0, %eax
1538 ; AVX512F-NEXT: testb $1, %al
1539 ; AVX512F-NEXT: je .LBB16_2
1540 ; AVX512F-NEXT: # %bb.1: # %cond.store
1541 ; AVX512F-NEXT: vpextrb $0, %xmm0, (%rdi)
1542 ; AVX512F-NEXT: .LBB16_2: # %else
1543 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
1544 ; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1545 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1546 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0
1547 ; AVX512F-NEXT: kmovw %k0, %eax
1548 ; AVX512F-NEXT: testb $1, %al
1549 ; AVX512F-NEXT: je .LBB16_4
1550 ; AVX512F-NEXT: # %bb.3: # %cond.store1
1551 ; AVX512F-NEXT: vpextrb $1, %xmm0, 1(%rdi)
1552 ; AVX512F-NEXT: .LBB16_4: # %else2
1553 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1554 ; AVX512F-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
1555 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3
1556 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm3
1557 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1558 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1559 ; AVX512F-NEXT: kshiftrw $2, %k0, %k0
1560 ; AVX512F-NEXT: kmovw %k0, %eax
1561 ; AVX512F-NEXT: testb $1, %al
1562 ; AVX512F-NEXT: je .LBB16_6
1563 ; AVX512F-NEXT: # %bb.5: # %cond.store3
1564 ; AVX512F-NEXT: vpextrb $2, %xmm0, 2(%rdi)
1565 ; AVX512F-NEXT: .LBB16_6: # %else4
1566 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
1567 ; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1568 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1569 ; AVX512F-NEXT: kshiftrw $3, %k0, %k0
1570 ; AVX512F-NEXT: kmovw %k0, %eax
1571 ; AVX512F-NEXT: testb $1, %al
1572 ; AVX512F-NEXT: je .LBB16_8
1573 ; AVX512F-NEXT: # %bb.7: # %cond.store5
1574 ; AVX512F-NEXT: vpextrb $3, %xmm0, 3(%rdi)
1575 ; AVX512F-NEXT: .LBB16_8: # %else6
1576 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1577 ; AVX512F-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
1578 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3
1579 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm3
1580 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1581 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1582 ; AVX512F-NEXT: kshiftrw $4, %k0, %k0
1583 ; AVX512F-NEXT: kmovw %k0, %eax
1584 ; AVX512F-NEXT: testb $1, %al
1585 ; AVX512F-NEXT: je .LBB16_10
1586 ; AVX512F-NEXT: # %bb.9: # %cond.store7
1587 ; AVX512F-NEXT: vpextrb $4, %xmm0, 4(%rdi)
1588 ; AVX512F-NEXT: .LBB16_10: # %else8
1589 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
1590 ; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1591 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1592 ; AVX512F-NEXT: kshiftrw $5, %k0, %k0
1593 ; AVX512F-NEXT: kmovw %k0, %eax
1594 ; AVX512F-NEXT: testb $1, %al
1595 ; AVX512F-NEXT: je .LBB16_12
1596 ; AVX512F-NEXT: # %bb.11: # %cond.store9
1597 ; AVX512F-NEXT: vpextrb $5, %xmm0, 5(%rdi)
1598 ; AVX512F-NEXT: .LBB16_12: # %else10
1599 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1600 ; AVX512F-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
1601 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3
1602 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm3
1603 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1604 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1605 ; AVX512F-NEXT: kshiftrw $6, %k0, %k0
1606 ; AVX512F-NEXT: kmovw %k0, %eax
1607 ; AVX512F-NEXT: testb $1, %al
1608 ; AVX512F-NEXT: je .LBB16_14
1609 ; AVX512F-NEXT: # %bb.13: # %cond.store11
1610 ; AVX512F-NEXT: vpextrb $6, %xmm0, 6(%rdi)
1611 ; AVX512F-NEXT: .LBB16_14: # %else12
1612 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
1613 ; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1614 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1615 ; AVX512F-NEXT: kshiftrw $7, %k0, %k0
1616 ; AVX512F-NEXT: kmovw %k0, %eax
1617 ; AVX512F-NEXT: testb $1, %al
1618 ; AVX512F-NEXT: je .LBB16_16
1619 ; AVX512F-NEXT: # %bb.15: # %cond.store13
1620 ; AVX512F-NEXT: vpextrb $7, %xmm0, 7(%rdi)
1621 ; AVX512F-NEXT: .LBB16_16: # %else14
1622 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1623 ; AVX512F-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
1624 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3
1625 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm3
1626 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1627 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1628 ; AVX512F-NEXT: kshiftrw $8, %k0, %k0
1629 ; AVX512F-NEXT: kmovw %k0, %eax
1630 ; AVX512F-NEXT: testb $1, %al
1631 ; AVX512F-NEXT: je .LBB16_18
1632 ; AVX512F-NEXT: # %bb.17: # %cond.store15
1633 ; AVX512F-NEXT: vpextrb $8, %xmm0, 8(%rdi)
1634 ; AVX512F-NEXT: .LBB16_18: # %else16
1635 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
1636 ; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1637 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1638 ; AVX512F-NEXT: kshiftrw $9, %k0, %k0
1639 ; AVX512F-NEXT: kmovw %k0, %eax
1640 ; AVX512F-NEXT: testb $1, %al
1641 ; AVX512F-NEXT: je .LBB16_20
1642 ; AVX512F-NEXT: # %bb.19: # %cond.store17
1643 ; AVX512F-NEXT: vpextrb $9, %xmm0, 9(%rdi)
1644 ; AVX512F-NEXT: .LBB16_20: # %else18
1645 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1646 ; AVX512F-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
1647 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3
1648 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm3
1649 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1650 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1651 ; AVX512F-NEXT: kshiftrw $10, %k0, %k0
1652 ; AVX512F-NEXT: kmovw %k0, %eax
1653 ; AVX512F-NEXT: testb $1, %al
1654 ; AVX512F-NEXT: je .LBB16_22
1655 ; AVX512F-NEXT: # %bb.21: # %cond.store19
1656 ; AVX512F-NEXT: vpextrb $10, %xmm0, 10(%rdi)
1657 ; AVX512F-NEXT: .LBB16_22: # %else20
1658 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
1659 ; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1660 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1661 ; AVX512F-NEXT: kshiftrw $11, %k0, %k0
1662 ; AVX512F-NEXT: kmovw %k0, %eax
1663 ; AVX512F-NEXT: testb $1, %al
1664 ; AVX512F-NEXT: je .LBB16_24
1665 ; AVX512F-NEXT: # %bb.23: # %cond.store21
1666 ; AVX512F-NEXT: vpextrb $11, %xmm0, 11(%rdi)
1667 ; AVX512F-NEXT: .LBB16_24: # %else22
1668 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1669 ; AVX512F-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm2
1670 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3
1671 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm3
1672 ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
1673 ; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
1674 ; AVX512F-NEXT: kshiftrw $12, %k0, %k0
1675 ; AVX512F-NEXT: kmovw %k0, %eax
1676 ; AVX512F-NEXT: testb $1, %al
1677 ; AVX512F-NEXT: je .LBB16_26
1678 ; AVX512F-NEXT: # %bb.25: # %cond.store23
1679 ; AVX512F-NEXT: vpextrb $12, %xmm0, 12(%rdi)
1680 ; AVX512F-NEXT: .LBB16_26: # %else24
1681 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
1682 ; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1683 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1684 ; AVX512F-NEXT: kshiftrw $13, %k0, %k0
1685 ; AVX512F-NEXT: kmovw %k0, %eax
1686 ; AVX512F-NEXT: testb $1, %al
1687 ; AVX512F-NEXT: je .LBB16_28
1688 ; AVX512F-NEXT: # %bb.27: # %cond.store25
1689 ; AVX512F-NEXT: vpextrb $13, %xmm0, 13(%rdi)
1690 ; AVX512F-NEXT: .LBB16_28: # %else26
1691 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1692 ; AVX512F-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
1693 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm2
1694 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm2
1695 ; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
1696 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
1697 ; AVX512F-NEXT: kshiftrw $14, %k0, %k0
1698 ; AVX512F-NEXT: kmovw %k0, %eax
1699 ; AVX512F-NEXT: testb $1, %al
1700 ; AVX512F-NEXT: je .LBB16_30
1701 ; AVX512F-NEXT: # %bb.29: # %cond.store27
1702 ; AVX512F-NEXT: vpextrb $14, %xmm0, 14(%rdi)
1703 ; AVX512F-NEXT: .LBB16_30: # %else28
1704 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1705 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
1706 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
1707 ; AVX512F-NEXT: kshiftrw $15, %k0, %k0
1708 ; AVX512F-NEXT: kmovw %k0, %eax
1709 ; AVX512F-NEXT: testb $1, %al
1710 ; AVX512F-NEXT: je .LBB16_32
1711 ; AVX512F-NEXT: # %bb.31: # %cond.store29
1712 ; AVX512F-NEXT: vpextrb $15, %xmm0, 15(%rdi)
1713 ; AVX512F-NEXT: .LBB16_32: # %else30
1714 ; AVX512F-NEXT: vzeroupper
1715 ; AVX512F-NEXT: retq
1717 ; AVX512BW-LABEL: truncstore_v16i16_v16i8:
1718 ; AVX512BW: # %bb.0:
1719 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1720 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1721 ; AVX512BW-NEXT: vptestmb %zmm1, %zmm1, %k0
1722 ; AVX512BW-NEXT: kmovw %k0, %k1
1723 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1724 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
1725 ; AVX512BW-NEXT: vzeroupper
1726 ; AVX512BW-NEXT: retq
1728 ; AVX512BWVL-LABEL: truncstore_v16i16_v16i8:
1729 ; AVX512BWVL: # %bb.0:
1730 ; AVX512BWVL-NEXT: vptestmb %xmm1, %xmm1, %k1
1731 ; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rdi) {%k1}
1732 ; AVX512BWVL-NEXT: vzeroupper
1733 ; AVX512BWVL-NEXT: retq
1734 %a = icmp ne <16 x i8> %mask, zeroinitializer
1735 %b = trunc <16 x i16> %x to <16 x i8>
1736 call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %b, <16 x i8>* %p, i32 1, <16 x i1> %a)
; Masked truncating store, <8 x i16> -> <8 x i8>.
; The IR at the bottom of this function truncates %x to <8 x i8> and stores it
; to %p through llvm.masked.store, with a lane enabled wherever the matching
; element of %mask is non-zero.
;
; What the assertions below pin down for each check prefix:
;   AVX512F    - a per-lane sequence: the mask is rebuilt, the lane's bit is
;                shifted down (kshiftrw) and tested, and a vpextrb to the
;                matching byte offset of (%rdi) is branched over when clear.
;                Source byte indices are 0,2,4,...,14 (the low byte of each
;                i16 lane); destination offsets are 0..7.
;   AVX512BW   - vpshufb gathers the even bytes, the mask is narrowed to its
;                low 8 bits with a kshiftlq/kshiftrq $56 pair, and a single
;                masked vmovdqu8 performs the store.
;   AVX512BWVL - one vptestmw to form the mask and one masked vpmovwb store.
1740 define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask) {
1741 ; AVX512F-LABEL: truncstore_v8i16_v8i8:
1743 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1744 ; AVX512F-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2
1745 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3
1746 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm3
1747 ; AVX512F-NEXT: vpmovsxwq %xmm3, %zmm3
1748 ; AVX512F-NEXT: vptestmq %zmm3, %zmm3, %k0
1749 ; AVX512F-NEXT: kmovw %k0, %eax
1750 ; AVX512F-NEXT: testb $1, %al
1751 ; AVX512F-NEXT: je .LBB17_2
1752 ; AVX512F-NEXT: # %bb.1: # %cond.store
1753 ; AVX512F-NEXT: vpextrb $0, %xmm0, (%rdi)
1754 ; AVX512F-NEXT: .LBB17_2: # %else
1755 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
1756 ; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2
1757 ; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0
1758 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0
1759 ; AVX512F-NEXT: kmovw %k0, %eax
1760 ; AVX512F-NEXT: testb $1, %al
1761 ; AVX512F-NEXT: je .LBB17_4
1762 ; AVX512F-NEXT: # %bb.3: # %cond.store1
1763 ; AVX512F-NEXT: vpextrb $2, %xmm0, 1(%rdi)
1764 ; AVX512F-NEXT: .LBB17_4: # %else2
1765 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1766 ; AVX512F-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2
1767 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3
1768 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm3
1769 ; AVX512F-NEXT: vpmovsxwq %xmm3, %zmm3
1770 ; AVX512F-NEXT: vptestmq %zmm3, %zmm3, %k0
1771 ; AVX512F-NEXT: kshiftrw $2, %k0, %k0
1772 ; AVX512F-NEXT: kmovw %k0, %eax
1773 ; AVX512F-NEXT: testb $1, %al
1774 ; AVX512F-NEXT: je .LBB17_6
1775 ; AVX512F-NEXT: # %bb.5: # %cond.store3
1776 ; AVX512F-NEXT: vpextrb $4, %xmm0, 2(%rdi)
1777 ; AVX512F-NEXT: .LBB17_6: # %else4
1778 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
1779 ; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2
1780 ; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0
1781 ; AVX512F-NEXT: kshiftrw $3, %k0, %k0
1782 ; AVX512F-NEXT: kmovw %k0, %eax
1783 ; AVX512F-NEXT: testb $1, %al
1784 ; AVX512F-NEXT: je .LBB17_8
1785 ; AVX512F-NEXT: # %bb.7: # %cond.store5
1786 ; AVX512F-NEXT: vpextrb $6, %xmm0, 3(%rdi)
1787 ; AVX512F-NEXT: .LBB17_8: # %else6
1788 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1789 ; AVX512F-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2
1790 ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm3
1791 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm3
1792 ; AVX512F-NEXT: vpmovsxwq %xmm3, %zmm3
1793 ; AVX512F-NEXT: vptestmq %zmm3, %zmm3, %k0
1794 ; AVX512F-NEXT: kshiftrw $4, %k0, %k0
1795 ; AVX512F-NEXT: kmovw %k0, %eax
1796 ; AVX512F-NEXT: testb $1, %al
1797 ; AVX512F-NEXT: je .LBB17_10
1798 ; AVX512F-NEXT: # %bb.9: # %cond.store7
1799 ; AVX512F-NEXT: vpextrb $8, %xmm0, 4(%rdi)
1800 ; AVX512F-NEXT: .LBB17_10: # %else8
1801 ; AVX512F-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
1802 ; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2
1803 ; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0
1804 ; AVX512F-NEXT: kshiftrw $5, %k0, %k0
1805 ; AVX512F-NEXT: kmovw %k0, %eax
1806 ; AVX512F-NEXT: testb $1, %al
1807 ; AVX512F-NEXT: je .LBB17_12
1808 ; AVX512F-NEXT: # %bb.11: # %cond.store9
1809 ; AVX512F-NEXT: vpextrb $10, %xmm0, 5(%rdi)
1810 ; AVX512F-NEXT: .LBB17_12: # %else10
1811 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
1812 ; AVX512F-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
1813 ; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm2
1814 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm2
1815 ; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2
1816 ; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k0
1817 ; AVX512F-NEXT: kshiftrw $6, %k0, %k0
1818 ; AVX512F-NEXT: kmovw %k0, %eax
1819 ; AVX512F-NEXT: testb $1, %al
1820 ; AVX512F-NEXT: je .LBB17_14
1821 ; AVX512F-NEXT: # %bb.13: # %cond.store11
1822 ; AVX512F-NEXT: vpextrb $12, %xmm0, 6(%rdi)
1823 ; AVX512F-NEXT: .LBB17_14: # %else12
1824 ; AVX512F-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
1825 ; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
1826 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0
1827 ; AVX512F-NEXT: kshiftrw $7, %k0, %k0
1828 ; AVX512F-NEXT: kmovw %k0, %eax
1829 ; AVX512F-NEXT: testb $1, %al
1830 ; AVX512F-NEXT: je .LBB17_16
1831 ; AVX512F-NEXT: # %bb.15: # %cond.store13
1832 ; AVX512F-NEXT: vpextrb $14, %xmm0, 7(%rdi)
1833 ; AVX512F-NEXT: .LBB17_16: # %else14
1834 ; AVX512F-NEXT: vzeroupper
1835 ; AVX512F-NEXT: retq
1837 ; AVX512BW-LABEL: truncstore_v8i16_v8i8:
1838 ; AVX512BW: # %bb.0:
1839 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
1840 ; AVX512BW-NEXT: vptestmw %zmm1, %zmm1, %k0
1841 ; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1842 ; AVX512BW-NEXT: kshiftlq $56, %k0, %k0
1843 ; AVX512BW-NEXT: kshiftrq $56, %k0, %k1
1844 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
1845 ; AVX512BW-NEXT: vzeroupper
1846 ; AVX512BW-NEXT: retq
1848 ; AVX512BWVL-LABEL: truncstore_v8i16_v8i8:
1849 ; AVX512BWVL: # %bb.0:
1850 ; AVX512BWVL-NEXT: vptestmw %xmm1, %xmm1, %k1
1851 ; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rdi) {%k1}
1852 ; AVX512BWVL-NEXT: retq
; IR under test: %a is the <8 x i1> lane mask (%mask != 0), %b is the
; truncated value, and the masked store writes %b to %p under %a.
1853 %a = icmp ne <8 x i16> %mask, zeroinitializer
1854 %b = trunc <8 x i16> %x to <8 x i8>
1855 call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %b, <8 x i8>* %p, i32 1, <8 x i1> %a)
; Declarations of the llvm.masked.store intrinsic overloads, one per
; stored vector type (<2|4|8 x i8/i16/i32>, <16 x i8/i16>, <32 x i8>).
; Operands, in order: the vector to store, the destination pointer, an i32
; (the alignment, per the LLVM LangRef), and the <N x i1> per-lane mask.
1859 declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
1860 declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
1861 declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
1862 declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
1863 declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>)
1864 declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>)
1865 declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
1866 declare void @llvm.masked.store.v2i16.p0v2i16(<2 x i16>, <2 x i16>*, i32, <2 x i1>)
1867 declare void @llvm.masked.store.v2i8.p0v2i8(<2 x i8>, <2 x i8>*, i32, <2 x i1>)
1868 declare void @llvm.masked.store.v16i16.p0v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)
1869 declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
1870 declare void @llvm.masked.store.v32i8.p0v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)