; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefixes=SSE,SSE4A
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512BW

; Test codegen for under-aligned nontemporal vector stores

; XMM versions.

define void @test_zero_v2f64_align1(<2 x double>* %dst) nounwind {
; SSE-LABEL: test_zero_v2f64_align1:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    movntiq %rax, 8(%rdi)
; SSE-NEXT:    movntiq %rax, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v2f64_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v2f64_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    retq
  store <2 x double> zeroinitializer, <2 x double>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v4f32_align1(<4 x float>* %dst) nounwind {
; SSE-LABEL: test_zero_v4f32_align1:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    movntiq %rax, 8(%rdi)
; SSE-NEXT:    movntiq %rax, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v4f32_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v4f32_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    retq
  store <4 x float> zeroinitializer, <4 x float>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v2i64_align1(<2 x i64>* %dst) nounwind {
; SSE-LABEL: test_zero_v2i64_align1:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    movntiq %rax, 8(%rdi)
; SSE-NEXT:    movntiq %rax, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v2i64_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v2i64_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    retq
  store <2 x i64> zeroinitializer, <2 x i64>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v4i32_align1(<4 x i32>* %dst) nounwind {
; SSE-LABEL: test_zero_v4i32_align1:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    movntiq %rax, 8(%rdi)
; SSE-NEXT:    movntiq %rax, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v4i32_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v4i32_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    retq
  store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v8i16_align1(<8 x i16>* %dst) nounwind {
; SSE-LABEL: test_zero_v8i16_align1:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    movntiq %rax, 8(%rdi)
; SSE-NEXT:    movntiq %rax, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8i16_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8i16_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    retq
  store <8 x i16> zeroinitializer, <8 x i16>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v16i8_align1(<16 x i8>* %dst) nounwind {
; SSE-LABEL: test_zero_v16i8_align1:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    movntiq %rax, 8(%rdi)
; SSE-NEXT:    movntiq %rax, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v16i8_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v16i8_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    retq
  store <16 x i8> zeroinitializer, <16 x i8>* %dst, align 1, !nontemporal !1
  ret void
}

; YMM versions.

define void @test_zero_v4f64_align1(<4 x double>* %dst) nounwind {
; SSE-LABEL: test_zero_v4f64_align1:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    movntiq %rax, 8(%rdi)
; SSE-NEXT:    movntiq %rax, (%rdi)
; SSE-NEXT:    movntiq %rax, 24(%rdi)
; SSE-NEXT:    movntiq %rax, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v4f64_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v4f64_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    retq
  store <4 x double> zeroinitializer, <4 x double>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v8f32_align1(<8 x float>* %dst) nounwind {
; SSE2-LABEL: test_zero_v8f32_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_zero_v8f32_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    xorl %eax, %eax
; SSE4A-NEXT:    movntiq %rax, 8(%rdi)
; SSE4A-NEXT:    movntiq %rax, 24(%rdi)
; SSE4A-NEXT:    xorps %xmm0, %xmm0
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_zero_v8f32_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_zero_v8f32_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8f32_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    retq
  store <8 x float> zeroinitializer, <8 x float>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v4i64_align1(<4 x i64>* %dst) nounwind {
; SSE2-LABEL: test_zero_v4i64_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_zero_v4i64_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    xorps %xmm0, %xmm0
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_zero_v4i64_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_zero_v4i64_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v4i64_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    retq
  store <4 x i64> zeroinitializer, <4 x i64>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v8i32_align1(<8 x i32>* %dst) nounwind {
; SSE2-LABEL: test_zero_v8i32_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_zero_v8i32_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    xorps %xmm0, %xmm0
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_zero_v8i32_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_zero_v8i32_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8i32_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    retq
  store <8 x i32> zeroinitializer, <8 x i32>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v16i16_align1(<16 x i16>* %dst) nounwind {
; SSE2-LABEL: test_zero_v16i16_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_zero_v16i16_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    xorps %xmm0, %xmm0
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_zero_v16i16_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_zero_v16i16_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v16i16_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    retq
  store <16 x i16> zeroinitializer, <16 x i16>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v32i8_align1(<32 x i8>* %dst) nounwind {
; SSE2-LABEL: test_zero_v32i8_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_zero_v32i8_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    xorps %xmm0, %xmm0
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_zero_v32i8_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_zero_v32i8_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v32i8_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    retq
  store <32 x i8> zeroinitializer, <32 x i8>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v4f64_align16(<4 x double>* %dst) nounwind {
; SSE-LABEL: test_zero_v4f64_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v4f64_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v4f64_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <4 x double> zeroinitializer, <4 x double>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v8f32_align16(<8 x float>* %dst) nounwind {
; SSE-LABEL: test_zero_v8f32_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8f32_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8f32_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <8 x float> zeroinitializer, <8 x float>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v4i64_align16(<4 x i64>* %dst) nounwind {
; SSE-LABEL: test_zero_v4i64_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v4i64_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v4i64_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <4 x i64> zeroinitializer, <4 x i64>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v8i32_align16(<8 x i32>* %dst) nounwind {
; SSE-LABEL: test_zero_v8i32_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8i32_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8i32_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <8 x i32> zeroinitializer, <8 x i32>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v16i16_align16(<16 x i16>* %dst) nounwind {
; SSE-LABEL: test_zero_v16i16_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v16i16_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v16i16_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <16 x i16> zeroinitializer, <16 x i16>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v32i8_align16(<32 x i8>* %dst) nounwind {
; SSE-LABEL: test_zero_v32i8_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v32i8_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v32i8_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <32 x i8> zeroinitializer, <32 x i8>* %dst, align 16, !nontemporal !1
  ret void
}

; ZMM versions.

define void @test_zero_v8f64_align1(<8 x double>* %dst) nounwind {
; SSE-LABEL: test_zero_v8f64_align1:
; SSE:       # %bb.0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    movntiq %rax, 24(%rdi)
; SSE-NEXT:    movntiq %rax, 16(%rdi)
; SSE-NEXT:    movntiq %rax, 8(%rdi)
; SSE-NEXT:    movntiq %rax, (%rdi)
; SSE-NEXT:    movntiq %rax, 56(%rdi)
; SSE-NEXT:    movntiq %rax, 48(%rdi)
; SSE-NEXT:    movntiq %rax, 40(%rdi)
; SSE-NEXT:    movntiq %rax, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8f64_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movntiq %rax, 56(%rdi)
; AVX-NEXT:    movntiq %rax, 48(%rdi)
; AVX-NEXT:    movntiq %rax, 40(%rdi)
; AVX-NEXT:    movntiq %rax, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8f64_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movntiq %rax, 56(%rdi)
; AVX512-NEXT:    movntiq %rax, 48(%rdi)
; AVX512-NEXT:    movntiq %rax, 40(%rdi)
; AVX512-NEXT:    movntiq %rax, 32(%rdi)
; AVX512-NEXT:    retq
  store <8 x double> zeroinitializer, <8 x double>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v16f32_align1(<16 x float>* %dst) nounwind {
; SSE2-LABEL: test_zero_v16f32_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movntiq %rax, 56(%rdi)
; SSE2-NEXT:    movntiq %rax, 48(%rdi)
; SSE2-NEXT:    movntiq %rax, 40(%rdi)
; SSE2-NEXT:    movntiq %rax, 32(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_zero_v16f32_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    xorl %eax, %eax
; SSE4A-NEXT:    movntiq %rax, 24(%rdi)
; SSE4A-NEXT:    movntiq %rax, 8(%rdi)
; SSE4A-NEXT:    movntiq %rax, 56(%rdi)
; SSE4A-NEXT:    movntiq %rax, 40(%rdi)
; SSE4A-NEXT:    xorps %xmm0, %xmm0
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_zero_v16f32_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movntiq %rax, 56(%rdi)
; SSE41-NEXT:    movntiq %rax, 48(%rdi)
; SSE41-NEXT:    movntiq %rax, 40(%rdi)
; SSE41-NEXT:    movntiq %rax, 32(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_zero_v16f32_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movntiq %rax, 56(%rdi)
; AVX-NEXT:    movntiq %rax, 48(%rdi)
; AVX-NEXT:    movntiq %rax, 40(%rdi)
; AVX-NEXT:    movntiq %rax, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v16f32_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movntiq %rax, 56(%rdi)
; AVX512-NEXT:    movntiq %rax, 48(%rdi)
; AVX512-NEXT:    movntiq %rax, 40(%rdi)
; AVX512-NEXT:    movntiq %rax, 32(%rdi)
; AVX512-NEXT:    retq
  store <16 x float> zeroinitializer, <16 x float>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v8i64_align1(<8 x i64>* %dst) nounwind {
; SSE2-LABEL: test_zero_v8i64_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movntiq %rax, 56(%rdi)
; SSE2-NEXT:    movntiq %rax, 48(%rdi)
; SSE2-NEXT:    movntiq %rax, 40(%rdi)
; SSE2-NEXT:    movntiq %rax, 32(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_zero_v8i64_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    xorps %xmm0, %xmm0
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_zero_v8i64_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movntiq %rax, 56(%rdi)
; SSE41-NEXT:    movntiq %rax, 48(%rdi)
; SSE41-NEXT:    movntiq %rax, 40(%rdi)
; SSE41-NEXT:    movntiq %rax, 32(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_zero_v8i64_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movntiq %rax, 56(%rdi)
; AVX-NEXT:    movntiq %rax, 48(%rdi)
; AVX-NEXT:    movntiq %rax, 40(%rdi)
; AVX-NEXT:    movntiq %rax, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8i64_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movntiq %rax, 56(%rdi)
; AVX512-NEXT:    movntiq %rax, 48(%rdi)
; AVX512-NEXT:    movntiq %rax, 40(%rdi)
; AVX512-NEXT:    movntiq %rax, 32(%rdi)
; AVX512-NEXT:    retq
  store <8 x i64> zeroinitializer, <8 x i64>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v16i32_align1(<16 x i32>* %dst) nounwind {
; SSE2-LABEL: test_zero_v16i32_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movntiq %rax, 56(%rdi)
; SSE2-NEXT:    movntiq %rax, 48(%rdi)
; SSE2-NEXT:    movntiq %rax, 40(%rdi)
; SSE2-NEXT:    movntiq %rax, 32(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_zero_v16i32_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    xorps %xmm0, %xmm0
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_zero_v16i32_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movntiq %rax, 56(%rdi)
; SSE41-NEXT:    movntiq %rax, 48(%rdi)
; SSE41-NEXT:    movntiq %rax, 40(%rdi)
; SSE41-NEXT:    movntiq %rax, 32(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_zero_v16i32_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movntiq %rax, 56(%rdi)
; AVX-NEXT:    movntiq %rax, 48(%rdi)
; AVX-NEXT:    movntiq %rax, 40(%rdi)
; AVX-NEXT:    movntiq %rax, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v16i32_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movntiq %rax, 56(%rdi)
; AVX512-NEXT:    movntiq %rax, 48(%rdi)
; AVX512-NEXT:    movntiq %rax, 40(%rdi)
; AVX512-NEXT:    movntiq %rax, 32(%rdi)
; AVX512-NEXT:    retq
  store <16 x i32> zeroinitializer, <16 x i32>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v32i16_align1(<32 x i16>* %dst) nounwind {
; SSE2-LABEL: test_zero_v32i16_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movntiq %rax, 56(%rdi)
; SSE2-NEXT:    movntiq %rax, 48(%rdi)
; SSE2-NEXT:    movntiq %rax, 40(%rdi)
; SSE2-NEXT:    movntiq %rax, 32(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_zero_v32i16_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    xorps %xmm0, %xmm0
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_zero_v32i16_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movntiq %rax, 56(%rdi)
; SSE41-NEXT:    movntiq %rax, 48(%rdi)
; SSE41-NEXT:    movntiq %rax, 40(%rdi)
; SSE41-NEXT:    movntiq %rax, 32(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_zero_v32i16_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movntiq %rax, 56(%rdi)
; AVX-NEXT:    movntiq %rax, 48(%rdi)
; AVX-NEXT:    movntiq %rax, 40(%rdi)
; AVX-NEXT:    movntiq %rax, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v32i16_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movntiq %rax, 56(%rdi)
; AVX512-NEXT:    movntiq %rax, 48(%rdi)
; AVX512-NEXT:    movntiq %rax, 40(%rdi)
; AVX512-NEXT:    movntiq %rax, 32(%rdi)
; AVX512-NEXT:    retq
  store <32 x i16> zeroinitializer, <32 x i16>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v64i8_align1(<64 x i8>* %dst) nounwind {
; SSE2-LABEL: test_zero_v64i8_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movntiq %rax, 56(%rdi)
; SSE2-NEXT:    movntiq %rax, 48(%rdi)
; SSE2-NEXT:    movntiq %rax, 40(%rdi)
; SSE2-NEXT:    movntiq %rax, 32(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_zero_v64i8_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    xorps %xmm0, %xmm0
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_zero_v64i8_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movntiq %rax, 56(%rdi)
; SSE41-NEXT:    movntiq %rax, 48(%rdi)
; SSE41-NEXT:    movntiq %rax, 40(%rdi)
; SSE41-NEXT:    movntiq %rax, 32(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_zero_v64i8_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movntiq %rax, 56(%rdi)
; AVX-NEXT:    movntiq %rax, 48(%rdi)
; AVX-NEXT:    movntiq %rax, 40(%rdi)
; AVX-NEXT:    movntiq %rax, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v64i8_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movntiq %rax, 56(%rdi)
; AVX512-NEXT:    movntiq %rax, 48(%rdi)
; AVX512-NEXT:    movntiq %rax, 40(%rdi)
; AVX512-NEXT:    movntiq %rax, 32(%rdi)
; AVX512-NEXT:    retq
  store <64 x i8> zeroinitializer, <64 x i8>* %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v8f64_align16(<8 x double>* %dst) nounwind {
; SSE-LABEL: test_zero_v8f64_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8f64_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8f64_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <8 x double> zeroinitializer, <8 x double>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v16f32_align16(<16 x float>* %dst) nounwind {
; SSE-LABEL: test_zero_v16f32_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v16f32_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v16f32_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <16 x float> zeroinitializer, <16 x float>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v8i64_align16(<8 x i64>* %dst) nounwind {
; SSE-LABEL: test_zero_v8i64_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8i64_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8i64_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <8 x i64> zeroinitializer, <8 x i64>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v16i32_align16(<16 x i32>* %dst) nounwind {
; SSE-LABEL: test_zero_v16i32_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v16i32_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v16i32_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <16 x i32> zeroinitializer, <16 x i32>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v32i16_align16(<32 x i16>* %dst) nounwind {
; SSE-LABEL: test_zero_v32i16_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v32i16_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v32i16_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <32 x i16> zeroinitializer, <32 x i16>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v64i8_align16(<64 x i8>* %dst) nounwind {
; SSE-LABEL: test_zero_v64i8_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v64i8_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v64i8_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <64 x i8> zeroinitializer, <64 x i8>* %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v8f64_align32(<8 x double>* %dst) nounwind {
; SSE-LABEL: test_zero_v8f64_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8f64_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8f64_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <8 x double> zeroinitializer, <8 x double>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_zero_v16f32_align32(<16 x float>* %dst) nounwind {
; SSE-LABEL: test_zero_v16f32_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v16f32_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v16f32_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <16 x float> zeroinitializer, <16 x float>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_zero_v8i64_align32(<8 x i64>* %dst) nounwind {
; SSE-LABEL: test_zero_v8i64_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8i64_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8i64_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <8 x i64> zeroinitializer, <8 x i64>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_zero_v16i32_align32(<16 x i32>* %dst) nounwind {
; SSE-LABEL: test_zero_v16i32_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v16i32_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v16i32_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <16 x i32> zeroinitializer, <16 x i32>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_zero_v32i16_align32(<32 x i16>* %dst) nounwind {
; SSE-LABEL: test_zero_v32i16_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v32i16_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v32i16_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <32 x i16> zeroinitializer, <32 x i16>* %dst, align 32, !nontemporal !1
  ret void
}

define void @test_zero_v64i8_align32(<64 x i8>* %dst) nounwind {
; SSE-LABEL: test_zero_v64i8_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v64i8_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v64i8_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <64 x i8> zeroinitializer, <64 x i8>* %dst, align 32, !nontemporal !1
  ret void
}

!1 = !{i32 1}