1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefixes=CHECK,SSE,SSE4A
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
10 ; Test codegen for under-aligned nontemporal vector stores
; Nontemporal store of an all-zero <2 x double> (16 bytes) to %dst, align 1.
; The shared prefix covers every run line: xorl zeroes %eax, then the store
; is expected as two movntiq stores of %rax at offsets 8 and 0.
14 define void @test_zero_v2f64_align1(ptr %dst) nounwind {
15 ; CHECK-LABEL: test_zero_v2f64_align1:
17 ; CHECK-NEXT: xorl %eax, %eax
18 ; CHECK-NEXT: movntiq %rax, 8(%rdi)
19 ; CHECK-NEXT: movntiq %rax, (%rdi)
21 store <2 x double> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <4 x float> (16 bytes) to %dst, align 1.
; The shared prefix covers every run line: xorl zeroes %eax, then the store
; is expected as two movntiq stores of %rax at offsets 8 and 0.
25 define void @test_zero_v4f32_align1(ptr %dst) nounwind {
26 ; CHECK-LABEL: test_zero_v4f32_align1:
28 ; CHECK-NEXT: xorl %eax, %eax
29 ; CHECK-NEXT: movntiq %rax, 8(%rdi)
30 ; CHECK-NEXT: movntiq %rax, (%rdi)
32 store <4 x float> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <2 x i64> (16 bytes) to %dst, align 1.
; The shared prefix covers every run line: xorl zeroes %eax, then the store
; is expected as two movntiq stores of %rax at offsets 8 and 0.
36 define void @test_zero_v2i64_align1(ptr %dst) nounwind {
37 ; CHECK-LABEL: test_zero_v2i64_align1:
39 ; CHECK-NEXT: xorl %eax, %eax
40 ; CHECK-NEXT: movntiq %rax, 8(%rdi)
41 ; CHECK-NEXT: movntiq %rax, (%rdi)
43 store <2 x i64> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <4 x i32> (16 bytes) to %dst, align 1.
; The shared prefix covers every run line: xorl zeroes %eax, then the store
; is expected as two movntiq stores of %rax at offsets 8 and 0.
47 define void @test_zero_v4i32_align1(ptr %dst) nounwind {
48 ; CHECK-LABEL: test_zero_v4i32_align1:
50 ; CHECK-NEXT: xorl %eax, %eax
51 ; CHECK-NEXT: movntiq %rax, 8(%rdi)
52 ; CHECK-NEXT: movntiq %rax, (%rdi)
54 store <4 x i32> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <8 x i16> (16 bytes) to %dst, align 1.
; The shared prefix covers every run line: xorl zeroes %eax, then the store
; is expected as two movntiq stores of %rax at offsets 8 and 0.
58 define void @test_zero_v8i16_align1(ptr %dst) nounwind {
59 ; CHECK-LABEL: test_zero_v8i16_align1:
61 ; CHECK-NEXT: xorl %eax, %eax
62 ; CHECK-NEXT: movntiq %rax, 8(%rdi)
63 ; CHECK-NEXT: movntiq %rax, (%rdi)
65 store <8 x i16> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <16 x i8> (16 bytes) to %dst, align 1.
; The shared prefix covers every run line: xorl zeroes %eax, then the store
; is expected as two movntiq stores of %rax at offsets 8 and 0.
69 define void @test_zero_v16i8_align1(ptr %dst) nounwind {
70 ; CHECK-LABEL: test_zero_v16i8_align1:
72 ; CHECK-NEXT: xorl %eax, %eax
73 ; CHECK-NEXT: movntiq %rax, 8(%rdi)
74 ; CHECK-NEXT: movntiq %rax, (%rdi)
76 store <16 x i8> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <4 x double> (32 bytes) to %dst, align 1.
; The shared prefix covers every run line: xorl zeroes %eax, then the store
; is expected as four movntiq stores of %rax at offsets 8, 0, 24 and 16.
82 define void @test_zero_v4f64_align1(ptr %dst) nounwind {
83 ; CHECK-LABEL: test_zero_v4f64_align1:
85 ; CHECK-NEXT: xorl %eax, %eax
86 ; CHECK-NEXT: movntiq %rax, 8(%rdi)
87 ; CHECK-NEXT: movntiq %rax, (%rdi)
88 ; CHECK-NEXT: movntiq %rax, 24(%rdi)
89 ; CHECK-NEXT: movntiq %rax, 16(%rdi)
91 store <4 x double> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <8 x float> (32 bytes) to %dst, align 1.
; SSE2, SSE41, AVX and AVX512 expect four movntiq stores of a zeroed %rax at
; offsets 8, 0, 24 and 16. SSE4A expects a mixed sequence instead: movntiq at
; offsets 8 and 24, then movntsd of a zeroed %xmm0 at offsets 0 and 16.
95 define void @test_zero_v8f32_align1(ptr %dst) nounwind {
96 ; SSE2-LABEL: test_zero_v8f32_align1:
98 ; SSE2-NEXT: xorl %eax, %eax
99 ; SSE2-NEXT: movntiq %rax, 8(%rdi)
100 ; SSE2-NEXT: movntiq %rax, (%rdi)
101 ; SSE2-NEXT: movntiq %rax, 24(%rdi)
102 ; SSE2-NEXT: movntiq %rax, 16(%rdi)
105 ; SSE4A-LABEL: test_zero_v8f32_align1:
107 ; SSE4A-NEXT: xorl %eax, %eax
108 ; SSE4A-NEXT: movntiq %rax, 8(%rdi)
109 ; SSE4A-NEXT: movntiq %rax, 24(%rdi)
110 ; SSE4A-NEXT: xorps %xmm0, %xmm0
111 ; SSE4A-NEXT: movntsd %xmm0, (%rdi)
112 ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
115 ; SSE41-LABEL: test_zero_v8f32_align1:
117 ; SSE41-NEXT: xorl %eax, %eax
118 ; SSE41-NEXT: movntiq %rax, 8(%rdi)
119 ; SSE41-NEXT: movntiq %rax, (%rdi)
120 ; SSE41-NEXT: movntiq %rax, 24(%rdi)
121 ; SSE41-NEXT: movntiq %rax, 16(%rdi)
124 ; AVX-LABEL: test_zero_v8f32_align1:
126 ; AVX-NEXT: xorl %eax, %eax
127 ; AVX-NEXT: movntiq %rax, 8(%rdi)
128 ; AVX-NEXT: movntiq %rax, (%rdi)
129 ; AVX-NEXT: movntiq %rax, 24(%rdi)
130 ; AVX-NEXT: movntiq %rax, 16(%rdi)
133 ; AVX512-LABEL: test_zero_v8f32_align1:
135 ; AVX512-NEXT: xorl %eax, %eax
136 ; AVX512-NEXT: movntiq %rax, 8(%rdi)
137 ; AVX512-NEXT: movntiq %rax, (%rdi)
138 ; AVX512-NEXT: movntiq %rax, 24(%rdi)
139 ; AVX512-NEXT: movntiq %rax, 16(%rdi)
141 store <8 x float> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <4 x i64> (32 bytes) to %dst, align 1.
; SSE2, SSE41, AVX and AVX512 expect four movntiq stores of a zeroed %rax at
; offsets 8, 0, 24 and 16. SSE4A expects the same offsets written with
; movntsd from a zeroed %xmm0.
145 define void @test_zero_v4i64_align1(ptr %dst) nounwind {
146 ; SSE2-LABEL: test_zero_v4i64_align1:
148 ; SSE2-NEXT: xorl %eax, %eax
149 ; SSE2-NEXT: movntiq %rax, 8(%rdi)
150 ; SSE2-NEXT: movntiq %rax, (%rdi)
151 ; SSE2-NEXT: movntiq %rax, 24(%rdi)
152 ; SSE2-NEXT: movntiq %rax, 16(%rdi)
155 ; SSE4A-LABEL: test_zero_v4i64_align1:
157 ; SSE4A-NEXT: xorps %xmm0, %xmm0
158 ; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
159 ; SSE4A-NEXT: movntsd %xmm0, (%rdi)
160 ; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
161 ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
164 ; SSE41-LABEL: test_zero_v4i64_align1:
166 ; SSE41-NEXT: xorl %eax, %eax
167 ; SSE41-NEXT: movntiq %rax, 8(%rdi)
168 ; SSE41-NEXT: movntiq %rax, (%rdi)
169 ; SSE41-NEXT: movntiq %rax, 24(%rdi)
170 ; SSE41-NEXT: movntiq %rax, 16(%rdi)
173 ; AVX-LABEL: test_zero_v4i64_align1:
175 ; AVX-NEXT: xorl %eax, %eax
176 ; AVX-NEXT: movntiq %rax, 8(%rdi)
177 ; AVX-NEXT: movntiq %rax, (%rdi)
178 ; AVX-NEXT: movntiq %rax, 24(%rdi)
179 ; AVX-NEXT: movntiq %rax, 16(%rdi)
182 ; AVX512-LABEL: test_zero_v4i64_align1:
184 ; AVX512-NEXT: xorl %eax, %eax
185 ; AVX512-NEXT: movntiq %rax, 8(%rdi)
186 ; AVX512-NEXT: movntiq %rax, (%rdi)
187 ; AVX512-NEXT: movntiq %rax, 24(%rdi)
188 ; AVX512-NEXT: movntiq %rax, 16(%rdi)
190 store <4 x i64> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <8 x i32> (32 bytes) to %dst, align 1.
; SSE2, SSE41, AVX and AVX512 expect four movntiq stores of a zeroed %rax at
; offsets 8, 0, 24 and 16. SSE4A expects the same offsets written with
; movntsd from a zeroed %xmm0.
194 define void @test_zero_v8i32_align1(ptr %dst) nounwind {
195 ; SSE2-LABEL: test_zero_v8i32_align1:
197 ; SSE2-NEXT: xorl %eax, %eax
198 ; SSE2-NEXT: movntiq %rax, 8(%rdi)
199 ; SSE2-NEXT: movntiq %rax, (%rdi)
200 ; SSE2-NEXT: movntiq %rax, 24(%rdi)
201 ; SSE2-NEXT: movntiq %rax, 16(%rdi)
204 ; SSE4A-LABEL: test_zero_v8i32_align1:
206 ; SSE4A-NEXT: xorps %xmm0, %xmm0
207 ; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
208 ; SSE4A-NEXT: movntsd %xmm0, (%rdi)
209 ; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
210 ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
213 ; SSE41-LABEL: test_zero_v8i32_align1:
215 ; SSE41-NEXT: xorl %eax, %eax
216 ; SSE41-NEXT: movntiq %rax, 8(%rdi)
217 ; SSE41-NEXT: movntiq %rax, (%rdi)
218 ; SSE41-NEXT: movntiq %rax, 24(%rdi)
219 ; SSE41-NEXT: movntiq %rax, 16(%rdi)
222 ; AVX-LABEL: test_zero_v8i32_align1:
224 ; AVX-NEXT: xorl %eax, %eax
225 ; AVX-NEXT: movntiq %rax, 8(%rdi)
226 ; AVX-NEXT: movntiq %rax, (%rdi)
227 ; AVX-NEXT: movntiq %rax, 24(%rdi)
228 ; AVX-NEXT: movntiq %rax, 16(%rdi)
231 ; AVX512-LABEL: test_zero_v8i32_align1:
233 ; AVX512-NEXT: xorl %eax, %eax
234 ; AVX512-NEXT: movntiq %rax, 8(%rdi)
235 ; AVX512-NEXT: movntiq %rax, (%rdi)
236 ; AVX512-NEXT: movntiq %rax, 24(%rdi)
237 ; AVX512-NEXT: movntiq %rax, 16(%rdi)
239 store <8 x i32> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <16 x i16> (32 bytes) to %dst, align 1.
; SSE2, SSE41, AVX and AVX512 expect four movntiq stores of a zeroed %rax at
; offsets 8, 0, 24 and 16. SSE4A expects the same offsets written with
; movntsd from a zeroed %xmm0.
243 define void @test_zero_v16i16_align1(ptr %dst) nounwind {
244 ; SSE2-LABEL: test_zero_v16i16_align1:
246 ; SSE2-NEXT: xorl %eax, %eax
247 ; SSE2-NEXT: movntiq %rax, 8(%rdi)
248 ; SSE2-NEXT: movntiq %rax, (%rdi)
249 ; SSE2-NEXT: movntiq %rax, 24(%rdi)
250 ; SSE2-NEXT: movntiq %rax, 16(%rdi)
253 ; SSE4A-LABEL: test_zero_v16i16_align1:
255 ; SSE4A-NEXT: xorps %xmm0, %xmm0
256 ; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
257 ; SSE4A-NEXT: movntsd %xmm0, (%rdi)
258 ; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
259 ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
262 ; SSE41-LABEL: test_zero_v16i16_align1:
264 ; SSE41-NEXT: xorl %eax, %eax
265 ; SSE41-NEXT: movntiq %rax, 8(%rdi)
266 ; SSE41-NEXT: movntiq %rax, (%rdi)
267 ; SSE41-NEXT: movntiq %rax, 24(%rdi)
268 ; SSE41-NEXT: movntiq %rax, 16(%rdi)
271 ; AVX-LABEL: test_zero_v16i16_align1:
273 ; AVX-NEXT: xorl %eax, %eax
274 ; AVX-NEXT: movntiq %rax, 8(%rdi)
275 ; AVX-NEXT: movntiq %rax, (%rdi)
276 ; AVX-NEXT: movntiq %rax, 24(%rdi)
277 ; AVX-NEXT: movntiq %rax, 16(%rdi)
280 ; AVX512-LABEL: test_zero_v16i16_align1:
282 ; AVX512-NEXT: xorl %eax, %eax
283 ; AVX512-NEXT: movntiq %rax, 8(%rdi)
284 ; AVX512-NEXT: movntiq %rax, (%rdi)
285 ; AVX512-NEXT: movntiq %rax, 24(%rdi)
286 ; AVX512-NEXT: movntiq %rax, 16(%rdi)
288 store <16 x i16> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <32 x i8> (32 bytes) to %dst, align 1.
; SSE2, SSE41, AVX and AVX512 expect four movntiq stores of a zeroed %rax at
; offsets 8, 0, 24 and 16. SSE4A expects the same offsets written with
; movntsd from a zeroed %xmm0.
292 define void @test_zero_v32i8_align1(ptr %dst) nounwind {
293 ; SSE2-LABEL: test_zero_v32i8_align1:
295 ; SSE2-NEXT: xorl %eax, %eax
296 ; SSE2-NEXT: movntiq %rax, 8(%rdi)
297 ; SSE2-NEXT: movntiq %rax, (%rdi)
298 ; SSE2-NEXT: movntiq %rax, 24(%rdi)
299 ; SSE2-NEXT: movntiq %rax, 16(%rdi)
302 ; SSE4A-LABEL: test_zero_v32i8_align1:
304 ; SSE4A-NEXT: xorps %xmm0, %xmm0
305 ; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
306 ; SSE4A-NEXT: movntsd %xmm0, (%rdi)
307 ; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
308 ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
311 ; SSE41-LABEL: test_zero_v32i8_align1:
313 ; SSE41-NEXT: xorl %eax, %eax
314 ; SSE41-NEXT: movntiq %rax, 8(%rdi)
315 ; SSE41-NEXT: movntiq %rax, (%rdi)
316 ; SSE41-NEXT: movntiq %rax, 24(%rdi)
317 ; SSE41-NEXT: movntiq %rax, 16(%rdi)
320 ; AVX-LABEL: test_zero_v32i8_align1:
322 ; AVX-NEXT: xorl %eax, %eax
323 ; AVX-NEXT: movntiq %rax, 8(%rdi)
324 ; AVX-NEXT: movntiq %rax, (%rdi)
325 ; AVX-NEXT: movntiq %rax, 24(%rdi)
326 ; AVX-NEXT: movntiq %rax, 16(%rdi)
329 ; AVX512-LABEL: test_zero_v32i8_align1:
331 ; AVX512-NEXT: xorl %eax, %eax
332 ; AVX512-NEXT: movntiq %rax, 8(%rdi)
333 ; AVX512-NEXT: movntiq %rax, (%rdi)
334 ; AVX512-NEXT: movntiq %rax, 24(%rdi)
335 ; AVX512-NEXT: movntiq %rax, 16(%rdi)
337 store <32 x i8> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <4 x double> (32 bytes) to %dst, align 16.
; All prefixes expect a zeroed %xmm0 stored twice, at offsets 16 and 0, with
; movntps on the SSE prefix and vmovntps on the AVX and AVX512 prefixes.
341 define void @test_zero_v4f64_align16(ptr %dst) nounwind {
342 ; SSE-LABEL: test_zero_v4f64_align16:
344 ; SSE-NEXT: xorps %xmm0, %xmm0
345 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
346 ; SSE-NEXT: movntps %xmm0, (%rdi)
349 ; AVX-LABEL: test_zero_v4f64_align16:
351 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
352 ; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
353 ; AVX-NEXT: vmovntps %xmm0, (%rdi)
356 ; AVX512-LABEL: test_zero_v4f64_align16:
358 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
359 ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
360 ; AVX512-NEXT: vmovntps %xmm0, (%rdi)
362 store <4 x double> zeroinitializer, ptr %dst, align 16, !nontemporal !1
; Nontemporal store of an all-zero <8 x float> (32 bytes) to %dst, align 16.
; All prefixes expect a zeroed %xmm0 stored twice, at offsets 16 and 0, with
; movntps on the SSE prefix and vmovntps on the AVX and AVX512 prefixes.
366 define void @test_zero_v8f32_align16(ptr %dst) nounwind {
367 ; SSE-LABEL: test_zero_v8f32_align16:
369 ; SSE-NEXT: xorps %xmm0, %xmm0
370 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
371 ; SSE-NEXT: movntps %xmm0, (%rdi)
374 ; AVX-LABEL: test_zero_v8f32_align16:
376 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
377 ; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
378 ; AVX-NEXT: vmovntps %xmm0, (%rdi)
381 ; AVX512-LABEL: test_zero_v8f32_align16:
383 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
384 ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
385 ; AVX512-NEXT: vmovntps %xmm0, (%rdi)
387 store <8 x float> zeroinitializer, ptr %dst, align 16, !nontemporal !1
; Nontemporal store of an all-zero <4 x i64> (32 bytes) to %dst, align 16.
; All prefixes expect a zeroed %xmm0 stored twice, at offsets 16 and 0, with
; movntps on the SSE prefix and vmovntps on the AVX and AVX512 prefixes.
391 define void @test_zero_v4i64_align16(ptr %dst) nounwind {
392 ; SSE-LABEL: test_zero_v4i64_align16:
394 ; SSE-NEXT: xorps %xmm0, %xmm0
395 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
396 ; SSE-NEXT: movntps %xmm0, (%rdi)
399 ; AVX-LABEL: test_zero_v4i64_align16:
401 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
402 ; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
403 ; AVX-NEXT: vmovntps %xmm0, (%rdi)
406 ; AVX512-LABEL: test_zero_v4i64_align16:
408 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
409 ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
410 ; AVX512-NEXT: vmovntps %xmm0, (%rdi)
412 store <4 x i64> zeroinitializer, ptr %dst, align 16, !nontemporal !1
; Nontemporal store of an all-zero <8 x i32> (32 bytes) to %dst, align 16.
; All prefixes expect a zeroed %xmm0 stored twice, at offsets 16 and 0, with
; movntps on the SSE prefix and vmovntps on the AVX and AVX512 prefixes.
416 define void @test_zero_v8i32_align16(ptr %dst) nounwind {
417 ; SSE-LABEL: test_zero_v8i32_align16:
419 ; SSE-NEXT: xorps %xmm0, %xmm0
420 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
421 ; SSE-NEXT: movntps %xmm0, (%rdi)
424 ; AVX-LABEL: test_zero_v8i32_align16:
426 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
427 ; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
428 ; AVX-NEXT: vmovntps %xmm0, (%rdi)
431 ; AVX512-LABEL: test_zero_v8i32_align16:
433 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
434 ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
435 ; AVX512-NEXT: vmovntps %xmm0, (%rdi)
437 store <8 x i32> zeroinitializer, ptr %dst, align 16, !nontemporal !1
; Nontemporal store of an all-zero <16 x i16> (32 bytes) to %dst, align 16.
; All prefixes expect a zeroed %xmm0 stored twice, at offsets 16 and 0, with
; movntps on the SSE prefix and vmovntps on the AVX and AVX512 prefixes.
441 define void @test_zero_v16i16_align16(ptr %dst) nounwind {
442 ; SSE-LABEL: test_zero_v16i16_align16:
444 ; SSE-NEXT: xorps %xmm0, %xmm0
445 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
446 ; SSE-NEXT: movntps %xmm0, (%rdi)
449 ; AVX-LABEL: test_zero_v16i16_align16:
451 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
452 ; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
453 ; AVX-NEXT: vmovntps %xmm0, (%rdi)
456 ; AVX512-LABEL: test_zero_v16i16_align16:
458 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
459 ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
460 ; AVX512-NEXT: vmovntps %xmm0, (%rdi)
462 store <16 x i16> zeroinitializer, ptr %dst, align 16, !nontemporal !1
; Nontemporal store of an all-zero <32 x i8> (32 bytes) to %dst, align 16.
; All prefixes expect a zeroed %xmm0 stored twice, at offsets 16 and 0, with
; movntps on the SSE prefix and vmovntps on the AVX and AVX512 prefixes.
466 define void @test_zero_v32i8_align16(ptr %dst) nounwind {
467 ; SSE-LABEL: test_zero_v32i8_align16:
469 ; SSE-NEXT: xorps %xmm0, %xmm0
470 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
471 ; SSE-NEXT: movntps %xmm0, (%rdi)
474 ; AVX-LABEL: test_zero_v32i8_align16:
476 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
477 ; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
478 ; AVX-NEXT: vmovntps %xmm0, (%rdi)
481 ; AVX512-LABEL: test_zero_v32i8_align16:
483 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
484 ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
485 ; AVX512-NEXT: vmovntps %xmm0, (%rdi)
487 store <32 x i8> zeroinitializer, ptr %dst, align 16, !nontemporal !1
; Nontemporal store of an all-zero <8 x double> (64 bytes) to %dst, align 1.
; The shared prefix covers every run line: xorl zeroes %eax, then the store
; is expected as eight movntiq stores of %rax, emitted pairwise at offsets
; 8/0, 24/16, 40/32 and 56/48.
493 define void @test_zero_v8f64_align1(ptr %dst) nounwind {
494 ; CHECK-LABEL: test_zero_v8f64_align1:
496 ; CHECK-NEXT: xorl %eax, %eax
497 ; CHECK-NEXT: movntiq %rax, 8(%rdi)
498 ; CHECK-NEXT: movntiq %rax, (%rdi)
499 ; CHECK-NEXT: movntiq %rax, 24(%rdi)
500 ; CHECK-NEXT: movntiq %rax, 16(%rdi)
501 ; CHECK-NEXT: movntiq %rax, 40(%rdi)
502 ; CHECK-NEXT: movntiq %rax, 32(%rdi)
503 ; CHECK-NEXT: movntiq %rax, 56(%rdi)
504 ; CHECK-NEXT: movntiq %rax, 48(%rdi)
506 store <8 x double> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <16 x float> (64 bytes) to %dst, align 1.
; SSE2, SSE41, AVX and AVX512 expect eight movntiq stores of a zeroed %rax,
; pairwise at offsets 8/0, 24/16, 40/32 and 56/48. SSE4A expects a mixed
; sequence instead: movntiq at offsets 8, 24, 40 and 56, then movntsd of a
; zeroed %xmm0 at offsets 0, 16, 32 and 48.
510 define void @test_zero_v16f32_align1(ptr %dst) nounwind {
511 ; SSE2-LABEL: test_zero_v16f32_align1:
513 ; SSE2-NEXT: xorl %eax, %eax
514 ; SSE2-NEXT: movntiq %rax, 8(%rdi)
515 ; SSE2-NEXT: movntiq %rax, (%rdi)
516 ; SSE2-NEXT: movntiq %rax, 24(%rdi)
517 ; SSE2-NEXT: movntiq %rax, 16(%rdi)
518 ; SSE2-NEXT: movntiq %rax, 40(%rdi)
519 ; SSE2-NEXT: movntiq %rax, 32(%rdi)
520 ; SSE2-NEXT: movntiq %rax, 56(%rdi)
521 ; SSE2-NEXT: movntiq %rax, 48(%rdi)
524 ; SSE4A-LABEL: test_zero_v16f32_align1:
526 ; SSE4A-NEXT: xorl %eax, %eax
527 ; SSE4A-NEXT: movntiq %rax, 8(%rdi)
528 ; SSE4A-NEXT: movntiq %rax, 24(%rdi)
529 ; SSE4A-NEXT: movntiq %rax, 40(%rdi)
530 ; SSE4A-NEXT: movntiq %rax, 56(%rdi)
531 ; SSE4A-NEXT: xorps %xmm0, %xmm0
532 ; SSE4A-NEXT: movntsd %xmm0, (%rdi)
533 ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
534 ; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
535 ; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
538 ; SSE41-LABEL: test_zero_v16f32_align1:
540 ; SSE41-NEXT: xorl %eax, %eax
541 ; SSE41-NEXT: movntiq %rax, 8(%rdi)
542 ; SSE41-NEXT: movntiq %rax, (%rdi)
543 ; SSE41-NEXT: movntiq %rax, 24(%rdi)
544 ; SSE41-NEXT: movntiq %rax, 16(%rdi)
545 ; SSE41-NEXT: movntiq %rax, 40(%rdi)
546 ; SSE41-NEXT: movntiq %rax, 32(%rdi)
547 ; SSE41-NEXT: movntiq %rax, 56(%rdi)
548 ; SSE41-NEXT: movntiq %rax, 48(%rdi)
551 ; AVX-LABEL: test_zero_v16f32_align1:
553 ; AVX-NEXT: xorl %eax, %eax
554 ; AVX-NEXT: movntiq %rax, 8(%rdi)
555 ; AVX-NEXT: movntiq %rax, (%rdi)
556 ; AVX-NEXT: movntiq %rax, 24(%rdi)
557 ; AVX-NEXT: movntiq %rax, 16(%rdi)
558 ; AVX-NEXT: movntiq %rax, 40(%rdi)
559 ; AVX-NEXT: movntiq %rax, 32(%rdi)
560 ; AVX-NEXT: movntiq %rax, 56(%rdi)
561 ; AVX-NEXT: movntiq %rax, 48(%rdi)
564 ; AVX512-LABEL: test_zero_v16f32_align1:
566 ; AVX512-NEXT: xorl %eax, %eax
567 ; AVX512-NEXT: movntiq %rax, 8(%rdi)
568 ; AVX512-NEXT: movntiq %rax, (%rdi)
569 ; AVX512-NEXT: movntiq %rax, 24(%rdi)
570 ; AVX512-NEXT: movntiq %rax, 16(%rdi)
571 ; AVX512-NEXT: movntiq %rax, 40(%rdi)
572 ; AVX512-NEXT: movntiq %rax, 32(%rdi)
573 ; AVX512-NEXT: movntiq %rax, 56(%rdi)
574 ; AVX512-NEXT: movntiq %rax, 48(%rdi)
576 store <16 x float> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <8 x i64> (64 bytes) to %dst, align 1.
; SSE2, SSE41, AVX and AVX512 expect eight movntiq stores of a zeroed %rax,
; pairwise at offsets 8/0, 24/16, 40/32 and 56/48. SSE4A expects the same
; offsets written with movntsd from a zeroed %xmm0.
580 define void @test_zero_v8i64_align1(ptr %dst) nounwind {
581 ; SSE2-LABEL: test_zero_v8i64_align1:
583 ; SSE2-NEXT: xorl %eax, %eax
584 ; SSE2-NEXT: movntiq %rax, 8(%rdi)
585 ; SSE2-NEXT: movntiq %rax, (%rdi)
586 ; SSE2-NEXT: movntiq %rax, 24(%rdi)
587 ; SSE2-NEXT: movntiq %rax, 16(%rdi)
588 ; SSE2-NEXT: movntiq %rax, 40(%rdi)
589 ; SSE2-NEXT: movntiq %rax, 32(%rdi)
590 ; SSE2-NEXT: movntiq %rax, 56(%rdi)
591 ; SSE2-NEXT: movntiq %rax, 48(%rdi)
594 ; SSE4A-LABEL: test_zero_v8i64_align1:
596 ; SSE4A-NEXT: xorps %xmm0, %xmm0
597 ; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
598 ; SSE4A-NEXT: movntsd %xmm0, (%rdi)
599 ; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
600 ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
601 ; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
602 ; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
603 ; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
604 ; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
607 ; SSE41-LABEL: test_zero_v8i64_align1:
609 ; SSE41-NEXT: xorl %eax, %eax
610 ; SSE41-NEXT: movntiq %rax, 8(%rdi)
611 ; SSE41-NEXT: movntiq %rax, (%rdi)
612 ; SSE41-NEXT: movntiq %rax, 24(%rdi)
613 ; SSE41-NEXT: movntiq %rax, 16(%rdi)
614 ; SSE41-NEXT: movntiq %rax, 40(%rdi)
615 ; SSE41-NEXT: movntiq %rax, 32(%rdi)
616 ; SSE41-NEXT: movntiq %rax, 56(%rdi)
617 ; SSE41-NEXT: movntiq %rax, 48(%rdi)
620 ; AVX-LABEL: test_zero_v8i64_align1:
622 ; AVX-NEXT: xorl %eax, %eax
623 ; AVX-NEXT: movntiq %rax, 8(%rdi)
624 ; AVX-NEXT: movntiq %rax, (%rdi)
625 ; AVX-NEXT: movntiq %rax, 24(%rdi)
626 ; AVX-NEXT: movntiq %rax, 16(%rdi)
627 ; AVX-NEXT: movntiq %rax, 40(%rdi)
628 ; AVX-NEXT: movntiq %rax, 32(%rdi)
629 ; AVX-NEXT: movntiq %rax, 56(%rdi)
630 ; AVX-NEXT: movntiq %rax, 48(%rdi)
633 ; AVX512-LABEL: test_zero_v8i64_align1:
635 ; AVX512-NEXT: xorl %eax, %eax
636 ; AVX512-NEXT: movntiq %rax, 8(%rdi)
637 ; AVX512-NEXT: movntiq %rax, (%rdi)
638 ; AVX512-NEXT: movntiq %rax, 24(%rdi)
639 ; AVX512-NEXT: movntiq %rax, 16(%rdi)
640 ; AVX512-NEXT: movntiq %rax, 40(%rdi)
641 ; AVX512-NEXT: movntiq %rax, 32(%rdi)
642 ; AVX512-NEXT: movntiq %rax, 56(%rdi)
643 ; AVX512-NEXT: movntiq %rax, 48(%rdi)
645 store <8 x i64> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <16 x i32> (64 bytes) to %dst, align 1.
; SSE2, SSE41, AVX and AVX512 expect eight movntiq stores of a zeroed %rax,
; pairwise at offsets 8/0, 24/16, 40/32 and 56/48. SSE4A expects the same
; offsets written with movntsd from a zeroed %xmm0.
649 define void @test_zero_v16i32_align1(ptr %dst) nounwind {
650 ; SSE2-LABEL: test_zero_v16i32_align1:
652 ; SSE2-NEXT: xorl %eax, %eax
653 ; SSE2-NEXT: movntiq %rax, 8(%rdi)
654 ; SSE2-NEXT: movntiq %rax, (%rdi)
655 ; SSE2-NEXT: movntiq %rax, 24(%rdi)
656 ; SSE2-NEXT: movntiq %rax, 16(%rdi)
657 ; SSE2-NEXT: movntiq %rax, 40(%rdi)
658 ; SSE2-NEXT: movntiq %rax, 32(%rdi)
659 ; SSE2-NEXT: movntiq %rax, 56(%rdi)
660 ; SSE2-NEXT: movntiq %rax, 48(%rdi)
663 ; SSE4A-LABEL: test_zero_v16i32_align1:
665 ; SSE4A-NEXT: xorps %xmm0, %xmm0
666 ; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
667 ; SSE4A-NEXT: movntsd %xmm0, (%rdi)
668 ; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
669 ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
670 ; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
671 ; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
672 ; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
673 ; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
676 ; SSE41-LABEL: test_zero_v16i32_align1:
678 ; SSE41-NEXT: xorl %eax, %eax
679 ; SSE41-NEXT: movntiq %rax, 8(%rdi)
680 ; SSE41-NEXT: movntiq %rax, (%rdi)
681 ; SSE41-NEXT: movntiq %rax, 24(%rdi)
682 ; SSE41-NEXT: movntiq %rax, 16(%rdi)
683 ; SSE41-NEXT: movntiq %rax, 40(%rdi)
684 ; SSE41-NEXT: movntiq %rax, 32(%rdi)
685 ; SSE41-NEXT: movntiq %rax, 56(%rdi)
686 ; SSE41-NEXT: movntiq %rax, 48(%rdi)
689 ; AVX-LABEL: test_zero_v16i32_align1:
691 ; AVX-NEXT: xorl %eax, %eax
692 ; AVX-NEXT: movntiq %rax, 8(%rdi)
693 ; AVX-NEXT: movntiq %rax, (%rdi)
694 ; AVX-NEXT: movntiq %rax, 24(%rdi)
695 ; AVX-NEXT: movntiq %rax, 16(%rdi)
696 ; AVX-NEXT: movntiq %rax, 40(%rdi)
697 ; AVX-NEXT: movntiq %rax, 32(%rdi)
698 ; AVX-NEXT: movntiq %rax, 56(%rdi)
699 ; AVX-NEXT: movntiq %rax, 48(%rdi)
702 ; AVX512-LABEL: test_zero_v16i32_align1:
704 ; AVX512-NEXT: xorl %eax, %eax
705 ; AVX512-NEXT: movntiq %rax, 8(%rdi)
706 ; AVX512-NEXT: movntiq %rax, (%rdi)
707 ; AVX512-NEXT: movntiq %rax, 24(%rdi)
708 ; AVX512-NEXT: movntiq %rax, 16(%rdi)
709 ; AVX512-NEXT: movntiq %rax, 40(%rdi)
710 ; AVX512-NEXT: movntiq %rax, 32(%rdi)
711 ; AVX512-NEXT: movntiq %rax, 56(%rdi)
712 ; AVX512-NEXT: movntiq %rax, 48(%rdi)
714 store <16 x i32> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <32 x i16> (64 bytes) to %dst, align 1.
; SSE2, SSE41, AVX and AVX512 expect eight movntiq stores of a zeroed %rax,
; pairwise at offsets 8/0, 24/16, 40/32 and 56/48. SSE4A expects the same
; offsets written with movntsd from a zeroed %xmm0.
718 define void @test_zero_v32i16_align1(ptr %dst) nounwind {
719 ; SSE2-LABEL: test_zero_v32i16_align1:
721 ; SSE2-NEXT: xorl %eax, %eax
722 ; SSE2-NEXT: movntiq %rax, 8(%rdi)
723 ; SSE2-NEXT: movntiq %rax, (%rdi)
724 ; SSE2-NEXT: movntiq %rax, 24(%rdi)
725 ; SSE2-NEXT: movntiq %rax, 16(%rdi)
726 ; SSE2-NEXT: movntiq %rax, 40(%rdi)
727 ; SSE2-NEXT: movntiq %rax, 32(%rdi)
728 ; SSE2-NEXT: movntiq %rax, 56(%rdi)
729 ; SSE2-NEXT: movntiq %rax, 48(%rdi)
732 ; SSE4A-LABEL: test_zero_v32i16_align1:
734 ; SSE4A-NEXT: xorps %xmm0, %xmm0
735 ; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
736 ; SSE4A-NEXT: movntsd %xmm0, (%rdi)
737 ; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
738 ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
739 ; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
740 ; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
741 ; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
742 ; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
745 ; SSE41-LABEL: test_zero_v32i16_align1:
747 ; SSE41-NEXT: xorl %eax, %eax
748 ; SSE41-NEXT: movntiq %rax, 8(%rdi)
749 ; SSE41-NEXT: movntiq %rax, (%rdi)
750 ; SSE41-NEXT: movntiq %rax, 24(%rdi)
751 ; SSE41-NEXT: movntiq %rax, 16(%rdi)
752 ; SSE41-NEXT: movntiq %rax, 40(%rdi)
753 ; SSE41-NEXT: movntiq %rax, 32(%rdi)
754 ; SSE41-NEXT: movntiq %rax, 56(%rdi)
755 ; SSE41-NEXT: movntiq %rax, 48(%rdi)
758 ; AVX-LABEL: test_zero_v32i16_align1:
760 ; AVX-NEXT: xorl %eax, %eax
761 ; AVX-NEXT: movntiq %rax, 8(%rdi)
762 ; AVX-NEXT: movntiq %rax, (%rdi)
763 ; AVX-NEXT: movntiq %rax, 24(%rdi)
764 ; AVX-NEXT: movntiq %rax, 16(%rdi)
765 ; AVX-NEXT: movntiq %rax, 40(%rdi)
766 ; AVX-NEXT: movntiq %rax, 32(%rdi)
767 ; AVX-NEXT: movntiq %rax, 56(%rdi)
768 ; AVX-NEXT: movntiq %rax, 48(%rdi)
771 ; AVX512-LABEL: test_zero_v32i16_align1:
773 ; AVX512-NEXT: xorl %eax, %eax
774 ; AVX512-NEXT: movntiq %rax, 8(%rdi)
775 ; AVX512-NEXT: movntiq %rax, (%rdi)
776 ; AVX512-NEXT: movntiq %rax, 24(%rdi)
777 ; AVX512-NEXT: movntiq %rax, 16(%rdi)
778 ; AVX512-NEXT: movntiq %rax, 40(%rdi)
779 ; AVX512-NEXT: movntiq %rax, 32(%rdi)
780 ; AVX512-NEXT: movntiq %rax, 56(%rdi)
781 ; AVX512-NEXT: movntiq %rax, 48(%rdi)
783 store <32 x i16> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <64 x i8> (64 bytes) to %dst, align 1.
; SSE2, SSE41, AVX and AVX512 expect eight movntiq stores of a zeroed %rax,
; pairwise at offsets 8/0, 24/16, 40/32 and 56/48. SSE4A expects the same
; offsets written with movntsd from a zeroed %xmm0.
787 define void @test_zero_v64i8_align1(ptr %dst) nounwind {
788 ; SSE2-LABEL: test_zero_v64i8_align1:
790 ; SSE2-NEXT: xorl %eax, %eax
791 ; SSE2-NEXT: movntiq %rax, 8(%rdi)
792 ; SSE2-NEXT: movntiq %rax, (%rdi)
793 ; SSE2-NEXT: movntiq %rax, 24(%rdi)
794 ; SSE2-NEXT: movntiq %rax, 16(%rdi)
795 ; SSE2-NEXT: movntiq %rax, 40(%rdi)
796 ; SSE2-NEXT: movntiq %rax, 32(%rdi)
797 ; SSE2-NEXT: movntiq %rax, 56(%rdi)
798 ; SSE2-NEXT: movntiq %rax, 48(%rdi)
801 ; SSE4A-LABEL: test_zero_v64i8_align1:
803 ; SSE4A-NEXT: xorps %xmm0, %xmm0
804 ; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
805 ; SSE4A-NEXT: movntsd %xmm0, (%rdi)
806 ; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
807 ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
808 ; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
809 ; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
810 ; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
811 ; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
814 ; SSE41-LABEL: test_zero_v64i8_align1:
816 ; SSE41-NEXT: xorl %eax, %eax
817 ; SSE41-NEXT: movntiq %rax, 8(%rdi)
818 ; SSE41-NEXT: movntiq %rax, (%rdi)
819 ; SSE41-NEXT: movntiq %rax, 24(%rdi)
820 ; SSE41-NEXT: movntiq %rax, 16(%rdi)
821 ; SSE41-NEXT: movntiq %rax, 40(%rdi)
822 ; SSE41-NEXT: movntiq %rax, 32(%rdi)
823 ; SSE41-NEXT: movntiq %rax, 56(%rdi)
824 ; SSE41-NEXT: movntiq %rax, 48(%rdi)
827 ; AVX-LABEL: test_zero_v64i8_align1:
829 ; AVX-NEXT: xorl %eax, %eax
830 ; AVX-NEXT: movntiq %rax, 8(%rdi)
831 ; AVX-NEXT: movntiq %rax, (%rdi)
832 ; AVX-NEXT: movntiq %rax, 24(%rdi)
833 ; AVX-NEXT: movntiq %rax, 16(%rdi)
834 ; AVX-NEXT: movntiq %rax, 40(%rdi)
835 ; AVX-NEXT: movntiq %rax, 32(%rdi)
836 ; AVX-NEXT: movntiq %rax, 56(%rdi)
837 ; AVX-NEXT: movntiq %rax, 48(%rdi)
840 ; AVX512-LABEL: test_zero_v64i8_align1:
842 ; AVX512-NEXT: xorl %eax, %eax
843 ; AVX512-NEXT: movntiq %rax, 8(%rdi)
844 ; AVX512-NEXT: movntiq %rax, (%rdi)
845 ; AVX512-NEXT: movntiq %rax, 24(%rdi)
846 ; AVX512-NEXT: movntiq %rax, 16(%rdi)
847 ; AVX512-NEXT: movntiq %rax, 40(%rdi)
848 ; AVX512-NEXT: movntiq %rax, 32(%rdi)
849 ; AVX512-NEXT: movntiq %rax, 56(%rdi)
850 ; AVX512-NEXT: movntiq %rax, 48(%rdi)
852 store <64 x i8> zeroinitializer, ptr %dst, align 1, !nontemporal !1
; Nontemporal store of an all-zero <8 x double> (64 bytes) to %dst, align 16.
; All prefixes expect a zeroed %xmm0 stored four times, at offsets 16, 0, 48
; and 32, with movntps on the SSE prefix and vmovntps on AVX and AVX512.
856 define void @test_zero_v8f64_align16(ptr %dst) nounwind {
857 ; SSE-LABEL: test_zero_v8f64_align16:
859 ; SSE-NEXT: xorps %xmm0, %xmm0
860 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
861 ; SSE-NEXT: movntps %xmm0, (%rdi)
862 ; SSE-NEXT: movntps %xmm0, 48(%rdi)
863 ; SSE-NEXT: movntps %xmm0, 32(%rdi)
866 ; AVX-LABEL: test_zero_v8f64_align16:
868 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
869 ; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
870 ; AVX-NEXT: vmovntps %xmm0, (%rdi)
871 ; AVX-NEXT: vmovntps %xmm0, 48(%rdi)
872 ; AVX-NEXT: vmovntps %xmm0, 32(%rdi)
875 ; AVX512-LABEL: test_zero_v8f64_align16:
877 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
878 ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
879 ; AVX512-NEXT: vmovntps %xmm0, (%rdi)
880 ; AVX512-NEXT: vmovntps %xmm0, 48(%rdi)
881 ; AVX512-NEXT: vmovntps %xmm0, 32(%rdi)
883 store <8 x double> zeroinitializer, ptr %dst, align 16, !nontemporal !1
; Nontemporal store of an all-zero <16 x float> (64 bytes) to %dst, align 16.
; All prefixes expect a zeroed %xmm0 stored four times, at offsets 16, 0, 48
; and 32, with movntps on the SSE prefix and vmovntps on AVX and AVX512.
887 define void @test_zero_v16f32_align16(ptr %dst) nounwind {
888 ; SSE-LABEL: test_zero_v16f32_align16:
890 ; SSE-NEXT: xorps %xmm0, %xmm0
891 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
892 ; SSE-NEXT: movntps %xmm0, (%rdi)
893 ; SSE-NEXT: movntps %xmm0, 48(%rdi)
894 ; SSE-NEXT: movntps %xmm0, 32(%rdi)
897 ; AVX-LABEL: test_zero_v16f32_align16:
899 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
900 ; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
901 ; AVX-NEXT: vmovntps %xmm0, (%rdi)
902 ; AVX-NEXT: vmovntps %xmm0, 48(%rdi)
903 ; AVX-NEXT: vmovntps %xmm0, 32(%rdi)
906 ; AVX512-LABEL: test_zero_v16f32_align16:
908 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
909 ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
910 ; AVX512-NEXT: vmovntps %xmm0, (%rdi)
911 ; AVX512-NEXT: vmovntps %xmm0, 48(%rdi)
912 ; AVX512-NEXT: vmovntps %xmm0, 32(%rdi)
914 store <16 x float> zeroinitializer, ptr %dst, align 16, !nontemporal !1
; Nontemporal store of an all-zero <8 x i64> (64 bytes) to %dst, align 16.
; All prefixes expect a zeroed %xmm0 stored four times, at offsets 16, 0, 48
; and 32, with movntps on the SSE prefix and vmovntps on AVX and AVX512.
918 define void @test_zero_v8i64_align16(ptr %dst) nounwind {
919 ; SSE-LABEL: test_zero_v8i64_align16:
921 ; SSE-NEXT: xorps %xmm0, %xmm0
922 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
923 ; SSE-NEXT: movntps %xmm0, (%rdi)
924 ; SSE-NEXT: movntps %xmm0, 48(%rdi)
925 ; SSE-NEXT: movntps %xmm0, 32(%rdi)
928 ; AVX-LABEL: test_zero_v8i64_align16:
930 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
931 ; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
932 ; AVX-NEXT: vmovntps %xmm0, (%rdi)
933 ; AVX-NEXT: vmovntps %xmm0, 48(%rdi)
934 ; AVX-NEXT: vmovntps %xmm0, 32(%rdi)
937 ; AVX512-LABEL: test_zero_v8i64_align16:
939 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
940 ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
941 ; AVX512-NEXT: vmovntps %xmm0, (%rdi)
942 ; AVX512-NEXT: vmovntps %xmm0, 48(%rdi)
943 ; AVX512-NEXT: vmovntps %xmm0, 32(%rdi)
945 store <8 x i64> zeroinitializer, ptr %dst, align 16, !nontemporal !1
; Nontemporal store of an all-zero <16 x i32> (64 bytes) to %dst, align 16.
; All prefixes expect a zeroed %xmm0 stored four times, at offsets 16, 0, 48
; and 32, with movntps on the SSE prefix and vmovntps on AVX and AVX512.
949 define void @test_zero_v16i32_align16(ptr %dst) nounwind {
950 ; SSE-LABEL: test_zero_v16i32_align16:
952 ; SSE-NEXT: xorps %xmm0, %xmm0
953 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
954 ; SSE-NEXT: movntps %xmm0, (%rdi)
955 ; SSE-NEXT: movntps %xmm0, 48(%rdi)
956 ; SSE-NEXT: movntps %xmm0, 32(%rdi)
959 ; AVX-LABEL: test_zero_v16i32_align16:
961 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
962 ; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
963 ; AVX-NEXT: vmovntps %xmm0, (%rdi)
964 ; AVX-NEXT: vmovntps %xmm0, 48(%rdi)
965 ; AVX-NEXT: vmovntps %xmm0, 32(%rdi)
968 ; AVX512-LABEL: test_zero_v16i32_align16:
970 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
971 ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
972 ; AVX512-NEXT: vmovntps %xmm0, (%rdi)
973 ; AVX512-NEXT: vmovntps %xmm0, 48(%rdi)
974 ; AVX512-NEXT: vmovntps %xmm0, 32(%rdi)
976 store <16 x i32> zeroinitializer, ptr %dst, align 16, !nontemporal !1
; Nontemporal store of an all-zero <32 x i16> (64 bytes) to %dst, align 16.
; All prefixes expect a zeroed %xmm0 stored four times, at offsets 16, 0, 48
; and 32, with movntps on the SSE prefix and vmovntps on AVX and AVX512.
980 define void @test_zero_v32i16_align16(ptr %dst) nounwind {
981 ; SSE-LABEL: test_zero_v32i16_align16:
983 ; SSE-NEXT: xorps %xmm0, %xmm0
984 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
985 ; SSE-NEXT: movntps %xmm0, (%rdi)
986 ; SSE-NEXT: movntps %xmm0, 48(%rdi)
987 ; SSE-NEXT: movntps %xmm0, 32(%rdi)
990 ; AVX-LABEL: test_zero_v32i16_align16:
992 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
993 ; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
994 ; AVX-NEXT: vmovntps %xmm0, (%rdi)
995 ; AVX-NEXT: vmovntps %xmm0, 48(%rdi)
996 ; AVX-NEXT: vmovntps %xmm0, 32(%rdi)
999 ; AVX512-LABEL: test_zero_v32i16_align16:
1001 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
1002 ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
1003 ; AVX512-NEXT: vmovntps %xmm0, (%rdi)
1004 ; AVX512-NEXT: vmovntps %xmm0, 48(%rdi)
1005 ; AVX512-NEXT: vmovntps %xmm0, 32(%rdi)
1007 store <32 x i16> zeroinitializer, ptr %dst, align 16, !nontemporal !1
; Nontemporal store of an all-zero <64 x i8> (64 bytes) to %dst, align 16.
; All prefixes expect a zeroed %xmm0 stored four times, at offsets 16, 0, 48
; and 32, with movntps on the SSE prefix and vmovntps on AVX and AVX512.
1011 define void @test_zero_v64i8_align16(ptr %dst) nounwind {
1012 ; SSE-LABEL: test_zero_v64i8_align16:
1014 ; SSE-NEXT: xorps %xmm0, %xmm0
1015 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
1016 ; SSE-NEXT: movntps %xmm0, (%rdi)
1017 ; SSE-NEXT: movntps %xmm0, 48(%rdi)
1018 ; SSE-NEXT: movntps %xmm0, 32(%rdi)
1021 ; AVX-LABEL: test_zero_v64i8_align16:
1023 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
1024 ; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
1025 ; AVX-NEXT: vmovntps %xmm0, (%rdi)
1026 ; AVX-NEXT: vmovntps %xmm0, 48(%rdi)
1027 ; AVX-NEXT: vmovntps %xmm0, 32(%rdi)
1030 ; AVX512-LABEL: test_zero_v64i8_align16:
1032 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
1033 ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
1034 ; AVX512-NEXT: vmovntps %xmm0, (%rdi)
1035 ; AVX512-NEXT: vmovntps %xmm0, 48(%rdi)
1036 ; AVX512-NEXT: vmovntps %xmm0, 32(%rdi)
1038 store <64 x i8> zeroinitializer, ptr %dst, align 16, !nontemporal !1
; Nontemporal store of an all-zero <8 x double> (64 bytes) to %dst, align 32.
; The SSE prefix expects four movntps stores of a zeroed %xmm0 at offsets 48,
; 32, 16 and 0. AVX and AVX512 expect two vmovntps stores of %ymm0 at offsets
; 32 and 0, followed by vzeroupper.
1042 define void @test_zero_v8f64_align32(ptr %dst) nounwind {
1043 ; SSE-LABEL: test_zero_v8f64_align32:
1045 ; SSE-NEXT: xorps %xmm0, %xmm0
1046 ; SSE-NEXT: movntps %xmm0, 48(%rdi)
1047 ; SSE-NEXT: movntps %xmm0, 32(%rdi)
1048 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
1049 ; SSE-NEXT: movntps %xmm0, (%rdi)
1052 ; AVX-LABEL: test_zero_v8f64_align32:
1054 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
1055 ; AVX-NEXT: vmovntps %ymm0, 32(%rdi)
1056 ; AVX-NEXT: vmovntps %ymm0, (%rdi)
1057 ; AVX-NEXT: vzeroupper
1060 ; AVX512-LABEL: test_zero_v8f64_align32:
1062 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
1063 ; AVX512-NEXT: vmovntps %ymm0, 32(%rdi)
1064 ; AVX512-NEXT: vmovntps %ymm0, (%rdi)
1065 ; AVX512-NEXT: vzeroupper
1067 store <8 x double> zeroinitializer, ptr %dst, align 32, !nontemporal !1
; Nontemporal store of an all-zero <16 x float> (64 bytes) to %dst, align 32.
; The SSE prefix expects four movntps stores of a zeroed %xmm0 at offsets 48,
; 32, 16 and 0. AVX and AVX512 expect two vmovntps stores of %ymm0 at offsets
; 32 and 0, followed by vzeroupper.
1071 define void @test_zero_v16f32_align32(ptr %dst) nounwind {
1072 ; SSE-LABEL: test_zero_v16f32_align32:
1074 ; SSE-NEXT: xorps %xmm0, %xmm0
1075 ; SSE-NEXT: movntps %xmm0, 48(%rdi)
1076 ; SSE-NEXT: movntps %xmm0, 32(%rdi)
1077 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
1078 ; SSE-NEXT: movntps %xmm0, (%rdi)
1081 ; AVX-LABEL: test_zero_v16f32_align32:
1083 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
1084 ; AVX-NEXT: vmovntps %ymm0, 32(%rdi)
1085 ; AVX-NEXT: vmovntps %ymm0, (%rdi)
1086 ; AVX-NEXT: vzeroupper
1089 ; AVX512-LABEL: test_zero_v16f32_align32:
1091 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
1092 ; AVX512-NEXT: vmovntps %ymm0, 32(%rdi)
1093 ; AVX512-NEXT: vmovntps %ymm0, (%rdi)
1094 ; AVX512-NEXT: vzeroupper
1096 store <16 x float> zeroinitializer, ptr %dst, align 32, !nontemporal !1
; Nontemporal store of an all-zero <8 x i64> (64 bytes) to %dst, align 32.
; The SSE prefix expects four movntps stores of a zeroed %xmm0 at offsets 48,
; 32, 16 and 0. AVX and AVX512 expect two vmovntps stores of %ymm0 at offsets
; 32 and 0, followed by vzeroupper.
1100 define void @test_zero_v8i64_align32(ptr %dst) nounwind {
1101 ; SSE-LABEL: test_zero_v8i64_align32:
1103 ; SSE-NEXT: xorps %xmm0, %xmm0
1104 ; SSE-NEXT: movntps %xmm0, 48(%rdi)
1105 ; SSE-NEXT: movntps %xmm0, 32(%rdi)
1106 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
1107 ; SSE-NEXT: movntps %xmm0, (%rdi)
1110 ; AVX-LABEL: test_zero_v8i64_align32:
1112 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
1113 ; AVX-NEXT: vmovntps %ymm0, 32(%rdi)
1114 ; AVX-NEXT: vmovntps %ymm0, (%rdi)
1115 ; AVX-NEXT: vzeroupper
1118 ; AVX512-LABEL: test_zero_v8i64_align32:
1120 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
1121 ; AVX512-NEXT: vmovntps %ymm0, 32(%rdi)
1122 ; AVX512-NEXT: vmovntps %ymm0, (%rdi)
1123 ; AVX512-NEXT: vzeroupper
1125 store <8 x i64> zeroinitializer, ptr %dst, align 32, !nontemporal !1
; Nontemporal store of an all-zero <16 x i32> (64 bytes) to %dst, align 32.
; The SSE prefix expects four movntps stores of a zeroed %xmm0 at offsets 48,
; 32, 16 and 0. AVX and AVX512 expect two vmovntps stores of %ymm0 at offsets
; 32 and 0, followed by vzeroupper.
1129 define void @test_zero_v16i32_align32(ptr %dst) nounwind {
1130 ; SSE-LABEL: test_zero_v16i32_align32:
1132 ; SSE-NEXT: xorps %xmm0, %xmm0
1133 ; SSE-NEXT: movntps %xmm0, 48(%rdi)
1134 ; SSE-NEXT: movntps %xmm0, 32(%rdi)
1135 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
1136 ; SSE-NEXT: movntps %xmm0, (%rdi)
1139 ; AVX-LABEL: test_zero_v16i32_align32:
1141 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
1142 ; AVX-NEXT: vmovntps %ymm0, 32(%rdi)
1143 ; AVX-NEXT: vmovntps %ymm0, (%rdi)
1144 ; AVX-NEXT: vzeroupper
1147 ; AVX512-LABEL: test_zero_v16i32_align32:
1149 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
1150 ; AVX512-NEXT: vmovntps %ymm0, 32(%rdi)
1151 ; AVX512-NEXT: vmovntps %ymm0, (%rdi)
1152 ; AVX512-NEXT: vzeroupper
1154 store <16 x i32> zeroinitializer, ptr %dst, align 32, !nontemporal !1
; Nontemporal store of an all-zero <32 x i16> (64 bytes) to %dst, align 32.
; The SSE prefix expects four movntps stores of a zeroed %xmm0 at offsets 48,
; 32, 16 and 0. AVX and AVX512 expect two vmovntps stores of %ymm0 at offsets
; 32 and 0, followed by vzeroupper.
1158 define void @test_zero_v32i16_align32(ptr %dst) nounwind {
1159 ; SSE-LABEL: test_zero_v32i16_align32:
1161 ; SSE-NEXT: xorps %xmm0, %xmm0
1162 ; SSE-NEXT: movntps %xmm0, 48(%rdi)
1163 ; SSE-NEXT: movntps %xmm0, 32(%rdi)
1164 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
1165 ; SSE-NEXT: movntps %xmm0, (%rdi)
1168 ; AVX-LABEL: test_zero_v32i16_align32:
1170 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
1171 ; AVX-NEXT: vmovntps %ymm0, 32(%rdi)
1172 ; AVX-NEXT: vmovntps %ymm0, (%rdi)
1173 ; AVX-NEXT: vzeroupper
1176 ; AVX512-LABEL: test_zero_v32i16_align32:
1178 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
1179 ; AVX512-NEXT: vmovntps %ymm0, 32(%rdi)
1180 ; AVX512-NEXT: vmovntps %ymm0, (%rdi)
1181 ; AVX512-NEXT: vzeroupper
1183 store <32 x i16> zeroinitializer, ptr %dst, align 32, !nontemporal !1
; Nontemporal store of an all-zero <64 x i8> (64 bytes) to %dst, align 32.
; The SSE prefix expects four movntps stores of a zeroed %xmm0 at offsets 48,
; 32, 16 and 0. AVX and AVX512 expect two vmovntps stores of %ymm0 at offsets
; 32 and 0, followed by vzeroupper.
1187 define void @test_zero_v64i8_align32(ptr %dst) nounwind {
1188 ; SSE-LABEL: test_zero_v64i8_align32:
1190 ; SSE-NEXT: xorps %xmm0, %xmm0
1191 ; SSE-NEXT: movntps %xmm0, 48(%rdi)
1192 ; SSE-NEXT: movntps %xmm0, 32(%rdi)
1193 ; SSE-NEXT: movntps %xmm0, 16(%rdi)
1194 ; SSE-NEXT: movntps %xmm0, (%rdi)
1197 ; AVX-LABEL: test_zero_v64i8_align32:
1199 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
1200 ; AVX-NEXT: vmovntps %ymm0, 32(%rdi)
1201 ; AVX-NEXT: vmovntps %ymm0, (%rdi)
1202 ; AVX-NEXT: vzeroupper
1205 ; AVX512-LABEL: test_zero_v64i8_align32:
1207 ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
1208 ; AVX512-NEXT: vmovntps %ymm0, 32(%rdi)
1209 ; AVX512-NEXT: vmovntps %ymm0, (%rdi)
1210 ; AVX512-NEXT: vzeroupper
1212 store <64 x i8> zeroinitializer, ptr %dst, align 32, !nontemporal !1