1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse | FileCheck %s --check-prefix=SSE
3 ; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2 | FileCheck %s --check-prefix=SSE
4 ; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2,-slow-unaligned-mem-16 | FileCheck %s --check-prefix=SSE2FAST
5 ; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
8 ; https://llvm.org/bugs/show_bug.cgi?id=27100
; Fixed 16-byte fill with the constant byte value 42, issued through
; @__memset_chk (fourth argument is -1).
; Expected lowering according to the checks below:
;   - SSE prefix      -> two 8-byte movq stores of 0x2A2A2A2A2A2A2A2A
;   - SSE2FAST prefix -> one unaligned 16-byte movups of a splatted constant
;   - AVX prefix      -> one unaligned 16-byte vmovups of a splatted constant
10 define void @memset_16_nonzero_bytes(i8* %x) {
11 ; SSE-LABEL: memset_16_nonzero_bytes:
13 ; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
14 ; SSE-NEXT: movq %rax, 8(%rdi)
15 ; SSE-NEXT: movq %rax, (%rdi)
18 ; SSE2FAST-LABEL: memset_16_nonzero_bytes:
20 ; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
21 ; SSE2FAST-NEXT: movups %xmm0, (%rdi)
24 ; AVX-LABEL: memset_16_nonzero_bytes:
26 ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
27 ; AVX-NEXT: vmovups %xmm0, (%rdi)
29 %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 16, i64 -1)
; Fixed 32-byte fill with the constant byte value 42, issued through
; @__memset_chk (fourth argument is -1).
; Expected lowering according to the checks below:
;   - SSE prefix      -> four 8-byte movq stores of 0x2A2A2A2A2A2A2A2A
;   - SSE2FAST prefix -> two unaligned 16-byte movups stores
;   - AVX prefix      -> one unaligned 32-byte ymm store, then vzeroupper
33 define void @memset_32_nonzero_bytes(i8* %x) {
34 ; SSE-LABEL: memset_32_nonzero_bytes:
36 ; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
37 ; SSE-NEXT: movq %rax, 24(%rdi)
38 ; SSE-NEXT: movq %rax, 16(%rdi)
39 ; SSE-NEXT: movq %rax, 8(%rdi)
40 ; SSE-NEXT: movq %rax, (%rdi)
43 ; SSE2FAST-LABEL: memset_32_nonzero_bytes:
45 ; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
46 ; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
47 ; SSE2FAST-NEXT: movups %xmm0, (%rdi)
50 ; AVX-LABEL: memset_32_nonzero_bytes:
52 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
53 ; AVX-NEXT: vmovups %ymm0, (%rdi)
54 ; AVX-NEXT: vzeroupper
56 %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 32, i64 -1)
; Fixed 64-byte fill with the constant byte value 42, issued through
; @__memset_chk (fourth argument is -1).
; Expected lowering according to the checks below:
;   - SSE prefix      -> eight 8-byte movq stores of 0x2A2A2A2A2A2A2A2A
;   - SSE2FAST prefix -> four unaligned 16-byte movups stores
;   - AVX prefix      -> two unaligned 32-byte ymm stores, then vzeroupper
60 define void @memset_64_nonzero_bytes(i8* %x) {
61 ; SSE-LABEL: memset_64_nonzero_bytes:
63 ; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
64 ; SSE-NEXT: movq %rax, 56(%rdi)
65 ; SSE-NEXT: movq %rax, 48(%rdi)
66 ; SSE-NEXT: movq %rax, 40(%rdi)
67 ; SSE-NEXT: movq %rax, 32(%rdi)
68 ; SSE-NEXT: movq %rax, 24(%rdi)
69 ; SSE-NEXT: movq %rax, 16(%rdi)
70 ; SSE-NEXT: movq %rax, 8(%rdi)
71 ; SSE-NEXT: movq %rax, (%rdi)
74 ; SSE2FAST-LABEL: memset_64_nonzero_bytes:
76 ; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
77 ; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
78 ; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
79 ; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
80 ; SSE2FAST-NEXT: movups %xmm0, (%rdi)
83 ; AVX-LABEL: memset_64_nonzero_bytes:
85 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
86 ; AVX-NEXT: vmovups %ymm0, 32(%rdi)
87 ; AVX-NEXT: vmovups %ymm0, (%rdi)
88 ; AVX-NEXT: vzeroupper
90 %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 64, i64 -1)
; Fixed 128-byte fill with the constant byte value 42, issued through
; @__memset_chk (fourth argument is -1).
; Expected lowering according to the checks below:
;   - SSE prefix      -> sixteen 8-byte movq stores of 0x2A2A2A2A2A2A2A2A
;   - SSE2FAST prefix -> eight unaligned 16-byte movups stores
;   - AVX prefix      -> four unaligned 32-byte ymm stores, then vzeroupper
94 define void @memset_128_nonzero_bytes(i8* %x) {
95 ; SSE-LABEL: memset_128_nonzero_bytes:
97 ; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
98 ; SSE-NEXT: movq %rax, 120(%rdi)
99 ; SSE-NEXT: movq %rax, 112(%rdi)
100 ; SSE-NEXT: movq %rax, 104(%rdi)
101 ; SSE-NEXT: movq %rax, 96(%rdi)
102 ; SSE-NEXT: movq %rax, 88(%rdi)
103 ; SSE-NEXT: movq %rax, 80(%rdi)
104 ; SSE-NEXT: movq %rax, 72(%rdi)
105 ; SSE-NEXT: movq %rax, 64(%rdi)
106 ; SSE-NEXT: movq %rax, 56(%rdi)
107 ; SSE-NEXT: movq %rax, 48(%rdi)
108 ; SSE-NEXT: movq %rax, 40(%rdi)
109 ; SSE-NEXT: movq %rax, 32(%rdi)
110 ; SSE-NEXT: movq %rax, 24(%rdi)
111 ; SSE-NEXT: movq %rax, 16(%rdi)
112 ; SSE-NEXT: movq %rax, 8(%rdi)
113 ; SSE-NEXT: movq %rax, (%rdi)
116 ; SSE2FAST-LABEL: memset_128_nonzero_bytes:
118 ; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
119 ; SSE2FAST-NEXT: movups %xmm0, 112(%rdi)
120 ; SSE2FAST-NEXT: movups %xmm0, 96(%rdi)
121 ; SSE2FAST-NEXT: movups %xmm0, 80(%rdi)
122 ; SSE2FAST-NEXT: movups %xmm0, 64(%rdi)
123 ; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
124 ; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
125 ; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
126 ; SSE2FAST-NEXT: movups %xmm0, (%rdi)
127 ; SSE2FAST-NEXT: retq
129 ; AVX-LABEL: memset_128_nonzero_bytes:
131 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
132 ; AVX-NEXT: vmovups %ymm0, 96(%rdi)
133 ; AVX-NEXT: vmovups %ymm0, 64(%rdi)
134 ; AVX-NEXT: vmovups %ymm0, 32(%rdi)
135 ; AVX-NEXT: vmovups %ymm0, (%rdi)
136 ; AVX-NEXT: vzeroupper
138 %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 128, i64 -1)
; Fixed 256-byte fill with the constant byte value 42, issued through
; @__memset_chk (fourth argument is -1).
; Expected lowering according to the checks below:
;   - SSE prefix      -> no inline stores; a call to memset with the length
;                        256 in %edx and the value 42 in %esi, wrapped in a
;                        pushq/popq of %rax
;   - SSE2FAST prefix -> sixteen unaligned 16-byte movups stores
;   - AVX prefix      -> eight unaligned 32-byte ymm stores, then vzeroupper
142 define void @memset_256_nonzero_bytes(i8* %x) {
143 ; SSE-LABEL: memset_256_nonzero_bytes:
145 ; SSE-NEXT: pushq %rax
146 ; SSE-NEXT: .cfi_def_cfa_offset 16
147 ; SSE-NEXT: movl $256, %edx # imm = 0x100
148 ; SSE-NEXT: movl $42, %esi
149 ; SSE-NEXT: callq memset
150 ; SSE-NEXT: popq %rax
151 ; SSE-NEXT: .cfi_def_cfa_offset 8
154 ; SSE2FAST-LABEL: memset_256_nonzero_bytes:
156 ; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
157 ; SSE2FAST-NEXT: movups %xmm0, 240(%rdi)
158 ; SSE2FAST-NEXT: movups %xmm0, 224(%rdi)
159 ; SSE2FAST-NEXT: movups %xmm0, 208(%rdi)
160 ; SSE2FAST-NEXT: movups %xmm0, 192(%rdi)
161 ; SSE2FAST-NEXT: movups %xmm0, 176(%rdi)
162 ; SSE2FAST-NEXT: movups %xmm0, 160(%rdi)
163 ; SSE2FAST-NEXT: movups %xmm0, 144(%rdi)
164 ; SSE2FAST-NEXT: movups %xmm0, 128(%rdi)
165 ; SSE2FAST-NEXT: movups %xmm0, 112(%rdi)
166 ; SSE2FAST-NEXT: movups %xmm0, 96(%rdi)
167 ; SSE2FAST-NEXT: movups %xmm0, 80(%rdi)
168 ; SSE2FAST-NEXT: movups %xmm0, 64(%rdi)
169 ; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
170 ; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
171 ; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
172 ; SSE2FAST-NEXT: movups %xmm0, (%rdi)
173 ; SSE2FAST-NEXT: retq
175 ; AVX-LABEL: memset_256_nonzero_bytes:
177 ; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
178 ; AVX-NEXT: vmovups %ymm0, 224(%rdi)
179 ; AVX-NEXT: vmovups %ymm0, 192(%rdi)
180 ; AVX-NEXT: vmovups %ymm0, 160(%rdi)
181 ; AVX-NEXT: vmovups %ymm0, 128(%rdi)
182 ; AVX-NEXT: vmovups %ymm0, 96(%rdi)
183 ; AVX-NEXT: vmovups %ymm0, 64(%rdi)
184 ; AVX-NEXT: vmovups %ymm0, 32(%rdi)
185 ; AVX-NEXT: vmovups %ymm0, (%rdi)
186 ; AVX-NEXT: vzeroupper
188 %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 256, i64 -1)
; External callee used by the constant-value tests in this file; they invoke
; it as (destination pointer, i32 fill value, i64 length, i64 -1).
192 declare i8* @__memset_chk(i8*, i32, i64, i64)
194 ; Repeat with a non-constant value for the stores.
; Fixed 16-byte fill with the run-time byte %c, issued through the
; llvm.memset intrinsic (final i1 argument is false).
; Expected lowering according to the checks below:
;   - SSE prefix      -> zero-extend the byte, multiply by 0x0101010101010101
;                        to replicate it across 8 bytes, then two movq stores
;   - SSE2FAST prefix -> movd + punpcklbw/pshuflw/pshufd splat, one movdqu
;   - AVX1 prefix     -> vmovd + vpshufb with a zeroed mask, one vmovdqu
;   - AVX2 prefix     -> vmovd + vpbroadcastb, one vmovdqu
196 define void @memset_16_nonconst_bytes(i8* %x, i8 %c) {
197 ; SSE-LABEL: memset_16_nonconst_bytes:
199 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
200 ; SSE-NEXT: movzbl %sil, %eax
201 ; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
202 ; SSE-NEXT: imulq %rax, %rcx
203 ; SSE-NEXT: movq %rcx, 8(%rdi)
204 ; SSE-NEXT: movq %rcx, (%rdi)
207 ; SSE2FAST-LABEL: memset_16_nonconst_bytes:
209 ; SSE2FAST-NEXT: movd %esi, %xmm0
210 ; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
211 ; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
212 ; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
213 ; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
214 ; SSE2FAST-NEXT: retq
216 ; AVX1-LABEL: memset_16_nonconst_bytes:
218 ; AVX1-NEXT: vmovd %esi, %xmm0
219 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
220 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
221 ; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
224 ; AVX2-LABEL: memset_16_nonconst_bytes:
226 ; AVX2-NEXT: vmovd %esi, %xmm0
227 ; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
228 ; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
230 tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 16, i1 false)
; Fixed 32-byte fill with the run-time byte %c, issued through the
; llvm.memset intrinsic (final i1 argument is false).
; Expected lowering according to the checks below:
;   - SSE prefix      -> zero-extend the byte, multiply by 0x0101010101010101
;                        to replicate it across 8 bytes, then four movq stores
;   - SSE2FAST prefix -> movd + punpcklbw/pshuflw/pshufd splat, two movdqu
;   - AVX1 prefix     -> vpshufb splat in xmm, vinsertf128 to fill the upper
;                        half of ymm, one 32-byte vmovups, then vzeroupper
;   - AVX2 prefix     -> vpbroadcastb into ymm, one 32-byte vmovdqu, then
;                        vzeroupper
234 define void @memset_32_nonconst_bytes(i8* %x, i8 %c) {
235 ; SSE-LABEL: memset_32_nonconst_bytes:
237 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
238 ; SSE-NEXT: movzbl %sil, %eax
239 ; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
240 ; SSE-NEXT: imulq %rax, %rcx
241 ; SSE-NEXT: movq %rcx, 24(%rdi)
242 ; SSE-NEXT: movq %rcx, 16(%rdi)
243 ; SSE-NEXT: movq %rcx, 8(%rdi)
244 ; SSE-NEXT: movq %rcx, (%rdi)
247 ; SSE2FAST-LABEL: memset_32_nonconst_bytes:
249 ; SSE2FAST-NEXT: movd %esi, %xmm0
250 ; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
251 ; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
252 ; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
253 ; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
254 ; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
255 ; SSE2FAST-NEXT: retq
257 ; AVX1-LABEL: memset_32_nonconst_bytes:
259 ; AVX1-NEXT: vmovd %esi, %xmm0
260 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
261 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
262 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
263 ; AVX1-NEXT: vmovups %ymm0, (%rdi)
264 ; AVX1-NEXT: vzeroupper
267 ; AVX2-LABEL: memset_32_nonconst_bytes:
269 ; AVX2-NEXT: vmovd %esi, %xmm0
270 ; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
271 ; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
272 ; AVX2-NEXT: vzeroupper
274 tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 32, i1 false)
; Fixed 64-byte fill with the run-time byte %c, issued through the
; llvm.memset intrinsic (final i1 argument is false).
; Expected lowering according to the checks below:
;   - SSE prefix      -> zero-extend the byte, multiply by 0x0101010101010101
;                        to replicate it across 8 bytes, then eight movq stores
;   - SSE2FAST prefix -> movd + punpcklbw/pshuflw/pshufd splat, four movdqu
;   - AVX1 prefix     -> vpshufb splat in xmm, vinsertf128 to fill the upper
;                        half of ymm, two 32-byte vmovups, then vzeroupper
;   - AVX2 prefix     -> vpbroadcastb into ymm, two 32-byte vmovdqu, then
;                        vzeroupper
278 define void @memset_64_nonconst_bytes(i8* %x, i8 %c) {
279 ; SSE-LABEL: memset_64_nonconst_bytes:
281 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
282 ; SSE-NEXT: movzbl %sil, %eax
283 ; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
284 ; SSE-NEXT: imulq %rax, %rcx
285 ; SSE-NEXT: movq %rcx, 56(%rdi)
286 ; SSE-NEXT: movq %rcx, 48(%rdi)
287 ; SSE-NEXT: movq %rcx, 40(%rdi)
288 ; SSE-NEXT: movq %rcx, 32(%rdi)
289 ; SSE-NEXT: movq %rcx, 24(%rdi)
290 ; SSE-NEXT: movq %rcx, 16(%rdi)
291 ; SSE-NEXT: movq %rcx, 8(%rdi)
292 ; SSE-NEXT: movq %rcx, (%rdi)
295 ; SSE2FAST-LABEL: memset_64_nonconst_bytes:
297 ; SSE2FAST-NEXT: movd %esi, %xmm0
298 ; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
299 ; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
300 ; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
301 ; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
302 ; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
303 ; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
304 ; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
305 ; SSE2FAST-NEXT: retq
307 ; AVX1-LABEL: memset_64_nonconst_bytes:
309 ; AVX1-NEXT: vmovd %esi, %xmm0
310 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
311 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
312 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
313 ; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
314 ; AVX1-NEXT: vmovups %ymm0, (%rdi)
315 ; AVX1-NEXT: vzeroupper
318 ; AVX2-LABEL: memset_64_nonconst_bytes:
320 ; AVX2-NEXT: vmovd %esi, %xmm0
321 ; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
322 ; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
323 ; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
324 ; AVX2-NEXT: vzeroupper
326 tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 64, i1 false)
; Fixed 128-byte fill with the run-time byte %c, issued through the
; llvm.memset intrinsic (final i1 argument is false).
; Expected lowering according to the checks below:
;   - SSE prefix      -> zero-extend the byte, multiply by 0x0101010101010101
;                        to replicate it across 8 bytes, then sixteen movq
;                        stores
;   - SSE2FAST prefix -> movd + punpcklbw/pshuflw/pshufd splat, eight movdqu
;   - AVX1 prefix     -> vpshufb splat in xmm, vinsertf128 to fill the upper
;                        half of ymm, four 32-byte vmovups, then vzeroupper
;   - AVX2 prefix     -> vpbroadcastb into ymm, four 32-byte vmovdqu, then
;                        vzeroupper
330 define void @memset_128_nonconst_bytes(i8* %x, i8 %c) {
331 ; SSE-LABEL: memset_128_nonconst_bytes:
333 ; SSE-NEXT: # kill: def $esi killed $esi def $rsi
334 ; SSE-NEXT: movzbl %sil, %eax
335 ; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
336 ; SSE-NEXT: imulq %rax, %rcx
337 ; SSE-NEXT: movq %rcx, 120(%rdi)
338 ; SSE-NEXT: movq %rcx, 112(%rdi)
339 ; SSE-NEXT: movq %rcx, 104(%rdi)
340 ; SSE-NEXT: movq %rcx, 96(%rdi)
341 ; SSE-NEXT: movq %rcx, 88(%rdi)
342 ; SSE-NEXT: movq %rcx, 80(%rdi)
343 ; SSE-NEXT: movq %rcx, 72(%rdi)
344 ; SSE-NEXT: movq %rcx, 64(%rdi)
345 ; SSE-NEXT: movq %rcx, 56(%rdi)
346 ; SSE-NEXT: movq %rcx, 48(%rdi)
347 ; SSE-NEXT: movq %rcx, 40(%rdi)
348 ; SSE-NEXT: movq %rcx, 32(%rdi)
349 ; SSE-NEXT: movq %rcx, 24(%rdi)
350 ; SSE-NEXT: movq %rcx, 16(%rdi)
351 ; SSE-NEXT: movq %rcx, 8(%rdi)
352 ; SSE-NEXT: movq %rcx, (%rdi)
355 ; SSE2FAST-LABEL: memset_128_nonconst_bytes:
357 ; SSE2FAST-NEXT: movd %esi, %xmm0
358 ; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
359 ; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
360 ; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
361 ; SSE2FAST-NEXT: movdqu %xmm0, 112(%rdi)
362 ; SSE2FAST-NEXT: movdqu %xmm0, 96(%rdi)
363 ; SSE2FAST-NEXT: movdqu %xmm0, 80(%rdi)
364 ; SSE2FAST-NEXT: movdqu %xmm0, 64(%rdi)
365 ; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
366 ; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
367 ; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
368 ; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
369 ; SSE2FAST-NEXT: retq
371 ; AVX1-LABEL: memset_128_nonconst_bytes:
373 ; AVX1-NEXT: vmovd %esi, %xmm0
374 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
375 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
376 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
377 ; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
378 ; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
379 ; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
380 ; AVX1-NEXT: vmovups %ymm0, (%rdi)
381 ; AVX1-NEXT: vzeroupper
384 ; AVX2-LABEL: memset_128_nonconst_bytes:
386 ; AVX2-NEXT: vmovd %esi, %xmm0
387 ; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
388 ; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
389 ; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
390 ; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
391 ; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
392 ; AVX2-NEXT: vzeroupper
394 tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 128, i1 false)
; Fixed 256-byte fill with the run-time byte %c, issued through the
; llvm.memset intrinsic (final i1 argument is false).
; Expected lowering according to the checks below:
;   - SSE prefix      -> no inline stores; the length 256 is placed in %edx
;                        and control is transferred to memset as a tail call
;   - SSE2FAST prefix -> movd + punpcklbw/pshuflw/pshufd splat, sixteen movdqu
;   - AVX1 prefix     -> vpshufb splat in xmm, vinsertf128 to fill the upper
;                        half of ymm, eight 32-byte vmovups, then vzeroupper
;   - AVX2 prefix     -> vpbroadcastb into ymm, eight 32-byte vmovdqu, then
;                        vzeroupper
398 define void @memset_256_nonconst_bytes(i8* %x, i8 %c) {
399 ; SSE-LABEL: memset_256_nonconst_bytes:
401 ; SSE-NEXT: movl $256, %edx # imm = 0x100
402 ; SSE-NEXT: jmp memset # TAILCALL
404 ; SSE2FAST-LABEL: memset_256_nonconst_bytes:
406 ; SSE2FAST-NEXT: movd %esi, %xmm0
407 ; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
408 ; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
409 ; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
410 ; SSE2FAST-NEXT: movdqu %xmm0, 240(%rdi)
411 ; SSE2FAST-NEXT: movdqu %xmm0, 224(%rdi)
412 ; SSE2FAST-NEXT: movdqu %xmm0, 208(%rdi)
413 ; SSE2FAST-NEXT: movdqu %xmm0, 192(%rdi)
414 ; SSE2FAST-NEXT: movdqu %xmm0, 176(%rdi)
415 ; SSE2FAST-NEXT: movdqu %xmm0, 160(%rdi)
416 ; SSE2FAST-NEXT: movdqu %xmm0, 144(%rdi)
417 ; SSE2FAST-NEXT: movdqu %xmm0, 128(%rdi)
418 ; SSE2FAST-NEXT: movdqu %xmm0, 112(%rdi)
419 ; SSE2FAST-NEXT: movdqu %xmm0, 96(%rdi)
420 ; SSE2FAST-NEXT: movdqu %xmm0, 80(%rdi)
421 ; SSE2FAST-NEXT: movdqu %xmm0, 64(%rdi)
422 ; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
423 ; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
424 ; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
425 ; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
426 ; SSE2FAST-NEXT: retq
428 ; AVX1-LABEL: memset_256_nonconst_bytes:
430 ; AVX1-NEXT: vmovd %esi, %xmm0
431 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
432 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
433 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
434 ; AVX1-NEXT: vmovups %ymm0, 224(%rdi)
435 ; AVX1-NEXT: vmovups %ymm0, 192(%rdi)
436 ; AVX1-NEXT: vmovups %ymm0, 160(%rdi)
437 ; AVX1-NEXT: vmovups %ymm0, 128(%rdi)
438 ; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
439 ; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
440 ; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
441 ; AVX1-NEXT: vmovups %ymm0, (%rdi)
442 ; AVX1-NEXT: vzeroupper
445 ; AVX2-LABEL: memset_256_nonconst_bytes:
447 ; AVX2-NEXT: vmovd %esi, %xmm0
448 ; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
449 ; AVX2-NEXT: vmovdqu %ymm0, 224(%rdi)
450 ; AVX2-NEXT: vmovdqu %ymm0, 192(%rdi)
451 ; AVX2-NEXT: vmovdqu %ymm0, 160(%rdi)
452 ; AVX2-NEXT: vmovdqu %ymm0, 128(%rdi)
453 ; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
454 ; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
455 ; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
456 ; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
457 ; AVX2-NEXT: vzeroupper
459 tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 256, i1 false)
; Intrinsic used by the run-time-value tests in this file; they invoke it as
; (destination pointer, i8 fill value, i64 length, i1 false).
; NOTE(review): attribute group #1 is referenced here but its definition is
; not in the visible part of this file - confirm it exists or is intentionally
; omitted.
463 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #1