1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
define i8* @test_memcpy1_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy1_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1024, i32 1)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

define i8* @test_memcpy2_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy2_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_2
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1024, i32 2)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

define i8* @test_memcpy4_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy4_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1024, i32 4)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

define i8* @test_memcpy8(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_8
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %P, i8* align 8 %Q, i32 1024, i32 8)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

define i8* @test_memcpy16_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy16_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_16
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 1024, i32 16)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

; Ensure the destination and source are loaded into the correct argument
; registers (%rdi / %rsi) when they come from memory rather than arguments.
define void @test_memcpy_args(i8** %Storage) {
; CHECK-LABEL: test_memcpy_args:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rsi
; CHECK-NEXT:    movq %rax, %rdi
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_4
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %Dst = load i8*, i8** %Storage
  %Src.addr = getelementptr i8*, i8** %Storage, i64 1
  %Src = load i8*, i8** %Src.addr
  ; 3rd arg (%edx) -- length
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %Dst, i8* align 4 %Src, i32 1024, i32 4)
  ret void
}
define i8* @test_memmove1_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove1_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1024, i32 1)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

define i8* @test_memmove2_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove2_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_2
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1024, i32 2)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

define i8* @test_memmove4_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove4_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1024, i32 4)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

define i8* @test_memmove8_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove8_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_8
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %P, i8* align 8 %Q, i32 1024, i32 8)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

define i8* @test_memmove16_generic(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove16_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_16
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 1024, i32 16)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

; Ensure the destination and source are loaded into the correct argument
; registers (%rdi / %rsi) when they come from memory rather than arguments.
define void @test_memmove_args(i8** %Storage) {
; CHECK-LABEL: test_memmove_args:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rsi
; CHECK-NEXT:    movq %rax, %rdi
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_4
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %Dst = load i8*, i8** %Storage
  %Src.addr = getelementptr i8*, i8** %Storage, i64 1
  %Src = load i8*, i8** %Src.addr
  ; 3rd arg (%edx) -- length
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %Dst, i8* align 4 %Src, i32 1024, i32 4)
  ret void
}
define i8* @test_memset1_generic(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset1_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %P, i8 %V, i32 1024, i32 1)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

define i8* @test_memset2_generic(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset2_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_2
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %P, i8 %V, i32 1024, i32 2)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

define i8* @test_memset4_generic(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1024, i32 4)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

define i8* @test_memset8_generic(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset8_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_8
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %P, i8 %V, i32 1024, i32 8)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

define i8* @test_memset16_generic(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset16_generic:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_16
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 1024, i32 16)
  ; 3rd arg (%edx) -- length
  ret i8* %P
}

; Ensure the destination and the fill value are loaded into the correct
; argument registers (%rdi / %esi) when they come from memory.
define void @test_memset_args(i8** %Storage, i8* %V) {
; CHECK-LABEL: test_memset_args:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movq (%rdi), %rdi
; CHECK-NEXT:    movzbl (%rsi), %esi
; CHECK-NEXT:    movl $1024, %edx # imm = 0x400
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %Dst = load i8*, i8** %Storage
  %Val = load i8, i8* %V
  ; 3rd arg (%edx) -- length
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %Dst, i8 %Val, i32 1024, i32 4)
  ret void
}
344 ;; Next batch of tests are cases where we could profitably lower to
345 ;; atomic loads and stores directly, just as we do for non-atomic ones for
;; non element.unordered.atomic variants.
define i8* @test_memcpy1_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy1_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %P, i8* align 1 %Q, i32 64, i32 1)
  ret i8* %P
}

; Ensure that higher pointer alignments generate vector load/stores even
; with a small element size.
define i8* @test_memcpy1_64_align4(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy1_64_align4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 64, i32 1)
  ret i8* %P
}

define i8* @test_memcpy1_64_align8(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy1_64_align8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %P, i8* align 8 %Q, i32 64, i32 1)
  ret i8* %P
}

define i8* @test_memcpy1_64_align16(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy1_64_align16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 64, i32 1)
  ret i8* %P
}

; Make sure that different source & dest alignments are handled correctly.
define i8* @test_memcpy1_64_diff_aligns(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy1_64_diff_aligns:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %P, i8* align 16 %Q, i32 64, i32 1)
  ret i8* %P
}

define i8* @test_memcpy2_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy2_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_2
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %P, i8* align 2 %Q, i32 64, i32 2)
  ret i8* %P
}

define i8* @test_memcpy4_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy4_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 64, i32 4)
  ret i8* %P
}

define i8* @test_memcpy8_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy8_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_8
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %P, i8* align 8 %Q, i32 64, i32 8)
  ret i8* %P
}

define i8* @test_memcpy16_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memcpy16_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memcpy_element_unordered_atomic_16
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 64, i32 16)
  ret i8* %P
}
505 ; ==================================
define i8* @test_memmove1_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove1_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %P, i8* align 1 %Q, i32 64, i32 1)
  ret i8* %P
}

; Ensure align 16 generates vector load/stores even with small element size
define i8* @test_memmove1_64_align16(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove1_64_align16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 64, i32 1)
  ret i8* %P
}

; Make sure that different source & dest alignments are handled correctly.
define i8* @test_memmove1_64_diff_aligns(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove1_64_diff_aligns:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %P, i8* align 16 %Q, i32 64, i32 1)
  ret i8* %P
}

define i8* @test_memmove2_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove2_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_2
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %P, i8* align 2 %Q, i32 64, i32 2)
  ret i8* %P
}

define i8* @test_memmove4_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove4_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 64, i32 4)
  ret i8* %P
}

define i8* @test_memmove8_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove8_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_8
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %P, i8* align 8 %Q, i32 64, i32 8)
  ret i8* %P
}

define i8* @test_memmove16_64(i8* %P, i8* %Q) {
; CHECK-LABEL: test_memmove16_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memmove_element_unordered_atomic_16
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %P, i8* align 16 %Q, i32 64, i32 16)
  ret i8* %P
}
628 ; ==============================
define i8* @test_memset1_64(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset1_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %P, i8 %V, i32 64, i32 1)
  ret i8* %P
}

define i8* @test_memset1_64_align16(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset1_64_align16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_1
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 64, i32 1)
  ret i8* %P
}

define i8* @test_memset2_64(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset2_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_2
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %P, i8 %V, i32 64, i32 2)
  ret i8* %P
}

;; Use the memset4 case to explore alignment and sizing requirements in the
;; lowering.
define i8* @test_memset4_64(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 64, i32 4)
  ret i8* %P
}

define i8* @test_memset4_64_align8(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_64_align8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %P, i8 %V, i32 64, i32 4)
  ret i8* %P
}

define i8* @test_memset4_64_align16(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_64_align16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 64, i32 4)
  ret i8* %P
}

define i8* @test_memset4_64_align64(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_64_align64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 64 %P, i8 %V, i32 64, i32 4)
  ret i8* %P
}
define i8* @test_memset4_4(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $4, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 4, i32 4)
  ret i8* %P
}

define i8* @test_memset4_8(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $8, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 8, i32 4)
  ret i8* %P
}

define i8* @test_memset4_8_align8(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_8_align8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $8, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %P, i8 %V, i32 8, i32 4)
  ret i8* %P
}

define i8* @test_memset4_12(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $12, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 12, i32 4)
  ret i8* %P
}

define i8* @test_memset4_16(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $16, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 16, i32 4)
  ret i8* %P
}

define i8* @test_memset4_16_align16(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_16_align16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $16, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 16, i32 4)
  ret i8* %P
}

define i8* @test_memset4_60(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset4_60:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $60, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_4
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 60, i32 4)
  ret i8* %P
}
define i8* @test_memset8_64(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset8_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_8
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %P, i8 %V, i32 64, i32 8)
  ret i8* %P
}

define i8* @test_memset16_64(i8* %P, i8 %V) {
; CHECK-LABEL: test_memset16_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_16
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 64, i32 16)
  ret i8* %P
}

; A constant-zero fill value is materialized with the xor zeroing idiom.
define i8* @test_memset16_64_zero(i8* %P) {
; CHECK-LABEL: test_memset16_64_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    movl $64, %edx
; CHECK-NEXT:    callq __llvm_memset_element_unordered_atomic_16
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 0, i32 64, i32 16)
  ret i8* %P
}
; Declarations of the element-wise unordered-atomic memory intrinsics under test.
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture, i8, i32, i32) nounwind