Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / compiler-rt / lib / builtins / hexagon / memcpy_likely_aligned.S
blob492298f10326188bfe126480d786a76af9281616
//===------------------------- memcopy routines ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// FUNCTION_BEGIN name
//   Opens a global function: switches to .text, aligns the entry point to
//   2^5 = 32 bytes, exports \name, tags it as an ELF function symbol and
//   defines the \name label at the current location.
        .macro FUNCTION_BEGIN name
        .text
        .p2align 5
        .globl \name
        .type  \name, @function
\name:
        .endm
// FUNCTION_END name
//   Records the ELF symbol size of \name as the distance from the \name label
//   to the current location. Place it directly after the function's last
//   instruction.
        .macro FUNCTION_END name
        .size  \name, . - \name
        .endm
//-----------------------------------------------------------------------------
// __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
//
// Doubleword-at-a-time copy for buffers that are expected to be 8-byte
// aligned. Register usage matches a memcpy-style call (the unaligned path
// tail-jumps to memcpy with these registers untouched):
//   In:   r0 = destination, r1 = source, r2 = byte count
//   Out:  r0 = destination (restored to its entry value before returning)
//   Clobbers (aligned path): r1, r3, r5:4, p0 and the loop0 registers.
//
// Preconditions implied by the code (and by the function name):
//   * r2 is a multiple of 8: only (r2 >> 3) doublewords are copied, so a
//     remainder would be silently dropped.
//   * r2 >= 32: the hardware-loop count is (r2 >> 3) - 3, which is only
//     >= 1 from 32 bytes upwards.
//
// If either pointer is not 8-byte aligned, nothing is copied here and control
// is transferred to .Lmemcpy_call instead.
//
// Reminder: all instructions inside one { } packet read the register values
// from before the packet, so a store and a load of r5:4 in the same packet
// store the old value and then replace it.
//-----------------------------------------------------------------------------
FUNCTION_BEGIN __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
        {
                // Two compares writing p0 in one packet are ANDed together:
                // p0 = (src & 7) == 0 && (dst & 7) == 0.
                p0 = bitsclr(r1,#7)
                p0 = bitsclr(r0,#7)
                if (p0.new) r5:4 = memd(r1)     // aligned: preload dword 0
                r3 = #-3                        // loop-count bias (3 dwords are
                                                // loaded outside the loop)
        }
        {
                if (!p0) jump .Lmemcpy_call     // unaligned: r0/r1/r2 unchanged
                if (p0) memd(r0++#8) = r5:4     // store dword 0
                if (p0) r5:4 = memd(r1+#8)      // load dword 1
                r3 += lsr(r2,#3)                // r3 = n/8 - 3
        }
        {
                memd(r0++#8) = r5:4             // store dword 1
                r5:4 = memd(r1+#16)             // load dword 2
                r1 = add(r1,#24)                // r1 -> dword 3
                loop0(1f,r3)                    // run the packet at 1: r3 times
        }
        .falign                                 // fetch-align the loop packet
1:
        {
                memd(r0++#8) = r5:4             // store dword k
                r5:4 = memd(r1++#8)             // load dword k+1
        }:endloop0
        {
                memd(r0) = r5:4                 // store the final dword; r0 is
                                                // dst + n - 8 at this point
                r0 -= add(r2,#-8)               // r0 -= (n - 8): back to dst
                jumpr r31                       // return
        }
FUNCTION_END __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
// Fallback for pointers that are not 8-byte aligned: tail-jump to memcpy.
// The aligned fast path has not modified r0, r1, r2 or r31 when it branches
// here, so memcpy sees the original arguments and returns straight to the
// original caller. Position-independent builds go through the PLT.
.Lmemcpy_call:
#ifdef __PIC__
        jump memcpy@PLT
#else
        jump memcpy
#endif
// Export the same entry point under the __qdsp_ name as well: the alias is a
// global symbol set equal to the __hexagon_ symbol's address.
  .globl __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes
  .set   __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes, \
         __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes