1 /* SPDX-License-Identifier: GPL-2.0-only */
3 /* This code originates from Linux 5.19 */
6 * Implement memmove(). This can handle overlap between src and dst.
21 /* Decide forward/backward copy mode */
23 jge .Lmemmove_begin_forward
29 /* Unlike Linux, don't optimize for FSRM and ERMS */
30 .Lmemmove_begin_forward:
35 * The movsq instruction has a high startup latency,
36 * so we handle small sizes with general-purpose registers.
41 * The movsq instruction is only efficient for the aligned case.
49 * We gobble 32 bytes forward in each loop iteration.
68 * Copy data forward using movsq.
73 movq -8(%rsi, %rdx), %r11
74 lea -8(%rdi, %rdx), %r10
79 .Lmemmove_end_forward:
82 * Copy data backward using movsq.
89 leaq -8(%rsi, %rdx), %rsi
90 leaq -8(%rdi, %rdx), %rdi
99 * Start to prepare for backward copy.
111 * Calculate the copy position at the tail.
117 * We gobble 32 bytes backward in each loop iteration.
121 movq -1*8(%rsi), %r11
122 movq -2*8(%rsi), %r10
125 leaq -4*8(%rsi), %rsi
127 movq %r11, -1*8(%rdi)
128 movq %r10, -2*8(%rdi)
131 leaq -4*8(%rdi), %rdi
134 * Calculate the copy position at the head.
143 * Move data when the size is between 16 and 31 bytes.
147 movq -2*8(%rsi, %rdx), %r9
148 movq -1*8(%rsi, %rdx), %r8
151 movq %r9, -2*8(%rdi, %rdx)
152 movq %r8, -1*8(%rdi, %rdx)
159 * Move data when the size is between 8 and 15 bytes.
162 movq -1*8(%rsi, %rdx), %r10
164 movq %r10, -1*8(%rdi, %rdx)
170 * Move data when the size is between 4 and 7 bytes.
173 movl -4(%rsi, %rdx), %r10d
175 movl %r10d, -4(%rdi, %rdx)
181 * Move data when the size is 2 or 3 bytes.
184 movw -2(%rsi, %rdx), %r10w
186 movw %r10w, -2(%rdi, %rdx)
192 * Move a single byte.