2 * Normally compiler builtins are used, but sometimes the compiler calls out
3 * of line code. Based on asm-i386/string.h.
5 * This assembly file is rewritten from the memmove_64.c file.
6 * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
9 #include <linux/linkage.h>
10 #include <asm/dwarf2.h>
11 #include <asm/cpufeature.h>
16 * Implement memmove(). This can handle overlap between src and dst.
29 /* Handle 32 bytes or more in a loop */
34 /* Decide forward/backward copy mode */
36 jge .Lmemmove_begin_forward
42 .Lmemmove_begin_forward:
44 * The movsq instruction has a high startup latency,
45 * so we handle small sizes with general-purpose registers.
50 * The movsq instruction is only good for the aligned case.
58 * We gobble 32 bytes forward in each loop iteration.
77 * Handle data forward by movsq.
82 movq -8(%rsi, %rdx), %r11
83 lea -8(%rdi, %rdx), %r10
88 .Lmemmove_end_forward:
91 * Handle data backward by movsq.
98 leaq -8(%rsi, %rdx), %rsi
99 leaq -8(%rdi, %rdx), %rdi
108 * Start to prepare for backward copy.
118 * Calculate copy position to tail.
124 * We gobble 32 bytes backward in each loop iteration.
128 movq -1*8(%rsi), %r11
129 movq -2*8(%rsi), %r10
132 leaq -4*8(%rsi), %rsi
134 movq %r11, -1*8(%rdi)
135 movq %r10, -2*8(%rdi)
138 leaq -4*8(%rdi), %rdi
141 * Calculate copy position to head.
150 * Move data from 16 bytes to 31 bytes.
154 movq -2*8(%rsi, %rdx), %r9
155 movq -1*8(%rsi, %rdx), %r8
158 movq %r9, -2*8(%rdi, %rdx)
159 movq %r8, -1*8(%rdi, %rdx)
166 * Move data from 8 bytes to 15 bytes.
169 movq -1*8(%rsi, %rdx), %r10
171 movq %r10, -1*8(%rdi, %rdx)
177 * Move data from 4 bytes to 7 bytes.
180 movl -4(%rsi, %rdx), %r10d
182 movl %r10d, -4(%rdi, %rdx)
188 * Move data from 2 bytes to 3 bytes.
191 movw -2(%rsi, %rdx), %r10w
193 movw %r10w, -2(%rdi, %rdx)
199 * Move data for 1 byte.
207 .section .altinstr_replacement,"ax"
208 .Lmemmove_begin_forward_efs:
209 /* Forward moving data. */
213 .Lmemmove_end_forward_efs:
216 .section .altinstructions,"a"
218 .quad .Lmemmove_begin_forward
219 .quad .Lmemmove_begin_forward_efs
220 .word X86_FEATURE_ERMS
221 .byte .Lmemmove_end_forward-.Lmemmove_begin_forward
222 .byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs