 * Normally compiler builtins are used, but sometimes the compiler calls out
 * of line code. Based on asm-i386/string.h.
 *
 * This assembly file is rewritten from the memmove_64.c file.
 *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
 * Implement memmove(). This can handle overlap between src and dst.
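 *
 * The register usage is assumed here to follow the standard x86-64 SysV
 * ABI: rdi holds dest, rsi holds src, rdx holds the byte count, and dest
 * is returned in rax.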
	/* Handle sizes of 32 bytes or more in the loop */
	/* Decide forward/backward copy mode */
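	/*
	 * The decision, as a C-like sketch (an illustration of the intent,
	 * not the original memmove_64.c source):
	 *
	 *	if (dst > src)
	 *		copy backward;	(start at the last byte, so an
	 *				 overlapping tail is never clobbered)
	 *	else
	 *		copy forward;
	 */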
	 * The movsq instruction has a high startup latency,
	 * so we handle small sizes with general-purpose registers.
	 * The movsq instruction is only good for the aligned case.
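	 *
	 * Only large copies whose source and destination share the same
	 * low-order alignment are expected to take the movsq path; everything
	 * else is left to the general-register loop.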
	 * We gobble 32 bytes forward in each loop iteration.
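	 *
	 * Each pass is expected to pull four quadwords into %r11, %r10, %r9
	 * and %r8 and then store them to the destination, mirroring the
	 * backward 32-byte loop shown further down in this file.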
	 * Copy data forward with movsq.
	movq -8(%rsi, %rdx), %r11	/* save the last quadword of src */
	lea -8(%rdi, %rdx), %r10	/* address of the last quadword of dst */
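	/*
	 * The quadword saved in %r11 above is presumably written to (%r10)
	 * once the rep movsq has finished, so a count that is not a multiple
	 * of 8 still gets its trailing bytes via an overlapping store.
	 */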
	 * Copy data backward with movsq.
	leaq -8(%rsi, %rdx), %rsi	/* point %rsi at the last quadword of src */
	leaq -8(%rdi, %rdx), %rdi	/* point %rdi at the last quadword of dst */
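	/*
	 * With both pointers parked on the last quadword, the copy is
	 * presumably performed between std and cld so that rep movsq walks
	 * downward through memory, the first quadword of src having been
	 * saved beforehand to cover any remainder at the head.
	 */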
	 * Prepare for the backward copy.
	 * Advance the copy position to the tail.
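	 *
	 * Both pointers are presumably advanced by the full count here so
	 * that the 32-byte loop below can index backward from the end of
	 * the buffers.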
	 * We gobble 32 bytes backward in each loop iteration.
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
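	/*
	 * The count is presumably reduced by 0x20 per pass, and the loop
	 * repeats while at least 32 bytes remain to be copied.
	 */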
	 * Move the copy position back to the head.
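	 *
	 * The leftover count (now below 32 bytes) is presumably restored and
	 * then subtracted from both pointers, leaving them at the head of the
	 * region that still has to be copied by the small-size cases below.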
	 * Move 16 to 31 bytes of data.
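	 *
	 * Both the head and the tail of the region are loaded before anything
	 * is stored, so the two 16-byte halves may overlap in the middle;
	 * that is what lets one straight-line path cover every length from
	 * 16 to 31 bytes.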
	movq 0*8(%rsi), %r11		/* head 16 bytes of src */
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9	/* tail 16 bytes of src */
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	 * Move 8 to 15 bytes of data.
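	 *
	 * Same overlapping trick as above, with one quadword taken from each
	 * end of the region.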
	movq 0*8(%rsi), %r11		/* head quadword of src */
	movq -1*8(%rsi, %rdx), %r10	/* tail quadword of src */
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	 * Move 4 to 7 bytes of data.
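	 *
	 * Same overlapping trick, using 32-bit loads and stores.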
	movl (%rsi), %r11d		/* head dword of src */
	movl -4(%rsi, %rdx), %r10d	/* tail dword of src */
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	 * Move 2 to 3 bytes of data.
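	 *
	 * Same overlapping trick, using 16-bit loads and stores.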
	movw (%rsi), %r11w		/* head word of src */
	movw -2(%rsi, %rdx), %r10w	/* tail word of src */
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	 * Move 1 byte of data.
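	 *
	 * (The final byte is presumably shuttled through %r11b.)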