crossgcc: Upgrade CMake from 3.29.3 to 3.30.2
[coreboot.git] / src / arch / x86 / memmove_32.c
blob7c2529d622778fed24795d1f76fcd74b366a665f
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3 * This file is derived from memcpy_32.c in the Linux kernel.
4 */
6 #include <string.h>
7 #include <stdbool.h>
8 #include <asan.h>
10 void *memmove(void *dest, const void *src, size_t n)
12 int d0, d1, d2, d3, d4, d5;
13 char *ret = dest;
15 #if (ENV_SEPARATE_ROMSTAGE && CONFIG(ASAN_IN_ROMSTAGE)) || \
16 (ENV_RAMSTAGE && CONFIG(ASAN_IN_RAMSTAGE))
17 check_memory_region((unsigned long)src, n, false, _RET_IP_);
18 check_memory_region((unsigned long)dest, n, true, _RET_IP_);
19 #endif
21 __asm__ __volatile__(
22 /* Handle more 16bytes in loop */
23 "cmp $0x10, %0\n\t"
24 "jb 1f\n\t"
26 /* Decide forward/backward copy mode */
27 "cmp %2, %1\n\t"
28 "jb 2f\n\t"
31 * movs instruction have many startup latency
32 * so we handle small size by general register.
34 "cmp $680, %0\n\t"
35 "jb 3f\n\t"
37 * movs instruction is only good for aligned case.
39 "mov %1, %3\n\t"
40 "xor %2, %3\n\t"
41 "and $0xff, %3\n\t"
42 "jz 4f\n\t"
43 "3:\n\t"
44 "sub $0x10, %0\n\t"
47 * We gobble 16byts forward in each loop.
49 "3:\n\t"
50 "sub $0x10, %0\n\t"
51 "mov 0*4(%1), %3\n\t"
52 "mov 1*4(%1), %4\n\t"
53 "mov %3, 0*4(%2)\n\t"
54 "mov %4, 1*4(%2)\n\t"
55 "mov 2*4(%1), %3\n\t"
56 "mov 3*4(%1), %4\n\t"
57 "mov %3, 2*4(%2)\n\t"
58 "mov %4, 3*4(%2)\n\t"
59 "lea 0x10(%1), %1\n\t"
60 "lea 0x10(%2), %2\n\t"
61 "jae 3b\n\t"
62 "add $0x10, %0\n\t"
63 "jmp 1f\n\t"
66 * Handle data forward by movs.
68 ".p2align 4\n\t"
69 "4:\n\t"
70 "mov -4(%1, %0), %3\n\t"
71 "lea -4(%2, %0), %4\n\t"
72 "shr $2, %0\n\t"
73 "rep movsl\n\t"
74 "mov %3, (%4)\n\t"
75 "jmp 11f\n\t"
77 * Handle data backward by movs.
79 ".p2align 4\n\t"
80 "6:\n\t"
81 "mov (%1), %3\n\t"
82 "mov %2, %4\n\t"
83 "lea -4(%1, %0), %1\n\t"
84 "lea -4(%2, %0), %2\n\t"
85 "shr $2, %0\n\t"
86 "std\n\t"
87 "rep movsl\n\t"
88 "mov %3,(%4)\n\t"
89 "cld\n\t"
90 "jmp 11f\n\t"
93 * Start to prepare for backward copy.
95 ".p2align 4\n\t"
96 "2:\n\t"
97 "cmp $680, %0\n\t"
98 "jb 5f\n\t"
99 "mov %1, %3\n\t"
100 "xor %2, %3\n\t"
101 "and $0xff, %3\n\t"
102 "jz 6b\n\t"
105 * Calculate copy position to tail.
107 "5:\n\t"
108 "add %0, %1\n\t"
109 "add %0, %2\n\t"
110 "sub $0x10, %0\n\t"
113 * We gobble 16byts backward in each loop.
115 "7:\n\t"
116 "sub $0x10, %0\n\t"
118 "mov -1*4(%1), %3\n\t"
119 "mov -2*4(%1), %4\n\t"
120 "mov %3, -1*4(%2)\n\t"
121 "mov %4, -2*4(%2)\n\t"
122 "mov -3*4(%1), %3\n\t"
123 "mov -4*4(%1), %4\n\t"
124 "mov %3, -3*4(%2)\n\t"
125 "mov %4, -4*4(%2)\n\t"
126 "lea -0x10(%1), %1\n\t"
127 "lea -0x10(%2), %2\n\t"
128 "jae 7b\n\t"
130 * Calculate copy position to head.
132 "add $0x10, %0\n\t"
133 "sub %0, %1\n\t"
134 "sub %0, %2\n\t"
137 * Move data from 8 bytes to 15 bytes.
139 ".p2align 4\n\t"
140 "1:\n\t"
141 "cmp $8, %0\n\t"
142 "jb 8f\n\t"
143 "mov 0*4(%1), %3\n\t"
144 "mov 1*4(%1), %4\n\t"
145 "mov -2*4(%1, %0), %5\n\t"
146 "mov -1*4(%1, %0), %1\n\t"
148 "mov %3, 0*4(%2)\n\t"
149 "mov %4, 1*4(%2)\n\t"
150 "mov %5, -2*4(%2, %0)\n\t"
151 "mov %1, -1*4(%2, %0)\n\t"
152 "jmp 11f\n\t"
155 * Move data from 4 bytes to 7 bytes.
157 ".p2align 4\n\t"
158 "8:\n\t"
159 "cmp $4, %0\n\t"
160 "jb 9f\n\t"
161 "mov 0*4(%1), %3\n\t"
162 "mov -1*4(%1, %0), %4\n\t"
163 "mov %3, 0*4(%2)\n\t"
164 "mov %4, -1*4(%2, %0)\n\t"
165 "jmp 11f\n\t"
168 * Move data from 2 bytes to 3 bytes.
170 ".p2align 4\n\t"
171 "9:\n\t"
172 "cmp $2, %0\n\t"
173 "jb 10f\n\t"
174 "movw 0*2(%1), %%dx\n\t"
175 "movw -1*2(%1, %0), %%bx\n\t"
176 "movw %%dx, 0*2(%2)\n\t"
177 "movw %%bx, -1*2(%2, %0)\n\t"
178 "jmp 11f\n\t"
181 * Move data for 1 byte.
183 ".p2align 4\n\t"
184 "10:\n\t"
185 "cmp $1, %0\n\t"
186 "jb 11f\n\t"
187 "movb (%1), %%cl\n\t"
188 "movb %%cl, (%2)\n\t"
189 ".p2align 4\n\t"
190 "11:"
191 : "=&c" (d0), "=&S" (d1), "=&D" (d2),
192 "=r" (d3), "=r" (d4), "=r"(d5)
193 : "0" (n),
194 "1" (src),
195 "2" (dest)
196 : "memory");
198 return ret;