src/arch/x86/memmove_32.c

   1 /* SPDX-License-Identifier: GPL-2.0-only */
   2 /*
   3  * This file is derived from memcpy_32.c in the Linux kernel.
   4  */
   5
   6 #include <string.h>
   7 #include <stdbool.h>
   8 #include <asan.h>
   9
  10 void *memmove(void *dest, const void *src, size_t n)
  11 {
  12         int d0, d1, d2, d3, d4, d5;
  13         char *ret = dest;
  14
  15 #if (ENV_SEPARATE_ROMSTAGE && CONFIG(ASAN_IN_ROMSTAGE)) || \
  16                 (ENV_RAMSTAGE && CONFIG(ASAN_IN_RAMSTAGE))
  17         check_memory_region((unsigned long)src, n, false, _RET_IP_);
  18         check_memory_region((unsigned long)dest, n, true, _RET_IP_);
  19 #endif
  20
  21         __asm__ __volatile__(
  22                 /* Handle more 16bytes in loop */
  23                 "cmp $0x10, %0\n\t"
  24                 "jb     1f\n\t"
  25
  26                 /* Decide forward/backward copy mode */
  27                 "cmp %2, %1\n\t"
  28                 "jb     2f\n\t"
  29
  30                 /*
  31                  * movs instruction have many startup latency
  32                  * so we handle small size by general register.
  33                  */
  34                 "cmp  $680, %0\n\t"
  35                 "jb 3f\n\t"
  36                 /*
  37                  * movs instruction is only good for aligned case.
  38                  */
  39                 "mov %1, %3\n\t"
  40                 "xor %2, %3\n\t"
  41                 "and $0xff, %3\n\t"
  42                 "jz 4f\n\t"
  43                 "3:\n\t"
  44                 "sub $0x10, %0\n\t"
  45
  46                 /*
  47                  * We gobble 16byts forward in each loop.
  48                  */
  49                 "3:\n\t"
  50                 "sub $0x10, %0\n\t"
  51                 "mov 0*4(%1), %3\n\t"
  52                 "mov 1*4(%1), %4\n\t"
  53                 "mov  %3, 0*4(%2)\n\t"
  54                 "mov  %4, 1*4(%2)\n\t"
  55                 "mov 2*4(%1), %3\n\t"
  56                 "mov 3*4(%1), %4\n\t"
  57                 "mov  %3, 2*4(%2)\n\t"
  58                 "mov  %4, 3*4(%2)\n\t"
  59                 "lea  0x10(%1), %1\n\t"
  60                 "lea  0x10(%2), %2\n\t"
  61                 "jae 3b\n\t"
  62                 "add $0x10, %0\n\t"
  63                 "jmp 1f\n\t"
  64
  65                 /*
  66                  * Handle data forward by movs.
  67                  */
  68                 ".p2align 4\n\t"
  69                 "4:\n\t"
  70                 "mov -4(%1, %0), %3\n\t"
  71                 "lea -4(%2, %0), %4\n\t"
  72                 "shr $2, %0\n\t"
  73                 "rep movsl\n\t"
  74                 "mov %3, (%4)\n\t"
  75                 "jmp 11f\n\t"
  76                 /*
  77                  * Handle data backward by movs.
  78                  */
  79                 ".p2align 4\n\t"
  80                 "6:\n\t"
  81                 "mov (%1), %3\n\t"
  82                 "mov %2, %4\n\t"
  83                 "lea -4(%1, %0), %1\n\t"
  84                 "lea -4(%2, %0), %2\n\t"
  85                 "shr $2, %0\n\t"
  86                 "std\n\t"
  87                 "rep movsl\n\t"
  88                 "mov %3,(%4)\n\t"
  89                 "cld\n\t"
  90                 "jmp 11f\n\t"
  91
  92                 /*
  93                  * Start to prepare for backward copy.
  94                  */
  95                 ".p2align 4\n\t"
  96                 "2:\n\t"
  97                 "cmp  $680, %0\n\t"
  98                 "jb 5f\n\t"
  99                 "mov %1, %3\n\t"
 100                 "xor %2, %3\n\t"
 101                 "and $0xff, %3\n\t"
 102                 "jz 6b\n\t"
 103
 104                 /*
 105                  * Calculate copy position to tail.
 106                  */
 107                 "5:\n\t"
 108                 "add %0, %1\n\t"
 109                 "add %0, %2\n\t"
 110                 "sub $0x10, %0\n\t"
 111
 112                 /*
 113                  * We gobble 16byts backward in each loop.
 114                  */
 115                 "7:\n\t"
 116                 "sub $0x10, %0\n\t"
 117
 118                 "mov -1*4(%1), %3\n\t"
 119                 "mov -2*4(%1), %4\n\t"
 120                 "mov  %3, -1*4(%2)\n\t"
 121                 "mov  %4, -2*4(%2)\n\t"
 122                 "mov -3*4(%1), %3\n\t"
 123                 "mov -4*4(%1), %4\n\t"
 124                 "mov  %3, -3*4(%2)\n\t"
 125                 "mov  %4, -4*4(%2)\n\t"
 126                 "lea  -0x10(%1), %1\n\t"
 127                 "lea  -0x10(%2), %2\n\t"
 128                 "jae 7b\n\t"
 129                 /*
 130                  * Calculate copy position to head.
 131                  */
 132                 "add $0x10, %0\n\t"
 133                 "sub %0, %1\n\t"
 134                 "sub %0, %2\n\t"
 135
 136                 /*
 137                  * Move data from 8 bytes to 15 bytes.
 138                  */
 139                 ".p2align 4\n\t"
 140                 "1:\n\t"
 141                 "cmp $8, %0\n\t"
 142                 "jb 8f\n\t"
 143                 "mov 0*4(%1), %3\n\t"
 144                 "mov 1*4(%1), %4\n\t"
 145                 "mov -2*4(%1, %0), %5\n\t"
 146                 "mov -1*4(%1, %0), %1\n\t"
 147
 148                 "mov  %3, 0*4(%2)\n\t"
 149                 "mov  %4, 1*4(%2)\n\t"
 150                 "mov  %5, -2*4(%2, %0)\n\t"
 151                 "mov  %1, -1*4(%2, %0)\n\t"
 152                 "jmp 11f\n\t"
 153
 154                 /*
 155                  * Move data from 4 bytes to 7 bytes.
 156                  */
 157                 ".p2align 4\n\t"
 158                 "8:\n\t"
 159                 "cmp $4, %0\n\t"
 160                 "jb 9f\n\t"
 161                 "mov 0*4(%1), %3\n\t"
 162                 "mov -1*4(%1, %0), %4\n\t"
 163                 "mov  %3, 0*4(%2)\n\t"
 164                 "mov  %4, -1*4(%2, %0)\n\t"
 165                 "jmp 11f\n\t"
 166
 167                 /*
 168                  * Move data from 2 bytes to 3 bytes.
 169                  */
 170                 ".p2align 4\n\t"
 171                 "9:\n\t"
 172                 "cmp $2, %0\n\t"
 173                 "jb 10f\n\t"
 174                 "movw 0*2(%1), %%dx\n\t"
 175                 "movw -1*2(%1, %0), %%bx\n\t"
 176                 "movw %%dx, 0*2(%2)\n\t"
 177                 "movw %%bx, -1*2(%2, %0)\n\t"
 178                 "jmp 11f\n\t"
 179
 180                 /*
 181                  * Move data for 1 byte.
 182                  */
 183                 ".p2align 4\n\t"
 184                 "10:\n\t"
 185                 "cmp $1, %0\n\t"
 186                 "jb 11f\n\t"
 187                 "movb (%1), %%cl\n\t"
 188                 "movb %%cl, (%2)\n\t"
 189                 ".p2align 4\n\t"
 190                 "11:"
 191                 : "=&c" (d0), "=&S" (d1), "=&D" (d2),
 192                   "=r" (d3), "=r" (d4), "=r"(d5)
 193                 : "0" (n),
 194                  "1" (src),
 195                  "2" (dest)
 196                 : "memory");
 197
 198         return ret;
 199 }