arch/x86/lib/memcpy_32.c

   1 #include <linux/string.h>
   2 #include <linux/module.h>
   3
   4 #undef memcpy
   5 #undef memset
   6
   7 void *memcpy(void *to, const void *from, size_t n)
   8 {
   9 #ifdef CONFIG_X86_USE_3DNOW
  10         return __memcpy3d(to, from, n);
  11 #else
  12         return __memcpy(to, from, n);
  13 #endif
  14 }
  15 EXPORT_SYMBOL(memcpy);
  16
  17 void *memset(void *s, int c, size_t count)
  18 {
  19         return __memset(s, c, count);
  20 }
  21 EXPORT_SYMBOL(memset);
  22
  23 void *memmove(void *dest, const void *src, size_t n)
  24 {
  25         int d0,d1,d2,d3,d4,d5;
  26         char *ret = dest;
  27
  28         __asm__ __volatile__(
  29                 /* Handle more 16bytes in loop */
  30                 "cmp $0x10, %0\n\t"
  31                 "jb     1f\n\t"
  32
  33                 /* Decide forward/backward copy mode */
  34                 "cmp %2, %1\n\t"
  35                 "jb     2f\n\t"
  36
  37                 /*
  38                  * movs instruction have many startup latency
  39                  * so we handle small size by general register.
  40                  */
  41                 "cmp  $680, %0\n\t"
  42                 "jb 3f\n\t"
  43                 /*
  44                  * movs instruction is only good for aligned case.
  45                  */
  46                 "mov %1, %3\n\t"
  47                 "xor %2, %3\n\t"
  48                 "and $0xff, %3\n\t"
  49                 "jz 4f\n\t"
  50                 "3:\n\t"
  51                 "sub $0x10, %0\n\t"
  52
  53                 /*
  54                  * We gobble 16byts forward in each loop.
  55                  */
  56                 "3:\n\t"
  57                 "sub $0x10, %0\n\t"
  58                 "mov 0*4(%1), %3\n\t"
  59                 "mov 1*4(%1), %4\n\t"
  60                 "mov  %3, 0*4(%2)\n\t"
  61                 "mov  %4, 1*4(%2)\n\t"
  62                 "mov 2*4(%1), %3\n\t"
  63                 "mov 3*4(%1), %4\n\t"
  64                 "mov  %3, 2*4(%2)\n\t"
  65                 "mov  %4, 3*4(%2)\n\t"
  66                 "lea  0x10(%1), %1\n\t"
  67                 "lea  0x10(%2), %2\n\t"
  68                 "jae 3b\n\t"
  69                 "add $0x10, %0\n\t"
  70                 "jmp 1f\n\t"
  71
  72                 /*
  73                  * Handle data forward by movs.
  74                  */
  75                 ".p2align 4\n\t"
  76                 "4:\n\t"
  77                 "mov -4(%1, %0), %3\n\t"
  78                 "lea -4(%2, %0), %4\n\t"
  79                 "shr $2, %0\n\t"
  80                 "rep movsl\n\t"
  81                 "mov %3, (%4)\n\t"
  82                 "jmp 11f\n\t"
  83                 /*
  84                  * Handle data backward by movs.
  85                  */
  86                 ".p2align 4\n\t"
  87                 "6:\n\t"
  88                 "mov (%1), %3\n\t"
  89                 "mov %2, %4\n\t"
  90                 "lea -4(%1, %0), %1\n\t"
  91                 "lea -4(%2, %0), %2\n\t"
  92                 "shr $2, %0\n\t"
  93                 "std\n\t"
  94                 "rep movsl\n\t"
  95                 "mov %3,(%4)\n\t"
  96                 "cld\n\t"
  97                 "jmp 11f\n\t"
  98
  99                 /*
 100                  * Start to prepare for backward copy.
 101                  */
 102                 ".p2align 4\n\t"
 103                 "2:\n\t"
 104                 "cmp  $680, %0\n\t"
 105                 "jb 5f\n\t"
 106                 "mov %1, %3\n\t"
 107                 "xor %2, %3\n\t"
 108                 "and $0xff, %3\n\t"
 109                 "jz 6b\n\t"
 110
 111                 /*
 112                  * Calculate copy position to tail.
 113                  */
 114                 "5:\n\t"
 115                 "add %0, %1\n\t"
 116                 "add %0, %2\n\t"
 117                 "sub $0x10, %0\n\t"
 118
 119                 /*
 120                  * We gobble 16byts backward in each loop.
 121                  */
 122                 "7:\n\t"
 123                 "sub $0x10, %0\n\t"
 124
 125                 "mov -1*4(%1), %3\n\t"
 126                 "mov -2*4(%1), %4\n\t"
 127                 "mov  %3, -1*4(%2)\n\t"
 128                 "mov  %4, -2*4(%2)\n\t"
 129                 "mov -3*4(%1), %3\n\t"
 130                 "mov -4*4(%1), %4\n\t"
 131                 "mov  %3, -3*4(%2)\n\t"
 132                 "mov  %4, -4*4(%2)\n\t"
 133                 "lea  -0x10(%1), %1\n\t"
 134                 "lea  -0x10(%2), %2\n\t"
 135                 "jae 7b\n\t"
 136                 /*
 137                  * Calculate copy position to head.
 138                  */
 139                 "add $0x10, %0\n\t"
 140                 "sub %0, %1\n\t"
 141                 "sub %0, %2\n\t"
 142
 143                 /*
 144                  * Move data from 8 bytes to 15 bytes.
 145                  */
 146                 ".p2align 4\n\t"
 147                 "1:\n\t"
 148                 "cmp $8, %0\n\t"
 149                 "jb 8f\n\t"
 150                 "mov 0*4(%1), %3\n\t"
 151                 "mov 1*4(%1), %4\n\t"
 152                 "mov -2*4(%1, %0), %5\n\t"
 153                 "mov -1*4(%1, %0), %1\n\t"
 154
 155                 "mov  %3, 0*4(%2)\n\t"
 156                 "mov  %4, 1*4(%2)\n\t"
 157                 "mov  %5, -2*4(%2, %0)\n\t"
 158                 "mov  %1, -1*4(%2, %0)\n\t"
 159                 "jmp 11f\n\t"
 160
 161                 /*
 162                  * Move data from 4 bytes to 7 bytes.
 163                  */
 164                 ".p2align 4\n\t"
 165                 "8:\n\t"
 166                 "cmp $4, %0\n\t"
 167                 "jb 9f\n\t"
 168                 "mov 0*4(%1), %3\n\t"
 169                 "mov -1*4(%1, %0), %4\n\t"
 170                 "mov  %3, 0*4(%2)\n\t"
 171                 "mov  %4, -1*4(%2, %0)\n\t"
 172                 "jmp 11f\n\t"
 173
 174                 /*
 175                  * Move data from 2 bytes to 3 bytes.
 176                  */
 177                 ".p2align 4\n\t"
 178                 "9:\n\t"
 179                 "cmp $2, %0\n\t"
 180                 "jb 10f\n\t"
 181                 "movw 0*2(%1), %%dx\n\t"
 182                 "movw -1*2(%1, %0), %%bx\n\t"
 183                 "movw %%dx, 0*2(%2)\n\t"
 184                 "movw %%bx, -1*2(%2, %0)\n\t"
 185                 "jmp 11f\n\t"
 186
 187                 /*
 188                  * Move data for 1 byte.
 189                  */
 190                 ".p2align 4\n\t"
 191                 "10:\n\t"
 192                 "cmp $1, %0\n\t"
 193                 "jb 11f\n\t"
 194                 "movb (%1), %%cl\n\t"
 195                 "movb %%cl, (%2)\n\t"
 196                 ".p2align 4\n\t"
 197                 "11:"
 198                 : "=&c" (d0), "=&S" (d1), "=&D" (d2),
 199                   "=r" (d3),"=r" (d4), "=r"(d5)
 200                 :"0" (n),
 201                  "1" (src),
 202                  "2" (dest)
 203                 :"memory");
 204
 205         return ret;
 206
 207 }
 208 EXPORT_SYMBOL(memmove);