arch/alpha/lib/memcpy.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  *  linux/arch/alpha/lib/memcpy.c
   4  *
   5  *  Copyright (C) 1995  Linus Torvalds
   6  */
   7
   8 /*
   9  * This is a reasonably optimized memcpy() routine.
  10  */
  11
  12 /*
  13  * Note that the C code is written to be optimized into good assembly. However,
  14  * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
  15  * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
  16  * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
  17  */
  18
  19 #include <linux/types.h>
  20 #include <linux/export.h>
  21 #include <linux/string.h>
  22
  23 /*
  24  * This should be done in one go with ldq_u*2/mask/stq_u. Do it
  25  * with a macro so that we can fix it up later..
  26  */
  27 #define ALIGN_DEST_TO8_UP(d,s,n) \
  28         while (d & 7) { \
  29                 if (n <= 0) return; \
  30                 n--; \
  31                 *(char *) d = *(char *) s; \
  32                 d++; s++; \
  33         }
  34 #define ALIGN_DEST_TO8_DN(d,s,n) \
  35         while (d & 7) { \
  36                 if (n <= 0) return; \
  37                 n--; \
  38                 d--; s--; \
  39                 *(char *) d = *(char *) s; \
  40         }
  41
  42 /*
  43  * This should similarly be done with ldq_u*2/mask/stq. The destination
  44  * is aligned, but we don't fill in a full quad-word
  45  */
  46 #define DO_REST_UP(d,s,n) \
  47         while (n > 0) { \
  48                 n--; \
  49                 *(char *) d = *(char *) s; \
  50                 d++; s++; \
  51         }
  52 #define DO_REST_DN(d,s,n) \
  53         while (n > 0) { \
  54                 n--; \
  55                 d--; s--; \
  56                 *(char *) d = *(char *) s; \
  57         }
  58
  59 /*
  60  * This should be done with ldq/mask/stq. The source and destination are
  61  * aligned, but we don't fill in a full quad-word
  62  */
  63 #define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n) /* for now just the byte-wise upward tail copy */
  64 #define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n) /* for now just the byte-wise downward tail copy */
  65
  66 /*
  67  * This does unaligned memory copies. We want to avoid storing to
  68  * an unaligned address, as that would do a read-modify-write cycle.
  69  * We also want to avoid double-reading the unaligned reads.
  70  *
  71  * Note the ordering to try to avoid load (and address generation) latencies.
  72  */
  73 static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
  74                                           long n)
  75 {
             /*
              * d and s are the destination and source addresses carried as
              * integers; n is the signed number of bytes still to copy.
              *
              * Step 1: copy single bytes until d is 8-byte aligned.  The macro
              * returns from this function if n runs out before that.
              */
  76         ALIGN_DEST_TO8_UP(d,s,n);
  77         n -= 8;                 /* to avoid compare against 8 in the loop */
             /* n is now biased by -8: n >= 0 means a full 8 bytes remain. */
  78         if (n >= 0) {
  79                 unsigned long low_word, high_word;
                     /*
                      * Prime the pipeline: ldq_u is the Alpha "load quadword
                      * unaligned" instruction, which fetches the aligned
                      * quadword containing s.
                      */
  80                 __asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
  81                 do {
  82                         unsigned long tmp;
                             /* Fetch the following source quadword. */
  83                         __asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
  84                         n -= 8;
                             /*
                              * extql/extqh shift the two loaded quadwords by
                              * the byte offset taken from the low bits of s;
                              * OR-ing the results below yields the 8 source
                              * bytes that start at s.
                              * NOTE(review): this merge appears to rely on s
                              * not being 8-byte aligned here, which memcpy()
                              * ensures by only taking this path when d and s
                              * differ in their low three bits -- confirm
                              * against the Alpha architecture manual.
                              */
  85                         __asm__("extql %1,%2,%0"
  86                                 :"=r" (low_word)
  87                                 :"r" (low_word), "r" (s));
  88                         __asm__("extqh %1,%2,%0"
  89                                 :"=r" (tmp)
  90                                 :"r" (high_word), "r" (s));
  91                         s += 8;
                             /* d is aligned, so this is a plain quadword store. */
  92                         *(unsigned long *) d = low_word | tmp;
  93                         d += 8;
                             /* Reuse the quadword just loaded; no source word is read twice. */
  94                         low_word = high_word;
  95                 } while (n >= 0);
  96         }
             /* Undo the -8 bias and copy the remaining tail bytes one by one. */
  97         n += 8;
  98         DO_REST_UP(d,s,n);
  99 }
 100
 101 static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
 102                                           long n)
 103 {
 104         /* I don't understand AXP assembler well enough for this. -Tim */
 105         s += n;
 106         d += n;
 107         while (n--)
 108                 * (char *) --d = * (char *) --s;
 109 }
 110
 111 /*
 112  * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
 113  * for the load-store. I don't know why, but it would seem that using a floating
 114  * point register for the move seems to slow things down (very small difference,
 115  * though).
 116  *
 117  * Note the ordering to try to avoid load (and address generation) latencies.
 118  */
 119 static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
 120                                         long n)
 121 {
             /*
              * d and s are the destination and source addresses carried as
              * integers; n is the signed number of bytes still to copy.
              * memcpy() in this file only calls this when d and s have the
              * same low three bits, so once d is 8-byte aligned s is too.
              *
              * Copy single bytes until d is 8-byte aligned; the macro returns
              * from this function if n runs out first.
              */
 122         ALIGN_DEST_TO8_UP(d,s,n);
             /* Bias n by -8 so the loop test is a compare against zero. */
 123         n -= 8;
 124         while (n >= 0) {
 125                 unsigned long tmp;
                     /* Explicit ldq so the quadword goes through an integer register. */
 126                 __asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
 127                 n -= 8;
 128                 s += 8;
 129                 *(unsigned long *) d = tmp;
 130                 d += 8;
 131         }
             /* Undo the bias and copy the remaining tail bytes. */
 132         n += 8;
 133         DO_REST_ALIGNED_UP(d,s,n);
 134 }
 135 static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
 136                                         long n)
 137 {
             /*
              * Downward counterpart of __memcpy_aligned_up: d and s are the
              * start addresses carried as integers, n the signed byte count.
              * Both cursors are first moved to one past the end of their
              * regions and the copy then proceeds toward lower addresses.
              */
 138         s += n;
 139         d += n;
             /*
              * Copy trailing bytes until d is 8-byte aligned; the macro returns
              * from this function if n runs out first.
              */
 140         ALIGN_DEST_TO8_DN(d,s,n);
             /* Bias n by -8 so the loop test is a compare against zero. */
 141         n -= 8;
 142         while (n >= 0) {
 143                 unsigned long tmp;
                     /* Step back a quadword before loading/storing it. */
 144                 s -= 8;
 145                 __asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
 146                 n -= 8;
 147                 d -= 8;
 148                 *(unsigned long *) d = tmp;
 149         }
             /* Undo the bias and copy the remaining leading bytes. */
 150         n += 8;
 151         DO_REST_ALIGNED_DN(d,s,n);
 152 }
 153
 154 #undef memcpy
 155
 156 void * memcpy(void * dest, const void *src, size_t n)
 157 {
 158         if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
 159                 __memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
 160                                      n);
 161                 return dest;
 162         }
 163         __memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
 164         return dest;
 165 }
 166 EXPORT_SYMBOL(memcpy);