arch/xtensa/lib/usercopy.S

   1 /*
   2  *  arch/xtensa/lib/usercopy.S
   3  *
   4  *  Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
   5  *
   6  *  DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
   7  *  It needs to remain separate and distinct.  The hal files are part
   8  *  of the Xtensa link-time HAL, and those files may differ per
   9  *  processor configuration.  Patching the kernel for another
  10  *  processor configuration includes replacing the hal files, and we
  11  *  could lose the special functionality for accessing user-space
  12  *  memory during such a patch.  We sacrifice a little code space here
  13  *  in favor of simpler code maintenance.
  14  *
  15  *  This file is subject to the terms and conditions of the GNU General
  16  *  Public License.  See the file "COPYING" in the main directory of
  17  *  this archive for more details.
  18  *
  19  *  Copyright (C) 2002 Tensilica Inc.
  20  */
  21
  22
  23 /*
  24  * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
  25  *
  26  * The returned value is the number of bytes not copied.  Implies zero
  27  * is success.
  28  *
  29  * The general case algorithm is as follows:
  30  *   If the destination and source are both aligned,
  31  *     do 16B chunks with a loop, and then finish up with
  32  *     8B, 4B, 2B, and 1B copies conditional on the length.
  33  *   If destination is aligned and source unaligned,
  34  *     do the same, but use SRC to align the source data.
  35  *   If destination is unaligned, align it by conditionally
  36  *     copying 1B and 2B and then retest.
  37  *   This code tries to use fall-through branches for the common
  38  *     case of aligned destinations (except for the branches to
  39  *     the alignment label).
  40  *
  41  * Register use:
  42  *      a0/ return address
  43  *      a1/ stack pointer
  44  *      a2/ return value
  45  *      a3/ src
  46  *      a4/ length
  47  *      a5/ dst
  48  *      a6/ tmp
  49  *      a7/ tmp
  50  *      a8/ tmp
  51  *      a9/ tmp
  52  *      a10/ tmp
  53  *      a11/ original length
  54  */
  55
  56 #include <linux/linkage.h>
  57 #include <asm/asmmacro.h>
  58 #include <asm/core.h>
  59
  60         .text
  61 ENTRY(__xtensa_copy_user)
  62         /* STACK_SIZE is 4 only for CALL0 ABI without loops: .Lsrcunaligned then spills a10 to 0(a1) */
  63 #if !XCHAL_HAVE_LOOPS && defined(__XTENSA_CALL0_ABI__)
  64 #define STACK_SIZE 4
  65 #else
  66 #define STACK_SIZE 0
  67 #endif
  68         abi_entry(STACK_SIZE)
  69         # a2/ dst, a3/ src, a4/ len
  70         mov     a5, a2          # copy dst: a5 advances, a2 keeps the original dst until a return value is set
  71         mov     a11, a4         # preserve original len for error case
  72 .Lcommon:
  73         bbsi.l  a2, 0, .Ldst1mod2 # if dst is 1 mod 2
  74         bbsi.l  a2, 1, .Ldst2mod4 # if dst is 2 mod 4
  75 .Ldstaligned:   # return here from .Ldst1mod2/.Ldst2mod4 once dst is word aligned
  76         srli    a7, a4, 4       # number of loop iterations with 16B
  77                                 # per iteration
  78         movi    a8, 3             # if source is also aligned,
  79         bnone   a3, a8, .Laligned # then use word copy
  80         __ssa8  a3              # set shift amount from byte offset
  81         bnez    a4, .Lsrcunaligned # a8 (= 3) is reused there as the byte-offset mask
  82         movi    a2, 0           # return success for len==0
  83         abi_ret(STACK_SIZE)
  84
  85 /*
  86  * Destination is unaligned: copy 1 and/or 2 bytes so that dst (a5) becomes word aligned, then rejoin .Ldstaligned
  87  */
  88
  89 .Ldst1mod2:     # dst is only byte aligned
  90         bltui   a4, 7, .Lbytecopy       # do short copies byte by byte
  91
  92         # copy 1 byte
  93 EX(10f) l8ui    a6, a3, 0       # EX(10f): presumably routes a fault on this access to label 10 in .fixup (macro defined outside this file)
  94         addi    a3, a3,  1
  95 EX(10f) s8i     a6, a5,  0
  96         addi    a5, a5,  1
  97         addi    a4, a4, -1      # a4 = bytes still to copy
  98         bbci.l  a5, 1, .Ldstaligned     # if dst is now aligned, then
  99                                         # return to main algorithm
 100 .Ldst2mod4:     # dst 16-bit aligned
 101         # copy 2 bytes
 102         bltui   a4, 6, .Lbytecopy       # do short copies byte by byte (never taken on fall-through: a4 >= 6 there)
 103 EX(10f) l8ui    a6, a3, 0       # byte loads: src may have any alignment here
 104 EX(10f) l8ui    a7, a3, 1
 105         addi    a3, a3,  2
 106 EX(10f) s8i     a6, a5,  0
 107 EX(10f) s8i     a7, a5,  1
 108         addi    a5, a5,  2
 109         addi    a4, a4, -2      # a4 = bytes still to copy
 110         j       .Ldstaligned    # dst is now aligned, return to main algorithm
 111
 112 /*
 113  * Byte by byte copy of the a4 remaining bytes; reached only for short copies to an unaligned destination
 114  */
 115         .align  4
 116         .byte   0               # 1 mod 4 alignment for LOOPNEZ
 117                                 # (0 mod 4 alignment for LBEG)
 118 .Lbytecopy:
 119 #if XCHAL_HAVE_LOOPS
 120         loopnez a4, .Lbytecopydone
 121 #else /* !XCHAL_HAVE_LOOPS */
 122         beqz    a4, .Lbytecopydone
 123         add     a7, a3, a4      # a7 = end address for source
 124 #endif /* !XCHAL_HAVE_LOOPS */
 125 .Lnextbyte:
 126 EX(10f) l8ui    a6, a3, 0
 127         addi    a3, a3, 1
 128 EX(10f) s8i     a6, a5, 0
 129         addi    a5, a5, 1       # a5 advances per byte, so the fixup count is exact here
 130 #if !XCHAL_HAVE_LOOPS
 131         bne     a3, a7, .Lnextbyte # continue until src == src_end; a signed blt ends early when the range crosses or ends at 0x80000000
 132 #endif /* !XCHAL_HAVE_LOOPS */
 133 .Lbytecopydone:
 134         movi    a2, 0           # return success for len bytes copied
 135         abi_ret(STACK_SIZE)
 136
 137 /*
 138  * Destination and source are word-aligned: a7 = number of 16B chunks, low 4 bits of a4 select the 8/4/2/1 byte tail
 139  */
 140         # copy 16 bytes per iteration for word-aligned dst and word-aligned src
 141         .align  4               # 1 mod 4 alignment for LOOPNEZ
 142         .byte   0               # (0 mod 4 alignment for LBEG)
 143 .Laligned:
 144 #if XCHAL_HAVE_LOOPS
 145         loopnez a7, .Loop1done
 146 #else /* !XCHAL_HAVE_LOOPS */
 147         beqz    a7, .Loop1done
 148         slli    a8, a7, 4
 149         add     a8, a8, a3      # a8 = end of last 16B source chunk
 150 #endif /* !XCHAL_HAVE_LOOPS */
 151 .Loop1:
 152 EX(10f) l32i    a6, a3,  0      # a6/a7 alternate as data registers; a7 is no longer needed as a count
 153 EX(10f) l32i    a7, a3,  4
 154 EX(10f) s32i    a6, a5,  0
 155 EX(10f) l32i    a6, a3,  8
 156 EX(10f) s32i    a7, a5,  4
 157 EX(10f) l32i    a7, a3, 12
 158 EX(10f) s32i    a6, a5,  8
 159         addi    a3, a3, 16
 160 EX(10f) s32i    a7, a5, 12
 161         addi    a5, a5, 16      # dst advances only after the whole 16B chunk is stored
 162 #if !XCHAL_HAVE_LOOPS
 163         bne     a3, a8, .Loop1  # continue until src == src_end; a signed blt ends early when the range crosses or ends at 0x80000000
 164 #endif /* !XCHAL_HAVE_LOOPS */
 165 .Loop1done:
 166         bbci.l  a4, 3, .L2      # a4 is not decremented by the loop; its low bits give the tail size
 167         # copy 8 bytes
 168 EX(10f) l32i    a6, a3,  0
 169 EX(10f) l32i    a7, a3,  4
 170         addi    a3, a3,  8
 171 EX(10f) s32i    a6, a5,  0
 172 EX(10f) s32i    a7, a5,  4
 173         addi    a5, a5,  8
 174 .L2:
 175         bbci.l  a4, 2, .L3
 176         # copy 4 bytes
 177 EX(10f) l32i    a6, a3,  0
 178         addi    a3, a3,  4
 179 EX(10f) s32i    a6, a5,  0
 180         addi    a5, a5,  4
 181 .L3:
 182         bbci.l  a4, 1, .L4
 183         # copy 2 bytes
 184 EX(10f) l16ui   a6, a3,  0
 185         addi    a3, a3,  2
 186 EX(10f) s16i    a6, a5,  0
 187         addi    a5, a5,  2
 188 .L4:
 189         bbci.l  a4, 0, .L5
 190         # copy 1 byte
 191 EX(10f) l8ui    a6, a3,  0
 192 EX(10f) s8i     a6, a5,  0
 193 .L5:
 194         movi    a2, 0           # return success for len bytes copied
 195         abi_ret(STACK_SIZE)
 196
 197 /*
 198  * Destination is aligned, Source is unaligned: read aligned words and merge neighbours with __src_b (shift amount set by __ssa8 at .Ldstaligned)
 199  */
 200
 201         .align  4
 202         .byte   0               # 1 mod 4 alignment for LOOPNEZ
 203                                 # (0 mod 4 alignment for LBEG)
 204 .Lsrcunaligned:
 205         # copy 16 bytes per iteration for word-aligned dst and unaligned src
 206         and     a10, a3, a8     # save unalignment offset for below (a8 = 3 on entry)
 207         sub     a3, a3, a10     # align a3 (to avoid sim warnings only; not needed for hardware)
 208 EX(10f) l32i    a6, a3, 0       # load first word
 209 #if XCHAL_HAVE_LOOPS
 210         loopnez a7, .Loop2done
 211 #else /* !XCHAL_HAVE_LOOPS */
 212         beqz    a7, .Loop2done
 213 #if defined(__XTENSA_CALL0_ABI__)
 214         s32i    a10, a1, 0      # spill the offset to the STACK_SIZE slot; a10 becomes the loop bound
 215         slli    a10, a7, 4
 216         add     a10, a10, a3    # a10 = end of last 16B source chunk
 217 #else
 218         slli    a12, a7, 4
 219         add     a12, a12, a3    # a12 = end of last 16B source chunk
 220 #endif
 221 #endif /* !XCHAL_HAVE_LOOPS */
 222 .Loop2:
 223 EX(10f) l32i    a7, a3,  4
 224 EX(10f) l32i    a8, a3,  8
 225         __src_b a6, a6, a7      # presumably the SRC funnel shift of two adjacent words (macro defined outside this file)
 226 EX(10f) s32i    a6, a5,  0
 227 EX(10f) l32i    a9, a3, 12
 228         __src_b a7, a7, a8
 229 EX(10f) s32i    a7, a5,  4
 230 EX(10f) l32i    a6, a3, 16      # a6 = first word of the next chunk, carried into the next iteration or the tail
 231         __src_b a8, a8, a9
 232 EX(10f) s32i    a8, a5,  8
 233         addi    a3, a3, 16
 234         __src_b a9, a9, a6
 235 EX(10f) s32i    a9, a5, 12
 236         addi    a5, a5, 16
 237 #if !XCHAL_HAVE_LOOPS
 238 #if defined(__XTENSA_CALL0_ABI__)
 239         bne     a3, a10, .Loop2 # continue until src == src_end; a signed blt ends early when the range crosses or ends at 0x80000000
 240         l32i    a10, a1, 0      # reload the unalignment offset needed at .L13
 241 #else
 242         bne     a3, a12, .Loop2 # continue until src == src_end; a signed blt ends early when the range crosses or ends at 0x80000000
 243 #endif
 244 #endif /* !XCHAL_HAVE_LOOPS */
 245 .Loop2done:
 246         bbci.l  a4, 3, .L12
 247         # copy 8 bytes
 248 EX(10f) l32i    a7, a3,  4
 249 EX(10f) l32i    a8, a3,  8
 250         __src_b a6, a6, a7
 251 EX(10f) s32i    a6, a5,  0
 252         addi    a3, a3,  8
 253         __src_b a7, a7, a8
 254 EX(10f) s32i    a7, a5,  4
 255         addi    a5, a5,  8
 256         mov     a6, a8          # keep the last loaded word as the carry for the 4-byte step
 257 .L12:
 258         bbci.l  a4, 2, .L13
 259         # copy 4 bytes
 260 EX(10f) l32i    a7, a3,  4
 261         addi    a3, a3,  4
 262         __src_b a6, a6, a7
 263 EX(10f) s32i    a6, a5,  0
 264         addi    a5, a5,  4
 265         mov     a6, a7
 266 .L13:
 267         add     a3, a3, a10     # readjust a3 with correct misalignment
 268         bbci.l  a4, 1, .L14
 269         # copy 2 bytes
 270 EX(10f) l8ui    a6, a3,  0      # byte loads from here on: a3 is the true, unaligned source address again
 271 EX(10f) l8ui    a7, a3,  1
 272         addi    a3, a3,  2
 273 EX(10f) s8i     a6, a5,  0
 274 EX(10f) s8i     a7, a5,  1
 275         addi    a5, a5,  2
 276 .L14:
 277         bbci.l  a4, 0, .L15
 278         # copy 1 byte
 279 EX(10f) l8ui    a6, a3,  0
 280 EX(10f) s8i     a6, a5,  0
 281 .L15:
 282         movi    a2, 0           # return success for len bytes copied
 283         abi_ret(STACK_SIZE)
 284
 285 ENDPROC(__xtensa_copy_user)
 286 EXPORT_SYMBOL(__xtensa_copy_user)
 287
 288         .section .fixup, "ax"
 289         .align  4
 290
 291 /* a2 = original dst; a5 = current dst; a11= original len
 292  * bytes_copied = a5 - a2
 293  * retval = bytes_not_copied = original len - bytes_copied
 294  * retval = a11 - (a5 - a2)
 295  */
 296         /* Label 10 is the target named by every EX(10f) in __xtensa_copy_user. */
 297         /* a5 is bumped only after a chunk is stored, so a partly stored chunk counts as not copied. */
 298 10:
 299         sub     a2, a5, a2      /* a2 <-- bytes copied */
 300         sub     a2, a11, a2     /* a2 <-- bytes not copied */
 301         abi_ret(STACK_SIZE)     /* same frame size as the main routine, so this returns straight to its caller */