1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
5 #include <asm/processor.h>
6 #include <asm/ppc_asm.h>
7 #include <asm/export.h>
8 #include <asm/asm-compat.h>
9 #include <asm/feature-fixups.h>
12 /* Case selector read by the test_feature assignments below: 0 == most CPUs, 1 == POWER6, 2 == Cell */
13 #define SELFTEST_CASE 0
17 #define sLd sld /* Shift towards low-numbered address. */
18 #define sHd srd /* Shift towards high-numbered address. */
20 #define sLd srd /* Shift towards low-numbered address. */
21 #define sHd sld /* Shift towards high-numbered address. */
25 * These macros are used to generate exception table entries.
26 * The exception handlers below use the original arguments
27 * (stored on the stack) and the point where we're up to in
28 * the destination buffer, i.e. the address of the first
29 * unmodified byte. Generally r3 points into the destination
30 * buffer, but the first unmodified byte is at a variable
31 * offset from r3. In the code below, the symbol r3_offset
32 * is set to indicate the current offset at each point in
33 * the code. The fixup address for a faulting access is the
34 * handler label minus r3_offset, so execution resumes that
35 * many bytes before the handler, where addi instructions
36 * adjust r3 to point to the first unmodified byte.
38 .macro lex /* exception handler for load */
39 100: EX_TABLE(100b, .Lld_exc - r3_offset)
42 .macro stex /* exception handler for store */
43 100: EX_TABLE(100b, .Lst_exc - r3_offset)
47 _GLOBAL_TOC(__copy_tofrom_user)
48 #ifdef CONFIG_PPC_BOOK3S_64
52 b __copy_tofrom_user_power7
53 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
55 _GLOBAL(__copy_tofrom_user_base)
56 /* first check for a 4kB copy on a 4kB boundary */
60 neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */
63 crand cr0*4+2,cr0*4+2,cr6*4+2
71 /* Below we want to nop out the bne if we're on a CPU that has the
72 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
74 * At the time of writing the only CPU that has this combination of bits
77 test_feature = (SELFTEST_CASE == 1)
82 ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
83 CPU_FTR_UNALIGNED_LD_STD)
87 test_feature = (SELFTEST_CASE == 0)
91 END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
92 blt cr1,.Ldo_tail /* if < 16 bytes to copy */
170 lex; ld r9,0(r4) /* 3+2n loads, 2+2n stores */
182 lex; ld r0,0(r4) /* 4+2n loads, 3+2n stores */
237 #ifdef __BIG_ENDIAN__
241 #ifdef __LITTLE_ENDIAN__
246 #ifdef __BIG_ENDIAN__
250 #ifdef __LITTLE_ENDIAN__
255 #ifdef __BIG_ENDIAN__
259 #ifdef __LITTLE_ENDIAN__
267 PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */
272 100: EX_TABLE(100b, .Lld_exc_r7)
274 100: EX_TABLE(100b, .Lst_exc_r7)
278 100: EX_TABLE(100b, .Lld_exc_r7)
280 100: EX_TABLE(100b, .Lst_exc_r7)
284 100: EX_TABLE(100b, .Lld_exc_r7)
286 100: EX_TABLE(100b, .Lst_exc_r7)
288 3: PPC_MTOCRF(0x01,r5)
319 * exception handlers follow
320 * we have to return the number of bytes not copied
321 * for an exception on a load, we set the rest of the destination to 0
322 * Note that the number of bytes of instructions for adjusting r3 needs
323 * to equal the amount of the adjustment, due to the trick of using
324 * .Lld_exc - r3_offset as the handler address.
342 * Here we have had a fault on a load and r3 points to the first
343 * unmodified byte of the destination. We use the original arguments
344 * and r3 to work out how much wasn't copied. Since we load some
345 * distance ahead of the stores, we continue copying byte-by-byte until
346 * we hit the load fault again in order to copy as much as possible.
354 subf r5,r6,r5 /* #bytes left to go */
357 * first see if we can copy any more bytes before hitting another exception
361 100: EX_TABLE(100b, .Ldone)
367 li r3,0 /* huh? all copied successfully this time? */
371 * here we have trapped again, amount remaining is in ctr.
378 * exception handlers for stores: we need to work out how many bytes
379 * weren't copied, and we may need to copy some more.
380 * Note that the number of bytes of instructions for adjusting r3 needs
381 * to equal the amount of the adjustment, due to the trick of using
382 * .Lst_exc - r3_offset as the handler address.
399 ld r6,-24(r1) /* original destination pointer */
400 ld r4,-16(r1) /* original source pointer */
401 ld r5,-8(r1) /* original number of bytes */
404 * If the destination pointer isn't 8-byte aligned,
405 * we may have got the exception as a result of a
406 * store that overlapped a page boundary, so we may be
407 * able to copy a few more bytes.
411 subf r8,r6,r3 /* #bytes copied */
412 100: EX_TABLE(100b,19f)
414 100: EX_TABLE(100b,19f)
419 19: subf r3,r3,r7 /* #bytes not copied in r3 */
423 * Routine to copy a whole page of data, optimized for POWER4.
424 * On POWER4 it is more than 50% faster than the simple loop
425 * above (following the .Ldst_aligned label).
428 100: EX_TABLE(100b, .Labort)
544 * on an exception, reset to the beginning and jump back into the
545 * standard __copy_tofrom_user
564 EXPORT_SYMBOL(__copy_tofrom_user)