/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 * Copyright (C) 2007 Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 */
#include <linux/errno.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>
/*
 * As we share the code base with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register definitions from
 * the n64 ABI register naming to the o32 ABI register naming.
 */
#endif /* USE_DOUBLE */

#define UNIT(unit)  ((unit)*NBYTES)

#define ADDC(sum,reg) \
#define ADDC32(sum,reg) \
#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
        LOAD    _t0, (offset + UNIT(0))(src); \
        LOAD    _t1, (offset + UNIT(1))(src); \
        LOAD    _t2, (offset + UNIT(2))(src); \
        LOAD    _t3, (offset + UNIT(3))(src); \
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
        CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)

#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
        CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3); \
        CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
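
/*
 * For reference, a minimal C sketch (never assembled) of the one's-complement
 * accumulation that ADDC performs and that CSUM_BIGCHUNK merely unrolls over
 * four (or eight) loaded words: add the value, then feed any carry-out back
 * into the sum.  The helper names are illustrative, and 32-bit arithmetic
 * stands in for the register-width ADD used under USE_DOUBLE.
 */
#if 0   /* illustration only */
#include <stdint.h>

/* End-around-carry add: roughly what the ADDC macro amounts to. */
static uint32_t csum_addc(uint32_t sum, uint32_t val)
{
        sum += val;
        if (sum < val)          /* unsigned wrap => carry out */
                sum += 1;       /* fold the carry back in */
        return sum;
}

/* CSUM_BIGCHUNK1 in C terms: accumulate four consecutive words. */
static uint32_t csum_bigchunk1(uint32_t sum, const uint32_t *p)
{
        sum = csum_addc(sum, p[0]);
        sum = csum_addc(sum, p[1]);
        sum = csum_addc(sum, p[2]);
        sum = csum_addc(sum, p[3]);
        return sum;
}
#endif
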
/*
 * a1: length of the area to checksum
 * a2: partial checksum
 */
EXPORT_SYMBOL(csum_partial)
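
/*
 * For callers: the buffer pointer arrives in a0, the length in a1 and a
 * partial checksum to continue from in a2; the result is a 32-bit __wsum.
 * A hedged C usage sketch (prototypes as in the generic checksum headers;
 * the helper below is hypothetical):
 */
#if 0   /* illustration only */
#include <net/checksum.h>

/* Hypothetical helper: checksum a header and a payload as one stream. */
static __sum16 example_csum(const void *hdr, int hdr_len,
                            const void *payload, int payload_len)
{
        __wsum sum = csum_partial(hdr, hdr_len, 0);

        sum = csum_partial(payload, payload_len, sum);
        return csum_fold(sum);  /* fold the 32-bit partial down to 16 bits */
}
#endif
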
        bnez    t8, .Lsmall_csumcpy             /* < 8 bytes to copy */
        andi    t7, src, 0x1                    /* odd buffer? */
        beqz    t7, .Lword_align
        LONG_SUBU       a1, a1, 0x1
        PTR_ADDU        src, src, 0x1
        beqz    t8, .Ldword_align
        LONG_SUBU       a1, a1, 0x2
        PTR_ADDU        src, src, 0x2
        bnez    t8, .Ldo_end_words
        beqz    t8, .Lqword_align
        LONG_SUBU       a1, a1, 0x4
        PTR_ADDU        src, src, 0x4
        beqz    t8, .Loword_align
        LONG_SUBU       a1, a1, 0x8
        LONG_SUBU       a1, a1, 0x8
        PTR_ADDU        src, src, 0x8
        beqz    t8, .Lbegin_movement
        CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
        LONG_SUBU       a1, a1, 0x10
        PTR_ADDU        src, src, 0x10
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
        LONG_SUBU       t8, t8, 0x01
        .set    reorder                         /* DADDI_WAR */
        PTR_ADDU        src, src, 0x80
        bnez    t8, .Lmove_128bytes
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
        PTR_ADDU        src, src, 0x40
        beqz    t2, .Ldo_end_words
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        PTR_ADDU        src, src, 0x20
        beqz    t8, .Lsmall_csumcpy
        LONG_SUBU       t8, t8, 0x1
        .set    reorder                         /* DADDI_WAR */
        PTR_ADDU        src, src, 0x4

        /* unknown src alignment and < 8 bytes to go */

        /* Still a full word to go */
        dsll    t1, t1, 32                      /* clear lower 32 bits */
        /* Still a halfword to go */

        /* odd buffer alignment? */
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
        beqz    t7, 1f                          /* odd buffer alignment? */

        /* Add the passed partial csum.  */
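
/*
 * What this epilogue amounts to, sketched in C: if the buffer started at an
 * odd address the byte lanes of the running sum are swapped within each
 * halfword (a dedicated byte-swap instruction on MIPSR2/R5/Loongson, the
 * shift-and-mask sequence otherwise), and then the caller's partial checksum
 * from a2 is added with the usual end-around carry (ADDC32).  The names and
 * the 32-bit accumulator below are illustrative assumptions.
 */
#if 0   /* illustration only */
#include <stdint.h>

static uint32_t csum_finish(uint32_t sum, int odd, uint32_t partial)
{
        if (odd) {
                /* bytes were summed in swapped lanes: swap within halfwords */
                sum = ((sum & 0x00ff00ff) << 8) | ((sum >> 8) & 0x00ff00ff);
        }
        sum += partial;                 /* ADDC32(sum, a2) */
        if (sum < partial)
                sum += 1;               /* end-around carry */
        return sum;
}
#endif
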
/*
 * checksum and copy routines based on memcpy.S
 *
 *      csum_partial_copy_nocheck(src, dst, len)
 *      __csum_partial_copy_kernel(src, dst, len)
 *
 * See "Spec" in memcpy.S for details.  Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 *
 * All exception handlers simply return 0.
 */
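
/*
 * As a semantic reference only (not the code generated below, which moves
 * NBYTES at a time): a byte-wise C equivalent of checksum-while-copy, with
 * little-endian lane placement assumed and the helper name invented for
 * illustration.  On a fault the real routines just return 0.
 */
#if 0   /* illustration only */
#include <stdint.h>

/* Copy len bytes and return a folded representative of the partial sum. */
static uint32_t csum_and_copy_ref(const uint8_t *src, uint8_t *dst, int len)
{
        uint32_t sum = 0;
        int i;

        for (i = 0; i + 1 < len; i += 2) {
                dst[i] = src[i];
                dst[i + 1] = src[i + 1];
                sum += (uint32_t)src[i] | ((uint32_t)src[i + 1] << 8);
        }
        if (i < len) {                  /* trailing odd byte */
                dst[i] = src[i];
                sum += src[i];          /* low lane on little endian */
        }
        sum = (sum & 0xffff) + (sum >> 16);     /* fold carries ... */
        sum = (sum & 0xffff) + (sum >> 16);     /* ... down to 16 bits */
        return sum;
}
#endif
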
/* Instruction type */

#define LEGACY_MODE 1

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 *
 *      insn    : Load/store instruction
 *      type    : Instruction type
 *      handler : Exception handler
 */
#define EXC(insn, type, reg, addr) \
        .if \mode == LEGACY_MODE; \
                .section __ex_table,"a"; \
        /* This is enabled in EVA mode */ \
                /* If loading from user or storing to user */ \
                .if ((\from == USEROP) && (type == LD_INSN)) || \
                    ((\to == USEROP) && (type == ST_INSN)); \
9:                      __BUILD_EVA_INSN(insn##e, reg, addr); \
                        .section __ex_table,"a"; \
                /* EVA without exception */ \

#define LOADK   ld /* No exception */
#define LOAD(reg, addr)         EXC(ld, LD_INSN, reg, addr)
#define LOADBU(reg, addr)       EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)        EXC(ldl, LD_INSN, reg, addr)
#define LOADR(reg, addr)        EXC(ldr, LD_INSN, reg, addr)
#define STOREB(reg, addr)       EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)       EXC(sdl, ST_INSN, reg, addr)
#define STORER(reg, addr)       EXC(sdr, ST_INSN, reg, addr)
#define STORE(reg, addr)        EXC(sd, ST_INSN, reg, addr)

#define LOADK   lw /* No exception */
#define LOAD(reg, addr)         EXC(lw, LD_INSN, reg, addr)
#define LOADBU(reg, addr)       EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)        EXC(lwl, LD_INSN, reg, addr)
#define LOADR(reg, addr)        EXC(lwr, LD_INSN, reg, addr)
#define STOREB(reg, addr)       EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)       EXC(swl, ST_INSN, reg, addr)
#define STORER(reg, addr)       EXC(swr, ST_INSN, reg, addr)
#define STORE(reg, addr)        EXC(sw, ST_INSN, reg, addr)

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define STFIRST STORER
#define STREST  STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#define LDFIRST LOADL
#define STFIRST STOREL
#define STREST  STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
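
/*
 * LDFIRST/LDREST (lwl/lwr or ldl/ldr) fetch an unaligned word as two accesses
 * to the aligned words that straddle it; SHIFT_DISCARD and its revert are the
 * endian-dependent shifts used later to drop bytes outside the copied range.
 * A rough C model of the merge, assuming 32-bit units, little-endian layout
 * and an in-bounds following word (names illustrative; strict aliasing is
 * glossed over):
 */
#if 0   /* illustration only */
#include <stdint.h>

static uint32_t load_unaligned32_le(const uint8_t *p)
{
        uintptr_t addr = (uintptr_t)p;
        unsigned int off = addr & 3;            /* src & ADDRMASK */
        const uint32_t *base = (const uint32_t *)(addr - off);
        uint32_t lo, hi;

        lo = base[0];                   /* word holding the first bytes */
        if (off == 0)
                return lo;              /* already aligned */
        hi = base[1];                   /* word holding the rest */
        /* LE: wanted bytes are the top of lo plus the bottom of hi */
        return (lo >> (8 * off)) | (hi << (8 * (4 - off)));
}
#endif
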
#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)

#define ADDRMASK (NBYTES-1)

#ifndef CONFIG_CPU_DADDI_WORKAROUNDS

        .macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to

        /*
         * Note: dst & src may be unaligned, len may be 0
         *
         * The "issue break"s below are very approximate.
         * Issue delays for dcache fills will perturb the schedule, as will
         * load queue full replay traps, etc.
         *
         * If len < NBYTES use byte operations.
         */
        and     t1, dst, ADDRMASK
        bnez    t2, .Lcopy_bytes_checklen\@
        and     t0, src, ADDRMASK
        andi    odd, dst, 0x1                   /* odd buffer? */
        bnez    t1, .Ldst_unaligned\@
        bnez    t0, .Lsrc_unaligned_dst_aligned\@
        /*
         * use delay slot for fall-through
         * src and dst are aligned; need to compute rem
         */
        SRL     t0, len, LOG_NBYTES+3           # +3 for 8 units/iter
        beqz    t0, .Lcleanup_both_aligned\@    # len < 8*NBYTES
        SUB     len, 8*NBYTES                   # subtract here for bgez loop
        LOAD(t0, UNIT(0)(src))
        LOAD(t1, UNIT(1)(src))
        LOAD(t2, UNIT(2)(src))
        LOAD(t3, UNIT(3)(src))
        LOAD(t4, UNIT(4)(src))
        LOAD(t5, UNIT(5)(src))
        LOAD(t6, UNIT(6)(src))
        LOAD(t7, UNIT(7)(src))
        SUB     len, len, 8*NBYTES
        ADD     src, src, 8*NBYTES
        STORE(t0, UNIT(0)(dst))
        STORE(t1, UNIT(1)(dst))
        STORE(t2, UNIT(2)(dst))
        STORE(t3, UNIT(3)(dst))
        STORE(t4, UNIT(4)(dst))
        STORE(t5, UNIT(5)(dst))
        STORE(t6, UNIT(6)(dst))
        STORE(t7, UNIT(7)(dst))
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 8*NBYTES
        ADD     len, 8*NBYTES                   # revert len (see above)
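
/*
 * The loop above uses a biased counter: len is dropped by one 8*NBYTES block
 * before entering so the continue test is a plain sign check (bgez), and the
 * block is added back afterwards to recover the true remainder.  The same
 * idiom in C, under the assumption that len >= block on entry (which the
 * beqz above guarantees); the helper is illustrative:
 */
#if 0   /* illustration only */
static long biased_block_loop(long len, long block)
{
        len -= block;                   /* SUB len, 8*NBYTES (pre-bias) */
        do {
                /* unrolled body: 8 LOADs, 8 STOREs, 8 ADDCs, pointer bumps */
                len -= block;           /* SUB len, len, 8*NBYTES */
        } while (len >= 0);             /* bgez len, 1b */
        len += block;                   /* ADD len, 8*NBYTES: revert the bias */
        return len;                     /* true number of bytes left */
}
#endif
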
        /*
         * len == the number of bytes left to copy < 8*NBYTES
         */
.Lcleanup_both_aligned\@:
        sltu    t0, len, 4*NBYTES
        bnez    t0, .Lless_than_4units\@
        and     rem, len, (NBYTES-1)            # rem = len % NBYTES
        LOAD(t0, UNIT(0)(src))
        LOAD(t1, UNIT(1)(src))
        LOAD(t2, UNIT(2)(src))
        LOAD(t3, UNIT(3)(src))
        SUB     len, len, 4*NBYTES
        ADD     src, src, 4*NBYTES
        STORE(t0, UNIT(0)(dst))
        STORE(t1, UNIT(1)(dst))
        STORE(t2, UNIT(2)(dst))
        STORE(t3, UNIT(3)(dst))
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 4*NBYTES
.Lless_than_4units\@:
        beq     rem, len, .Lcopy_bytes\@
        .set    reorder                         /* DADDI_WAR */
        /*
         * src and dst are aligned, need to copy rem bytes (rem < NBYTES).
         * A loop would do only a byte at a time with possible branch
         * mispredicts.  We can't do an explicit LOAD-dst/mask/or/STORE
         * sequence because we can't assume read access to dst.  Instead,
         * use STREST dst, which doesn't require read access to dst.
         *
         * This code should perform better than a simple loop on modern,
         * wide-issue mips processors because the code has fewer branches and
         * more instruction-level parallelism.
         */
        ADD     t1, dst, len            # t1 is just past last byte of dst
        SLL     rem, len, 3             # rem = number of bits to keep
        SUB     bits, bits, rem         # bits = number of bits to discard
        SHIFT_DISCARD t0, t0, bits
        SHIFT_DISCARD_REVERT t0, t0, bits
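
/*
 * In C terms the tail above loads one full word, keeps only the rem = len
 * bytes actually being copied (SHIFT_DISCARD shifts the unwanted bytes out,
 * SHIFT_DISCARD_REVERT shifts the kept ones back with zero fill), feeds the
 * result to ADDC, and then STREST writes just those bytes without ever
 * reading dst.  A little-endian, 32-bit sketch (names illustrative):
 */
#if 0   /* illustration only */
#include <stdint.h>

/* Keep only the first rem bytes (0 < rem < 4) of a loaded word. */
static uint32_t keep_first_bytes_le(uint32_t word, unsigned int rem)
{
        unsigned int discard = 8 * (4 - rem);   /* bits to discard */

        word <<= discard;       /* SHIFT_DISCARD (SLLV on little endian) */
        word >>= discard;       /* SHIFT_DISCARD_REVERT (SRLV), zero fill */
        return word;            /* checksum contribution of the tail */
}
#endif
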
        /*
         * t0 = src & ADDRMASK
         * t1 = dst & ADDRMASK; t1 > 0
         *
         * Copy enough bytes to align dst
         * Set match = (src and dst have same alignment)
         */
        LDFIRST(t3, FIRST(0)(src))
        LDREST(t3, REST(0)(src))
        SUB     t2, t2, t1                      # t2 = number of bytes copied
        STFIRST(t3, FIRST(0)(dst))
        SLL     t4, t1, 3                       # t4 = number of bits to discard
        SHIFT_DISCARD t3, t3, t4
        /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
        beq     len, t2, .Ldone\@
        beqz    match, .Lboth_aligned\@
.Lsrc_unaligned_dst_aligned\@:
        SRL     t0, len, LOG_NBYTES+2           # +2 for 4 units/iter
        beqz    t0, .Lcleanup_src_unaligned\@
        and     rem, len, (4*NBYTES-1)          # rem = len % 4*NBYTES
        /*
         * Avoid consecutive LD*'s to the same register since some mips
         * implementations can't issue them in the same cycle.
         * It's OK to load FIRST(N+1) before REST(N) because the two addresses
         * are to the same unit (unless src is aligned, but it's not).
         */
        LDFIRST(t0, FIRST(0)(src))
        LDFIRST(t1, FIRST(1)(src))
        SUB     len, len, 4*NBYTES
        LDREST(t0, REST(0)(src))
        LDREST(t1, REST(1)(src))
        LDFIRST(t2, FIRST(2)(src))
        LDFIRST(t3, FIRST(3)(src))
        LDREST(t2, REST(2)(src))
        LDREST(t3, REST(3)(src))
        ADD     src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
        nop                                     # improves slotting
        STORE(t0, UNIT(0)(dst))
        STORE(t1, UNIT(1)(dst))
        STORE(t2, UNIT(2)(dst))
        STORE(t3, UNIT(3)(dst))
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 4*NBYTES
.Lcleanup_src_unaligned\@:
        and     rem, len, NBYTES-1              # rem = len % NBYTES
        beq     rem, len, .Lcopy_bytes\@
        LDFIRST(t0, FIRST(0)(src))
        LDREST(t0, REST(0)(src))
        .set    reorder                         /* DADDI_WAR */
.Lcopy_bytes_checklen\@:
        /* 0 < len < NBYTES */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_START 8*(NBYTES-1)
        move    t2, zero                        # partial word
        li      t3, SHIFT_START                 # shift
#define COPY_BYTE(N) \
        LOADBU(t0, N(src)); \
        STOREB(t0, N(dst)); \
        addu    t3, SHIFT_INC; \
        beqz    len, .Lcopy_bytes_done\@; \
        LOADBU(t0, NBYTES-2(src))
        STOREB(t0, NBYTES-2(dst))
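
/*
 * COPY_BYTE copies one byte and also deposits it into the partial word t2 at
 * shift position t3, which starts at SHIFT_START (0 on little endian,
 * 8*(NBYTES-1) on big endian) and moves by SHIFT_INC per byte; the packed
 * word is then added into the checksum so the byte tail contributes in the
 * right lanes.  A little-endian C model of that packing (helper illustrative,
 * len < NBYTES assumed as on this path):
 */
#if 0   /* illustration only */
#include <stdint.h>

static uint32_t copy_tail_bytes_le(const uint8_t *src, uint8_t *dst,
                                   unsigned int len)
{
        uint32_t word = 0;              /* t2: partial word */
        unsigned int shift = 0;         /* t3: starts at SHIFT_START */
        unsigned int i;

        for (i = 0; i < len; i++) {
                dst[i] = src[i];                        /* STOREB */
                word |= (uint32_t)src[i] << shift;      /* or into t2 */
                shift += 8;                             /* SHIFT_INC */
        }
        return word;            /* caller ADDCs this into the running sum */
}
#endif
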
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
        beqz    odd, 1f                         /* odd buffer alignment? */

FEXPORT(__csum_partial_copy_nocheck)
EXPORT_SYMBOL(__csum_partial_copy_nocheck)
FEXPORT(__csum_partial_copy_to_user)
EXPORT_SYMBOL(__csum_partial_copy_to_user)
FEXPORT(__csum_partial_copy_from_user)
EXPORT_SYMBOL(__csum_partial_copy_from_user)
__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP

LEAF(__csum_partial_copy_to_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP
END(__csum_partial_copy_to_user)

LEAF(__csum_partial_copy_from_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP
END(__csum_partial_copy_from_user)