arch/sh/lib/checksum.S

   1 /* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
   2  *
   3  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   4  *              operating system.  INET is implemented using the  BSD Socket
   5  *              interface as the means of communication with the user level.
   6  *
   7  *              IP/TCP/UDP checksumming routines
   8  *
   9  * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
  10  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  11  *              Tom May, <ftom@netcom.com>
  12  *              Pentium Pro/II routines:
  13  *              Alexander Kjeldaas <astor@guardian.no>
  14  *              Finn Arne Gangstad <finnag@guardian.no>
  15  *              Lots of code moved from tcp.c and ip.c; see those files
  16  *              for more names.
  17  *
  18  * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
  19  *                           handling.
  20  *              Andi Kleen,  add zeroing on error
  21  *                   converted to pure assembler
  22  *
  23  * SuperH version:  Copyright (C) 1999  Niibe Yutaka
  24  *
  25  *              This program is free software; you can redistribute it and/or
  26  *              modify it under the terms of the GNU General Public License
  27  *              as published by the Free Software Foundation; either version
  28  *              2 of the License, or (at your option) any later version.
  29  */
  30
  31 #include <asm/errno.h>
  32 #include <linux/linkage.h>
  33
  34 /*
  35  * computes a partial checksum, e.g. for TCP/UDP fragments
  36  */
  37
  38 /*
  39  * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
  40  */
  41
  42 .text
  43 ENTRY(csum_partial)
             /*
              * Register usage (arguments per the prototype above):
              *   r4 = buf, post-incremented as data is consumed
              *   r5 = len, the number of bytes still to be summed
              *   r6 = sum, the 32-bit accumulator; a carry out of bit 31 is
              *        added back in (end-around carry)
              *   r7 = copy of the original buf, tested at 9: to undo the
              *        rotation applied when buf starts on an odd address
              *   r0-r3 = scratch; the result is returned in r0
              */
  44           /*
  45            * Experiments with Ethernet and SLIP connections show that buff
  46            * is aligned on either a 2-byte or 4-byte boundary.  We get at
  47            * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  48            * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  49            * alignment for the unrolled loop.
  50            */
  51         mov     r4, r0
  52         tst     #3, r0          ! Check alignment.
  53         bt/s    2f              ! Jump if alignment is ok.
  54          mov    r4, r7          ! Keep a copy to check for alignment
  55         !
  56         tst     #1, r0          ! Check alignment.
  57         bt      21f             ! Jump if alignment is boundary of 2bytes.
  58
  59         ! buf is odd
             ! Sum the leading byte, then rotate the sum by 8 bits so that the
             ! rest of the (now even-aligned) data can be added directly.  The
             ! rotation is undone at 9:.
  60         tst     r5, r5
  61         add     #-1, r5
  62         bt      10f             ! len == 0: nothing summed and nothing
                                     ! rotated, so skip the realign at 9:
  63         mov.b   @r4+, r0
  64         extu.b  r0, r0
     #ifndef __LITTLE_ENDIAN__
             shll8   r0              ! byte 0 is the high byte of its 16-bit word
     #endif
  65         addc    r0, r6          ! t=0 from previous tst
             mov     #0, r0
             addc    r0, r6          ! add the carry back before it is rotated away
  66         mov     r6, r0
  67         shll8   r6
  68         shlr16  r0
  69         shlr8   r0
  70         or      r0, r6          ! r6 = r6 rotated left by 8 bits
  71         mov     r4, r0
  72         tst     #2, r0
  73         bt      2f
  74 21:
  75         ! buf is 2 byte aligned (len could be 0)
  76         add     #-2, r5         ! Alignment uses up two bytes.
  77         cmp/pz  r5              !
  78         bt/s    1f              ! Jump if we had at least two bytes.
  79          clrt
  80         bra     6f
  81          add    #2, r5          ! r5 was < 2.  Deal with it.
  82 1:
  83         mov.w   @r4+, r0
  84         extu.w  r0, r0
  85         addc    r0, r6
  86         bf      2f
  87         add     #1, r6          ! carry out of bit 31: add it back in
  88 2:
  89         ! buf is 4 byte aligned (len could be 0)
  90         mov     r5, r1
  91         mov     #-5, r0
  92         shld    r0, r1          ! r1 = r5 >> 5 = number of 32-byte chunks
  93         tst     r1, r1
  94         bt/s    4f              ! if it's =0, go to 4f
  95          clrt
  96         .align  2
  97 3:
  98         mov.l   @r4+, r0
  99         mov.l   @r4+, r2
 100         mov.l   @r4+, r3
 101         addc    r0, r6
 102         mov.l   @r4+, r0
 103         addc    r2, r6
 104         mov.l   @r4+, r2
 105         addc    r3, r6
 106         mov.l   @r4+, r3
 107         addc    r0, r6
 108         mov.l   @r4+, r0
 109         addc    r2, r6
 110         mov.l   @r4+, r2
 111         addc    r3, r6
 112         addc    r0, r6
 113         addc    r2, r6
 114         movt    r0              ! save the carry: dt below overwrites T
 115         dt      r1
 116         bf/s    3b
 117          cmp/eq #1, r0          ! delay slot: T = saved carry
 118         ! here, we know r1==0
 119         addc    r1, r6                  ! add carry to r6
 120 4:
 121         mov     r5, r0
 122         and     #0x1c, r0       ! r0 = bytes left in whole 32-bit words
 123         tst     r0, r0
 124         bt      6f
 125         ! 4 bytes or more remaining
 126         mov     r0, r1
 127         shlr2   r1              ! r1 = number of 32-bit words left
 128         mov     #0, r2
 129 5:
 130         addc    r2, r6          ! adds the word loaded on the previous pass
 131         mov.l   @r4+, r2
 132         movt    r0              ! save the carry: dt below overwrites T
 133         dt      r1
 134         bf/s    5b
 135          cmp/eq #1, r0          ! delay slot: T = saved carry
 136         addc    r2, r6
 137         addc    r1, r6          ! r1==0 here, so it means add carry-bit
 138 6:
 139         ! 3 bytes or less remaining
 140         mov     #3, r0
 141         and     r0, r5
 142         tst     r5, r5
 143         bt      9f              ! if it's =0 go to 9f
 144         mov     #2, r1
 145         cmp/hs  r1, r5
 146         bf      7f              ! exactly one byte left
 147         mov.w   @r4+, r0
 148         extu.w  r0, r0
 149         cmp/eq  r1, r5
 150         bt/s    8f              ! exactly two bytes left
 151          clrt
 152         shll16  r0              ! three bytes left: halfword goes in the upper half
 153         addc    r0, r6
 154 7:
 155         mov.b   @r4+, r0
 156         extu.b  r0, r0
 157 #ifndef __LITTLE_ENDIAN__
 158         shll8   r0
 159 #endif
 160 8:
 161         addc    r0, r6
 162         mov     #0, r0
 163         addc    r0, r6          ! add the final carry
 164 9:
 165         ! Check if the buffer was misaligned, if so realign sum
 166         mov     r7, r0
 167         tst     #1, r0
 168         bt      10f
 169         mov     r6, r0
 170         shll8   r6
 171         shlr16  r0
 172         shlr8   r0
 173         or      r0, r6          ! r6 = r6 rotated left by 8 bits
 174 10:
 175         rts
 176          mov    r6, r0          ! delay slot: return the sum in r0
 177
 178 /*
 179 unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
 180                                         int sum, int *src_err_ptr, int *dst_err_ptr)
 181  */
 182
 183 /*
 184  * Copy from src to dst while checksumming, otherwise like csum_partial
 185  *
 186  * The macros SRC and DST specify the type of access for the instruction.
 187  * thus we can call a custom exception handler for all access types.
      *
      * Each macro places the wrapped instruction at local label 9999 and adds
      * an __ex_table entry made of two .long values: the address of that
      * instruction and the address of a fixup label.  SRC() names 6001 (used
      * for loads from the source), DST() names 6002 (used for stores to the
      * destination); both labels are defined in the .fixup section below.
 188  *
 189  * FIXME: could someone double-check whether I haven't mixed up some SRC and
 190  *        DST definitions? It's damn hard to trigger all cases.  I hope I got
 191  *        them all but there's no guarantee.
 192  */
 193
 194 #define SRC(...)                        \
 195         9999: __VA_ARGS__ ;             \
 196         .section __ex_table, "a";       \
 197         .long 9999b, 6001f      ;       \
 198         .previous
 199
 200 #define DST(...)                        \
 201         9999: __VA_ARGS__ ;             \
 202         .section __ex_table, "a";       \
 203         .long 9999b, 6002f      ;       \
 204         .previous
 205
 206 !
 207 ! r4:   const char *SRC
 208 ! r5:   char *DST
 209 ! r6:   int LEN
 210 ! r7:   int SUM
 211 !
 212 ! on stack:
 213 ! int *SRC_ERR_PTR
 214 ! int *DST_ERR_PTR
 215 !
 216 ENTRY(csum_partial_copy_generic)
 217         mov.l   r5,@-r15        ! save dst for the 6001 fixup
 218         mov.l   r6,@-r15        ! save len for the 6001 fixup
             /*
              * Stack layout from here until the final add #8,r15
              * (as used by the fixup code at 6001 and 6002):
              *   @r15      = len
              *   @(4,r15)  = dst
              *   @(8,r15)  = src_err_ptr
              *   @(12,r15) = dst_err_ptr
              */
 219
 220         mov     #3,r0           ! Check src and dest are equally aligned
 221         mov     r4,r1
 222         and     r0,r1
 223         and     r5,r0
 224         cmp/eq  r1,r0
 225         bf      3f              ! Different alignments, use slow version
 226         tst     #1,r0           ! Check dest word aligned
 227         bf      3f              ! If not, do it the slow way
 228
 229         mov     #2,r0
 230         tst     r0,r5           ! Check dest alignment.
 231         bt      2f              ! Jump if alignment is ok.
 232         add     #-2,r6          ! Alignment uses up two bytes.
 233         cmp/pz  r6              ! Jump if we had at least two bytes.
 234         bt/s    1f
 235          clrt
 236         add     #2,r6           ! r6 was < 2.   Deal with it.
 237         bra     4f
 238          mov    r6,r2           ! delay slot: r2 = len, reloaded into r6 at 4:
 239
 240 3:      ! Handle different src and dest alignments.
 241         ! This is not common, so simple byte by byte copy will do.
 242         mov     r6,r2           ! r2 = len, kept for the odd-length test below
 243         shlr    r6              ! r6 = number of byte pairs
 244         tst     r6,r6
 245         bt      4f              ! fewer than two bytes: go to the tail code
 246         clrt
 247         .align  2
 248 5:
 249 SRC(    mov.b   @r4+,r1         )
 250 SRC(    mov.b   @r4+,r0         )
 251         extu.b  r1,r1
 252 DST(    mov.b   r1,@r5          )
 253 DST(    mov.b   r0,@(1,r5)      )
 254         extu.b  r0,r0
 255         add     #2,r5
 256
             ! Combine the two bytes into one 16-bit value in r0; which byte
             ! is shifted into the high position depends on the byte order.
 257 #ifdef  __LITTLE_ENDIAN__
 258         shll8   r0
 259 #else
 260         shll8   r1
 261 #endif
 262         or      r1,r0
 263
 264         addc    r0,r7
 265         movt    r0              ! save the carry: dt below overwrites T
 266         dt      r6
 267         bf/s    5b
 268          cmp/eq #1,r0           ! delay slot: T = saved carry
 269         mov     #0,r0
 270         addc    r0, r7          ! add the final carry
 271
 272         mov     r2, r0
 273         tst     #1, r0          ! was len odd?
 274         bt      7f              ! no: finished
 275         bra     5f              ! yes: copy the last byte (the later 5: label)
 276          clrt
 277
 278         ! src and dest equally aligned, but to a two byte boundary.
 279         ! Handle first two bytes as a special case
 280         .align  2
 281 1:
 282 SRC(    mov.w   @r4+,r0         )
 283 DST(    mov.w   r0,@r5          )
 284         add     #2,r5
 285         extu.w  r0,r0
 286         addc    r0,r7           ! T was cleared in the delay slot of the bt/s 1f
 287         mov     #0,r0
 288         addc    r0,r7           ! add the carry
 289 2:
 290         mov     r6,r2           ! r2 = remaining len, reused by the tail stages
 291         mov     #-5,r0
 292         shld    r0,r6           ! r6 = remaining len >> 5 = number of 32-byte chunks
 293         tst     r6,r6
 294         bt/s    2f              ! no full chunk: skip the unrolled loop
 295          clrt
 296         .align  2
 297 1:
 298 SRC(    mov.l   @r4+,r0         )
 299 SRC(    mov.l   @r4+,r1         )
 300         addc    r0,r7
 301 DST(    mov.l   r0,@r5          )
 302 DST(    mov.l   r1,@(4,r5)      )
 303         addc    r1,r7
 304
 305 SRC(    mov.l   @r4+,r0         )
 306 SRC(    mov.l   @r4+,r1         )
 307         addc    r0,r7
 308 DST(    mov.l   r0,@(8,r5)      )
 309 DST(    mov.l   r1,@(12,r5)     )
 310         addc    r1,r7
 311
 312 SRC(    mov.l   @r4+,r0         )
 313 SRC(    mov.l   @r4+,r1         )
 314         addc    r0,r7
 315 DST(    mov.l   r0,@(16,r5)     )
 316 DST(    mov.l   r1,@(20,r5)     )
 317         addc    r1,r7
 318
 319 SRC(    mov.l   @r4+,r0         )
 320 SRC(    mov.l   @r4+,r1         )
 321         addc    r0,r7
 322 DST(    mov.l   r0,@(24,r5)     )
 323 DST(    mov.l   r1,@(28,r5)     )
 324         addc    r1,r7
 325         add     #32,r5
 326         movt    r0              ! save the carry: dt below overwrites T
 327         dt      r6
 328         bf/s    1b
 329          cmp/eq #1,r0           ! delay slot: T = saved carry
 330         mov     #0,r0
 331         addc    r0,r7           ! add the final carry
 332
 333 2:      mov     r2,r6
 334         mov     #0x1c,r0
 335         and     r0,r6           ! r6 = bytes left in whole 32-bit words
 336         cmp/pl  r6
 337         bf/s    4f
 338          clrt
 339         shlr2   r6              ! r6 = number of 32-bit words left
 340 3:
 341 SRC(    mov.l   @r4+,r0 )
 342         addc    r0,r7
 343 DST(    mov.l   r0,@r5  )
 344         add     #4,r5
 345         movt    r0              ! save the carry: dt below overwrites T
 346         dt      r6
 347         bf/s    3b
 348          cmp/eq #1,r0           ! delay slot: T = saved carry
 349         mov     #0,r0
 350         addc    r0,r7           ! add the final carry
 351 4:      mov     r2,r6
 352         mov     #3,r0
 353         and     r0,r6           ! r6 = trailing byte count (0..3)
 354         cmp/pl  r6
 355         bf      7f              ! nothing left
 356         mov     #2,r1
 357         cmp/hs  r1,r6
 358         bf      5f              ! exactly one byte left
 359 SRC(    mov.w   @r4+,r0 )
 360 DST(    mov.w   r0,@r5  )
 361         extu.w  r0,r0
 362         add     #2,r5
 363         cmp/eq  r1,r6
 364         bt/s    6f              ! exactly two bytes left
 365          clrt
 366         shll16  r0              ! three bytes left: halfword goes in the upper half
 367         addc    r0,r7
 368 5:
 369 SRC(    mov.b   @r4+,r0 )
 370 DST(    mov.b   r0,@r5  )
 371         extu.b  r0,r0
 372 #ifndef __LITTLE_ENDIAN__
 373         shll8   r0
 374 #endif
 375 6:      addc    r0,r7
 376         mov     #0,r0
 377         addc    r0,r7           ! add the final carry
 378 7:
 379 5000:
             ! 5000 is where both fixup handlers resume; the code that follows
             ! it in this section is the epilogue after the .previous below.
 380
 381 # Exception handler:
 382 .section .fixup, "ax"
 383
             ! 6001: fixup target named by every SRC() exception-table entry
 384 6001:
 385         mov.l   @(8,r15),r0                     ! src_err_ptr
 386         mov     #-EFAULT,r1
 387         mov.l   r1,@r0          ! *src_err_ptr = -EFAULT
 388
 389         ! zero the complete destination - computing the rest
 390         ! is too much work
 391         mov.l   @(4,r15),r5             ! dst
 392         mov.l   @r15,r6                 ! len
 393         mov     #0,r7           ! r7 is also the sum, so 0 is what gets returned
 394 1:      mov.b   r7,@r5
 395         dt      r6
 396         bf/s    1b
 397          add    #1,r5
 398         mov.l   8000f,r0
 399         jmp     @r0             ! resume at 5000 (address taken from the literal at 8000)
 400          nop
 401         .align  2
 402 8000:   .long   5000b
 403
             ! 6002: fixup target named by every DST() exception-table entry
 404 6002:
 405         mov.l   @(12,r15),r0                    ! dst_err_ptr
 406         mov     #-EFAULT,r1
 407         mov.l   r1,@r0          ! *dst_err_ptr = -EFAULT
 408         mov.l   8001f,r0
 409         jmp     @r0             ! resume at 5000 (address taken from the literal at 8001)
 410          nop
 411         .align  2
 412 8001:   .long   5000b
 413
 414 .previous
 415         add     #8,r15          ! drop the dst and len saved on entry
 416         rts
 417          mov    r7,r0           ! delay slot: return the sum in r0