arch/mn10300/lib/do_csum.S

   1 /* Optimised simple memory checksum
   2  *
   3  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
   4  * Written by David Howells (dhowells@redhat.com)
   5  *
   6  * This program is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU General Public Licence
   8  * as published by the Free Software Foundation; either version
   9  * 2 of the Licence, or (at your option) any later version.
  10  */
  11 #include <asm/cache.h>
  12
  13         .section .text
  14         .balign L1_CACHE_BYTES                  # align entry to L1_CACHE_BYTES
  15
  16 ###############################################################################
  17 # Sum buff[0..len) with end-around carry and fold the total to 16 bits (in d0)
  18 # unsigned int do_csum(const unsigned char *buff, size_t len)
  19 # d0 = buff, d1 = len on entry; a0 = cursor, d2 = bytes left, d1 = running sum
  20 ###############################################################################
  21         .globl  do_csum
  22         .type   do_csum,@function
  23 do_csum:
  24         movm    [d2,d3],(sp)                    # save d2/d3 (ret below restores)
  25         mov     d0,(12,sp)                      # keep buff for the odd-address test
  26         mov     d1,(16,sp)                      # keep len (never reloaded below)
  27         mov     d1,d2                           # count
  28         mov     d0,a0                           # buff
  29         clr     d1                              # accumulator
  30
  31         cmp     +0,d2
  32         beq     do_csum_done                    # return if zero-length buffer
  33
  34         # 4-byte align the buffer pointer
  35         btst    +3,a0                           # low two address bits clear?
  36         beq     do_csum_now_4b_aligned
  37
  38         btst    +1,a0                           # odd start address?
  39         beq     do_csum_addr_not_odd
  40         movbu   (a0),d0                         # leading odd byte ...
  41         inc     a0
  42         asl     +8,d0                           # ... is summed in bits 15:8
  43         add     d0,d1
  44         addc    +0,d1
  45         add     -1,d2                           # one byte consumed
  46 do_csum_addr_not_odd:
  47         # a0 is now even; consume one halfword when that reaches 4-byte alignment
  48         cmp     +2,d2
  49         bcs     do_csum_fewer_than_4            # under 2 bytes left: do the tail
  50         btst    +2,a0                           # address bit 1 clear?
  51         beq     do_csum_now_4b_aligned
  52         movhu   (a0+),d0                        # 16-bit load, a0 += 2
  53         add     d0,d1
  54         addc    +0,d1
  55         add     -2,d2
  56         cmp     +4,d2
  57         bcs     do_csum_fewer_than_4            # under 4 bytes left: do the tail
  58
  59 do_csum_now_4b_aligned:
  60         # we want to checksum as much as we can in chunks of 32 bytes
  61         cmp     +31,d2
  62         bls     do_csum_remainder               # 4-byte aligned remainder
  63
  64         add     -32,d2                          # bias the count by one chunk
  65         mov     +32,d3                          # d3 = chunk size
  66         # 8 words per pass on one carry chain (assumes the loads leave flags alone)
  67 do_csum_loop:
  68         mov     (a0+),d0
  69         add     d0,d1
  70         mov     (a0+),e0
  71         addc    e0,d1
  72         mov     (a0+),e1
  73         addc    e1,d1
  74         mov     (a0+),e3
  75         addc    e3,d1
  76         mov     (a0+),d0
  77         addc    d0,d1
  78         mov     (a0+),e0
  79         addc    e0,d1
  80         mov     (a0+),e1
  81         addc    e1,d1
  82         mov     (a0+),e3
  83         addc    e3,d1
  84         addc    +0,d1                           # fold the last carry back in
  85
  86         sub     d3,d2                           # another full 32 bytes left?
  87         bcc     do_csum_loop                    # no borrow: yes, go round again
  88
  89         add     d3,d2                           # undo the bias: d2 = bytes left
  90         beq     do_csum_done
  91
  92 do_csum_remainder:
  93         # cut 16-31 bytes down to 0-15
  94         cmp     +16,d2
  95         bcs     do_csum_fewer_than_16
  96         mov     (a0+),d0
  97         add     d0,d1
  98         mov     (a0+),e0
  99         addc    e0,d1
 100         mov     (a0+),e1
 101         addc    e1,d1
 102         mov     (a0+),e3
 103         addc    e3,d1
 104         addc    +0,d1                           # fold the last carry back in
 105         add     -16,d2
 106         beq     do_csum_done
 107
 108 do_csum_fewer_than_16:
 109         # sum the remaining whole words (the labels below fall through 3 -> 2 -> 1)
 110         cmp     +4,d2
 111         bcs     do_csum_fewer_than_4
 112         cmp     +8,d2
 113         bcs     do_csum_one_word                # 4-7 bytes left
 114         cmp     +12,d2
 115         bcs     do_csum_two_words               # 8-11 bytes left
 116         mov     (a0+),d0                        # 12-15 bytes left
 117         add     d0,d1
 118         addc    +0,d1
 119 do_csum_two_words:
 120         mov     (a0+),d0
 121         add     d0,d1
 122         addc    +0,d1
 123 do_csum_one_word:
 124         mov     (a0+),d0
 125         add     d0,d1
 126         addc    +0,d1
 127         # d2 was not reduced for the words above; only its low two bits matter now
 128 do_csum_fewer_than_4:
 129         and     +3,d2                           # keep only the 0-3 trailing bytes
 130         beq     do_csum_done
 131         xor_cmp d0,d0,+2,d2                     # NOTE(review): reads as d0 = 0 plus cmp 2,d2 - confirm
 132         bcs     do_csum_fewer_than_2            # just one byte left
 133         movhu   (a0+),d0
 134 do_csum_fewer_than_2:
 135         and     +1,d2                           # odd byte at the end?
 136         beq     do_csum_add_last_bit
 137         movbu   (a0),d3
 138         add     d3,d0                           # d0 = tail halfword (or 0) + last byte
 139 do_csum_add_last_bit:
 140         add     d0,d1
 141         addc    +0,d1
 142
 143 do_csum_done:
 144         # compress the checksum down to 16 bits
 145         mov     +0xffff0000,d2
 146         and     d1,d2                           # d2 = high half of sum, in place
 147         asl     +16,d1                          # d1 = low half moved to the top
 148         add     d2,d1,d0                        # d0 = high + low in bits 31:16
 149         addc    +0xffff,d0                      # a carry-out becomes +1 at bit 16
 150         lsr     +16,d0                          # folded 16-bit sum in bits 15:0
 151
 152         # swap the two result bytes when buff was odd (leading byte went in bits 15:8)
 153         mov     (12,sp),d1                      # buff as saved on entry
 154         and     +1,d1
 155         beq     do_csum_not_oddly_aligned
 156         swaph   d0,d0                           # exchange bits 15:8 with 7:0
 157
 158 do_csum_not_oddly_aligned:
 159         ret     [d2,d3],8                       # undo the movm (d2/d3, 8 bytes), return d0
 160
 161 do_csum_end:
 162         .size   do_csum, do_csum_end-do_csum