arch/riscv/lib/csum.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Checksum library
 *
 * Influenced by arch/arm64/lib/csum.c
 * Copyright (C) 2023-2024 Rivos Inc.
 */
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/jump_label.h>
#include <linux/kasan-checks.h>
#include <linux/kernel.h>

#include <asm/cpufeature.h>

#include <net/checksum.h>

/* Default version is sufficient for 32 bit */
#ifndef CONFIG_32BIT
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
                        const struct in6_addr *daddr,
                        __u32 len, __u8 proto, __wsum csum)
{
        unsigned int ulen, uproto;
        unsigned long sum = (__force unsigned long)csum;

        sum += (__force unsigned long)saddr->s6_addr32[0];
        sum += (__force unsigned long)saddr->s6_addr32[1];
        sum += (__force unsigned long)saddr->s6_addr32[2];
        sum += (__force unsigned long)saddr->s6_addr32[3];

        sum += (__force unsigned long)daddr->s6_addr32[0];
        sum += (__force unsigned long)daddr->s6_addr32[1];
        sum += (__force unsigned long)daddr->s6_addr32[2];
        sum += (__force unsigned long)daddr->s6_addr32[3];

        ulen = (__force unsigned int)htonl((unsigned int)len);
        sum += ulen;

        uproto = (__force unsigned int)htonl(proto);
        sum += uproto;

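        /*
         * All eleven 32-bit inputs (eight address words, len, proto and the
         * initial csum) fit in the 64-bit accumulator without overflowing, so
         * the carries only have to be folded back in once at the end.
         */
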
        /*
         * Zbb support saves 4 instructions, so not worth checking without
         * alternatives if supported
         */
        if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
            IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
                unsigned long fold_temp;

                /*
                 * Zbb is likely available when the kernel is compiled with Zbb
                 * support, so nop when Zbb is available and jump when Zbb is
                 * not available.
                 */
                asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
                                     RISCV_ISA_EXT_ZBB, 1)
                         :
                         :
                         :
                         : no_zbb);
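                /*
                 * Fold the 64-bit sum down to 16 bits: rori/add/srli adds the
                 * two 32-bit halves with the end-around carry, then the
                 * not/roriw/subw sequence computes ~(x + ror32(x, 16)) in one
                 * go (using ~a - b == ~(a + b)), i.e. an open-coded
                 * csum_fold().
                 */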
62 asm(".option push \n\
63 .option arch,+zbb \n\
64 rori %[fold_temp], %[sum], 32 \n\
65 add %[sum], %[fold_temp], %[sum] \n\
66 srli %[sum], %[sum], 32 \n\
67 not %[fold_temp], %[sum] \n\
68 roriw %[sum], %[sum], 16 \n\
69 subw %[sum], %[fold_temp], %[sum] \n\
70 .option pop"
71 : [sum] "+r" (sum), [fold_temp] "=&r" (fold_temp));
72 return (__force __sum16)(sum >> 16);
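
/*
 * Generic fallback: fold 64 bits down to 32 with a rotate-add (the end-around
 * carry lands in the upper half), then let csum_fold() finish the
 * 32-to-16-bit fold.
 */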
no_zbb:
        sum += ror64(sum, 32);
        sum >>= 32;
        return csum_fold((__force __wsum)sum);
}
EXPORT_SYMBOL(csum_ipv6_magic);
#endif /* !CONFIG_32BIT */

#ifdef CONFIG_32BIT
#define OFFSET_MASK 3
#elif defined(CONFIG_64BIT)
#define OFFSET_MASK 7
#endif
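/* OFFSET_MASK is the word size in bytes minus one, i.e. XLEN / 8 - 1. */
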
static inline __no_sanitize_address unsigned long
do_csum_common(const unsigned long *ptr, const unsigned long *end,
               unsigned long data)
{
        unsigned int shift;
        unsigned long csum = 0, carry = 0;

        /*
         * Do 32-bit reads on RV32 and 64-bit reads otherwise. This should be
         * faster than doing 32-bit reads on architectures that support larger
         * reads.
         */
        while (ptr < end) {
                csum += data;
                carry += csum < data;
                data = *(ptr++);
        }

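        /*
         * "csum < data" after the addition is 1 exactly when it wrapped, so
         * carry counts the overflows; they are added back into csum once the
         * tail is handled (ones' complement end-around carry).
         */
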
        /*
         * Mask off the bytes that were over-read past the end of the buffer
         * when the tail is not a full word.
         */
        shift = ((long)ptr - (long)end) * 8;
#ifdef __LITTLE_ENDIAN
        data = (data << shift) >> shift;
#else
        data = (data >> shift) << shift;
#endif
        csum += data;
        carry += csum < data;
        csum += carry;
        csum += csum < carry;

        return csum;
}

/*
 * Algorithm accounts for buff being misaligned.
 * If buff is not aligned, will over-read bytes but not use the bytes that it
 * shouldn't. The same thing will occur on the tail-end of the read.
 */
static inline __no_sanitize_address unsigned int
do_csum_with_alignment(const unsigned char *buff, int len)
{
        unsigned int offset, shift;
        unsigned long csum, data;
        const unsigned long *ptr, *end;

        /*
         * Align address to closest word (double word on rv64) that comes
         * before buff. This should always be in the same page and cache line.
         * Directly call KASAN with the alignment we will be using.
         */
        offset = (unsigned long)buff & OFFSET_MASK;
        kasan_check_read(buff, len);
        ptr = (const unsigned long *)(buff - offset);

        /*
         * Clear the over-read bytes that come before the start of buff (the
         * least significant bytes of the word on little-endian, the most
         * significant on big-endian).
         */
        shift = offset * 8;
        data = *(ptr++);
#ifdef __LITTLE_ENDIAN
        data = (data >> shift) << shift;
#else
        data = (data << shift) >> shift;
#endif
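
        /*
         * Example (RV64, little-endian): if buff sits at an address ending in
         * 0x3, offset is 3, ptr starts three bytes before buff and shift is
         * 24, so the three low-order (over-read) bytes of the first word are
         * cleared above.
         */
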
        end = (const unsigned long *)(buff + len);
        csum = do_csum_common(ptr, end, data);

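        /*
         * Only the parity of the misalignment matters: if buff started at an
         * odd address, each byte was accumulated in the opposite half of its
         * 16-bit group, which is undone below by byte-swapping the folded sum
         * (rev8 in the Zbb path, swab32 otherwise).
         */
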
#ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT
        /*
         * Zbb support saves 6 instructions, so not worth checking without
         * alternatives if supported
         */
        if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
            IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
                unsigned long fold_temp;

                /*
                 * Zbb is likely available when the kernel is compiled with Zbb
                 * support, so nop when Zbb is available and jump when Zbb is
                 * not available.
                 */
                asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
                                     RISCV_ISA_EXT_ZBB, 1)
                         :
                         :
                         :
                         : no_zbb);

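                /*
                 * With Zbb: fold csum down to 16 bits with rotate-adds and,
                 * when buff started on an odd address (offset & 1), rev8
                 * byte-swaps the result before it is returned.
                 */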
#ifdef CONFIG_32BIT
                asm_goto_output(".option push                          \n\
                .option arch,+zbb                                      \n\
                        rori    %[fold_temp], %[csum], 16              \n\
                        andi    %[offset], %[offset], 1                \n\
                        add     %[csum], %[fold_temp], %[csum]         \n\
                        beq     %[offset], zero, %l[end]               \n\
                        rev8    %[csum], %[csum]                       \n\
                .option pop"
                        : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
                        : [offset] "r" (offset)
                        :
                        : end);

                return (unsigned short)csum;
#else /* !CONFIG_32BIT */
                asm_goto_output(".option push                          \n\
                .option arch,+zbb                                      \n\
                        rori    %[fold_temp], %[csum], 32              \n\
                        add     %[csum], %[fold_temp], %[csum]         \n\
                        srli    %[csum], %[csum], 32                   \n\
                        roriw   %[fold_temp], %[csum], 16              \n\
                        addw    %[csum], %[fold_temp], %[csum]         \n\
                        andi    %[offset], %[offset], 1                \n\
                        beq     %[offset], zero, %l[end]               \n\
                        rev8    %[csum], %[csum]                       \n\
                .option pop"
                        : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
                        : [offset] "r" (offset)
                        :
                        : end);

                return (csum << 16) >> 48;
#endif /* !CONFIG_32BIT */
end:
                return csum >> 16;
        }
no_zbb:
#endif /* CC_HAS_ASM_GOTO_TIED_OUTPUT */
#ifndef CONFIG_32BIT
        csum += ror64(csum, 32);
        csum >>= 32;
#endif
        csum = (u32)csum + ror32((u32)csum, 16);
        if (offset & 1)
                return (u16)swab32(csum);
        return csum >> 16;
}

/*
 * Does not perform alignment, should only be used if machine has fast
 * misaligned accesses, or when buff is known to be aligned.
 */
static inline __no_sanitize_address unsigned int
do_csum_no_alignment(const unsigned char *buff, int len)
{
        unsigned long csum, data;
        const unsigned long *ptr, *end;

        ptr = (const unsigned long *)(buff);
        data = *(ptr++);

        kasan_check_read(buff, len);

        end = (const unsigned long *)(buff + len);
        csum = do_csum_common(ptr, end, data);

        /*
         * Zbb support saves 6 instructions, so not worth checking without
         * alternatives if supported
         */
        if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
            IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
                unsigned long fold_temp;

                /*
                 * Zbb is likely available when the kernel is compiled with Zbb
                 * support, so nop when Zbb is available and jump when Zbb is
                 * not available.
                 */
                asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
                                     RISCV_ISA_EXT_ZBB, 1)
                         :
                         :
                         :
                         : no_zbb);

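                /*
                 * Reads started exactly at buff, so every byte was
                 * accumulated in its natural position and no byte swap is
                 * needed: just fold csum so that bits 31..16 hold the 16-bit
                 * result.
                 */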
#ifdef CONFIG_32BIT
                asm (".option push                                     \n\
                .option arch,+zbb                                      \n\
                        rori    %[fold_temp], %[csum], 16              \n\
                        add     %[csum], %[fold_temp], %[csum]         \n\
                .option pop"
                        : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
                        :
                        : );
#else /* !CONFIG_32BIT */
                asm (".option push                                     \n\
                .option arch,+zbb                                      \n\
                        rori    %[fold_temp], %[csum], 32              \n\
                        add     %[csum], %[fold_temp], %[csum]         \n\
                        srli    %[csum], %[csum], 32                   \n\
                        roriw   %[fold_temp], %[csum], 16              \n\
                        addw    %[csum], %[fold_temp], %[csum]         \n\
                .option pop"
                        : [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
                        :
                        : );
#endif /* !CONFIG_32BIT */
                return csum >> 16;
        }
no_zbb:
#ifndef CONFIG_32BIT
        csum += ror64(csum, 32);
        csum >>= 32;
#endif
        csum = (u32)csum + ror32((u32)csum, 16);
        return csum >> 16;
}

/*
 * Perform a checksum on an arbitrary memory address.
 * Will do a light-weight address alignment if buff is misaligned, unless
 * cpu supports fast misaligned accesses.
 */
unsigned int do_csum(const unsigned char *buff, int len)
{
        if (unlikely(len <= 0))
                return 0;

        /*
         * Significant performance gains can be seen by not doing alignment
         * on machines with fast misaligned accesses.
         *
         * There is some duplicate code between the "with_alignment" and
         * "no_alignment" implementations, but the overlap is too awkward to
         * be able to fit in one function without introducing multiple static
         * branches. The largest chunk of overlap was delegated into the
         * do_csum_common function.
         */
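        /*
         * has_fast_unaligned_accesses() (from <asm/cpufeature.h>, hence the
         * jump_label include above) is a cheap check of whether this CPU was
         * found to handle misaligned accesses quickly.
         */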
        if (has_fast_unaligned_accesses() || (((unsigned long)buff & OFFSET_MASK) == 0))
                return do_csum_no_alignment(buff, len);

        return do_csum_with_alignment(buff, len);
}