arch/arm64/crypto/aes-ce-ccm-core.S

   1 /* SPDX-License-Identifier: GPL-2.0-only */
   2 /*
   3  * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
   4  *
   5  * Copyright (C) 2013 - 2017 Linaro Ltd.
   6  * Copyright (C) 2024 Google LLC
   7  *
   8  * Author: Ard Biesheuvel <ardb@kernel.org>
   9  */
  10
  11 #include <linux/linkage.h>
  12 #include <asm/assembler.h>
  13
  14         .text
  15         .arch   armv8-a+crypto
  16
  17         .macro  load_round_keys, rk, nr, tmp
             /*
              * Load the AES round keys into v10-v21 and v3-v5.
              *
              *   rk  - 64-bit register holding the address of the round key array
              *   nr  - 32-bit register holding the number of rounds
              *   tmp - 64-bit scratch register, clobbered
              *
              * v10-v13 always receive the first four round keys. The other
              * eleven registers (v14-v21, v3-v5) are filled starting (nr - 10)
              * round keys into the array, so v3-v5 end up holding round keys
              * nr - 2, nr - 1 and nr. With nr == 10 the two groups overlap and
              * v14-v17 simply duplicate v10-v13.
              *
              * NOTE(review): the 'w' pasted in front of the tmp argument only
              * yields a valid register name if a matching alias exists (the
              * caller in this file passes x10, giving wx10); presumably those
              * aliases come from asm/assembler.h - confirm.
              */
  18         sub     w\tmp, \nr, #10
  19         add     \tmp, \rk, w\tmp, sxtw #4
  20         ld1     {v10.4s-v13.4s}, [\rk]
  21         ld1     {v14.4s-v17.4s}, [\tmp], #64
  22         ld1     {v18.4s-v21.4s}, [\tmp], #64
  23         ld1     {v3.4s-v5.4s}, [\tmp]
  24         .endm
  25
  26         .macro  dround, va, vb, vk
             /*
              * Apply one AESE + AESMC step with round key register vk to each
              * of the two states va and vb. All three arguments are bare vector
              * register names (e.g. v0); the .16b arrangement is appended here.
              */
  27         aese    \va\().16b, \vk\().16b
  28         aesmc   \va\().16b, \va\().16b
  29         aese    \vb\().16b, \vk\().16b
  30         aesmc   \vb\().16b, \vb\().16b
  31         .endm
  32
  33         .macro  aes_encrypt, va, vb, nr
             /*
              * Run the states va and vb through AES using the round keys that
              * load_round_keys placed in v10-v21, v3 and v4.
              *
              *   va, vb - bare vector register names holding the two states
              *   nr     - register holding the number of rounds
              *
              * The last AddRoundKey is NOT applied: on exit both states still
              * need to be xor'ed with v5. The code using this macro either does
              * that xor itself or xors the two states with each other so that
              * the missing key cancels.
              *
              * Bits 2 and 1 of nr choose how many of the v10-v13 rounds run
              * before the nine rounds that are always executed:
              * assumes nr is 10, 12 or 14, giving 0, 2 or 4 extra rounds.
              */
  34         tbz     \nr, #2, .L\@
  35         dround  \va, \vb, v10
  36         dround  \va, \vb, v11
  37         tbz     \nr, #1, .L\@
  38         dround  \va, \vb, v12
  39         dround  \va, \vb, v13
  40 .L\@:   .irp    v, v14, v15, v16, v17, v18, v19, v20, v21, v3
  41         dround  \va, \vb, \v
  42         .endr
             /* last round: AESE only, no AESMC; the xor with v5 is left to the user */
  43         aese    \va\().16b, v4.16b
  44         aese    \vb\().16b, v4.16b
  45         .endm
  46
  47         .macro  aes_ccm_do_crypt,enc
             /*
              * Body shared by ce_aes_ccm_encrypt (enc == 1) and
              * ce_aes_ccm_decrypt (enc == 0).
              *
              * Registers, as used by the code below:
              *   x0 - output pointer            x1 - input pointer
              *   w2 - number of bytes left      x3 - round key array
              *   w4 - number of rounds          x5 - mac buffer (16 bytes)
              *   x6 - counter block             x7 - final iv, or NULL
              *   x8 - low 64 bits of the counter, kept in a form that can be
              *        incremented with a plain add
              *   v0 - running mac               v1 - counter block / keystream
              *   v6 - most recent output block (consumed by the tail code)
              *
              * Every iteration pushes the mac and the next counter block through
              * aes_encrypt together and then handles one 16 byte block. A final
              * chunk shorter than 16 bytes is handed to ce_aes_ccm_crypt_tail.
              * Once the input is used up, the mac is finished via
              * ce_aes_ccm_final if x7 is non-NULL, or stored as is otherwise.
              *
              * NOTE(review): with a zero byte count the code branches straight
              * to ce_aes_ccm_final, which loads from x7 unconditionally, so
              * callers presumably never pass a zero count with a NULL final iv.
              */
  48         load_round_keys x3, w4, x10
  49
  50         ld1     {v0.16b}, [x5]                  /* load mac */
             /*
              * The byte count is a u32, so only the low half of x2 is defined
              * on entry: test w2, like every other user of the count does.
              */
  51         cbz     w2, ce_aes_ccm_final
  52         ldr     x8, [x6, #8]                    /* load lower ctr */
  53 CPU_LE( rev     x8, x8                  )       /* keep swabbed ctr in reg */
  54 0:      /* outer loop */
  55         ld1     {v1.8b}, [x6]                   /* load upper ctr */
  56         prfm    pldl1strm, [x1]
  57         add     x8, x8, #1
  58         rev     x9, x8
             /* the upper half is reloaded unchanged each time: no carry out of x8 */
  59         ins     v1.d[1], x9                     /* no carry in lower ctr */
  60
  61         aes_encrypt     v0, v1, w4
  62
             /* w2 < 0 means fewer than 16 bytes were left: finish in the tail code */
  63         subs    w2, w2, #16
  64         bmi     ce_aes_ccm_crypt_tail
  65         ld1     {v2.16b}, [x1], #16             /* load next input block */
             /*
              * v0 and v1 still lack the last round key (v5). Fold it into the
              * data so that one xor completes the mac and another the output;
              * either way v2 ends up as plaintext ^ v5.
              */
  66         .if     \enc == 1
  67         eor     v2.16b, v2.16b, v5.16b          /* final round enc+mac */
  68         eor     v6.16b, v1.16b, v2.16b          /* xor with crypted ctr */
  69         .else
  70         eor     v2.16b, v2.16b, v1.16b          /* xor with crypted ctr */
  71         eor     v6.16b, v2.16b, v5.16b          /* final round enc */
  72         .endif
  73         eor     v0.16b, v0.16b, v2.16b          /* xor mac with pt ^ rk[last] */
  74         st1     {v6.16b}, [x0], #16             /* write output block */
             /* flags are still those of the subs above: loop until w2 reaches 0 */
  75         bne     0b
  76 CPU_LE( rev     x8, x8                  )
  77         str     x8, [x6, #8]                    /* store lsb end of ctr (BE) */
  78         cbnz    x7, ce_aes_ccm_final
  79         st1     {v0.16b}, [x5]                  /* store mac */
  80         ret
  81         .endm
  82
  83 SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
             /*
              * Handle a final chunk of fewer than 16 bytes. Branched to from
              * aes_ccm_do_crypt, not called, with:
              *   w2      - (bytes remaining) - 16, i.e. negative
              *   x0, x1  - output / input pointers at the start of the chunk
              *   v0, v1  - mac and keystream, both still missing the last
              *             round key (v5)
              *   v6      - the output block written by the previous iteration
              *   v22     - all ones when encrypting, all zeroes when decrypting
              *   x5, x7  - mac buffer and final iv pointer (may be NULL)
              *
              * Both pointers are moved back by -w2 bytes so that a whole 16 byte
              * block ending exactly at the end of the data can be loaded and
              * stored. The leading bytes of the stored block are refilled from v6.
              *
              * NOTE(review): this reads and writes memory in front of the
              * current chunk and depends on v6, so it presumably requires that
              * at least one full block was processed in this call or that the
              * caller arranges the buffers accordingly - confirm in the glue
              * code. The incremented counter in x8 is not written back on
              * this path.
              */
  84         eor     v0.16b, v0.16b, v5.16b          /* final round mac */
  85         eor     v1.16b, v1.16b, v5.16b          /* final round enc */
  86
  87         add     x1, x1, w2, sxtw                /* rewind the input pointer (w2 < 0) */
  88         add     x0, x0, w2, sxtw                /* rewind the output pointer */
  89
             /*
              * Three 16 byte windows into the 0xff-padded identity table:
              *   v7 - shifts the low bytes up to the end of the register
              *   v8 - picks the trailing bytes of v6 for the leading positions
              *   v9 - shifts the trailing bytes down to the start
              * 0xff entries are out of range: TBL writes zero there, TBX
              * leaves the destination byte alone.
              */
  90         adr_l   x8, .Lpermute                   /* load permute vectors */
  91         add     x9, x8, w2, sxtw
  92         sub     x8, x8, w2, sxtw
  93         ld1     {v7.16b-v8.16b}, [x9]
  94         ld1     {v9.16b}, [x8]
  95
  96         ld1     {v2.16b}, [x1]                  /* load a full block of input */
  97         tbl     v1.16b, {v1.16b}, v7.16b        /* move keystream to end of register */
  98         eor     v7.16b, v2.16b, v1.16b          /* encrypt partial input block */
             /* BIF keeps v2 (the input) where v22 is set, takes v7 where it is clear */
  99         bif     v2.16b, v7.16b, v22.16b         /* select plaintext */
 100         tbx     v7.16b, {v6.16b}, v8.16b        /* insert output from previous iteration */
 101         tbl     v2.16b, {v2.16b}, v9.16b        /* copy plaintext to start of v2 */
 102         eor     v0.16b, v0.16b, v2.16b          /* fold plaintext into mac */
 103
 104         st1     {v7.16b}, [x0]                  /* store output block */
             /* no final iv: store the mac as is */
 105         cbz     x7, 0f
 106
             /*
              * Also entered directly from aes_ccm_do_crypt. Expects the mac in
              * v0, the final iv pointer in x7, the mac buffer in x5 and the
              * round keys / round count as set up for aes_encrypt.
              */
 107 SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
 108         ld1     {v1.16b}, [x7]                  /* load 1st ctriv */
 109
 110         aes_encrypt     v0, v1, w4
 111
 112         /* final round key cancels out */
 113         eor     v0.16b, v0.16b, v1.16b          /* en-/decrypt the mac */
 114 0:      st1     {v0.16b}, [x5]                  /* store result */
 115         ret
 116 SYM_FUNC_END(ce_aes_ccm_crypt_tail)
 117
 118         /*
 119          * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
 120          *                         u8 const rk[], u32 rounds, u8 mac[],
 121          *                         u8 ctr[], u8 const final_iv[]);
 122          * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
 123          *                         u8 const rk[], u32 rounds, u8 mac[],
 124          *                         u8 ctr[], u8 const final_iv[]);
              *
              * The arguments arrive in x0-x7 in the order listed: out in x0,
              * in in x1, cbytes in w2, rk in x3, rounds in w4, mac in x5,
              * ctr in x6 and final_iv in x7.
              *
              * mac[] is read on entry and rewritten before returning. The code
              * tests final_iv against NULL: when it is set, the mac is xor'ed
              * with the encrypted final_iv block before being stored.
 125          */
 126 SYM_FUNC_START(ce_aes_ccm_encrypt)
             /*
              * v22 is the BIF mask used by the partial block code: all ones
              * keeps the input block, which is the plaintext when encrypting.
              */
 127         movi    v22.16b, #255
 128         aes_ccm_do_crypt        1
 129 SYM_FUNC_END(ce_aes_ccm_encrypt)
 130
 131 SYM_FUNC_START(ce_aes_ccm_decrypt)
             /*
              * Decryption entry point; takes the same arguments as
              * ce_aes_ccm_encrypt. v22 is the BIF mask used by the partial
              * block code: all zeroes selects the xor of input and keystream,
              * which is the plaintext when decrypting.
              */
 132         movi    v22.16b, #0
 133         aes_ccm_do_crypt        0
 134 SYM_FUNC_END(ce_aes_ccm_decrypt)
 135
 136         .section ".rodata", "a"
             /*
              * Permute table for the partial block code: the byte indices 0-15
              * with 15 bytes of 0xff on either side. ce_aes_ccm_crypt_tail loads
              * 16 byte windows starting up to 15 bytes before or after .Lpermute
              * and uses them as TBL/TBX index vectors, where 0xff is an out of
              * range index.
              */
 137         .align  6
 138         .fill   15, 1, 0xff
 139 .Lpermute:
 140         .byte   0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
 141         .byte   0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
 142         .fill   15, 1, 0xff