arch/x86/crypto/twofish-i586-asm_32.S

   1 /***************************************************************************
   2 *   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
   3 *                                                                         *
   4 *   This program is free software; you can redistribute it and/or modify  *
   5 *   it under the terms of the GNU General Public License as published by  *
   6 *   the Free Software Foundation; either version 2 of the License, or     *
   7 *   (at your option) any later version.                                   *
   8 *                                                                         *
   9 *   This program is distributed in the hope that it will be useful,       *
  10 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
  11 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
  12 *   GNU General Public License for more details.                          *
  13 *                                                                         *
  14 *   You should have received a copy of the GNU General Public License     *
  15 *   along with this program; if not, write to the                         *
  16 *   Free Software Foundation, Inc.,                                       *
  17 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
  18 ***************************************************************************/
  19
  20 .file "twofish-i586-asm.S"
  21 .text
  22
  23 #include <linux/linkage.h>
  24 #include <asm/asm-offsets.h>
  25
  26 /*
      * Byte offsets of the stack arguments, relative to %esp on function
      * entry (return address at 0). Both entry points push four registers
      * before reading these slots and therefore add 16 to each offset.
      */
  27
  28 #define in_blk    12  /* address of the input block (four 32-bit words are read) */
  29 #define out_blk   8  /* address of the output block (four 32-bit words are written) */
  30 #define ctx       4  /* address of the Twofish context structure */
  31
     /*
      * Byte offsets of the words a, b, c, d inside a block. The same values
      * are passed to the whitening macros to select the matching key word.
      */
  32 #define a_offset        0
  33 #define b_offset        4
  34 #define c_offset        8
  35 #define d_offset        12
  36
  37 /*
      * Byte offsets of the members of the crypto context structure, relative
      * to the context address held in %ebp.
      * NOTE(review): these must match the C-side context layout, which is
      * not visible in this file - confirm when that structure changes.
      */
  38
  39 #define s0      0       /* S0 table, 256 32-bit words; the round macros address it as plain (%ebp,index,4) */
  40 #define s1      1024    /* S1 table, 256 words */
  41 #define s2      2048    /* S2 table, 256 words */
  42 #define s3      3072    /* S3 table, 256 words */
  43 #define w       4096    /* 8 whitening key words: w+0..12 for input, w+16..28 for output whitening */
  44 #define k       4128    /* round subkey words; a round reads the two words at k+round and k+4+round */
  45
  46 /*
      * Register aliases that allow macro substitution.
      * Each stem R0..R3 names one general purpose register in three widths:
      * D = the 32-bit register, B = its lowest byte, H = its second byte.
      * The round macros receive a stem and paste the suffix on with ##, so
      * the same macro body can be expanded with any register assignment.
      */
  47
  48 #define R0D    %eax
  49 #define R0B    %al
  50 #define R0H    %ah
  51
  52 #define R1D    %ebx
  53 #define R1B    %bl
  54 #define R1H    %bh
  55
  56 #define R2D    %ecx
  57 #define R2B    %cl
  58 #define R2H    %ch
  59
  60 #define R3D    %edx
  61 #define R3B    %dl
  62 #define R3H    %dh
  63
  64
  65 /* input whitening: XOR the whitening key word at w+off(base) into reg */
  66 #define input_whitening(reg, base, off) \
  67         xor     w+off(base), reg;
  68
  69 /* output whitening: XOR the whitening key word at w+16+off(base) into reg */
  70 #define output_whitening(reg, base, off) \
  71         xor     w+16+off(base), reg;
  72
  73 /*
      * One encryption round, expanded inline.
      *
  74  * a input register containing a (rotated 16)
  75  * b input register containing b
  76  * c input register containing c
  77  * d input register containing d (already rol $1)
      * round byte offset, relative to k, of this round's first subkey word
      *
      * a, b, c, d are register stems (R0..R3); D/B/H are pasted on to select
      * the 32-bit register, its low byte or its second byte. %ebp must hold
      * the context address.
      *
      * Two 32-bit values are assembled by XORing four table words each:
      * t0 in %esi, indexed by the bytes of a, and t1 in the d register,
      * indexed by the bytes of b. The incoming d is parked on the stack for
      * that time and popped into %edi. The round then produces
      *     c = rol(c ^ (t0 + t1 + word at k+round), 15)
      *     d = old d ^ (t0 + 2*t1 + word at k+4+round)
      *
      * Rotation bookkeeping: a is rotated right by 16 once, b by 16 and then
      * by 15 (31 in total, i.e. rol 1), and c leaves rotated left by 15
      * (ror 1 combined with a 16-bit rotation). With the (a,b) and (c,d)
      * stems swapped in the next expansion, every register is therefore
      * already in the input form listed above.
      *
      * Clobbers %esi, %edi and the flags; uses one stack slot temporarily.
      *
  78  * operations on a and b are interleaved to increase performance
  79  */
  80 #define encrypt_round(a,b,c,d,round)\
  81         push    d ## D;\
  82         movzx   b ## B,         %edi;\
  83         mov     s1(%ebp,%edi,4),d ## D;\
  84         movzx   a ## B,         %edi;\
  85         mov     s2(%ebp,%edi,4),%esi;\
  86         movzx   b ## H,         %edi;\
  87         ror     $16,            b ## D;\
  88         xor     s2(%ebp,%edi,4),d ## D;\
  89         movzx   a ## H,         %edi;\
  90         ror     $16,            a ## D;\
  91         xor     s3(%ebp,%edi,4),%esi;\
  92         movzx   b ## B,         %edi;\
  93         xor     s3(%ebp,%edi,4),d ## D;\
  94         movzx   a ## B,         %edi;\
  95         xor     (%ebp,%edi,4),  %esi;\
  96         movzx   b ## H,         %edi;\
  97         ror     $15,            b ## D;\
  98         xor     (%ebp,%edi,4),  d ## D;\
  99         movzx   a ## H,         %edi;\
 100         xor     s1(%ebp,%edi,4),%esi;\
 101         pop     %edi;\
 102         add     d ## D,         %esi;\
 103         add     %esi,           d ## D;\
 104         add     k+round(%ebp),  %esi;\
 105         xor     %esi,           c ## D;\
 106         rol     $15,            c ## D;\
 107         add     k+4+round(%ebp),d ## D;\
 108         xor     %edi,           d ## D;
 109
 110 /*
      * Final encryption round, expanded inline.
      *
 111  * a input register containing a (rotated 16)
 112  * b input register containing b
 113  * c input register containing c
 114  * d input register containing d (already rol $1)
      * round byte offset, relative to k, of this round's first subkey word
      *
      * Same table lookups and additions as encrypt_round: t0 is built in
      * %esi from the bytes of a, t1 in the d register from the bytes of b,
      * while the incoming d waits on the stack and is popped into %edi.
      *     c = ror(c ^ (t0 + t1 + word at k+round), 1)
      *     d = old d ^ (t0 + 2*t1 + word at k+4+round)
      *
 115  * operations on a and b are interleaved to increase performance
 116  * last round has different rotations for the output preparation
      * (b is rotated right by 16 twice and so ends unchanged, a is rotated
      * right by 16 once, and c gets only ror $1; given inputs in the form
      * above, no register carries a leftover pre-rotation on exit)
      *
      * %ebp must hold the context address. Clobbers %esi, %edi and the
      * flags; uses one stack slot temporarily.
 117  */
 118 #define encrypt_last_round(a,b,c,d,round)\
 119         push    d ## D;\
 120         movzx   b ## B,         %edi;\
 121         mov     s1(%ebp,%edi,4),d ## D;\
 122         movzx   a ## B,         %edi;\
 123         mov     s2(%ebp,%edi,4),%esi;\
 124         movzx   b ## H,         %edi;\
 125         ror     $16,            b ## D;\
 126         xor     s2(%ebp,%edi,4),d ## D;\
 127         movzx   a ## H,         %edi;\
 128         ror     $16,            a ## D;\
 129         xor     s3(%ebp,%edi,4),%esi;\
 130         movzx   b ## B,         %edi;\
 131         xor     s3(%ebp,%edi,4),d ## D;\
 132         movzx   a ## B,         %edi;\
 133         xor     (%ebp,%edi,4),  %esi;\
 134         movzx   b ## H,         %edi;\
 135         ror     $16,            b ## D;\
 136         xor     (%ebp,%edi,4),  d ## D;\
 137         movzx   a ## H,         %edi;\
 138         xor     s1(%ebp,%edi,4),%esi;\
 139         pop     %edi;\
 140         add     d ## D,         %esi;\
 141         add     %esi,           d ## D;\
 142         add     k+round(%ebp),  %esi;\
 143         xor     %esi,           c ## D;\
 144         ror     $1,             c ## D;\
 145         add     k+4+round(%ebp),d ## D;\
 146         xor     %edi,           d ## D;
 147
 148 /*
      * One decryption round, expanded inline.
      *
 149  * a input register containing a
 150  * b input register containing b (rotated 16)
 151  * c input register containing c (already rol $1)
 152  * d input register containing d
      * round byte offset, relative to k, of this round's first subkey word
      *
      * a, b, c, d are register stems (R0..R3); D/B/H are pasted on to select
      * the 32-bit register, its low byte or its second byte. %ebp must hold
      * the context address.
      *
      * Two 32-bit values are assembled by XORing four table words each:
      * t0 in the c register, indexed by the bytes of a, and t1 in %esi,
      * indexed by the bytes of b. The incoming c is parked on the stack for
      * that time and popped into %edi. The round then produces
      *     c = old c ^ (t0 + t1 + word at k+round)
      *     d = rol(d ^ (t0 + 2*t1 + word at k+4+round), 15)
      *
      * Rotation bookkeeping: a is rotated right by 16 and then by 15 (31 in
      * total, i.e. rol 1), b is rotated right by 16 once, and d leaves
      * rotated left by 15 (ror 1 combined with a 16-bit rotation). With the
      * (a,b) and (c,d) stems swapped in the next expansion, every register
      * is therefore already in the input form listed above.
      *
      * Clobbers %esi, %edi and the flags; uses one stack slot temporarily.
      *
 153  * operations on a and b are interleaved to increase performance
 154  */
 155 #define decrypt_round(a,b,c,d,round)\
 156         push    c ## D;\
 157         movzx   a ## B,         %edi;\
 158         mov     (%ebp,%edi,4),  c ## D;\
 159         movzx   b ## B,         %edi;\
 160         mov     s3(%ebp,%edi,4),%esi;\
 161         movzx   a ## H,         %edi;\
 162         ror     $16,            a ## D;\
 163         xor     s1(%ebp,%edi,4),c ## D;\
 164         movzx   b ## H,         %edi;\
 165         ror     $16,            b ## D;\
 166         xor     (%ebp,%edi,4),  %esi;\
 167         movzx   a ## B,         %edi;\
 168         xor     s2(%ebp,%edi,4),c ## D;\
 169         movzx   b ## B,         %edi;\
 170         xor     s1(%ebp,%edi,4),%esi;\
 171         movzx   a ## H,         %edi;\
 172         ror     $15,            a ## D;\
 173         xor     s3(%ebp,%edi,4),c ## D;\
 174         movzx   b ## H,         %edi;\
 175         xor     s2(%ebp,%edi,4),%esi;\
 176         pop     %edi;\
 177         add     %esi,           c ## D;\
 178         add     c ## D,         %esi;\
 179         add     k+round(%ebp),  c ## D;\
 180         xor     %edi,           c ## D;\
 181         add     k+4+round(%ebp),%esi;\
 182         xor     %esi,           d ## D;\
 183         rol     $15,            d ## D;
 184
 185 /*
      * Final decryption round, expanded inline.
      *
 186  * a input register containing a
 187  * b input register containing b (rotated 16)
 188  * c input register containing c (already rol $1)
 189  * d input register containing d
      * round byte offset, relative to k, of this round's first subkey word
      *
      * Same table lookups and additions as decrypt_round: t0 is built in
      * the c register from the bytes of a, t1 in %esi from the bytes of b,
      * while the incoming c waits on the stack and is popped into %edi.
      *     c = old c ^ (t0 + t1 + word at k+round)
      *     d = ror(d ^ (t0 + 2*t1 + word at k+4+round), 1)
      *
 190  * operations on a and b are interleaved to increase performance
 191  * last round has different rotations for the output preparation
      * (a is rotated right by 16 twice and so ends unchanged, b is rotated
      * right by 16 once, and d gets only ror $1; given inputs in the form
      * above, no register carries a leftover pre-rotation on exit)
      *
      * %ebp must hold the context address. Clobbers %esi, %edi and the
      * flags; uses one stack slot temporarily.
 192  */
 193 #define decrypt_last_round(a,b,c,d,round)\
 194         push    c ## D;\
 195         movzx   a ## B,         %edi;\
 196         mov     (%ebp,%edi,4),  c ## D;\
 197         movzx   b ## B,         %edi;\
 198         mov     s3(%ebp,%edi,4),%esi;\
 199         movzx   a ## H,         %edi;\
 200         ror     $16,            a ## D;\
 201         xor     s1(%ebp,%edi,4),c ## D;\
 202         movzx   b ## H,         %edi;\
 203         ror     $16,            b ## D;\
 204         xor     (%ebp,%edi,4),  %esi;\
 205         movzx   a ## B,         %edi;\
 206         xor     s2(%ebp,%edi,4),c ## D;\
 207         movzx   b ## B,         %edi;\
 208         xor     s1(%ebp,%edi,4),%esi;\
 209         movzx   a ## H,         %edi;\
 210         ror     $16,            a ## D;\
 211         xor     s3(%ebp,%edi,4),c ## D;\
 212         movzx   b ## H,         %edi;\
 213         xor     s2(%ebp,%edi,4),%esi;\
 214         pop     %edi;\
 215         add     %esi,           c ## D;\
 216         add     c ## D,         %esi;\
 217         add     k+round(%ebp),  c ## D;\
 218         xor     %edi,           c ## D;\
 219         add     k+4+round(%ebp),%esi;\
 220         xor     %esi,           d ## D;\
 221         ror     $1,             d ## D;
 222
 223 ENTRY(twofish_enc_blk)
     /*
      * Encrypt one block of four 32-bit words.
      *
      * Stack arguments (offsets relative to %esp on entry): context address
      * at ctx, output block address at out_blk, input block address at
      * in_blk.
      *
      * The four input words are loaded into %eax, %ebx, %ecx, %edx, XORed
      * with the whitening words at w+0..12, run through 15 encrypt_round
      * expansions plus encrypt_last_round, XORed with the whitening words at
      * w+16..28 and stored to the output block.
      *
      * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and %edx are
      * overwritten. Returns 1 in %eax.
      */
 224         push    %ebp                    /* save the registers that are restored before returning */
 225         push    %ebx
 226         push    %esi
 227         push    %edi
 228
 229         mov     ctx + 16(%esp), %ebp    /* abuse the base pointer: set new base
 230                                          * pointer to the ctx address */
 231         mov     in_blk+16(%esp),%edi    /* input address in edi (+16 skips the four pushes) */
 232
 233         mov     (%edi),         %eax
 234         mov     b_offset(%edi), %ebx
 235         mov     c_offset(%edi), %ecx
 236         mov     d_offset(%edi), %edx
 237         input_whitening(%eax,%ebp,a_offset)
 238         ror     $16,    %eax                /* encrypt_round expects a rotated by 16 */
 239         input_whitening(%ebx,%ebp,b_offset)
 240         input_whitening(%ecx,%ebp,c_offset)
 241         input_whitening(%edx,%ebp,d_offset)
 242         rol     $1,     %edx                /* encrypt_round expects d already rotated left by 1 */
 243
     /*
      * 16 rounds; the register stems alternate so that the (c,d) pair
      * written by one round is the (a,b) pair read by the next. The last
      * argument is the byte offset of the round's subkey pair (8 bytes per
      * round).
      */
 244         encrypt_round(R0,R1,R2,R3,0);
 245         encrypt_round(R2,R3,R0,R1,8);
 246         encrypt_round(R0,R1,R2,R3,2*8);
 247         encrypt_round(R2,R3,R0,R1,3*8);
 248         encrypt_round(R0,R1,R2,R3,4*8);
 249         encrypt_round(R2,R3,R0,R1,5*8);
 250         encrypt_round(R0,R1,R2,R3,6*8);
 251         encrypt_round(R2,R3,R0,R1,7*8);
 252         encrypt_round(R0,R1,R2,R3,8*8);
 253         encrypt_round(R2,R3,R0,R1,9*8);
 254         encrypt_round(R0,R1,R2,R3,10*8);
 255         encrypt_round(R2,R3,R0,R1,11*8);
 256         encrypt_round(R0,R1,R2,R3,12*8);
 257         encrypt_round(R2,R3,R0,R1,13*8);
 258         encrypt_round(R0,R1,R2,R3,14*8);
 259         encrypt_last_round(R2,R3,R0,R1,15*8);
 260
     /*
      * The register pairs are written out crossed: %ecx/%edx go to output
      * words 0/1 and %eax/%ebx to words 2/3. Each register is whitened with
      * the key word belonging to the slot it is stored to.
      */
 261         output_whitening(%eax,%ebp,c_offset)
 262         output_whitening(%ebx,%ebp,d_offset)
 263         output_whitening(%ecx,%ebp,a_offset)
 264         output_whitening(%edx,%ebp,b_offset)
 265         mov     out_blk+16(%esp),%edi;      /* output address in edi */
 266         mov     %eax,           c_offset(%edi)
 267         mov     %ebx,           d_offset(%edi)
 268         mov     %ecx,           (%edi)
 269         mov     %edx,           b_offset(%edi)
 270
 271         pop     %edi                        /* restore in reverse push order */
 272         pop     %esi
 273         pop     %ebx
 274         pop     %ebp
 275         mov     $1,     %eax                /* return value: 1 */
 276         ret
 277 ENDPROC(twofish_enc_blk)
 278
 279 ENTRY(twofish_dec_blk)
     /*
      * Decrypt one block of four 32-bit words.
      *
      * Stack arguments (offsets relative to %esp on entry): context address
      * at ctx, output block address at out_blk, input block address at
      * in_blk.
      *
      * Mirror image of the encryption path: the four input words are XORed
      * with the whitening words at w+16..28, run through 15 decrypt_round
      * expansions plus decrypt_last_round with the subkey offsets counting
      * down from 15*8 to 0, XORed with the whitening words at w+0..12 and
      * stored to the output block.
      *
      * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and %edx are
      * overwritten. Returns 1 in %eax.
      */
 280         push    %ebp                    /* save the registers that are restored before returning */
 281         push    %ebx
 282         push    %esi
 283         push    %edi
 284
 285
 286         mov     ctx + 16(%esp), %ebp    /* abuse the base pointer: set new base
 287                                          * pointer to the ctx address */
 288         mov     in_blk+16(%esp),%edi    /* input address in edi (+16 skips the four pushes) */
 289
 290         mov     (%edi),         %eax
 291         mov     b_offset(%edi), %ebx
 292         mov     c_offset(%edi), %ecx
 293         mov     d_offset(%edi), %edx
 294         output_whitening(%eax,%ebp,a_offset)
 295         output_whitening(%ebx,%ebp,b_offset)
 296         ror     $16,    %ebx                /* decrypt_round expects b rotated by 16 */
 297         output_whitening(%ecx,%ebp,c_offset)
 298         output_whitening(%edx,%ebp,d_offset)
 299         rol     $1,     %ecx                /* decrypt_round expects c already rotated left by 1 */
 300
     /*
      * 16 rounds in reverse key order; the register stems alternate so that
      * the (c,d) pair written by one round is the (a,b) pair read by the
      * next. The last argument is the byte offset of the round's subkey
      * pair (8 bytes per round).
      */
 301         decrypt_round(R0,R1,R2,R3,15*8);
 302         decrypt_round(R2,R3,R0,R1,14*8);
 303         decrypt_round(R0,R1,R2,R3,13*8);
 304         decrypt_round(R2,R3,R0,R1,12*8);
 305         decrypt_round(R0,R1,R2,R3,11*8);
 306         decrypt_round(R2,R3,R0,R1,10*8);
 307         decrypt_round(R0,R1,R2,R3,9*8);
 308         decrypt_round(R2,R3,R0,R1,8*8);
 309         decrypt_round(R0,R1,R2,R3,7*8);
 310         decrypt_round(R2,R3,R0,R1,6*8);
 311         decrypt_round(R0,R1,R2,R3,5*8);
 312         decrypt_round(R2,R3,R0,R1,4*8);
 313         decrypt_round(R0,R1,R2,R3,3*8);
 314         decrypt_round(R2,R3,R0,R1,2*8);
 315         decrypt_round(R0,R1,R2,R3,1*8);
 316         decrypt_last_round(R2,R3,R0,R1,0);
 317
     /*
      * The register pairs are written out crossed: %ecx/%edx go to output
      * words 0/1 and %eax/%ebx to words 2/3. Each register is whitened with
      * the key word belonging to the slot it is stored to.
      */
 318         input_whitening(%eax,%ebp,c_offset)
 319         input_whitening(%ebx,%ebp,d_offset)
 320         input_whitening(%ecx,%ebp,a_offset)
 321         input_whitening(%edx,%ebp,b_offset)
 322         mov     out_blk+16(%esp),%edi;      /* output address in edi */
 323         mov     %eax,           c_offset(%edi)
 324         mov     %ebx,           d_offset(%edi)
 325         mov     %ecx,           (%edi)
 326         mov     %edx,           b_offset(%edi)
 327
 328         pop     %edi                        /* restore in reverse push order */
 329         pop     %esi
 330         pop     %ebx
 331         pop     %ebp
 332         mov     $1,     %eax                /* return value: 1 */
 333         ret
 334 ENDPROC(twofish_dec_blk)