crypto/external/bsd/openssl/lib/libcrypto/arch/vax/bn_asm_vax.S

   1 #       $NetBSD: bn_asm_vax.S,v 1.1 2009/07/19 23:30:47 christos Exp $
   2 #
   3 # w.j.m. 15-jan-1999
   4 #
   5 # it's magic ...
   6 #
   7 # ULONG bn_mul_add_words(ULONG r[],ULONG a[],int n,ULONG w) {
   8 #       ULONG c = 0;
   9 #       int i;
  10 #       for(i = 0; i < n; i++) <c,r[i]> := r[i] + c + a[i] * w ;
  11 #       return c;
  12 # }
  13
  14         .globl  bn_mul_add_words
  15         .type   bn_mul_add_words@function
  16
  17 bn_mul_add_words:
  18         .word   0x40
  19
  20         movl    4(%ap),%r2              # *r
  21         movl    8(%ap),%r3              # *a
  22         movl    12(%ap),%r4             # n
  23         movl    16(%ap),%r5             # w
  24         clrl    %r6                     # return value ("carry")
  25
  26 0:      emul    %r5,(%r3),(%r2),%r0     # w * a[0] + r[0] -> r0
  27
  28         # fixup for "negative" r[]
  29         tstl    (%r2)
  30         bgeq    1f
  31         incl    %r1                     # add 1 to highword
  32
  33 1:      # add saved carry to result
  34         addl2   %r6,%r0
  35         adwc    $0,%r1
  36
  37         # combined fixup for "negative" w, a[]
  38         tstl    %r5             # if w is negative...
  39         bgeq    1f
  40         addl2   (%r3),%r1       # ...add a[0] again to highword
  41 1:      tstl    (%r3)           # if a[0] is negative...
  42         bgeq    1f
  43         addl2   %r5,%r1         # ...add w again to highword
  44 1:
  45         movl    %r0,(%r2)+      # save low word in dest & advance *r
  46         addl2   $4,%r3          # advance *a
  47         movl    %r1,%r6         # high word in r6 for return value
  48
  49         sobgtr  %r4,0b          # loop?
  50
  51         movl    %r6,%r0
  52         ret
  53
  54 #       .title  vax_bn_mul_words  unsigned multiply & add, 32*32+32=>64
  55 #;
  56 #; w.j.m. 15-jan-1999
  57 #;
  58 #; it's magic ...
  59 #;
  60 #; ULONG bn_mul_words(ULONG r[],ULONG a[],int n,ULONG w) {
  61 #;      ULONG c = 0;
  62 #;      int i;
  63 #;      for(i = 0; i < num; i++) <c,r[i]> := a[i] * w + c ;
  64 #;      return(c);
  65 #; }
  66 #
  67         .globl  bn_mul_words
  68         .type   bn_mul_words@function
  69 bn_mul_words:
  70         .word   0x40
  71
  72         movl    4(%ap),%r2              # *r
  73         movl    8(%ap),%r3              # *a
  74         movl    12(%ap),%r4             # n
  75         movl    16(%ap),%r5             # w
  76         clrl    %r6                     # carry
  77
  78 0:      emul    %r5,(%r3),%r6,%r0       # w * a[0] + carry -> r0
  79
  80         # fixup for "negative" carry
  81         tstl    %r6
  82         bgeq    1f
  83         incl    %r1
  84
  85 1:      # combined fixup for "negative" w, a[]
  86         tstl    %r5
  87         bgeq    1f
  88         addl2   (%r3),%r1
  89 1:      tstl    (%r3)
  90         bgeq    1f
  91         addl2   %r5,%r1
  92
  93 1:      movl    %r0,(%r2)+
  94         addl2   $4,%r3
  95         movl    %r1,%r6
  96
  97         sobgtr  %r4,0b
  98
  99         movl    %r6,%r0
 100         ret
 101
 102
 103
 104 #       .title  vax_bn_sqr_words  unsigned square, 32*32=>64
 105 #;
 106 #; w.j.m. 15-jan-1999
 107 #;
 108 #; it's magic ...
 109 #;
 110 #; void bn_sqr_words(ULONG r[],ULONG a[],int n) {
 111 #;      int i;
 112 #;      for(i = 0; i < n; i++) <r[2*i+1],r[2*i]> := a[i] * a[i] ;
 113 #; }
 114 #
 115         .globl  bn_sqr_words
 116         .type   bn_sqr_words@function
 117 bn_sqr_words:
 118         .word   0
 119
 120         movl    4(%ap),%r2              # r
 121         movl    8(%ap),%r3              # a
 122         movl    12(%ap),%r4             # n
 123
 124 0:      movl    (%r3)+,%r5              # r5 = a[] & advance
 125
 126         emul    %r5,%r5,$0,%r0          # a[0] * a[0] + 0 -> r0
 127
 128         # fixup for "negative" a[]
 129         tstl    %r5
 130         bgeq    1f
 131         addl2   %r5,%r1
 132         addl2   %r5,%r1
 133
 134 1:      movq    %r0,(%r2)+              # store 64-bit result
 135
 136         sobgtr  %r4,0b                  # loop
 137
 138         ret
 139
 140
 141 #       .title  vax_bn_div_words  unsigned divide
 142 #;
 143 #; Richard Levitte 20-Nov-2000
 144 #;
 145 #; ULONG bn_div_words(ULONG h, ULONG l, ULONG d)
 146 #; {
 147 #;      return ((ULONG)((((ULLONG)h)<<32)|l) / (ULLONG)d);
 148 #; }
 149 #;
 150 #; Using EDIV would be very easy, if it didn't do signed calculations.
 151 #; Any time any of the input numbers are signed, there are problems,
 152 #; usually with integer overflow, at which point it returns useless
 153 #; data (the quotient gets the value of l, and the remainder becomes 0).
 154 #;
 155 #; If it was just for the dividend, it would be very easy, just divide
 156 #; it by 2 (unsigned), do the division, multiply the resulting quotient
 157 #; and remainder by 2, add the bit that was dropped when dividing by 2
 158 #; to the remainder, and do some adjustment so the remainder doesn't
 159 #; end up larger than the divisor.  For some cases when the divisor is
 160 #; negative (from EDIV's point of view, i.e. when the highest bit is set),
 161 #; dividing the dividend by 2 isn't enough, and since some operations
 162 #; might generate integer overflows even when the dividend is divided by
 163 #; 4 (when the high part of the shifted down dividend ends up being exactly
 164 #; half of the divisor, the result is the quotient 0x80000000, which is
 165 #; negative...) it needs to be divided by 8.  Furthermore, the divisor needs
 166 #; to be divided by 2 (unsigned) as well, to avoid more problems with the sign.
 167 #; In this case, a little extra fiddling with the remainder is required.
 168 #;
 169 #; So, the simplest way to handle this is always to divide the dividend
 170 #; by 8, and to divide the divisor by 2 if it's highest bit is set.
 171 #; After EDIV has been used, the quotient gets multiplied by 8 if the
 172 #; original divisor was positive, otherwise 4.  The remainder, oddly
 173 #; enough, is *always* multiplied by 8.
 174 #; NOTE: in the case mentioned above, where the high part of the shifted
 175 #; down dividend ends up being exactly half the shifted down divisor, we
 176 #; end up with a 33 bit quotient.  That's no problem however, it usually
 177 #; means we have ended up with a too large remainder as well, and the
 178 #; problem is fixed by the last part of the algorithm (next paragraph).
 179 #;
 180 #; The routine ends with comparing the resulting remainder with the
 181 #; original divisor and if the remainder is larger, subtract the
 182 #; original divisor from it, and increase the quotient by 1.  This is
 183 #; done until the remainder is smaller than the divisor.
 184 #;
 185 #; The complete algorithm looks like this:
 186 #;
 187 #; d'    = d
 188 #; l'    = l & 7
 189 #; [h,l] = [h,l] >> 3
 190 #; [q,r] = floor([h,l] / d)     # This is the EDIV operation
 191 #; if (q < 0) q = -q            # I doubt this is necessary any more
 192 #;
 193 #; r'    = r >> 29
 194 #; if (d' >= 0)
 195 #;   q'  = q >> 29
 196 #;   q   = q << 3
 197 #; else
 198 #;   q'  = q >> 30
 199 #;   q   = q << 2
 200 #; r     = (r << 3) + l'
 201 #;
 202 #; if (d' < 0)
 203 #;   {
 204 #;     [r',r] = [r',r] - q
 205 #;     while ([r',r] < 0)
 206 #;       {
 207 #;         [r',r] = [r',r] + d
 208 #;         [q',q] = [q',q] - 1
 209 #;       }
 210 #;   }
 211 #;
 212 #; while ([r',r] >= d')
 213 #;   {
 214 #;     [r',r] = [r',r] - d'
 215 #;     [q',q] = [q',q] + 1
 216 #;   }
 217 #;
 218 #; return q
 219 #
 220 #;r2 = l, q
 221 #;r3 = h, r
 222 #;r4 = d
 223 #;r5 = l'
 224 #;r6 = r'
 225 #;r7 = d'
 226 #;r8 = q'
 227 #
 228         .globl  bn_div_words
 229         .type   bn_div_words@function
 230 bn_div_words:
 231         .word   0x1c0
 232
 233         movl    4(%ap),%r3              # h
 234         movl    8(%ap),%r2              # l
 235         movl    12(%ap),%r4             # d
 236
 237         bicl3   $-8,%r2,%r5             # l' = l & 7
 238         bicl3   $7,%r2,%r2
 239
 240         bicl3   $-8,%r3,%r6
 241         bicl3   $7,%r3,%r3
 242
 243         addl2   %r6,%r2
 244
 245         rotl    $-3,%r2,%r2             # l = l >> 3
 246         rotl    $-3,%r3,%r3             # h = h >> 3
 247
 248         movl    %r4,%r7                 # d' = d
 249
 250         clrl    %r6                     # r' = 0
 251         clrl    %r8                     # q' = 0
 252
 253         tstl    %r4
 254         beql    0f                      # Uh-oh, the divisor is 0...
 255         bgtr    1f
 256         rotl    $-1,%r4,%r4     # If d is negative, shift it right.
 257         bicl2   $0x80000000,%r4 # Since d is then a large number, the
 258                                 # lowest bit is insignificant
 259                                 # (contradict that, and I'll fix the problem!)
 260 1:
 261         ediv    %r4,%r2,%r2,%r3         # Do the actual division
 262
 263         tstl    %r2
 264         bgeq    1f
 265         mnegl   %r2,%r2         # if q < 0, negate it
 266 1:
 267         tstl    %r7
 268         blss    1f
 269         rotl    $3,%r2,%r2      #   q = q << 3
 270         bicl3   $-8,%r2,%r8     #   q' gets the high bits from q
 271         bicl3   $7,%r2,%r2
 272         brb     2f
 273
 274 1:                              # else
 275         rotl    $2,%r2,%r2      #   q = q << 2
 276         bicl3   $-4,%r2,%r8     #   q' gets the high bits from q
 277         bicl3   $3,%r2,%r2
 278 2:
 279         rotl    $3,%r3,%r3      # r = r << 3
 280         bicl3   $-8,%r3,%r6     # r' gets the high bits from r
 281         bicl3   $7,%r3,%r3
 282         addl2   %r5,%r3         # r = r + l'
 283
 284         tstl    %r7
 285         bgeq    5f
 286         bitl    $1,%r7
 287         beql    5f              # if d' < 0 && d' & 1
 288         subl2   %r2,%r3         #   [r',r] = [r',r] - [q',q]
 289         sbwc    %r8,%r6
 290 3:
 291         bgeq    5f              #   while r < 0
 292         decl    %r2             #     [q',q] = [q',q] - 1
 293         sbwc    $0,%r8
 294         addl2   %r7,%r3         #     [r',r] = [r',r] + d'
 295         adwc    $0,%r6
 296         brb     3b
 297
 298 # The return points are placed in the middle to keep a short distance from
 299 # all the branch points
 300 1:
 301 #       movl    %r3,%r1
 302         movl    %r2,%r0
 303         ret
 304 0:
 305         movl    $-1,%r0
 306         ret
 307 5:
 308         tstl    %r6
 309         bneq    6f
 310         cmpl    %r3,%r7
 311         blssu   1b              # while [r',r] >= d'
 312 6:
 313         subl2   %r7,%r3         #   [r',r] = [r',r] - d'
 314         sbwc    $0,%r6
 315         incl    %r2             #   [q',q] = [q',q] + 1
 316         adwc    $0,%r8
 317         brb     5b
 318
 319
 320
 321 #       .title  vax_bn_add_words  unsigned add of two arrays
 322 #;
 323 #; Richard Levitte 20-Nov-2000
 324 #;
 325 #; ULONG bn_add_words(ULONG r[], ULONG a[], ULONG b[], int n) {
 326 #;      ULONG c = 0;
 327 #;      int i;
 328 #;      for (i = 0; i < n; i++) <c,r[i]> = a[i] + b[i] + c;
 329 #;      return(c);
 330 #; }
 331 #
 332
 333         .globl  bn_add_words
 334         .type   bn_add_words@function
 335 bn_add_words:
 336         .word   0
 337
 338         movl    4(%ap),%r2      # r
 339         movl    8(%ap),%r3      # a
 340         movl    12(%ap),%r4     # b
 341         movl    16(%ap),%r5     # n
 342         clrl    %r0
 343
 344         tstl    %r5
 345         bleq    1f
 346
 347 0:      movl    (%r3)+,%r1      # carry untouched
 348         adwc    (%r4)+,%r1      # carry used and touched
 349         movl    %r1,(%r2)+      # carry untouched
 350         sobgtr  %r5,0b          # carry untouched
 351
 352         adwc    $0,%r0
 353 1:      ret
 354
 355 #;
 356 #; Richard Levitte 20-Nov-2000
 357 #;
 358 #; ULONG bn_sub_words(ULONG r[], ULONG a[], ULONG b[], int n) {
 359 #;      ULONG c = 0;
 360 #;      int i;
 361 #;      for (i = 0; i < n; i++) <c,r[i]> = a[i] - b[i] - c;
 362 #;      return(c);
 363 #; }
 364 #
 365         .globl  bn_sub_words
 366         .type   bn_sub_words@function
 367 bn_sub_words:
 368         .word   0x40
 369
 370         movl    4(%ap),%r2      # r
 371         movl    8(%ap),%r3      # a
 372         movl    12(%ap),%r4     # b
 373         movl    16(%ap),%r5     # n
 374         clrl    %r0
 375
 376         tstl    %r5
 377         bleq    1f
 378
 379 0:      movl    (%r3)+,%r6      # carry untouched
 380         sbwc    (%r4)+,%r6      # carry used and touched
 381         movl    %r6,(%r2)+      # carry untouched
 382         sobgtr  %r5,0b          # carry untouched
 383
 384 1:      adwc    $0,%r0
 385         ret
 386
 387 #
 388 #       Ragge 20-Sep-2003
 389 #
 390 #       Multiply a vector of 4/8 longword by another.
 391 #       Uses two loops and 16/64 emuls.
 392 #
 393         .globl  bn_mul_comba4
 394         .type   bn_mul_comba4@function
 395 bn_mul_comba4:
 396         .word   0x3c0
 397         movl    $4,%r9          # 4*4
 398         brb     6f
 399
 400         .globl  bn_mul_comba8
 401         .type   bn_mul_comba8@function
 402 bn_mul_comba8:
 403         .word   0x3c0
 404         movl    $8,%r9          # 8*8
 405
 406 6:      movl    8(%ap),%r3      # a[]
 407         movl    12(%ap),%r7     # b[]
 408         brb     5f
 409
 410         .globl  bn_sqr_comba4
 411         .type   bn_sqr_comba4@function
 412 bn_sqr_comba4:
 413         .word   0x3c0
 414         movl    $4,%r9          # 4*4
 415         brb 0f
 416
 417         .globl  bn_sqr_comba8
 418         .type   bn_sqr_comba8@function
 419 bn_sqr_comba8:
 420         .word   0x3c0
 421         movl    $8,%r9          # 8*8
 422
 423 0:
 424         movl    8(%ap),%r3      # a[]
 425         movl    %r3,%r7         # a[]
 426
 427 5:      movl    4(%ap),%r5      # r[]
 428         movl    %r9,%r8
 429
 430         clrq    (%r5)           # clear destinatino, for add.
 431         clrq    8(%r5)
 432         clrq    16(%r5)         # these only needed for comba8
 433         clrq    24(%r5)
 434
 435 2:      clrl    %r4             # carry
 436         movl    %r9,%r6         # inner loop count
 437         movl    (%r7)+,%r2      # value to multiply with
 438
 439 1:      emul    %r2,(%r3),%r4,%r0
 440         tstl    %r4
 441         bgeq    3f
 442         incl    %r1
 443 3:      tstl    %r2
 444         bgeq    3f
 445         addl2   (%r3),%r1
 446 3:      tstl    (%r3)
 447         bgeq    3f
 448         addl2   %r2,%r1
 449
 450 3:      addl2   %r0,(%r5)+      # add to destination
 451         adwc    $0,%r1          # remember carry
 452         movl    %r1,%r4         # add carry in next emul
 453         addl2   $4,%r3
 454         sobgtr  %r6,1b
 455
 456         movl    %r4,(%r5)       # save highest add result
 457
 458         ashl    $2,%r9,%r4
 459         subl2   %r4,%r3
 460         subl2   $4,%r4
 461         subl2   %r4,%r5
 462
 463         sobgtr  %r8,2b
 464
 465         ret