sys/arch/hppa/spmath/impys.S

   1 /*      $NetBSD: impys.S,v 1.1.10.3 2004/09/21 13:16:34 skrll Exp $     */
   2
   3 /*      $OpenBSD: impys.S,v 1.5 2001/03/29 03:58:18 mickey Exp $        */
   4
   5 /*
   6  * Copyright 1996 1995 by Open Software Foundation, Inc.
   7  *              All Rights Reserved
   8  *
   9  * Permission to use, copy, modify, and distribute this software and
  10  * its documentation for any purpose and without fee is hereby granted,
  11  * provided that the above copyright notice appears in all copies and
  12  * that both the copyright notice and this permission notice appear in
  13  * supporting documentation.
  14  *
  15  * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
  16  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  17  * FOR A PARTICULAR PURPOSE.
  18  *
  19  * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
  20  * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  21  * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
  22  * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
  23  * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  24  *
  25  */
  26 /*
  27  * pmk1.1
  28  */
  29 /*
  30  * (c) Copyright 1986 HEWLETT-PACKARD COMPANY
  31  *
  32  * To anyone who acknowledges that this file is provided "AS IS"
  33  * without any express or implied warranty:
  34  *     permission to use, copy, modify, and distribute this file
  35  * for any purpose is hereby granted without fee, provided that
  36  * the above copyright notice and this notice appears in all
  37  * copies, and that the name of Hewlett-Packard Company not be
  38  * used in advertising or publicity pertaining to distribution
  39  * of the software without specific, written prior permission.
  40  * Hewlett-Packard Company makes no representations about the
  41  * suitability of this software for any purpose.
  42  */
  43
  44 #include <machine/asm.h>
  45
  46 /****************************************************************************
  47  *
  48  * Implement an integer multiply routine for 32-bit operands and 64-bit product
  49  * with operand values of zero (multiplicand only) and -2**31 treated specially.
  50  * The algorithm uses the absolute value of the multiplier, four bits at a time,
   51  * from right to left, to generate partial products.  Execution speed is more
  52  * important than program size in this implementation.
  53  *
  54  ***************************************************************************/
  55 /*
  56  * Definitions - General registers
  57  */
   58 gr0:    .equ            0               /* general register zero: zero source, discarded target */
   59 pu:     .equ            3               /* upper word of the 64-bit product */
   60 pl:     .equ            4               /* lower word of the product (same register as op2) */
   61 op2:    .equ            4               /* multiplier (same register as pl) */
   62 op1:    .equ            5               /* multiplicand */
   63 cnt:    .equ            6               /* remaining 4-bit steps in the multiply loop */
   64 brindex:        .equ            7               /* index into the branch table */
   65 sign:   .equ            8               /* bit 0 holds the sign of the product */
   66 pc:     .equ            9               /* constant 00...01, shifted in as the carry bit */
   67 pm:     .equ           10               /* constant -1, shifted in as minus-sign fill */
  68
   69         .text
   70
      ;
      ; impys - signed 32-bit by 32-bit multiply giving a 64-bit product.
      ;
      ; In:   %arg0 = address of the multiplicand word
      ;       %arg1 = address of the multiplier word
      ;       %arg2 = address of the two-word result
      ; Out:  0(%arg2) = high word of product, 4(%arg2) = low word
      ;
      ; Registers 3 through 10 are pushed on entry and popped before return.
      ; op2 and pl name the same register: every step shifts the product
      ; right, so multiplier bits already used make room for product bits.
      ; The multiplier is taken as an absolute value, 4 bits per step, and
      ; the product is negated at the end when bit 0 of sign is set.
      ;
      ; NOTE: many branches below target label+4 or label-4 and depend on
      ; the instruction that follows them (the delay slot). Do not insert,
      ; remove or reorder instructions without rechecking every such target
      ; and the two-instruction spacing of the branch table.
      ; NOTE(review): the second ENTRY argument (32) presumably is the frame
      ; size, matching the 8 words pushed below - confirm in machine/asm.h.
      ;
   71 ENTRY(impys,32)
   72         stws,ma         pu,4(%sp)               ; push pu (store, then sp += 4)
   73         stws,ma         pl,4(%sp)               ; push pl
   74         stws,ma         op1,4(%sp)              ; push op1
   75         stws,ma         cnt,4(%sp)              ; push cnt
   76         stws,ma         brindex,4(%sp)          ; push brindex
   77         stws,ma         sign,4(%sp)             ; push sign
   78         stws,ma         pc,4(%sp)               ; push pc
   79         stws,ma         pm,4(%sp)               ; push pm
   80 ;
   81 ;   Start multiply process
   82 ;
   83         ldws            0(%arg1),op2            ; get multiplier
   84         ldws            0(%arg0),op1            ; get multiplicand
   85         addi            -1,gr0,pm               ; initialize pm to 111...1
   86         comb,<          op2,gr0,mpyb            ; br. if multiplier < 0
   87         xor             op2,op1,sign            ; (delay slot) sign(0) = sign of product
   88 mpy1:   comb,<          op1,gr0,mpya            ; br. if multiplicand < 0
   89         addi            0,gr0,pu                ; (delay slot) clear upper product
   90         addib,=         0,op1,fini0             ; op1 = 0, product = 0
   91 mpy2:   addi            1,gr0,pc                ; initialize pc to 00...01
   92         movib,tr        8,cnt,mloop             ; 8 steps of 4 bits, enter loop
   93         extru           op2,31,4,brindex        ; (delay slot) low 4 bits as table index
   94 ;
   95         .align          8
   96 ;
   97         b               sh4c                    ; this is sh4n-4, the target used by
      ;                                                 entry 0011; br. if sign overflow
   98 sh4n:   shd             pu,pl,4,pl              ; shift product right 4 bits
   99         addib,<=        -1,cnt,mulend           ; reduce count by 1, exit if
  100         extru           pu,27,28,pu             ;   <= zero
  101 ;
  102 mloop:  blr             brindex,gr0             ; br. into table
  103                                                 ;   entries of 2 words
  104         extru           op2,27,4,brindex        ; next 4 bits into index
  105 ;
  106 ;
  107 ;       branch table for the multiplication process with four multiplier bits
  108 ;
  109 mtable:                                         ; two words per entry
  110 ;
  111 ; ----  bits = 0000 ---- shift product 4 bits -------------------------------
  112 ;
  113         b               sh4n+4                  ; just shift partial
  114         shd             pu,pl,4,pl              ;   product right 4 bits
  115 ;
  116 ;  ---- bits = 0001 ---- add op1, then shift 4 bits
  117 ;
  118         addb,tr         op1,pu,sh4n+4           ; add op1 to product, to shift
  119         shd             pu,pl,4,pl              ;   product right 4 bits
  120 ;
  121 ;  ---- bits = 0010 ---- add op1, add op1, then shift 4 bits
  122 ;
  123         addb,tr         op1,pu,sh4n             ; add op1, then to the 4 bit shift
  124         addb,uv         op1,pu,sh4c             ;   add op1 again, sh4c on overflow
  125 ;
  126 ;  ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
  127 ;
  128         addb,tr         op1,pu,sh4n-4           ; add op1 & 2*op1, shift
  129         sh1add,nsv      op1,pu,pu               ;   product right 4 bits
  130 ;
  131 ;  ---- bits = 0100 ---- shift 2, add op1, shift 2
  132 ;
  133         b               sh2sa
  134         shd             pu,pl,2,pl              ; shift product 2 bits
  135 ;
  136 ;  ---- bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
  137 ;
  138         addb,tr         op1,pu,sh2us            ; add op1 to product
  139         shd             pu,pl,2,pl              ; shift 2 bits
  140 ;
  141 ;  ---- bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
  142 ;
  143         addb,tr         op1,pu,sh2c             ; add 2*op1, to shift 2 bits
  144         addb,nuv        op1,pu,sh2us            ; br. if not overflow
  145 ;
  146 ;  ---- bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
  147 ;
  148         b               sh3s
  149         sub             pu,op1,pu               ; subtract op1, br. to sh3s
  150
  151 ;
  152 ;  ---- bits = 1000 ---- shift 3, add op1, shift 1
  153 ;
  154         b               sh3sa
  155         shd             pu,pl,3,pl              ; shift product right 3 bits
  156 ;
  157 ;  ---- bits = 1001 ---- add op1, shift 3, add op1, shift 1
  158 ;
  159         addb,tr         op1,pu,sh3us            ; add op1, to shift 3, add op1,
  160         shd             pu,pl,3,pl              ;   and shift 1
  161 ;
  162 ;  ---- bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
  163 ;
  164         addb,tr         op1,pu,sh3c             ; add 2*op1, to shift 3 bits
  165         addb,nuv        op1,pu,sh3us            ;   br. if no overflow
  166 ;
  167 ;  ---- bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
  168 ;
  169         addib,tr        1,brindex,sh2s          ; add 1 to index, subtract op1,
  170         sub             pu,op1,pu               ;   shift 2 with minus sign
  171 ;
  172 ;  ---- bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
  173 ;
  174         addib,tr        1,brindex,sh2sb         ; add 1 to index, to shift
  175         shd             pu,pl,2,pl              ; shift right 2 bits signed
  176 ;
  177 ;  ---- bits = 1101 ---- add op1, shift 2, add -op1, shift 2
  178 ;
  179         addb,tr         op1,pu,sh2ns            ; add op1, to shift 2
  180         shd             pu,pl,2,pl              ;   right 2 unsigned, etc.
  181 ;
  182 ;  ---- bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
  183 ;
  184         addib,tr        1,brindex,sh1sa         ; add 1 to index, to shift
  185         shd             pu,pl,1,pl              ; shift 1 bit
  186 ;
  187 ;  ---- bits = 1111 ---- add -op1, shift 4 signed
  188 ;
  189         addib,tr        1,brindex,sh4s          ; add 1 to index, subtract op1,
  190         sub             pu,op1,pu               ;   to shift 4 signed
  191
  192 ;
  193 ;  ---- bits = 10000 ---- shift 4 signed
  194 ;       (index 16: reached when an earlier entry added 1 to an index of 1111)
  195         addib,tr        1,brindex,sh4s+4        ; add 1 to index
  196         shd             pu,pl,4,pl              ; shift 4 signed
  197 ;
  198 ;  ---- end of table ---------------------------------------------------------
  199 ;
  200 sh4s:   shd             pu,pl,4,pl              ; shift low word right 4 bits
  201         addib,tr        -1,cnt,mloop            ; loop (count > 0 always here)
  202         shd             pm,pu,4,pu              ; shift 4, minus signed
  203 ;
  204 sh4c:   addib,>         -1,cnt,mloop            ; decrement count, loop if > 0
  205         shd             pc,pu,4,pu              ; shift 4 with overflow
  206         b               signs                   ; end of multiply
  207         bb,>=,n         sign,0,fini             ; test sign of product
  208 ;
  209 mpyb:   add,=           op2,op2,gr0             ; if <> 0, back to main sect.
  210         b               mpy1
  211         sub             0,op2,op2               ; op2 = |multiplier|
  212         add,>=          op1,gr0,gr0             ; if op1 < 0, invert sign,
  213         xor             pm,sign,sign            ;   for correct result
  214 ;
  215 ;       special case for multiplier = -2**31, op1 = signed multiplicand
  216 ;               or multiplicand = -2**31, op1 = signed multiplier
  217 ;
  218         shd             op1,0,1,pl              ; shift op1 left 31 bits
  219 mmax:   extrs           op1,30,31,pu            ; pu = op1 shifted right 1, signed
  220         b               signs                   ; negate product (if needed)
  221         bb,>=,n         sign,0,fini             ; test sign of product
  222 ;
  223 mpya:   add,=           op1,op1,gr0             ; op1 = -2**31, special case
  224         b               mpy2
  225         sub             0,op1,op1               ; op1 = |multiplicand|
  226         add,>=          op2,gr0,gr0             ; if op2 < 0, invert sign,
  227         xor             pm,sign,sign            ;   for correct result
  228         movb,tr         op2,op1,mmax            ; use op2 as multiplicand
  229         shd             op1,0,1,pl              ; shift it left 31 bits
  230 ;
  231 sh3c:   shd             pu,pl,3,pl              ; shift product 3 bits
  232         shd             pc,pu,3,pu              ; shift 3 with carry (pc) shifted in
  233         addb,tr         op1,pu,sh1              ; add op1, to shift 1 bit
  234         shd             pu,pl,1,pl
  235 ;
  236 sh3us:  extru           pu,28,29,pu             ; shift 3 unsigned
  237         addb,tr         op1,pu,sh1              ; add op1, to shift 1 bit
  238         shd             pu,pl,1,pl
  239 ;
  240 sh3sa:  extrs           pu,28,29,pu             ; shift 3 signed
  241         addb,tr         op1,pu,sh1              ; add op1, to shift 1 bit
  242         shd             pu,pl,1,pl
  243 ;
  244 sh3s:   shd             pu,pl,3,pl              ; shift 3 minus signed
  245         shd             pm,pu,3,pu
  246         addb,tr         op1,pu,sh1              ; add op1, to shift 1 bit
  247         shd             pu,pl,1,pl
  248 ;
  249 sh1:    addib,>         -1,cnt,mloop            ; loop if count > 0
  250         extru           pu,30,31,pu             ; shift upper word right 1 unsigned
  251         b               signs                   ; end of multiply
  252         bb,>=,n         sign,0,fini             ; test sign of product
  253 ;
  254 sh2ns:  addib,tr        1,brindex,sh2sb+4       ; increment index
  255         extru           pu,29,30,pu             ; shift unsigned
  256 ;
  257 sh2s:   shd             pu,pl,2,pl              ; shift with minus sign
  258         shd             pm,pu,2,pu              ;
  259         sub             pu,op1,pu               ; subtract op1
  260         shd             pu,pl,2,pl              ; shift with minus sign
  261         addib,tr        -1,cnt,mloop            ; decrement count, loop
  262         shd             pm,pu,2,pu              ; shift with minus sign
  263                                                 ; count never reaches 0 here
  264 ;
  265 sh2sb:  extrs           pu,29,30,pu             ; shift 2 signed
  266         sub             pu,op1,pu               ; subtract op1 from product
  267         shd             pu,pl,2,pl              ; shift with minus sign
  268         addib,tr        -1,cnt,mloop            ; decrement count, loop
  269         shd             pm,pu,2,pu              ; shift with minus sign
  270                                                 ; count never reaches 0 here
  271 ;
  272 sh1sa:  extrs           pu,30,31,pu             ;   signed
  273         sub             pu,op1,pu               ; subtract op1 from product
  274         shd             pu,pl,3,pl              ; shift 3 with minus sign
  275         addib,tr        -1,cnt,mloop            ; dec. count, to loop
  276         shd             pm,pu,3,pu              ; count never reaches 0 here
  277 ;
  278 fini0:  movib,tr,n      0,pl,fini               ; product = 0 as op1 = 0
  279 ;
  280 sh2us:  extru           pu,29,30,pu             ; shift 2 unsigned
  281         addb,tr         op1,pu,sh2a             ; add op1
  282         shd             pu,pl,2,pl              ; shift 2 bits
  283 ;
  284 sh2c:   shd             pu,pl,2,pl
  285         shd             pc,pu,2,pu              ; shift with carry
  286         addb,tr         op1,pu,sh2a             ; add op1 to product
  287         shd             pu,pl,2,pl              ; br. to sh2a to shift pu
  288 ;
  289 sh2sa:  extrs           pu,29,30,pu             ; shift with sign
  290         addb,tr         op1,pu,sh2a             ; add op1 to product
  291         shd             pu,pl,2,pl              ; br. to sh2a to shift pu
  292 ;
  293 sh2a:   addib,>         -1,cnt,mloop            ; loop if count > 0
  294         extru           pu,29,30,pu             ; shift upper word right 2 unsigned
  295 ;
  296 mulend: bb,>=,n         sign,0,fini             ; test sign of product
  297 signs:  sub             0,pl,pl                 ; negate product if sign
  298         subb            0,pu,pu                 ;   is negative
  299 ;
  300 ;       finish
  301 ;
  302 fini:   stws            pu,0(%arg2)             ; save high part of result
  303         stws            pl,4(%arg2)             ; save low part of result
  304
  305         ldws,mb         -4(%sp),pm              ; pop pm (sp -= 4, then load)
  306         ldws,mb         -4(%sp),pc              ; pop pc
  307         ldws,mb         -4(%sp),sign            ; pop sign
  308         ldws,mb         -4(%sp),brindex         ; pop brindex
  309         ldws,mb         -4(%sp),cnt             ; pop cnt
  310         ldws,mb         -4(%sp),op1             ; pop op1
  311         ldws,mb         -4(%sp),pl              ; pop pl
  312         bv              0(%rp)                  ; return
  313         ldws,mb         -4(%sp),pu              ; (delay slot) pop pu
  314
  315 EXIT(impys)
  316         .end