sys/arch/hppa/spmath/impyu.S

   1 /*      $NetBSD: impyu.S,v 1.1.10.3 2004/09/21 13:16:34 skrll Exp $     */
   2
   3 /*      $OpenBSD: impyu.S,v 1.5 2001/03/29 03:58:18 mickey Exp $        */
   4
   5 /*
   6  * Copyright 1996 1995 by Open Software Foundation, Inc.
   7  *              All Rights Reserved
   8  *
   9  * Permission to use, copy, modify, and distribute this software and
  10  * its documentation for any purpose and without fee is hereby granted,
  11  * provided that the above copyright notice appears in all copies and
  12  * that both the copyright notice and this permission notice appear in
  13  * supporting documentation.
  14  *
  15  * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
  16  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  17  * FOR A PARTICULAR PURPOSE.
  18  *
  19  * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
  20  * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  21  * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
  22  * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
  23  * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  24  *
  25  */
  26 /*
  27  * pmk1.1
  28  */
  29 /*
  30  * (c) Copyright 1986 HEWLETT-PACKARD COMPANY
  31  *
  32  * To anyone who acknowledges that this file is provided "AS IS"
  33  * without any express or implied warranty:
  34  *     permission to use, copy, modify, and distribute this file
  35  * for any purpose is hereby granted without fee, provided that
  36  * the above copyright notice and this notice appears in all
  37  * copies, and that the name of Hewlett-Packard Company not be
  38  * used in advertising or publicity pertaining to distribution
  39  * of the software without specific, written prior permission.
  40  * Hewlett-Packard Company makes no representations about the
  41  * suitability of this software for any purpose.
  42  */
  43
  44 #include <machine/asm.h>
  45
  46 /****************************************************************************
  47  *
   48  * Implement an integer multiply routine for 32-bit operands and 64-bit product
   49  * with operand values of zero (multiplicand only) and 2**31 treated specially.
  50  * The algorithm uses the multiplier, four bits at a time, from right to left,
  51  * to generate partial product.  Execution speed is more important than program
  52  * size in this implementation.
  53  *
  54  *****************************************************************************/
  55 ;
  56 ; Definitions - General registers
  57 ;
   58 gr0:    .equ            0               ; General register zero
   59 pu:     .equ            3               ; upper part of product
   60 pl:     .equ            4               ; lower part of product (shares register 4 with op2)
   61 op2:    .equ            4               ; multiplier (shares register 4 with pl)
   62 op1:    .equ            5               ; multiplicand
   63 cnt:    .equ            6               ; count in multiply (shares register 6 with temp)
   64 brindex:.equ            7               ; index into the br. table
   65 saveop2:.equ            8               ; save op2 if high bit of multiplicand
   66                                         ; is set
   67 pc:     .equ            9               ; carry bit of product, = 00...01
   68 pm:     .equ           10               ; value of -1 used in shifting
   69 temp:   .equ            6               ; scratch for the final add (shares register 6 with cnt)
  70
  71 ;****************************************************************************
   72         .export impyu,entry
   73         .text
   74         .align 4
   75         .proc
   76         .callinfo
   77 ; impyu - 32-bit by 32-bit integer multiply producing a 64-bit product
      ;
      ; In:   %arg0 -> word holding the multiplicand (loaded into op1)
      ;       %arg1 -> word holding the multiplier   (loaded into op2)
      ;       %arg2 -> result area: high word stored at 0(%arg2),
      ;                low word stored at 4(%arg2)
      ; Regs: general registers 3-10 are pushed through %sp on entry and
      ;       reloaded in reverse order before returning through %rp.
      ;
      ; The multiplier is consumed four bits at a time; cnt starts at 8.
      ; op2 and pl name the same register, so every right shift of the
      ; pu:pl pair also moves the remaining multiplier bits down.
      ; If the multiplicand has its msb set, that bit is cleared, the
      ; multiplier is kept in saveop2, and saveop2 shifted left 31 is
      ; added to the 64-bit product at lastadd.
   78 ;****************************************************************************
   79 impyu:  stws,ma         pu,4(%sp)               ; save registers on stack
   80         stws,ma         pl,4(%sp)               ; save registers on stack
   81         stws,ma         op1,4(%sp)              ; save registers on stack
   82         stws,ma         cnt,4(%sp)              ; save registers on stack
   83         stws,ma         brindex,4(%sp)          ; save registers on stack
   84         stws,ma         saveop2,4(%sp)          ; save registers on stack
   85         stws,ma         pc,4(%sp)               ; save registers on stack
   86         stws,ma         pm,4(%sp)               ; save registers on stack
   87 ;
   88 ;   Start multiply process
   89 ;
   90         ldws            0(%arg0),op1            ; get multiplicand
   91         ldws            0(%arg1),op2            ; get multiplier
   92         addib,=         0,op1,fini0             ; op1 = 0, product = 0
   93         addi            0,gr0,pu                ; clear upper part of product
   94         bb,>=           op1,0,mpy1              ; msb of multiplicand clear: go multiply
   95         addi            0,gr0,saveop2           ; clear saveop2
   96 ;
   97 ; msb of multiplicand is set so will save multiplier for a final
   98 ; addition into the result
      ; If the remaining 31 bits are zero (op1 was 2**31) the extru,= below
      ; nullifies the branch to mpy1 and the product is formed directly as
      ; op2 shifted left 31.
   99 ;
  100         extru,=         op1,31,31,op1           ; clear msb of multiplicand
  101         b               mpy1                    ; low 31 bits nonzero: start multiply
  102         add             op2,gr0,saveop2         ;   save op2 in saveop2
  103         shd             gr0,op2,1,pu            ; shift op2 left 31 for result
  104         b               fini                    ; go to finish
  105         shd             op2,gr0,1,pl            ;   low word of op2 shifted left 31
  106 ;
  107 mpy1:   addi            -1,gr0,pm               ; initialize pm to 111...1
  108         addi            1,gr0,pc                ; initialize pc to 00...01
  109         movib,tr        8,cnt,mloop             ; set count for mpy loop
  110         extru           op2,31,4,brindex        ; 4 bits as index into table
  111 ;
  112         .align          8
  113 ; sh4n-4 is the branch target of table entry 0011; sh4n+4 is the target of
      ; entries 0000 and 0001, which shift pl in their own second word.
  114         b               sh4c                    ; br. if sign overflow
  115 sh4n:   shd             pu,pl,4,pl              ; shift product right 4 bits
  116         addib,<=        -1,cnt,mulend           ; reduce count by 1, exit if
  117         extru           pu,27,28,pu             ;   <= zero
  118 ;
  119 mloop:  blr             brindex,gr0             ; br. into table
  120                                                 ;   entries of 2 words
  121         extru           op2,27,4,brindex        ; next 4 bits into index
  122 ;
  123 ;
  124 ;       branch table for the multiplication process with four multiplier bits
  125 ;
  126 mtable:                                         ; two words per entry
  127 ;
  128 ; ----  bits = 0000 ---- shift product 4 bits -------------------------------
  129 ;
  130         b               sh4n+4                  ; just shift partial
  131         shd             pu,pl,4,pl              ;   product right 4 bits
  132 ;
  133 ;  ---- bits = 0001 ---- add op1, then shift 4 bits
  134 ;
  135         addb,tr         op1,pu,sh4n+4           ; add op1 to product, to shift
  136         shd             pu,pl,4,pl              ;   product right 4 bits
  137 ;
  138 ;  ---- bits = 0010 ---- add op1, add op1, then shift 4 bits
  139 ;
  140         addb,tr         op1,pu,sh4n             ; add 2*op1, to shift
  141         addb,uv         op1,pu,sh4c             ;   product right 4 bits
  142 ;
  143 ;  ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
  144 ;
  145         addb,tr         op1,pu,sh4n-4           ; add op1 & 2*op1, shift
  146         sh1add,nuv      op1,pu,pu               ;   product right 4 bits
  147 ;
  148 ;  ---- bits = 0100 ---- shift 2, add op1, shift 2
  149 ;
  150         b               sh2sa
  151         shd             pu,pl,2,pl              ; shift product 2 bits
  152 ;
  153 ;  ---- bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
  154 ;
  155         addb,tr         op1,pu,sh2us            ; add op1 to product
  156         shd             pu,pl,2,pl              ; shift 2 bits
  157 ;
  158 ;  ---- bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
  159 ;
  160         addb,tr         op1,pu,sh2c             ; add 2*op1, to shift 2 bits
  161         addb,nuv        op1,pu,sh2us            ; br. if not overflow
  162 ;
  163 ;  ---- bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
  164 ;
  165         b               sh3s
  166         sub             pu,op1,pu               ; subtract op1, br. to sh3s
  167
  168 ;
  169 ;  ---- bits = 1000 ---- shift 3, add op1, shift 1
  170 ;
  171         b               sh3sa
  172         shd             pu,pl,3,pl              ; shift product right 3 bits
  173 ;
  174 ;  ---- bits = 1001 ---- add op1, shift 3, add op1, shift 1
  175 ;
  176         addb,tr         op1,pu,sh3us            ; add op1, to shift 3, add op1,
  177         shd             pu,pl,3,pl              ;   and shift 1
  178 ;
  179 ;  ---- bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
  180 ;
  181         addb,tr         op1,pu,sh3c             ; add 2*op1, to shift 3 bits
  182         addb,nuv        op1,pu,sh3us            ;   br. if no overflow
  183 ;
  184 ;  ---- bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
  185 ;
  186         addib,tr        1,brindex,sh2s          ; add 1 to index, subtract op1,
  187         sub             pu,op1,pu               ;   shift 2 with minus sign
  188 ;
  189 ;  ---- bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
  190 ;
  191         addib,tr        1,brindex,sh2sb         ; add 1 to index, to shift
  192         shd             pu,pl,2,pl              ; shift right 2 bits signed
  193 ;
  194 ;  ---- bits = 1101 ---- add op1, shift 2, add -op1, shift 2
  195 ;
  196         addb,tr         op1,pu,sh2ns            ; add op1, to shift 2
  197         shd             pu,pl,2,pl              ;   right 2 unsigned, etc.
  198 ;
  199 ;  ---- bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
  200 ;
  201         addib,tr        1,brindex,sh1sa         ; add 1 to index, to shift
  202         shd             pu,pl,1,pl              ; shift 1 bit
  203 ;
  204 ;  ---- bits = 1111 ---- add -op1, shift 4 signed
  205 ;
  206         addib,tr        1,brindex,sh4s          ; add 1 to index, subtract op1,
  207         sub             pu,op1,pu               ;   to shift 4 signed
  208
  209 ;
  210 ;  ---- bits = 10000 ---- shift 4 signed (index 1111 after a previous entry added 1)
  211 ;
  212         addib,tr        1,brindex,sh4s+4        ; add 1 to index
  213         shd             pu,pl,4,pl              ; shift 4 signed
  214 ;
  215 ;  ---- end of table ---------------------------------------------------------
  216 ;
  217 sh4s:   shd             pu,pl,4,pl
  218         addib,>         -1,cnt,mloop            ; decrement count, loop if > 0
  219         shd             pm,pu,4,pu              ; shift 4, minus signed
  220         addb,tr         op1,pu,lastadd          ; do one more add, then finish
  221         addb,=,n        saveop2,gr0,fini        ; check saveop2
  222 ;
  223 sh4c:   addib,>         -1,cnt,mloop            ; decrement count, loop if > 0
  224         shd             pc,pu,4,pu              ; shift 4 with overflow
  225         b               lastadd                 ; end of multiply
  226         addb,=,n        saveop2,gr0,fini        ; check saveop2
  227 ;
  228 sh3c:   shd             pu,pl,3,pl              ; shift product 3 bits
  229         shd             pc,pu,3,pu              ; shift 3 signed
  230         addb,tr         op1,pu,sh1              ; add op1, to shift 1 bit
  231         shd             pu,pl,1,pl
  232 ;
  233 sh3us:  extru           pu,28,29,pu             ; shift 3 unsigned
  234         addb,tr         op1,pu,sh1              ; add op1, to shift 1 bit
  235         shd             pu,pl,1,pl
  236 ;
  237 sh3sa:  extrs           pu,28,29,pu             ; shift 3 signed
  238         addb,tr         op1,pu,sh1              ; add op1, to shift 1 bit
  239         shd             pu,pl,1,pl
  240 ;
  241 sh3s:   shd             pu,pl,3,pl              ; shift 3 minus signed
  242         shd             pm,pu,3,pu
  243         addb,tr         op1,pu,sh1              ; add op1, to shift 1 bit
  244         shd             pu,pl,1,pl
  245 ;
  246 sh1:    addib,>         -1,cnt,mloop            ; loop if count > 0
  247         extru           pu,30,31,pu
  248         b               lastadd                 ; end of multiply
  249         addb,=,n        saveop2,gr0,fini        ; check saveop2
  250 ;
  251 sh2ns:  addib,tr        1,brindex,sh2sb+4       ; increment index
  252         extru           pu,29,30,pu             ; shift unsigned
  253 ;
  254 sh2s:   shd             pu,pl,2,pl              ; shift with minus sign
  255         shd             pm,pu,2,pu              ;
  256         sub             pu,op1,pu               ; subtract op1
  257         shd             pu,pl,2,pl              ; shift with minus sign
  258         addib,>         -1,cnt,mloop            ; decrement count, loop if > 0
  259         shd             pm,pu,2,pu              ; shift with minus sign
  260         addb,tr         op1,pu,lastadd          ; do one more add, then finish
  261         addb,=,n        saveop2,gr0,fini        ; check saveop2
  262 ;
  263 sh2sb:  extrs           pu,29,30,pu             ; shift 2 signed
  264         sub             pu,op1,pu               ; subtract op1 from product
  265         shd             pu,pl,2,pl              ; shift with minus sign
  266         addib,>         -1,cnt,mloop            ; decrement count, loop if > 0
  267         shd             pm,pu,2,pu              ; shift with minus sign
  268         addb,tr         op1,pu,lastadd          ; do one more add, then finish
  269         addb,=,n        saveop2,gr0,fini        ; check saveop2
  270 ;
  271 sh1sa:  extrs           pu,30,31,pu             ;   signed
  272         sub             pu,op1,pu               ; subtract op1 from product
  273         shd             pu,pl,3,pl              ; shift 3 with minus sign
  274         addib,>         -1,cnt,mloop            ; decrement count, loop if >0
  275         shd             pm,pu,3,pu
  276         addb,tr         op1,pu,lastadd          ; do one more add, then finish
  277         addb,=,n        saveop2,gr0,fini        ; check saveop2
  278 ;
  279 fini0:  movib,tr        0,pl,fini               ; product = 0 as op1 = 0
  280         stws            pu,0(%arg2)             ; save high part of result (stored again at fini)
  281 ;
  282 sh2us:  extru           pu,29,30,pu             ; shift 2 unsigned
  283         addb,tr         op1,pu,sh2a             ; add op1
  284         shd             pu,pl,2,pl              ; shift 2 bits
  285 ;
  286 sh2c:   shd             pu,pl,2,pl
  287         shd             pc,pu,2,pu              ; shift with carry
  288         addb,tr         op1,pu,sh2a             ; add op1 to product
  289         shd             pu,pl,2,pl              ; br. to sh2 to shift pu
  290 ;
  291 sh2sa:  extrs           pu,29,30,pu             ; shift with sign
  292         addb,tr         op1,pu,sh2a             ; add op1 to product
  293         shd             pu,pl,2,pl              ; br. to sh2 to shift pu
  294 ;
  295 sh2a:   addib,>         -1,cnt,mloop            ; loop if count > 0
  296         extru           pu,29,30,pu
  297 ;
  298 mulend: addb,=,n        saveop2,gr0,fini        ; saveop2 = 0: nothing to add, finish
  299 lastadd:shd             saveop2,gr0,1,temp      ;  if saveop2 <> 0, shift it
  300         shd             gr0,saveop2,1,saveop2   ;  left 31 and add to result
  301         add             pl,temp,pl              ;  low word += low half of shifted saveop2
  302         addc            pu,saveop2,pu           ;  high word += high half, plus carry
  303 ;
  304 ;       finish
  305 ;
  306 fini:   stws            pu,0(%arg2)             ; save high part of result
  307         stws            pl,4(%arg2)             ; save low part of result
  308
  309         ldws,mb         -4(%sp),pm              ; restore registers
  310         ldws,mb         -4(%sp),pc              ; restore registers
  311         ldws,mb         -4(%sp),saveop2         ; restore registers
  312         ldws,mb         -4(%sp),brindex         ; restore registers
  313         ldws,mb         -4(%sp),cnt             ; restore registers
  314         ldws,mb         -4(%sp),op1             ; restore registers
  315         ldws,mb         -4(%sp),pl              ; restore registers
  316         bv              0(%rp)                  ; return
  317         ldws,mb         -4(%sp),pu              ; restore registers
  318
  319         .procend
  320         .end