sysdeps/alpha/remqu.S

   1 /* Copyright (C) 2004 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    The GNU C Library is free software; you can redistribute it and/or
   5    modify it under the terms of the GNU Lesser General Public
   6    License as published by the Free Software Foundation; either
   7    version 2.1 of the License, or (at your option) any later version.
   8
   9    The GNU C Library is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public
  15    License along with the GNU C Library; if not, write to the Free
  16    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  17    02111-1307 USA.  */
  18
  19 #include "div_libc.h"
  20
  21
  22 /* 64-bit unsigned long remainder.  These are not normal C functions.  Argument
  23    registers are t10 and t11, the result goes in t12.  Only t12 and AT may be
  24    clobbered.
  25
  26    Theory of operation here is that we can use the FPU divider for virtually
  27    all operands that we see: all dividend values between -2**53 and 2**53-1
  28    can be computed directly.  Note that divisor values need not be checked
  29    against that range because the rounded fp value will be close enough such
  30    that the quotient is < 1, which will properly be truncated to zero when we
  31    convert back to integer.
  32
  33    When the dividend is outside the range for which we can compute exact
  34    results, we use the fp quotent as an estimate from which we begin refining
  35    an exact integral value.  This reduces the number of iterations in the
  36    shift-and-subtract loop significantly.
  37
  38    The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE
  39    for cvttq/c even without /sui being set.  It will not, however, properly
  40    raise the exception, so we don't have to worry about FPCR_INED being clear
  41    and so dying by SIGFPE.  */
  42
  43         .text
  44         .align  4
  45         .globl  __remqu
  46         .type   __remqu, @function
  47         .usepv  __remqu, no
  48
  49         cfi_startproc
  50         cfi_return_column (RA)
  51 __remqu:
  52         lda     sp, -FRAME(sp)
  53         cfi_def_cfa_offset (FRAME)
  54         CALL_MCOUNT
  55
  56         /* Get the fp divide insn issued as quickly as possible.  After
  57            that's done, we have at least 22 cycles until its results are
  58            ready -- all the time in the world to figure out how we're
  59            going to use the results.  */
  60         subq    Y, 1, AT
  61         stt     $f0, 0(sp)
  62         and     Y, AT, AT
  63
  64         stt     $f1, 8(sp)
  65         excb
  66         stt     $f3, 48(sp)
  67         beq     AT, $powerof2
  68         cfi_rel_offset ($f0, 0)
  69         cfi_rel_offset ($f1, 8)
  70         cfi_rel_offset ($f3, 48)
  71
  72         _ITOFT2 X, $f0, 16, Y, $f1, 24
  73         mf_fpcr $f3
  74         cvtqt   $f0, $f0
  75         cvtqt   $f1, $f1
  76
  77         blt     X, $x_is_neg
  78         divt/c  $f0, $f1, $f0
  79
  80         /* Check to see if Y was mis-converted as signed value.  */
  81         ldt     $f1, 8(sp)
  82         blt     Y, $y_is_neg
  83
  84         /* Check to see if X fit in the double as an exact value.  */
  85         srl     X, 53, AT
  86         bne     AT, $x_big
  87
  88         /* If we get here, we're expecting exact results from the division.
  89            Do nothing else besides convert, compute remainder, clean up.  */
  90         cvttq/c $f0, $f0
  91         excb
  92         mt_fpcr $f3
  93         _FTOIT  $f0, AT, 16
  94
  95         mulq    AT, Y, AT
  96         ldt     $f0, 0(sp)
  97         ldt     $f3, 48(sp)
  98         lda     sp, FRAME(sp)
  99         cfi_remember_state
 100         cfi_restore ($f0)
 101         cfi_restore ($f1)
 102         cfi_restore ($f3)
 103         cfi_def_cfa_offset (0)
 104
 105         .align  4
 106         subq    X, AT, RV
 107         ret     $31, (RA), 1
 108
 109         .align  4
 110         cfi_restore_state
 111 $x_is_neg:
 112         /* If we get here, X is so big that bit 63 is set, which made the
 113            conversion come out negative.  Fix it up lest we not even get
 114            a good estimate.  */
 115         ldah    AT, 0x5f80              /* 2**64 as float.  */
 116         stt     $f2, 24(sp)
 117         cfi_rel_offset ($f2, 24)
 118         _ITOFS  AT, $f2, 16
 119
 120         addt    $f0, $f2, $f0
 121         divt/c  $f0, $f1, $f0
 122
 123         /* Ok, we've now the divide issued.  Continue with other checks.  */
 124         .align  4
 125         ldt     $f1, 8(sp)
 126         unop
 127         ldt     $f2, 24(sp)
 128         blt     Y, $y_is_neg
 129         cfi_restore ($f1)
 130         cfi_restore ($f2)
 131         cfi_remember_state      /* for y_is_neg */
 132
 133         .align  4
 134 $x_big:
 135         /* If we get here, X is large enough that we don't expect exact
 136            results, and neither X nor Y got mis-translated for the fp
 137            division.  Our task is to take the fp result, figure out how
 138            far it's off from the correct result and compute a fixup.  */
 139         stq     t0, 16(sp)
 140         stq     t1, 24(sp)
 141         stq     t2, 32(sp)
 142         stq     t3, 40(sp)
 143         cfi_rel_offset (t0, 16)
 144         cfi_rel_offset (t1, 24)
 145         cfi_rel_offset (t2, 32)
 146         cfi_rel_offset (t3, 40)
 147
 148 #define Q       t0              /* quotient */
 149 #define R       RV              /* remainder */
 150 #define SY      t1              /* scaled Y */
 151 #define S       t2              /* scalar */
 152 #define QY      t3              /* Q*Y */
 153
 154         cvttq/c $f0, $f0
 155         _FTOIT  $f0, Q, 8
 156         mulq    Q, Y, QY
 157
 158         .align  4
 159         stq     t4, 8(sp)
 160         excb
 161         ldt     $f0, 0(sp)
 162         mt_fpcr $f3
 163         cfi_rel_offset (t4, 8)
 164         cfi_restore ($f0)
 165
 166         subq    QY, X, R
 167         mov     Y, SY
 168         mov     1, S
 169         bgt     R, $q_high
 170
 171 $q_high_ret:
 172         subq    X, QY, R
 173         mov     Y, SY
 174         mov     1, S
 175         bgt     R, $q_low
 176
 177 $q_low_ret:
 178         ldq     t4, 8(sp)
 179         ldq     t0, 16(sp)
 180         ldq     t1, 24(sp)
 181         ldq     t2, 32(sp)
 182
 183         ldq     t3, 40(sp)
 184         ldt     $f3, 48(sp)
 185         lda     sp, FRAME(sp)
 186         cfi_remember_state
 187         cfi_restore (t0)
 188         cfi_restore (t1)
 189         cfi_restore (t2)
 190         cfi_restore (t3)
 191         cfi_restore (t4)
 192         cfi_restore ($f3)
 193         cfi_def_cfa_offset (0)
 194         ret     $31, (RA), 1
 195
 196         .align  4
 197         cfi_restore_state
 198         /* The quotient that we computed was too large.  We need to reduce
 199            it by S such that Y*S >= R.  Obviously the closer we get to the
 200            correct value the better, but overshooting high is ok, as we'll
 201            fix that up later.  */
 202 0:
 203         addq    SY, SY, SY
 204         addq    S, S, S
 205 $q_high:
 206         cmpult  SY, R, AT
 207         bne     AT, 0b
 208
 209         subq    Q, S, Q
 210         unop
 211         subq    QY, SY, QY
 212         br      $q_high_ret
 213
 214         .align  4
 215         /* The quotient that we computed was too small.  Divide Y by the
 216            current remainder (R) and add that to the existing quotient (Q).
 217            The expectation, of course, is that R is much smaller than X.  */
 218         /* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
 219            already have a copy of Y in SY and the value 1 in S.  */
 220 0:
 221         addq    SY, SY, SY
 222         addq    S, S, S
 223 $q_low:
 224         cmpult  SY, R, AT
 225         bne     AT, 0b
 226
 227         /* Shift-down and subtract loop.  Each iteration compares our scaled
 228            Y (SY) with the remainder (R); if SY <= R then X is divisible by
 229            Y's scalar (S) so add it to the quotient (Q).  */
 230 2:      addq    Q, S, t3
 231         srl     S, 1, S
 232         cmpule  SY, R, AT
 233         subq    R, SY, t4
 234
 235         cmovne  AT, t3, Q
 236         cmovne  AT, t4, R
 237         srl     SY, 1, SY
 238         bne     S, 2b
 239
 240         br      $q_low_ret
 241
 242         .align  4
 243         cfi_restore_state
 244 $y_is_neg:
 245         /* If we get here, Y is so big that bit 63 is set.  The results
 246            from the divide will be completely wrong.  Fortunately, the
 247            quotient must be either 0 or 1, so the remainder must be X
 248            or X-Y, so just compute it directly.  */
 249         cmpult  Y, X, AT
 250         subq    X, Y, RV
 251         ldt     $f0, 0(sp)
 252         cmoveq  AT, X, RV
 253
 254         lda     sp, FRAME(sp)
 255         cfi_restore ($f0)
 256         cfi_def_cfa_offset (0)
 257         ret     $31, (RA), 1
 258
 259         .align  4
 260         cfi_def_cfa_offset (FRAME)
 261 $powerof2:
 262         subq    Y, 1, AT
 263         beq     Y, DIVBYZERO
 264         and     X, AT, RV
 265         lda     sp, FRAME(sp)
 266         cfi_def_cfa_offset (0)
 267         ret     $31, (RA), 1
 268
 269         cfi_endproc
 270         .size   __remqu, .-__remqu
 271
 272         DO_DIVBYZERO