arch/c6x/lib/divremu.S

   1 ;;  Copyright 2011  Free Software Foundation, Inc.
   2 ;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
   3 ;;
   4 ;; This program is free software; you can redistribute it and/or modify
   5 ;; it under the terms of the GNU General Public License as published by
   6 ;; the Free Software Foundation; either version 2 of the License, or
   7 ;; (at your option) any later version.
   8 ;;
   9 ;; This program is distributed in the hope that it will be useful,
  10 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 ;; GNU General Public License for more details.
  13 ;;
  14 ;; You should have received a copy of the GNU General Public License
  15 ;; along with this program; if not, write to the Free Software
  16 ;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17
  18 #include <linux/linkage.h>
  19
  20         .text
  21 ENTRY(__c6xabi_divremu)
  22         ;; Unsigned 32-bit divide: A4 = dividend, B4 = divisor, B3 = return
  23         ;; address.  Leaves the quotient in A4 and the remainder in A5.  A
  24         ;; zero divisor is not checked for.
  25         ;;
  26         ;; We use up to 31 subc instructions.  The number of leading zero bits
  27         ;; in the divisor gives both the shift count that moves its top set
  28         ;; bit to bit 31 and the number of subc steps.  At the end A4 holds
  29         ;; the remainder and most of the quotient; the top quotient bit is
  30         ;; computed first and is placed in A2.
  31         ;;
  32         ;; Return immediately if the dividend is zero.  Setting B4 to 1 lets
  33         ;; the insns in the jump delay slots run without affecting the result.
  34         mv      .s2x    A4, B1          ; B1 = dividend, tested as [b1] below
  35
  36   [b1]  lmbd    .l2     1, B4, B1       ; B1 = leading zero bits of divisor
  37 ||[!b1] b       .s2     B3      ; RETURN A
  38 ||[!b1] mvk     .d2     1, B4           ; zero dividend: divisor := 1
  39
  40 ||[!b1] zero    .s1     A5              ; zero dividend: remainder := 0
  41         mv      .l1x    B1, A6          ; A6 = shift count, reused at the end
  42 ||      shl     .s2     B4, B1, B4      ; align divisor to bit 31
  43
  44         ;; The loop performs a maximum of 28 steps, so we do the
  45         ;; first 3 here.
  46         cmpltu  .l1x    A4, B4, A2      ; A2 = dividend < aligned divisor
  47   [!A2] sub     .l1x    A4, B4, A4      ; subtract only if it fits
  48 ||      shru    .s2     B4, 1, B4       ; divisor >>= 1 for the subc steps
  49 ||      xor     .s1     1, A2, A2       ; A2 = top quotient bit
  50
  51         shl     .s1     A2, 31, A2      ; move top quotient bit to bit 31
  52 || [b1] subc    .l1x    A4,B4,A4        ; one divide step while B1 != 0
  53 || [b1] add     .s2     -1, B1, B1      ; B1 = steps still to do
  54    [b1] subc    .l1x    A4,B4,A4
  55 || [b1] add     .s2     -1, B1, B1
  56
  57         ;; RETURN A may happen here (note: must happen before the next branch)
  58 __divremu0:                             ; up to 7 subc steps per pass
  59         cmpgt   .l2     B1, 7, B0       ; B0 = (B1 > 7): another pass needed
  60 || [b1] subc    .l1x    A4,B4,A4
  61 || [b1] add     .s2     -1, B1, B1
  62    [b1] subc    .l1x    A4,B4,A4
  63 || [b1] add     .s2     -1, B1, B1
  64 || [b0] b       .s1     __divremu0      ; repeat while B0 is set
  65    [b1] subc    .l1x    A4,B4,A4
  66 || [b1] add     .s2     -1, B1, B1
  67    [b1] subc    .l1x    A4,B4,A4
  68 || [b1] add     .s2     -1, B1, B1
  69    [b1] subc    .l1x    A4,B4,A4
  70 || [b1] add     .s2     -1, B1, B1
  71    [b1] subc    .l1x    A4,B4,A4
  72 || [b1] add     .s2     -1, B1, B1
  73    [b1] subc    .l1x    A4,B4,A4
  74 || [b1] add     .s2     -1, B1, B1
  75         ;; loop backwards branch happens here
  76
  77         ret     .s2     B3              ; delayed return to the caller
  78 ||      mvk     .s1     32, A1          ; A1 = 32
  79         sub     .l1     A1, A6, A6      ; A6 = 32 - shift count
  80 ||      extu    .s1     A4, A6, A5      ; A5 = remainder from the top of A4
  81         shl     .s1     A4, A6, A4      ; discard the remainder bits
  82         shru    .s1     A4, 1, A4       ; make room for the top quotient bit
  83 ||      sub     .l1     A6, 1, A6       ; A6 = 31 - shift count
  84         or      .l1     A2, A4, A4      ; merge in the top quotient bit
  85         shru    .s1     A4, A6, A4      ; A4 = quotient, right-aligned
  86         nop
  87 ENDPROC(__c6xabi_divremu)