1 ;; This code used to be expanded through interesting expansions in
2 ;; the machine description, compiled from this code:
5 ;; long __Mul (unsigned long a, unsigned long b) __attribute__ ((__const__));
7 ;; /* This must be compiled with the -mexpand-mul flag, to synthesize the
8 ;; multiplication from the mstep instructions. The check for
9 ;; smaller-size multiplication pays off in the order of .5-10%;
10 ;; estimated median 1%, depending on application.
11 ;; FIXME: It can be further optimized if we go to assembler code, as
12 ;; gcc 2.7.2 adds a few unnecessary instructions and does not put the
13 ;; basic blocks in optimal order. */
15 ;; __Mul (unsigned long a, unsigned long b)
17 ;; #if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
18 ;; /* In case other code is compiled without -march=v10, they will
19 ;; contain calls to __Mul, regardless of flags at link-time. The
20 ;; "else"-code below will work, but is unnecessarily slow. This
21 ;; sometimes cuts a few minutes off from simulation time by just
22 ;; returning a "mulu.d". */
27 ;; /* Get minimum via the bound insn. */
28 ;; min = a < b ? a : b;
30 ;; /* Can we omit computation of the high part? */
32 ;; /* No. Perform full multiplication. */
36 ;; /* Check if both operands are within 16 bits. */
39 ;; /* Get maximum, by knowing the minimum.
40 ;; This will partition a and b into max and min.
41 ;; This is not currently something GCC understands,
42 ;; so do this trick by asm. */
43 ;; __asm__ ("xor %1,%0\n\txor %2,%0"
45 ;; : "r" (b), "r" (a), "0" (min));
48 ;; /* Make GCC understand that only the low part of "min" will be
50 ;; return max * (unsigned short) min;
52 ;; /* Only the low parts of both operands are necessary. */
53 ;; return ((unsigned short) max) * (unsigned short) min;
55 ;; #endif /* not __CRIS_arch_version >= 10 */
57 ;; #endif /* L_mulsi3 */
59 ;; That approach was abandoned since the caveats outweighed the
60 ;; benefits. The expand-multiplication machinery is also removed, so you
61 ;; can't do this anymore.
63 ;; For doubters of there being any benefits, some were: insensitivity to:
64 ;; - ABI changes (mostly for experimentation)
65 ;; - assembler syntax differences (mostly debug format).
66 ;; - insn scheduling issues.
67 ;; Most ABI experiments will presumably happen with arches with mul insns,
68 ;; so that argument doesn't really hold anymore, and it's unlikely that there
69 ;; will be new arch variants needing insn scheduling and not having mul
72 ;; ELF and a.out have different syntax for local labels: a label written
73 ;; in the "wrong" syntax may not be omitted from the object.
82 .
type ___Mul
,@function
84 #if defined
(__CRIS_arch_version
) && __CRIS_arch_version
>= 10
85 ;; Can't have the mulu.d last on a cache-line (in the delay-slot of the
86 ;; "ret"), due to hardware bug. See documentation for -mmul-bug-workaround.
87 ;; Not worthwhile to conditionalize here.
232 .
size ___Mul
,L
(Lfe1
)-___Mul