Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gcc4 / gcc / config / cris / mulsi3.asm
blob69d9dedd9665c4aef56797c42a0d53df691ea7aa
1 ;; This code used to be expanded through interesting expansions in
2 ;; the machine description, compiled from this code:
3 ;;
4 ;; #ifdef L_mulsi3
5 ;; long __Mul (unsigned long a, unsigned long b) __attribute__ ((__const__));
6 ;;
7 ;; /* This must be compiled with the -mexpand-mul flag, to synthesize the
8 ;; multiplication from the mstep instructions. The check for
9 ;; smaller-size multiplication pays off in the order of .5-10%;
10 ;; estimated median 1%, depending on application.
11 ;; FIXME: It can be further optimized if we go to assembler code, as
12 ;; gcc 2.7.2 adds a few unnecessary instructions and does not put the
13 ;; basic blocks in optimal order. */
14 ;; long
15 ;; __Mul (unsigned long a, unsigned long b)
16 ;; {
17 ;; #if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
18 ;; /* In case other code is compiled without -march=v10, they will
19 ;; contain calls to __Mul, regardless of flags at link-time. The
20 ;; "else"-code below will work, but is unnecessarily slow. This
21 ;; sometimes cuts a few minutes off from simulation time by just
22 ;; returning a "mulu.d". */
23 ;; return a * b;
24 ;; #else
25 ;; unsigned long min;
26 ;;
27 ;; /* Get minimum via the bound insn. */
28 ;; min = a < b ? a : b;
29 ;;
30 ;; /* Can we omit computation of the high part? */
31 ;; if (min > 65535)
32 ;; /* No. Perform full multiplication. */
33 ;; return a * b;
34 ;; else
35 ;; {
36 ;; /* Check if both operands are within 16 bits. */
37 ;; unsigned long max;
38 ;;
39 ;; /* Get maximum, by knowing the minimum.
40 ;; This will partition a and b into max and min.
41 ;; This is not currently something GCC understands,
42 ;; so do this trick by asm. */
43 ;; __asm__ ("xor %1,%0\n\txor %2,%0"
44 ;; : "=r" (max)
45 ;; : "r" (b), "r" (a), "0" (min));
46 ;;
47 ;; if (max > 65535)
48 ;; /* Make GCC understand that only the low part of "min" will be
49 ;; used. */
50 ;; return max * (unsigned short) min;
51 ;; else
52 ;; /* Only the low parts of both operands are necessary. */
53 ;; return ((unsigned short) max) * (unsigned short) min;
54 ;; }
55 ;; #endif /* not __CRIS_arch_version >= 10 */
56 ;; }
57 ;; #endif /* L_mulsi3 */
59 ;; That approach was abandoned since the caveats outweighed the
60 ;; benefits. The expand-multiplication machinery is also removed, so you
61 ;; can't do this anymore.
63 ;; For doubters of there being any benefits, some were: insensitivity to:
64 ;; - ABI changes (mostly for experimentation)
65 ;; - assembler syntax differences (mostly debug format).
66 ;; - insn scheduling issues.
67 ;; Most ABI experiments will presumably happen with arches with mul insns,
68 ;; so that argument doesn't really hold anymore, and it's unlikely that
69 ;; there will be new arch variants needing insn scheduling and not having
70 ;; mul insns.
72 ;; ELF and a.out have different syntax for local labels: the "wrong"
73 ;; one may not be omitted from the object.
;; L(x) spells a local label: the bare name for a.out, the dot-prefixed
;; name for everything else.
#undef L
#ifndef __AOUT__
# define L(x) .x
#else
# define L(x) x
#endif
;; ___Mul -- 32-bit multiplication helper.
;;
;; In:    $r10 = a, $r11 = b.
;; Out:   $r10 = low 32 bits of a * b.
;; Uses:  $r9, $r12 and $r13 as scratch in the mstep variant.
;;
;; For arch versions >= 10 this is just a mulu.d.  Otherwise the product
;; is synthesized from runs of 16 mstep insns, picking the cheapest of
;; three variants depending on how many operands fit in 16 bits.
;;
;; Note that "ret" and the conditional branches have a delay slot: the
;; insn written right after them is executed before control transfers.
;; Each variant below therefore ends with "ret" followed by the insn
;; that completes the result.
	.global ___Mul
	.type	___Mul,@function
___Mul:
#if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
;; Can't have the mulu.d last on a cache-line (in the delay-slot of the
;; "ret"), due to hardware bug.  See documentation for -mmul-bug-workaround.
;; Not worthwhile to conditionalize here.
	.p2alignw 2,0x050f
	mulu.d $r11,$r10
	ret
	nop
#else
	;; $r12 = a, $r9 = min (a, b), computed with the bound insn.
	move.d $r10,$r12
	move.d $r11,$r9
	bound.d $r12,$r9
	;; If the smaller operand fits in 16 bits, a cheaper variant will do.
	cmpu.w 65535,$r9
	bls L(L3)
	move.d $r12,$r13	; Delay slot: $r13 = a.

	;; Neither operand fits in 16 bits.  Modulo 2^32:
	;;   a * b = a_lo * b_lo + ((a_hi * b_lo + b_hi * a_lo) << 16)
	;; First partial product: $r13 = a_lo * b_lo.
	movu.w $r11,$r9		; $r9 = b_lo.
	lslq 16,$r13		; a_lo into the upper half, as mstep input.
	.rept 16
	mstep $r9,$r13
	.endr
	;; Second partial product: $r10 = a_hi * b_lo.
	;; NOTE(review): the test.d presumably sets up the flag that the
	;; first mstep keys on, since clear.w is not followed by a shift
	;; here -- confirm against the CRIS programmer's manual.
	clear.w $r10		; Keep only a_hi, already in the upper half.
	test.d $r10
	.rept 16
	mstep $r9,$r10
	.endr
	;; Third partial product: $r9 = b_hi * a_lo.
	movu.w $r12,$r12	; $r12 = a_lo.
	move.d $r11,$r9
	clear.w $r9		; Keep only b_hi, already in the upper half.
	test.d $r9
	.rept 16
	mstep $r12,$r9
	.endr
	;; Add the cross terms, move the sum to the upper half and add the
	;; low partial product.
	add.w $r9,$r10
	lslq 16,$r10
	ret
	add.d $r13,$r10		; Delay slot of the ret.

L(L3):
	;; Here $r9 = min (a, b) <= 65535.  Recover the other operand:
	;; $r10 = min ^ b ^ a = max (a, b).
	move.d $r9,$r10
	xor $r11,$r10
	xor $r12,$r10
	;; If max also fits in 16 bits, a single run of msteps is enough.
	cmpu.w 65535,$r10
	bls L(L5)
	movu.w $r9,$r13		; Delay slot: $r13 = low 16 bits of min.

	;; Only min fits in 16 bits:
	;;   max * min = max_lo * min + ((max_hi * min) << 16)
	movu.w $r13,$r13
	move.d $r10,$r9
	lslq 16,$r9		; max_lo into the upper half, as mstep input.
	.rept 16
	mstep $r13,$r9		; Ends with $r9 = max_lo * min.
	.endr
	clear.w $r10		; Keep only max_hi, already in the upper half.
	test.d $r10
	.rept 16
	mstep $r13,$r10		; Ends with $r10 = max_hi * min.
	.endr
	lslq 16,$r10
	ret
	add.d $r9,$r10		; Delay slot of the ret.

L(L5):
	;; Both operands fit in 16 bits: one run of 16 msteps, the last of
	;; which is placed in the delay slot of the ret.
	movu.w $r9,$r9
	lslq 16,$r10
	.rept 15
	mstep $r9,$r10
	.endr
	ret
	mstep $r9,$r10		; Delay slot of the ret: 16th mstep.
#endif
L(Lfe1):
	.size	___Mul,L(Lfe1)-___Mul