/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 *
 * Copyright (C) 2005 by Pedro Vasconcelos
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

/* asm routines for wide math on the MCF5249 */

#if defined(CPU_COLDFIRE)

/* attribute for 16-byte alignment */
#define LINE_ATTR __attribute__ ((aligned (16)))

#ifndef _V_WIDE_MATH
#define _V_WIDE_MATH

#define MB()

/* MULT32: top 32 bits of the 64-bit product, i.e. (x*y) >> 32.
   Assumes the EMAC unit is in fractional mode. */
static inline int32_t MULT32(int32_t x, int32_t y) {
    asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply & shift */
                  "movclr.l %%acc0, %[x];"    /* move & clear acc */
                  "asr.l #1, %[x];"           /* no overflow test */
                  : [x] "+&d" (x)
                  : [y] "r" (y)
                  : "cc");
    return x;
}

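/* For reference, a portable C equivalent of MULT32 (an illustrative sketch,
   not part of the original header; assumes int64_t is available): */
#if 0
static inline int32_t MULT32(int32_t x, int32_t y) {
    return (int32_t)(((int64_t)x * y) >> 32); /* top half of the product */
}
#endif
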
/* MULT31: multiply two Q31 fractions, i.e. (x*y) >> 31. */
static inline int32_t MULT31(int32_t x, int32_t y) {
    asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
                  "movclr.l %%acc0, %[x];"    /* move and clear */
                  : [x] "+&r" (x)
                  : [y] "r" (y)
                  : "cc");
    return x;
}

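/* Portable C sketch of MULT31 for reference (illustrative, not part of the
   original header): */
#if 0
static inline int32_t MULT31(int32_t x, int32_t y) {
    return (int32_t)(((int64_t)x * y) >> 31);
}
#endif
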
/* MULT31_SHIFT15: 64-bit product shifted right by 15, i.e. (x*y) >> 15. */
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
    int32_t r;

    asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
                  "mulu.l %[y], %[x];"        /* get lower half, avoid emac stall */
                  "movclr.l %%acc0, %[r];"    /* get higher half */
                  "asl.l #8, %[r];"           /* hi<<16, plus one free */
                  "asl.l #8, %[r];"
                  "lsr.l #8, %[x];"           /* (unsigned)lo >> 15 */
                  "lsr.l #7, %[x];"
                  "or.l %[x], %[r];"          /* logical-or results */
                  : [r] "=&d" (r), [x] "+d" (x)
                  : [y] "d" (y)
                  : "cc");
    return r;
}

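/* Portable C sketch of MULT31_SHIFT15 for reference (illustrative, not part
   of the original header): */
#if 0
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
    return (int32_t)(((int64_t)x * y) >> 15);
}
#endif
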
/* XPROD31: *x = (a*t + b*v) >> 31; *y = (b*t - a*v) >> 31. */
static inline
void XPROD31(int32_t a, int32_t b,
             int32_t t, int32_t v,
             int32_t *x, int32_t *y)
{
    asm volatile ("mac.l %[a], %[t], %%acc0;"
                  "mac.l %[b], %[v], %%acc0;"
                  "mac.l %[b], %[t], %%acc1;"
                  "msac.l %[a], %[v], %%acc1;"
                  "movclr.l %%acc0, %[a];"
                  "move.l %[a], (%[x]);"
                  "movclr.l %%acc1, %[a];"
                  "move.l %[a], (%[y]);"
                  : [a] "+&r" (a)
                  : [x] "a" (x), [y] "a" (y),
                    [b] "r" (b), [t] "r" (t), [v] "r" (v)
                  : "cc", "memory");
}

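/* Roughly equivalent portable sketch of XPROD31 (illustrative, not part of
   the original header; intermediate rounding may differ from the asm): */
#if 0
static inline void XPROD31(int32_t a, int32_t b, int32_t t, int32_t v,
                           int32_t *x, int32_t *y)
{
    *x = MULT31(a, t) + MULT31(b, v);
    *y = MULT31(b, t) - MULT31(a, v);
}
#endif
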
/* XNPROD31: *x = (a*t - b*v) >> 31; *y = (b*t + a*v) >> 31. */
static inline
void XNPROD31(int32_t a, int32_t b,
              int32_t t, int32_t v,
              int32_t *x, int32_t *y)
{
    asm volatile ("mac.l %[a], %[t], %%acc0;"
                  "msac.l %[b], %[v], %%acc0;"
                  "mac.l %[b], %[t], %%acc1;"
                  "mac.l %[a], %[v], %%acc1;"
                  "movclr.l %%acc0, %[a];"
                  "move.l %[a], (%[x]);"
                  "movclr.l %%acc1, %[a];"
                  "move.l %[a], (%[y]);"
                  : [a] "+&r" (a)
                  : [x] "a" (x), [y] "a" (y),
                    [b] "r" (b), [t] "r" (t), [v] "r" (v)
                  : "cc", "memory");
}

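/* Roughly equivalent portable sketch of XNPROD31 (illustrative, not part of
   the original header): */
#if 0
static inline void XNPROD31(int32_t a, int32_t b, int32_t t, int32_t v,
                            int32_t *x, int32_t *y)
{
    *x = MULT31(a, t) - MULT31(b, v);
    *y = MULT31(b, t) + MULT31(a, v);
}
#endif
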
#if 0 /* canonical Tremor definition */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
  { (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
    (_y)=MULT32(_b,_t)-MULT32(_a,_v); }
#endif

/* This version could lose the LSB by overflow, but that seems unlikely to
   happen in practice. If anyone thinks they can hear a bug caused by this,
   please try the canonical version above. */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
    asm volatile ("mac.l %[a], %[t], %%acc0;" \
                  "mac.l %[b], %[v], %%acc0;" \
                  "mac.l %[b], %[t], %%acc1;" \
                  "msac.l %[a], %[v], %%acc1;" \
                  "movclr.l %%acc0, %[x];" \
                  "asr.l #1, %[x];" \
                  "movclr.l %%acc1, %[y];" \
                  "asr.l #1, %[y];" \
                  : [x] "=&d" (_x), [y] "=&d" (_y) \
                  : [a] "r" (_a), [b] "r" (_b), \
                    [t] "r" (_t), [v] "r" (_v) \
                  : "cc");

#ifndef _V_VECT_OPS
#define _V_VECT_OPS

/* asm versions of vector operations for block.c, window.c */
/* assumes MAC is initialized & accumulators cleared */

static inline
void vect_add(int32_t *x, const int32_t *y, int n)
{
    /* align to 16 bytes */
    while(n>0 && (int)x&15) {
        *x++ += *y++;
        n--;
    }
    asm volatile ("bra 1f;"
                  "0:"                          /* loop start */
                  "movem.l (%[x]), %%d0-%%d3;"  /* fetch values */
                  "movem.l (%[y]), %%a0-%%a3;"
                  /* add */
                  "add.l %%a0, %%d0;"
                  "add.l %%a1, %%d1;"
                  "add.l %%a2, %%d2;"
                  "add.l %%a3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[x]);"
                  "lea.l (4*4, %[x]), %[x];"
                  "lea.l (4*4, %[y]), %[y];"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
    /* add final elements */
    while (n>0) {
        *x++ += *y++;
        n--;
    }
}

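/* Portable C equivalent of vect_add for reference (illustrative only): */
#if 0
static inline void vect_add(int32_t *x, const int32_t *y, int n)
{
    while (n > 0) {
        *x++ += *y++;
        n--;
    }
}
#endif
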
static inline
void vect_copy(int32_t *x, int32_t *y, int n)
{
    /* align to 16 bytes */
    while(n>0 && (int)x&15) {
        *x++ = *y++;
        n--;
    }
    asm volatile ("bra 1f;"
                  "0:"                          /* loop start */
                  "movem.l (%[y]), %%d0-%%d3;"  /* fetch values */
                  "movem.l %%d0-%%d3, (%[x]);"  /* store */
                  "lea.l (4*4, %[x]), %[x];"    /* advance */
                  "lea.l (4*4, %[y]), %[y];"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
                  : : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
    /* copy final elements */
    while (n>0) {
        *x++ = *y++;
        n--;
    }
}

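/* Portable sketch of vect_copy for reference (illustrative only); for
   non-overlapping buffers this is just a word-wise copy: */
#if 0
static inline void vect_copy(int32_t *x, int32_t *y, int n)
{
    while (n > 0) {
        *x++ = *y++;
        n--;
    }
}
#endif
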
static inline
void vect_mult_fw(int32_t *data, int32_t *window, int n)
{
    /* ensure data is 16-byte aligned */
    while(n>0 && (int)data&15) {
        *data = MULT31(*data, *window);
        data++;
        window++;
        n--;
    }
    asm volatile ("movem.l (%[d]), %%d0-%%d3;"  /* loop start */
                  "movem.l (%[w]), %%a0-%%a3;"  /* pre-fetch registers */
                  "lea.l (4*4, %[w]), %[w];"
                  "bra 1f;"                     /* jump to loop condition */
                  "0:"                          /* loop body */
                  /* multiply and load next window values */
                  "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
                  "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
                  "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
                  "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
                  "movclr.l %%acc0, %%d0;"      /* get the products */
                  "movclr.l %%acc1, %%d1;"
                  "movclr.l %%acc2, %%d2;"
                  "movclr.l %%acc3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[d]);"
                  "lea.l (4*4, %[d]), %[d];"
                  "movem.l (%[d]), %%d0-%%d3;"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  /* multiply final elements */
                  "tst.l %[n];"
                  "beq 1f;"                     /* n = 0 */
                  "mac.l %%d0, %%a0, %%acc0;"
                  "movclr.l %%acc0, %%d0;"
                  "move.l %%d0, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n = 1 */
                  "mac.l %%d1, %%a1, %%acc0;"
                  "movclr.l %%acc0, %%d1;"
                  "move.l %%d1, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n = 2 */
                  /* otherwise n = 3 */
                  "mac.l %%d2, %%a2, %%acc0;"
                  "movclr.l %%acc0, %%d2;"
                  "move.l %%d2, (%[d])+;"
                  "1:"
                  : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
}

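/* Portable sketch of vect_mult_fw (forward window multiply) for reference
   (illustrative only): */
#if 0
static inline void vect_mult_fw(int32_t *data, int32_t *window, int n)
{
    while (n > 0) {
        *data = MULT31(*data, *window);
        data++;
        window++;
        n--;
    }
}
#endif
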
static inline
void vect_mult_bw(int32_t *data, int32_t *window, int n)
{
    /* ensure at least the data pointer is 16-byte aligned */
    while(n>0 && (int)data&15) {
        *data = MULT31(*data, *window);
        data++;
        window--;
        n--;
    }
    asm volatile ("lea.l (-3*4, %[w]), %[w];"   /* loop start */
                  "movem.l (%[d]), %%d0-%%d3;"  /* pre-fetch registers */
                  "movem.l (%[w]), %%a0-%%a3;"
                  "bra 1f;"                     /* jump to loop condition */
                  "0:"                          /* loop body */
                  /* multiply and load next window value */
                  "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
                  "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
                  "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
                  "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
                  "movclr.l %%acc0, %%d0;"      /* get the products */
                  "movclr.l %%acc1, %%d1;"
                  "movclr.l %%acc2, %%d2;"
                  "movclr.l %%acc3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[d]);"
                  "lea.l (4*4, %[d]), %[d];"
                  "movem.l (%[d]), %%d0-%%d3;"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  /* multiply final elements */
                  "tst.l %[n];"
                  "beq 1f;"                     /* n = 0 */
                  "mac.l %%d0, %%a3, %%acc0;"
                  "movclr.l %%acc0, %%d0;"
                  "move.l %%d0, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n = 1 */
                  "mac.l %%d1, %%a2, %%acc0;"
                  "movclr.l %%acc0, %%d1;"
                  "move.l %%d1, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n = 2 */
                  /* otherwise n = 3 */
                  "mac.l %%d2, %%a1, %%acc0;"
                  "movclr.l %%acc0, %%d2;"
                  "move.l %%d2, (%[d])+;"
                  "1:"
                  : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
}

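/* Portable sketch of vect_mult_bw (backward window multiply) for reference
   (illustrative only): */
#if 0
static inline void vect_mult_bw(int32_t *data, int32_t *window, int n)
{
    while (n > 0) {
        *data = MULT31(*data, *window);
        data++;
        window--;
        n--;
    }
}
#endif
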
#endif /* _V_VECT_OPS */

#endif /* _V_WIDE_MATH */

#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH

/* this is portable C and simple; why not use it as the default? */
static inline int32_t CLIP_TO_15(register int32_t x) {
    register int32_t hi=32767, lo=-32768;
    return (x>=hi ? hi : (x<=lo ? lo : x));
}

#endif /* _V_CLIP_MATH */

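/* Usage sketch (illustrative only): clamping a wide intermediate sample to
   the signed 16-bit range before writing PCM output:

       int16_t pcm = (int16_t)CLIP_TO_15(sample);
*/
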
#else

#define LINE_ATTR

#endif /* CPU_COLDFIRE */