/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 *
 * Copyright (C) 2007 Michael Giacomelli
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
#include "wmadec.h"
#include "wmafixed.h"
#include <codecs.h>
/* Widen an int into a fixed64 (equivalent to (fixed64)x << 16), written
 * bytewise so the layout is the same on either endianness. */
fixed64 IntTo64(int x)
{
    fixed64 res = 0;
    unsigned char *p = (unsigned char *)&res;

#ifdef ROCKBOX_BIG_ENDIAN
    p[5] = x & 0xff;
    p[4] = (x & 0xff00)>>8;
    p[3] = (x & 0xff0000)>>16;
    p[2] = (x & 0xff000000)>>24;
#else
    p[2] = x & 0xff;
    p[3] = (x & 0xff00)>>8;
    p[4] = (x & 0xff0000)>>16;
    p[5] = (x & 0xff000000)>>24;
#endif
    return res;
}
/* Recover the integer part of a fixed64 (equivalent to (int)(x >> 16)),
 * again bytewise for endian safety. */
int IntFrom64(fixed64 x)
{
    int res = 0;
    unsigned char *p = (unsigned char *)&x;

#ifdef ROCKBOX_BIG_ENDIAN
    res = p[5] | (p[4]<<8) | (p[3]<<16) | (p[2]<<24);
#else
    res = p[2] | (p[3]<<8) | (p[4]<<16) | (p[5]<<24);
#endif
    return res;
}
fixed32 Fixed32From64(fixed64 x)
{
    return x & 0xFFFFFFFF;
}

fixed64 Fixed32To64(fixed32 x)
{
    return (fixed64)x;
}
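
/* A quick sanity sketch of how the converters above compose, assuming
 * PRECISION is 16 as in wmafixed.h. 'check_conversions' is a hypothetical
 * helper for illustration only, not part of the codec: */
#if 0
static void check_conversions(void)
{
    fixed64 f  = IntTo64(3);        /* 3 << 16 = 0x30000, i.e. 3.0 */
    int     i  = IntFrom64(f);      /* back to the integer 3 */
    fixed32 lo = Fixed32From64(f);  /* low 32 bits: 3.0 in 16.16 format */
    (void)i; (void)lo;
}
#endif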
/*
 * Helper functions for wma_window.
 */
#ifdef CPU_ARM
inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
                                const fixed32 *window, int n)
{
    /* Block sizes are always a power of two */
    asm volatile (
        "0:"
        "ldmia %[d]!, {r0, r1};"
        "ldmia %[w]!, {r4, r5};"
        /* consume the first data and window value so we can use those
         * registers again */
        "smull r8, r9, r0, r4;"
        "ldmia %[dst], {r0, r4};"
        "add   r0, r0, r9, lsl #1;"  /* *dst = *dst + (r9 << 1) */
        "smull r8, r9, r1, r5;"
        "add   r1, r4, r9, lsl #1;"
        "stmia %[dst]!, {r0, r1};"
        "subs  %[n], %[n], #2;"
        "bne   0b;"
        : [d] "+r" (data), [w] "+r" (window), [dst] "+r" (dst), [n] "+r" (n)
        : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
}
inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0,
                                const fixed32 *src1, int len)
{
    /* Block sizes are always a power of two */
    asm volatile (
        "add %[s1], %[s1], %[n], lsl #2;"
        "0:"
        "ldmia %[s0]!, {r0, r1};"
        "ldmdb %[s1]!, {r4, r5};"
        "smull r8, r9, r0, r5;"
        "mov   r0, r9, lsl #1;"
        "smull r8, r9, r1, r4;"
        "mov   r1, r9, lsl #1;"
        "stmia %[dst]!, {r0, r1};"
        "subs  %[n], %[n], #2;"
        "bne   0b;"
        : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
        : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
}
#elif defined(CPU_COLDFIRE)

inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
                                const fixed32 *window, int n)
{
    /* Block sizes are always a power of two, and the smallest block is
     * always well above four. */
    asm volatile (
        "0:"
        "movem.l (%[d]), %%d0-%%d3;"
        "movem.l (%[w]), %%d4-%%d5/%%a0-%%a1;"
        "mac.l %%d0, %%d4, %%acc0;"
        "mac.l %%d1, %%d5, %%acc1;"
        "mac.l %%d2, %%a0, %%acc2;"
        "mac.l %%d3, %%a1, %%acc3;"
        "lea.l (16, %[d]), %[d];"
        "lea.l (16, %[w]), %[w];"
        "movclr.l %%acc0, %%d0;"
        "movclr.l %%acc1, %%d1;"
        "movclr.l %%acc2, %%d2;"
        "movclr.l %%acc3, %%d3;"
        "movem.l (%[dst]), %%d4-%%d5/%%a0-%%a1;"
        "add.l %%d4, %%d0;"
        "add.l %%d5, %%d1;"
        "add.l %%a0, %%d2;"
        "add.l %%a1, %%d3;"
        "movem.l %%d0-%%d3, (%[dst]);"
        "lea.l (16, %[dst]), %[dst];"
        "subq.l #4, %[n];"
        "jne 0b;"
        : [d] "+a" (data), [w] "+a" (window), [dst] "+a" (dst), [n] "+d" (n)
        : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
}
inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0,
                                const fixed32 *src1, int len)
{
    /* Block sizes are always a power of two, and the smallest block is
     * always well above four. */
    asm volatile (
        "lea.l (-16, %[s1], %[n]*4), %[s1];"
        "0:"
        "movem.l (%[s0]), %%d0-%%d3;"
        "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
        "mac.l %%d0, %%a1, %%acc0;"
        "mac.l %%d1, %%a0, %%acc1;"
        "mac.l %%d2, %%d5, %%acc2;"
        "mac.l %%d3, %%d4, %%acc3;"
        "lea.l (16, %[s0]), %[s0];"
        "lea.l (-16, %[s1]), %[s1];"
        "movclr.l %%acc0, %%d0;"
        "movclr.l %%acc1, %%d1;"
        "movclr.l %%acc2, %%d2;"
        "movclr.l %%acc3, %%d3;"
        "movem.l %%d0-%%d3, (%[dst]);"
        "lea.l (16, %[dst]), %[dst];"
        "subq.l #4, %[n];"
        "jne 0b;"
        : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
        : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
}
#else

inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0,
                                const fixed32 *src1, int len)
{
    int i;
    for(i = 0; i < len; i++)
        dst[i] = fixmul32b(src0[i], src1[i]) + dst[i];
}

inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0,
                                const fixed32 *src1, int len)
{
    int i;
    src1 += len - 1;
    for(i = 0; i < len; i++)
        dst[i] = fixmul32b(src0[i], src1[-i]);
}

#endif
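
/* A minimal sketch of the overlap-add pattern these helpers serve in
 * wma_window: accumulate the previous block through the rising window,
 * then apply the window in reverse to the new block. Buffer names and
 * the exact layout are hypothetical, for illustration only: */
#if 0
static void window_overlap_add_sketch(fixed32 *out, const fixed32 *prev,
                                      const fixed32 *cur, const fixed32 *win,
                                      int block_len)
{
    /* out[i] += prev[i] * win[i] */
    vector_fmul_add_add(out, prev, win, block_len);
    /* out[block_len + i] = cur[i] * win[block_len - 1 - i] */
    vector_fmul_reverse(out + block_len, cur, win, block_len);
}
#endif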
/*
 * The code below here is not performance sensitive.
 */
/* Fixed-point divide: returns x/y in the same format, saturating to
 * 0x7fffffff on division by zero. */
fixed32 fixdiv32(fixed32 x, fixed32 y)
{
    fixed64 temp;

    if(x == 0)
        return 0;
    if(y == 0)
        return 0x7fffffff;
    temp = x;
    temp <<= PRECISION;
    return (fixed32)(temp / y);
}
fixed64 fixdiv64(fixed64 x, fixed64 y)
{
    fixed64 temp;

    if(x == 0)
        return 0;
    if(y == 0)
        return 0x07ffffffffffffffLL;
    temp = x;
    temp <<= PRECISION64;
    return (fixed64)(temp / y);
}
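
/* Worked example for fixdiv32, assuming PRECISION is 16 (16.16 format):
 * dividing 1.5 by 0.5 pre-shifts the widened dividend by 16 bits so the
 * quotient keeps its fractional scaling:
 *
 *     x = 0x18000 (1.5), y = 0x8000 (0.5)
 *     (0x18000LL << 16) / 0x8000 = 0x30000 (3.0)
 *
 * Without the pre-shift, a plain integer division would return 3 rather
 * than 3.0 in 16.16, i.e. the result would be off by a factor of 2^16. */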
/* Bit-by-bit integer square root; the result is shifted up by
 * PRECISION/2 because taking a square root halves the number of
 * fractional bits. */
fixed32 fixsqrt32(fixed32 x)
{
    unsigned long r = 0, s, v = (unsigned long)x;

#define STEP(k) s = r + (1 << k * 2); r >>= 1; \
    if (s <= v) { v -= s; r |= (1 << k * 2); }

    STEP(15);
    STEP(14);
    STEP(13);
    STEP(12);
    STEP(11);
    STEP(10);
    STEP(9);
    STEP(8);
    STEP(7);
    STEP(6);
    STEP(5);
    STEP(4);
    STEP(3);
    STEP(2);
    STEP(1);
    STEP(0);

    return (fixed32)(r << (PRECISION / 2));
}
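
/* Worked example, assuming PRECISION is 16: fixsqrt32(4 << 16) computes
 * the integer square root of 0x40000 (262144), giving r = 0x200 (512);
 * the final r << 8 rescales that to 0x20000, i.e. 2.0 in 16.16 format. */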
/* Inverse gain of circular cordic rotation in s0.31 format. */
static const long cordic_circular_gain = 0xb2458939; /* 0.607252929 */

/* Table of values of atan(2^-i) in 0.32 format fractions of pi where
 * pi = 0xffffffff / 2 */
static const unsigned long atan_table[] = {
    0x1fffffff, /* +0.785398163 (or pi/4) */
    0x12e4051d, /* +0.463647609 */
    0x09fb385b, /* +0.244978663 */
    0x051111d4, /* +0.124354995 */
    0x028b0d43, /* +0.062418810 */
    0x0145d7e1, /* +0.031239833 */
    0x00a2f61e, /* +0.015623729 */
    0x00517c55, /* +0.007812341 */
    0x0028be53, /* +0.003906230 */
    0x00145f2e, /* +0.001953123 */
    0x000a2f98, /* +0.000976562 */
    0x000517cc, /* +0.000488281 */
    0x00028be6, /* +0.000244141 */
    0x000145f3, /* +0.000122070 */
    0x0000a2f9, /* +0.000061035 */
    0x0000517c, /* +0.000030518 */
    0x000028be, /* +0.000015259 */
    0x0000145f, /* +0.000007629 */
    0x00000a2f, /* +0.000003815 */
    0x00000517, /* +0.000001907 */
    0x0000028b, /* +0.000000954 */
    0x00000145, /* +0.000000477 */
    0x000000a2, /* +0.000000238 */
    0x00000051, /* +0.000000119 */
    0x00000028, /* +0.000000060 */
    0x00000014, /* +0.000000030 */
    0x0000000a, /* +0.000000015 */
    0x00000005, /* +0.000000007 */
    0x00000002, /* +0.000000004 */
    0x00000001, /* +0.000000002 */
    0x00000000, /* +0.000000001 */
    0x00000000, /* +0.000000000 */
};
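
/* The table above can be regenerated offline with ordinary floating
 * point, since each entry is atan(2^-i) expressed as a fraction of pi
 * with pi represented as 0xffffffff / 2. A host-side sketch, not part
 * of the codec build: */
#if 0
#include <math.h>
static void gen_atan_table(unsigned long tab[32])
{
    int i;
    for (i = 0; i < 32; i++)
        tab[i] = (unsigned long)(atan(pow(2, -i)) / M_PI
                                 * (0xffffffffUL / 2));
}
#endif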
/*
 * The functions below here do not use the standard fixed-point precision!
 */

/**
 * Implements sin and cos using CORDIC rotation.
 *
 * @param phase has range from 0 to 0xffffffff, representing 0 and
 *   2*pi respectively.
 * @param cos return address for cos
 * @return sin of phase, value is a signed value from LONG_MIN to LONG_MAX,
 *   representing -1 and 1 respectively.
 *
 * Gives at least 24 bits precision (last 2-8 bits or so are probably off)
 */
long fsincos(unsigned long phase, fixed32 *cos)
{
    int32_t x, x1, y, y1;
    unsigned long z, z1;
    int i;

    /* Setup initial vector */
    x = cordic_circular_gain;
    y = 0;
    z = phase;

    /* The phase has to be somewhere between 0..pi for this to work right */
    if (z < 0xffffffff / 4) {
        /* z in first quadrant, z += pi/2 to correct */
        x = -x;
        z += 0xffffffff / 4;
    } else if (z < 3 * (0xffffffff / 4)) {
        /* z in third quadrant, z -= pi/2 to correct */
        z -= 0xffffffff / 4;
    } else {
        /* z in fourth quadrant, z -= 3pi/2 to correct */
        x = -x;
        z -= 3 * (0xffffffff / 4);
    }

    /* Each iteration adds roughly 1 bit of extra precision */
    for (i = 0; i < 31; i++) {
        x1 = x >> i;
        y1 = y >> i;
        z1 = atan_table[i];

        /* Decide which direction to rotate the vector. Pivot point is pi/2 */
        if (z >= 0xffffffff / 4) {
            x -= y1;
            y += x1;
            z -= z1;
        } else {
            x += y1;
            y -= x1;
            z += z1;
        }
    }

    if (cos)
        *cos = x;

    return y;
}
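
/* A minimal usage sketch: compute sin and cos of pi/2. The phase argument
 * maps [0, 2*pi) onto [0, 0xffffffff], so pi/2 is 0xffffffff / 4, and the
 * results are scaled so LONG_MAX represents 1.0. 'demo_fsincos' is a
 * hypothetical helper for illustration only: */
#if 0
static void demo_fsincos(void)
{
    fixed32 c;
    long s = fsincos(0xffffffffUL / 4, &c);
    /* s is close to LONG_MAX (sin(pi/2) = 1), c is close to 0 */
    (void)s; (void)c;
}
#endif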