FS#8961 - Anti-Aliased Fonts.
[kugel-rb/myfork.git] / apps / codecs / libmad / fixed.h
blob7e580c4e8f9310f66916c6d0e85b9796f670413a
1 /*
2 * libmad - MPEG audio decoder library
3 * Copyright (C) 2000-2004 Underbit Technologies, Inc.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 * $Id$
22 # ifndef LIBMAD_FIXED_H
23 # define LIBMAD_FIXED_H
25 #include <inttypes.h>
/* Basic 32-bit fixed-point value. */
typedef int32_t		mad_fixed_t;

/* High (signed) and low (unsigned) 32-bit halves of a 64-bit product. */
typedef int32_t		mad_fixed64hi_t;
typedef uint32_t	mad_fixed64lo_t;

/* 64-bit signed type; MSVC spells it __int64, everything else long long. */
# if defined(_MSC_VER)
#  define mad_fixed64_t  signed __int64
# else
#  define mad_fixed64_t  signed long long
# endif

/* Sample type: fixed-point unless the floating-point build is selected. */
# if !defined(FPM_FLOAT)
typedef mad_fixed_t	mad_sample_t;
# else
typedef double		mad_sample_t;
# endif
45 * Fixed-point format: 0xABBBBBBB
46 * A == whole part (sign + 3 bits)
47 * B == fractional part (28 bits)
49 * Values are signed two's complement, so the effective range is:
50 * 0x80000000 to 0x7fffffff
51 * -8.0 to +7.9999999962747097015380859375
53 * The smallest representable value is:
54 * 0x00000001 == 0.0000000037252902984619140625 (i.e. about 3.725e-9)
56 * 28 bits of fractional accuracy represent about
57 * 8.6 digits of decimal accuracy.
59 * Fixed-point numbers can be added or subtracted as normal
60 * integers, but multiplication requires shifting the 64-bit result
61 * from 56 fractional bits back to 28 (and rounding.)
63 * Changing the definition of MAD_F_FRACBITS is only partially
64 * supported, and must be done with care.
/* Number of fractional bits in a mad_fixed_t. */
# define MAD_F_FRACBITS		28

/*
 * MAD_F(x) turns a hexadecimal constant written for 28 fractional bits into
 * a mad_fixed_t with MAD_F_FRACBITS fractional bits.  The constant gets an
 * 'L' suffix pasted on, so x must be a bare integer literal.
 */
# if MAD_F_FRACBITS < 28
#  warning "MAD_F_FRACBITS < 28"
/* fewer fractional bits: round to nearest while shifting down */
#  define MAD_F(x)		((mad_fixed_t)  \
				 (((x##L) +  \
				   (1L << (28 - MAD_F_FRACBITS - 1))) >>  \
				  (28 - MAD_F_FRACBITS)))
# elif MAD_F_FRACBITS > 28
#  error "MAD_F_FRACBITS > 28 not currently supported"
#  define MAD_F(x)		((mad_fixed_t)  \
				 ((x##L) << (MAD_F_FRACBITS - 28)))
# else
/* native format: use the constant as-is */
#  define MAD_F(x)		((mad_fixed_t) (x##L))
# endif
/*
 * Smallest and largest representable mad_fixed_t values.
 *
 * MAD_F_MIN is written as (-0x7fffffffL - 1L) rather than -0x80000000L:
 * where long is 32 bits the literal 0x80000000L has an unsigned type, so
 * negating it and casting relies on an implementation-defined conversion.
 * The form below is computed entirely in signed arithmetic.
 */
# define MAD_F_MIN		((mad_fixed_t) (-0x7fffffffL - 1L))
# define MAD_F_MAX		((mad_fixed_t) +0x7fffffffL)

/* The value 1.0 in fixed-point form. */
# define MAD_F_ONE		MAD_F(0x10000000)
/*
 * mad_f_tofixed(x): convert a floating-point value to mad_fixed_t, rounding
 * to the nearest representable value.
 *
 * The conversion to an integer type truncates toward zero, so the rounding
 * offset must carry the sign of x; with a fixed +0.5 every negative input
 * came out one step too high (e.g. -1.0 became -0x0fffffff).
 * Note that x is evaluated twice; do not pass an expression with side
 * effects.
 */
# define mad_f_tofixed(x)	((mad_fixed_t)  \
				 ((x) * (double) (1L << MAD_F_FRACBITS) +  \
				  ((x) < 0 ? -0.5 : 0.5)))

/* mad_f_todouble(x): convert a mad_fixed_t value to double. */
# define mad_f_todouble(x)	((double)  \
				 ((x) / (double) (1L << MAD_F_FRACBITS)))
/* Whole part of a fixed-point value (the bits above the fraction). */
# define mad_f_intpart(x)	((x) >> MAD_F_FRACBITS)
/* Fractional bits of a fixed-point value. */
# define mad_f_fracpart(x)	((x) & ((1L << MAD_F_FRACBITS) - 1))
				/* (x should be positive) */

/*
 * Convert an integer to fixed-point.  Implemented as a multiplication by
 * 2^MAD_F_FRACBITS instead of a left shift, because left-shifting a
 * negative value is undefined behaviour in C.
 */
# define mad_f_fromint(x)	((x) * (mad_fixed_t) (1L << MAD_F_FRACBITS))

/* Fixed-point values add and subtract like ordinary integers. */
# define mad_f_add(x, y)	((x) + (y))
# define mad_f_sub(x, y)	((x) - (y))
104 # if defined(FPM_FLOAT)
/* Floating-point branch. It is rejected outright by the #error; the
 * definitions after it redefine MAD_F() via mad_f_todouble(), make
 * mad_f_mul() a plain product and mad_f_scale64 expand to nothing. */
105 # error "FPM_FLOAT not yet supported"
107 # undef MAD_F
108 # define MAD_F(x) mad_f_todouble(x)
110 # define mad_f_mul(x, y) ((x) * (y))
111 # define mad_f_scale64
113 # undef ASO_ZEROCHECK
115 # elif defined(FPM_64BIT)
118 * This version should be the most accurate if 64-bit types are supported by
119 * the compiler, although it may not be the most efficient.
/* Number of bits the 64-bit product is shifted right by. */
# define MAD_F_SCALEBITS  MAD_F_FRACBITS

/*
 * 64-bit multiply: widen x, form the full product, then shift it back down
 * to MAD_F_SCALEBITS fractional bits.  With OPT_ACCURACY half an LSB is
 * added before the shift so the result is rounded instead of truncated.
 */
# if !defined(OPT_ACCURACY)
#  define mad_f_mul(x, y)  \
    ((mad_fixed_t) (((mad_fixed64_t) (x) * (y)) >> MAD_F_SCALEBITS))
# else
#  define mad_f_mul(x, y)  \
    ((mad_fixed_t)  \
     ((((mad_fixed64_t) (x) * (y)) +  \
       (1L << (MAD_F_SCALEBITS - 1))) >> MAD_F_SCALEBITS))
# endif
133 /* --- Intel --------------------------------------------------------------- */
135 # elif defined(FPM_INTEL)
137 # if defined(_MSC_VER)
/* MSVC inline-assembly multiply: loads x into eax, multiplies by y with imul
 * (product in edx:eax), then shrd shifts the pair right by fracbits into eax,
 * which is left in place as the return value (hence warning 4035 is disabled).
 * mad_f_mul is mapped onto this function and mad_f_scale64 expands to nothing.
 * NOTE(review): the brace-only lines of this function are absent from this
 * listing -- confirm against the upstream file. */
138 # pragma warning(push)
139 # pragma warning(disable: 4035) /* no return value */
140 static __forceinline
141 mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
143 enum {
144 fracbits = MAD_F_FRACBITS
147 __asm {
148 mov eax, x
149 imul y
150 shrd eax, edx, fracbits
153 /* implicit return of eax */
155 # pragma warning(pop)
157 # define mad_f_mul mad_f_mul_inline
158 # define mad_f_scale64
159 # else
161 * This Intel version is fast and accurate; the disposition of the least
162 * significant bit depends on OPT_ACCURACY via mad_f_scale64().
/* x86 64-bit multiply: x is placed in eax ("a") and multiplied by y with
 * imull; the low half of the product is returned in lo (eax) and the high
 * half in hi (edx). */
164 # define MAD_F_MLX(hi, lo, x, y) \
165 asm ("imull %3" \
166 : "=a" (lo), "=d" (hi) \
167 : "%a" (x), "rm" (y) \
168 : "cc")
170 # if defined(OPT_ACCURACY)
172 * This gives best accuracy but is not very fast.
/* x86 multiply-accumulate: forms the product of x and y in temporaries via
 * MAD_F_MLX, then adds it to the (hi, lo) pair with addl/adcl so the carry
 * from the low word propagates into the high word.
 * NOTE(review): the closing line of this statement expression is absent from
 * this listing. */
174 # define MAD_F_MLA(hi, lo, x, y) \
175 ({ mad_fixed64hi_t __hi; \
176 mad_fixed64lo_t __lo; \
177 MAD_F_MLX(__hi, __lo, (x), (y)); \
178 asm ("addl %2,%0\n\t" \
179 "adcl %3,%1" \
180 : "=rm" (lo), "=rm" (hi) \
181 : "r" (__lo), "r" (__hi), "0" (lo), "1" (hi) \
182 : "cc"); \
184 # endif /* OPT_ACCURACY */
186 # if defined(OPT_ACCURACY)
188 * Surprisingly, this is faster than SHRD followed by ADC.
/* x86 rounding scale (OPT_ACCURACY): adds 1 << (MAD_F_SCALEBITS - 1) to the
 * 64-bit (hi, lo) pair with addl/adcl, then shrdl shifts the pair right by
 * MAD_F_SCALEBITS to produce the 32-bit result.
 * NOTE(review): the closing line of this statement expression is absent from
 * this listing. */
190 # define mad_f_scale64(hi, lo) \
191 ({ mad_fixed64hi_t __hi_; \
192 mad_fixed64lo_t __lo_; \
193 mad_fixed_t __result; \
194 asm ("addl %4,%2\n\t" \
195 "adcl %5,%3" \
196 : "=rm" (__lo_), "=rm" (__hi_) \
197 : "0" (lo), "1" (hi), \
198 "ir" (1L << (MAD_F_SCALEBITS - 1)), "ir" (0) \
199 : "cc"); \
200 asm ("shrdl %3,%2,%1" \
201 : "=rm" (__result) \
202 : "0" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS) \
203 : "cc"); \
204 __result; \
206 # elif defined(OPT_INTEL)
208 * Alternate Intel scaling that may or may not perform better.
/* x86 alternate scale (OPT_INTEL): shifts lo right by MAD_F_SCALEBITS, shifts
 * hi left by 32 - MAD_F_SCALEBITS and ORs the two together; no rounding term
 * is added.
 * NOTE(review): the asm modifies the register holding hi (shll %4,%2) although
 * it is declared as an input only -- confirm this is safe for the callers. */
210 # define mad_f_scale64(hi, lo) \
211 ({ mad_fixed_t __result; \
212 asm ("shrl %3,%1\n\t" \
213 "shll %4,%2\n\t" \
214 "orl %2,%1" \
215 : "=rm" (__result) \
216 : "0" (lo), "r" (hi), \
217 "I" (MAD_F_SCALEBITS), "I" (32 - MAD_F_SCALEBITS) \
218 : "cc"); \
219 __result; \
221 # else
/* x86 default scale: a single shrdl shifts the (hi, lo) pair right by
 * MAD_F_SCALEBITS; no rounding term is added.  MAD_F_SCALEBITS is then set
 * to MAD_F_FRACBITS for all three x86 variants. */
222 # define mad_f_scale64(hi, lo) \
223 ({ mad_fixed_t __result; \
224 asm ("shrdl %3,%2,%1" \
225 : "=rm" (__result) \
226 : "0" (lo), "r" (hi), "I" (MAD_F_SCALEBITS) \
227 : "cc"); \
228 __result; \
230 # endif /* OPT_ACCURACY */
232 # define MAD_F_SCALEBITS MAD_F_FRACBITS
233 # endif
235 /* --- ARM ----------------------------------------------------------------- */
237 # elif defined(FPM_ARM)
240 * This ARM V4 version is as accurate as FPM_64BIT but much faster. The
241 * least significant bit is properly rounded at no CPU cycle cost!
243 # if 1
245 * This is faster than the default implementation via MAD_F_MLX() and
246 * mad_f_scale64().
/* ARM multiply: smull forms the 64-bit product in (__hi, __lo); "movs ... lsr"
 * shifts the low word right by MAD_F_SCALEBITS and leaves the last bit shifted
 * out in the carry flag; adc then adds the high word shifted left by
 * 32 - MAD_F_SCALEBITS plus that carry, which rounds the result.
 * NOTE(review): the closing line of this statement expression is absent from
 * this listing. */
248 # define mad_f_mul(x, y) \
249 ({ mad_fixed64hi_t __hi; \
250 mad_fixed64lo_t __lo; \
251 mad_fixed_t __result; \
252 asm ("smull %0, %1, %3, %4\n\t" \
253 "movs %0, %0, lsr %5\n\t" \
254 "adc %2, %0, %1, lsl %6" \
255 : "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
256 : "%r" (x), "r" (y), \
257 "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS) \
258 : "cc"); \
259 __result; \
261 # endif
/* ARM 64-bit accumulator primitives operating on a (hi, lo) register pair:
 *   MAD_F_MLX - smull: (hi, lo) = x * y
 *   MAD_F_MLA - smlal: (hi, lo) += x * y
 *   MAD_F_MLN - rsbs/rsc: (hi, lo) = 0 - (hi, lo), borrow carried from the
 *               low word into the high word */
263 # define MAD_F_MLX(hi, lo, x, y) \
264 asm ("smull %0, %1, %2, %3" \
265 : "=&r" (lo), "=&r" (hi) \
266 : "%r" (x), "r" (y))
268 # define MAD_F_MLA(hi, lo, x, y) \
269 asm ("smlal %0, %1, %2, %3" \
270 : "+r" (lo), "+r" (hi) \
271 : "%r" (x), "r" (y))
273 # define MAD_F_MLN(hi, lo) \
274 asm ("rsbs %0, %2, #0\n\t" \
275 "rsc %1, %3, #0" \
276 : "=r" (lo), "=r" (hi) \
277 : "0" (lo), "1" (hi) \
278 : "cc")
/* ARM scale: shifts lo right by MAD_F_SCALEBITS (setting the carry flag from
 * the last bit shifted out), then adds hi shifted left by
 * 32 - MAD_F_SCALEBITS plus the carry.  MAD_F_SCALEBITS is set to
 * MAD_F_FRACBITS afterwards.
 * NOTE(review): the closing line of this statement expression is absent from
 * this listing. */
280 # define mad_f_scale64(hi, lo) \
281 ({ mad_fixed_t __result; \
282 asm ("movs %0, %1, lsr %3\n\t" \
283 "adc %0, %0, %2, lsl %4" \
284 : "=&r" (__result) \
285 : "r" (lo), "r" (hi), \
286 "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS) \
287 : "cc"); \
288 __result; \
291 # define MAD_F_SCALEBITS MAD_F_FRACBITS
293 /* --- MIPS ---------------------------------------------------------------- */
295 # elif defined(FPM_MIPS)
298 * This MIPS version is fast and accurate; the disposition of the least
299 * significant bit depends on OPT_ACCURACY via mad_f_scale64().
/* MIPS 64-bit multiply: mult x, y; the result is bound to the LO ("=l") and
 * HI ("=h") register constraints and delivered in lo and hi. */
301 # define MAD_F_MLX(hi, lo, x, y) \
302 asm ("mult %2,%3" \
303 : "=l" (lo), "=h" (hi) \
304 : "%r" (x), "r" (y))
/* MIPS multiply-accumulate.  With HAVE_MADD_ASM, madd accumulates x * y into
 * (hi, lo).  With HAVE_MADD16_ASM, the operands are pre-shifted (x >> 12,
 * y >> 16) before mult/madd16, and MAD_F_MLZ simply returns lo. */
306 # if defined(HAVE_MADD_ASM)
307 # define MAD_F_MLA(hi, lo, x, y) \
308 asm ("madd %2,%3" \
309 : "+l" (lo), "+h" (hi) \
310 : "%r" (x), "r" (y))
311 # elif defined(HAVE_MADD16_ASM)
313 * This loses significant accuracy due to the 16-bit integer limit in the
314 * multiply/accumulate instruction.
316 # define MAD_F_ML0(hi, lo, x, y) \
317 asm ("mult %2,%3" \
318 : "=l" (lo), "=h" (hi) \
319 : "%r" ((x) >> 12), "r" ((y) >> 16))
320 # define MAD_F_MLA(hi, lo, x, y) \
321 asm ("madd16 %2,%3" \
322 : "+l" (lo), "+h" (hi) \
323 : "%r" ((x) >> 12), "r" ((y) >> 16))
324 # define MAD_F_MLZ(hi, lo) ((mad_fixed_t) (lo))
325 # endif
/* MIPS OPT_SPEED scale: uses only the high word, shifted left by
 * 32 - MAD_F_SCALEBITS; the low word is ignored. */
327 # if defined(OPT_SPEED)
328 # define mad_f_scale64(hi, lo) \
329 ((mad_fixed_t) ((hi) << (32 - MAD_F_SCALEBITS)))
330 # define MAD_F_SCALEBITS MAD_F_FRACBITS
331 # endif
333 /* --- SPARC --------------------------------------------------------------- */
335 # elif defined(FPM_SPARC)
338 * This SPARC V8 version is fast and accurate; the disposition of the least
339 * significant bit depends on OPT_ACCURACY via mad_f_scale64().
/* SPARC 64-bit multiply: smul writes the low word of x * y to lo, then
 * "rd %y" reads the Y register into hi. */
341 # define MAD_F_MLX(hi, lo, x, y) \
342 asm ("smul %2, %3, %0\n\t" \
343 "rd %%y, %1" \
344 : "=r" (lo), "=r" (hi) \
345 : "%r" (x), "rI" (y))
347 /* --- PowerPC ------------------------------------------------------------- */
349 # elif defined(FPM_PPC)
352 * This PowerPC version is fast and accurate; the disposition of the least
353 * significant bit depends on OPT_ACCURACY via mad_f_scale64().
/* PowerPC 64-bit multiply, done as two instructions: mullw yields the low
 * word of x * y in lo, mulhw the high word in hi.
 * NOTE(review): the closing brace line of the do { ... } while (0) wrapper is
 * absent from this listing. */
355 # define MAD_F_MLX(hi, lo, x, y) \
356 do { \
357 asm ("mullw %0,%1,%2" \
358 : "=r" (lo) \
359 : "%r" (x), "r" (y)); \
360 asm ("mulhw %0,%1,%2" \
361 : "=r" (hi) \
362 : "%r" (x), "r" (y)); \
364 while (0)
366 # if defined(OPT_ACCURACY)
368 * This gives best accuracy but is not very fast.
/* PowerPC multiply-accumulate: forms x * y in temporaries via MAD_F_MLX, then
 * adds it to (hi, lo) with addc (low words) followed by adde (high words).
 * NOTE(review): the closing line of this statement expression is absent from
 * this listing. */
370 # define MAD_F_MLA(hi, lo, x, y) \
371 ({ mad_fixed64hi_t __hi; \
372 mad_fixed64lo_t __lo; \
373 MAD_F_MLX(__hi, __lo, (x), (y)); \
374 asm ("addc %0,%2,%3\n\t" \
375 "adde %1,%4,%5" \
376 : "=r" (lo), "=r" (hi) \
377 : "%r" (lo), "r" (__lo), \
378 "%r" (hi), "r" (__hi) \
379 : "xer"); \
381 # endif
383 # if defined(OPT_ACCURACY)
385 * This is slower than the truncating version below it.
/* PowerPC rounding scale (OPT_ACCURACY): rotates lo right by MAD_F_SCALEBITS,
 * extracts one bit of the rotated value into __round, inserts bits of hi
 * into __result with insrwi, and finally adds __round to the result.
 * NOTE(review): the closing line of this statement expression is absent from
 * this listing. */
387 # define mad_f_scale64(hi, lo) \
388 ({ mad_fixed_t __result, __round; \
389 asm ("rotrwi %0,%1,%2" \
390 : "=r" (__result) \
391 : "r" (lo), "i" (MAD_F_SCALEBITS)); \
392 asm ("extrwi %0,%1,1,0" \
393 : "=r" (__round) \
394 : "r" (__result)); \
395 asm ("insrwi %0,%1,%2,0" \
396 : "+r" (__result) \
397 : "r" (hi), "i" (MAD_F_SCALEBITS)); \
398 asm ("add %0,%1,%2" \
399 : "=r" (__result) \
400 : "%r" (__result), "r" (__round)); \
401 __result; \
403 # else
/* PowerPC default scale: same rotrwi/insrwi sequence as the OPT_ACCURACY
 * variant but without the rounding-bit extraction and add.  MAD_F_SCALEBITS
 * is set to MAD_F_FRACBITS for both variants. */
404 # define mad_f_scale64(hi, lo) \
405 ({ mad_fixed_t __result; \
406 asm ("rotrwi %0,%1,%2" \
407 : "=r" (__result) \
408 : "r" (lo), "i" (MAD_F_SCALEBITS)); \
409 asm ("insrwi %0,%1,%2,0" \
410 : "+r" (__result) \
411 : "r" (hi), "i" (MAD_F_SCALEBITS)); \
412 __result; \
414 # endif
416 # define MAD_F_SCALEBITS MAD_F_FRACBITS
418 # elif defined(FPM_COLDFIRE_EMAC)
420 /* mad_f_mul using the Coldfire MCF5249 EMAC unit. Loses 3 bits of accuracy.
421 Note that we don't define any of the libmad accumulator macros, as
422 any functions that use these should have the relevant sections rewritten
423 in assembler to utilise the EMAC accumulators properly.
424 Assumes the default +/- 3.28 fixed point format
425 */
/* Coldfire EMAC multiply: mac.l multiplies x by y into accumulator acc0,
 * movclr.l moves the accumulator into hi (clearing acc0), and asl.l shifts
 * the value left by 3 bits before it is returned.
 * NOTE(review): the closing line of this statement expression is absent from
 * this listing. */
426 #define mad_f_mul(x, y) \
427 ({ \
428 mad_fixed64hi_t hi; \
429 asm volatile("mac.l %[a], %[b], %%acc0\n\t" \
430 "movclr.l %%acc0, %[hi]\n\t" \
431 "asl.l #3, %[hi]" \
432 : [hi] "=d" (hi) \
433 : [a] "r" ((x)), [b] "r" ((y))); \
434 hi; \
436 /* Define dummy mad_f_scale64 to prevent libmad from defining MAD_F_SCALEBITS
437 below. Having MAD_F_SCALEBITS defined breaks the PRESHIFT macro in synth.c
438 */
/* Placeholder definition: ignores hi and yields lo unchanged.  Because the
 * macro is defined here, the "#if !defined(mad_f_scale64)" fallback is
 * skipped and MAD_F_SCALEBITS stays undefined for this target. */
439 #define mad_f_scale64(hi, lo) (lo)
441 /* --- Default ------------------------------------------------------------- */
443 # elif defined(FPM_DEFAULT)
446 * This version is the most portable but it loses significant accuracy.
447 * Furthermore, accuracy is biased against the second argument, so care
448 * should be taken when ordering operands.
450 * The scale factors are constant as this is not used with SSO.
452 * Pre-rounding is required to stay within the limits of compliance.
/*
 * Portable multiply: each operand is reduced before the 32-bit product is
 * formed (x loses 12 bits, y loses 16 bits).  Unless OPT_SPEED is defined,
 * half of the discarded range is added first so the reduction rounds
 * instead of truncating.
 */
# if !defined(OPT_SPEED)
#  define mad_f_mul(x, y)	((((x) + (1L << 11)) >> 12) *  \
				 (((y) + (1L << 15)) >> 16))
# else
#  define mad_f_mul(x, y)	(((x) >> 12) * ((y) >> 16))
# endif
461 /* ------------------------------------------------------------------------- */
463 # else
464 # error "no FPM selected"
465 # endif
467 /* default implementations */
/* Generic mad_f_mul, used when the selected FPM branch did not supply one:
 * forms the 64-bit product with MAD_F_MLX and reduces it with mad_f_scale64.
 * NOTE(review): the closing line of this statement expression is absent from
 * this listing. */
469 # if !defined(mad_f_mul)
470 # define mad_f_mul(x, y) \
471 ({ register mad_fixed64hi_t __hi; \
472 register mad_fixed64lo_t __lo; \
473 MAD_F_MLX(__hi, __lo, (x), (y)); \
474 mad_f_scale64(__hi, __lo); \
476 # endif
/*
 * Accumulator macros for targets without a native multiply-accumulate:
 * only the 'lo' word is used and every product goes through mad_f_mul().
 */
# ifndef MAD_F_MLA
#  define MAD_F_ML0(hi, lo, x, y)	((lo)  = mad_f_mul((x), (y)))
#  define MAD_F_MLA(hi, lo, x, y)	((lo) += mad_f_mul((x), (y)))
#  define MAD_F_MLN(hi, lo)		((lo)  = -(lo))
#  define MAD_F_MLZ(hi, lo)		((void) (hi), (mad_fixed_t) (lo))
# endif

/* Start an accumulation: by default a plain 64-bit multiply. */
# ifndef MAD_F_ML0
#  define MAD_F_ML0(hi, lo, x, y)	MAD_F_MLX((hi), (lo), (x), (y))
# endif

/*
 * Negate the (hi, lo) pair: lo is negated; hi becomes ~hi when the new lo
 * is non-zero and -hi when it is zero.
 */
# ifndef MAD_F_MLN
#  define MAD_F_MLN(hi, lo)		((hi) = ((lo) = -(lo)) ? ~(hi) : -(hi))
# endif

/* Finish an accumulation: scale the 64-bit pair to a mad_fixed_t. */
# ifndef MAD_F_MLZ
#  define MAD_F_MLZ(hi, lo)		mad_f_scale64((hi), (lo))
# endif
/*
 * Generic mad_f_scale64(hi, lo): reduce a 64-bit product held as a signed
 * high word and an unsigned low word to a mad_fixed_t by shifting the pair
 * right by MAD_F_SCALEBITS.
 *
 * hi is converted to the unsigned low-word type before being shifted left:
 * hi is negative for negative products, and left-shifting a negative signed
 * value is undefined behaviour.  The bit pattern of the result is unchanged.
 *
 * With OPT_ACCURACY one extra bit is kept, 1 is added and the value is
 * shifted right once more, which rounds the result.
 */
# if !defined(mad_f_scale64)
#  if defined(OPT_ACCURACY)
#   define mad_f_scale64(hi, lo)  \
    ((((mad_fixed_t)  \
       (((mad_fixed64lo_t) (hi) << (32 - (MAD_F_SCALEBITS - 1))) |  \
        ((lo) >> (MAD_F_SCALEBITS - 1)))) + 1) >> 1)
#  else
#   define mad_f_scale64(hi, lo)  \
    ((mad_fixed_t)  \
     (((mad_fixed64lo_t) (hi) << (32 - MAD_F_SCALEBITS)) |  \
      ((lo) >> MAD_F_SCALEBITS)))
#  endif
#  define MAD_F_SCALEBITS  MAD_F_FRACBITS
# endif
512 /* C routines */
514 mad_fixed_t mad_f_abs(mad_fixed_t);
515 mad_fixed_t mad_f_div(mad_fixed_t, mad_fixed_t);
517 # endif