1 /****************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
9 * Copyright (C) 2007 Michael Giacomelli
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version 2
14 * of the License, or (at your option) any later version.
16 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
17 * KIND, either express or implied.
19 ****************************************************************************/
25 fixed64
IntTo64(int x
){
27 unsigned char *p
= (unsigned char *)&res
;
29 #ifdef ROCKBOX_BIG_ENDIAN
31 p
[4] = (x
& 0xff00)>>8;
32 p
[3] = (x
& 0xff0000)>>16;
33 p
[2] = (x
& 0xff000000)>>24;
36 p
[3] = (x
& 0xff00)>>8;
37 p
[4] = (x
& 0xff0000)>>16;
38 p
[5] = (x
& 0xff000000)>>24;
43 int IntFrom64(fixed64 x
)
46 unsigned char *p
= (unsigned char *)&x
;
48 #ifdef ROCKBOX_BIG_ENDIAN
49 res
= p
[5] | (p
[4]<<8) | (p
[3]<<16) | (p
[2]<<24);
51 res
= p
[2] | (p
[3]<<8) | (p
[4]<<16) | (p
[5]<<24);
56 fixed32
Fixed32From64(fixed64 x
)
58 return x
& 0xFFFFFFFF;
61 fixed64
Fixed32To64(fixed32 x
)
68 * Helper functions for wma_window.
74 inline void vector_fmul_add_add(fixed32
*dst
, const fixed32
*data
,
75 const fixed32
*window
, int n
)
77 /* Block sizes are always power of two */
80 "ldmia %[d]!, {r0, r1};"
81 "ldmia %[w]!, {r4, r5};"
82 /* consume the first data and window value so we can use those
84 "smull r8, r9, r0, r4;"
85 "ldmia %[dst], {r0, r4};"
86 "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
87 "smull r8, r9, r1, r5;"
88 "add r1, r4, r9, lsl #1;"
89 "stmia %[dst]!, {r0, r1};"
90 "subs %[n], %[n], #2;"
92 : [d
] "+r" (data
), [w
] "+r" (window
), [dst
] "+r" (dst
), [n
] "+r" (n
)
93 : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
96 inline void vector_fmul_reverse(fixed32
*dst
, const fixed32
*src0
, const fixed32
*src1
,
99 /* Block sizes are always power of two */
101 "add %[s1], %[s1], %[n], lsl #2;"
103 "ldmia %[s0]!, {r0, r1};"
104 "ldmdb %[s1]!, {r4, r5};"
105 "smull r8, r9, r0, r5;"
106 "mov r0, r9, lsl #1;"
107 "smull r8, r9, r1, r4;"
108 "mov r1, r9, lsl #1;"
109 "stmia %[dst]!, {r0, r1};"
110 "subs %[n], %[n], #2;"
112 : [s0
] "+r" (src0
), [s1
] "+r" (src1
), [dst
] "+r" (dst
), [n
] "+r" (len
)
113 : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
116 #elif defined(CPU_COLDFIRE)
118 inline void vector_fmul_add_add(fixed32
*dst
, const fixed32
*data
,
119 const fixed32
*window
, int n
)
121 /* Block sizes are always power of two. Smallest block is always way bigger
125 "movem.l (%[d]), %%d0-%%d3;"
126 "movem.l (%[w]), %%d4-%%d5/%%a0-%%a1;"
127 "mac.l %%d0, %%d4, %%acc0;"
128 "mac.l %%d1, %%d5, %%acc1;"
129 "mac.l %%d2, %%a0, %%acc2;"
130 "mac.l %%d3, %%a1, %%acc3;"
131 "lea.l (16, %[d]), %[d];"
132 "lea.l (16, %[w]), %[w];"
133 "movclr.l %%acc0, %%d0;"
134 "movclr.l %%acc1, %%d1;"
135 "movclr.l %%acc2, %%d2;"
136 "movclr.l %%acc3, %%d3;"
137 "movem.l (%[dst]), %%d4-%%d5/%%a0-%%a1;"
142 "movem.l %%d0-%%d3, (%[dst]);"
143 "lea.l (16, %[dst]), %[dst];"
146 : [d
] "+a" (data
), [w
] "+a" (window
), [dst
] "+a" (dst
), [n
] "+d" (n
)
147 : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
150 inline void vector_fmul_reverse(fixed32
*dst
, const fixed32
*src0
, const fixed32
*src1
,
153 /* Block sizes are always power of two. Smallest block is always way bigger
156 "lea.l (-16, %[s1], %[n]*4), %[s1];"
158 "movem.l (%[s0]), %%d0-%%d3;"
159 "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
160 "mac.l %%d0, %%a1, %%acc0;"
161 "mac.l %%d1, %%a0, %%acc1;"
162 "mac.l %%d2, %%d5, %%acc2;"
163 "mac.l %%d3, %%d4, %%acc3;"
164 "lea.l (16, %[s0]), %[s0];"
165 "lea.l (-16, %[s1]), %[s1];"
166 "movclr.l %%acc0, %%d0;"
167 "movclr.l %%acc1, %%d1;"
168 "movclr.l %%acc2, %%d2;"
169 "movclr.l %%acc3, %%d3;"
170 "movem.l %%d0-%%d3, (%[dst]);"
171 "lea.l (16, %[dst]), %[dst];"
174 : [s0
] "+a" (src0
), [s1
] "+a" (src1
), [dst
] "+a" (dst
), [n
] "+d" (len
)
175 : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
180 inline void vector_fmul_add_add(fixed32
*dst
, const fixed32
*src0
, const fixed32
*src1
, int len
){
183 dst
[i
] = fixmul32b(src0
[i
], src1
[i
]) + dst
[i
];
186 inline void vector_fmul_reverse(fixed32
*dst
, const fixed32
*src0
, const fixed32
*src1
, int len
){
190 dst
[i
] = fixmul32b(src0
[i
], src1
[-i
]);
Not performance sensitive code here
200 fixed32
fixdiv32(fixed32 x
, fixed32 y
)
210 return (fixed32
)(temp
/ y
);
213 fixed64
fixdiv64(fixed64 x
, fixed64 y
)
220 return 0x07ffffffffffffffLL
;
222 temp
<<= PRECISION64
;
223 return (fixed64
)(temp
/ y
);
226 fixed32
fixsqrt32(fixed32 x
)
229 unsigned long r
= 0, s
, v
= (unsigned long)x
;
231 #define STEP(k) s = r + (1 << k * 2); r >>= 1; \
232 if (s <= v) { v -= s; r |= (1 << k * 2); }
251 return (fixed32
)(r
<< (PRECISION
/ 2));
/* Inverse gain of the circular CORDIC rotation, in s0.31 format.
 * Starting the rotation from this value instead of 1.0 pre-compensates
 * the cumulative gain of the iterations. */
static const long cordic_circular_gain = 0xb2458939; /* 0.607252929 */
/* Table of values of atan(2^-i), i = 0..31, in 0.32 format fractions of
 * pi where pi = 0xffffffff / 2.  Used as the per-iteration rotation
 * angles of the CORDIC loop; the trailing zero entries mark where the
 * angles fall below the representable resolution.
 *
 * NOTE(review): the closing brace/terminator of this initializer was
 * missing in the extracted source; restored here. */
static const unsigned long atan_table[] = {
    0x1fffffff, /* +0.785398163 (or pi/4) */
    0x12e4051d, /* +0.463647609 */
    0x09fb385b, /* +0.244978663 */
    0x051111d4, /* +0.124354995 */
    0x028b0d43, /* +0.062418810 */
    0x0145d7e1, /* +0.031239833 */
    0x00a2f61e, /* +0.015623729 */
    0x00517c55, /* +0.007812341 */
    0x0028be53, /* +0.003906230 */
    0x00145f2e, /* +0.001953123 */
    0x000a2f98, /* +0.000976562 */
    0x000517cc, /* +0.000488281 */
    0x00028be6, /* +0.000244141 */
    0x000145f3, /* +0.000122070 */
    0x0000a2f9, /* +0.000061035 */
    0x0000517c, /* +0.000030518 */
    0x000028be, /* +0.000015259 */
    0x0000145f, /* +0.000007629 */
    0x00000a2f, /* +0.000003815 */
    0x00000517, /* +0.000001907 */
    0x0000028b, /* +0.000000954 */
    0x00000145, /* +0.000000477 */
    0x000000a2, /* +0.000000238 */
    0x00000051, /* +0.000000119 */
    0x00000028, /* +0.000000060 */
    0x00000014, /* +0.000000030 */
    0x0000000a, /* +0.000000015 */
    0x00000005, /* +0.000000007 */
    0x00000002, /* +0.000000004 */
    0x00000001, /* +0.000000002 */
    0x00000000, /* +0.000000001 */
    0x00000000, /* +0.000000000 */
};
298 Below here functions do not use standard fixed precision!
303 * Implements sin and cos using CORDIC rotation.
305 * @param phase has range from 0 to 0xffffffff, representing 0 and
307 * @param cos return address for cos
308 * @return sin of phase, value is a signed value from LONG_MIN to LONG_MAX,
309 * representing -1 and 1 respectively.
311 * Gives at least 24 bits precision (last 2-8 bits or so are probably off)
313 long fsincos(unsigned long phase
, fixed32
*cos
)
315 int32_t x
, x1
, y
, y1
;
319 /* Setup initial vector */
320 x
= cordic_circular_gain
;
324 /* The phase has to be somewhere between 0..pi for this to work right */
325 if (z
< 0xffffffff / 4) {
326 /* z in first quadrant, z += pi/2 to correct */
329 } else if (z
< 3 * (0xffffffff / 4)) {
330 /* z in third quadrant, z -= pi/2 to correct */
333 /* z in fourth quadrant, z -= 3pi/2 to correct */
335 z
-= 3 * (0xffffffff / 4);
338 /* Each iteration adds roughly 1-bit of extra precision */
339 for (i
= 0; i
< 31; i
++) {
344 /* Decided which direction to rotate vector. Pivot point is pi/2 */
345 if (z
>= 0xffffffff / 4) {