libmpeg3/audio/dct.c

   1 /*
   2  *
   3  *  This file is part of libmpeg3
   4  *
   5  *  libmpeg3 is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  libmpeg3 is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with GNU Make; see the file COPYING.  If not, write to
  17  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  18  *
  19  */
  20
  21 /*
  22  * Discrete Cosine Transform (DCT) for subband synthesis
  23  * optimized for machines with no auto-increment.
  24  * The performance is highly compiler dependent. Maybe
  25  * the dct64.c version for 'normal' processors may be faster
  26  * even for Intel processors.
  27  */
  28
  29 #include "mpeg3private.h"
  30 #include "mpeg3protos.h"
  31 #include "tables.h"
  32
  33 #include <math.h>
  34
  35 int mpeg3audio_dct64_1(float *out0, float *out1, float *b1, float *b2, float *samples)
  36 {
  37         register float *costab = mpeg3_pnts[0];
  38
  39         b1[0x00] = samples[0x00] + samples[0x1F];
  40         b1[0x01] = samples[0x01] + samples[0x1E];
  41         b1[0x1F] = (samples[0x00] - samples[0x1F]) * costab[0x0];
  42         b1[0x1E] = (samples[0x01] - samples[0x1E]) * costab[0x1];
  43
  44         b1[0x02] = samples[0x02] + samples[0x1D];
  45         b1[0x03] = samples[0x03] + samples[0x1C];
  46         b1[0x1D] = (samples[0x02] - samples[0x1D]) * costab[0x2];
  47         b1[0x1C] = (samples[0x03] - samples[0x1C]) * costab[0x3];
  48
  49         b1[0x04] = samples[0x04] + samples[0x1B];
  50         b1[0x05] = samples[0x05] + samples[0x1A];
  51         b1[0x1B] = (samples[0x04] - samples[0x1B]) * costab[0x4];
  52         b1[0x1A] = (samples[0x05] - samples[0x1A]) * costab[0x5];
  53
  54         b1[0x06] = samples[0x06] + samples[0x19];
  55         b1[0x07] = samples[0x07] + samples[0x18];
  56         b1[0x19] = (samples[0x06] - samples[0x19]) * costab[0x6];
  57         b1[0x18] = (samples[0x07] - samples[0x18]) * costab[0x7];
  58
  59         b1[0x08] = samples[0x08] + samples[0x17];
  60         b1[0x09] = samples[0x09] + samples[0x16];
  61         b1[0x17] = (samples[0x08] - samples[0x17]) * costab[0x8];
  62         b1[0x16] = (samples[0x09] - samples[0x16]) * costab[0x9];
  63
  64         b1[0x0A] = samples[0x0A] + samples[0x15];
  65         b1[0x0B] = samples[0x0B] + samples[0x14];
  66         b1[0x15] = (samples[0x0A] - samples[0x15]) * costab[0xA];
  67         b1[0x14] = (samples[0x0B] - samples[0x14]) * costab[0xB];
  68
  69         b1[0x0C] = samples[0x0C] + samples[0x13];
  70         b1[0x0D] = samples[0x0D] + samples[0x12];
  71         b1[0x13] = (samples[0x0C] - samples[0x13]) * costab[0xC];
  72         b1[0x12] = (samples[0x0D] - samples[0x12]) * costab[0xD];
  73
  74         b1[0x0E] = samples[0x0E] + samples[0x11];
  75         b1[0x0F] = samples[0x0F] + samples[0x10];
  76         b1[0x11] = (samples[0x0E] - samples[0x11]) * costab[0xE];
  77         b1[0x10] = (samples[0x0F] - samples[0x10]) * costab[0xF];
  78
  79         costab = mpeg3_pnts[1];
  80
  81         b2[0x00] = b1[0x00] + b1[0x0F];
  82         b2[0x01] = b1[0x01] + b1[0x0E];
  83         b2[0x0F] = (b1[0x00] - b1[0x0F]) * costab[0];
  84         b2[0x0E] = (b1[0x01] - b1[0x0E]) * costab[1];
  85
  86         b2[0x02] = b1[0x02] + b1[0x0D];
  87         b2[0x03] = b1[0x03] + b1[0x0C];
  88         b2[0x0D] = (b1[0x02] - b1[0x0D]) * costab[2];
  89         b2[0x0C] = (b1[0x03] - b1[0x0C]) * costab[3];
  90
  91         b2[0x04] = b1[0x04] + b1[0x0B];
  92         b2[0x05] = b1[0x05] + b1[0x0A];
  93         b2[0x0B] = (b1[0x04] - b1[0x0B]) * costab[4];
  94         b2[0x0A] = (b1[0x05] - b1[0x0A]) * costab[5];
  95
  96         b2[0x06] = b1[0x06] + b1[0x09];
  97         b2[0x07] = b1[0x07] + b1[0x08];
  98         b2[0x09] = (b1[0x06] - b1[0x09]) * costab[6];
  99         b2[0x08] = (b1[0x07] - b1[0x08]) * costab[7];
 100
 101         /* */
 102
 103         b2[0x10] = b1[0x10] + b1[0x1F];
 104         b2[0x11] = b1[0x11] + b1[0x1E];
 105         b2[0x1F] = (b1[0x1F] - b1[0x10]) * costab[0];
 106         b2[0x1E] = (b1[0x1E] - b1[0x11]) * costab[1];
 107
 108         b2[0x12] = b1[0x12] + b1[0x1D];
 109         b2[0x13] = b1[0x13] + b1[0x1C];
 110         b2[0x1D] = (b1[0x1D] - b1[0x12]) * costab[2];
 111         b2[0x1C] = (b1[0x1C] - b1[0x13]) * costab[3];
 112
 113         b2[0x14] = b1[0x14] + b1[0x1B];
 114         b2[0x15] = b1[0x15] + b1[0x1A];
 115         b2[0x1B] = (b1[0x1B] - b1[0x14]) * costab[4];
 116         b2[0x1A] = (b1[0x1A] - b1[0x15]) * costab[5];
 117
 118         b2[0x16] = b1[0x16] + b1[0x19];
 119         b2[0x17] = b1[0x17] + b1[0x18];
 120         b2[0x19] = (b1[0x19] - b1[0x16]) * costab[6];
 121         b2[0x18] = (b1[0x18] - b1[0x17]) * costab[7];
 122
 123         costab = mpeg3_pnts[2];
 124
 125         b1[0x00] = b2[0x00] + b2[0x07];
 126         b1[0x07] = (b2[0x00] - b2[0x07]) * costab[0];
 127         b1[0x01] = b2[0x01] + b2[0x06];
 128         b1[0x06] = (b2[0x01] - b2[0x06]) * costab[1];
 129         b1[0x02] = b2[0x02] + b2[0x05];
 130         b1[0x05] = (b2[0x02] - b2[0x05]) * costab[2];
 131         b1[0x03] = b2[0x03] + b2[0x04];
 132         b1[0x04] = (b2[0x03] - b2[0x04]) * costab[3];
 133
 134         b1[0x08] = b2[0x08] + b2[0x0F];
 135         b1[0x0F] = (b2[0x0F] - b2[0x08]) * costab[0];
 136         b1[0x09] = b2[0x09] + b2[0x0E];
 137         b1[0x0E] = (b2[0x0E] - b2[0x09]) * costab[1];
 138         b1[0x0A] = b2[0x0A] + b2[0x0D];
 139         b1[0x0D] = (b2[0x0D] - b2[0x0A]) * costab[2];
 140         b1[0x0B] = b2[0x0B] + b2[0x0C];
 141         b1[0x0C] = (b2[0x0C] - b2[0x0B]) * costab[3];
 142
 143         b1[0x10] = b2[0x10] + b2[0x17];
 144         b1[0x17] = (b2[0x10] - b2[0x17]) * costab[0];
 145         b1[0x11] = b2[0x11] + b2[0x16];
 146         b1[0x16] = (b2[0x11] - b2[0x16]) * costab[1];
 147         b1[0x12] = b2[0x12] + b2[0x15];
 148         b1[0x15] = (b2[0x12] - b2[0x15]) * costab[2];
 149         b1[0x13] = b2[0x13] + b2[0x14];
 150         b1[0x14] = (b2[0x13] - b2[0x14]) * costab[3];
 151
 152         b1[0x18] = b2[0x18] + b2[0x1F];
 153         b1[0x1F] = (b2[0x1F] - b2[0x18]) * costab[0];
 154         b1[0x19] = b2[0x19] + b2[0x1E];
 155         b1[0x1E] = (b2[0x1E] - b2[0x19]) * costab[1];
 156         b1[0x1A] = b2[0x1A] + b2[0x1D];
 157         b1[0x1D] = (b2[0x1D] - b2[0x1A]) * costab[2];
 158         b1[0x1B] = b2[0x1B] + b2[0x1C];
 159         b1[0x1C] = (b2[0x1C] - b2[0x1B]) * costab[3];
 160
 161         {
 162                 register float const cos0 = mpeg3_pnts[3][0];
 163                 register float const cos1 = mpeg3_pnts[3][1];
 164
 165                 b2[0x00] = b1[0x00] + b1[0x03];
 166                 b2[0x03] = (b1[0x00] - b1[0x03]) * cos0;
 167                 b2[0x01] = b1[0x01] + b1[0x02];
 168                 b2[0x02] = (b1[0x01] - b1[0x02]) * cos1;
 169
 170                 b2[0x04] = b1[0x04] + b1[0x07];
 171                 b2[0x07] = (b1[0x07] - b1[0x04]) * cos0;
 172                 b2[0x05] = b1[0x05] + b1[0x06];
 173                 b2[0x06] = (b1[0x06] - b1[0x05]) * cos1;
 174
 175                 b2[0x08] = b1[0x08] + b1[0x0B];
 176                 b2[0x0B] = (b1[0x08] - b1[0x0B]) * cos0;
 177                 b2[0x09] = b1[0x09] + b1[0x0A];
 178                 b2[0x0A] = (b1[0x09] - b1[0x0A]) * cos1;
 179
 180                 b2[0x0C] = b1[0x0C] + b1[0x0F];
 181                 b2[0x0F] = (b1[0x0F] - b1[0x0C]) * cos0;
 182                 b2[0x0D] = b1[0x0D] + b1[0x0E];
 183                 b2[0x0E] = (b1[0x0E] - b1[0x0D]) * cos1;
 184
 185                 b2[0x10] = b1[0x10] + b1[0x13];
 186                 b2[0x13] = (b1[0x10] - b1[0x13]) * cos0;
 187                 b2[0x11] = b1[0x11] + b1[0x12];
 188                 b2[0x12] = (b1[0x11] - b1[0x12]) * cos1;
 189
 190                 b2[0x14] = b1[0x14] + b1[0x17];
 191                 b2[0x17] = (b1[0x17] - b1[0x14]) * cos0;
 192                 b2[0x15] = b1[0x15] + b1[0x16];
 193                 b2[0x16] = (b1[0x16] - b1[0x15]) * cos1;
 194
 195                 b2[0x18] = b1[0x18] + b1[0x1B];
 196                 b2[0x1B] = (b1[0x18] - b1[0x1B]) * cos0;
 197                 b2[0x19] = b1[0x19] + b1[0x1A];
 198                 b2[0x1A] = (b1[0x19] - b1[0x1A]) * cos1;
 199
 200                 b2[0x1C] = b1[0x1C] + b1[0x1F];
 201                 b2[0x1F] = (b1[0x1F] - b1[0x1C]) * cos0;
 202                 b2[0x1D] = b1[0x1D] + b1[0x1E];
 203                 b2[0x1E] = (b1[0x1E] - b1[0x1D]) * cos1;
 204         }
 205
 206         {
 207                 register float const cos0 = mpeg3_pnts[4][0];
 208
 209                 b1[0x00] = b2[0x00] + b2[0x01];
 210                 b1[0x01] = (b2[0x00] - b2[0x01]) * cos0;
 211                 b1[0x02] = b2[0x02] + b2[0x03];
 212                 b1[0x03] = (b2[0x03] - b2[0x02]) * cos0;
 213                 b1[0x02] += b1[0x03];
 214
 215                 b1[0x04] = b2[0x04] + b2[0x05];
 216                 b1[0x05] = (b2[0x04] - b2[0x05]) * cos0;
 217                 b1[0x06] = b2[0x06] + b2[0x07];
 218                 b1[0x07] = (b2[0x07] - b2[0x06]) * cos0;
 219                 b1[0x06] += b1[0x07];
 220                 b1[0x04] += b1[0x06];
 221                 b1[0x06] += b1[0x05];
 222                 b1[0x05] += b1[0x07];
 223
 224                 b1[0x08] = b2[0x08] + b2[0x09];
 225                 b1[0x09] = (b2[0x08] - b2[0x09]) * cos0;
 226                 b1[0x0A] = b2[0x0A] + b2[0x0B];
 227                 b1[0x0B] = (b2[0x0B] - b2[0x0A]) * cos0;
 228                 b1[0x0A] += b1[0x0B];
 229
 230                 b1[0x0C] = b2[0x0C] + b2[0x0D];
 231                 b1[0x0D] = (b2[0x0C] - b2[0x0D]) * cos0;
 232                 b1[0x0E] = b2[0x0E] + b2[0x0F];
 233                 b1[0x0F] = (b2[0x0F] - b2[0x0E]) * cos0;
 234                 b1[0x0E] += b1[0x0F];
 235                 b1[0x0C] += b1[0x0E];
 236                 b1[0x0E] += b1[0x0D];
 237                 b1[0x0D] += b1[0x0F];
 238
 239                 b1[0x10] = b2[0x10] + b2[0x11];
 240                 b1[0x11] = (b2[0x10] - b2[0x11]) * cos0;
 241                 b1[0x12] = b2[0x12] + b2[0x13];
 242                 b1[0x13] = (b2[0x13] - b2[0x12]) * cos0;
 243                 b1[0x12] += b1[0x13];
 244
 245                 b1[0x14] = b2[0x14] + b2[0x15];
 246                 b1[0x15] = (b2[0x14] - b2[0x15]) * cos0;
 247                 b1[0x16] = b2[0x16] + b2[0x17];
 248                 b1[0x17] = (b2[0x17] - b2[0x16]) * cos0;
 249                 b1[0x16] += b1[0x17];
 250                 b1[0x14] += b1[0x16];
 251                 b1[0x16] += b1[0x15];
 252                 b1[0x15] += b1[0x17];
 253
 254                 b1[0x18] = b2[0x18] + b2[0x19];
 255                 b1[0x19] = (b2[0x18] - b2[0x19]) * cos0;
 256                 b1[0x1A] = b2[0x1A] + b2[0x1B];
 257                 b1[0x1B] = (b2[0x1B] - b2[0x1A]) * cos0;
 258                 b1[0x1A] += b1[0x1B];
 259
 260                 b1[0x1C] = b2[0x1C] + b2[0x1D];
 261                 b1[0x1D] = (b2[0x1C] - b2[0x1D]) * cos0;
 262                 b1[0x1E] = b2[0x1E] + b2[0x1F];
 263                 b1[0x1F] = (b2[0x1F] - b2[0x1E]) * cos0;
 264                 b1[0x1E] += b1[0x1F];
 265                 b1[0x1C] += b1[0x1E];
 266                 b1[0x1E] += b1[0x1D];
 267                 b1[0x1D] += b1[0x1F];
 268         }
 269
 270         out0[0x10*16] = b1[0x00];
 271         out0[0x10*12] = b1[0x04];
 272         out0[0x10* 8] = b1[0x02];
 273         out0[0x10* 4] = b1[0x06];
 274         out0[0x10* 0] = b1[0x01];
 275         out1[0x10* 0] = b1[0x01];
 276         out1[0x10* 4] = b1[0x05];
 277         out1[0x10* 8] = b1[0x03];
 278         out1[0x10*12] = b1[0x07];
 279
 280         out0[0x10*14] = b1[0x08] + b1[0x0C];
 281         out0[0x10*10] = b1[0x0C] + b1[0x0a];
 282         out0[0x10* 6] = b1[0x0A] + b1[0x0E];
 283         out0[0x10* 2] = b1[0x0E] + b1[0x09];
 284         out1[0x10* 2] = b1[0x09] + b1[0x0D];
 285         out1[0x10* 6] = b1[0x0D] + b1[0x0B];
 286         out1[0x10*10] = b1[0x0B] + b1[0x0F];
 287         out1[0x10*14] = b1[0x0F];
 288
 289         {
 290                 register float tmp;
 291                 tmp = b1[0x18] + b1[0x1C];
 292                 out0[0x10*15] = tmp + b1[0x10];
 293                 out0[0x10*13] = tmp + b1[0x14];
 294                 tmp = b1[0x1C] + b1[0x1A];
 295                 out0[0x10*11] = tmp + b1[0x14];
 296                 out0[0x10* 9] = tmp + b1[0x12];
 297                 tmp = b1[0x1A] + b1[0x1E];
 298                 out0[0x10* 7] = tmp + b1[0x12];
 299                 out0[0x10* 5] = tmp + b1[0x16];
 300                 tmp = b1[0x1E] + b1[0x19];
 301                 out0[0x10* 3] = tmp + b1[0x16];
 302                 out0[0x10* 1] = tmp + b1[0x11];
 303                 tmp = b1[0x19] + b1[0x1D];
 304                 out1[0x10* 1] = tmp + b1[0x11];
 305                 out1[0x10* 3] = tmp + b1[0x15];
 306                 tmp = b1[0x1D] + b1[0x1B];
 307                 out1[0x10* 5] = tmp + b1[0x15];
 308                 out1[0x10* 7] = tmp + b1[0x13];
 309                 tmp = b1[0x1B] + b1[0x1F];
 310                 out1[0x10* 9] = tmp + b1[0x13];
 311                 out1[0x10*11] = tmp + b1[0x17];
 312                 out1[0x10*13] = b1[0x17] + b1[0x1F];
 313                 out1[0x10*15] = b1[0x1F];
 314         }
 315         return 0;
 316 }
 317
 318 /*
 319  * the call via dct64 is a trick to force GCC to use
 320  * (new) registers for the b1,b2 pointer to the bufs[xx] field
 321  */
 322 int mpeg3audio_dct64(float *a, float *b, float *c)
 323 {
 324         float bufs[0x40];
 325         return mpeg3audio_dct64_1(a, b, bufs, bufs + 0x20, c);
 326 }
 327
 328 /*//////////////////////////////////////////////////////////////// */
 329 /* */
 330 /* 9 Point Inverse Discrete Cosine Transform */
 331 /* */
 332 /* This piece of code is Copyright 1997 Mikko Tommila and is freely usable */
 333 /* by anybody. The algorithm itself is of course in the public domain. */
 334 /* */
 335 /* Again derived heuristically from the 9-point WFTA. */
 336 /* */
 337 /* The algorithm is optimized (?) for speed, not for small rounding errors or */
 338 /* good readability. */
 339 /* */
 340 /* 36 additions, 11 multiplications */
 341 /* */
 342 /* Again this is very likely sub-optimal. */
 343 /* */
 344 /* The code is optimized to use a minimum number of temporary variables, */
 345 /* so it should compile quite well even on 8-register Intel x86 processors. */
 346 /* This makes the code quite obfuscated and very difficult to understand. */
 347 /* */
 348 /* References: */
 349 /* [1] S. Winograd: "On Computing the Discrete Fourier Transform", */
 350 /*     Mathematics of Computation, Volume 32, Number 141, January 1978, */
 351 /*     Pages 175-199 */
 352
 353
 354 /*------------------------------------------------------------------*/
 355 /*                                                                  */
 356 /*    Function: Calculation of the inverse MDCT                     */
 357 /*                                                                  */
 358 /*------------------------------------------------------------------*/
 359
 360 int mpeg3audio_dct36(float *inbuf, float *o1, float *o2, float *wintab, float *tsbuf)
 361 {
 362     float tmp[18];
 363
 364         {
 365         register float *in = inbuf;
 366
 367         in[17]+=in[16]; in[16]+=in[15]; in[15]+=in[14];
 368         in[14]+=in[13]; in[13]+=in[12]; in[12]+=in[11];
 369         in[11]+=in[10]; in[10]+=in[9];  in[9] +=in[8];
 370         in[8] +=in[7];  in[7] +=in[6];  in[6] +=in[5];
 371         in[5] +=in[4];  in[4] +=in[3];  in[3] +=in[2];
 372         in[2] +=in[1];  in[1] +=in[0];
 373
 374         in[17]+=in[15]; in[15]+=in[13]; in[13]+=in[11]; in[11]+=in[9];
 375         in[9] +=in[7];  in[7] +=in[5];  in[5] +=in[3];  in[3] +=in[1];
 376
 377
 378         {
 379                 float t3;
 380                 {
 381                         float t0, t1, t2;
 382
 383                         t0 = mpeg3_COS6_2 * (in[8] + in[16] - in[4]);
 384                         t1 = mpeg3_COS6_2 * in[12];
 385
 386                         t3 = in[0];
 387                         t2 = t3 - t1 - t1;
 388                         tmp[1] = tmp[7] = t2 - t0;
 389                         tmp[4]          = t2 + t0 + t0;
 390                         t3 += t1;
 391
 392                         t2 = mpeg3_COS6_1 * (in[10] + in[14] - in[2]);
 393                         tmp[1] -= t2;
 394                         tmp[7] += t2;
 395                 }
 396                 {
 397                         float t0, t1, t2;
 398
 399                         t0 = mpeg3_cos9[0] * (in[4] + in[8] );
 400                         t1 = mpeg3_cos9[1] * (in[8] - in[16]);
 401                         t2 = mpeg3_cos9[2] * (in[4] + in[16]);
 402
 403                         tmp[2] = tmp[6] = t3 - t0      - t2;
 404                         tmp[0] = tmp[8] = t3 + t0 + t1;
 405                         tmp[3] = tmp[5] = t3      - t1 + t2;
 406                 }
 407         }
 408         {
 409                 float t1, t2, t3;
 410
 411                 t1 = mpeg3_cos18[0] * (in[2]  + in[10]);
 412                 t2 = mpeg3_cos18[1] * (in[10] - in[14]);
 413                 t3 = mpeg3_COS6_1   * in[6];
 414
 415                 {
 416                         float t0 = t1 + t2 + t3;
 417                         tmp[0] += t0;
 418                         tmp[8] -= t0;
 419                 }
 420
 421                 t2 -= t3;
 422                 t1 -= t3;
 423
 424                 t3 = mpeg3_cos18[2] * (in[2] + in[14]);
 425
 426                 t1 += t3;
 427                 tmp[3] += t1;
 428                 tmp[5] -= t1;
 429
 430                 t2 -= t3;
 431                 tmp[2] += t2;
 432                 tmp[6] -= t2;
 433         }
 434
 435
 436         {
 437                 float t0, t1, t2, t3, t4, t5, t6, t7;
 438
 439                 t1 = mpeg3_COS6_2 * in[13];
 440                 t2 = mpeg3_COS6_2 * (in[9] + in[17] - in[5]);
 441
 442                 t3 = in[1] + t1;
 443                 t4 = in[1] - t1 - t1;
 444                 t5 = t4 - t2;
 445
 446                 t0 = mpeg3_cos9[0] * (in[5] + in[9]);
 447                 t1 = mpeg3_cos9[1] * (in[9] - in[17]);
 448
 449                 tmp[13] = (t4 + t2 + t2) * mpeg3_tfcos36[17-13];
 450                 t2 = mpeg3_cos9[2] * (in[5] + in[17]);
 451
 452                 t6 = t3 - t0 - t2;
 453                 t0 += t3 + t1;
 454                 t3 += t2 - t1;
 455
 456                 t2 = mpeg3_cos18[0] * (in[3]  + in[11]);
 457                 t4 = mpeg3_cos18[1] * (in[11] - in[15]);
 458                 t7 = mpeg3_COS6_1 * in[7];
 459
 460                 t1 = t2 + t4 + t7;
 461                 tmp[17] = (t0 + t1) * mpeg3_tfcos36[17-17];
 462                 tmp[9]  = (t0 - t1) * mpeg3_tfcos36[17-9];
 463                 t1 = mpeg3_cos18[2] * (in[3] + in[15]);
 464                 t2 += t1 - t7;
 465
 466                 tmp[14] = (t3 + t2) * mpeg3_tfcos36[17-14];
 467                 t0 = mpeg3_COS6_1 * (in[11] + in[15] - in[3]);
 468                 tmp[12] = (t3 - t2) * mpeg3_tfcos36[17-12];
 469
 470                 t4 -= t1 + t7;
 471
 472                 tmp[16] = (t5 - t0) * mpeg3_tfcos36[17-16];
 473                 tmp[10] = (t5 + t0) * mpeg3_tfcos36[17-10];
 474                 tmp[15] = (t6 + t4) * mpeg3_tfcos36[17-15];
 475                 tmp[11] = (t6 - t4) * mpeg3_tfcos36[17-11];
 476             }
 477
 478 #define MACRO(v) \
 479         { \
 480         float tmpval; \
 481         tmpval = tmp[(v)] + tmp[17-(v)]; \
 482         out2[9+(v)] = tmpval * w[27+(v)]; \
 483         out2[8-(v)] = tmpval * w[26-(v)]; \
 484         tmpval = tmp[(v)] - tmp[17-(v)]; \
 485         ts[SBLIMIT*(8-(v))] = out1[8-(v)] + tmpval * w[8-(v)]; \
 486         ts[SBLIMIT*(9+(v))] = out1[9+(v)] + tmpval * w[9+(v)]; \
 487         }
 488
 489                 {
 490                         register float *out2 = o2;
 491                         register float *w = wintab;
 492                         register float *out1 = o1;
 493                         register float *ts = tsbuf;
 494
 495                         MACRO(0);
 496                         MACRO(1);
 497                         MACRO(2);
 498                         MACRO(3);
 499                         MACRO(4);
 500                         MACRO(5);
 501                         MACRO(6);
 502                         MACRO(7);
 503                         MACRO(8);
 504                 }
 505         }
 506         return 0;
 507 }
 508
 509 /*
 510  * new DCT12
 511  */
 512 int mpeg3audio_dct12(float *in,float *rawout1,float *rawout2,register float *wi,register float *ts)
 513 {
 514 #define DCT12_PART1 \
 515             in5 = in[5*3]; \
 516     in5 += (in4 = in[4*3]); \
 517     in4 += (in3 = in[3*3]); \
 518     in3 += (in2 = in[2*3]); \
 519     in2 += (in1 = in[1*3]); \
 520     in1 += (in0 = in[0*3]); \
 521                             \
 522     in5 += in3; in3 += in1; \
 523                             \
 524     in2 *= mpeg3_COS6_1; \
 525     in3 *= mpeg3_COS6_1; \
 526
 527 #define DCT12_PART2 \
 528         in0 += in4 * mpeg3_COS6_2; \
 529                          \
 530         in4 = in0 + in2;     \
 531         in0 -= in2;          \
 532                          \
 533         in1 += in5 * mpeg3_COS6_2; \
 534                          \
 535         in5 = (in1 + in3) * mpeg3_tfcos12[0]; \
 536         in1 = (in1 - in3) * mpeg3_tfcos12[2]; \
 537                         \
 538         in3 = in4 + in5;    \
 539         in4 -= in5;         \
 540                         \
 541         in2 = in0 + in1;    \
 542         in0 -= in1;
 543
 544
 545         {
 546         float in0,in1,in2,in3,in4,in5;
 547         register float *out1 = rawout1;
 548         ts[SBLIMIT*0] = out1[0]; ts[SBLIMIT*1] = out1[1]; ts[SBLIMIT*2] = out1[2];
 549         ts[SBLIMIT*3] = out1[3]; ts[SBLIMIT*4] = out1[4]; ts[SBLIMIT*5] = out1[5];
 550
 551         DCT12_PART1
 552
 553         {
 554                 float tmp0,tmp1 = (in0 - in4);
 555                 {
 556                         float tmp2 = (in1 - in5) * mpeg3_tfcos12[1];
 557                         tmp0 = tmp1 + tmp2;
 558                         tmp1 -= tmp2;
 559                 }
 560                 ts[(17-1)*SBLIMIT] = out1[17-1] + tmp0 * wi[11-1];
 561                 ts[(12+1)*SBLIMIT] = out1[12+1] + tmp0 * wi[6+1];
 562                 ts[(6 +1)*SBLIMIT] = out1[6 +1] + tmp1 * wi[1];
 563                 ts[(11-1)*SBLIMIT] = out1[11-1] + tmp1 * wi[5-1];
 564         }
 565
 566         DCT12_PART2
 567
 568         ts[(17-0)*SBLIMIT] = out1[17-0] + in2 * wi[11-0];
 569         ts[(12+0)*SBLIMIT] = out1[12+0] + in2 * wi[6+0];
 570         ts[(12+2)*SBLIMIT] = out1[12+2] + in3 * wi[6+2];
 571         ts[(17-2)*SBLIMIT] = out1[17-2] + in3 * wi[11-2];
 572
 573         ts[(6+0)*SBLIMIT]  = out1[6+0] + in0 * wi[0];
 574         ts[(11-0)*SBLIMIT] = out1[11-0] + in0 * wi[5-0];
 575         ts[(6+2)*SBLIMIT]  = out1[6+2] + in4 * wi[2];
 576         ts[(11-2)*SBLIMIT] = out1[11-2] + in4 * wi[5-2];
 577     }
 578
 579         in++;
 580
 581         {
 582          float in0,in1,in2,in3,in4,in5;
 583          register float *out2 = rawout2;
 584
 585          DCT12_PART1
 586
 587          {
 588                  float tmp0,tmp1 = (in0 - in4);
 589                  {
 590                          float tmp2 = (in1 - in5) * mpeg3_tfcos12[1];
 591                          tmp0 = tmp1 + tmp2;
 592                          tmp1 -= tmp2;
 593                  }
 594                  out2[5-1] = tmp0 * wi[11-1];
 595                  out2[0+1] = tmp0 * wi[6+1];
 596                  ts[(12+1)*SBLIMIT] += tmp1 * wi[1];
 597                  ts[(17-1)*SBLIMIT] += tmp1 * wi[5-1];
 598          }
 599
 600          DCT12_PART2
 601
 602          out2[5-0] = in2 * wi[11-0];
 603          out2[0+0] = in2 * wi[6+0];
 604          out2[0+2] = in3 * wi[6+2];
 605          out2[5-2] = in3 * wi[11-2];
 606
 607          ts[(12+0)*SBLIMIT] += in0 * wi[0];
 608          ts[(17-0)*SBLIMIT] += in0 * wi[5-0];
 609          ts[(12+2)*SBLIMIT] += in4 * wi[2];
 610          ts[(17-2)*SBLIMIT] += in4 * wi[5-2];
 611         }
 612
 613     in++;
 614
 615         {
 616         float in0,in1,in2,in3,in4,in5;
 617         register float *out2 = rawout2;
 618         out2[12]=out2[13]=out2[14]=out2[15]=out2[16]=out2[17]=0.0;
 619
 620         DCT12_PART1
 621
 622         {
 623                 float tmp0,tmp1 = (in0 - in4);
 624                 {
 625                         float tmp2 = (in1 - in5) * mpeg3_tfcos12[1];
 626                         tmp0 = tmp1 + tmp2;
 627                         tmp1 -= tmp2;
 628                 }
 629                 out2[11-1] = tmp0 * wi[11-1];
 630                 out2[6 +1] = tmp0 * wi[6+1];
 631                 out2[0+1] += tmp1 * wi[1];
 632                 out2[5-1] += tmp1 * wi[5-1];
 633         }
 634
 635         DCT12_PART2
 636
 637         out2[11-0] = in2 * wi[11-0];
 638         out2[6 +0] = in2 * wi[6+0];
 639         out2[6 +2] = in3 * wi[6+2];
 640         out2[11-2] = in3 * wi[11-2];
 641
 642         out2[0+0] += in0 * wi[0];
 643         out2[5-0] += in0 * wi[5-0];
 644         out2[0+2] += in4 * wi[2];
 645         out2[5-2] += in4 * wi[5-2];
 646         }
 647         return 0;
 648 }