theora-old/lib/idct.c

   1 /********************************************************************
   2  *                                                                  *
   3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
   4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
   5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
   6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
   7  *                                                                  *
   8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003                *
   9  * by the Xiph.Org Foundation http://www.xiph.org/                  *
  10  *                                                                  *
  11  ********************************************************************
  12
  13   function: C implementation of the Theora iDCT
  14   last mod: $Id$
  15
  16  ********************************************************************/
  17
  18 #include <string.h>
  19 #include "codec_internal.h"
  20
  21 #include "quant_lookup.h"
  22
  23 #define IdctAdjustBeforeShift 8
/* xCnSm = cos(n*pi/16) = sin((8-n)*pi/16), scaled by 2^16 */
  25 #define xC1S7 64277
  26 #define xC2S6 60547
  27 #define xC3S5 54491
  28 #define xC4S4 46341
  29 #define xC5S3 36410
  30 #define xC6S2 25080
  31 #define xC7S1 12785
  32
  33 /* compute the 16 bit signed 1D inverse DCT - spec version */
  34 /*
  35 static void idct_short__c ( ogg_int16_t * InputData, ogg_int16_t * OutputData ) {
  36   ogg_int32_t t[8], r;
  37   ogg_int16_t *y = InputData;
  38   ogg_int16_t *x = OutputData;
  39
  40   t[0] = y[0] + y[4];
  41   t[0] &= 0xffff;
  42   t[0] = (xC4S4 * t[0]) >> 16;
  43
  44   t[1] = y[0] - y[4];
  45   t[1] &= 0xffff;
  46   t[1] = (xC4S4 * t[1]) >> 16;
  47
  48   t[2] = ((xC6S2 * t[2]) >> 16) - ((xC2S6 * y[6]) >> 16);
  49   t[3] = ((xC2S6 * t[2]) >> 16) + ((xC6S2 * y[6]) >> 16);
  50   t[4] = ((xC7S1 * t[1]) >> 16) - ((xC1S7 * y[7]) >> 16);
  51   t[5] = ((xC3S5 * t[5]) >> 16) - ((xC5S3 * y[3]) >> 16);
  52   t[6] = ((xC5S3 * t[5]) >> 16) + ((xC3S5 * y[3]) >> 16);
  53   t[7] = ((xC1S7 * t[1]) >> 16) + ((xC7S1 * y[7]) >> 16);
  54
  55   r = t[4] + t[5];
  56   t[5] = t[4] - t[5];
  57   t[5] &= 0xffff;
  58   t[5] = (xC4S4 * (-t[5])) >> 16;
  59   t[4] = r;
  60
  61   r = t[7] + t[6];
  62   t[6] = t[7] - t[6];
  63   t[6] &= 0xffff;
  64   t[6] = (xC4S4 * t[6]) >> 16;
  65   t[7] = r;
  66
  67   r = t[0] + t[3];
  68   t[3] = t[0] - t[3];
  69   t[0] = r;
  70
  71   r = t[1] + t[2];
  72   t[2] = t[1] - t[2];
  73   t[1] = r;
  74
  75   r = t[6] + t[5];
  76   t[5] = t[6] - t[5];
  77   t[6] = r;
  78
  79   r = t[0] + t[7];
  80   r &= 0xffff;
  81   x[0] = r;
  82
  83   r = t[1] + t[6];
  84   r &= 0xffff;
  85   x[1] = r;
  86
  87   r = t[2] + t[5];
  88   r &= 0xffff;
  89   x[2] = r;
  90
  91   r = t[3] + t[4];
  92   r &= 0xffff;
  93   x[3] = r;
  94
  95   r = t[3] - t[4];
  96   r &= 0xffff;
  97   x[4] = r;
  98
  99   r = t[2] - t[5];
 100   r &= 0xffff;
 101   x[5] = r;
 102
 103   r = t[1] - t[6];
 104   r &= 0xffff;
 105   x[6] = r;
 106
 107   r = t[0] - t[7];
 108   r &= 0xffff;
 109   x[7] = r;
 110
 111 }
 112 */
 113
 114 static void dequant_slow( ogg_int16_t * dequant_coeffs,
 115                    ogg_int16_t * quantized_list,
 116                    ogg_int32_t * DCT_block) {
 117   int i;
 118   for(i=0;i<64;i++)
 119     DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
 120 }
 121
 122
 123
 124 void IDctSlow__c(  Q_LIST_ENTRY * InputData,
 125                 ogg_int16_t *QuantMatrix,
 126                 ogg_int16_t * OutputData ) {
 127   ogg_int32_t IntermediateData[64];
 128   ogg_int32_t * ip = IntermediateData;
 129   ogg_int16_t * op = OutputData;
 130
 131   ogg_int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
 132   ogg_int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
 133   ogg_int32_t t1, t2;
 134
 135   int loop;
 136
 137   dequant_slow( QuantMatrix, InputData, IntermediateData);
 138
 139   /* Inverse DCT on the rows now */
 140   for ( loop = 0; loop < 8; loop++){
 141     /* Check for non-zero values */
 142     if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
 143       t1 = (xC1S7 * ip[1]);
 144       t2 = (xC7S1 * ip[7]);
 145       t1 >>= 16;
 146       t2 >>= 16;
 147       _A = t1 + t2;
 148
 149       t1 = (xC7S1 * ip[1]);
 150       t2 = (xC1S7 * ip[7]);
 151       t1 >>= 16;
 152       t2 >>= 16;
 153       _B = t1 - t2;
 154
 155       t1 = (xC3S5 * ip[3]);
 156       t2 = (xC5S3 * ip[5]);
 157       t1 >>= 16;
 158       t2 >>= 16;
 159       _C = t1 + t2;
 160
 161       t1 = (xC3S5 * ip[5]);
 162       t2 = (xC5S3 * ip[3]);
 163       t1 >>= 16;
 164       t2 >>= 16;
 165       _D = t1 - t2;
 166
 167       t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
 168       t1 >>= 16;
 169       _Ad = t1;
 170
 171       t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
 172       t1 >>= 16;
 173       _Bd = t1;
 174
 175
 176       _Cd = _A + _C;
 177       _Dd = _B + _D;
 178
 179       t1 = (xC4S4 * (ogg_int16_t)(ip[0] + ip[4]));
 180       t1 >>= 16;
 181       _E = t1;
 182
 183       t1 = (xC4S4 * (ogg_int16_t)(ip[0] - ip[4]));
 184       t1 >>= 16;
 185       _F = t1;
 186
 187       t1 = (xC2S6 * ip[2]);
 188       t2 = (xC6S2 * ip[6]);
 189       t1 >>= 16;
 190       t2 >>= 16;
 191       _G = t1 + t2;
 192
 193       t1 = (xC6S2 * ip[2]);
 194       t2 = (xC2S6 * ip[6]);
 195       t1 >>= 16;
 196       t2 >>= 16;
 197       _H = t1 - t2;
 198
 199
 200       _Ed = _E - _G;
 201       _Gd = _E + _G;
 202
 203       _Add = _F + _Ad;
 204       _Bdd = _Bd - _H;
 205
 206       _Fd = _F - _Ad;
 207       _Hd = _Bd + _H;
 208
 209       /* Final sequence of operations over-write original inputs. */
 210       ip[0] = (ogg_int16_t)((_Gd + _Cd )   >> 0);
 211       ip[7] = (ogg_int16_t)((_Gd - _Cd )   >> 0);
 212
 213       ip[1] = (ogg_int16_t)((_Add + _Hd )  >> 0);
 214       ip[2] = (ogg_int16_t)((_Add - _Hd )  >> 0);
 215
 216       ip[3] = (ogg_int16_t)((_Ed + _Dd )   >> 0);
 217       ip[4] = (ogg_int16_t)((_Ed - _Dd )   >> 0);
 218
 219       ip[5] = (ogg_int16_t)((_Fd + _Bdd )  >> 0);
 220       ip[6] = (ogg_int16_t)((_Fd - _Bdd )  >> 0);
 221
 222     }
 223
 224     ip += 8;                    /* next row */
 225   }
 226
 227   ip = IntermediateData;
 228
 229   for ( loop = 0; loop < 8; loop++){
 230     /* Check for non-zero values (bitwise or faster than ||) */
 231     if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
 232          ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
 233
 234       t1 = (xC1S7 * ip[1*8]);
 235       t2 = (xC7S1 * ip[7*8]);
 236       t1 >>= 16;
 237       t2 >>= 16;
 238       _A = t1 + t2;
 239
 240       t1 = (xC7S1 * ip[1*8]);
 241       t2 = (xC1S7 * ip[7*8]);
 242       t1 >>= 16;
 243       t2 >>= 16;
 244       _B = t1 - t2;
 245
 246       t1 = (xC3S5 * ip[3*8]);
 247       t2 = (xC5S3 * ip[5*8]);
 248       t1 >>= 16;
 249       t2 >>= 16;
 250       _C = t1 + t2;
 251
 252       t1 = (xC3S5 * ip[5*8]);
 253       t2 = (xC5S3 * ip[3*8]);
 254       t1 >>= 16;
 255       t2 >>= 16;
 256       _D = t1 - t2;
 257
 258       t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
 259       t1 >>= 16;
 260       _Ad = t1;
 261
 262       t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
 263       t1 >>= 16;
 264       _Bd = t1;
 265
 266
 267       _Cd = _A + _C;
 268       _Dd = _B + _D;
 269
 270       t1 = (xC4S4 * (ogg_int16_t)(ip[0*8] + ip[4*8]));
 271       t1 >>= 16;
 272       _E = t1;
 273
 274       t1 = (xC4S4 * (ogg_int16_t)(ip[0*8] - ip[4*8]));
 275       t1 >>= 16;
 276       _F = t1;
 277
 278       t1 = (xC2S6 * ip[2*8]);
 279       t2 = (xC6S2 * ip[6*8]);
 280       t1 >>= 16;
 281       t2 >>= 16;
 282       _G = t1 + t2;
 283
 284       t1 = (xC6S2 * ip[2*8]);
 285       t2 = (xC2S6 * ip[6*8]);
 286       t1 >>= 16;
 287       t2 >>= 16;
 288       _H = t1 - t2;
 289
 290       _Ed = _E - _G;
 291       _Gd = _E + _G;
 292
 293       _Add = _F + _Ad;
 294       _Bdd = _Bd - _H;
 295
 296       _Fd = _F - _Ad;
 297       _Hd = _Bd + _H;
 298
 299       _Gd += IdctAdjustBeforeShift;
 300       _Add += IdctAdjustBeforeShift;
 301       _Ed += IdctAdjustBeforeShift;
 302       _Fd += IdctAdjustBeforeShift;
 303
 304       /* Final sequence of operations over-write original inputs. */
 305       op[0*8] = (ogg_int16_t)((_Gd + _Cd )   >> 4);
 306       op[7*8] = (ogg_int16_t)((_Gd - _Cd )   >> 4);
 307
 308       op[1*8] = (ogg_int16_t)((_Add + _Hd )  >> 4);
 309       op[2*8] = (ogg_int16_t)((_Add - _Hd )  >> 4);
 310
 311       op[3*8] = (ogg_int16_t)((_Ed + _Dd )   >> 4);
 312       op[4*8] = (ogg_int16_t)((_Ed - _Dd )   >> 4);
 313
 314       op[5*8] = (ogg_int16_t)((_Fd + _Bdd )  >> 4);
 315       op[6*8] = (ogg_int16_t)((_Fd - _Bdd )  >> 4);
 316     }else{
 317       op[0*8] = 0;
 318       op[7*8] = 0;
 319       op[1*8] = 0;
 320       op[2*8] = 0;
 321       op[3*8] = 0;
 322       op[4*8] = 0;
 323       op[5*8] = 0;
 324       op[6*8] = 0;
 325     }
 326
 327     ip++;                       /* next column */
 328     op++;
 329   }
 330 }
 331
 332 /************************
 333   x  x  x  x  0  0  0  0
 334   x  x  x  0  0  0  0  0
 335   x  x  0  0  0  0  0  0
 336   x  0  0  0  0  0  0  0
 337   0  0  0  0  0  0  0  0
 338   0  0  0  0  0  0  0  0
 339   0  0  0  0  0  0  0  0
 340   0  0  0  0  0  0  0  0
 341 *************************/
 342
 343 static void dequant_slow10( ogg_int16_t * dequant_coeffs,
 344                      ogg_int16_t * quantized_list,
 345                      ogg_int32_t * DCT_block){
 346   int i;
 347   memset(DCT_block,0, 128);
 348   for(i=0;i<10;i++)
 349     DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
 350
 351 }
 352
 353 void IDct10__c( Q_LIST_ENTRY * InputData,
 354              ogg_int16_t *QuantMatrix,
 355              ogg_int16_t * OutputData ){
 356   ogg_int32_t IntermediateData[64];
 357   ogg_int32_t * ip = IntermediateData;
 358   ogg_int16_t * op = OutputData;
 359
 360   ogg_int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
 361   ogg_int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
 362   ogg_int32_t t1, t2;
 363
 364   int loop;
 365
 366   dequant_slow10( QuantMatrix, InputData, IntermediateData);
 367
 368   /* Inverse DCT on the rows now */
 369   for ( loop = 0; loop < 4; loop++){
 370     /* Check for non-zero values */
 371     if ( ip[0] | ip[1] | ip[2] | ip[3] ){
 372       t1 = (xC1S7 * ip[1]);
 373       t1 >>= 16;
 374       _A = t1;
 375
 376       t1 = (xC7S1 * ip[1]);
 377       t1 >>= 16;
 378       _B = t1 ;
 379
 380       t1 = (xC3S5 * ip[3]);
 381       t1 >>= 16;
 382       _C = t1;
 383
 384       t2 = (xC5S3 * ip[3]);
 385       t2 >>= 16;
 386       _D = -t2;
 387
 388
 389       t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
 390       t1 >>= 16;
 391       _Ad = t1;
 392
 393       t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
 394       t1 >>= 16;
 395       _Bd = t1;
 396
 397
 398       _Cd = _A + _C;
 399       _Dd = _B + _D;
 400
 401       t1 = (xC4S4 * ip[0] );
 402       t1 >>= 16;
 403       _E = t1;
 404
 405       _F = t1;
 406
 407       t1 = (xC2S6 * ip[2]);
 408       t1 >>= 16;
 409       _G = t1;
 410
 411       t1 = (xC6S2 * ip[2]);
 412       t1 >>= 16;
 413       _H = t1 ;
 414
 415
 416       _Ed = _E - _G;
 417       _Gd = _E + _G;
 418
 419       _Add = _F + _Ad;
 420       _Bdd = _Bd - _H;
 421
 422       _Fd = _F - _Ad;
 423       _Hd = _Bd + _H;
 424
 425       /* Final sequence of operations over-write original inputs. */
 426       ip[0] = (ogg_int16_t)((_Gd + _Cd )   >> 0);
 427       ip[7] = (ogg_int16_t)((_Gd - _Cd )   >> 0);
 428
 429       ip[1] = (ogg_int16_t)((_Add + _Hd )  >> 0);
 430       ip[2] = (ogg_int16_t)((_Add - _Hd )  >> 0);
 431
 432       ip[3] = (ogg_int16_t)((_Ed + _Dd )   >> 0);
 433       ip[4] = (ogg_int16_t)((_Ed - _Dd )   >> 0);
 434
 435       ip[5] = (ogg_int16_t)((_Fd + _Bdd )  >> 0);
 436       ip[6] = (ogg_int16_t)((_Fd - _Bdd )  >> 0);
 437
 438     }
 439
 440     ip += 8;                    /* next row */
 441   }
 442
 443   ip = IntermediateData;
 444
 445   for ( loop = 0; loop < 8; loop++) {
 446     /* Check for non-zero values (bitwise or faster than ||) */
 447     if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] ) {
 448
 449       t1 = (xC1S7 * ip[1*8]);
 450       t1 >>= 16;
 451       _A = t1 ;
 452
 453       t1 = (xC7S1 * ip[1*8]);
 454       t1 >>= 16;
 455       _B = t1 ;
 456
 457       t1 = (xC3S5 * ip[3*8]);
 458       t1 >>= 16;
 459       _C = t1 ;
 460
 461       t2 = (xC5S3 * ip[3*8]);
 462       t2 >>= 16;
 463       _D = - t2;
 464
 465
 466       t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
 467       t1 >>= 16;
 468       _Ad = t1;
 469
 470       t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
 471       t1 >>= 16;
 472       _Bd = t1;
 473
 474
 475       _Cd = _A + _C;
 476       _Dd = _B + _D;
 477
 478       t1 = (xC4S4 * ip[0*8]);
 479       t1 >>= 16;
 480       _E = t1;
 481       _F = t1;
 482
 483       t1 = (xC2S6 * ip[2*8]);
 484       t1 >>= 16;
 485       _G = t1;
 486
 487       t1 = (xC6S2 * ip[2*8]);
 488       t1 >>= 16;
 489       _H = t1;
 490
 491
 492       _Ed = _E - _G;
 493       _Gd = _E + _G;
 494
 495       _Add = _F + _Ad;
 496       _Bdd = _Bd - _H;
 497
 498       _Fd = _F - _Ad;
 499       _Hd = _Bd + _H;
 500
 501       _Gd += IdctAdjustBeforeShift;
 502       _Add += IdctAdjustBeforeShift;
 503       _Ed += IdctAdjustBeforeShift;
 504       _Fd += IdctAdjustBeforeShift;
 505
 506       /* Final sequence of operations over-write original inputs. */
 507       op[0*8] = (ogg_int16_t)((_Gd + _Cd )   >> 4);
 508       op[7*8] = (ogg_int16_t)((_Gd - _Cd )   >> 4);
 509
 510       op[1*8] = (ogg_int16_t)((_Add + _Hd )  >> 4);
 511       op[2*8] = (ogg_int16_t)((_Add - _Hd )  >> 4);
 512
 513       op[3*8] = (ogg_int16_t)((_Ed + _Dd )   >> 4);
 514       op[4*8] = (ogg_int16_t)((_Ed - _Dd )   >> 4);
 515
 516       op[5*8] = (ogg_int16_t)((_Fd + _Bdd )  >> 4);
 517       op[6*8] = (ogg_int16_t)((_Fd - _Bdd )  >> 4);
 518     }else{
 519       op[0*8] = 0;
 520       op[7*8] = 0;
 521       op[1*8] = 0;
 522       op[2*8] = 0;
 523       op[3*8] = 0;
 524       op[4*8] = 0;
 525       op[5*8] = 0;
 526       op[6*8] = 0;
 527     }
 528
 529     ip++;                       /* next column */
 530     op++;
 531   }
 532 }
 533
 534 /***************************
 535   x   0   0  0  0  0  0  0
 536   0   0   0  0  0  0  0  0
 537   0   0   0  0  0  0  0  0
 538   0   0   0  0  0  0  0  0
 539   0   0   0  0  0  0  0  0
 540   0   0   0  0  0  0  0  0
 541   0   0   0  0  0  0  0  0
 542   0   0   0  0  0  0  0  0
 543 **************************/
 544
 545 void IDct1( Q_LIST_ENTRY * InputData,
 546             ogg_int16_t *QuantMatrix,
 547             ogg_int16_t * OutputData ){
 548   int loop;
 549
 550   ogg_int16_t  OutD;
 551
 552   OutD=(ogg_int16_t) ((ogg_int32_t)(InputData[0]*QuantMatrix[0]+15)>>5);
 553
 554   for(loop=0;loop<64;loop++)
 555     OutputData[loop]=OutD;
 556
 557 }
 558
 559 void dsp_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
 560 {
 561   funcs->IDctSlow = IDctSlow__c;
 562   funcs->IDct10 = IDct10__c;
 563   funcs->IDct3 = IDct10__c;
 564 #if defined(USE_ASM)
 565   if (cpu_flags & CPU_X86_MMX) {
 566     dsp_mmx_idct_init(funcs);
 567   }
 568 #endif
 569 }