theora-exp/lib/idct.c

   1 #include <string.h>
   2 #include <ogg/ogg.h>
   3 #include "dct.h"
   4 #include "idct.h"
   5
   6 /*Performs an inverse 8 point Type-II DCT transform.
   7   The output is scaled by a factor of 2 relative to the orthonormal version of
   8    the transform.
   9   _y: The buffer to store the result in.
  10       Data will be placed in every 8th entry (e.g., in a column of an 8x8
  11        block).
  12   _x: The input coefficients.
  13       The first 8 entries are used (e.g., from a row of an 8x8 block).*/
  14 static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){
  15   ogg_int32_t t[8];
  16   ogg_int32_t r;
  17   /*Stage 1:*/
  18   /*0-1 butterfly.*/
  19   t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16;
  20   t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16;
  21   /*2-3 rotation by 6pi/16.*/
  22   t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16);
  23   t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16);
  24   /*4-7 rotation by 7pi/16.*/
  25   t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16);
  26   /*5-6 rotation by 3pi/16.*/
  27   t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16);
  28   t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16);
  29   t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16);
  30   /*Stage 2:*/
  31   /*4-5 butterfly.*/
  32   r=t[4]+t[5];
  33   t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
  34   t[4]=r;
  35   /*7-6 butterfly.*/
  36   r=t[7]+t[6];
  37   t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
  38   t[7]=r;
  39   /*Stage 3:*/
  40   /*0-3 butterfly.*/
  41   r=t[0]+t[3];
  42   t[3]=t[0]-t[3];
  43   t[0]=r;
  44   /*1-2 butterfly.*/
  45   r=t[1]+t[2];
  46   t[2]=t[1]-t[2];
  47   t[1]=r;
  48   /*6-5 butterfly.*/
  49   r=t[6]+t[5];
  50   t[5]=t[6]-t[5];
  51   t[6]=r;
  52   /*Stage 4:*/
  53   /*0-7 butterfly.*/
  54   _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
  55   /*1-6 butterfly.*/
  56   _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
  57   /*2-5 butterfly.*/
  58   _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
  59   /*3-4 butterfly.*/
  60   _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
  61   _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
  62   _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
  63   _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
  64   _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
  65 }
  66
  67 /*Performs an inverse 8 point Type-II DCT transform.
  68   The output is scaled by a factor of 2 relative to the orthonormal version of
  69    the transform.
  70   _y: The buffer to store the result in.
  71       Data will be placed in every 8th entry (e.g., in a column of an 8x8
  72        block).
  73   _x: The input coefficients.
  74       Only the first 4 entries are used.
  75       The other 4 are assumed to be 0.*/
  76 static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){
  77   ogg_int32_t t[8];
  78   ogg_int32_t r;
  79   /*Stage 1:*/
  80   t[0]=OC_C4S4*_x[0]>>16;
  81   t[2]=OC_C6S2*_x[2]>>16;
  82   t[3]=OC_C2S6*_x[2]>>16;
  83   t[4]=OC_C7S1*_x[1]>>16;
  84   t[5]=-(OC_C5S3*_x[3]>>16);
  85   t[6]=OC_C3S5*_x[3]>>16;
  86   t[7]=OC_C1S7*_x[1]>>16;
  87   /*Stage 2:*/
  88   r=t[4]+t[5];
  89   t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
  90   t[4]=r;
  91   r=t[7]+t[6];
  92   t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
  93   t[7]=r;
  94   /*Stage 3:*/
  95   t[1]=t[0]+t[2];
  96   t[2]=t[0]-t[2];
  97   r=t[0]+t[3];
  98   t[3]=t[0]-t[3];
  99   t[0]=r;
 100   r=t[6]+t[5];
 101   t[5]=t[6]-t[5];
 102   t[6]=r;
 103   /*Stage 4:*/
 104   _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
 105   _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
 106   _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
 107   _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
 108   _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
 109   _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
 110   _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
 111   _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
 112 }
 113
 114 /*Performs an inverse 8 point Type-II DCT transform.
 115   The output is scaled by a factor of 2 relative to the orthonormal version of
 116    the transform.
 117   _y: The buffer to store the result in.
 118       Data will be placed in every 8th entry (e.g., in a column of an 8x8
 119        block).
 120   _x: The input coefficients.
 121       Only the first 3 entries are used.
 122       The other 5 are assumed to be 0.*/
 123 static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){
 124   ogg_int32_t t[8];
 125   ogg_int32_t r;
 126   /*Stage 1:*/
 127   t[0]=OC_C4S4*_x[0]>>16;
 128   t[2]=OC_C6S2*_x[2]>>16;
 129   t[3]=OC_C2S6*_x[2]>>16;
 130   t[4]=OC_C7S1*_x[1]>>16;
 131   t[7]=OC_C1S7*_x[1]>>16;
 132   /*Stage 2:*/
 133   t[5]=OC_C4S4*t[4]>>16;
 134   t[6]=OC_C4S4*t[7]>>16;
 135   /*Stage 3:*/
 136   t[1]=t[0]+t[2];
 137   t[2]=t[0]-t[2];
 138   r=t[0]+t[3];
 139   t[3]=t[0]-t[3];
 140   t[0]=r;
 141   r=t[6]+t[5];
 142   t[5]=t[6]-t[5];
 143   t[6]=r;
 144   /*Stage 4:*/
 145   _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
 146   _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
 147   _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
 148   _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
 149   _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
 150   _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
 151   _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
 152   _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
 153 }
 154
 155
 156 /*Performs an inverse 8 point Type-II DCT transform.
 157   The output is scaled by a factor of 2 relative to the orthonormal version of
 158    the transform.
 159   _y: The buffer to store the result in.
 160       Data will be placed in every 8th entry (e.g., in a column of an 8x8
 161        block).
 162   _x: The input coefficients.
 163       Only the first 2 entries are used.
 164       The other 6 are assumed to be 0.*/
 165 static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){
 166   ogg_int32_t t[8];
 167   ogg_int32_t r;
 168   /*Stage 1:*/
 169   t[0]=OC_C4S4*_x[0]>>16;
 170   t[4]=OC_C7S1*_x[1]>>16;
 171   t[7]=OC_C1S7*_x[1]>>16;
 172   /*Stage 2:*/
 173   t[5]=OC_C4S4*t[4]>>16;
 174   t[6]=OC_C4S4*t[7]>>16;
 175   /*Stage 3:*/
 176   r=t[6]+t[5];
 177   t[5]=t[6]-t[5];
 178   t[6]=r;
 179   /*Stage 4:*/
 180   _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
 181   _y[1<<3]=(ogg_int16_t)(t[0]+t[6]);
 182   _y[2<<3]=(ogg_int16_t)(t[0]+t[5]);
 183   _y[3<<3]=(ogg_int16_t)(t[0]+t[4]);
 184   _y[4<<3]=(ogg_int16_t)(t[0]-t[4]);
 185   _y[5<<3]=(ogg_int16_t)(t[0]-t[5]);
 186   _y[6<<3]=(ogg_int16_t)(t[0]-t[6]);
 187   _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
 188 }
 189
 190
 191 /*Performs an inverse 8 point Type-II DCT transform.
 192   The output is scaled by a factor of 2 relative to the orthonormal version of
 193    the transform.
 194   _y: The buffer to store the result in.
 195       Data will be placed in every 8th entry (e.g., in a column of an 8x8
 196        block).
 197   _x: The input coefficients.
 198       Only the first entry is used.
 199       The other 7 are assumed to be 0.*/
 200 static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){
 201   _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]=
 202    _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16);
 203 }
 204
 205 /*Performs an inverse 8x8 Type-II DCT transform.
 206   The input is assumed to be scaled by a factor of 4 relative to orthonormal
 207    version of the transform.
 208   _y: The buffer to store the result in.
 209       This may be the same as _x.
 210   _x: The input coefficients. */
 211 void oc_idct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
 212   const ogg_int16_t *in;
 213   ogg_int16_t       *end;
 214   ogg_int16_t       *out;
 215   ogg_int16_t        w[64];
 216   /*Transform rows of x into columns of w.*/
 217   for(in=_x,out=w,end=out+8;out<end;in+=8,out++)idct8(out,in);
 218   /*Transform rows of w into columns of y.*/
 219   for(in=w,out=_y,end=out+8;out<end;in+=8,out++)idct8(out,in);
 220   /*Adjust for scale factor.*/
 221   for(out=_y,end=out+64;out<end;out++)*out=(ogg_int16_t)(*out+8>>4);
 222 }
 223
 224 /*Performs an inverse 8x8 Type-II DCT transform.
 225   The input is assumed to be scaled by a factor of 4 relative to orthonormal
 226    version of the transform.
 227   All coefficients but the first 10 in zig-zag scan order are assumed to be 0:
 228    x  x  x  x  0  0  0  0
 229    x  x  x  0  0  0  0  0
 230    x  x  0  0  0  0  0  0
 231    x  0  0  0  0  0  0  0
 232    0  0  0  0  0  0  0  0
 233    0  0  0  0  0  0  0  0
 234    0  0  0  0  0  0  0  0
 235    0  0  0  0  0  0  0  0
 236   _y: The buffer to store the result in.
 237       This may be the same as _x.
 238   _x: The input coefficients. */
 239 void oc_idct8x8_10_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
 240   const ogg_int16_t *in;
 241   ogg_int16_t       *end;
 242   ogg_int16_t       *out;
 243   ogg_int16_t        w[64];
 244   /*Transform rows of x into columns of w.*/
 245   idct8_4(w,_x);
 246   idct8_3(w+1,_x+8);
 247   idct8_2(w+2,_x+16);
 248   idct8_1(w+3,_x+24);
 249   /*Transform rows of w into columns of y.*/
 250   for(in=w,out=_y,end=out+8;out<end;in+=8,out++)idct8_4(out,in);
 251   /*Adjust for scale factor.*/
 252   for(out=_y,end=out+64;out<end;out++)*out=(ogg_int16_t)(*out+8>>4);
 253 }