quicktime/ffmpeg/libavcodec/vp3dsp.c

   1 /*
   2  * Copyright (C) 2004 the ffmpeg project
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with this library; if not, write to the Free Software
  16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  17  */
  18
  19 /**
  20  * @file vp3dsp.c
  21  * Standard C DSP-oriented functions cribbed from the original VP3
  22  * source code.
  23  */
  24
  25 #include "common.h"
  26 #include "avcodec.h"
  27 #include "dsputil.h"
  28 #include "vp3data.h"
  29
  30 #define IdctAdjustBeforeShift 8
  31 #define xC1S7 64277
  32 #define xC2S6 60547
  33 #define xC3S5 54491
  34 #define xC4S4 46341
  35 #define xC5S3 36410
  36 #define xC6S2 25080
  37 #define xC7S1 12785
  38
  39 static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
  40 {
  41     int16_t *ip = input;
  42     uint8_t *cm = cropTbl + MAX_NEG_CROP;
  43
  44     int A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_;
  45     int _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
  46     int t1, t2;
  47
  48     int i, j;
  49
  50     /* Inverse DCT on the rows now */
  51     for (i = 0; i < 8; i++) {
  52         /* Check for non-zero values */
  53         if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
  54             t1 = (int32_t)(xC1S7 * ip[1]);
  55             t2 = (int32_t)(xC7S1 * ip[7]);
  56             t1 >>= 16;
  57             t2 >>= 16;
  58             A_ = t1 + t2;
  59
  60             t1 = (int32_t)(xC7S1 * ip[1]);
  61             t2 = (int32_t)(xC1S7 * ip[7]);
  62             t1 >>= 16;
  63             t2 >>= 16;
  64             B_ = t1 - t2;
  65
  66             t1 = (int32_t)(xC3S5 * ip[3]);
  67             t2 = (int32_t)(xC5S3 * ip[5]);
  68             t1 >>= 16;
  69             t2 >>= 16;
  70             C_ = t1 + t2;
  71
  72             t1 = (int32_t)(xC3S5 * ip[5]);
  73             t2 = (int32_t)(xC5S3 * ip[3]);
  74             t1 >>= 16;
  75             t2 >>= 16;
  76             D_ = t1 - t2;
  77
  78
  79             t1 = (int32_t)(xC4S4 * (A_ - C_));
  80             t1 >>= 16;
  81             _Ad = t1;
  82
  83             t1 = (int32_t)(xC4S4 * (B_ - D_));
  84             t1 >>= 16;
  85             _Bd = t1;
  86
  87
  88             _Cd = A_ + C_;
  89             _Dd = B_ + D_;
  90
  91             t1 = (int32_t)(xC4S4 * (ip[0] + ip[4]));
  92             t1 >>= 16;
  93             E_ = t1;
  94
  95             t1 = (int32_t)(xC4S4 * (ip[0] - ip[4]));
  96             t1 >>= 16;
  97             F_ = t1;
  98
  99             t1 = (int32_t)(xC2S6 * ip[2]);
 100             t2 = (int32_t)(xC6S2 * ip[6]);
 101             t1 >>= 16;
 102             t2 >>= 16;
 103             G_ = t1 + t2;
 104
 105             t1 = (int32_t)(xC6S2 * ip[2]);
 106             t2 = (int32_t)(xC2S6 * ip[6]);
 107             t1 >>= 16;
 108             t2 >>= 16;
 109             H_ = t1 - t2;
 110
 111
 112             _Ed = E_ - G_;
 113             _Gd = E_ + G_;
 114
 115             _Add = F_ + _Ad;
 116             _Bdd = _Bd - H_;
 117
 118             _Fd = F_ - _Ad;
 119             _Hd = _Bd + H_;
 120
 121             /*  Final sequence of operations over-write original inputs. */
 122             ip[0] = _Gd + _Cd ;
 123             ip[7] = _Gd - _Cd ;
 124
 125             ip[1] = _Add + _Hd;
 126             ip[2] = _Add - _Hd;
 127
 128             ip[3] = _Ed + _Dd ;
 129             ip[4] = _Ed - _Dd ;
 130
 131             ip[5] = _Fd + _Bdd;
 132             ip[6] = _Fd - _Bdd;
 133
 134         }
 135
 136         ip += 8;            /* next row */
 137     }
 138
 139     ip = input;
 140
 141     for ( i = 0; i < 8; i++) {
 142         /* Check for non-zero values (bitwise or faster than ||) */
 143         if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
 144              ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
 145
 146             t1 = (int32_t)(xC1S7 * ip[1*8]);
 147             t2 = (int32_t)(xC7S1 * ip[7*8]);
 148             t1 >>= 16;
 149             t2 >>= 16;
 150             A_ = t1 + t2;
 151
 152             t1 = (int32_t)(xC7S1 * ip[1*8]);
 153             t2 = (int32_t)(xC1S7 * ip[7*8]);
 154             t1 >>= 16;
 155             t2 >>= 16;
 156             B_ = t1 - t2;
 157
 158             t1 = (int32_t)(xC3S5 * ip[3*8]);
 159             t2 = (int32_t)(xC5S3 * ip[5*8]);
 160             t1 >>= 16;
 161             t2 >>= 16;
 162             C_ = t1 + t2;
 163
 164             t1 = (int32_t)(xC3S5 * ip[5*8]);
 165             t2 = (int32_t)(xC5S3 * ip[3*8]);
 166             t1 >>= 16;
 167             t2 >>= 16;
 168             D_ = t1 - t2;
 169
 170
 171             t1 = (int32_t)(xC4S4 * (A_ - C_));
 172             t1 >>= 16;
 173             _Ad = t1;
 174
 175             t1 = (int32_t)(xC4S4 * (B_ - D_));
 176             t1 >>= 16;
 177             _Bd = t1;
 178
 179
 180             _Cd = A_ + C_;
 181             _Dd = B_ + D_;
 182
 183             t1 = (int32_t)(xC4S4 * (ip[0*8] + ip[4*8]));
 184             t1 >>= 16;
 185             E_ = t1;
 186
 187             t1 = (int32_t)(xC4S4 * (ip[0*8] - ip[4*8]));
 188             t1 >>= 16;
 189             F_ = t1;
 190
 191             t1 = (int32_t)(xC2S6 * ip[2*8]);
 192             t2 = (int32_t)(xC6S2 * ip[6*8]);
 193             t1 >>= 16;
 194             t2 >>= 16;
 195             G_ = t1 + t2;
 196
 197             t1 = (int32_t)(xC6S2 * ip[2*8]);
 198             t2 = (int32_t)(xC2S6 * ip[6*8]);
 199             t1 >>= 16;
 200             t2 >>= 16;
 201             H_ = t1 - t2;
 202
 203
 204             _Ed = E_ - G_;
 205             _Gd = E_ + G_;
 206
 207             _Add = F_ + _Ad;
 208             _Bdd = _Bd - H_;
 209
 210             _Fd = F_ - _Ad;
 211             _Hd = _Bd + H_;
 212
 213             if(type==1){  //HACK
 214                 _Gd += 16*128;
 215                 _Add+= 16*128;
 216                 _Ed += 16*128;
 217                 _Fd += 16*128;
 218             }
 219             _Gd += IdctAdjustBeforeShift;
 220             _Add += IdctAdjustBeforeShift;
 221             _Ed += IdctAdjustBeforeShift;
 222             _Fd += IdctAdjustBeforeShift;
 223
 224             /* Final sequence of operations over-write original inputs. */
 225             if(type==0){
 226                 ip[0*8] = (_Gd + _Cd )  >> 4;
 227                 ip[7*8] = (_Gd - _Cd )  >> 4;
 228
 229                 ip[1*8] = (_Add + _Hd ) >> 4;
 230                 ip[2*8] = (_Add - _Hd ) >> 4;
 231
 232                 ip[3*8] = (_Ed + _Dd )  >> 4;
 233                 ip[4*8] = (_Ed - _Dd )  >> 4;
 234
 235                 ip[5*8] = (_Fd + _Bdd ) >> 4;
 236                 ip[6*8] = (_Fd - _Bdd ) >> 4;
 237             }else if(type==1){
 238                 dst[0*stride] = cm[(_Gd + _Cd )  >> 4];
 239                 dst[7*stride] = cm[(_Gd - _Cd )  >> 4];
 240
 241                 dst[1*stride] = cm[(_Add + _Hd ) >> 4];
 242                 dst[2*stride] = cm[(_Add - _Hd ) >> 4];
 243
 244                 dst[3*stride] = cm[(_Ed + _Dd )  >> 4];
 245                 dst[4*stride] = cm[(_Ed - _Dd )  >> 4];
 246
 247                 dst[5*stride] = cm[(_Fd + _Bdd ) >> 4];
 248                 dst[6*stride] = cm[(_Fd - _Bdd ) >> 4];
 249             }else{
 250                 dst[0*stride] = cm[dst[0*stride] + ((_Gd + _Cd )  >> 4)];
 251                 dst[7*stride] = cm[dst[7*stride] + ((_Gd - _Cd )  >> 4)];
 252
 253                 dst[1*stride] = cm[dst[1*stride] + ((_Add + _Hd ) >> 4)];
 254                 dst[2*stride] = cm[dst[2*stride] + ((_Add - _Hd ) >> 4)];
 255
 256                 dst[3*stride] = cm[dst[3*stride] + ((_Ed + _Dd )  >> 4)];
 257                 dst[4*stride] = cm[dst[4*stride] + ((_Ed - _Dd )  >> 4)];
 258
 259                 dst[5*stride] = cm[dst[5*stride] + ((_Fd + _Bdd ) >> 4)];
 260                 dst[6*stride] = cm[dst[6*stride] + ((_Fd - _Bdd ) >> 4)];
 261             }
 262
 263         } else {
 264             if(type==0){
 265                 ip[0*8] =
 266                 ip[1*8] =
 267                 ip[2*8] =
 268                 ip[3*8] =
 269                 ip[4*8] =
 270                 ip[5*8] =
 271                 ip[6*8] =
 272                 ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
 273             }else if(type==1){
 274                 dst[0*stride]=
 275                 dst[1*stride]=
 276                 dst[2*stride]=
 277                 dst[3*stride]=
 278                 dst[4*stride]=
 279                 dst[5*stride]=
 280                 dst[6*stride]=
 281                 dst[7*stride]= 128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
 282             }else{
 283                 if(ip[0*8]){
 284                     int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
 285                     dst[0*stride] = cm[dst[0*stride] + v];
 286                     dst[1*stride] = cm[dst[1*stride] + v];
 287                     dst[2*stride] = cm[dst[2*stride] + v];
 288                     dst[3*stride] = cm[dst[3*stride] + v];
 289                     dst[4*stride] = cm[dst[4*stride] + v];
 290                     dst[5*stride] = cm[dst[5*stride] + v];
 291                     dst[6*stride] = cm[dst[6*stride] + v];
 292                     dst[7*stride] = cm[dst[7*stride] + v];
 293                 }
 294             }
 295         }
 296
 297         ip++;            /* next column */
 298         dst++;
 299     }
 300 }
 301
 302 void ff_vp3_idct_c(DCTELEM *block/* align 16*/){
 303     idct(NULL, 0, block, 0);
 304 }
 305
 306 void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
 307     idct(dest, line_size, block, 1);
 308 }
 309
 310 void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
 311     idct(dest, line_size, block, 2);
 312 }